From 3f005e7de3db8d0b3f7a1f399aa061dc35b65864 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 26 Jul 2016 18:12:21 +0100 Subject: perf/core: Sched out groups atomically Groups of events are supposed to be scheduled atomically, such that it is possible to derive meaningful ratios between their values. We take great pains to achieve this when scheduling event groups to a PMU in group_sched_in(), calling {start,commit}_txn() (which fall back to perf_pmu_{disable,enable}() if necessary) to provide this guarantee. However we don't mirror this in group_sched_out(), and in some cases events will not be scheduled out atomically. For example, if we disable an event group with PERF_EVENT_IOC_DISABLE, we'll cross-call __perf_event_disable() for the group leader, and will call group_sched_out() without having first disabled the relevant PMU. We will disable/enable the PMU around each pmu->del() call, but between each call the PMU will be enabled and events may count. Avoid this by explicitly disabling and enabling the PMU around event removal in group_sched_out(), mirroring what we do in group_sched_in(). Signed-off-by: Mark Rutland Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Link: http://lkml.kernel.org/r/1469553141-28314-1-git-send-email-mark.rutland@arm.com Signed-off-by: Ingo Molnar diff --git a/kernel/events/core.c b/kernel/events/core.c index 1903b8f..11f6bbe 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -1796,6 +1796,8 @@ group_sched_out(struct perf_event *group_event, struct perf_event *event; int state = group_event->state; + perf_pmu_disable(ctx->pmu); + event_sched_out(group_event, cpuctx, ctx); /* @@ -1804,6 +1806,8 @@ group_sched_out(struct perf_event *group_event, list_for_each_entry(event, &group_event->sibling_list, group_entry) event_sched_out(event, cpuctx, ctx); + perf_pmu_enable(ctx->pmu); + if (state == PERF_EVENT_STATE_ACTIVE && group_event->attr.exclusive) cpuctx->exclusive = 0; } -- cgit v0.10.2 From 09e61b4f78498bd9f213b0a536e80b79507ea89f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 6 Jul 2016 18:02:43 +0200 Subject: perf/x86/intel: Rework the large PEBS setup code In order to allow optimizing perf_pmu_sched_task() we must ensure perf_sched_cb_{inc,dec}() are no longer called from NMI context; this means that pmu::{start,stop}() can no longer use them. Prepare for this by reworking the whole large PEBS setup code. The current code relied on the cpuc->pebs_enabled state, however since that reflects the current active state as per pmu::{start,stop}() we can no longer rely on this. Introduce two counters: cpuc->n_pebs and cpuc->n_large_pebs which count the total number of PEBS events and the number of PEBS events that have FREERUNNING set, resp.. With this we can tell if the current setup requires a single record interrupt threshold or can use a larger buffer. This also improves the code in that it re-enables the large threshold once the PEBS event that required single record gets removed. 
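In rough terms (a simplified restatement of the pebs_update_threshold() helper added below, not a separate change), the threshold choice reduces to comparing the two new counters:

	/* All PEBS events tolerate large PEBS: batch many records per PMI. */
	if (cpuc->n_pebs == cpuc->n_large_pebs)
		threshold = ds->pebs_absolute_maximum -
			    x86_pmu.max_pebs_events * x86_pmu.pebs_record_size;
	else	/* at least one event needs a PMI per record */
		threshold = ds->pebs_buffer_base + x86_pmu.pebs_record_size;
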
Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 7ce9f3f..c791ff9 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -806,9 +806,55 @@ struct event_constraint *intel_pebs_constraints(struct perf_event *event) return &emptyconstraint; } -static inline bool pebs_is_enabled(struct cpu_hw_events *cpuc) +/* + * We need the sched_task callback even for per-cpu events when we use + * the large interrupt threshold, such that we can provide PID and TID + * to PEBS samples. + */ +static inline bool pebs_needs_sched_cb(struct cpu_hw_events *cpuc) +{ + return cpuc->n_pebs && (cpuc->n_pebs == cpuc->n_large_pebs); +} + +static inline void pebs_update_threshold(struct cpu_hw_events *cpuc) +{ + struct debug_store *ds = cpuc->ds; + u64 threshold; + + if (cpuc->n_pebs == cpuc->n_large_pebs) { + threshold = ds->pebs_absolute_maximum - + x86_pmu.max_pebs_events * x86_pmu.pebs_record_size; + } else { + threshold = ds->pebs_buffer_base + x86_pmu.pebs_record_size; + } + + ds->pebs_interrupt_threshold = threshold; +} + +static void +pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc, struct pmu *pmu) +{ + if (needed_cb != pebs_needs_sched_cb(cpuc)) { + if (!needed_cb) + perf_sched_cb_inc(pmu); + else + perf_sched_cb_dec(pmu); + + pebs_update_threshold(cpuc); + } +} + +static void intel_pmu_pebs_add(struct perf_event *event) { - return (cpuc->pebs_enabled & ((1ULL << MAX_PEBS_EVENTS) - 1)); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; + bool needed_cb = pebs_needs_sched_cb(cpuc); + + cpuc->n_pebs++; + if (hwc->flags & PERF_X86_EVENT_FREERUNNING) + cpuc->n_large_pebs++; + + pebs_update_state(needed_cb, cpuc, event->ctx->pmu); } void intel_pmu_pebs_enable(struct perf_event *event) @@ -816,12 +862,11 @@ void intel_pmu_pebs_enable(struct perf_event *event) struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct hw_perf_event *hwc = &event->hw; struct debug_store *ds = cpuc->ds; - bool first_pebs; - u64 threshold; + + intel_pmu_pebs_add(event); hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT; - first_pebs = !pebs_is_enabled(cpuc); cpuc->pebs_enabled |= 1ULL << hwc->idx; if (event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT) @@ -830,46 +875,34 @@ void intel_pmu_pebs_enable(struct perf_event *event) cpuc->pebs_enabled |= 1ULL << 63; /* - * When the event is constrained enough we can use a larger - * threshold and run the event with less frequent PMI. + * Use auto-reload if possible to save a MSR write in the PMI. + * This must be done in pmu::start(), because PERF_EVENT_IOC_PERIOD. 
*/ - if (hwc->flags & PERF_X86_EVENT_FREERUNNING) { - threshold = ds->pebs_absolute_maximum - - x86_pmu.max_pebs_events * x86_pmu.pebs_record_size; - - if (first_pebs) - perf_sched_cb_inc(event->ctx->pmu); - } else { - threshold = ds->pebs_buffer_base + x86_pmu.pebs_record_size; - - /* - * If not all events can use larger buffer, - * roll back to threshold = 1 - */ - if (!first_pebs && - (ds->pebs_interrupt_threshold > threshold)) - perf_sched_cb_dec(event->ctx->pmu); - } - - /* Use auto-reload if possible to save a MSR write in the PMI */ if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) { ds->pebs_event_reset[hwc->idx] = (u64)(-hwc->sample_period) & x86_pmu.cntval_mask; } +} + +static void intel_pmu_pebs_del(struct perf_event *event) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; + bool needed_cb = pebs_needs_sched_cb(cpuc); + + cpuc->n_pebs--; + if (hwc->flags & PERF_X86_EVENT_FREERUNNING) + cpuc->n_large_pebs--; - if (first_pebs || ds->pebs_interrupt_threshold > threshold) - ds->pebs_interrupt_threshold = threshold; + pebs_update_state(needed_cb, cpuc, event->ctx->pmu); } void intel_pmu_pebs_disable(struct perf_event *event) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct hw_perf_event *hwc = &event->hw; - struct debug_store *ds = cpuc->ds; - bool large_pebs = ds->pebs_interrupt_threshold > - ds->pebs_buffer_base + x86_pmu.pebs_record_size; - if (large_pebs) + if (cpuc->n_pebs == cpuc->n_large_pebs) intel_pmu_drain_pebs_buffer(); cpuc->pebs_enabled &= ~(1ULL << hwc->idx); @@ -879,13 +912,12 @@ void intel_pmu_pebs_disable(struct perf_event *event) else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST) cpuc->pebs_enabled &= ~(1ULL << 63); - if (large_pebs && !pebs_is_enabled(cpuc)) - perf_sched_cb_dec(event->ctx->pmu); - if (cpuc->enabled) wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled); hwc->config |= ARCH_PERFMON_EVENTSEL_INT; + + intel_pmu_pebs_del(event); } void intel_pmu_pebs_enable_all(void) diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 8c4a477..94b8f27 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -194,6 +194,8 @@ struct cpu_hw_events { */ struct debug_store *ds; u64 pebs_enabled; + int n_pebs; + int n_large_pebs; /* * Intel LBR bits diff --git a/kernel/events/core.c b/kernel/events/core.c index 11f6bbe..57aff71 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -2818,6 +2818,10 @@ void perf_sched_cb_inc(struct pmu *pmu) /* * This function provides the context switch callback to the lower code * layer. It is invoked ONLY when the context switch callback is enabled. + * + * This callback is relevant even to per-cpu events; for example multi event + * PEBS requires this to provide PID/TID information. This requires we flush + * all queued PEBS records before we context switch to a new task. */ static void perf_pmu_sched_task(struct task_struct *prev, struct task_struct *next, -- cgit v0.10.2 From 68f7082ffb0575154ccdec36109e293174f48a4c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 6 Jul 2016 18:02:43 +0200 Subject: perf/x86: Ensure perf_sched_cb_{inc,dec}() is only called from pmu::{add,del}() Currently perf_sched_cb_{inc,dec}() are called from pmu::{start,stop}(), which has the problem that this can happen from NMI context, this is making it hard to optimize perf_pmu_sched_task(). 
Furthermore, we really only need this accounting on pmu::{add,del}(), so doing it from pmu::{start,stop}() is doing more work than we really need. Introduce x86_pmu::{add,del}() and wire up the LBR and PEBS. Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index d0efb5c..18a1acf 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -1201,6 +1201,9 @@ static int x86_pmu_add(struct perf_event *event, int flags) * If group events scheduling transaction was started, * skip the schedulability test here, it will be performed * at commit time (->commit_txn) as a whole. + * + * If commit fails, we'll call ->del() on all events + * for which ->add() was called. */ if (cpuc->txn_flags & PERF_PMU_TXN_ADD) goto done_collect; @@ -1223,6 +1226,14 @@ done_collect: cpuc->n_added += n - n0; cpuc->n_txn += n - n0; + if (x86_pmu.add) { + /* + * This is before x86_pmu_enable() will call x86_pmu_start(), + * so we enable LBRs before an event needs them etc.. + */ + x86_pmu.add(event); + } + ret = 0; out: return ret; @@ -1346,7 +1357,7 @@ static void x86_pmu_del(struct perf_event *event, int flags) event->hw.flags &= ~PERF_X86_EVENT_COMMITTED; /* - * If we're called during a txn, we don't need to do anything. + * If we're called during a txn, we only need to undo x86_pmu.add. * The events never got scheduled and ->cancel_txn will truncate * the event_list. * @@ -1354,7 +1365,7 @@ static void x86_pmu_del(struct perf_event *event, int flags) * an event added during that same TXN. */ if (cpuc->txn_flags & PERF_PMU_TXN_ADD) - return; + goto do_del; /* * Not a TXN, therefore cleanup properly. @@ -1384,6 +1395,15 @@ static void x86_pmu_del(struct perf_event *event, int flags) --cpuc->n_events; perf_event_update_userpage(event); + +do_del: + if (x86_pmu.del) { + /* + * This is after x86_pmu_stop(); so we disable LBRs after any + * event can need them etc.. 
+ */ + x86_pmu.del(event); + } } int x86_pmu_handle_irq(struct pt_regs *regs) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 2cbde2f..88792f8 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -1907,13 +1907,6 @@ static void intel_pmu_disable_event(struct perf_event *event) cpuc->intel_ctrl_host_mask &= ~(1ull << hwc->idx); cpuc->intel_cp_status &= ~(1ull << hwc->idx); - /* - * must disable before any actual event - * because any event may be combined with LBR - */ - if (needs_branch_stack(event)) - intel_pmu_lbr_disable(event); - if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { intel_pmu_disable_fixed(hwc); return; @@ -1925,6 +1918,14 @@ static void intel_pmu_disable_event(struct perf_event *event) intel_pmu_pebs_disable(event); } +static void intel_pmu_del_event(struct perf_event *event) +{ + if (needs_branch_stack(event)) + intel_pmu_lbr_del(event); + if (event->attr.precise_ip) + intel_pmu_pebs_del(event); +} + static void intel_pmu_enable_fixed(struct hw_perf_event *hwc) { int idx = hwc->idx - INTEL_PMC_IDX_FIXED; @@ -1968,12 +1969,6 @@ static void intel_pmu_enable_event(struct perf_event *event) intel_pmu_enable_bts(hwc->config); return; } - /* - * must enabled before any actual event - * because any event may be combined with LBR - */ - if (needs_branch_stack(event)) - intel_pmu_lbr_enable(event); if (event->attr.exclude_host) cpuc->intel_ctrl_guest_mask |= (1ull << hwc->idx); @@ -1994,6 +1989,14 @@ static void intel_pmu_enable_event(struct perf_event *event) __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE); } +static void intel_pmu_add_event(struct perf_event *event) +{ + if (event->attr.precise_ip) + intel_pmu_pebs_add(event); + if (needs_branch_stack(event)) + intel_pmu_lbr_add(event); +} + /* * Save and restart an expired event. 
Called by NMI contexts, * so it has to be careful about preempting normal event ops: @@ -3290,6 +3293,8 @@ static __initconst const struct x86_pmu intel_pmu = { .enable_all = intel_pmu_enable_all, .enable = intel_pmu_enable_event, .disable = intel_pmu_disable_event, + .add = intel_pmu_add_event, + .del = intel_pmu_del_event, .hw_config = intel_pmu_hw_config, .schedule_events = x86_schedule_events, .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index c791ff9..248023f 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -844,7 +844,7 @@ pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc, struct pmu *pmu) } } -static void intel_pmu_pebs_add(struct perf_event *event) +void intel_pmu_pebs_add(struct perf_event *event) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct hw_perf_event *hwc = &event->hw; @@ -863,8 +863,6 @@ void intel_pmu_pebs_enable(struct perf_event *event) struct hw_perf_event *hwc = &event->hw; struct debug_store *ds = cpuc->ds; - intel_pmu_pebs_add(event); - hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT; cpuc->pebs_enabled |= 1ULL << hwc->idx; @@ -884,7 +882,7 @@ void intel_pmu_pebs_enable(struct perf_event *event) } } -static void intel_pmu_pebs_del(struct perf_event *event) +void intel_pmu_pebs_del(struct perf_event *event) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct hw_perf_event *hwc = &event->hw; @@ -916,8 +914,6 @@ void intel_pmu_pebs_disable(struct perf_event *event) wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled); hwc->config |= ARCH_PERFMON_EVENTSEL_INT; - - intel_pmu_pebs_del(event); } void intel_pmu_pebs_enable_all(void) diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c index 707d358..e7b58c2 100644 --- a/arch/x86/events/intel/lbr.c +++ b/arch/x86/events/intel/lbr.c @@ -422,7 +422,7 @@ static inline bool branch_user_callstack(unsigned br_sel) return (br_sel & X86_BR_USER) && (br_sel & X86_BR_CALL_STACK); } -void intel_pmu_lbr_enable(struct perf_event *event) +void intel_pmu_lbr_add(struct perf_event *event) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct x86_perf_task_context *task_ctx; @@ -450,7 +450,7 @@ void intel_pmu_lbr_enable(struct perf_event *event) perf_sched_cb_inc(event->ctx->pmu); } -void intel_pmu_lbr_disable(struct perf_event *event) +void intel_pmu_lbr_del(struct perf_event *event) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct x86_perf_task_context *task_ctx; diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 94b8f27..aa6ea5a 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -510,6 +510,8 @@ struct x86_pmu { void (*enable_all)(int added); void (*enable)(struct perf_event *); void (*disable)(struct perf_event *); + void (*add)(struct perf_event *); + void (*del)(struct perf_event *); int (*hw_config)(struct perf_event *event); int (*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign); unsigned eventsel; @@ -890,6 +892,10 @@ extern struct event_constraint intel_skl_pebs_event_constraints[]; struct event_constraint *intel_pebs_constraints(struct perf_event *event); +void intel_pmu_pebs_add(struct perf_event *event); + +void intel_pmu_pebs_del(struct perf_event *event); + void intel_pmu_pebs_enable(struct perf_event *event); void intel_pmu_pebs_disable(struct perf_event *event); @@ -908,9 +914,9 @@ u64 lbr_from_signext_quirk_wr(u64 val); void intel_pmu_lbr_reset(void); -void 
intel_pmu_lbr_enable(struct perf_event *event); +void intel_pmu_lbr_add(struct perf_event *event); -void intel_pmu_lbr_disable(struct perf_event *event); +void intel_pmu_lbr_del(struct perf_event *event); void intel_pmu_lbr_enable_all(bool pmi); -- cgit v0.10.2 From c3a61a2c5c6b6b4dd878812507c681c9aec3ed17 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 7 Jul 2016 19:37:52 +0200 Subject: perf/x86/intel: Eliminate dead code in intel_pmu_lbr_del() Since pmu::del() is always called under perf_pmu_disable(), the block conditional on cpuc->enabled is dead. Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c index e7b58c2..1bd72e9 100644 --- a/arch/x86/events/intel/lbr.c +++ b/arch/x86/events/intel/lbr.c @@ -467,12 +467,6 @@ void intel_pmu_lbr_del(struct perf_event *event) cpuc->lbr_users--; WARN_ON_ONCE(cpuc->lbr_users < 0); perf_sched_cb_dec(event->ctx->pmu); - - if (cpuc->enabled && !cpuc->lbr_users) { - __intel_pmu_lbr_disable(); - /* avoid stale pointer */ - cpuc->lbr_context = NULL; - } } void intel_pmu_lbr_enable_all(bool pmi) -- cgit v0.10.2 From a5dcff628a678b9f4535155662f81c5cda066bc7 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 7 Jul 2016 19:37:52 +0200 Subject: perf/x86/intel: Remove redundant test from intel_pmu_lbr_add() By the time we call pmu::add(), event->ctx must be set, and we even already rely on this, so remove that test from intel_pmu_lbr_add(). Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c index 1bd72e9..439b09d 100644 --- a/arch/x86/events/intel/lbr.c +++ b/arch/x86/events/intel/lbr.c @@ -440,8 +440,7 @@ void intel_pmu_lbr_add(struct perf_event *event) } cpuc->br_sel = event->hw.branch_reg.reg; - if (branch_user_callstack(cpuc->br_sel) && event->ctx && - event->ctx->task_ctx_data) { + if (branch_user_callstack(cpuc->br_sel) && event->ctx->task_ctx_data) { task_ctx = event->ctx->task_ctx_data; task_ctx->lbr_callstack_users++; } -- cgit v0.10.2 From 3e2c1a67d616dbc1034bc39448cd5f4aa3bd3cca Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 7 Jul 2016 19:37:52 +0200 Subject: perf/x86/intel: Clean up LBR state tracking The lbr_context logic confused me; it appears to me to try and do the same thing the pmu::sched_task() callback does now, but limited to per-task events. So rip it out. Afaict this should also improve performance, because I think the current code can end up doing lbr_reset() twice, once from the pmu::add() and then again from pmu::sched_task(), and MSR writes (all 3*16 of them) are expensive!! While thinking through the cases that need the reset it occured to me the first install of an event in an active context needs to reset the LBR (who knows what crap is in there), but detecting this case is somewhat hard. 
Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c index 439b09d..fc6cf21 100644 --- a/arch/x86/events/intel/lbr.c +++ b/arch/x86/events/intel/lbr.c @@ -380,7 +380,6 @@ static void __intel_pmu_lbr_save(struct x86_perf_task_context *task_ctx) void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in) { - struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct x86_perf_task_context *task_ctx; /* @@ -390,31 +389,21 @@ void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in) */ task_ctx = ctx ? ctx->task_ctx_data : NULL; if (task_ctx) { - if (sched_in) { + if (sched_in) __intel_pmu_lbr_restore(task_ctx); - cpuc->lbr_context = ctx; - } else { + else __intel_pmu_lbr_save(task_ctx); - } return; } /* - * When sampling the branck stack in system-wide, it may be - * necessary to flush the stack on context switch. This happens - * when the branch stack does not tag its entries with the pid - * of the current task. Otherwise it becomes impossible to - * associate a branch entry with a task. This ambiguity is more - * likely to appear when the branch stack supports priv level - * filtering and the user sets it to monitor only at the user - * level (which could be a useful measurement in system-wide - * mode). In that case, the risk is high of having a branch - * stack with branch from multiple tasks. - */ - if (sched_in) { + * Since a context switch can flip the address space and LBR entries + * are not tagged with an identifier, we need to wipe the LBR, even for + * per-cpu events. You simply cannot resolve the branches from the old + * address space. + */ + if (sched_in) intel_pmu_lbr_reset(); - cpuc->lbr_context = ctx; - } } static inline bool branch_user_callstack(unsigned br_sel) @@ -430,14 +419,6 @@ void intel_pmu_lbr_add(struct perf_event *event) if (!x86_pmu.lbr_nr) return; - /* - * Reset the LBR stack if we changed task context to - * avoid data leaks. - */ - if (event->ctx->task && cpuc->lbr_context != event->ctx) { - intel_pmu_lbr_reset(); - cpuc->lbr_context = event->ctx; - } cpuc->br_sel = event->hw.branch_reg.reg; if (branch_user_callstack(cpuc->br_sel) && event->ctx->task_ctx_data) { @@ -445,8 +426,28 @@ void intel_pmu_lbr_add(struct perf_event *event) task_ctx->lbr_callstack_users++; } - cpuc->lbr_users++; + /* + * Request pmu::sched_task() callback, which will fire inside the + * regular perf event scheduling, so that call will: + * + * - restore or wipe; when LBR-callstack, + * - wipe; otherwise, + * + * when this is from __perf_event_task_sched_in(). + * + * However, if this is from perf_install_in_context(), no such callback + * will follow and we'll need to reset the LBR here if this is the + * first LBR event. + * + * The problem is, we cannot tell these cases apart... but we can + * exclude the biggest chunk of cases by looking at + * event->total_time_running. An event that has accrued runtime cannot + * be 'new'. Conversely, a new event can get installed through the + * context switch path for the first time. 
+ */ perf_sched_cb_inc(event->ctx->pmu); + if (!cpuc->lbr_users++ && !event->total_time_running) + intel_pmu_lbr_reset(); } void intel_pmu_lbr_del(struct perf_event *event) diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index aa6ea5a..5874d8d 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -201,7 +201,6 @@ struct cpu_hw_events { * Intel LBR bits */ int lbr_users; - void *lbr_context; struct perf_branch_stack lbr_stack; struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES]; struct er_account *lbr_sel; -- cgit v0.10.2 From e48c178814b4a33f84f62d01f5a601ebd57fbba8 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 6 Jul 2016 09:18:30 +0200 Subject: perf/core: Optimize perf_pmu_sched_task() For perf record -b, which requires the pmu::sched_task callback the current code is rather expensive: 7.68% sched-pipe [kernel.vmlinux] [k] perf_pmu_sched_task 5.95% sched-pipe [kernel.vmlinux] [k] __switch_to 5.20% sched-pipe [kernel.vmlinux] [k] __intel_pmu_disable_all 3.95% sched-pipe perf [.] worker_thread The problem is that it will iterate all registered PMUs, most of which will not have anything to do. Avoid this by keeping an explicit list of PMUs that have requested the callback. The perf_sched_cb_{inc,dec}() functions already takes the required pmu argument, and now that these functions are no longer called from NMI context we can use them to manage a list. With this patch applied the function doesn't show up in the top 4 anymore (it dropped to 18th place). 6.67% sched-pipe [kernel.vmlinux] [k] __switch_to 6.18% sched-pipe [kernel.vmlinux] [k] __intel_pmu_disable_all 3.92% sched-pipe [kernel.vmlinux] [k] switch_mm_irqs_off 3.71% sched-pipe perf [.] worker_thread Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 2b6b43c..529c41f 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -774,6 +774,9 @@ struct perf_cpu_context { #ifdef CONFIG_CGROUP_PERF struct perf_cgroup *cgrp; #endif + + struct list_head sched_cb_entry; + int sched_cb_usage; }; struct perf_output_handle { diff --git a/kernel/events/core.c b/kernel/events/core.c index 57aff71..803481c 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -2805,13 +2805,26 @@ unlock: } } +static DEFINE_PER_CPU(struct list_head, sched_cb_list); + void perf_sched_cb_dec(struct pmu *pmu) { + struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); + this_cpu_dec(perf_sched_cb_usages); + + if (!--cpuctx->sched_cb_usage) + list_del(&cpuctx->sched_cb_entry); } + void perf_sched_cb_inc(struct pmu *pmu) { + struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); + + if (!cpuctx->sched_cb_usage++) + list_add(&cpuctx->sched_cb_entry, this_cpu_ptr(&sched_cb_list)); + this_cpu_inc(perf_sched_cb_usages); } @@ -2829,34 +2842,24 @@ static void perf_pmu_sched_task(struct task_struct *prev, { struct perf_cpu_context *cpuctx; struct pmu *pmu; - unsigned long flags; if (prev == next) return; - local_irq_save(flags); - - rcu_read_lock(); - - list_for_each_entry_rcu(pmu, &pmus, entry) { - if (pmu->sched_task) { - cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); - - perf_ctx_lock(cpuctx, cpuctx->task_ctx); + list_for_each_entry(cpuctx, this_cpu_ptr(&sched_cb_list), 
sched_cb_entry) { + pmu = cpuctx->unique_pmu; /* software PMUs will not have sched_task */ - perf_pmu_disable(pmu); + if (WARN_ON_ONCE(!pmu->sched_task)) + continue; - pmu->sched_task(cpuctx->task_ctx, sched_in); + perf_ctx_lock(cpuctx, cpuctx->task_ctx); + perf_pmu_disable(pmu); - perf_pmu_enable(pmu); + pmu->sched_task(cpuctx->task_ctx, sched_in); - perf_ctx_unlock(cpuctx, cpuctx->task_ctx); - } + perf_pmu_enable(pmu); + perf_ctx_unlock(cpuctx, cpuctx->task_ctx); } - - rcu_read_unlock(); - - local_irq_restore(flags); } static void perf_event_switch(struct task_struct *task, @@ -10393,6 +10396,8 @@ static void __init perf_event_init_all_cpus(void) INIT_LIST_HEAD(&per_cpu(pmu_sb_events.list, cpu)); raw_spin_lock_init(&per_cpu(pmu_sb_events.lock, cpu)); + + INIT_LIST_HEAD(&per_cpu(sched_cb_list, cpu)); } } -- cgit v0.10.2 From bdfaa2eecd5f6ca0cb5cff2bc7a974a15a2fd21b Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 17 Aug 2016 17:37:04 +0200 Subject: uprobes: Rename the "struct page *" args of __replace_page() Purely cosmetic, no changes in the compiled code. Perhaps it is just me but I can hardly read __replace_page() because I can't distinguish "page" from "kpage" and because I need to look at the caller to to ensure that, say, kpage is really the new page and the code is correct. Rename them to old_page and new_page, this matches the caller. Signed-off-by: Oleg Nesterov Cc: Alexander Shishkin Cc: Alexei Starovoitov Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Brenden Blanco Cc: Jiri Olsa Cc: Johannes Weiner Cc: Linus Torvalds Cc: Michal Hocko Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Vladimir Davydov Link: http://lkml.kernel.org/r/20160817153704.GC29724@redhat.com Signed-off-by: Ingo Molnar diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 8c50276..d4129bb 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -150,7 +150,7 @@ static loff_t vaddr_to_offset(struct vm_area_struct *vma, unsigned long vaddr) * Returns 0 on success, -EFAULT on failure. 
*/ static int __replace_page(struct vm_area_struct *vma, unsigned long addr, - struct page *page, struct page *kpage) + struct page *old_page, struct page *new_page) { struct mm_struct *mm = vma->vm_mm; spinlock_t *ptl; @@ -161,49 +161,49 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr, const unsigned long mmun_end = addr + PAGE_SIZE; struct mem_cgroup *memcg; - err = mem_cgroup_try_charge(kpage, vma->vm_mm, GFP_KERNEL, &memcg, + err = mem_cgroup_try_charge(new_page, vma->vm_mm, GFP_KERNEL, &memcg, false); if (err) return err; /* For try_to_free_swap() and munlock_vma_page() below */ - lock_page(page); + lock_page(old_page); mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end); err = -EAGAIN; - ptep = page_check_address(page, mm, addr, &ptl, 0); + ptep = page_check_address(old_page, mm, addr, &ptl, 0); if (!ptep) { - mem_cgroup_cancel_charge(kpage, memcg, false); + mem_cgroup_cancel_charge(new_page, memcg, false); goto unlock; } - get_page(kpage); - page_add_new_anon_rmap(kpage, vma, addr, false); - mem_cgroup_commit_charge(kpage, memcg, false, false); - lru_cache_add_active_or_unevictable(kpage, vma); + get_page(new_page); + page_add_new_anon_rmap(new_page, vma, addr, false); + mem_cgroup_commit_charge(new_page, memcg, false, false); + lru_cache_add_active_or_unevictable(new_page, vma); - if (!PageAnon(page)) { - dec_mm_counter(mm, mm_counter_file(page)); + if (!PageAnon(old_page)) { + dec_mm_counter(mm, mm_counter_file(old_page)); inc_mm_counter(mm, MM_ANONPAGES); } flush_cache_page(vma, addr, pte_pfn(*ptep)); ptep_clear_flush_notify(vma, addr, ptep); - set_pte_at_notify(mm, addr, ptep, mk_pte(kpage, vma->vm_page_prot)); + set_pte_at_notify(mm, addr, ptep, mk_pte(new_page, vma->vm_page_prot)); - page_remove_rmap(page, false); - if (!page_mapped(page)) - try_to_free_swap(page); + page_remove_rmap(old_page, false); + if (!page_mapped(old_page)) + try_to_free_swap(old_page); pte_unmap_unlock(ptep, ptl); if (vma->vm_flags & VM_LOCKED) - munlock_vma_page(page); - put_page(page); + munlock_vma_page(old_page); + put_page(old_page); err = 0; unlock: mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); - unlock_page(page); + unlock_page(old_page); return err; } -- cgit v0.10.2 From 29dd3288705f26cc27663e79061209dabce2d5b9 Mon Sep 17 00:00:00 2001 From: Madhavan Srinivasan Date: Wed, 17 Aug 2016 15:06:08 +0530 Subject: bitmap.h, perf/core: Fix the mask in perf_output_sample_regs() When decoding the perf_regs mask in perf_output_sample_regs(), we loop through the mask using find_first_bit and find_next_bit functions. While the exisiting code works fine in most of the case, the logic is broken for big-endian 32-bit kernels. When reading a u64 mask using (u32 *)(&val)[0], find_*_bit() assumes that it gets the lower 32 bits of u64, but instead it gets the upper 32 bits - which is wrong. The fix is to swap the words of the u64 to handle this case. This is _not_ a regular endianness swap. 
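Concretely: on a 32-bit big-endian kernel the u64 value 0x1 is stored high word first, so reading it through a (u32 *) cast yields 0 for the first word and the bit scan misses the set bit. The new helper places the low 32 bits in dst[0] and the high 32 bits in dst[1], so callers can iterate the mask portably (usage sketch; handle_reg() is a placeholder for the per-register work):

	DECLARE_BITMAP(_mask, 64);

	bitmap_from_u64(_mask, mask);
	for_each_set_bit(bit, _mask, 64)
		handle_reg(bit);	/* placeholder: process register 'bit' */
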
Suggested-by: Yury Norov Signed-off-by: Madhavan Srinivasan Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Yury Norov Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Jiri Olsa Cc: Linus Torvalds Cc: Michael Ellerman Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: linuxppc-dev@lists.ozlabs.org Link: http://lkml.kernel.org/r/1471426568-31051-2-git-send-email-maddy@linux.vnet.ibm.com Signed-off-by: Ingo Molnar diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 598bc99..3b77588 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -339,6 +339,24 @@ static inline int bitmap_parse(const char *buf, unsigned int buflen, return __bitmap_parse(buf, buflen, 0, maskp, nmaskbits); } +/* + * bitmap_from_u64 - Check and swap words within u64. + * @mask: source bitmap + * @dst: destination bitmap + * + * In 32-bit Big Endian kernel, when using (u32 *)(&val)[*] + * to read u64 mask, we will get the wrong word. + * That is "(u32 *)(&val)[0]" gets the upper 32 bits, + * but we expect the lower 32-bits of u64. + */ +static inline void bitmap_from_u64(unsigned long *dst, u64 mask) +{ + dst[0] = mask & ULONG_MAX; + + if (sizeof(mask) > sizeof(unsigned long)) + dst[1] = mask >> 32; +} + #endif /* __ASSEMBLY__ */ #endif /* __LINUX_BITMAP_H */ diff --git a/kernel/events/core.c b/kernel/events/core.c index ca4fde5..849919c 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -5340,9 +5340,10 @@ perf_output_sample_regs(struct perf_output_handle *handle, struct pt_regs *regs, u64 mask) { int bit; + DECLARE_BITMAP(_mask, 64); - for_each_set_bit(bit, (const unsigned long *) &mask, - sizeof(mask) * BITS_PER_BYTE) { + bitmap_from_u64(_mask, mask); + for_each_set_bit(bit, _mask, sizeof(mask) * BITS_PER_BYTE) { u64 val; val = perf_reg_value(regs, bit); -- cgit v0.10.2 From 4ff6a8debf48a7bf48e93c01da720785070d3a25 Mon Sep 17 00:00:00 2001 From: David Carrillo-Cisneros Date: Wed, 17 Aug 2016 13:55:05 -0700 Subject: perf/core: Generalize event->group_flags Currently, PERF_GROUP_SOFTWARE is used in the group_flags field of a group's leader to indicate that is_software_event(event) is true for all events in a group. This is the only usage of event->group_flags. This pattern of setting a group level flags when all events in the group share a property is useful for the flag introduced in the next patch and for future CQM/CMT flags. So this patches generalizes group_flags to work as an aggregate of event level flags. PERF_GROUP_SOFTWARE denotes an inmutable event's property. All other flags that I intend to add are also determinable at event initialization. To better convey the above, this patch renames event's group_flags to group_caps and PERF_GROUP_SOFTWARE to PERF_EV_CAP_SOFTWARE. Individual event flags are stored in the new event->event_caps. Since the cap flags do not change after event initialization, there is no need to serialize event_caps. This new field is used when events are added to a context, similarly to how PERF_GROUP_SOFTWARE and is_software_event() worked. Lastly, for consistency, updates is_software_event() to rely in event_cap instead of the context index. 
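In short (restating the hunks below), the group capability mask is seeded from the leader and narrowed as siblings attach, and the software-event check becomes a per-event capability test:

	/* group leader added to a context: */
	event->group_caps = event->event_caps;

	/* sibling attached to the group: */
	group_leader->group_caps &= event->event_caps;

	/* is_software_event() no longer looks at the context index: */
	return event->event_caps & PERF_EV_CAP_SOFTWARE;
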
Signed-off-by: David Carrillo-Cisneros Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Kan Liang Cc: Linus Torvalds Cc: Paul Turner Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vegard Nossum Cc: Vince Weaver Link: http://lkml.kernel.org/r/1471467307-61171-3-git-send-email-davidcc@google.com Signed-off-by: Ingo Molnar diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 529c41f..6f7459f 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -510,9 +510,12 @@ typedef void (*perf_overflow_handler_t)(struct perf_event *, struct perf_sample_data *, struct pt_regs *regs); -enum perf_group_flag { - PERF_GROUP_SOFTWARE = 0x1, -}; +/* + * Event capabilities. For event_caps and groups caps. + * + * PERF_EV_CAP_SOFTWARE: Is a software event. + */ +#define PERF_EV_CAP_SOFTWARE BIT(0) #define SWEVENT_HLIST_BITS 8 #define SWEVENT_HLIST_SIZE (1 << SWEVENT_HLIST_BITS) @@ -568,7 +571,12 @@ struct perf_event { struct hlist_node hlist_entry; struct list_head active_entry; int nr_siblings; - int group_flags; + + /* Not serialized. Only written during event initialization. */ + int event_caps; + /* The cumulative AND of all event_caps for events in this group. */ + int group_caps; + struct perf_event *group_leader; struct pmu *pmu; void *pmu_private; @@ -988,7 +996,7 @@ static inline bool is_sampling_event(struct perf_event *event) */ static inline int is_software_event(struct perf_event *event) { - return event->pmu->task_ctx_nr == perf_sw_context; + return event->event_caps & PERF_EV_CAP_SOFTWARE; } extern struct static_key perf_swevent_enabled[PERF_COUNT_SW_MAX]; diff --git a/kernel/events/core.c b/kernel/events/core.c index 849919c..8c42a5a 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -1475,8 +1475,7 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx) if (event->group_leader == event) { struct list_head *list; - if (is_software_event(event)) - event->group_flags |= PERF_GROUP_SOFTWARE; + event->group_caps = event->event_caps; list = ctx_group_list(event, ctx); list_add_tail(&event->group_entry, list); @@ -1630,9 +1629,7 @@ static void perf_group_attach(struct perf_event *event) WARN_ON_ONCE(group_leader->ctx != event->ctx); - if (group_leader->group_flags & PERF_GROUP_SOFTWARE && - !is_software_event(event)) - group_leader->group_flags &= ~PERF_GROUP_SOFTWARE; + group_leader->group_caps &= event->event_caps; list_add_tail(&event->group_entry, &group_leader->sibling_list); group_leader->nr_siblings++; @@ -1723,7 +1720,7 @@ static void perf_group_detach(struct perf_event *event) sibling->group_leader = sibling; /* Inherit group flags from the previous leader */ - sibling->group_flags = event->group_flags; + sibling->group_caps = event->group_caps; WARN_ON_ONCE(sibling->ctx != event->ctx); } @@ -2149,7 +2146,7 @@ static int group_can_go_on(struct perf_event *event, /* * Groups consisting entirely of software events can always go on. 
*/ - if (event->group_flags & PERF_GROUP_SOFTWARE) + if (event->group_caps & PERF_EV_CAP_SOFTWARE) return 1; /* * If an exclusive group is already on, no other hardware @@ -9490,6 +9487,9 @@ SYSCALL_DEFINE5(perf_event_open, goto err_alloc; } + if (pmu->task_ctx_nr == perf_sw_context) + event->event_caps |= PERF_EV_CAP_SOFTWARE; + if (group_leader && (is_software_event(event) != is_software_event(group_leader))) { if (is_software_event(event)) { @@ -9503,7 +9503,7 @@ SYSCALL_DEFINE5(perf_event_open, */ pmu = group_leader->pmu; } else if (is_software_event(group_leader) && - (group_leader->group_flags & PERF_GROUP_SOFTWARE)) { + (group_leader->group_caps & PERF_EV_CAP_SOFTWARE)) { /* * In case the group is a pure software group, and we * try to add a hardware event, move the whole group to -- cgit v0.10.2 From d6a2f9035bfc27d0e9d78b13635dda9fb017ac01 Mon Sep 17 00:00:00 2001 From: David Carrillo-Cisneros Date: Wed, 17 Aug 2016 13:55:06 -0700 Subject: perf/core: Introduce PMU_EV_CAP_READ_ACTIVE_PKG Introduce the flag PMU_EV_CAP_READ_ACTIVE_PKG, useful for uncore events, that allows a PMU to signal the generic perf code that an event is readable in the current CPU if the event is active in a CPU in the same package as the current CPU. This is an optimization that avoids a unnecessary IPI for the common case where uncore events are run and read in the same package but in different CPUs. As an example, the IPI removal speeds up perf_read() in my Haswell system as follows: - For event UNC_C_LLC_LOOKUP: From 260 us to 31 us. - For event RAPL's power/energy-cores/: From to 255 us to 27 us. For the optimization to work, all events in the group must have it (similarly to PERF_EV_CAP_SOFTWARE). Signed-off-by: David Carrillo-Cisneros Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: David Carrillo-Cisneros Cc: Jiri Olsa Cc: Kan Liang Cc: Linus Torvalds Cc: Paul Turner Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vegard Nossum Cc: Vince Weaver Link: http://lkml.kernel.org/r/1471467307-61171-4-git-send-email-davidcc@google.com Signed-off-by: Ingo Molnar diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 6f7459f..5c53625 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -514,8 +514,11 @@ typedef void (*perf_overflow_handler_t)(struct perf_event *, * Event capabilities. For event_caps and groups caps. * * PERF_EV_CAP_SOFTWARE: Is a software event. + * PERF_EV_CAP_READ_ACTIVE_PKG: A CPU event (or cgroup event) that can be read + * from any CPU in the package where it is active. 
*/ #define PERF_EV_CAP_SOFTWARE BIT(0) +#define PERF_EV_CAP_READ_ACTIVE_PKG BIT(1) #define SWEVENT_HLIST_BITS 8 #define SWEVENT_HLIST_SIZE (1 << SWEVENT_HLIST_BITS) diff --git a/kernel/events/core.c b/kernel/events/core.c index 8c42a5a..3f07e6c 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3424,6 +3424,22 @@ struct perf_read_data { int ret; }; +static int find_cpu_to_read(struct perf_event *event, int local_cpu) +{ + int event_cpu = event->oncpu; + u16 local_pkg, event_pkg; + + if (event->group_caps & PERF_EV_CAP_READ_ACTIVE_PKG) { + event_pkg = topology_physical_package_id(event_cpu); + local_pkg = topology_physical_package_id(local_cpu); + + if (event_pkg == local_pkg) + return local_cpu; + } + + return event_cpu; +} + /* * Cross CPU call to read the hardware event */ @@ -3545,7 +3561,7 @@ u64 perf_event_read_local(struct perf_event *event) static int perf_event_read(struct perf_event *event, bool group) { - int ret = 0; + int ret = 0, cpu_to_read, local_cpu; /* * If event is enabled and currently active on a CPU, update the @@ -3557,7 +3573,12 @@ static int perf_event_read(struct perf_event *event, bool group) .group = group, .ret = 0, }; - ret = smp_call_function_single(event->oncpu, __perf_event_read, &data, 1); + + local_cpu = get_cpu(); + cpu_to_read = find_cpu_to_read(event, local_cpu); + put_cpu(); + + ret = smp_call_function_single(cpu_to_read, __perf_event_read, &data, 1); /* The event must have been read from an online CPU: */ WARN_ON_ONCE(ret); ret = ret ? : data.ret; -- cgit v0.10.2 From e64cd6f73ff5a7eb4f8f759049ee24a3fe55e731 Mon Sep 17 00:00:00 2001 From: David Carrillo-Cisneros Date: Wed, 17 Aug 2016 13:55:07 -0700 Subject: perf/x86: Use PMUEF_READ_CPU_PKG in uncore events Add flag to Intel's uncore and RAPL. Signed-off-by: David Carrillo-Cisneros Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Stephane Eranian Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Kan Liang Cc: Linus Torvalds Cc: Paul Turner Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Vegard Nossum Cc: Vince Weaver Link: http://lkml.kernel.org/r/1471467307-61171-5-git-send-email-davidcc@google.com Signed-off-by: Ingo Molnar diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c index 2886593..62bebcc 100644 --- a/arch/x86/events/intel/rapl.c +++ b/arch/x86/events/intel/rapl.c @@ -357,6 +357,8 @@ static int rapl_pmu_event_init(struct perf_event *event) if (event->cpu < 0) return -EINVAL; + event->event_caps |= PERF_EV_CAP_READ_ACTIVE_PKG; + /* * check event is known (determines counter) */ diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index 463dc7a..7b3cc8b 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -664,6 +664,8 @@ static int uncore_pmu_event_init(struct perf_event *event) event->cpu = box->cpu; event->pmu_private = box; + event->event_caps |= PERF_EV_CAP_READ_ACTIVE_PKG; + event->hw.idx = -1; event->hw.last_tag = ~0ULL; event->hw.extra_reg.idx = EXTRA_REG_NONE; diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c index 9d35ec0..5f845ee 100644 --- a/arch/x86/events/intel/uncore_snb.c +++ b/arch/x86/events/intel/uncore_snb.c @@ -388,6 +388,8 @@ static int snb_uncore_imc_event_init(struct perf_event *event) event->cpu = box->cpu; event->pmu_private = box; + event->event_caps |= PERF_EV_CAP_READ_ACTIVE_PKG; + event->hw.idx = -1; event->hw.last_tag = ~0ULL; event->hw.extra_reg.idx = EXTRA_REG_NONE; -- cgit v0.10.2 From 
b6a32f023fcc9cbd1602f78a467fd8d41bbc9457 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 18 Aug 2016 11:09:52 +0200 Subject: perf/x86: Fix PEBS threshold initialization Latest PEBS rework change could skip initialization of the ds->pebs_interrupt_threshold for single event PEBS threshold events. Make sure the PEBS threshold gets always initialized. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 09e61b4f7849 ("perf/x86/intel: Rework the large PEBS setup code") Link: http://lkml.kernel.org/r/1471511392-29875-1-git-send-email-jolsa@kernel.org Signed-off-by: Ingo Molnar diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 248023f..e0288d5 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -834,14 +834,24 @@ static inline void pebs_update_threshold(struct cpu_hw_events *cpuc) static void pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc, struct pmu *pmu) { + /* + * Make sure we get updated with the first PEBS + * event. It will trigger also during removal, but + * that does not hurt: + */ + bool update = cpuc->n_pebs == 1; + if (needed_cb != pebs_needs_sched_cb(cpuc)) { if (!needed_cb) perf_sched_cb_inc(pmu); else perf_sched_cb_dec(pmu); - pebs_update_threshold(cpuc); + update = true; } + + if (update) + pebs_update_threshold(cpuc); } void intel_pmu_pebs_add(struct perf_event *event) -- cgit v0.10.2 From bd48c63eb0afc28b29fb342f215cdd77b995c02e Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 5 Aug 2016 15:40:30 -0300 Subject: tools: Introduce tools/include/linux/time64.h for *SEC_PER_*SEC macros And remove it from tools/perf/{perf,util}.h, making code that needs these macros to include linux/time64.h instead, to match how this is used in the kernel sources. 
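A representative conversion in the follow-up patches then reads (sketch; 'nsecs' holds a sample timestamp in nanoseconds):

	#include <linux/time64.h>

	secs   = nsecs / NSEC_PER_SEC;
	nsecs -= secs * NSEC_PER_SEC;
	usecs  = nsecs / NSEC_PER_USEC;
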
Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Steven Rostedt Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-e69fc1pvkgt57yvxqt6eunyg@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/include/linux/time64.h b/tools/include/linux/time64.h new file mode 100644 index 0000000..df92654 --- /dev/null +++ b/tools/include/linux/time64.h @@ -0,0 +1,12 @@ +#ifndef _TOOLS_LINUX_TIME64_H +#define _TOOLS_LINUX_TIME64_H + +#define MSEC_PER_SEC 1000L +#define USEC_PER_MSEC 1000L +#define NSEC_PER_USEC 1000L +#define NSEC_PER_MSEC 1000000L +#define USEC_PER_SEC 1000000L +#define NSEC_PER_SEC 1000000000L +#define FSEC_PER_SEC 1000000000000000LL + +#endif /* _LINUX_TIME64_H */ diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST index ad2534d..f23a5e7 100644 --- a/tools/perf/MANIFEST +++ b/tools/perf/MANIFEST @@ -77,4 +77,5 @@ tools/include/linux/stringify.h tools/include/linux/types.h tools/include/linux/err.h tools/include/linux/bitmap.h +tools/include/linux/time64.h tools/arch/*/include/uapi/asm/perf_regs.h diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index 5e2127e..a0040f7 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -24,6 +24,7 @@ #include #endif +#include #include #include #include diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index c859e59..6b3c8b0 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -24,6 +24,7 @@ #include "util/thread-stack.h" #include #include +#include #include "asm/bug.h" #include "util/mem-events.h" @@ -464,9 +465,9 @@ static void print_sample_start(struct perf_sample *sample, if (PRINT_FIELD(TIME)) { nsecs = sample->time; - secs = nsecs / NSECS_PER_SEC; - nsecs -= secs * NSECS_PER_SEC; - usecs = nsecs / NSECS_PER_USEC; + secs = nsecs / NSEC_PER_SEC; + nsecs -= secs * NSEC_PER_SEC; + usecs = nsecs / NSEC_PER_USEC; if (nanosecs) printf("%5lu.%09llu: ", secs, nsecs); else diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 3c7452b..e33a66b 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -65,6 +65,7 @@ #include "util/group.h" #include "asm/bug.h" +#include #include #include #include @@ -354,7 +355,7 @@ static void process_interval(void) diff_timespec(&rs, &ts, &ref_time); if (STAT_RECORD) { - if (WRITE_STAT_ROUND_EVENT(rs.tv_sec * NSECS_PER_SEC + rs.tv_nsec, INTERVAL)) + if (WRITE_STAT_ROUND_EVENT(rs.tv_sec * NSEC_PER_SEC + rs.tv_nsec, INTERVAL)) pr_err("failed to write stat round event\n"); } @@ -2175,8 +2176,8 @@ static int process_stat_round_event(struct perf_tool *tool __maybe_unused, update_stats(&walltime_nsecs_stats, stat_round->time); if (stat_config.interval && stat_round->time) { - tsh.tv_sec = stat_round->time / NSECS_PER_SEC; - tsh.tv_nsec = stat_round->time % NSECS_PER_SEC; + tsh.tv_sec = stat_round->time / NSEC_PER_SEC; + tsh.tv_nsec = stat_round->time % NSEC_PER_SEC; ts = &tsh; } diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index b8c6766..b4fc1ab 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -45,6 +45,7 @@ #include #include #include +#include #ifndef O_CLOEXEC # define O_CLOEXEC 02000000 diff --git a/tools/perf/perf.h b/tools/perf/perf.h index cb0f135..9a0236a 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -14,13 +14,6 @@ void test_attr__open(struct perf_event_attr *attr, pid_t pid, int cpu, #define HAVE_ATTR_TEST #include "perf-sys.h" -#ifndef NSEC_PER_SEC -# define NSEC_PER_SEC 1000000000ULL -#endif -#ifndef 
NSEC_PER_USEC -# define NSEC_PER_USEC 1000ULL -#endif - static inline unsigned long long rdclock(void) { struct timespec ts; diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c index 8c4212a..c1838b6 100644 --- a/tools/perf/util/debug.c +++ b/tools/perf/util/debug.c @@ -6,6 +6,7 @@ #include #include #include +#include #include "cache.h" #include "color.h" @@ -14,9 +15,6 @@ #include "util.h" #include "target.h" -#define NSECS_PER_SEC 1000000000ULL -#define NSECS_PER_USEC 1000ULL - int verbose; bool dump_trace = false, quiet = false; int debug_ordered_events; @@ -54,9 +52,9 @@ static int veprintf_time(u64 t, const char *fmt, va_list args) int ret = 0; u64 secs, usecs, nsecs = t; - secs = nsecs / NSECS_PER_SEC; - nsecs -= secs * NSECS_PER_SEC; - usecs = nsecs / NSECS_PER_USEC; + secs = nsecs / NSEC_PER_SEC; + nsecs -= secs * NSEC_PER_SEC; + usecs = nsecs / NSEC_PER_USEC; ret = fprintf(stderr, "[%13" PRIu64 ".%06" PRIu64 "] ", secs, usecs); diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c index 5d1eb1c..e55a132 100644 --- a/tools/perf/util/scripting-engines/trace-event-perl.c +++ b/tools/perf/util/scripting-engines/trace-event-perl.c @@ -25,6 +25,7 @@ #include #include #include +#include #include "../util.h" #include @@ -359,8 +360,8 @@ static void perl_process_tracepoint(struct perf_sample *sample, if (!test_and_set_bit(event->id, events_defined)) define_event_symbols(event, handler, event->print_fmt.args); - s = nsecs / NSECS_PER_SEC; - ns = nsecs - s * NSECS_PER_SEC; + s = nsecs / NSEC_PER_SEC; + ns = nsecs - s * NSEC_PER_SEC; scripting_context->event_data = data; scripting_context->pevent = evsel->tp_format->pevent; diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index e0203b9..089438d 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -27,6 +27,7 @@ #include #include #include +#include #include "../../perf.h" #include "../debug.h" @@ -426,8 +427,8 @@ static void python_process_tracepoint(struct perf_sample *sample, if (!dict) Py_FatalError("couldn't create Python dict"); } - s = nsecs / NSECS_PER_SEC; - ns = nsecs - s * NSECS_PER_SEC; + s = nsecs / NSEC_PER_SEC; + ns = nsecs - s * NSEC_PER_SEC; scripting_context->event_data = data; scripting_context->pevent = evsel->tp_format->pevent; diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index cee559d..85c5680 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include "callchain.h" #include "strlist.h" diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index e5f5547..43899e0 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -179,10 +179,6 @@ static inline void *zalloc(size_t size) #undef tolower #undef toupper -#ifndef NSEC_PER_MSEC -#define NSEC_PER_MSEC 1000000L -#endif - int parse_nsec_time(const char *str, u64 *ptime); extern unsigned char sane_ctype[256]; -- cgit v0.10.2 From a8ad8329b91551bbfc3a317f3655dffcd8c18591 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 8 Aug 2016 11:55:38 -0300 Subject: perf bench numa: Use NSEC_PER_U?SEC Following kernel practices, using linux/time64.h Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Steven Rostedt Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-7vnv15263y50qku76p4w5xk6@git.kernel.org Signed-off-by: 
Arnaldo Carvalho de Melo diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c index f7f5300..8efe904 100644 --- a/tools/perf/bench/numa.c +++ b/tools/perf/bench/numa.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -1004,7 +1005,7 @@ static void calc_convergence(double runtime_ns_max, double *convergence) if (strong && process_groups == g->p.nr_proc) { if (!*convergence) { *convergence = runtime_ns_max; - tprintf(" (%6.1fs converged)\n", *convergence/1e9); + tprintf(" (%6.1fs converged)\n", *convergence / NSEC_PER_SEC); if (g->p.measure_convergence) { g->all_converged = true; g->stop_work = true; @@ -1012,7 +1013,7 @@ static void calc_convergence(double runtime_ns_max, double *convergence) } } else { if (*convergence) { - tprintf(" (%6.1fs de-converged)", runtime_ns_max/1e9); + tprintf(" (%6.1fs de-converged)", runtime_ns_max / NSEC_PER_SEC); *convergence = 0; } tprintf("\n"); @@ -1022,7 +1023,7 @@ static void calc_convergence(double runtime_ns_max, double *convergence) static void show_summary(double runtime_ns_max, int l, double *convergence) { tprintf("\r # %5.1f%% [%.1f mins]", - (double)(l+1)/g->p.nr_loops*100.0, runtime_ns_max/1e9 / 60.0); + (double)(l+1)/g->p.nr_loops*100.0, runtime_ns_max / NSEC_PER_SEC / 60.0); calc_convergence(runtime_ns_max, convergence); @@ -1179,8 +1180,8 @@ static void *worker_thread(void *__tdata) if (details >= 3) { timersub(&stop, &start, &diff); - runtime_ns_max = diff.tv_sec * 1000000000; - runtime_ns_max += diff.tv_usec * 1000; + runtime_ns_max = diff.tv_sec * NSEC_PER_SEC; + runtime_ns_max += diff.tv_usec * NSEC_PER_USEC; if (details >= 0) { printf(" #%2d / %2d: %14.2lf nsecs/op [val: %016"PRIx64"]\n", @@ -1192,23 +1193,23 @@ static void *worker_thread(void *__tdata) continue; timersub(&stop, &start0, &diff); - runtime_ns_max = diff.tv_sec * 1000000000ULL; - runtime_ns_max += diff.tv_usec * 1000ULL; + runtime_ns_max = diff.tv_sec * NSEC_PER_SEC; + runtime_ns_max += diff.tv_usec * NSEC_PER_USEC; show_summary(runtime_ns_max, l, &convergence); } gettimeofday(&stop, NULL); timersub(&stop, &start0, &diff); - td->runtime_ns = diff.tv_sec * 1000000000ULL; - td->runtime_ns += diff.tv_usec * 1000ULL; - td->speed_gbs = bytes_done / (td->runtime_ns / 1e9) / 1e9; + td->runtime_ns = diff.tv_sec * NSEC_PER_SEC; + td->runtime_ns += diff.tv_usec * NSEC_PER_USEC; + td->speed_gbs = bytes_done / (td->runtime_ns / NSEC_PER_SEC) / 1e9; getrusage(RUSAGE_THREAD, &rusage); - td->system_time_ns = rusage.ru_stime.tv_sec * 1000000000ULL; - td->system_time_ns += rusage.ru_stime.tv_usec * 1000ULL; - td->user_time_ns = rusage.ru_utime.tv_sec * 1000000000ULL; - td->user_time_ns += rusage.ru_utime.tv_usec * 1000ULL; + td->system_time_ns = rusage.ru_stime.tv_sec * NSEC_PER_SEC; + td->system_time_ns += rusage.ru_stime.tv_usec * NSEC_PER_USEC; + td->user_time_ns = rusage.ru_utime.tv_sec * NSEC_PER_SEC; + td->user_time_ns += rusage.ru_utime.tv_usec * NSEC_PER_USEC; free_data(thread_data, g->p.bytes_thread); @@ -1469,7 +1470,7 @@ static int __bench_numa(const char *name) } /* Wait for all the threads to start up: */ while (g->nr_tasks_started != g->p.nr_tasks) - usleep(1000); + usleep(USEC_PER_MSEC); BUG_ON(g->nr_tasks_started != g->p.nr_tasks); @@ -1488,9 +1489,9 @@ static int __bench_numa(const char *name) timersub(&stop, &start, &diff); - startup_sec = diff.tv_sec * 1000000000.0; - startup_sec += diff.tv_usec * 1000.0; - startup_sec /= 1e9; + startup_sec = diff.tv_sec * NSEC_PER_SEC; + startup_sec += diff.tv_usec * NSEC_PER_USEC; + startup_sec /= 
NSEC_PER_SEC; tprintf(" threads initialized in %.6f seconds.\n", startup_sec); tprintf(" #\n"); @@ -1529,14 +1530,14 @@ static int __bench_numa(const char *name) tprintf("\n ###\n"); tprintf("\n"); - runtime_sec_max = diff.tv_sec * 1000000000.0; - runtime_sec_max += diff.tv_usec * 1000.0; - runtime_sec_max /= 1e9; + runtime_sec_max = diff.tv_sec * NSEC_PER_SEC; + runtime_sec_max += diff.tv_usec * NSEC_PER_USEC; + runtime_sec_max /= NSEC_PER_SEC; - runtime_sec_min = runtime_ns_min/1e9; + runtime_sec_min = runtime_ns_min / NSEC_PER_SEC; bytes = g->bytes_done; - runtime_avg = (double)runtime_ns_sum / g->p.nr_tasks / 1e9; + runtime_avg = (double)runtime_ns_sum / g->p.nr_tasks / NSEC_PER_SEC; if (g->p.measure_convergence) { print_res(name, runtime_sec_max, @@ -1562,7 +1563,7 @@ static int __bench_numa(const char *name) print_res(name, bytes / 1e9, "GB,", "data-total", "GB data processed, total"); - print_res(name, runtime_sec_max * 1e9 / (bytes / g->p.nr_tasks), + print_res(name, runtime_sec_max * NSEC_PER_SEC / (bytes / g->p.nr_tasks), "nsecs,", "runtime/byte/thread","nsecs/byte/thread runtime"); print_res(name, bytes / g->p.nr_tasks / 1e9 / runtime_sec_max, @@ -1581,9 +1582,9 @@ static int __bench_numa(const char *name) snprintf(tname, 32, "process%d:thread%d", p, t); print_res(tname, td->speed_gbs, "GB/sec", "thread-speed", "GB/sec/thread speed"); - print_res(tname, td->system_time_ns / 1e9, + print_res(tname, td->system_time_ns / NSEC_PER_SEC, "secs", "thread-system-time", "system CPU time/thread"); - print_res(tname, td->user_time_ns / 1e9, + print_res(tname, td->user_time_ns / NSEC_PER_SEC, "secs", "thread-user-time", "user CPU time/thread"); } } -- cgit v0.10.2 From 4fc76e495b60da343b94dadeea1001f878ceb955 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 8 Aug 2016 12:23:49 -0300 Subject: perf sched: Use linux/time64.h Probably the next step is to introduce linux/time.h and use timespec_to_ns(), etc. 
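For reference, the conversions in this series boil down to the pattern below (a minimal sketch, not code from the tree; the helper names timespec_to_nsecs()/timeval_to_nsecs() are illustrative stand-ins for a future timespec_to_ns()):

#include <sys/time.h>		/* struct timeval */
#include <time.h>		/* struct timespec */
#include <linux/types.h>	/* u64, from the tools copy of the header */
#include <linux/time64.h>	/* NSEC_PER_SEC, NSEC_PER_USEC */

static inline u64 timespec_to_nsecs(const struct timespec *ts)
{
	/* seconds scale by NSEC_PER_SEC, the tv_nsec part is already in ns */
	return (u64)ts->tv_sec * NSEC_PER_SEC + ts->tv_nsec;
}

static inline u64 timeval_to_nsecs(const struct timeval *tv)
{
	/* same idea for the gettimeofday()/getrusage() results in bench/numa.c */
	return (u64)tv->tv_sec * NSEC_PER_SEC + (u64)tv->tv_usec * NSEC_PER_USEC;
}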
Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Steven Rostedt Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-4nqhskn27fn93cz3ukbc8drf@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 0dfe8df..f5503ca 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -26,6 +26,7 @@ #include #include #include +#include #define PR_SET_NAME 15 /* Set process name */ #define MAX_CPUS 4096 @@ -199,7 +200,7 @@ static u64 get_nsecs(void) clock_gettime(CLOCK_MONOTONIC, &ts); - return ts.tv_sec * 1000000000ULL + ts.tv_nsec; + return ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec; } static void burn_nsecs(struct perf_sched *sched, u64 nsecs) @@ -223,7 +224,7 @@ static void sleep_nsecs(u64 nsecs) static void calibrate_run_measurement_overhead(struct perf_sched *sched) { - u64 T0, T1, delta, min_delta = 1000000000ULL; + u64 T0, T1, delta, min_delta = NSEC_PER_SEC; int i; for (i = 0; i < 10; i++) { @@ -240,7 +241,7 @@ static void calibrate_run_measurement_overhead(struct perf_sched *sched) static void calibrate_sleep_measurement_overhead(struct perf_sched *sched) { - u64 T0, T1, delta, min_delta = 1000000000ULL; + u64 T0, T1, delta, min_delta = NSEC_PER_SEC; int i; for (i = 0; i < 10; i++) { @@ -452,8 +453,8 @@ static u64 get_cpu_usage_nsec_parent(void) err = getrusage(RUSAGE_SELF, &ru); BUG_ON(err); - sum = ru.ru_utime.tv_sec*1e9 + ru.ru_utime.tv_usec*1e3; - sum += ru.ru_stime.tv_sec*1e9 + ru.ru_stime.tv_usec*1e3; + sum = ru.ru_utime.tv_sec * NSEC_PER_SEC + ru.ru_utime.tv_usec * NSEC_PER_USEC; + sum += ru.ru_stime.tv_sec * NSEC_PER_SEC + ru.ru_stime.tv_usec * NSEC_PER_USEC; return sum; } @@ -667,12 +668,12 @@ static void run_one_test(struct perf_sched *sched) sched->run_avg = delta; sched->run_avg = (sched->run_avg * (sched->replay_repeat - 1) + delta) / sched->replay_repeat; - printf("#%-3ld: %0.3f, ", sched->nr_runs, (double)delta / 1000000.0); + printf("#%-3ld: %0.3f, ", sched->nr_runs, (double)delta / NSEC_PER_MSEC); - printf("ravg: %0.2f, ", (double)sched->run_avg / 1e6); + printf("ravg: %0.2f, ", (double)sched->run_avg / NSEC_PER_MSEC); printf("cpu: %0.2f / %0.2f", - (double)sched->cpu_usage / 1e6, (double)sched->runavg_cpu_usage / 1e6); + (double)sched->cpu_usage / NSEC_PER_MSEC, (double)sched->runavg_cpu_usage / NSEC_PER_MSEC); #if 0 /* @@ -680,8 +681,8 @@ static void run_one_test(struct perf_sched *sched) * accurate than the sched->sum_exec_runtime based statistics: */ printf(" [%0.2f / %0.2f]", - (double)sched->parent_cpu_usage/1e6, - (double)sched->runavg_parent_cpu_usage/1e6); + (double)sched->parent_cpu_usage / NSEC_PER_MSEC, + (double)sched->runavg_parent_cpu_usage / NSEC_PER_MSEC); #endif printf("\n"); @@ -696,13 +697,13 @@ static void test_calibrations(struct perf_sched *sched) u64 T0, T1; T0 = get_nsecs(); - burn_nsecs(sched, 1e6); + burn_nsecs(sched, NSEC_PER_MSEC); T1 = get_nsecs(); printf("the run test took %" PRIu64 " nsecs\n", T1 - T0); T0 = get_nsecs(); - sleep_nsecs(1e6); + sleep_nsecs(NSEC_PER_MSEC); T1 = get_nsecs(); printf("the sleep test took %" PRIu64 " nsecs\n", T1 - T0); @@ -1213,10 +1214,10 @@ static void output_lat_thread(struct perf_sched *sched, struct work_atoms *work_ avg = work_list->total_lat / work_list->nb_atoms; printf("|%11.3f ms |%9" PRIu64 " | avg:%9.3f ms | max:%9.3f ms | max at: %13.6f s\n", - (double)work_list->total_runtime / 1e6, - work_list->nb_atoms, (double)avg / 1e6, - (double)work_list->max_lat / 1e6, - (double)work_list->max_lat_at / 1e9); + 
(double)work_list->total_runtime / NSEC_PER_MSEC, + work_list->nb_atoms, (double)avg / NSEC_PER_MSEC, + (double)work_list->max_lat / NSEC_PER_MSEC, + (double)work_list->max_lat_at / NSEC_PER_SEC); } static int pid_cmp(struct work_atoms *l, struct work_atoms *r) @@ -1491,7 +1492,7 @@ static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel, if (sched->map.cpus && !cpu_map__has(sched->map.cpus, this_cpu)) goto out; - color_fprintf(stdout, color, " %12.6f secs ", (double)timestamp/1e9); + color_fprintf(stdout, color, " %12.6f secs ", (double)timestamp / NSEC_PER_SEC); if (new_shortname) { const char *pid_color = color; @@ -1753,7 +1754,7 @@ static int perf_sched__lat(struct perf_sched *sched) printf(" -----------------------------------------------------------------------------------------------------------------\n"); printf(" TOTAL: |%11.3f ms |%9" PRIu64 " |\n", - (double)sched->all_runtime / 1e6, sched->all_count); + (double)sched->all_runtime / NSEC_PER_MSEC, sched->all_count); printf(" ---------------------------------------------------\n"); -- cgit v0.10.2 From af4b2c972a5fc9358486d946d15f32510534ccbf Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 8 Aug 2016 12:45:58 -0300 Subject: perf timechart: Use NSEC_PER_U?SEC Following kernel practices, using linux/time64.h Cc: Adrian Hunter Cc: Arjan van de Ven Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Stanislav Fomichev Cc: Steven Rostedt Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-5l1md8lsdhfnrlsqyejzo9w2@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c index 733a554..e7eaa29 100644 --- a/tools/perf/builtin-timechart.c +++ b/tools/perf/builtin-timechart.c @@ -24,6 +24,7 @@ #include "util/evlist.h" #include "util/evsel.h" #include +#include #include "util/symbol.h" #include "util/callchain.h" #include "util/strlist.h" @@ -1288,9 +1289,9 @@ static void draw_process_bars(struct timechart *tchart) if (c->comm) { char comm[256]; if (c->total_time > 5000000000) /* 5 seconds */ - sprintf(comm, "%s:%i (%2.2fs)", c->comm, p->pid, c->total_time / 1000000000.0); + sprintf(comm, "%s:%i (%2.2fs)", c->comm, p->pid, c->total_time / (double)NSEC_PER_SEC); else - sprintf(comm, "%s:%i (%3.1fms)", c->comm, p->pid, c->total_time / 1000000.0); + sprintf(comm, "%s:%i (%3.1fms)", c->comm, p->pid, c->total_time / (double)NSEC_PER_MSEC); svg_text(Y, c->start_time, comm); } @@ -1637,7 +1638,7 @@ static int __cmd_timechart(struct timechart *tchart, const char *output_name) write_svg_file(tchart, output_name); pr_info("Written %2.1f seconds of trace to %s.\n", - (tchart->last_time - tchart->first_time) / 1000000000.0, output_name); + (tchart->last_time - tchart->first_time) / (double)NSEC_PER_SEC, output_name); out_delete: perf_session__delete(session); return ret; @@ -1901,10 +1902,10 @@ parse_time(const struct option *opt, const char *arg, int __maybe_unused unset) if (sscanf(arg, "%" PRIu64 "%cs", value, &unit) > 0) { switch (unit) { case 'm': - *value *= 1000000; + *value *= NSEC_PER_MSEC; break; case 'u': - *value *= 1000; + *value *= NSEC_PER_USEC; break; case 'n': break; @@ -1928,7 +1929,7 @@ int cmd_timechart(int argc, const char **argv, .ordered_events = true, }, .proc_num = 15, - .min_time = 1000000, + .min_time = NSEC_PER_MSEC, .merge_dist = 1000, }; const char *output_name = "output.svg"; diff --git a/tools/perf/util/svghelper.c b/tools/perf/util/svghelper.c index eec6c11..1cbada2 100644 --- a/tools/perf/util/svghelper.c 
+++ b/tools/perf/util/svghelper.c @@ -18,6 +18,7 @@ #include #include #include +#include #include "perf.h" #include "svghelper.h" @@ -274,14 +275,14 @@ static char *time_to_string(u64 duration) text[0] = 0; - if (duration < 1000) /* less than 1 usec */ + if (duration < NSEC_PER_USEC) /* less than 1 usec */ return text; - if (duration < 1000 * 1000) { /* less than 1 msec */ - sprintf(text, "%.1f us", duration / 1000.0); + if (duration < NSEC_PER_MSEC) { /* less than 1 msec */ + sprintf(text, "%.1f us", duration / (double)NSEC_PER_USEC); return text; } - sprintf(text, "%.1f ms", duration / 1000.0 / 1000); + sprintf(text, "%.1f ms", duration / (double)NSEC_PER_MSEC); return text; } @@ -297,7 +298,7 @@ void svg_waiting(int Yslot, int cpu, u64 start, u64 end, const char *backtrace) style = "waiting"; - if (end-start > 10 * 1000000) /* 10 msec */ + if (end-start > 10 * NSEC_PER_MSEC) /* 10 msec */ style = "WAITING"; text = time_to_string(end-start); -- cgit v0.10.2 From 16633ccff091dacb2138b95e8911a1892983c6fd Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 8 Aug 2016 14:51:30 -0300 Subject: perf bench sched-pipe: Use linux/time64.h, USEC_PER_SEC Following kernel practices. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Steven Rostedt Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-wgfu1h1pnw8lc919o2tan58y@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/bench/sched-pipe.c b/tools/perf/bench/sched-pipe.c index 1dc2d13..2243f01 100644 --- a/tools/perf/bench/sched-pipe.c +++ b/tools/perf/bench/sched-pipe.c @@ -25,6 +25,7 @@ #include #include #include +#include #include @@ -153,24 +154,24 @@ int bench_sched_pipe(int argc, const char **argv, const char *prefix __maybe_unu printf("# Executed %d pipe operations between two %s\n\n", loops, threaded ? "threads" : "processes"); - result_usec = diff.tv_sec * 1000000; + result_usec = diff.tv_sec * USEC_PER_SEC; result_usec += diff.tv_usec; printf(" %14s: %lu.%03lu [sec]\n\n", "Total time", diff.tv_sec, - (unsigned long) (diff.tv_usec/1000)); + (unsigned long) (diff.tv_usec / USEC_PER_MSEC)); printf(" %14lf usecs/op\n", (double)result_usec / (double)loops); printf(" %14d ops/sec\n", (int)((double)loops / - ((double)result_usec / (double)1000000))); + ((double)result_usec / (double)USEC_PER_SEC))); break; case BENCH_FORMAT_SIMPLE: printf("%lu.%03lu\n", diff.tv_sec, - (unsigned long) (diff.tv_usec / 1000)); + (unsigned long) (diff.tv_usec / USEC_PER_MSEC)); break; default: -- cgit v0.10.2 From 310ebb93676d2106b4c2e68bbf1b2811461bb9f3 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 8 Aug 2016 14:57:04 -0300 Subject: perf stat: Use *SEC_PER_*SEC macros To match how this is done in the kernel. 
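As a rough illustration of the same macro family, the millisecond interval used by 'perf stat -I' can be split into a struct timespec like this (a minimal sketch with an invented helper name; the hunk below reaches the same result by dividing by USEC_PER_MSEC, which also evaluates to 1000):

#include <time.h>
#include <linux/time64.h>

static void interval_ms_to_timespec(unsigned int interval_ms, struct timespec *ts)
{
	ts->tv_sec  = interval_ms / MSEC_PER_SEC;
	ts->tv_nsec = (interval_ms % MSEC_PER_SEC) * NSEC_PER_MSEC;
}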
Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Steven Rostedt Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-gym6yshewpdegt153u8v2q5r@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index e33a66b..90882b1 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -173,7 +173,7 @@ static inline void diff_timespec(struct timespec *r, struct timespec *a, { r->tv_sec = a->tv_sec - b->tv_sec; if (a->tv_nsec < b->tv_nsec) { - r->tv_nsec = a->tv_nsec + 1000000000L - b->tv_nsec; + r->tv_nsec = a->tv_nsec + NSEC_PER_SEC - b->tv_nsec; r->tv_sec--; } else { r->tv_nsec = a->tv_nsec - b->tv_nsec ; @@ -365,7 +365,7 @@ static void process_interval(void) static void enable_counters(void) { if (initial_delay) - usleep(initial_delay * 1000); + usleep(initial_delay * USEC_PER_MSEC); /* * We need to enable counters only if: @@ -542,8 +542,8 @@ static int __run_perf_stat(int argc, const char **argv) bool is_pipe = STAT_RECORD ? perf_stat.file.is_pipe : false; if (interval) { - ts.tv_sec = interval / 1000; - ts.tv_nsec = (interval % 1000) * 1000000; + ts.tv_sec = interval / USEC_PER_MSEC; + ts.tv_nsec = (interval % USEC_PER_MSEC) * NSEC_PER_MSEC; } else { ts.tv_sec = 1; ts.tv_nsec = 0; @@ -972,7 +972,7 @@ static void print_metric_header(void *ctx, const char *color __maybe_unused, static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg) { FILE *output = stat_config.output; - double msecs = avg / 1e6; + double msecs = avg / NSEC_PER_MSEC; const char *fmt_v, *fmt_n; char name[25]; @@ -1461,7 +1461,7 @@ static void print_footer(void) if (!null_run) fprintf(output, "\n"); fprintf(output, " %17.9f seconds time elapsed", - avg_stats(&walltime_nsecs_stats)/1e9); + avg_stats(&walltime_nsecs_stats) / NSEC_PER_SEC); if (run_count > 1) { fprintf(output, " "); print_noise_pct(stddev_stats(&walltime_nsecs_stats), -- cgit v0.10.2 From f2b91be731a6c1aff9333c1ce3fece7a311f1e0d Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 8 Aug 2016 14:59:21 -0300 Subject: perf bench mem: Use USEC_PER_SEC Following kernel practices, using linux/time64.h Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Steven Rostedt Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-xdtmguafva17wp023sxojiib@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/bench/mem-functions.c b/tools/perf/bench/mem-functions.c index 2b54d0f..c684910 100644 --- a/tools/perf/bench/mem-functions.c +++ b/tools/perf/bench/mem-functions.c @@ -21,6 +21,7 @@ #include #include #include +#include #define K 1024 @@ -89,7 +90,7 @@ static u64 get_cycles(void) static double timeval2double(struct timeval *ts) { - return (double)ts->tv_sec + (double)ts->tv_usec / (double)1000000; + return (double)ts->tv_sec + (double)ts->tv_usec / (double)USEC_PER_SEC; } #define print_bps(x) do { \ -- cgit v0.10.2 From af15e67e8f8572072167bfb193ceabff04f4b21e Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 8 Aug 2016 15:04:23 -0300 Subject: perf bench sched-messaging: Use USEC_PER_MSEC Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Steven Rostedt Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-xhyoyxejvorrgmwjx9k3j8k2@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/bench/sched-messaging.c b/tools/perf/bench/sched-messaging.c index bfaf950..6a111e7 100644 --- a/tools/perf/bench/sched-messaging.c +++ b/tools/perf/bench/sched-messaging.c @@ 
-29,6 +29,7 @@ #include #include #include +#include #define DATASIZE 100 @@ -312,11 +313,11 @@ int bench_sched_messaging(int argc, const char **argv, thread_mode ? "threads" : "processes"); printf(" %14s: %lu.%03lu [sec]\n", "Total time", diff.tv_sec, - (unsigned long) (diff.tv_usec/1000)); + (unsigned long) (diff.tv_usec / USEC_PER_MSEC)); break; case BENCH_FORMAT_SIMPLE: printf("%lu.%03lu\n", diff.tv_sec, - (unsigned long) (diff.tv_usec/1000)); + (unsigned long) (diff.tv_usec / USEC_PER_MSEC)); break; default: /* reaching here is something disaster */ -- cgit v0.10.2 From 0693e680d32e5c7415666ccfcff57ad4cd976294 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 8 Aug 2016 15:05:46 -0300 Subject: perf record: Use USEC_PER_MSEC Instead of a naked 1000. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Steven Rostedt Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-7v6be7jhvstbkvk3rsytjw0o@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 6355902..a3792e8 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -42,7 +42,7 @@ #include #include #include - +#include struct record { struct perf_tool tool; @@ -954,7 +954,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) } if (opts->initial_delay) { - usleep(opts->initial_delay * 1000); + usleep(opts->initial_delay * USEC_PER_MSEC); perf_evlist__enable(rec->evlist); } -- cgit v0.10.2 From c05a6e14150f81a8cb93d83af55582e05afc05ae Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 8 Aug 2016 15:10:44 -0300 Subject: perf kvm: Use NSEC_PER_USEC Following kernel practices and better documenting units of time. Cc: Adrian Hunter Cc: Alexander Yarygin Cc: David Ahern Cc: Hemant Kumar Cc: Jiri Olsa Cc: Namhyung Kim Cc: Steven Rostedt Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-5x6p6fmzrogonpbnkkkw4usk@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index a0040f7..08fa88f 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -363,7 +363,7 @@ static bool handle_end_event(struct perf_kvm_stat *kvm, if (!skip_event(decode)) { pr_info("%" PRIu64 " VM %d, vcpu %d: %s event took %" PRIu64 "usec\n", sample->time, sample->pid, vcpu_record->vcpu_id, - decode, time_diff/1000); + decode, time_diff / NSEC_PER_USEC); } } @@ -609,15 +609,15 @@ static void print_result(struct perf_kvm_stat *kvm) pr_info("%10llu ", (unsigned long long)ecount); pr_info("%8.2f%% ", (double)ecount / kvm->total_count * 100); pr_info("%8.2f%% ", (double)etime / kvm->total_time * 100); - pr_info("%9.2fus ", (double)min / 1e3); - pr_info("%9.2fus ", (double)max / 1e3); - pr_info("%9.2fus ( +-%7.2f%% )", (double)etime / ecount/1e3, + pr_info("%9.2fus ", (double)min / NSEC_PER_USEC); + pr_info("%9.2fus ", (double)max / NSEC_PER_USEC); + pr_info("%9.2fus ( +-%7.2f%% )", (double)etime / ecount / NSEC_PER_USEC, kvm_event_rel_stddev(vcpu, event)); pr_info("\n"); } pr_info("\nTotal Samples:%" PRIu64 ", Total events handled time:%.2fus.\n\n", - kvm->total_count, kvm->total_time / 1e3); + kvm->total_count, kvm->total_time / (double)NSEC_PER_USEC); if (kvm->lost_events) pr_info("\nLost events: %" PRIu64 "\n\n", kvm->lost_events); -- cgit v0.10.2 From 565e69114e4aa42664b0e7da135181651c3402f2 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 8 Aug 2016 15:35:21 -0300 Subject: perf bench futex: Use NSEC_PER_USEC 
Following kernel practices and better documentin Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Steven Rostedt Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-xncwqxegjp13g2nxih3lp9mx@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/bench/futex-requeue.c b/tools/perf/bench/futex-requeue.c index f96e22e..2b9705a 100644 --- a/tools/perf/bench/futex-requeue.c +++ b/tools/perf/bench/futex-requeue.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include "bench.h" #include "futex.h" @@ -62,7 +63,7 @@ static void print_summary(void) printf("Requeued %d of %d threads in %.4f ms (+-%.2f%%)\n", requeued_avg, nthreads, - requeuetime_avg/1e3, + requeuetime_avg / USEC_PER_MSEC, rel_stddev_stats(requeuetime_stddev, requeuetime_avg)); } @@ -184,7 +185,7 @@ int bench_futex_requeue(int argc, const char **argv, if (!silent) { printf("[Run %d]: Requeued %d of %d threads in %.4f ms\n", - j + 1, nrequeued, nthreads, runtime.tv_usec/1e3); + j + 1, nrequeued, nthreads, runtime.tv_usec / (double)USEC_PER_MSEC); } /* everybody should be blocked on futex2, wake'em up */ diff --git a/tools/perf/bench/futex-wake-parallel.c b/tools/perf/bench/futex-wake-parallel.c index 4a2ecd7..2c8fa67 100644 --- a/tools/perf/bench/futex-wake-parallel.c +++ b/tools/perf/bench/futex-wake-parallel.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include "bench.h" #include "futex.h" @@ -156,7 +157,7 @@ static void print_run(struct thread_data *waking_worker, unsigned int run_num) printf("[Run %d]: Avg per-thread latency (waking %d/%d threads) " "in %.4f ms (+-%.2f%%)\n", run_num + 1, wakeup_avg, - nblocked_threads, waketime_avg/1e3, + nblocked_threads, waketime_avg / USEC_PER_MSEC, rel_stddev_stats(waketime_stddev, waketime_avg)); } @@ -172,7 +173,7 @@ static void print_summary(void) printf("Avg per-thread latency (waking %d/%d threads) in %.4f ms (+-%.2f%%)\n", wakeup_avg, nblocked_threads, - waketime_avg/1e3, + waketime_avg / USEC_PER_MSEC, rel_stddev_stats(waketime_stddev, waketime_avg)); } diff --git a/tools/perf/bench/futex-wake.c b/tools/perf/bench/futex-wake.c index 87d8f4f..e246b1b 100644 --- a/tools/perf/bench/futex-wake.c +++ b/tools/perf/bench/futex-wake.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include "bench.h" #include "futex.h" @@ -81,7 +82,7 @@ static void print_summary(void) printf("Wokeup %d of %d threads in %.4f ms (+-%.2f%%)\n", wakeup_avg, nthreads, - waketime_avg/1e3, + waketime_avg / USEC_PER_MSEC, rel_stddev_stats(waketime_stddev, waketime_avg)); } @@ -182,7 +183,7 @@ int bench_futex_wake(int argc, const char **argv, if (!silent) { printf("[Run %d]: Wokeup %d of %d threads in %.4f ms\n", - j + 1, nwoken, nthreads, runtime.tv_usec/1e3); + j + 1, nwoken, nthreads, runtime.tv_usec / (double)USEC_PER_MSEC); } for (i = 0; i < nthreads; i++) { -- cgit v0.10.2 From b9c4b0f40d22d4b1d29540f5faf6ca4269f25848 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 8 Aug 2016 15:37:58 -0300 Subject: perf top: Use MSEC_PER_SEC Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Steven Rostedt Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-iof4j6mutyogdeie1sj98dhv@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 418ed94..a3223aa 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -68,6 +68,7 @@ #include #include +#include #include static volatile int done; @@ -624,7 +625,7 @@ static void 
*display_thread(void *arg) display_setup_sig(); pthread__unblock_sigwinch(); repeat: - delay_msecs = top->delay_secs * 1000; + delay_msecs = top->delay_secs * MSEC_PER_SEC; set_term_quiet_input(&save); /* trash return*/ getc(stdin); -- cgit v0.10.2 From f8e6710de859e1ac3a5df294bddeca19f60cec9a Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 7 Aug 2016 17:28:26 +0200 Subject: perf hists: Introduce nr_header_lines into struct perf_hpp_list Currently we support just single line headers, this is first step to allow more. Store the number of header lines in perf_hpp_list, which encompasses all the display/sort entries and is thus suitable to hold this value. Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1470583710-1649-2-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 13d4143..95f7cf1 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -69,8 +69,11 @@ static u32 hist_browser__nr_entries(struct hist_browser *hb) static void hist_browser__update_rows(struct hist_browser *hb) { struct ui_browser *browser = &hb->b; - u16 header_offset = hb->show_headers ? 1 : 0, index_row; + struct hists *hists = hb->hists; + struct perf_hpp_list *hpp_list = hists->hpp_list; + u16 header_offset, index_row; + header_offset = hb->show_headers ? hpp_list->nr_header_lines : 0; browser->rows = browser->height - header_offset; /* * Verify if we were at the last line and that line isn't @@ -99,8 +102,11 @@ static void hist_browser__refresh_dimensions(struct ui_browser *browser) static void hist_browser__gotorc(struct hist_browser *browser, int row, int column) { - u16 header_offset = browser->show_headers ? 1 : 0; + struct hists *hists = browser->hists; + struct perf_hpp_list *hpp_list = hists->hpp_list; + u16 header_offset; + header_offset = browser->show_headers ? hpp_list->nr_header_lines : 0; ui_browser__gotorc(&browser->b, row + header_offset, column); } @@ -1656,10 +1662,13 @@ static unsigned int hist_browser__refresh(struct ui_browser *browser) u16 header_offset = 0; struct rb_node *nd; struct hist_browser *hb = container_of(browser, struct hist_browser, b); + struct hists *hists = hb->hists; if (hb->show_headers) { + struct perf_hpp_list *hpp_list = hists->hpp_list; + hist_browser__show_headers(hb); - header_offset = 1; + header_offset = hpp_list->nr_header_lines; } ui_browser__hists_init_top(browser); diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c index 4274969..be7a17f 100644 --- a/tools/perf/ui/hist.c +++ b/tools/perf/ui/hist.c @@ -441,6 +441,7 @@ struct perf_hpp_fmt perf_hpp__format[] = { struct perf_hpp_list perf_hpp_list = { .fields = LIST_HEAD_INIT(perf_hpp_list.fields), .sorts = LIST_HEAD_INIT(perf_hpp_list.sorts), + .nr_header_lines = 1, }; #undef HPP__COLOR_PRINT_FNS diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 0a1edf1..8cc5d33 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -259,6 +259,7 @@ struct perf_hpp_list { struct list_head fields; struct list_head sorts; + int nr_header_lines; int need_collapse; int parent; int sym; -- cgit v0.10.2 From 74bb43f29ec80bc998804fa7399930d86c4bae67 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 7 Aug 2016 17:28:27 +0200 Subject: perf hists: Add line argument into perf_hpp_fmt's header callback Adding line argument into perf_hpp_fmt's header callback to be able to request specific header line. 
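To give an idea of what an implementation can do with the extra argument, a hypothetical two-line column header could simply switch on 'line' (sketch only; my_hdr() and the caption string are invented, the signature matches the hunks below):

static int my_hdr(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
		  struct hists *hists __maybe_unused, int line)
{
	/* line 0 carries a group caption, line 1 the usual column name */
	const char *text = line == 0 ? "Group caption" : fmt->name;

	return scnprintf(hpp->buf, hpp->size, "%s", text);
}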
Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1470583710-1649-3-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index 21ee753..75da965 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -1033,7 +1033,8 @@ static int hpp__entry_global(struct perf_hpp_fmt *_fmt, struct perf_hpp *hpp, } static int hpp__header(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, - struct hists *hists __maybe_unused) + struct hists *hists __maybe_unused, + int line __maybe_unused) { struct diff_hpp_fmt *dfmt = container_of(fmt, struct diff_hpp_fmt, fmt); diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 95f7cf1..303ed62 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -1523,7 +1523,7 @@ static int hists_browser__scnprintf_headers(struct hist_browser *browser, char * if (perf_hpp__should_skip(fmt, hists) || column++ < browser->b.horiz_scroll) continue; - ret = fmt->header(fmt, &dummy_hpp, hists); + ret = fmt->header(fmt, &dummy_hpp, hists, 0); if (advance_hpp_check(&dummy_hpp, ret)) break; @@ -1560,7 +1560,7 @@ static int hists_browser__scnprintf_hierarchy_headers(struct hist_browser *brows if (column++ < browser->b.horiz_scroll) continue; - ret = fmt->header(fmt, &dummy_hpp, hists); + ret = fmt->header(fmt, &dummy_hpp, hists, 0); if (advance_hpp_check(&dummy_hpp, ret)) break; @@ -1597,7 +1597,7 @@ static int hists_browser__scnprintf_hierarchy_headers(struct hist_browser *brows } first_col = false; - ret = fmt->header(fmt, &dummy_hpp, hists); + ret = fmt->header(fmt, &dummy_hpp, hists, 0); dummy_hpp.buf[ret] = '\0'; start = trim(dummy_hpp.buf); diff --git a/tools/perf/ui/gtk/hists.c b/tools/perf/ui/gtk/hists.c index c5f3677..79cb5c4 100644 --- a/tools/perf/ui/gtk/hists.c +++ b/tools/perf/ui/gtk/hists.c @@ -549,7 +549,7 @@ static void perf_gtk__show_hierarchy(GtkWidget *window, struct hists *hists, strcat(buf, "+"); first_col = false; - fmt->header(fmt, &hpp, hists); + fmt->header(fmt, &hpp, hists, 0); strcat(buf, ltrim(rtrim(hpp.buf))); } } diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c index be7a17f..30457c6 100644 --- a/tools/perf/ui/hist.c +++ b/tools/perf/ui/hist.c @@ -230,7 +230,7 @@ static int hpp__width_fn(struct perf_hpp_fmt *fmt, } static int hpp__header_fn(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, - struct hists *hists) + struct hists *hists, int line __maybe_unused) { int len = hpp__width_fn(fmt, hpp, hists); return scnprintf(hpp->buf, hpp->size, "%*s", len, fmt->name); diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c index f04a631..91b8e10 100644 --- a/tools/perf/ui/stdio/hist.c +++ b/tools/perf/ui/stdio/hist.c @@ -549,7 +549,7 @@ static int print_hierarchy_header(struct hists *hists, struct perf_hpp *hpp, struct perf_hpp_list_node, list); perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) { - fmt->header(fmt, hpp, hists); + fmt->header(fmt, hpp, hists, 0); fprintf(fp, "%s%s", hpp->buf, sep ?: " "); } @@ -569,7 +569,7 @@ static int print_hierarchy_header(struct hists *hists, struct perf_hpp *hpp, header_width += fprintf(fp, "+"); first_col = false; - fmt->header(fmt, hpp, hists); + fmt->header(fmt, hpp, hists, 0); header_width += fprintf(fp, "%s", trim(hpp->buf)); } @@ -658,7 +658,7 @@ hists__fprintf_standard_headers(struct hists *hists, else first = false; - fmt->header(fmt, hpp, hists); + fmt->header(fmt, hpp, hists, 0); 
fprintf(fp, "%s", hpp->buf); } diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 8cc5d33..71a44c1 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -230,7 +230,7 @@ struct perf_hpp { struct perf_hpp_fmt { const char *name; int (*header)(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, - struct hists *hists); + struct hists *hists, int line); int (*width)(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, struct hists *hists); int (*color)(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 3d3cb83..a556de7 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -1492,7 +1492,7 @@ void perf_hpp__reset_sort_width(struct perf_hpp_fmt *fmt, struct hists *hists) } static int __sort__hpp_header(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, - struct hists *hists) + struct hists *hists, int line __maybe_unused) { struct hpp_sort_entry *hse; size_t len = fmt->user_len; @@ -1797,7 +1797,8 @@ static void update_dynamic_len(struct hpp_dynamic_entry *hde, } static int __sort__hde_header(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, - struct hists *hists __maybe_unused) + struct hists *hists __maybe_unused, + int line __maybe_unused) { struct hpp_dynamic_entry *hde; size_t len = fmt->user_len; -- cgit v0.10.2 From 69705b35859195b5640861fff91cc936d1baf4eb Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 7 Aug 2016 17:28:28 +0200 Subject: perf tools tui: Display multiple header lines Display multiple header lines in TUI browser, if it's configured within struct perf_hpp_list::nr_header_lines. Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1470583710-1649-4-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 303ed62..4a7aac8 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -1502,7 +1502,9 @@ static int advance_hpp_check(struct perf_hpp *hpp, int inc) return hpp->size <= 0; } -static int hists_browser__scnprintf_headers(struct hist_browser *browser, char *buf, size_t size) +static int +hists_browser__scnprintf_headers(struct hist_browser *browser, char *buf, + size_t size, int line) { struct hists *hists = browser->hists; struct perf_hpp dummy_hpp = { @@ -1523,7 +1525,7 @@ static int hists_browser__scnprintf_headers(struct hist_browser *browser, char * if (perf_hpp__should_skip(fmt, hists) || column++ < browser->b.horiz_scroll) continue; - ret = fmt->header(fmt, &dummy_hpp, hists, 0); + ret = fmt->header(fmt, &dummy_hpp, hists, line); if (advance_hpp_check(&dummy_hpp, ret)) break; @@ -1628,14 +1630,21 @@ static void hists_browser__hierarchy_headers(struct hist_browser *browser) static void hists_browser__headers(struct hist_browser *browser) { - char headers[1024]; + struct hists *hists = browser->hists; + struct perf_hpp_list *hpp_list = hists->hpp_list; - hists_browser__scnprintf_headers(browser, headers, - sizeof(headers)); + int line; - ui_browser__gotorc(&browser->b, 0, 0); - ui_browser__set_color(&browser->b, HE_COLORSET_ROOT); - ui_browser__write_nstring(&browser->b, headers, browser->b.width + 1); + for (line = 0; line < hpp_list->nr_header_lines; line++) { + char headers[1024]; + + hists_browser__scnprintf_headers(browser, headers, + sizeof(headers), line); + + ui_browser__gotorc(&browser->b, line, 0); + ui_browser__set_color(&browser->b, HE_COLORSET_ROOT); + 
ui_browser__write_nstring(&browser->b, headers, browser->b.width + 1); + } } static void hist_browser__show_headers(struct hist_browser *browser) -- cgit v0.10.2 From f3705b062eafc2867eb0e9ee3502bd59564f103c Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 7 Aug 2016 17:28:29 +0200 Subject: perf tools stdio: Display multiple header lines Display multiple header lines in stdio output , if it's configured within struct perf_hpp_list::nr_header_lines. Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1470583710-1649-5-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c index 91b8e10..9b807cb 100644 --- a/tools/perf/ui/stdio/hist.c +++ b/tools/perf/ui/stdio/hist.c @@ -639,13 +639,10 @@ hists__fprintf_hierarchy_headers(struct hists *hists, return print_hierarchy_header(hists, hpp, symbol_conf.field_sep, fp); } -static int -hists__fprintf_standard_headers(struct hists *hists, - struct perf_hpp *hpp, - FILE *fp) +static void fprintf_line(struct hists *hists, struct perf_hpp *hpp, + int line, FILE *fp) { struct perf_hpp_fmt *fmt; - unsigned int width; const char *sep = symbol_conf.field_sep; bool first = true; @@ -658,14 +655,33 @@ hists__fprintf_standard_headers(struct hists *hists, else first = false; - fmt->header(fmt, hpp, hists, 0); + fmt->header(fmt, hpp, hists, line); fprintf(fp, "%s", hpp->buf); } +} - fprintf(fp, "\n"); +static int +hists__fprintf_standard_headers(struct hists *hists, + struct perf_hpp *hpp, + FILE *fp) +{ + struct perf_hpp_list *hpp_list = hists->hpp_list; + struct perf_hpp_fmt *fmt; + unsigned int width; + const char *sep = symbol_conf.field_sep; + bool first = true; + int line; + + for (line = 0; line < hpp_list->nr_header_lines; line++) { + /* first # is displayed one level up */ + if (line) + fprintf(fp, "# "); + fprintf_line(hists, hpp, line, fp); + fprintf(fp, "\n"); + } if (sep) - return 1; + return hpp_list->nr_header_lines; first = true; @@ -689,7 +705,7 @@ hists__fprintf_standard_headers(struct hists *hists, fprintf(fp, "\n"); fprintf(fp, "#\n"); - return 3; + return hpp_list->nr_header_lines + 2; } static int hists__fprintf_headers(struct hists *hists, FILE *fp) -- cgit v0.10.2 From 29659ab4e7d02c08d8f2d08db0b0f708bd8b6771 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 7 Aug 2016 17:28:30 +0200 Subject: perf hists: Add support for header span Add span argument for header callback function. The handling of this argument is completely in the hands of the callback. The only thing the caller ensures is it's zeroed on the beginning. Omitting span skipping in hierarchy headers and gtk code. The c2c code use this to span header lines based on the entries span configuration. 
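The caller side of the contract is easiest to read with the stdio loop reassembled in one piece (this mirrors fprintf_line() from the hunk below, under an invented function name; the only guarantee the caller gives is that 'span' starts at zero):

static void fprintf_header_line(struct hists *hists, struct perf_hpp *hpp,
				int line, FILE *fp)
{
	struct perf_hpp_fmt *fmt;
	const char *sep = symbol_conf.field_sep;
	bool first = true;
	int span = 0;

	hists__for_each_format(hists, fmt) {
		if (perf_hpp__should_skip(fmt, hists))
			continue;

		/* no separator in front of a column that is still being spanned */
		if (!first && !span)
			fprintf(fp, "%s", sep ?: " ");
		else
			first = false;

		fmt->header(fmt, hpp, hists, line, &span);

		/* a column covered by an active span prints nothing of its own */
		if (!span)
			fprintf(fp, "%s", hpp->buf);
	}
}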
Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1470583710-1649-6-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index 75da965..9ff0db4 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -1034,7 +1034,8 @@ static int hpp__entry_global(struct perf_hpp_fmt *_fmt, struct perf_hpp *hpp, static int hpp__header(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, struct hists *hists __maybe_unused, - int line __maybe_unused) + int line __maybe_unused, + int *span __maybe_unused) { struct diff_hpp_fmt *dfmt = container_of(fmt, struct diff_hpp_fmt, fmt); diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 4a7aac8..22e48a3 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -1514,6 +1514,7 @@ hists_browser__scnprintf_headers(struct hist_browser *browser, char *buf, struct perf_hpp_fmt *fmt; size_t ret = 0; int column = 0; + int span = 0; if (symbol_conf.use_callchain) { ret = scnprintf(buf, size, " "); @@ -1525,10 +1526,13 @@ hists_browser__scnprintf_headers(struct hist_browser *browser, char *buf, if (perf_hpp__should_skip(fmt, hists) || column++ < browser->b.horiz_scroll) continue; - ret = fmt->header(fmt, &dummy_hpp, hists, line); + ret = fmt->header(fmt, &dummy_hpp, hists, line, &span); if (advance_hpp_check(&dummy_hpp, ret)) break; + if (span) + continue; + ret = scnprintf(dummy_hpp.buf, dummy_hpp.size, " "); if (advance_hpp_check(&dummy_hpp, ret)) break; @@ -1562,7 +1566,7 @@ static int hists_browser__scnprintf_hierarchy_headers(struct hist_browser *brows if (column++ < browser->b.horiz_scroll) continue; - ret = fmt->header(fmt, &dummy_hpp, hists, 0); + ret = fmt->header(fmt, &dummy_hpp, hists, 0, NULL); if (advance_hpp_check(&dummy_hpp, ret)) break; @@ -1599,7 +1603,7 @@ static int hists_browser__scnprintf_hierarchy_headers(struct hist_browser *brows } first_col = false; - ret = fmt->header(fmt, &dummy_hpp, hists, 0); + ret = fmt->header(fmt, &dummy_hpp, hists, 0, NULL); dummy_hpp.buf[ret] = '\0'; start = trim(dummy_hpp.buf); diff --git a/tools/perf/ui/gtk/hists.c b/tools/perf/ui/gtk/hists.c index 79cb5c4..a4f02de 100644 --- a/tools/perf/ui/gtk/hists.c +++ b/tools/perf/ui/gtk/hists.c @@ -549,7 +549,7 @@ static void perf_gtk__show_hierarchy(GtkWidget *window, struct hists *hists, strcat(buf, "+"); first_col = false; - fmt->header(fmt, &hpp, hists, 0); + fmt->header(fmt, &hpp, hists, 0, NULL); strcat(buf, ltrim(rtrim(hpp.buf))); } } diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c index 30457c6..b47fafc 100644 --- a/tools/perf/ui/hist.c +++ b/tools/perf/ui/hist.c @@ -230,7 +230,8 @@ static int hpp__width_fn(struct perf_hpp_fmt *fmt, } static int hpp__header_fn(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, - struct hists *hists, int line __maybe_unused) + struct hists *hists, int line __maybe_unused, + int *span __maybe_unused) { int len = hpp__width_fn(fmt, hpp, hists); return scnprintf(hpp->buf, hpp->size, "%*s", len, fmt->name); diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c index 9b807cb..9b65f4a 100644 --- a/tools/perf/ui/stdio/hist.c +++ b/tools/perf/ui/stdio/hist.c @@ -549,7 +549,7 @@ static int print_hierarchy_header(struct hists *hists, struct perf_hpp *hpp, struct perf_hpp_list_node, list); perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) { - fmt->header(fmt, hpp, hists, 0); + fmt->header(fmt, hpp, hists, 0, NULL); 
fprintf(fp, "%s%s", hpp->buf, sep ?: " "); } @@ -569,7 +569,7 @@ static int print_hierarchy_header(struct hists *hists, struct perf_hpp *hpp, header_width += fprintf(fp, "+"); first_col = false; - fmt->header(fmt, hpp, hists, 0); + fmt->header(fmt, hpp, hists, 0, NULL); header_width += fprintf(fp, "%s", trim(hpp->buf)); } @@ -645,18 +645,21 @@ static void fprintf_line(struct hists *hists, struct perf_hpp *hpp, struct perf_hpp_fmt *fmt; const char *sep = symbol_conf.field_sep; bool first = true; + int span = 0; hists__for_each_format(hists, fmt) { if (perf_hpp__should_skip(fmt, hists)) continue; - if (!first) + if (!first && !span) fprintf(fp, "%s", sep ?: " "); else first = false; - fmt->header(fmt, hpp, hists, line); - fprintf(fp, "%s", hpp->buf); + fmt->header(fmt, hpp, hists, line, &span); + + if (!span) + fprintf(fp, "%s", hpp->buf); } } diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 71a44c1..a002c93 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -230,7 +230,7 @@ struct perf_hpp { struct perf_hpp_fmt { const char *name; int (*header)(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, - struct hists *hists, int line); + struct hists *hists, int line, int *span); int (*width)(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, struct hists *hists); int (*color)(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index a556de7..5ae3d32 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -1492,7 +1492,8 @@ void perf_hpp__reset_sort_width(struct perf_hpp_fmt *fmt, struct hists *hists) } static int __sort__hpp_header(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, - struct hists *hists, int line __maybe_unused) + struct hists *hists, int line __maybe_unused, + int *span __maybe_unused) { struct hpp_sort_entry *hse; size_t len = fmt->user_len; @@ -1798,7 +1799,8 @@ static void update_dynamic_len(struct hpp_dynamic_entry *hde, static int __sort__hde_header(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, struct hists *hists __maybe_unused, - int line __maybe_unused) + int line __maybe_unused, + int *span __maybe_unused) { struct hpp_dynamic_entry *hde; size_t len = fmt->user_len; -- cgit v0.10.2 From c12944f7faa7f76441d83c1413f13e8bc70162b2 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Aug 2016 14:56:13 -0300 Subject: perf disassemble: Move check for kallsyms + !kcore We don't need to do all that filename logic to then just have to test something unrelated and bail out, move it to the start of the function. 
Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-lk1v4srtsktonnyp6t1o0uhx@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 4024d30..9882bc6 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1165,11 +1165,10 @@ int symbol__strerror_disassemble(struct symbol *sym __maybe_unused, struct map * int symbol__disassemble(struct symbol *sym, struct map *map, size_t privsize) { struct dso *dso = map->dso; - char *filename = dso__build_id_filename(dso, NULL, 0); + char *filename; bool free_filename = true; char command[PATH_MAX * 2]; FILE *file; - int err = 0; char symfs_filename[PATH_MAX]; struct kcore_extract kce; bool delete_extract = false; @@ -1177,7 +1176,13 @@ int symbol__disassemble(struct symbol *sym, struct map *map, size_t privsize) int lineno = 0; int nline; pid_t pid; + int err = SYMBOL_ANNOTATE_ERRNO__NO_VMLINUX; + + if (dso->symtab_type == DSO_BINARY_TYPE__KALLSYMS && + !dso__is_kcore(dso)) + goto out; + filename = dso__build_id_filename(dso, NULL, 0); if (filename) symbol__join_symfs(symfs_filename, filename); @@ -1201,12 +1206,6 @@ fallback: free_filename = false; } - if (dso->symtab_type == DSO_BINARY_TYPE__KALLSYMS && - !dso__is_kcore(dso)) { - err = SYMBOL_ANNOTATE_ERRNO__NO_VMLINUX; - goto out_free_filename; - } - pr_debug("%s: filename=%s, sym=%s, start=%#" PRIx64 ", end=%#" PRIx64 "\n", __func__, filename, sym->name, map->unmap_ip(map, sym->start), map->unmap_ip(map, sym->end)); @@ -1338,6 +1337,7 @@ out_free_filename: kcore_extract__delete(&kce); if (free_filename) free(filename); +out: return err; out_close_stdout: -- cgit v0.10.2 From 3caee094d160b0ef92988099105e9a173a3805b0 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Aug 2016 15:16:37 -0300 Subject: perf disassemble: Simplify logic for picking the filename to disassemble Lots of changes to support kcore, compressed modules, build-id files left us with some spaguetti code, simplify it a bit, more to come. 
Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-h70p7x451li3f2fhs44vzmm8@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 9882bc6..8a99493 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1166,7 +1166,6 @@ int symbol__disassemble(struct symbol *sym, struct map *map, size_t privsize) { struct dso *dso = map->dso; char *filename; - bool free_filename = true; char command[PATH_MAX * 2]; FILE *file; char symfs_filename[PATH_MAX]; @@ -1183,31 +1182,30 @@ int symbol__disassemble(struct symbol *sym, struct map *map, size_t privsize) goto out; filename = dso__build_id_filename(dso, NULL, 0); - if (filename) + if (filename) { symbol__join_symfs(symfs_filename, filename); - - if (filename == NULL) { + free(filename); + } else { if (dso->has_build_id) return ENOMEM; goto fallback; - } else if (dso__is_kcore(dso) || - readlink(symfs_filename, command, sizeof(command)) < 0 || - strstr(command, DSO__NAME_KALLSYMS) || - access(symfs_filename, R_OK)) { - free(filename); + } + + if (dso__is_kcore(dso) || + readlink(symfs_filename, command, sizeof(command)) < 0 || + strstr(command, DSO__NAME_KALLSYMS) || + access(symfs_filename, R_OK)) { fallback: /* * If we don't have build-ids or the build-id file isn't in the * cache, or is just a kallsyms file, well, lets hope that this * DSO is the same as when 'perf record' ran. */ - filename = (char *)dso->long_name; - symbol__join_symfs(symfs_filename, filename); - free_filename = false; + symbol__join_symfs(symfs_filename, dso->long_name); } pr_debug("%s: filename=%s, sym=%s, start=%#" PRIx64 ", end=%#" PRIx64 "\n", __func__, - filename, sym->name, map->unmap_ip(map, sym->start), + symfs_filename, sym->name, map->unmap_ip(map, sym->start), map->unmap_ip(map, sym->end)); pr_debug("annotating [%p] %30s : [%p] %30s\n", @@ -1222,11 +1220,6 @@ fallback: delete_extract = true; strlcpy(symfs_filename, kce.extract_filename, sizeof(symfs_filename)); - if (free_filename) { - free(filename); - free_filename = false; - } - filename = symfs_filename; } } else if (dso__needs_decompress(dso)) { char tmp[PATH_MAX]; @@ -1235,14 +1228,14 @@ fallback: bool ret; if (kmod_path__parse_ext(&m, symfs_filename)) - goto out_free_filename; + goto out; snprintf(tmp, PATH_MAX, "/tmp/perf-kmod-XXXXXX"); fd = mkstemp(tmp); if (fd < 0) { free(m.ext); - goto out_free_filename; + goto out; } ret = decompress_to_file(m.ext, symfs_filename, fd); @@ -1254,7 +1247,7 @@ fallback: close(fd); if (!ret) - goto out_free_filename; + goto out; strcpy(symfs_filename, tmp); } @@ -1270,7 +1263,7 @@ fallback: map__rip_2objdump(map, sym->end), symbol_conf.annotate_asm_raw ? "" : "--no-show-raw", symbol_conf.annotate_src ? "-S" : "", - symfs_filename, filename); + symfs_filename, symfs_filename); pr_debug("Executing: %s\n", command); @@ -1332,11 +1325,9 @@ out_remove_tmp: if (dso__needs_decompress(dso)) unlink(symfs_filename); -out_free_filename: + if (delete_extract) kcore_extract__delete(&kce); - if (free_filename) - free(filename); out: return err; -- cgit v0.10.2 From 05ed3ac9417dac9be9dd63f3cf97416c80bad359 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Aug 2016 15:32:53 -0300 Subject: perf disassemble: Extract logic to find file to pass to objdump to a separate function Disentangling this a bit further, more to come. 
Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-7bjv2xazuyzs0xw01mlwosn5@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 8a99493..25a9259 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1162,29 +1162,19 @@ int symbol__strerror_disassemble(struct symbol *sym __maybe_unused, struct map * return 0; } -int symbol__disassemble(struct symbol *sym, struct map *map, size_t privsize) +static int dso__disassemble_filename(struct dso *dso, char *filename, size_t filename_size) { - struct dso *dso = map->dso; - char *filename; - char command[PATH_MAX * 2]; - FILE *file; - char symfs_filename[PATH_MAX]; - struct kcore_extract kce; - bool delete_extract = false; - int stdout_fd[2]; - int lineno = 0; - int nline; - pid_t pid; - int err = SYMBOL_ANNOTATE_ERRNO__NO_VMLINUX; + char linkname[PATH_MAX]; + char *build_id_filename; if (dso->symtab_type == DSO_BINARY_TYPE__KALLSYMS && !dso__is_kcore(dso)) - goto out; + return SYMBOL_ANNOTATE_ERRNO__NO_VMLINUX; - filename = dso__build_id_filename(dso, NULL, 0); - if (filename) { - symbol__join_symfs(symfs_filename, filename); - free(filename); + build_id_filename = dso__build_id_filename(dso, NULL, 0); + if (build_id_filename) { + __symbol__join_symfs(filename, filename_size, build_id_filename); + free(build_id_filename); } else { if (dso->has_build_id) return ENOMEM; @@ -1192,18 +1182,38 @@ int symbol__disassemble(struct symbol *sym, struct map *map, size_t privsize) } if (dso__is_kcore(dso) || - readlink(symfs_filename, command, sizeof(command)) < 0 || - strstr(command, DSO__NAME_KALLSYMS) || - access(symfs_filename, R_OK)) { + readlink(filename, linkname, sizeof(linkname)) < 0 || + strstr(linkname, DSO__NAME_KALLSYMS) || + access(filename, R_OK)) { fallback: /* * If we don't have build-ids or the build-id file isn't in the * cache, or is just a kallsyms file, well, lets hope that this * DSO is the same as when 'perf record' ran. */ - symbol__join_symfs(symfs_filename, dso->long_name); + __symbol__join_symfs(filename, filename_size, dso->long_name); } + return 0; +} + +int symbol__disassemble(struct symbol *sym, struct map *map, size_t privsize) +{ + struct dso *dso = map->dso; + char command[PATH_MAX * 2]; + FILE *file; + char symfs_filename[PATH_MAX]; + struct kcore_extract kce; + bool delete_extract = false; + int stdout_fd[2]; + int lineno = 0; + int nline; + pid_t pid; + int err = dso__disassemble_filename(dso, symfs_filename, sizeof(symfs_filename)); + + if (err) + return err; + pr_debug("%s: filename=%s, sym=%s, start=%#" PRIx64 ", end=%#" PRIx64 "\n", __func__, symfs_filename, sym->name, map->unmap_ip(map, sym->start), map->unmap_ip(map, sym->end)); -- cgit v0.10.2 From 39ff526350059e61234d58676c13bcfcaac3a451 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Thu, 11 Aug 2016 10:20:56 -0600 Subject: tools: Copy coresight-pmu.h header file needed by perf tools Directly accessing kernel files is not allowed anymore. As such making file coresight-pmu.h accessible by the perf tools and complain if this copy strays from the one found in the main kernel tree. 
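The copied header is self-contained, so a tools-side consumer only needs the inline helper, along the lines of (a sketch assuming tools/include is on the include path; the loop bound is arbitrary):

#include <stdio.h>
#include <linux/coresight-pmu.h>

int main(void)
{
	int cpu;

	/* trace IDs are derived from the seed plus twice the CPU number */
	for (cpu = 0; cpu < 4; cpu++)
		printf("cpu%d: trace id 0x%x\n", cpu, coresight_get_trace_id(cpu));

	return 0;
}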
Signed-off-by: Mathieu Poirier Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Vince Weaver Link: http://lkml.kernel.org/r/1470932464-726-2-git-send-email-mathieu.poirier@linaro.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/include/linux/coresight-pmu.h b/tools/include/linux/coresight-pmu.h new file mode 100644 index 0000000..7d41026 --- /dev/null +++ b/tools/include/linux/coresight-pmu.h @@ -0,0 +1,39 @@ +/* + * Copyright(C) 2015 Linaro Limited. All rights reserved. + * Author: Mathieu Poirier + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + */ + +#ifndef _LINUX_CORESIGHT_PMU_H +#define _LINUX_CORESIGHT_PMU_H + +#define CORESIGHT_ETM_PMU_NAME "cs_etm" +#define CORESIGHT_ETM_PMU_SEED 0x10 + +/* ETMv3.5/PTM's ETMCR config bit */ +#define ETM_OPT_CYCACC 12 +#define ETM_OPT_TS 28 + +static inline int coresight_get_trace_id(int cpu) +{ + /* + * A trace ID of value 0 is invalid, so let's start at some + * random value that fits in 7 bits and go from there. Since + * the common convention is to have data trace IDs be I(N) + 1, + * set instruction trace IDs as a function of the CPU number. + */ + return (CORESIGHT_ETM_PMU_SEED + (cpu * 2)); +} + +#endif diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST index f23a5e7..ff200c6 100644 --- a/tools/perf/MANIFEST +++ b/tools/perf/MANIFEST @@ -60,6 +60,7 @@ tools/include/asm-generic/bitops.h tools/include/linux/atomic.h tools/include/linux/bitops.h tools/include/linux/compiler.h +tools/include/linux/coresight-pmu.h tools/include/linux/filter.h tools/include/linux/hash.h tools/include/linux/kernel.h diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 2d90875..aa7ab23 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -429,6 +429,9 @@ $(PERF_IN): prepare FORCE @(test -f ../../include/asm-generic/bitops/fls64.h && ( \ (diff -B ../include/asm-generic/bitops/fls64.h ../../include/asm-generic/bitops/fls64.h >/dev/null) \ || echo "Warning: tools/include/asm-generic/bitops/fls64.h differs from kernel" >&2 )) || true + @(test -f ../../include/linux/coresight-pmu.h && ( \ + (diff -B ../include/linux/coresight-pmu.h ../../include/linux/coresight-pmu.h >/dev/null) \ + || echo "Warning: tools/include/linux/coresight-pmu.h differs from kernel" >&2 )) || true $(Q)$(MAKE) $(build)=perf $(OUTPUT)perf: $(PERFLIBS) $(PERF_IN) $(LIBTRACEEVENT_DYNAMIC_LIST) -- cgit v0.10.2 From fa1f456592347c6f40c9d37ea407b029fda5324a Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 12 Aug 2016 20:41:01 -0300 Subject: perf report: Allow configuring the default sort order in ~/.perfconfig Allows changing the default sort order from "comm,dso,symbol" to some other default, for instance "sym,dso" may be more fitting for kernel developers. 
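With the hunks below applied, the key sits in the 'report' section of ~/.perfconfig, e.g. (the value is any sort specification 'perf report -s' accepts):

[report]
	sort_order = sym,dso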
Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Linus Torvalds Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-pm1h5puxua8nsxksd68fjm8r@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt index 15949e2..68c8919 100644 --- a/tools/perf/Documentation/perf-config.txt +++ b/tools/perf/Documentation/perf-config.txt @@ -382,6 +382,10 @@ call-graph.*:: histogram entry. Default is 0 which means no limitation. report.*:: + report.sort_order:: + Allows changing the default sort order from "comm,dso,symbol" to + some other default, for instance "sym,dso" may be more fitting for + kernel developers. report.percent-limit:: This one is mostly the same as call-graph.threshold but works for histogram entries. Entries having an overhead lower than this diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 949e5a1..b9e046b 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -89,6 +89,10 @@ static int report__config(const char *var, const char *value, void *cb) rep->queue_size = perf_config_u64(var, value); return 0; } + if (!strcmp(var, "report.sort_order")) { + default_sort_order = strdup(value); + return 0; + } return 0; } diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 5ae3d32..1884d7f 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -11,7 +11,7 @@ regex_t parent_regex; const char default_parent_pattern[] = "^sys_|^do_page_fault"; const char *parent_pattern = default_parent_pattern; -const char default_sort_order[] = "comm,dso,symbol"; +const char *default_sort_order = "comm,dso,symbol"; const char default_branch_sort_order[] = "comm,dso_from,symbol_from,symbol_to,cycles"; const char default_mem_sort_order[] = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked"; const char default_top_sort_order[] = "dso,symbol"; diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index 7ca37ea..28c0524 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -28,7 +28,7 @@ extern const char *sort_order; extern const char *field_order; extern const char default_parent_pattern[]; extern const char *parent_pattern; -extern const char default_sort_order[]; +extern const char *default_sort_order; extern regex_t ignore_callees_regex; extern int have_ignore_callees; extern enum sort_mode sort__mode; -- cgit v0.10.2 From 11d8f870c8eb8dd41ade688fbad6946ad69243fe Mon Sep 17 00:00:00 2001 From: Rui Teng Date: Thu, 28 Jul 2016 10:05:57 +0800 Subject: perf tools: Use __weak definition from linux/compiler.h Replace __attribute__((weak)) with __weak definition Signed-off-by: Rui Teng Cc: Alexander Shishkin Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1469671557-2256-2-git-send-email-rui.teng@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 8f0db40..85dd0db 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -828,8 +828,7 @@ static int write_group_desc(int fd, struct perf_header *h __maybe_unused, * default get_cpuid(): nothing gets recorded * actual implementation must be in arch/$(ARCH)/util/header.c */ -int __attribute__ ((weak)) get_cpuid(char *buffer __maybe_unused, - size_t sz __maybe_unused) +int __weak get_cpuid(char *buffer __maybe_unused, size_t sz __maybe_unused) { return -1; } -- cgit v0.10.2 From 11196b79164c95b8e5968ff63fa6b59536c9748f Mon Sep 17 00:00:00 2001 From: Rui Teng 
Date: Wed, 10 Aug 2016 16:49:08 +0800 Subject: perf tools: Skip running the feature tests for 'make install-doc' It is a requirement from the perf TODO list[1]: ''The feature tests should be performed only when a file that needs those tests, or at least only when some .c or .h file will be rebuilt. An initial step would be for 'make install-doc' not to run the feature tests, there it is not needed at all.'' By adding 'install-doc' to the NON_CONFIG_TARGETS, it will skip running the feature tests for such target. The Auto-detecting system features list will not be displayed: $ make install-doc BUILD: Doing 'make -j2' parallel build SUBDIR Documentation make[2]: Nothing to be done for 'install'. [1] https://perf.wiki.kernel.org/index.php/Todo Signed-off-by: Rui Teng Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1470818948-17784-1-git-send-email-rui.teng@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index aa7ab23..828cfd7 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -165,7 +165,7 @@ SUBCMD_DIR = $(srctree)/tools/lib/subcmd/ # non-config cases config := 1 -NON_CONFIG_TARGETS := clean TAGS tags cscope help +NON_CONFIG_TARGETS := clean TAGS tags cscope help install-doc ifdef MAKECMDGOALS ifeq ($(filter-out $(NON_CONFIG_TARGETS),$(MAKECMDGOALS)),) -- cgit v0.10.2 From 6637e6f1ac62e62a353612844db21cff9a17758b Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 15 Aug 2016 17:25:43 +0100 Subject: perf hists browser: Remove superfluous null check on map 'map' is being already checked if it is NULL at the start of do_zoom_dso(), so the second subsequent check is superfluous and can be removed. Signed-off-by: Colin King Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Pekka Enberg Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/r/1471278343-14999-1-git-send-email-colin.king@canonical.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 22e48a3..f0611c9 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -2440,8 +2440,6 @@ do_zoom_dso(struct hist_browser *browser, struct popup_action *act) browser->hists->dso_filter = NULL; ui_helpline__pop(); } else { - if (map == NULL) - return 0; ui_helpline__fpush("To zoom out press ESC or ENTER + \"Zoom out of %s DSO\"", __map__is_kernel(map) ? "the Kernel" : map->dso->short_name); browser->hists->dso_filter = map->dso; -- cgit v0.10.2 From 17ce3dc7e5a0e4796cc7838d1f7b2531d0bca130 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 18 Aug 2016 17:57:50 +0900 Subject: ftrace: kprobe: uprobe: Add x8/x16/x32/x64 for hexadecimal types Add x8/x16/x32/x64 for hexadecimal type casting to kprobe/uprobe event tracer. These type casts can be used for integer arguments for explicitly showing them in hexadecimal digits in formatted text. 
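As an illustration only (not part of the original change; it assumes an x86-64 kernel where do_sys_open()'s dfd and flags arguments are passed in %di and %dx, and "myprobe" is just an example event name):

----
# echo 'p:myprobe do_sys_open dfd=%di:x32 flags=%dx:x32' >> /sys/kernel/debug/tracing/kprobe_events
# echo 1 > /sys/kernel/debug/tracing/events/kprobes/myprobe/enable
----

With the x32 casts both arguments are explicitly shown as hexadecimal in the trace output.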
Signed-off-by: Masami Hiramatsu Acked-by: Steven Rostedt Cc: Alexander Shishkin Cc: Hemant Kumar Cc: Naohiro Aota Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/r/147151067029.12957.11591314629326414783.stgit@devbox Signed-off-by: Arnaldo Carvalho de Melo diff --git a/Documentation/trace/kprobetrace.txt b/Documentation/trace/kprobetrace.txt index ea52ec1..9109c8e 100644 --- a/Documentation/trace/kprobetrace.txt +++ b/Documentation/trace/kprobetrace.txt @@ -44,8 +44,8 @@ Synopsis of kprobe_events +|-offs(FETCHARG) : Fetch memory at FETCHARG +|- offs address.(**) NAME=FETCHARG : Set NAME as the argument name of FETCHARG. FETCHARG:TYPE : Set TYPE as the type of FETCHARG. Currently, basic types - (u8/u16/u32/u64/s8/s16/s32/s64), "string" and bitfield - are supported. + (u8/u16/u32/u64/s8/s16/s32/s64), hexadecimal types + (x8/x16/x32/x64), "string" and bitfield are supported. (*) only for return probe. (**) this is useful for fetching a field of data structures. diff --git a/Documentation/trace/uprobetracer.txt b/Documentation/trace/uprobetracer.txt index 72d1cd4..7e6d28c 100644 --- a/Documentation/trace/uprobetracer.txt +++ b/Documentation/trace/uprobetracer.txt @@ -40,8 +40,8 @@ Synopsis of uprobe_tracer +|-offs(FETCHARG) : Fetch memory at FETCHARG +|- offs address.(**) NAME=FETCHARG : Set NAME as the argument name of FETCHARG. FETCHARG:TYPE : Set TYPE as the type of FETCHARG. Currently, basic types - (u8/u16/u32/u64/s8/s16/s32/s64), "string" and bitfield - are supported. + (u8/u16/u32/u64/s8/s16/s32/s64), hexadecimal types + (x8/x16/x32/x64), "string" and bitfield are supported. (*) only for return probe. (**) this is useful for fetching a field of data structures. diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 9aedb0b..eb6c9f1 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -253,6 +253,10 @@ static const struct fetch_type kprobes_fetch_type_table[] = { ASSIGN_FETCH_TYPE(s16, u16, 1), ASSIGN_FETCH_TYPE(s32, u32, 1), ASSIGN_FETCH_TYPE(s64, u64, 1), + ASSIGN_FETCH_TYPE_ALIAS(x8, u8, u8, 0), + ASSIGN_FETCH_TYPE_ALIAS(x16, u16, u16, 0), + ASSIGN_FETCH_TYPE_ALIAS(x32, u32, u32, 0), + ASSIGN_FETCH_TYPE_ALIAS(x64, u64, u64, 0), ASSIGN_FETCH_TYPE_END }; diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index 74e80a5..725af9d 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -36,24 +36,28 @@ const char *reserved_field_names[] = { }; /* Printing in basic type function template */ -#define DEFINE_BASIC_PRINT_TYPE_FUNC(type, fmt) \ -int PRINT_TYPE_FUNC_NAME(type)(struct trace_seq *s, const char *name, \ +#define DEFINE_BASIC_PRINT_TYPE_FUNC(tname, type, fmt) \ +int PRINT_TYPE_FUNC_NAME(tname)(struct trace_seq *s, const char *name, \ void *data, void *ent) \ { \ trace_seq_printf(s, " %s=" fmt, name, *(type *)data); \ return !trace_seq_has_overflowed(s); \ } \ -const char PRINT_TYPE_FMT_NAME(type)[] = fmt; \ -NOKPROBE_SYMBOL(PRINT_TYPE_FUNC_NAME(type)); - -DEFINE_BASIC_PRINT_TYPE_FUNC(u8 , "0x%x") -DEFINE_BASIC_PRINT_TYPE_FUNC(u16, "0x%x") -DEFINE_BASIC_PRINT_TYPE_FUNC(u32, "0x%x") -DEFINE_BASIC_PRINT_TYPE_FUNC(u64, "0x%Lx") -DEFINE_BASIC_PRINT_TYPE_FUNC(s8, "%d") -DEFINE_BASIC_PRINT_TYPE_FUNC(s16, "%d") -DEFINE_BASIC_PRINT_TYPE_FUNC(s32, "%d") -DEFINE_BASIC_PRINT_TYPE_FUNC(s64, "%Ld") +const char PRINT_TYPE_FMT_NAME(tname)[] = fmt; \ +NOKPROBE_SYMBOL(PRINT_TYPE_FUNC_NAME(tname)); + +DEFINE_BASIC_PRINT_TYPE_FUNC(u8, u8, "0x%x") +DEFINE_BASIC_PRINT_TYPE_FUNC(u16, u16, "0x%x") 
+DEFINE_BASIC_PRINT_TYPE_FUNC(u32, u32, "0x%x") +DEFINE_BASIC_PRINT_TYPE_FUNC(u64, u64, "0x%Lx") +DEFINE_BASIC_PRINT_TYPE_FUNC(s8, s8, "%d") +DEFINE_BASIC_PRINT_TYPE_FUNC(s16, s16, "%d") +DEFINE_BASIC_PRINT_TYPE_FUNC(s32, s32, "%d") +DEFINE_BASIC_PRINT_TYPE_FUNC(s64, s64, "%Ld") +DEFINE_BASIC_PRINT_TYPE_FUNC(x8, u8, "0x%x") +DEFINE_BASIC_PRINT_TYPE_FUNC(x16, u16, "0x%x") +DEFINE_BASIC_PRINT_TYPE_FUNC(x32, u32, "0x%x") +DEFINE_BASIC_PRINT_TYPE_FUNC(x64, u64, "0x%Lx") /* Print type function for string type */ int PRINT_TYPE_FUNC_NAME(string)(struct trace_seq *s, const char *name, diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index 45400ca..f0c470a 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -149,6 +149,11 @@ DECLARE_BASIC_PRINT_TYPE_FUNC(s8); DECLARE_BASIC_PRINT_TYPE_FUNC(s16); DECLARE_BASIC_PRINT_TYPE_FUNC(s32); DECLARE_BASIC_PRINT_TYPE_FUNC(s64); +DECLARE_BASIC_PRINT_TYPE_FUNC(x8); +DECLARE_BASIC_PRINT_TYPE_FUNC(x16); +DECLARE_BASIC_PRINT_TYPE_FUNC(x32); +DECLARE_BASIC_PRINT_TYPE_FUNC(x64); + DECLARE_BASIC_PRINT_TYPE_FUNC(string); #define FETCH_FUNC_NAME(method, type) fetch_##method##_##type @@ -234,6 +239,10 @@ ASSIGN_FETCH_FUNC(file_offset, ftype), \ #define ASSIGN_FETCH_TYPE(ptype, ftype, sign) \ __ASSIGN_FETCH_TYPE(#ptype, ptype, ftype, sizeof(ftype), sign, #ptype) +/* If ptype is an alias of atype, use this macro (show atype in format) */ +#define ASSIGN_FETCH_TYPE_ALIAS(ptype, atype, ftype, sign) \ + __ASSIGN_FETCH_TYPE(#ptype, ptype, ftype, sizeof(ftype), sign, #atype) + #define ASSIGN_FETCH_TYPE_END {} #define FETCH_TYPE_STRING 0 diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index c534854..7a68732 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -211,6 +211,10 @@ static const struct fetch_type uprobes_fetch_type_table[] = { ASSIGN_FETCH_TYPE(s16, u16, 1), ASSIGN_FETCH_TYPE(s32, u32, 1), ASSIGN_FETCH_TYPE(s64, u64, 1), + ASSIGN_FETCH_TYPE_ALIAS(x8, u8, u8, 0), + ASSIGN_FETCH_TYPE_ALIAS(x16, u16, u16, 0), + ASSIGN_FETCH_TYPE_ALIAS(x32, u32, u32, 0), + ASSIGN_FETCH_TYPE_ALIAS(x64, u64, u64, 0), ASSIGN_FETCH_TYPE_END }; diff --git a/tools/perf/Documentation/perf-probe.txt b/tools/perf/Documentation/perf-probe.txt index b303bcd..aa33325 100644 --- a/tools/perf/Documentation/perf-probe.txt +++ b/tools/perf/Documentation/perf-probe.txt @@ -176,13 +176,13 @@ Each probe argument follows below syntax. 'NAME' specifies the name of this argument (optional). You can use the name of local variable, local data structure member (e.g. var->field, var.field2), local array with fixed index (e.g. array[1], var->array[0], var->pointer[2]), or kprobe-tracer argument format (e.g. $retval, %ax, etc). Note that the name of this argument will be set as the last member name if you specify a local data structure member (e.g. field2 for 'var->field1.field2'.) '$vars' and '$params' special arguments are also available for NAME, '$vars' is expanded to the local variables (including function parameters) which can access at given probe point. '$params' is expanded to only the function parameters. -'TYPE' casts the type of this argument (optional). If omitted, perf probe automatically set the type based on debuginfo. Currently, basic types (u8/u16/u32/u64/s8/s16/s32/s64), signedness casting (u/s), "string" and bitfield are supported. (see TYPES for detail) +'TYPE' casts the type of this argument (optional). If omitted, perf probe automatically set the type based on debuginfo. 
Currently, basic types (u8/u16/u32/u64/s8/s16/s32/s64), hexadecimal integers (x8/x16/x32/x64), signedness casting (u/s), "string" and bitfield are supported. (see TYPES for detail) On x86 systems %REG is always the short form of the register: for example %AX. %RAX or %EAX is not valid. TYPES ----- -Basic types (u8/u16/u32/u64/s8/s16/s32/s64) are integer types. Prefix 's' and 'u' means those types are signed and unsigned respectively. Traced arguments are shown in decimal (signed) or hex (unsigned). You can also use 's' or 'u' to specify only signedness and leave its size auto-detected by perf probe. +Basic types (u8/u16/u32/u64/s8/s16/s32/s64) and hexadecimal integers (x8/x16/x32/x64) are integer types. Prefix 's' and 'u' means those types are signed and unsigned respectively, and 'x' means that is shown in hexadecimal format. Traced arguments are shown in decimal (signed) or hex (unsigned). You can also use 's' or 'u' to specify only signedness and leave its size auto-detected by perf probe. String type is a special type, which fetches a "null-terminated" string from kernel space. This means it will fail and store NULL if the string container has been paged out. You can specify 'string' type only for the local variable or structure member which is an array of or a pointer to 'char' or 'unsigned char' type. Bitfield is another special type, which takes 3 parameters, bit-width, bit-offset, and container-size (usually 32). The syntax is; -- cgit v0.10.2 From 864256255597aad86abcecbe6c53da8852ded15b Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 18 Aug 2016 17:58:15 +0900 Subject: ftrace: probe: Add README entries for k/uprobe-events Add README entries for kprobe-events and uprobe-events. This allows user to check what options can be acceptable for running kernel. E.g. perf tools can choose correct types for the kernel. Signed-off-by: Masami Hiramatsu Acked-by: Steven Rostedt Cc: Alexander Shishkin Cc: Hemant Kumar Cc: Naohiro Aota Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/r/147151069524.12957.12957179170304055028.stgit@devbox Signed-off-by: Arnaldo Carvalho de Melo diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index dade4c9..1e2ce3b 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -4123,6 +4123,30 @@ static const char readme_msg[] = "\t\t\t traces\n" #endif #endif /* CONFIG_STACK_TRACER */ +#ifdef CONFIG_KPROBE_EVENT + " kprobe_events\t\t- Add/remove/show the kernel dynamic events\n" + "\t\t\t Write into this file to define/undefine new trace events.\n" +#endif +#ifdef CONFIG_UPROBE_EVENT + " uprobe_events\t\t- Add/remove/show the userspace dynamic events\n" + "\t\t\t Write into this file to define/undefine new trace events.\n" +#endif +#if defined(CONFIG_KPROBE_EVENT) || defined(CONFIG_UPROBE_EVENT) + "\t accepts: event-definitions (one definition per line)\n" + "\t Format: p|r[:[/]] []\n" + "\t -:[/]\n" +#ifdef CONFIG_KPROBE_EVENT + "\t place: [:][+]|\n" +#endif +#ifdef CONFIG_UPROBE_EVENT + "\t place: :\n" +#endif + "\t args: =fetcharg[:type]\n" + "\t fetcharg: %, @
, @[+|-],\n" + "\t $stack, $stack, $retval, $comm\n" + "\t type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string,\n" + "\t b@/\n" +#endif " events/\t\t- Directory containing all trace event subsystems:\n" " enable\t\t- Write 0/1 to enable/disable tracing of all events\n" " events//\t- Directory containing all trace events for :\n" -- cgit v0.10.2 From 180b20616ce57e93eb692170c793be94c456b1e2 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 18 Aug 2016 17:58:31 +0900 Subject: perf probe: Add supported for type casting by the running kernel Add a checking routine what types are supported by the running kernel by finding the pattern in /tracing/README. Signed-off-by: Masami Hiramatsu Cc: Alexander Shishkin Cc: Hemant Kumar Cc: Naohiro Aota Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Wang Nan Link: http://lkml.kernel.org/r/147151071172.12957.3340095690753291085.stgit@devbox [ 'enum probe_type' has no negative entries, so ends up as 'unsigned', remove '< 0' test to fix the build on at least centos:5, debian:7 & ubuntu:12.04.5 ] Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/probe-file.c b/tools/perf/util/probe-file.c index 9c3b9ed..697ef66 100644 --- a/tools/perf/util/probe-file.c +++ b/tools/perf/util/probe-file.c @@ -877,3 +877,60 @@ int probe_cache__show_all_caches(struct strfilter *filter) return 0; } + +static struct { + const char *pattern; + bool avail; + bool checked; +} probe_type_table[] = { +#define DEFINE_TYPE(idx, pat, def_avail) \ + [idx] = {.pattern = pat, .avail = (def_avail)} + DEFINE_TYPE(PROBE_TYPE_U, "* u8/16/32/64,*", true), + DEFINE_TYPE(PROBE_TYPE_S, "* s8/16/32/64,*", true), + DEFINE_TYPE(PROBE_TYPE_X, "* x8/16/32/64,*", false), + DEFINE_TYPE(PROBE_TYPE_STRING, "* string,*", true), + DEFINE_TYPE(PROBE_TYPE_BITFIELD, + "* b@/", true), +}; + +bool probe_type_is_available(enum probe_type type) +{ + FILE *fp; + char *buf = NULL; + size_t len = 0; + bool target_line = false; + bool ret = probe_type_table[type].avail; + + if (type >= PROBE_TYPE_END) + return false; + /* We don't have to check the type which supported by default */ + if (ret || probe_type_table[type].checked) + return ret; + + if (asprintf(&buf, "%s/README", tracing_path) < 0) + return ret; + + fp = fopen(buf, "r"); + if (!fp) + goto end; + + zfree(&buf); + while (getline(&buf, &len, fp) > 0 && !ret) { + if (!target_line) { + target_line = !!strstr(buf, " type: "); + if (!target_line) + continue; + } else if (strstr(buf, "\t ") != buf) + break; + ret = strglobmatch(buf, probe_type_table[type].pattern); + } + /* Cache the result */ + probe_type_table[type].checked = true; + probe_type_table[type].avail = ret; + + fclose(fp); +end: + free(buf); + + return ret; +} diff --git a/tools/perf/util/probe-file.h b/tools/perf/util/probe-file.h index 9577b5c..eba44c3 100644 --- a/tools/perf/util/probe-file.h +++ b/tools/perf/util/probe-file.h @@ -19,6 +19,15 @@ struct probe_cache { struct list_head entries; }; +enum probe_type { + PROBE_TYPE_U = 0, + PROBE_TYPE_S, + PROBE_TYPE_X, + PROBE_TYPE_STRING, + PROBE_TYPE_BITFIELD, + PROBE_TYPE_END, +}; + #define PF_FL_UPROBE 1 #define PF_FL_RW 2 #define for_each_probe_cache_entry(entry, pcache) \ @@ -54,6 +63,7 @@ struct probe_cache_entry *probe_cache__find(struct probe_cache *pcache, struct probe_cache_entry *probe_cache__find_by_name(struct probe_cache *pcache, const char *group, const char *event); int probe_cache__show_all_caches(struct strfilter *filter); +bool probe_type_is_available(enum probe_type type); #else /* ! 
HAVE_LIBELF_SUPPORT */ static inline struct probe_cache *probe_cache__new(const char *tgt __maybe_unused) { diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index 5c290c6..24dbe23 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -39,6 +39,7 @@ #include "util.h" #include "symbol.h" #include "probe-finder.h" +#include "probe-file.h" /* Kprobe tracer basic type is up to u64 */ #define MAX_BASIC_TYPE_BITS 64 -- cgit v0.10.2 From 925437872525ee229736a9a8bdf804fc98f75b44 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 18 Aug 2016 17:58:47 +0900 Subject: perf probe: Support hexadecimal casting Support hexadecimal unsigned integer casting by 'x'. This allows user to explicitly specify the output format of the probe arguments as hexadecimal. Signed-off-by: Masami Hiramatsu Cc: Alexander Shishkin Cc: Hemant Kumar Cc: Naohiro Aota Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Wang Nan Link: http://lkml.kernel.org/r/147151072679.12957.4458656416765710753.stgit@devbox Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Documentation/perf-probe.txt b/tools/perf/Documentation/perf-probe.txt index aa33325..d217617 100644 --- a/tools/perf/Documentation/perf-probe.txt +++ b/tools/perf/Documentation/perf-probe.txt @@ -176,13 +176,13 @@ Each probe argument follows below syntax. 'NAME' specifies the name of this argument (optional). You can use the name of local variable, local data structure member (e.g. var->field, var.field2), local array with fixed index (e.g. array[1], var->array[0], var->pointer[2]), or kprobe-tracer argument format (e.g. $retval, %ax, etc). Note that the name of this argument will be set as the last member name if you specify a local data structure member (e.g. field2 for 'var->field1.field2'.) '$vars' and '$params' special arguments are also available for NAME, '$vars' is expanded to the local variables (including function parameters) which can access at given probe point. '$params' is expanded to only the function parameters. -'TYPE' casts the type of this argument (optional). If omitted, perf probe automatically set the type based on debuginfo. Currently, basic types (u8/u16/u32/u64/s8/s16/s32/s64), hexadecimal integers (x8/x16/x32/x64), signedness casting (u/s), "string" and bitfield are supported. (see TYPES for detail) +'TYPE' casts the type of this argument (optional). If omitted, perf probe automatically set the type based on debuginfo. Currently, basic types (u8/u16/u32/u64/s8/s16/s32/s64), hexadecimal integers (x/x8/x16/x32/x64), signedness casting (u/s), "string" and bitfield are supported. (see TYPES for detail) On x86 systems %REG is always the short form of the register: for example %AX. %RAX or %EAX is not valid. TYPES ----- -Basic types (u8/u16/u32/u64/s8/s16/s32/s64) and hexadecimal integers (x8/x16/x32/x64) are integer types. Prefix 's' and 'u' means those types are signed and unsigned respectively, and 'x' means that is shown in hexadecimal format. Traced arguments are shown in decimal (signed) or hex (unsigned). You can also use 's' or 'u' to specify only signedness and leave its size auto-detected by perf probe. +Basic types (u8/u16/u32/u64/s8/s16/s32/s64) and hexadecimal integers (x8/x16/x32/x64) are integer types. Prefix 's' and 'u' means those types are signed and unsigned respectively, and 'x' means that is shown in hexadecimal format. Traced arguments are shown in decimal (signed) or hex (unsigned). 
You can also use 's' or 'u' to specify only signedness and leave its size auto-detected by perf probe. Moreover, you can use 'x' to explicitly specify to be shown in hexadecimal (the size is also auto-detected). String type is a special type, which fetches a "null-terminated" string from kernel space. This means it will fail and store NULL if the string container has been paged out. You can specify 'string' type only for the local variable or structure member which is an array of or a pointer to 'char' or 'unsigned char' type. Bitfield is another special type, which takes 3 parameters, bit-width, bit-offset, and container-size (usually 32). The syntax is; diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index 24dbe23..f18cd6b 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -298,13 +298,13 @@ static int convert_variable_type(Dwarf_Die *vr_die, char sbuf[STRERR_BUFSIZE]; int bsize, boffs, total; int ret; - char sign; + char prefix; /* TODO: check all types */ - if (cast && strcmp(cast, "string") != 0 && + if (cast && strcmp(cast, "string") != 0 && strcmp(cast, "x") != 0 && strcmp(cast, "s") != 0 && strcmp(cast, "u") != 0) { /* Non string type is OK */ - /* and respect signedness cast */ + /* and respect signedness/hexadecimal cast */ tvar->type = strdup(cast); return (tvar->type == NULL) ? -ENOMEM : 0; } @@ -366,11 +366,14 @@ static int convert_variable_type(Dwarf_Die *vr_die, } if (cast && (strcmp(cast, "u") == 0)) - sign = 'u'; + prefix = 'u'; else if (cast && (strcmp(cast, "s") == 0)) - sign = 's'; + prefix = 's'; + else if (cast && (strcmp(cast, "x") == 0) && + probe_type_is_available(PROBE_TYPE_X)) + prefix = 'x'; else - sign = die_is_signed_type(&type) ? 's' : 'u'; + prefix = die_is_signed_type(&type) ? 's' : 'u'; ret = dwarf_bytesize(&type); if (ret <= 0) @@ -384,7 +387,7 @@ static int convert_variable_type(Dwarf_Die *vr_die, dwarf_diename(&type), MAX_BASIC_TYPE_BITS); ret = MAX_BASIC_TYPE_BITS; } - ret = snprintf(buf, 16, "%c%d", sign, ret); + ret = snprintf(buf, 16, "%c%d", prefix, ret); formatted: if (ret < 0 || ret >= 16) { -- cgit v0.10.2 From 9880ce4a69ba5c66a5ffdd711fe446bd0226bd8c Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 18 Aug 2016 17:59:07 +0900 Subject: perf probe: Use hexadecimal type by default if possible Use hexadecimal type by default if it is available on current running kernel. This keeps the default behavior of perf probe after changing the output format of 'u8/16/32/64' to unsigned decimal number. Signed-off-by: Masami Hiramatsu Cc: Alexander Shishkin Cc: Hemant Kumar Cc: Naohiro Aota Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Wang Nan Link: http://lkml.kernel.org/r/147151074685.12957.16415861010796255514.stgit@devbox Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index f18cd6b..ac4740f 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -373,7 +373,8 @@ static int convert_variable_type(Dwarf_Die *vr_die, probe_type_is_available(PROBE_TYPE_X)) prefix = 'x'; else - prefix = die_is_signed_type(&type) ? 's' : 'u'; + prefix = die_is_signed_type(&type) ? 's' : + probe_type_is_available(PROBE_TYPE_X) ? 
'x' : 'u'; ret = dwarf_bytesize(&type); if (ret <= 0) -- cgit v0.10.2 From bdca79c2bf40556b664c9b1c32aec103e9bdb4a9 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 18 Aug 2016 17:59:21 +0900 Subject: ftrace: kprobe: uprobe: Show u8/u16/u32/u64 types in decimal Change kprobe/uprobe-tracer to show the arguments type-casted with u8/u16/u32/u64 in decimal digits instead of hexadecimal. To minimize compatibility issue, the arguments without type casting are typed by x64 (or x32 for 32bit arch) by default. Note: all arguments set by old perf probe without types are shown in decimal by default. Signed-off-by: Masami Hiramatsu Acked-by: Steven Rostedt Cc: Alexander Shishkin Cc: Hemant Kumar Cc: Naohiro Aota Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/r/147151076135.12957.14684546093034343894.stgit@devbox Signed-off-by: Arnaldo Carvalho de Melo diff --git a/Documentation/trace/kprobetrace.txt b/Documentation/trace/kprobetrace.txt index 9109c8e..e4991fb 100644 --- a/Documentation/trace/kprobetrace.txt +++ b/Documentation/trace/kprobetrace.txt @@ -54,7 +54,10 @@ Types ----- Several types are supported for fetch-args. Kprobe tracer will access memory by given type. Prefix 's' and 'u' means those types are signed and unsigned -respectively. Traced arguments are shown in decimal (signed) or hex (unsigned). +respectively. 'x' prefix implies it is unsigned. Traced arguments are shown +in decimal ('s' and 'u') or hexadecimal ('x'). Without type casting, 'x32' +or 'x64' is used depends on the architecture (e.g. x86-32 uses x32, and +x86-64 uses x64). String type is a special type, which fetches a "null-terminated" string from kernel space. This means it will fail and store NULL if the string container has been paged out. diff --git a/Documentation/trace/uprobetracer.txt b/Documentation/trace/uprobetracer.txt index 7e6d28c..94b6b45 100644 --- a/Documentation/trace/uprobetracer.txt +++ b/Documentation/trace/uprobetracer.txt @@ -50,7 +50,10 @@ Types ----- Several types are supported for fetch-args. Uprobe tracer will access memory by given type. Prefix 's' and 'u' means those types are signed and unsigned -respectively. Traced arguments are shown in decimal (signed) or hex (unsigned). +respectively. 'x' prefix implies it is unsigned. Traced arguments are shown +in decimal ('s' and 'u') or hexadecimal ('x'). Without type casting, 'x32' +or 'x64' is used depends on the architecture (e.g. x86-32 uses x32, and +x86-64 uses x64). String type is a special type, which fetches a "null-terminated" string from user space. 
Bitfield is another special type, which takes 3 parameters, bit-width, bit- diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index 725af9d..8c0553d 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -46,10 +46,10 @@ int PRINT_TYPE_FUNC_NAME(tname)(struct trace_seq *s, const char *name, \ const char PRINT_TYPE_FMT_NAME(tname)[] = fmt; \ NOKPROBE_SYMBOL(PRINT_TYPE_FUNC_NAME(tname)); -DEFINE_BASIC_PRINT_TYPE_FUNC(u8, u8, "0x%x") -DEFINE_BASIC_PRINT_TYPE_FUNC(u16, u16, "0x%x") -DEFINE_BASIC_PRINT_TYPE_FUNC(u32, u32, "0x%x") -DEFINE_BASIC_PRINT_TYPE_FUNC(u64, u64, "0x%Lx") +DEFINE_BASIC_PRINT_TYPE_FUNC(u8, u8, "%u") +DEFINE_BASIC_PRINT_TYPE_FUNC(u16, u16, "%u") +DEFINE_BASIC_PRINT_TYPE_FUNC(u32, u32, "%u") +DEFINE_BASIC_PRINT_TYPE_FUNC(u64, u64, "%Lu") DEFINE_BASIC_PRINT_TYPE_FUNC(s8, s8, "%d") DEFINE_BASIC_PRINT_TYPE_FUNC(s16, s16, "%d") DEFINE_BASIC_PRINT_TYPE_FUNC(s32, s32, "%d") diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index f0c470a..0c0ae54 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -208,7 +208,7 @@ DEFINE_FETCH_##method(u32) \ DEFINE_FETCH_##method(u64) /* Default (unsigned long) fetch type */ -#define __DEFAULT_FETCH_TYPE(t) u##t +#define __DEFAULT_FETCH_TYPE(t) x##t #define _DEFAULT_FETCH_TYPE(t) __DEFAULT_FETCH_TYPE(t) #define DEFAULT_FETCH_TYPE _DEFAULT_FETCH_TYPE(BITS_PER_LONG) #define DEFAULT_FETCH_TYPE_STR __stringify(DEFAULT_FETCH_TYPE) diff --git a/tools/perf/Documentation/perf-probe.txt b/tools/perf/Documentation/perf-probe.txt index d217617..f37d123 100644 --- a/tools/perf/Documentation/perf-probe.txt +++ b/tools/perf/Documentation/perf-probe.txt @@ -176,13 +176,12 @@ Each probe argument follows below syntax. 'NAME' specifies the name of this argument (optional). You can use the name of local variable, local data structure member (e.g. var->field, var.field2), local array with fixed index (e.g. array[1], var->array[0], var->pointer[2]), or kprobe-tracer argument format (e.g. $retval, %ax, etc). Note that the name of this argument will be set as the last member name if you specify a local data structure member (e.g. field2 for 'var->field1.field2'.) '$vars' and '$params' special arguments are also available for NAME, '$vars' is expanded to the local variables (including function parameters) which can access at given probe point. '$params' is expanded to only the function parameters. -'TYPE' casts the type of this argument (optional). If omitted, perf probe automatically set the type based on debuginfo. Currently, basic types (u8/u16/u32/u64/s8/s16/s32/s64), hexadecimal integers (x/x8/x16/x32/x64), signedness casting (u/s), "string" and bitfield are supported. (see TYPES for detail) - +'TYPE' casts the type of this argument (optional). If omitted, perf probe automatically set the type based on debuginfo (*). Currently, basic types (u8/u16/u32/u64/s8/s16/s32/s64), hexadecimal integers (x/x8/x16/x32/x64), signedness casting (u/s), "string" and bitfield are supported. (see TYPES for detail) On x86 systems %REG is always the short form of the register: for example %AX. %RAX or %EAX is not valid. TYPES ----- -Basic types (u8/u16/u32/u64/s8/s16/s32/s64) and hexadecimal integers (x8/x16/x32/x64) are integer types. Prefix 's' and 'u' means those types are signed and unsigned respectively, and 'x' means that is shown in hexadecimal format. Traced arguments are shown in decimal (signed) or hex (unsigned). 
You can also use 's' or 'u' to specify only signedness and leave its size auto-detected by perf probe. Moreover, you can use 'x' to explicitly specify to be shown in hexadecimal (the size is also auto-detected). +Basic types (u8/u16/u32/u64/s8/s16/s32/s64) and hexadecimal integers (x8/x16/x32/x64) are integer types. Prefix 's' and 'u' means those types are signed and unsigned respectively, and 'x' means that is shown in hexadecimal format. Traced arguments are shown in decimal (sNN/uNN) or hex (xNN). You can also use 's' or 'u' to specify only signedness and leave its size auto-detected by perf probe. Moreover, you can use 'x' to explicitly specify to be shown in hexadecimal (the size is also auto-detected). String type is a special type, which fetches a "null-terminated" string from kernel space. This means it will fail and store NULL if the string container has been paged out. You can specify 'string' type only for the local variable or structure member which is an array of or a pointer to 'char' or 'unsigned char' type. Bitfield is another special type, which takes 3 parameters, bit-width, bit-offset, and container-size (usually 32). The syntax is; -- cgit v0.10.2 From dd6fa4e197f0123bcd751e47c171121b2451f148 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sun, 21 Aug 2016 15:12:56 +0100 Subject: perf tools: Fix typo: "ehough" -> "enough" Trivial typo fix in pr_debug message Signed-off-by: Colin King Cc: Alexander Shishkin Cc: He Kuang Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/r/20160821141256.7530-1-colin.king@canonical.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/tests/backward-ring-buffer.c b/tools/perf/tests/backward-ring-buffer.c index 615780c..e6d1816 100644 --- a/tools/perf/tests/backward-ring-buffer.c +++ b/tools/perf/tests/backward-ring-buffer.c @@ -97,7 +97,7 @@ int test__backward_ring_buffer(int subtest __maybe_unused) evlist = perf_evlist__new(); if (!evlist) { - pr_debug("No ehough memory to create evlist\n"); + pr_debug("No enough memory to create evlist\n"); return TEST_FAIL; } -- cgit v0.10.2 From 17d4666f0613dfd1e2a3919d9b4c724acba8f8b1 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sun, 21 Aug 2016 15:16:03 +0100 Subject: perf test bpf: Fix typo: "ehough" -> "enough" Trivial typo fix in pr_debug message Signed-off-by: Colin King Cc: Alexander Shishkin Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/r/20160821141603.7832-1-colin.king@canonical.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c index fc54064..2673e86 100644 --- a/tools/perf/tests/bpf.c +++ b/tools/perf/tests/bpf.c @@ -125,7 +125,7 @@ static int do_test(struct bpf_object *obj, int (*func)(void), /* Instead of perf_evlist__new_default, don't add default events */ evlist = perf_evlist__new(); if (!evlist) { - pr_debug("No ehough memory to create evlist\n"); + pr_debug("No enough memory to create evlist\n"); return TEST_FAIL; } -- cgit v0.10.2 From c77ce225d5ed1c5bfd4c6d58d3333d859ae49fd7 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sun, 21 Aug 2016 15:19:24 +0100 Subject: perf bpf: Fix typo: "ehough" -> "enough" Trivial typo fix in pr_debug message Signed-off-by: Colin King Cc: Alexander Shishkin Cc: Alexei Starovoitov Cc: He Kuang Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/r/20160821141924.8056-1-colin.king@canonical.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c index 
1f12e4e..2b2c9b8 100644 --- a/tools/perf/util/bpf-loader.c +++ b/tools/perf/util/bpf-loader.c @@ -531,7 +531,7 @@ static int map_prologue(struct perf_probe_event *pev, int *mapping, ptevs = malloc(array_sz); if (!ptevs) { - pr_debug("No ehough memory: alloc ptevs failed\n"); + pr_debug("No enough memory: alloc ptevs failed\n"); return -ENOMEM; } -- cgit v0.10.2 From 5e30d55c71de058e4156080fe32d426c22d094cb Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 22 Aug 2016 19:30:08 +0100 Subject: perf record: Fix spelling mistake "Finshed" -> "Finished" Trivial fix to spelling mistake in pr_debug message. Signed-off-by: Colin King Cc: Alexander Shishkin Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20160822183008.26368-1-colin.king@canonical.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index a3792e8..03251c7 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -96,7 +96,7 @@ backward_rb_find_range(void *buf, int mask, u64 head, u64 *start, u64 *end) *start = head; while (true) { if (evt_head - head >= (unsigned int)size) { - pr_debug("Finshed reading backward ring buffer: rewind\n"); + pr_debug("Finished reading backward ring buffer: rewind\n"); if (evt_head - head > (unsigned int)size) evt_head -= pheader->size; *end = evt_head; @@ -106,7 +106,7 @@ backward_rb_find_range(void *buf, int mask, u64 head, u64 *start, u64 *end) pheader = (struct perf_event_header *)(buf + (evt_head & mask)); if (pheader->size == 0) { - pr_debug("Finshed reading backward ring buffer: get start\n"); + pr_debug("Finished reading backward ring buffer: get start\n"); *end = evt_head; return 0; } -- cgit v0.10.2 From 04e11960aa9a5edbe612dd8623190e341aedab35 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 24 Aug 2016 14:57:58 +0900 Subject: perf probe: Remove unused tracing_dir variable Remove unused tracing_dir variable from open_probe_events(). Signed-off-by: Masami Hiramatsu Cc: Jiri Olsa Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/147201827792.5713.4165387506020511920.stgit@devbox Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/probe-file.c b/tools/perf/util/probe-file.c index 697ef66..6f931e4 100644 --- a/tools/perf/util/probe-file.c +++ b/tools/perf/util/probe-file.c @@ -73,11 +73,10 @@ static void print_both_open_warning(int kerr, int uerr) static int open_probe_events(const char *trace_file, bool readwrite) { char buf[PATH_MAX]; - const char *tracing_dir = ""; int ret; - ret = e_snprintf(buf, PATH_MAX, "%s/%s%s", - tracing_path, tracing_dir, trace_file); + ret = e_snprintf(buf, PATH_MAX, "%s/%s", + tracing_path, trace_file); if (ret >= 0) { pr_debug("Opening %s write=%d\n", buf, readwrite); if (readwrite && !probe_event_dry_run) -- cgit v0.10.2 From ffe67c2fabf128122b30fbf0ac498928e171b0b3 Mon Sep 17 00:00:00 2001 From: Shawn Lin Date: Sun, 21 Aug 2016 15:57:33 +0800 Subject: perf tools: Fix error handling of lzma decompression lzma_decompress_to_file() never actually closes the file pointer, let's fix it. 
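The fix follows the usual single-exit cleanup idiom. A minimal, generic sketch of that pattern (illustration only, not the perf code; read_file() and do_work() are made-up names):

----
#include <stdio.h>

/* stand-in for the real work; a negative return means failure */
static int do_work(FILE *fp) { return fgetc(fp) == EOF ? -1 : 0; }

static int read_file(const char *path)
{
	FILE *fp = fopen(path, "r");
	int err = -1;			/* assume failure, the common case for early exits */

	if (!fp)
		return -1;

	if (do_work(fp) < 0)		/* every failure path jumps to the cleanup label */
		goto out_fclose;

	err = 0;			/* success: set err just before the label */
out_fclose:
	fclose(fp);			/* runs on success and failure alike, so fp never leaks */
	return err;
}

int main(int argc, char **argv)
{
	return argc > 1 ? -read_file(argv[1]) : 0;
}
----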
Signed-off-by: Shawn Lin Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1471766253-1964-1-git-send-email-shawn.lin@rock-chips.com [ Make err = -1, the common case, set it to 0 before the error label ] Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/lzma.c b/tools/perf/util/lzma.c index 95a1acb..9ddea5c 100644 --- a/tools/perf/util/lzma.c +++ b/tools/perf/util/lzma.c @@ -29,6 +29,7 @@ int lzma_decompress_to_file(const char *input, int output_fd) lzma_action action = LZMA_RUN; lzma_stream strm = LZMA_STREAM_INIT; lzma_ret ret; + int err = -1; u8 buf_in[BUFSIZE]; u8 buf_out[BUFSIZE]; @@ -45,7 +46,7 @@ int lzma_decompress_to_file(const char *input, int output_fd) if (ret != LZMA_OK) { pr_err("lzma: lzma_stream_decoder failed %s (%d)\n", lzma_strerror(ret), ret); - return -1; + goto err_fclose; } strm.next_in = NULL; @@ -60,7 +61,7 @@ int lzma_decompress_to_file(const char *input, int output_fd) if (ferror(infile)) { pr_err("lzma: read error: %s\n", strerror(errno)); - return -1; + goto err_fclose; } if (feof(infile)) @@ -74,7 +75,7 @@ int lzma_decompress_to_file(const char *input, int output_fd) if (writen(output_fd, buf_out, write_size) != write_size) { pr_err("lzma: write error: %s\n", strerror(errno)); - return -1; + goto err_fclose; } strm.next_out = buf_out; @@ -83,13 +84,15 @@ int lzma_decompress_to_file(const char *input, int output_fd) if (ret != LZMA_OK) { if (ret == LZMA_STREAM_END) - return 0; + break; pr_err("lzma: failed %s\n", lzma_strerror(ret)); - return -1; + goto err_fclose; } } + err = 0; +err_fclose: fclose(infile); - return 0; + return err; } -- cgit v0.10.2 From b01141f4f59ce56e5ed177a1fc70b3ba4d676aca Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 25 Aug 2016 16:09:21 -0300 Subject: perf annotate: Initialize the priv area in symbol__new() We need to initialize some fields (right now just a mutex) when we allocate the per symbol annotation struct, so do it at the symbol constructor instead of (ab)using the filter mechanism for that. This way we remove one of the few cases we have for that symbol filter, which will eventually lead to removing it. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-cvz34avlz1lez888lob95390@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 9c1034d..f07b230 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -204,8 +204,6 @@ static int __cmd_annotate(struct perf_annotate *ann) struct perf_evsel *pos; u64 total_nr_samples; - machines__set_symbol_filter(&session->machines, symbol__annotate_init); - if (ann->cpu_list) { ret = perf_session__cpu_bitmap(session, ann->cpu_list, ann->cpu_bitmap); @@ -367,7 +365,10 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __maybe_unused) if (annotate.session == NULL) return -1; - symbol_conf.priv_size = sizeof(struct annotation); + ret = symbol__annotation_init(); + if (ret < 0) + goto out_delete; + symbol_conf.try_vmlinux_path = true; ret = symbol__init(&annotate.session->header.env); diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index b9e046b..1a07c4c 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -984,9 +984,9 @@ repeat: * implementation.
*/ if (ui__has_annotation()) { - symbol_conf.priv_size = sizeof(struct annotation); - machines__set_symbol_filter(&session->machines, - symbol__annotate_init); + ret = symbol__annotation_init(); + if (ret < 0) + goto error; /* * For searching by name on the "Browse map details". * providing it only in verbose mode not to bloat too diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index a3223aa..e8ca8dc8 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -1324,7 +1324,9 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused) if (symbol_conf.cumulate_callchain && !callchain_param.order_set) callchain_param.order = ORDER_CALLER; - symbol_conf.priv_size = sizeof(struct annotation); + status = symbol__annotation_init(); + if (status < 0) + goto out_delete_evlist; symbol_conf.try_vmlinux_path = (symbol_conf.vmlinux_name == NULL); if (symbol__init(NULL) < 0) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 25a9259..1b59e31 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -491,13 +491,6 @@ static struct ins *ins__find(const char *name) return bsearch(name, instructions, nmemb, sizeof(struct ins), ins__key_cmp); } -int symbol__annotate_init(struct map *map __maybe_unused, struct symbol *sym) -{ - struct annotation *notes = symbol__annotation(sym); - pthread_mutex_init(¬es->lock, NULL); - return 0; -} - int symbol__alloc_hist(struct symbol *sym) { struct annotation *notes = symbol__annotation(sym); diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index f67ccb0..e96f4da 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -177,7 +177,6 @@ enum symbol_disassemble_errno { int symbol__strerror_disassemble(struct symbol *sym, struct map *map, int errnum, char *buf, size_t buflen); -int symbol__annotate_init(struct map *map, struct symbol *sym); int symbol__annotate_printf(struct symbol *sym, struct map *map, struct perf_evsel *evsel, bool full_paths, int min_pcnt, int max_lines, int context); diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 37e8d20..863d69c 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -9,6 +9,7 @@ #include #include #include +#include "annotate.h" #include "build-id.h" #include "util.h" #include "debug.h" @@ -235,8 +236,13 @@ struct symbol *symbol__new(u64 start, u64 len, u8 binding, const char *name) if (sym == NULL) return NULL; - if (symbol_conf.priv_size) + if (symbol_conf.priv_size) { + if (symbol_conf.init_annotation) { + struct annotation *notes = (void *)sym; + pthread_mutex_init(¬es->lock, NULL); + } sym = ((void *)sym) + symbol_conf.priv_size; + } sym->start = start; sym->end = len ? 
start + len : start; @@ -1948,6 +1954,23 @@ static bool symbol__read_kptr_restrict(void) return value; } +int symbol__annotation_init(void) +{ + if (symbol_conf.initialized) { + pr_err("Annotation needs to be init before symbol__init()\n"); + return -1; + } + + if (symbol_conf.init_annotation) { + pr_warning("Annotation being initialized multiple times\n"); + return 0; + } + + symbol_conf.priv_size += sizeof(struct annotation); + symbol_conf.init_annotation = true; + return 0; +} + int symbol__init(struct perf_env *env) { const char *symfs; diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 699f7cb..f6c54d3 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -88,6 +88,7 @@ struct symbol_conf { unsigned short priv_size; unsigned short nr_events; bool try_vmlinux_path, + init_annotation, force, ignore_vmlinux, ignore_vmlinux_buildid, @@ -277,6 +278,8 @@ struct perf_env; int symbol__init(struct perf_env *env); void symbol__exit(void); void symbol__elf_init(void); +int symbol__annotation_init(void); + struct symbol *symbol__new(u64 start, u64 len, u8 binding, const char *name); size_t __symbol__fprintf_symname_offs(const struct symbol *sym, const struct addr_location *al, -- cgit v0.10.2 From b55cc4ed202175777690860c482ca2ae5184458e Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 30 Aug 2016 11:15:59 -0300 Subject: perf symbols: Rename ->ignore to ->idle Since this is the only use thus far, and this mechanism is in place for a long time. To clarify why symbols should be skipped or treated differently, name it for the only use it has. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-oqpf82x2svir611ry15paufd@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index e8ca8dc8..e091900 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -680,7 +680,7 @@ static int symbol_filter(struct map *map, struct symbol *sym) return 1; if (symbol__is_idle(sym)) - sym->ignore = true; + sym->idle = 1; return 0; } @@ -783,7 +783,7 @@ static void perf_event__process_sample(struct perf_tool *tool, } } - if (al.sym == NULL || !al.sym->ignore) { + if (al.sym == NULL || !al.sym->idle) { struct hists *hists = evsel__hists(evsel); struct hist_entry_iter iter = { .evsel = evsel, diff --git a/tools/perf/util/evsel_fprintf.c b/tools/perf/util/evsel_fprintf.c index 3674e77..9111e06 100644 --- a/tools/perf/util/evsel_fprintf.c +++ b/tools/perf/util/evsel_fprintf.c @@ -122,7 +122,7 @@ int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment, if (!node) break; - if (node->sym && node->sym->ignore) + if (node->sym && node->sym->idle) goto next; printed += fprintf(fp, "%-*.*s", left_alignment, left_alignment, " "); @@ -181,7 +181,7 @@ int sample__fprintf_sym(struct perf_sample *sample, struct addr_location *al, if (cursor != NULL) { printed += sample__fprintf_callchain(sample, left_alignment, print_opts, cursor, fp); - } else if (!(al->sym && al->sym->ignore)) { + } else if (!(al->sym && al->sym->idle)) { printed += fprintf(fp, "%-*.*s", left_alignment, left_alignment, " "); if (print_ip) diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index f6c54d3..e54ee7c 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -57,7 +57,7 @@ struct symbol { u64 end; u16 namelen; u8 binding; - bool ignore; + u8 idle:1; u8 arch_sym; char name[0]; }; -- cgit v0.10.2 From 
fd2275984d6e29c6737646ff39dd3de87a03f300 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 31 Aug 2016 10:04:27 -0300 Subject: perf probe: Do not use map_load filters for function It is simpler to just do the loop: there is no need for globals, and the last user of such facility disappears. Testing: # perf probe -F [a-z]*recvmsg aead_recvmsg compat_SyS_recvmsg compat_sys_recvmsg hash_recvmsg inet_recvmsg kernel_recvmsg netlink_recvmsg packet_recvmsg ping_recvmsg raw_recvmsg rawv6_recvmsg rng_recvmsg security_socket_recvmsg selinux_socket_recvmsg skcipher_recvmsg sock_common_recvmsg sock_no_recvmsg sock_recvmsg sys_recvmsg tcp_recvmsg udp_recvmsg udpv6_recvmsg unix_dgram_recvmsg unix_seqpacket_recvmsg unix_stream_recvmsg # Without filters: # perf probe -F | tail -5 zswap_pool_create zswap_pool_current zswap_update_total_size zswap_writeback_entry zswap_zpool_param_set # # perf probe -F | wc -l 33311 # Acked-by: Masami Hiramatsu Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/r/20160831130427.GA13095@kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 2873396..0bed2ee 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -3289,24 +3289,10 @@ out: return ret; } -/* TODO: don't use a global variable for filter ... */ -static struct strfilter *available_func_filter; - -/* - * If a symbol corresponds to a function with global binding and - * matches filter return 0. For all others return 1. - */ -static int filter_available_functions(struct map *map __maybe_unused, - struct symbol *sym) -{ - if (strfilter__compare(available_func_filter, sym->name)) - return 0; - return 1; -} - int show_available_funcs(const char *target, struct strfilter *_filter, bool user) { + struct rb_node *nd; struct map *map; int ret; @@ -3324,9 +3310,7 @@ int show_available_funcs(const char *target, struct strfilter *_filter, return -EINVAL; } - /* Load symbols with given filter */ - available_func_filter = _filter; - ret = map__load(map, filter_available_functions); + ret = map__load(map, NULL); if (ret) { if (ret == -2) { char *str = strfilter__string(_filter); @@ -3343,7 +3327,14 @@ int show_available_funcs(const char *target, struct strfilter *_filter, /* Show all (filtered) symbols */ setup_pager(); - dso__fprintf_symbols_by_name(map->dso, map->type, stdout); + + for (nd = rb_first(&map->dso->symbol_names[map->type]); nd; nd = rb_next(nd)) { + struct symbol_name_rb_node *pos = rb_entry(nd, struct symbol_name_rb_node, rb_node); + + if (strfilter__compare(_filter, pos->sym.name)) + printf("%s\n", pos->sym.name); + } + end: if (user) { map__put(map); -- cgit v0.10.2 From 2a8d41b46540f224d77c255b5cc042d145e81b83 Mon Sep 17 00:00:00 2001 From: Milian Wolff Date: Tue, 30 Aug 2016 13:41:02 +0200 Subject: perf symbols: Demangle symbols for synthesized @plt entries. The symbols in the synthesized @plt entries were not demangled before, i.e. we could end up with entries such as: $ perf report Samples: 7K of event 'cycles:ppp', Event count (approx.): 6223833141 Children Self Command Shared Object Symbol - 93.63% 28.89% lab_mandelbrot lab_mandelbrot [.] main - 73.81% main - 33.57% hypot 27.76% __hypot_finite 15.97% __muldc3 2.90% __muldc3@plt 2.40% _ZNK6QImage6heightEv@plt + 2.14% QColor::rgb 1.94% _ZNK6QImage5widthEv@plt 1.92% cabs@plt This patch remedies this issue by also applying demangling to the synthesized symbols.
The output for the above is now: $ perf report Samples: 7K of event 'cycles:ppp', Event count (approx.): 6223833141 Children Self Command Shared Object Symbol - 93.63% 28.89% lab_mandelbrot lab_mandelbrot [.] main - 73.81% main - 33.57% hypot 27.76% __hypot_finite 15.97% __muldc3 2.90% __muldc3@plt 2.40% QImage::height() const@plt + 2.14% QColor::rgb 1.94% QImage::width() const@plt 1.92% cabs@plt Signed-off-by: Milian Wolff LPU-Reference: 20160830114102.30863-1-milian.wolff@kdab.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index a811c13..fbe31ef 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -206,6 +206,37 @@ Elf_Scn *elf_section_by_name(Elf *elf, GElf_Ehdr *ep, return NULL; } +static bool want_demangle(bool is_kernel_sym) +{ + return is_kernel_sym ? symbol_conf.demangle_kernel : symbol_conf.demangle; +} + +static char *demangle_sym(struct dso *dso, int kmodule, const char *elf_name) +{ + int demangle_flags = verbose ? (DMGL_PARAMS | DMGL_ANSI) : DMGL_NO_OPTS; + char *demangled = NULL; + + /* + * We need to figure out if the object was created from C++ sources + * DWARF DW_compile_unit has this, but we don't always have access + * to it... + */ + if (!want_demangle(dso->kernel || kmodule)) + return demangled; + + demangled = bfd_demangle(NULL, elf_name, demangle_flags); + if (demangled == NULL) + demangled = java_demangle_sym(elf_name, JAVA_DEMANGLE_NORET); + else if (rust_is_mangled(demangled)) + /* + * Input to Rust demangling is the BFD-demangled + * name which it Rust-demangles in place. + */ + rust_demangle_sym(demangled); + + return demangled; +} + #define elf_section__for_each_rel(reldata, pos, pos_mem, idx, nr_entries) \ for (idx = 0, pos = gelf_getrel(reldata, 0, &pos_mem); \ idx < nr_entries; \ @@ -301,11 +332,19 @@ int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss, struct map * elf_section__for_each_rela(reldata, pos, pos_mem, idx, nr_rel_entries) { + const char *elf_name = NULL; + char *demangled = NULL; symidx = GELF_R_SYM(pos->r_info); plt_offset += shdr_plt.sh_entsize; gelf_getsym(syms, symidx, &sym); + + elf_name = elf_sym__name(&sym, symstrs); + demangled = demangle_sym(dso, 0, elf_name); + if (demangled != NULL) + elf_name = demangled; snprintf(sympltname, sizeof(sympltname), - "%s@plt", elf_sym__name(&sym, symstrs)); + "%s@plt", elf_name); + free(demangled); f = symbol__new(plt_offset, shdr_plt.sh_entsize, STB_GLOBAL, sympltname); @@ -323,11 +362,19 @@ int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss, struct map * GElf_Rel pos_mem, *pos; elf_section__for_each_rel(reldata, pos, pos_mem, idx, nr_rel_entries) { + const char *elf_name = NULL; + char *demangled = NULL; symidx = GELF_R_SYM(pos->r_info); plt_offset += shdr_plt.sh_entsize; gelf_getsym(syms, symidx, &sym); + + elf_name = elf_sym__name(&sym, symstrs); + demangled = demangle_sym(dso, 0, elf_name); + if (demangled != NULL) + elf_name = demangled; snprintf(sympltname, sizeof(sympltname), - "%s@plt", elf_sym__name(&sym, symstrs)); + "%s@plt", elf_name); + free(demangled); f = symbol__new(plt_offset, shdr_plt.sh_entsize, STB_GLOBAL, sympltname); @@ -775,11 +822,6 @@ static u64 ref_reloc(struct kmap *kmap) return 0; } -static bool want_demangle(bool is_kernel_sym) -{ - return is_kernel_sym ? 
symbol_conf.demangle_kernel : symbol_conf.demangle; -} - void __weak arch__sym_update(struct symbol *s __maybe_unused, GElf_Sym *sym __maybe_unused) { } @@ -1070,29 +1112,10 @@ int dso__load_sym(struct dso *dso, struct map *map, sym.st_value -= shdr.sh_addr - shdr.sh_offset; } new_symbol: - /* - * We need to figure out if the object was created from C++ sources - * DWARF DW_compile_unit has this, but we don't always have access - * to it... - */ - if (want_demangle(dso->kernel || kmodule)) { - int demangle_flags = DMGL_NO_OPTS; - if (verbose) - demangle_flags = DMGL_PARAMS | DMGL_ANSI; - - demangled = bfd_demangle(NULL, elf_name, demangle_flags); - if (demangled == NULL) - demangled = java_demangle_sym(elf_name, JAVA_DEMANGLE_NORET); - else if (rust_is_mangled(demangled)) - /* - * Input to Rust demangling is the BFD-demangled - * name which it Rust-demangles in place. - */ - rust_demangle_sym(demangled); + demangled = demangle_sym(dso, kmodule, elf_name); + if (demangled != NULL) + elf_name = demangled; - if (demangled != NULL) - elf_name = demangled; - } f = symbol__new(sym.st_value, sym.st_size, GELF_ST_BIND(sym.st_info), elf_name); free(demangled); -- cgit v0.10.2 From 893c5c798be99bcff5b235402dbd21e5aa03d76e Mon Sep 17 00:00:00 2001 From: Milian Wolff Date: Tue, 30 Aug 2016 15:41:06 +0200 Subject: perf config: Show default report configuration in example and docs Signed-off-by: Milian Wolff LPU-Reference: 20160830134106.21240-2-milian.wolff@kdab.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt index 68c8919..cb081ac5 100644 --- a/tools/perf/Documentation/perf-config.txt +++ b/tools/perf/Documentation/perf-config.txt @@ -110,6 +110,14 @@ Given a $HOME/.perfconfig like this: order = caller sort-key = function + [report] + # Defaults + sort-order = comm,dso,symbol + percent-limit = 0 + queue-size = 0 + children = true + group = true + Variables ~~~~~~~~~ diff --git a/tools/perf/Documentation/perfconfig.example b/tools/perf/Documentation/perfconfig.example index 1d8d5bc..2b477c1 100644 --- a/tools/perf/Documentation/perfconfig.example +++ b/tools/perf/Documentation/perfconfig.example @@ -27,3 +27,12 @@ use_offset = true jump_arrows = true show_nr_jumps = false + +[report] + + # Defaults + sort-order = comm,dso,symbol + percent-limit = 0 + queue-size = 0 + children = true + group = true -- cgit v0.10.2 From 1c20b1d15473a91e2fccecbcd2809d80ff4b4924 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 26 Aug 2016 01:24:27 +0900 Subject: perf probe: Show trace event definition Add --definition/-D option for showing the trace-event definition in stdout. This can be useful in debugging or combined with a shell script. e.g. ---- # perf probe --definition 'do_sys_open $params' p:probe/do_sys_open _text+2261728 dfd=%di:s32 filename=%si:u64 flags=%dx:s32 mode=%cx:u16 ---- Suggested-and-Tested-by: Arnaldo Carvalho de Melo Signed-off-by: Masami Hiramatsu Cc: Ingo Molnar Cc: Jiri Olsa Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/147214226712.23638.2240534040014013658.stgit@devbox Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Documentation/perf-probe.txt b/tools/perf/Documentation/perf-probe.txt index f37d123..56db4d4 100644 --- a/tools/perf/Documentation/perf-probe.txt +++ b/tools/perf/Documentation/perf-probe.txt @@ -21,6 +21,8 @@ or 'perf probe' [options] --vars='PROBEPOINT' or 'perf probe' [options] --funcs +or +'perf probe' [options] --definition='PROBE' [...] 
DESCRIPTION ----------- @@ -96,6 +98,11 @@ OPTIONS can also list functions in a user space executable / shared library. This also can accept a FILTER rule argument. +-D:: +--definition=:: + Show trace-event definition converted from given probe-event instead + of write it into tracing/[k,u]probe_events. + --filter=FILTER:: (Only for --vars and --funcs) Set filter. FILTER is a combination of glob pattern, see FILTER PATTERN for detail. diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c index ee5b421..7a3d8c4 100644 --- a/tools/perf/builtin-probe.c +++ b/tools/perf/builtin-probe.c @@ -326,6 +326,11 @@ static int perf_add_probe_events(struct perf_probe_event *pevs, int npevs) if (ret < 0) goto out_cleanup; + if (params.command == 'D') { /* it shows definition */ + ret = show_probe_trace_events(pevs, npevs); + goto out_cleanup; + } + ret = apply_perf_probe_events(pevs, npevs); if (ret < 0) goto out_cleanup; @@ -454,6 +459,14 @@ out: return ret; } +#ifdef HAVE_DWARF_SUPPORT +#define PROBEDEF_STR \ + "[EVENT=]FUNC[@SRC][+OFF|%return|:RL|;PT]|SRC:AL|SRC;PT [[NAME=]ARG ...]" +#else +#define PROBEDEF_STR "[EVENT=]FUNC[+OFF|%return] [[NAME=]ARG ...]" +#endif + + static int __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused) { @@ -479,13 +492,7 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused) opt_set_filter_with_command, DEFAULT_LIST_FILTER), OPT_CALLBACK('d', "del", NULL, "[GROUP:]EVENT", "delete a probe event.", opt_set_filter_with_command), - OPT_CALLBACK('a', "add", NULL, -#ifdef HAVE_DWARF_SUPPORT - "[EVENT=]FUNC[@SRC][+OFF|%return|:RL|;PT]|SRC:AL|SRC;PT" - " [[NAME=]ARG ...]", -#else - "[EVENT=]FUNC[+OFF|%return] [[NAME=]ARG ...]", -#endif + OPT_CALLBACK('a', "add", NULL, PROBEDEF_STR, "probe point definition, where\n" "\t\tGROUP:\tGroup name (optional)\n" "\t\tEVENT:\tEvent name\n" @@ -503,6 +510,9 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused) "\t\tARG:\tProbe argument (kprobe-tracer argument format.)\n", #endif opt_add_probe_event), + OPT_CALLBACK('D', "definition", NULL, PROBEDEF_STR, + "Show trace event definition of given traceevent for k/uprobe_events.", + opt_add_probe_event), OPT_BOOLEAN('f', "force", &probe_conf.force_add, "forcibly add events" " with existing name"), OPT_CALLBACK('L', "line", NULL, @@ -548,6 +558,7 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused) set_option_flag(options, 'a', "add", PARSE_OPT_EXCLUSIVE); set_option_flag(options, 'd', "del", PARSE_OPT_EXCLUSIVE); + set_option_flag(options, 'D', "definition", PARSE_OPT_EXCLUSIVE); set_option_flag(options, 'l', "list", PARSE_OPT_EXCLUSIVE); #ifdef HAVE_DWARF_SUPPORT set_option_flag(options, 'L', "line", PARSE_OPT_EXCLUSIVE); @@ -644,6 +655,7 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused) } break; case 'a': + case 'D': /* Ensure the last given target is used */ if (params.target && !params.target_used) { pr_err(" Error: -x/-m must follow the probe definitions.\n"); diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 0bed2ee..4a49cb8 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -3207,6 +3207,52 @@ int convert_perf_probe_events(struct perf_probe_event *pevs, int npevs) return 0; } +static int show_probe_trace_event(struct probe_trace_event *tev) +{ + char *buf = synthesize_probe_trace_command(tev); + + if (!buf) { + pr_debug("Failed to synthesize probe trace event.\n"); + return -EINVAL; + } + + 
/* Showing definition always go stdout */ + printf("%s\n", buf); + free(buf); + + return 0; +} + +int show_probe_trace_events(struct perf_probe_event *pevs, int npevs) +{ + struct strlist *namelist = strlist__new(NULL, NULL); + struct probe_trace_event *tev; + struct perf_probe_event *pev; + int i, j, ret = 0; + + if (!namelist) + return -ENOMEM; + + for (j = 0; j < npevs && !ret; j++) { + pev = &pevs[j]; + for (i = 0; i < pev->ntevs && !ret; i++) { + tev = &pev->tevs[i]; + /* Skip if the symbol is out of .text or blacklisted */ + if (!tev->point.symbol && !pev->uprobes) + continue; + + /* Set new name for tev (and update namelist) */ + ret = probe_trace_event__set_name(tev, pev, + namelist, true); + if (!ret) + ret = show_probe_trace_event(tev); + } + } + strlist__delete(namelist); + + return ret; +} + int apply_perf_probe_events(struct perf_probe_event *pevs, int npevs) { int i, ret = 0; diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h index f4f45db..6209408 100644 --- a/tools/perf/util/probe-event.h +++ b/tools/perf/util/probe-event.h @@ -147,6 +147,7 @@ int line_range__init(struct line_range *lr); int add_perf_probe_events(struct perf_probe_event *pevs, int npevs); int convert_perf_probe_events(struct perf_probe_event *pevs, int npevs); int apply_perf_probe_events(struct perf_probe_event *pevs, int npevs); +int show_probe_trace_events(struct perf_probe_event *pevs, int npevs); void cleanup_perf_probe_events(struct perf_probe_event *pevs, int npevs); int del_perf_probe_events(struct strfilter *filter); -- cgit v0.10.2 From 428aff82e92a29da0e4276623180f9a98f2d5b16 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 26 Aug 2016 01:24:42 +0900 Subject: perf probe: Ignore vmlinux buildid if offline kernel is given Ignore the buildid of running kernel when both of --definition and --vmlinux is given because that kernel should be off-line. This also skips post-processing of kprobe event for relocating symbol and checking blacklist, because it can not be done on off-line kernel. E.g. without this fix perf shows an error as below ---- $ perf probe --vmlinux=./vmlinux-arm --definition do_sys_open ./vmlinux-arm with build id 7a1f76dd56e9c4da707cd3d6333f50748141434b not found, continuing without symbols Failed to find symbol do_sys_open in kernel Error: Failed to add events. ---- with this fix, we can get the definition ---- $ perf probe --vmlinux=./vmlinux-arm --definition do_sys_open p:probe/do_sys_open do_sys_open+0 ---- Signed-off-by: Masami Hiramatsu Cc: Jiri Olsa Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/147214228193.23638.12581984840822162131.stgit@devbox Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Documentation/perf-probe.txt b/tools/perf/Documentation/perf-probe.txt index 56db4d4..e6c9902 100644 --- a/tools/perf/Documentation/perf-probe.txt +++ b/tools/perf/Documentation/perf-probe.txt @@ -36,6 +36,8 @@ OPTIONS -k:: --vmlinux=PATH:: Specify vmlinux path which has debuginfo (Dwarf binary). + Only when using this with --definition, you can give an offline + vmlinux file. -m:: --module=MODNAME|PATH:: diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c index 7a3d8c4..b4220cd 100644 --- a/tools/perf/builtin-probe.c +++ b/tools/perf/builtin-probe.c @@ -654,8 +654,16 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused) return ret; } break; - case 'a': case 'D': + /* + * If user gives offline vmlinux, ignore buildid, since + * --definition doesn't change running kernel. 
+ */ + if (symbol_conf.vmlinux_name) + symbol_conf.ignore_vmlinux_buildid = true; + /* fall through */ + case 'a': + + /* Ensure the last given target is used */ if (params.target && !params.target_used) { pr_err(" Error: -x/-m must follow the probe definitions.\n"); diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 4a49cb8..8a1e9e6 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -674,6 +674,10 @@ post_process_kernel_probe_trace_events(struct probe_trace_event *tevs, char *tmp; int i, skipped = 0; + /* Skip post process if the target is an offline kernel */ + if (symbol_conf.ignore_vmlinux_buildid) + return 0; + reloc_sym = kernel_get_ref_reloc_sym(); if (!reloc_sym) { pr_warning("Relocated base symbol is not found!\n"); diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index fbe31ef..e680371 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -732,7 +732,7 @@ int symsrc__init(struct symsrc *ss, struct dso *dso, const char *name, } /* Always reject images with a mismatched build-id: */ - if (dso->has_build_id) { + if (dso->has_build_id && !symbol_conf.ignore_vmlinux_buildid) { u8 build_id[BUILD_ID_SIZE]; if (elf_read_build_id(elf, build_id, BUILD_ID_SIZE) < 0) { -- cgit v0.10.2 From 293d5b43948309434568f4dcbb36cce4c3c51bd5 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 26 Aug 2016 01:24:57 +0900 Subject: perf probe: Support probing on offline cross-arch binary Support probing on an offline cross-architecture binary by getting the target machine architecture from the ELF header and choosing the correct register string table for that machine. Here is an example: ----- $ perf probe --vmlinux=./vmlinux-arm --definition 'do_sys_open $params' p:probe/do_sys_open do_sys_open+0 dfd=%r5:s32 filename=%r1:u32 flags=%r6:s32 mode=%r3:u16 ----- Here, we can get the probe/do_sys_open definition from the above output and append it to the target machine's tracing/kprobe_events file under the tracefs mountpoint, usually /sys/kernel/debug/tracing/kprobe_events (or /sys/kernel/tracing/kprobe_events).
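For reference, the core of this approach can be sketched outside of perf as a small standalone C program that reads e_machine from the ELF header with libelf and picks a register-name table from it. This is only a sketch, assuming libelf (-lelf) is installed; the reg0_for_machine() helper and its table entries are made up for illustration and are not part of the patch:
-----
#include <elf.h>
#include <err.h>
#include <fcntl.h>
#include <gelf.h>
#include <stdio.h>
#include <unistd.h>

/* Illustrative only: map a few EM_* values to the name of DWARF register 0. */
static const char *reg0_for_machine(int machine)
{
        switch (machine) {
        case EM_386:
        case EM_X86_64:
                return "%ax";
        case EM_ARM:
        case EM_AARCH64:
                return "%r0";
        default:
                return "unknown";
        }
}

int main(int argc, char **argv)
{
        GElf_Ehdr ehdr;
        Elf *elf;
        int fd;

        if (argc < 2)
                errx(1, "usage: %s <elf-file>", argv[0]);
        if (elf_version(EV_CURRENT) == EV_NONE)
                errx(1, "libelf initialization failed");

        fd = open(argv[1], O_RDONLY);
        if (fd < 0)
                err(1, "%s", argv[1]);

        /* Read only the ELF header; no debuginfo is needed for e_machine. */
        elf = elf_begin(fd, ELF_C_READ, NULL);
        if (!elf || !gelf_getehdr(elf, &ehdr))
                errx(1, "%s: not an ELF file", argv[1]);

        printf("e_machine=%d, DWARF reg 0 maps to %s\n",
               ehdr.e_machine, reg0_for_machine(ehdr.e_machine));

        elf_end(elf);
        close(fd);
        return 0;
}
-----
Build it with something like 'gcc sketch.c -lelf' and point it at any vmlinux or user binary to see which register table would be selected.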
Signed-off-by: Masami Hiramatsu Cc: Jiri Olsa Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/147214229717.23638.6440579792548044658.stgit@devbox [ Add definition for EM_AARCH64 to fix the build on at least centos 6, debian 7 & ubuntu 12.04.5 ] Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/arch/arm/include/dwarf-regs-table.h b/tools/perf/arch/arm/include/dwarf-regs-table.h new file mode 100644 index 0000000..f298d03 --- /dev/null +++ b/tools/perf/arch/arm/include/dwarf-regs-table.h @@ -0,0 +1,9 @@ +#ifdef DEFINE_DWARF_REGSTR_TABLE +/* This is included in perf/util/dwarf-regs.c */ + +static const char * const arm_regstr_tbl[] = { + "%r0", "%r1", "%r2", "%r3", "%r4", + "%r5", "%r6", "%r7", "%r8", "%r9", "%r10", + "%fp", "%ip", "%sp", "%lr", "%pc", +}; +#endif diff --git a/tools/perf/arch/arm64/include/dwarf-regs-table.h b/tools/perf/arch/arm64/include/dwarf-regs-table.h new file mode 100644 index 0000000..2675936 --- /dev/null +++ b/tools/perf/arch/arm64/include/dwarf-regs-table.h @@ -0,0 +1,13 @@ +#ifdef DEFINE_DWARF_REGSTR_TABLE +/* This is included in perf/util/dwarf-regs.c */ + +static const char * const aarch64_regstr_tbl[] = { + "%r0", "%r1", "%r2", "%r3", "%r4", + "%r5", "%r6", "%r7", "%r8", "%r9", + "%r10", "%r11", "%r12", "%r13", "%r14", + "%r15", "%r16", "%r17", "%r18", "%r19", + "%r20", "%r21", "%r22", "%r23", "%r24", + "%r25", "%r26", "%r27", "%r28", "%r29", + "%lr", "%sp", +}; +#endif diff --git a/tools/perf/arch/powerpc/include/dwarf-regs-table.h b/tools/perf/arch/powerpc/include/dwarf-regs-table.h new file mode 100644 index 0000000..db4730f --- /dev/null +++ b/tools/perf/arch/powerpc/include/dwarf-regs-table.h @@ -0,0 +1,27 @@ +#ifdef DEFINE_DWARF_REGSTR_TABLE +/* This is included in perf/util/dwarf-regs.c */ + +/* + * Reference: + * http://refspecs.linuxfoundation.org/ELF/ppc64/PPC-elf64abi-1.9.html + * http://refspecs.linux-foundation.org/elf/elfspec_ppc.pdf + */ +#define REG_DWARFNUM_NAME(reg, idx) [idx] = "%" #reg + +static const char * const powerpc_regstr_tbl[] = { + "%gpr0", "%gpr1", "%gpr2", "%gpr3", "%gpr4", + "%gpr5", "%gpr6", "%gpr7", "%gpr8", "%gpr9", + "%gpr10", "%gpr11", "%gpr12", "%gpr13", "%gpr14", + "%gpr15", "%gpr16", "%gpr17", "%gpr18", "%gpr19", + "%gpr20", "%gpr21", "%gpr22", "%gpr23", "%gpr24", + "%gpr25", "%gpr26", "%gpr27", "%gpr28", "%gpr29", + "%gpr30", "%gpr31", + REG_DWARFNUM_NAME(msr, 66), + REG_DWARFNUM_NAME(ctr, 109), + REG_DWARFNUM_NAME(link, 108), + REG_DWARFNUM_NAME(xer, 101), + REG_DWARFNUM_NAME(dar, 119), + REG_DWARFNUM_NAME(dsisr, 118), +}; + +#endif diff --git a/tools/perf/arch/s390/include/dwarf-regs-table.h b/tools/perf/arch/s390/include/dwarf-regs-table.h new file mode 100644 index 0000000..9da74a9 --- /dev/null +++ b/tools/perf/arch/s390/include/dwarf-regs-table.h @@ -0,0 +1,8 @@ +#ifdef DEFINE_DWARF_REGSTR_TABLE +/* This is included in perf/util/dwarf-regs.c */ + +static const char * const s390_regstr_tbl[] = { + "%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7", + "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15", +}; +#endif diff --git a/tools/perf/arch/sh/include/dwarf-regs-table.h b/tools/perf/arch/sh/include/dwarf-regs-table.h new file mode 100644 index 0000000..3a2deaf --- /dev/null +++ b/tools/perf/arch/sh/include/dwarf-regs-table.h @@ -0,0 +1,25 @@ +#ifdef DEFINE_DWARF_REGSTR_TABLE +/* This is included in perf/util/dwarf-regs.c */ + +const char * const sh_regstr_tbl[] = { + "r0", + "r1", + "r2", + "r3", + "r4", + "r5", + "r6", + "r7", + "r8", + "r9", + "r10", + "r11", + "r12", + "r13", + 
"r14", + "r15", + "pc", + "pr", +}; + +#endif diff --git a/tools/perf/arch/sparc/include/dwarf-regs-table.h b/tools/perf/arch/sparc/include/dwarf-regs-table.h new file mode 100644 index 0000000..12c0761 --- /dev/null +++ b/tools/perf/arch/sparc/include/dwarf-regs-table.h @@ -0,0 +1,18 @@ +#ifdef DEFINE_DWARF_REGSTR_TABLE +/* This is included in perf/util/dwarf-regs.c */ + +static const char * const sparc_regstr_tbl[] = { + "%g0", "%g1", "%g2", "%g3", "%g4", "%g5", "%g6", "%g7", + "%o0", "%o1", "%o2", "%o3", "%o4", "%o5", "%sp", "%o7", + "%l0", "%l1", "%l2", "%l3", "%l4", "%l5", "%l6", "%l7", + "%i0", "%i1", "%i2", "%i3", "%i4", "%i5", "%fp", "%i7", + "%f0", "%f1", "%f2", "%f3", "%f4", "%f5", "%f6", "%f7", + "%f8", "%f9", "%f10", "%f11", "%f12", "%f13", "%f14", "%f15", + "%f16", "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23", + "%f24", "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31", + "%f32", "%f33", "%f34", "%f35", "%f36", "%f37", "%f38", "%f39", + "%f40", "%f41", "%f42", "%f43", "%f44", "%f45", "%f46", "%f47", + "%f48", "%f49", "%f50", "%f51", "%f52", "%f53", "%f54", "%f55", + "%f56", "%f57", "%f58", "%f59", "%f60", "%f61", "%f62", "%f63", +}; +#endif diff --git a/tools/perf/arch/x86/include/dwarf-regs-table.h b/tools/perf/arch/x86/include/dwarf-regs-table.h new file mode 100644 index 0000000..39ac7cb --- /dev/null +++ b/tools/perf/arch/x86/include/dwarf-regs-table.h @@ -0,0 +1,14 @@ +#ifdef DEFINE_DWARF_REGSTR_TABLE +/* This is included in perf/util/dwarf-regs.c */ + +static const char * const x86_32_regstr_tbl[] = { + "%ax", "%cx", "%dx", "%bx", "$stack",/* Stack address instead of %sp */ + "%bp", "%si", "%di", +}; + +static const char * const x86_64_regstr_tbl[] = { + "%ax", "dx", "%cx", "%bx", "%si", "%di", + "%bp", "%sp", "%r8", "%r9", "%r10", "%r11", + "%r12", "%r13", "%r14", "%r15", +}; +#endif diff --git a/tools/perf/arch/xtensa/include/dwarf-regs-table.h b/tools/perf/arch/xtensa/include/dwarf-regs-table.h new file mode 100644 index 0000000..aa0444a --- /dev/null +++ b/tools/perf/arch/xtensa/include/dwarf-regs-table.h @@ -0,0 +1,8 @@ +#ifdef DEFINE_DWARF_REGSTR_TABLE +/* This is included in perf/util/dwarf-regs.c */ + +static const char * const xtensa_regstr_tbl[] = { + "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", + "a8", "a9", "a10", "a11", "a12", "a13", "a14", "a15", +}; +#endif diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 91c5f6e..f1a6d17 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -98,6 +98,7 @@ endif libperf-$(CONFIG_DWARF) += probe-finder.o libperf-$(CONFIG_DWARF) += dwarf-aux.o +libperf-$(CONFIG_DWARF) += dwarf-regs.o libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o libperf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind-local.o diff --git a/tools/perf/util/dwarf-regs.c b/tools/perf/util/dwarf-regs.c new file mode 100644 index 0000000..62bc4a8 --- /dev/null +++ b/tools/perf/util/dwarf-regs.c @@ -0,0 +1,59 @@ +/* + * dwarf-regs.c : Mapping of DWARF debug register numbers into register names. 
+ * + * Written by: Masami Hiramatsu + */ + +#include +#include +#include +#include + +#ifndef EM_AARCH64 +#define EM_AARCH64 183 /* ARM 64 bit */ +#endif + +/* Define const char * {arch}_register_tbl[] */ +#define DEFINE_DWARF_REGSTR_TABLE +#include "../arch/x86/include/dwarf-regs-table.h" +#include "../arch/arm/include/dwarf-regs-table.h" +#include "../arch/arm64/include/dwarf-regs-table.h" +#include "../arch/sh/include/dwarf-regs-table.h" +#include "../arch/powerpc/include/dwarf-regs-table.h" +#include "../arch/s390/include/dwarf-regs-table.h" +#include "../arch/sparc/include/dwarf-regs-table.h" +#include "../arch/xtensa/include/dwarf-regs-table.h" + +#define __get_dwarf_regstr(tbl, n) (((n) < ARRAY_SIZE(tbl)) ? (tbl)[(n)] : NULL) + +/* Return architecture dependent register string (for kprobe-tracer) */ +const char *get_dwarf_regstr(unsigned int n, unsigned int machine) +{ + switch (machine) { + case EM_NONE: /* Generic arch - use host arch */ + return get_arch_regstr(n); + case EM_386: + return __get_dwarf_regstr(x86_32_regstr_tbl, n); + case EM_X86_64: + return __get_dwarf_regstr(x86_64_regstr_tbl, n); + case EM_ARM: + return __get_dwarf_regstr(arm_regstr_tbl, n); + case EM_AARCH64: + return __get_dwarf_regstr(aarch64_regstr_tbl, n); + case EM_SH: + return __get_dwarf_regstr(sh_regstr_tbl, n); + case EM_S390: + return __get_dwarf_regstr(s390_regstr_tbl, n); + case EM_PPC: + case EM_PPC64: + return __get_dwarf_regstr(powerpc_regstr_tbl, n); + case EM_SPARC: + case EM_SPARCV9: + return __get_dwarf_regstr(sparc_regstr_tbl, n); + case EM_XTENSA: + return __get_dwarf_regstr(xtensa_regstr_tbl, n); + default: + pr_err("ELF MACHINE %x is not supported.\n", machine); + } + return NULL; +} diff --git a/tools/perf/util/include/dwarf-regs.h b/tools/perf/util/include/dwarf-regs.h index 07c644e..43bfd8d 100644 --- a/tools/perf/util/include/dwarf-regs.h +++ b/tools/perf/util/include/dwarf-regs.h @@ -3,6 +3,12 @@ #ifdef HAVE_DWARF_SUPPORT const char *get_arch_regstr(unsigned int n); +/* + * get_dwarf_regstr - Returns ftrace register string from DWARF regnum + * n: DWARF register number + * machine: ELF machine signature (EM_*) + */ +const char *get_dwarf_regstr(unsigned int n, unsigned int machine); #endif #ifdef HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index ac4740f..508b61c 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -171,6 +171,7 @@ static struct probe_trace_arg_ref *alloc_trace_arg_ref(long offs) */ static int convert_variable_location(Dwarf_Die *vr_die, Dwarf_Addr addr, Dwarf_Op *fb_ops, Dwarf_Die *sp_die, + unsigned int machine, struct probe_trace_arg *tvar) { Dwarf_Attribute attr; @@ -266,7 +267,7 @@ static_var: if (!tvar) return ret2; - regs = get_arch_regstr(regn); + regs = get_dwarf_regstr(regn, machine); if (!regs) { /* This should be a bug in DWARF or this tool */ pr_warning("Mapping for the register number %u " @@ -543,7 +544,7 @@ static int convert_variable(Dwarf_Die *vr_die, struct probe_finder *pf) dwarf_diename(vr_die)); ret = convert_variable_location(vr_die, pf->addr, pf->fb_ops, - &pf->sp_die, pf->tvar); + &pf->sp_die, pf->machine, pf->tvar); if (ret == -ENOENT || ret == -EINVAL) { pr_err("Failed to find the location of the '%s' variable at this address.\n" " Perhaps it has been optimized out.\n" @@ -1106,11 +1107,8 @@ static int debuginfo__find_probes(struct debuginfo *dbg, struct probe_finder *pf) { int ret = 0; - -#if _ELFUTILS_PREREQ(0, 142) Elf *elf; GElf_Ehdr 
ehdr; - GElf_Shdr shdr; if (pf->cfi_eh || pf->cfi_dbg) return debuginfo__find_probe_location(dbg, pf); @@ -1123,11 +1121,18 @@ static int debuginfo__find_probes(struct debuginfo *dbg, if (gelf_getehdr(elf, &ehdr) == NULL) return -EINVAL; - if (elf_section_by_name(elf, &ehdr, &shdr, ".eh_frame", NULL) && - shdr.sh_type == SHT_PROGBITS) - pf->cfi_eh = dwarf_getcfi_elf(elf); + pf->machine = ehdr.e_machine; + +#if _ELFUTILS_PREREQ(0, 142) + do { + GElf_Shdr shdr; + + if (elf_section_by_name(elf, &ehdr, &shdr, ".eh_frame", NULL) && + shdr.sh_type == SHT_PROGBITS) + pf->cfi_eh = dwarf_getcfi_elf(elf); - pf->cfi_dbg = dwarf_getcfi(dbg->dbg); + pf->cfi_dbg = dwarf_getcfi(dbg->dbg); + } while (0); #endif ret = debuginfo__find_probe_location(dbg, pf); @@ -1155,7 +1160,7 @@ static int copy_variables_cb(Dwarf_Die *die_mem, void *data) (tag == DW_TAG_variable && vf->vars)) { if (convert_variable_location(die_mem, vf->pf->addr, vf->pf->fb_ops, &pf->sp_die, - NULL) == 0) { + pf->machine, NULL) == 0) { vf->args[vf->nargs].var = (char *)dwarf_diename(die_mem); if (vf->args[vf->nargs].var == NULL) { vf->ret = -ENOMEM; @@ -1318,7 +1323,7 @@ static int collect_variables_cb(Dwarf_Die *die_mem, void *data) tag == DW_TAG_variable) { ret = convert_variable_location(die_mem, af->pf.addr, af->pf.fb_ops, &af->pf.sp_die, - NULL); + af->pf.machine, NULL); if (ret == 0 || ret == -ERANGE) { int ret2; bool externs = !af->child; diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h index 51137fc..f1d8558 100644 --- a/tools/perf/util/probe-finder.h +++ b/tools/perf/util/probe-finder.h @@ -80,6 +80,7 @@ struct probe_finder { Dwarf_CFI *cfi_dbg; #endif Dwarf_Op *fb_ops; /* Frame base attribute */ + unsigned int machine; /* Target machine arch */ struct perf_probe_arg *pvar; /* Current target variable */ struct probe_trace_arg *tvar; /* Current result variable */ }; -- cgit v0.10.2 From e50243bbeb528e92e31e03e560b557737c9def3c Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 26 Aug 2016 23:57:58 +0900 Subject: perf probe: Ignore vmlinux Build-id when offline vmlinux given Ignore vmlinux build-id when user gives offline vmlinux if the command does not affect running kernel. perf-probe has several actions some of them does not change the running kernel, like --lines, --vars, and --funcs. e.g. ----- $ ./perf probe -k ./vmlinux-arm -V do_sys_open:14 Available variables at do_sys_open:14 @ char* filename int dfd int fd int flags struct filename* tmp struct open_flags op umode_t mode ----- Signed-off-by: Masami Hiramatsu Cc: Jiri Olsa Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/147222347320.5088.2582658035296667520.stgit@devbox Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c index b4220cd..f87996b 100644 --- a/tools/perf/builtin-probe.c +++ b/tools/perf/builtin-probe.c @@ -611,6 +611,14 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused) */ symbol_conf.try_vmlinux_path = (symbol_conf.vmlinux_name == NULL); + /* + * Except for --list, --del and --add, other command doesn't depend + * nor change running kernel. So if user gives offline vmlinux, + * ignore its buildid. 
+ */ + if (!strchr("lda", params.command) && symbol_conf.vmlinux_name) + symbol_conf.ignore_vmlinux_buildid = true; + switch (params.command) { case 'l': if (params.uprobes) { @@ -655,13 +663,6 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused) } break; case 'D': - /* - * If user gives offline vmlinux, ignore buildid, since - * --definition doesn't change running kernel. - */ - if (symbol_conf.vmlinux_name) - symbol_conf.ignore_vmlinux_buildid = true; - /* fall through */ case 'a': /* Ensure the last given target is used */ -- cgit v0.10.2 From e267769ed4d22144d317d934cbce382cc7a8cca0 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 1 Sep 2016 10:26:49 -0300 Subject: perf test vmlinux: Clarify which -v lines are errors or warning When the 'perf test -v vmlinux' test fails, it is not clear which of the lines are errors or warnings, clarify that adding ERR/WARN prefixes: # perf test -F -v 1 1: vmlinux symtab matches kallsyms : --- start --- Looking at the vmlinux_path (8 entries long) Using /lib/modules/4.8.0-rc4+/build/vmlinux for symbols ERR : 0xffffffffb7001000: diff name v: xen_hypercall_set_trap_table k: hypercall_page WARN: 0xffffffffb7077970: diff end addr for aesni_gcm_dec v: 0xffffffffb707a2f2 k: 0xffffffffb7077a02 WARN: 0xffffffffb707a300: diff end addr for aesni_gcm_enc v: 0xffffffffb707cc03 k: 0xffffffffb707a392 WARN: 0xffffffffb707f950: diff end addr for aesni_gcm_enc_avx_gen2 v: 0xffffffffb7084ef6 k: 0xffffffffb707f9c3 WARN: 0xffffffffb7084f00: diff end addr for aesni_gcm_dec_avx_gen2 v: 0xffffffffb708a691 k: 0xffffffffb7084f73 WARN: 0xffffffffb708aa10: diff end addr for aesni_gcm_enc_avx_gen4 v: 0xffffffffb708f844 k: 0xffffffffb708aa83 WARN: 0xffffffffb708f850: diff end addr for aesni_gcm_dec_avx_gen4 v: 0xffffffffb709486f k: 0xffffffffb708f8c3 ERR : 0xffffffffb71a6e50: diff name v: perf_pmu_commit_txn.part.98 k: perf_pmu_cancel_txn.part.97 ERR : 0xffffffffb752e480: diff name v: wakeup_expire_count_show.part.5 k: wakeup_active_count_show.part.7 ERR : 0xffffffffb76e8d00: diff name v: phys_switch_id_show.part.11 k: phys_port_name_show.part.12 WARN: Maps only in vmlinux: ffffffffb7d7d000-ffffffffb7eeaac8 117d000 [kernel].init.text ffffffffb7eeaac8-ffffffffc03ad000 12eaac8 [kernel].exit.text WARN: Maps in vmlinux with a different name in kallsyms: WARN: Maps only in kallsyms: ---- end ---- vmlinux symtab matches kallsyms: FAILED! 
# Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-n5ml8m7y9x8kzvxt09ipku88@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/tests/vmlinux-kallsyms.c b/tools/perf/tests/vmlinux-kallsyms.c index e63abab..6bd5bf9 100644 --- a/tools/perf/tests/vmlinux-kallsyms.c +++ b/tools/perf/tests/vmlinux-kallsyms.c @@ -143,7 +143,7 @@ next_pair: */ s64 skew = mem_end - UM(pair->end); if (llabs(skew) >= page_size) - pr_debug("%#" PRIx64 ": diff end addr for %s v: %#" PRIx64 " k: %#" PRIx64 "\n", + pr_debug("WARN: %#" PRIx64 ": diff end addr for %s v: %#" PRIx64 " k: %#" PRIx64 "\n", mem_start, sym->name, mem_end, UM(pair->end)); @@ -161,15 +161,15 @@ next_pair: if (UM(pair->start) == mem_start) goto next_pair; - pr_debug("%#" PRIx64 ": diff name v: %s k: %s\n", + pr_debug("ERR : %#" PRIx64 ": diff name v: %s k: %s\n", mem_start, sym->name, pair->name); } else { - pr_debug("%#" PRIx64 ": diff name v: %s k: %s\n", + pr_debug("ERR : %#" PRIx64 ": diff name v: %s k: %s\n", mem_start, sym->name, first_pair->name); } } } else - pr_debug("%#" PRIx64 ": %s not on kallsyms\n", + pr_debug("ERR : %#" PRIx64 ": %s not on kallsyms\n", mem_start, sym->name); err = -1; @@ -178,7 +178,7 @@ next_pair: if (!verbose) goto out; - pr_info("Maps only in vmlinux:\n"); + pr_info("WARN: Maps only in vmlinux:\n"); for (map = maps__first(maps); map; map = map__next(map)) { struct map * @@ -198,7 +198,7 @@ next_pair: map__fprintf(map, stderr); } - pr_info("Maps in vmlinux with a different name in kallsyms:\n"); + pr_info("WARN: Maps in vmlinux with a different name in kallsyms:\n"); for (map = maps__first(maps); map; map = map__next(map)) { struct map *pair; @@ -212,17 +212,17 @@ next_pair: if (pair->start == mem_start) { pair->priv = 1; - pr_info(" %" PRIx64 "-%" PRIx64 " %" PRIx64 " %s in kallsyms as", + pr_info("WARN: %" PRIx64 "-%" PRIx64 " %" PRIx64 " %s in kallsyms as", map->start, map->end, map->pgoff, map->dso->name); if (mem_end != pair->end) - pr_info(":\n*%" PRIx64 "-%" PRIx64 " %" PRIx64, + pr_info(":\nWARN: *%" PRIx64 "-%" PRIx64 " %" PRIx64, pair->start, pair->end, pair->pgoff); pr_info(" %s\n", pair->dso->name); pair->priv = 1; } } - pr_info("Maps only in kallsyms:\n"); + pr_info("WARN: Maps only in kallsyms:\n"); maps = &kallsyms.kmaps.maps[type]; -- cgit v0.10.2 From 54da07695a0c11b342815be0d8f1796c88765bde Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 1 Sep 2016 10:40:57 -0300 Subject: perf test vmlinux: Avoid printing headers for empty lists Before: # perf test -F -v 1 1: vmlinux symtab matches kallsyms: --- start --- WARN: Maps only in vmlinux: ffffffffb7d7d000-ffffffffb7eeaac8 117d000 [kernel].init.text ffffffffb7eeaac8-ffffffffc03ad000 12eaac8 [kernel].exit.text WARN: Maps in vmlinux with a different name in kallsyms: WARN: Maps only in kallsyms: ---- end ---- vmlinux symtab matches kallsyms: Ok # The two last WARN lines are now suppressed, since there are no such cases detected. 
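The change boils down to a lazy-header idiom: remember whether the section header has been printed and emit it only when the first entry that needs it shows up, so empty sections stay silent. A minimal standalone sketch of that idiom in plain C (the data and names are made up for illustration; this is not the perf test code itself):
-----
#include <stdbool.h>
#include <stdio.h>

/* Emit the section header only once the first unmatched entry is found. */
static void print_section(const char *header, const char **items, int nr)
{
        bool header_printed = false;
        int i;

        for (i = 0; i < nr; i++) {
                if (!items[i])          /* entry was matched, nothing to report */
                        continue;
                if (!header_printed) {
                        printf("%s\n", header);
                        header_printed = true;
                }
                printf("  %s\n", items[i]);
        }
}

int main(void)
{
        const char *only_in_vmlinux[] = { "[kernel].init.text", "[kernel].exit.text" };
        const char *only_in_kallsyms[] = { NULL };      /* everything matched */

        print_section("WARN: Maps only in vmlinux:", only_in_vmlinux, 2);
        print_section("WARN: Maps only in kallsyms:", only_in_kallsyms, 1);
        return 0;
}
-----
With nothing left unmatched, the second call prints no header at all, which is the behaviour the patch below introduces.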
Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-9ww8uvzl682ykaw8ht1tozlr@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/tests/vmlinux-kallsyms.c b/tools/perf/tests/vmlinux-kallsyms.c index 6bd5bf9..450f699 100644 --- a/tools/perf/tests/vmlinux-kallsyms.c +++ b/tools/perf/tests/vmlinux-kallsyms.c @@ -28,6 +28,7 @@ int test__vmlinux_matches_kallsyms(int subtest __maybe_unused) enum map_type type = MAP__FUNCTION; struct maps *maps = &vmlinux.kmaps.maps[type]; u64 mem_start, mem_end; + bool header_printed; /* * Step 1: @@ -178,7 +179,7 @@ next_pair: if (!verbose) goto out; - pr_info("WARN: Maps only in vmlinux:\n"); + header_printed = false; for (map = maps__first(maps); map; map = map__next(map)) { struct map * @@ -192,13 +193,18 @@ next_pair: (map->dso->kernel ? map->dso->short_name : map->dso->name)); - if (pair) + if (pair) { pair->priv = 1; - else + } else { + if (!header_printed) { + pr_info("WARN: Maps only in vmlinux:\n"); + header_printed = true; + } map__fprintf(map, stderr); + } } - pr_info("WARN: Maps in vmlinux with a different name in kallsyms:\n"); + header_printed = false; for (map = maps__first(maps); map; map = map__next(map)) { struct map *pair; @@ -211,7 +217,11 @@ next_pair: continue; if (pair->start == mem_start) { - pair->priv = 1; + if (!header_printed) { + pr_info("WARN: Maps in vmlinux with a different name in kallsyms:\n"); + header_printed = true; + } + pr_info("WARN: %" PRIx64 "-%" PRIx64 " %" PRIx64 " %s in kallsyms as", map->start, map->end, map->pgoff, map->dso->name); if (mem_end != pair->end) @@ -222,13 +232,18 @@ next_pair: } } - pr_info("WARN: Maps only in kallsyms:\n"); + header_printed = false; maps = &kallsyms.kmaps.maps[type]; for (map = maps__first(maps); map; map = map__next(map)) { - if (!map->priv) + if (!map->priv) { + if (!header_printed) { + pr_info("WARN: Maps only in kallsyms:\n"); + header_printed = true; + } map__fprintf(map, stderr); + } } out: machine__exit(&kallsyms); -- cgit v0.10.2 From 7e1b659545b37ed629ee43d4daf72a02dc06e195 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 1 Sep 2016 10:40:57 -0300 Subject: perf test vmlinux: Tolerate symbol aliases The algorithms used to prune aliases in symbols__fixup_duplicate() uses information available on ELF symtabs that are not present on /proc/kallsyms, so it picks different aliases as "best" for vmlinux and kallsyms. We could probably improve a bit this by having a list of aliases for the "best" symbols picked, instead of throwing this info, but that is left for when we find a real need. 
With this, 'perf test vmlinux' passes: # perf test -F 1 1: vmlinux symtab matches kallsyms: Ok # When we ask for verbose mode, we can see those warning: # perf test -F -v 1 1: vmlinux symtab matches kallsyms: --- start --- Looking at the vmlinux_path (8 entries long) Using /lib/modules/4.8.0-rc4+/build/vmlinux for symbols WARN: 0xffffffffb7001000: diff name v: xen_hypercall_set_trap_table k: hypercall_page WARN: 0xffffffffb7077970: diff end addr for aesni_gcm_dec v: 0xffffffffb707a2f2 k: 0xffffffffb7077a02 WARN: 0xffffffffb707a300: diff end addr for aesni_gcm_enc v: 0xffffffffb707cc03 k: 0xffffffffb707a392 WARN: 0xffffffffb707f950: diff end addr for aesni_gcm_enc_avx_gen2 v: 0xffffffffb7084ef6 k: 0xffffffffb707f9c3 WARN: 0xffffffffb7084f00: diff end addr for aesni_gcm_dec_avx_gen2 v: 0xffffffffb708a691 k: 0xffffffffb7084f73 WARN: 0xffffffffb708aa10: diff end addr for aesni_gcm_enc_avx_gen4 v: 0xffffffffb708f844 k: 0xffffffffb708aa83 WARN: 0xffffffffb708f850: diff end addr for aesni_gcm_dec_avx_gen4 v: 0xffffffffb709486f k: 0xffffffffb708f8c3 WARN: 0xffffffffb71a6e50: diff name v: perf_pmu_commit_txn.part.98 k: perf_pmu_cancel_txn.part.97 WARN: 0xffffffffb752e480: diff name v: wakeup_expire_count_show.part.5 k: wakeup_active_count_show.part.7 WARN: 0xffffffffb76e8d00: diff name v: phys_switch_id_show.part.11 k: phys_port_name_show.part.12 WARN: Maps only in vmlinux: ffffffffb7d7d000-ffffffffb7eeaac8 117d000 [kernel].init.text ffffffffb7eeaac8-ffffffffc03ad000 12eaac8 [kernel].exit.text ---- end ---- vmlinux symtab matches kallsyms: Ok # Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-6v5w1k8rpx4ggczlkw730vt0@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/tests/vmlinux-kallsyms.c b/tools/perf/tests/vmlinux-kallsyms.c index 450f699..77513bf 100644 --- a/tools/perf/tests/vmlinux-kallsyms.c +++ b/tools/perf/tests/vmlinux-kallsyms.c @@ -155,19 +155,20 @@ next_pair: * kallsyms. */ continue; - } else { pair = machine__find_kernel_symbol_by_name(&kallsyms, type, sym->name, NULL, NULL); if (pair) { if (UM(pair->start) == mem_start) goto next_pair; - pr_debug("ERR : %#" PRIx64 ": diff name v: %s k: %s\n", + pr_debug("WARN: %#" PRIx64 ": diff name v: %s k: %s\n", mem_start, sym->name, pair->name); } else { - pr_debug("ERR : %#" PRIx64 ": diff name v: %s k: %s\n", + pr_debug("WARN: %#" PRIx64 ": diff name v: %s k: %s\n", mem_start, sym->name, first_pair->name); } + + continue; } } else pr_debug("ERR : %#" PRIx64 ": %s not on kallsyms\n", -- cgit v0.10.2 From c97b40e4d15f13a36cd037d598e45cbe9e1e5757 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 1 Sep 2016 10:56:06 -0300 Subject: perf symbols: Check symbol_conf.allow_aliases for kallsyms loading too We can allow aliases to be kept, but we were checking this just when loading vmlinux files, be consistent, do it for any symbol table loading code that calls symbol__fixup_duplicate() by making this function check .allow_aliases instead. 
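The shape of the fix is simply moving the policy guard from the call sites into the helper, so every loader that calls it (ELF symtab, kallsyms, ...) gets the same behaviour without repeating the check. A rough standalone sketch of that pattern, with stand-in types and names rather than the real perf structures:
-----
#include <stdbool.h>
#include <stdio.h>

/* Stand-ins for the real perf configuration and symbol tree. */
static struct { bool allow_aliases; } symbol_conf;
struct sym_tree { int nr_prune_passes; };

static void prune_duplicates(struct sym_tree *tree)
{
        tree->nr_prune_passes++;        /* placeholder for the real pruning walk */
}

/* The guard lives in the helper, so every caller gets the same behaviour. */
static void fixup_duplicate(struct sym_tree *tree)
{
        if (symbol_conf.allow_aliases)
                return;
        prune_duplicates(tree);
}

int main(void)
{
        struct sym_tree tree = { 0 };

        fixup_duplicate(&tree);                 /* default: duplicates pruned */
        symbol_conf.allow_aliases = true;
        fixup_duplicate(&tree);                 /* aliases kept, no pruning */
        printf("prune passes run: %d\n", tree.nr_prune_passes);
        return 0;
}
-----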
Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Wang Nan Fixes: 680d926a8cb0 ("perf symbols: Allow symbol alias when loading map for symbol name") Link: http://lkml.kernel.org/n/tip-z0avp0s6cfjckc4xj3pdfjdz@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index e680371..9ba6e49 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -1136,8 +1136,7 @@ new_symbol: * For misannotated, zeroed, ASM function sizes. */ if (nr > 0) { - if (!symbol_conf.allow_aliases) - symbols__fixup_duplicate(&dso->symbols[map->type]); + symbols__fixup_duplicate(&dso->symbols[map->type]); symbols__fixup_end(&dso->symbols[map->type]); if (kmap) { /* diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 863d69c..ef2f913 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -153,6 +153,9 @@ void symbols__fixup_duplicate(struct rb_root *symbols) struct rb_node *nd; struct symbol *curr, *next; + if (symbol_conf.allow_aliases) + return; + nd = rb_first(symbols); while (nd) { -- cgit v0.10.2 From 432746f8e0b6a82ba832b771afe31abd51af6752 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 1 Sep 2016 11:00:23 -0300 Subject: perf symbols: Fixup symbol sizes before picking best ones When we call symbol__fixup_duplicate() we use algorithms to pick the "best" symbols for cases where there are various functions/aliases to an address, and those check zero size symbols, which, before calling symbol__fixup_end() are _all_ symbols in a just parsed kallsyms file. So first fixup the end, then fixup the duplicates. Found while trying to figure out why 'perf test vmlinux' failed, see the output of 'perf test -v vmlinux' to see cases where the symbols picked as best for vmlinux don't match the ones picked for kallsyms. Cc: Anton Blanchard Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Wang Nan Fixes: 694bf407b061 ("perf symbols: Add some heuristics for choosing the best duplicate symbol") Link: http://lkml.kernel.org/n/tip-rxqvdgr0mqjdxee0kf8i2ufn@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 9ba6e49..295d314 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -1136,8 +1136,8 @@ new_symbol: * For misannotated, zeroed, ASM function sizes. 
*/ if (nr > 0) { - symbols__fixup_duplicate(&dso->symbols[map->type]); symbols__fixup_end(&dso->symbols[map->type]); + symbols__fixup_duplicate(&dso->symbols[map->type]); if (kmap) { /* * We need to fixup this here too because we create new diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index ef2f913..98cd503 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1243,8 +1243,8 @@ int __dso__load_kallsyms(struct dso *dso, const char *filename, if (kallsyms__delta(map, filename, &delta)) return -1; - symbols__fixup_duplicate(&dso->symbols[map->type]); symbols__fixup_end(&dso->symbols[map->type]); + symbols__fixup_duplicate(&dso->symbols[map->type]); if (dso->kernel == DSO_TYPE_GUEST_KERNEL) dso->symtab_type = DSO_BINARY_TYPE__GUEST_KALLSYMS; -- cgit v0.10.2 From b3f33f930606a55b547ac4ef5a32df2c23a44ec1 Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Wed, 3 Aug 2016 14:28:44 +0530 Subject: perf probe: Add helper function to check if probe with variable Introduce helper function instead of inline code and replace hardcoded strings "$vars" and "$params" with their corresponding macros. perf_probe_with_var() is not declared as static since it will be called from different file in subsequent patch. Signed-off-by: Ravi Bangoria Acked-by: Masami Hiramatsu Cc: Alexander Shishkin Cc: Hemant Kumar Cc: Naveen N. Rao Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/r/1470214725-5023-1-git-send-email-ravi.bangoria@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 8a1e9e6..a543e9c 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -1618,19 +1618,27 @@ out: return ret; } +/* Returns true if *any* ARG is either C variable, $params or $vars. */ +bool perf_probe_with_var(struct perf_probe_event *pev) +{ + int i = 0; + + for (i = 0; i < pev->nargs; i++) + if (is_c_varname(pev->args[i].var) || + !strcmp(pev->args[i].var, PROBE_ARG_PARAMS) || + !strcmp(pev->args[i].var, PROBE_ARG_VARS)) + return true; + return false; +} + /* Return true if this perf_probe_event requires debuginfo */ bool perf_probe_event_need_dwarf(struct perf_probe_event *pev) { - int i; - if (pev->point.file || pev->point.line || pev->point.lazy_line) return true; - for (i = 0; i < pev->nargs; i++) - if (is_c_varname(pev->args[i].var) || - !strcmp(pev->args[i].var, "$params") || - !strcmp(pev->args[i].var, "$vars")) - return true; + if (perf_probe_with_var(pev)) + return true; return false; } diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h index 6209408..8091d15 100644 --- a/tools/perf/util/probe-event.h +++ b/tools/perf/util/probe-event.h @@ -128,6 +128,8 @@ char *synthesize_perf_probe_point(struct perf_probe_point *pp); int perf_probe_event__copy(struct perf_probe_event *dst, struct perf_probe_event *src); +bool perf_probe_with_var(struct perf_probe_event *pev); + /* Check the perf_probe_event needs debuginfo */ bool perf_probe_event_need_dwarf(struct perf_probe_event *pev); -- cgit v0.10.2 From e47392bf9c0613a058cd20ee89d8ce9d957d4b24 Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Wed, 3 Aug 2016 14:28:45 +0530 Subject: perf uprobe: Skip prologue if program compiled without optimization The function prologue prepares stack and registers before executing function logic. When target program is compiled without optimization, function parameter information is only valid after the prologue. 
When we probe entrypc of the function, and try to record a function parameter, it contains a garbage value. For example: $ vim test.c #include void foo(int i) { printf("i: %d\n", i); } int main() { foo(42); return 0; } $ gcc -g test.c -o test $ objdump -dl test | less foo(): /home/ravi/test.c:4 400536: 55 push %rbp 400537: 48 89 e5 mov %rsp,%rbp 40053a: 48 83 ec 10 sub -bashx10,%rsp 40053e: 89 7d fc mov %edi,-0x4(%rbp) /home/ravi/test.c:5 400541: 8b 45 fc mov -0x4(%rbp),%eax ... ... main(): /home/ravi/test.c:9 400558: 55 push %rbp 400559: 48 89 e5 mov %rsp,%rbp /home/ravi/test.c:10 40055c: bf 2a 00 00 00 mov -bashx2a,%edi 400561: e8 d0 ff ff ff callq 400536 $ perf probe -x ./test 'foo i' $ cat /sys/kernel/debug/tracing/uprobe_events p:probe_test/foo /home/ravi/test:0x0000000000000536 i=-12(%sp):s32 $ perf record -e probe_test:foo ./test $ perf script test 5778 [001] 4918.562027: probe_test:foo: (400536) i=0 Here variable 'i' is passed via stack which is pushed on stack at 0x40053e. But we are probing at 0x400536. To resolve this issues, we need to probe on next instruction after prologue. gdb and systemtap also does same thing. I've implemented this patch based on approach systemtap has used. After applying patch: $ perf probe -x ./test 'foo i' $ cat /sys/kernel/debug/tracing/uprobe_events p:probe_test/foo /home/ravi/test:0x0000000000000541 i=-4(%bp):s32 $ perf record -e probe_test:foo ./test $ perf script test 6300 [001] 5877.879327: probe_test:foo: (400541) i=42 No need to skip prologue for optimized case since debug info is correct for each instructions for -O2 -g. For more details please visit: https://bugzilla.redhat.com/show_bug.cgi?id=612253#c6 Changes in v2: - Skipping prologue only when any ARG is either C variable, $params or $vars. - Probe on line(:1) may not be always possible. Recommend only address to force probe on function entry. Committer notes: Testing it with 'perf trace': # perf probe -x ./test foo i Added new event: probe_test:foo (on foo in /home/acme/c/test with i) You can now use it in all perf tools, such as: perf record -e probe_test:foo -aR sleep 1 # cat /sys/kernel/debug/tracing/uprobe_events p:probe_test/foo /home/acme/c/test:0x0000000000000526 i=-12(%sp):s32 # trace --no-sys --event probe_*:* ./test i: 42 0.000 probe_test:foo:(400526) i=0) # After the patch: # perf probe -d *:* Removed event: probe_test:foo # perf probe -x ./test foo i Target program is compiled without optimization. Skipping prologue. Probe on address 0x400526 to force probing at the function entry. Added new event: probe_test:foo (on foo in /home/acme/c/test with i) You can now use it in all perf tools, such as: perf record -e probe_test:foo -aR sleep 1 # cat /sys/kernel/debug/tracing/uprobe_events p:probe_test/foo /home/acme/c/test:0x0000000000000531 i=-4(%bp):s32 # trace --no-sys --event probe_*:* ./test i: 42 0.000 probe_test:foo:(400531) i=42) # Reported-by: Michael Petlan Report-Link: https://www.mail-archive.com/linux-perf-users@vger.kernel.org/msg02348.html Signed-off-by: Ravi Bangoria Tested-by: Arnaldo Carvalho de Melo Tested-by: Jiri Olsa Acked-by: Masami Hiramatsu Acked-by: Naveen N. 
Rao Cc: Alexander Shishkin Cc: Hemant Kumar Cc: Peter Zijlstra Cc: Wang Nan Cc: Yauheni Kaliuta Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1299021 Link: http://lkml.kernel.org/r/1470214725-5023-2-git-send-email-ravi.bangoria@linux.vnet.ibm.com [ Rename 'die' to 'cu_die' to avoid shadowing a die() definition on at least centos 5, Debian 7 and ubuntu:12.04.5] [ Use PRIx64 instead of lx to format a Dwarf_Addr, aka long long unsigned int, fixing the build on 32-bit systems ] [ dwarf_getsrclines() expects a size_t * argument ] Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index 508b61c..003ecad 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -907,6 +907,170 @@ static int find_probe_point_lazy(Dwarf_Die *sp_die, struct probe_finder *pf) return die_walk_lines(sp_die, probe_point_lazy_walker, pf); } +static bool var_has_loclist(Dwarf_Die *cu_die) +{ + Dwarf_Attribute loc; + int tag = dwarf_tag(cu_die); + + if (tag != DW_TAG_formal_parameter && + tag != DW_TAG_variable) + return false; + + return (dwarf_attr_integrate(cu_die, DW_AT_location, &loc) && + dwarf_whatform(&loc) == DW_FORM_sec_offset); +} + +/* + * For any object in given CU whose DW_AT_location is a location list, + * target program is compiled with optimization. + */ +static bool optimized_target(Dwarf_Die *cu_die) +{ + Dwarf_Die tmp_die; + + if (var_has_loclist(cu_die)) + return true; + + if (!dwarf_child(cu_die, &tmp_die) && optimized_target(&tmp_die)) + return true; + + if (!dwarf_siblingof(cu_die, &tmp_die) && optimized_target(&tmp_die)) + return true; + + return false; +} + +static bool get_entrypc_idx(Dwarf_Lines *lines, unsigned long nr_lines, + Dwarf_Addr pf_addr, unsigned long *entrypc_idx) +{ + unsigned long i; + Dwarf_Addr addr; + + for (i = 0; i < nr_lines; i++) { + if (dwarf_lineaddr(dwarf_onesrcline(lines, i), &addr)) + return false; + + if (addr == pf_addr) { + *entrypc_idx = i; + return true; + } + } + return false; +} + +static bool get_postprologue_addr(unsigned long entrypc_idx, + Dwarf_Lines *lines, + unsigned long nr_lines, + Dwarf_Addr highpc, + Dwarf_Addr *postprologue_addr) +{ + unsigned long i; + int entrypc_lno, lno; + Dwarf_Line *line; + Dwarf_Addr addr; + bool p_end; + + /* entrypc_lno is actual source line number */ + line = dwarf_onesrcline(lines, entrypc_idx); + if (dwarf_lineno(line, &entrypc_lno)) + return false; + + for (i = entrypc_idx; i < nr_lines; i++) { + line = dwarf_onesrcline(lines, i); + + if (dwarf_lineaddr(line, &addr) || + dwarf_lineno(line, &lno) || + dwarf_lineprologueend(line, &p_end)) + return false; + + /* highpc is exclusive. [entrypc,highpc) */ + if (addr >= highpc) + break; + + /* clang supports prologue-end marker */ + if (p_end) + break; + + /* Actual next line in source */ + if (lno != entrypc_lno) + break; + + /* + * Single source line can have multiple line records. + * For Example, + * void foo() { printf("hello\n"); } + * contains two line records. One points to declaration and + * other points to printf() line. Variable 'lno' won't get + * incremented in this case but 'i' will. 
+ */ + if (i != entrypc_idx) + break; + } + + dwarf_lineaddr(line, postprologue_addr); + if (*postprologue_addr >= highpc) + dwarf_lineaddr(dwarf_onesrcline(lines, i - 1), + postprologue_addr); + + return true; +} + +static void __skip_prologue(Dwarf_Die *sp_die, struct probe_finder *pf) +{ + size_t nr_lines = 0; + unsigned long entrypc_idx = 0; + Dwarf_Lines *lines = NULL; + Dwarf_Addr postprologue_addr; + Dwarf_Addr highpc; + + if (dwarf_highpc(sp_die, &highpc)) + return; + + if (dwarf_getsrclines(&pf->cu_die, &lines, &nr_lines)) + return; + + if (!get_entrypc_idx(lines, nr_lines, pf->addr, &entrypc_idx)) + return; + + if (!get_postprologue_addr(entrypc_idx, lines, nr_lines, + highpc, &postprologue_addr)) + return; + + pf->addr = postprologue_addr; +} + +static void skip_prologue(Dwarf_Die *sp_die, struct probe_finder *pf) +{ + struct perf_probe_point *pp = &pf->pev->point; + + /* Not uprobe? */ + if (!pf->pev->uprobes) + return; + + /* Compiled with optimization? */ + if (optimized_target(&pf->cu_die)) + return; + + /* Don't know entrypc? */ + if (!pf->addr) + return; + + /* Only FUNC and FUNC@SRC are eligible. */ + if (!pp->function || pp->line || pp->retprobe || pp->lazy_line || + pp->offset || pp->abs_address) + return; + + /* Not interested in func parameter? */ + if (!perf_probe_with_var(pf->pev)) + return; + + pr_info("Target program is compiled without optimization. Skipping prologue.\n" + "Probe on address 0x%" PRIx64 " to force probing at the function entry.\n\n", + pf->addr); + + __skip_prologue(sp_die, pf); +} + static int probe_point_inline_cb(Dwarf_Die *in_die, void *data) { struct probe_finder *pf = data; @@ -969,6 +1133,7 @@ static int probe_point_search_cb(Dwarf_Die *sp_die, void *data) if (pp->lazy_line) param->retval = find_probe_point_lazy(sp_die, pf); else { + skip_prologue(sp_die, pf); pf->addr += pp->offset; /* TODO: Check the address in this function */ param->retval = call_probe_finder(sp_die, pf); -- cgit v0.10.2 From 6243b9dc4c991fe8bdc53a0e029908aef3ddb101 Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Tue, 30 Aug 2016 14:09:37 +0530 Subject: perf probe: Move dwarf specific functions to dwarf-aux.c Move generic dwarf related functions from util/probe-finder.c to util/dwarf-aux.c. Functions name and their prototype are also changed accordingly. No functionality changes. Suggested-and-Acked-by: Masami Hiramatsu Signed-off-by: Ravi Bangoria Cc: Alexander Shishkin Cc: Hemant Kumar Cc: Naveen N. 
Rao Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/r/1472546377-25612-1-git-send-email-ravi.bangoria@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c index a347b19..faec899 100644 --- a/tools/perf/util/dwarf-aux.c +++ b/tools/perf/util/dwarf-aux.c @@ -1085,3 +1085,182 @@ int die_get_var_range(Dwarf_Die *sp_die __maybe_unused, return -ENOTSUP; } #endif + +/* + * die_has_loclist - Check if DW_AT_location of @vr_die is a location list + * @vr_die: a variable DIE + */ +static bool die_has_loclist(Dwarf_Die *vr_die) +{ + Dwarf_Attribute loc; + int tag = dwarf_tag(vr_die); + + if (tag != DW_TAG_formal_parameter && + tag != DW_TAG_variable) + return false; + + return (dwarf_attr_integrate(vr_die, DW_AT_location, &loc) && + dwarf_whatform(&loc) == DW_FORM_sec_offset); +} + +/* + * die_is_optimized_target - Check if target program is compiled with + * optimization + * @cu_die: a CU DIE + * + * For any object in given CU whose DW_AT_location is a location list, + * target program is compiled with optimization. This is applicable to + * clang as well. + */ +bool die_is_optimized_target(Dwarf_Die *cu_die) +{ + Dwarf_Die tmp_die; + + if (die_has_loclist(cu_die)) + return true; + + if (!dwarf_child(cu_die, &tmp_die) && + die_is_optimized_target(&tmp_die)) + return true; + + if (!dwarf_siblingof(cu_die, &tmp_die) && + die_is_optimized_target(&tmp_die)) + return true; + + return false; +} + +/* + * die_search_idx - Search index of given line address + * @lines: Line records of single CU + * @nr_lines: Number of @lines + * @addr: address we are looking for + * @idx: index to be set by this function (return value) + * + * Search for @addr by looping over every lines of CU. If address + * matches, set index of that line in @idx. Note that single source + * line can have multiple line records. i.e. single source line can + * have multiple index. + */ +static bool die_search_idx(Dwarf_Lines *lines, unsigned long nr_lines, + Dwarf_Addr addr, unsigned long *idx) +{ + unsigned long i; + Dwarf_Addr tmp; + + for (i = 0; i < nr_lines; i++) { + if (dwarf_lineaddr(dwarf_onesrcline(lines, i), &tmp)) + return false; + + if (tmp == addr) { + *idx = i; + return true; + } + } + return false; +} + +/* + * die_get_postprologue_addr - Search next address after function prologue + * @entrypc_idx: entrypc index + * @lines: Line records of single CU + * @nr_lines: Number of @lines + * @hignpc: high PC address of function + * @postprologue_addr: Next address after function prologue (return value) + * + * Look for prologue-end marker. If there is no explicit marker, return + * address of next line record or next source line. + */ +static bool die_get_postprologue_addr(unsigned long entrypc_idx, + Dwarf_Lines *lines, + unsigned long nr_lines, + Dwarf_Addr highpc, + Dwarf_Addr *postprologue_addr) +{ + unsigned long i; + int entrypc_lno, lno; + Dwarf_Line *line; + Dwarf_Addr addr; + bool p_end; + + /* entrypc_lno is actual source line number */ + line = dwarf_onesrcline(lines, entrypc_idx); + if (dwarf_lineno(line, &entrypc_lno)) + return false; + + for (i = entrypc_idx; i < nr_lines; i++) { + line = dwarf_onesrcline(lines, i); + + if (dwarf_lineaddr(line, &addr) || + dwarf_lineno(line, &lno) || + dwarf_lineprologueend(line, &p_end)) + return false; + + /* highpc is exclusive. 
[entrypc,highpc) */ + if (addr >= highpc) + break; + + /* clang supports prologue-end marker */ + if (p_end) + break; + + /* Actual next line in source */ + if (lno != entrypc_lno) + break; + + /* + * Single source line can have multiple line records. + * For Example, + * void foo() { printf("hello\n"); } + * contains two line records. One points to declaration and + * other points to printf() line. Variable 'lno' won't get + * incremented in this case but 'i' will. + */ + if (i != entrypc_idx) + break; + } + + dwarf_lineaddr(line, postprologue_addr); + if (*postprologue_addr >= highpc) + dwarf_lineaddr(dwarf_onesrcline(lines, i - 1), + postprologue_addr); + + return true; +} + +/* + * die_skip_prologue - Use next address after prologue as probe location + * @sp_die: a subprogram DIE + * @cu_die: a CU DIE + * @entrypc: entrypc of the function + * + * Function prologue prepares stack and registers before executing function + * logic. When target program is compiled without optimization, function + * parameter information is only valid after prologue. When we probe entrypc + * of the function, and try to record function parameter, it contains + * garbage value. + */ +void die_skip_prologue(Dwarf_Die *sp_die, Dwarf_Die *cu_die, + Dwarf_Addr *entrypc) +{ + size_t nr_lines = 0; + unsigned long entrypc_idx = 0; + Dwarf_Lines *lines = NULL; + Dwarf_Addr postprologue_addr; + Dwarf_Addr highpc; + + if (dwarf_highpc(sp_die, &highpc)) + return; + + if (dwarf_getsrclines(cu_die, &lines, &nr_lines)) + return; + + if (!die_search_idx(lines, nr_lines, *entrypc, &entrypc_idx)) + return; + + if (!die_get_postprologue_addr(entrypc_idx, lines, nr_lines, + highpc, &postprologue_addr)) + return; + + *entrypc = postprologue_addr; +} diff --git a/tools/perf/util/dwarf-aux.h b/tools/perf/util/dwarf-aux.h index dc0ce1a..8b6d2f8 100644 --- a/tools/perf/util/dwarf-aux.h +++ b/tools/perf/util/dwarf-aux.h @@ -125,4 +125,12 @@ int die_get_typename(Dwarf_Die *vr_die, struct strbuf *buf); /* Get the name and type of given variable DIE, stored as "type\tname" */ int die_get_varname(Dwarf_Die *vr_die, struct strbuf *buf); int die_get_var_range(Dwarf_Die *sp_die, Dwarf_Die *vr_die, struct strbuf *buf); + +/* Check if target program is compiled with optimization */ +bool die_is_optimized_target(Dwarf_Die *cu_die); + +/* Use next address after prologue as probe location */ +void die_skip_prologue(Dwarf_Die *sp_die, Dwarf_Die *cu_die, + Dwarf_Addr *entrypc); + #endif diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index 003ecad..8daca4f 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -907,138 +907,6 @@ static int find_probe_point_lazy(Dwarf_Die *sp_die, struct probe_finder *pf) return die_walk_lines(sp_die, probe_point_lazy_walker, pf); } -static bool var_has_loclist(Dwarf_Die *cu_die) -{ - Dwarf_Attribute loc; - int tag = dwarf_tag(cu_die); - - if (tag != DW_TAG_formal_parameter && - tag != DW_TAG_variable) - return false; - - return (dwarf_attr_integrate(cu_die, DW_AT_location, &loc) && - dwarf_whatform(&loc) == DW_FORM_sec_offset); -} - -/* - * For any object in given CU whose DW_AT_location is a location list, - * target program is compiled with optimization. 
- */ -static bool optimized_target(Dwarf_Die *cu_die) -{ - Dwarf_Die tmp_die; - - if (var_has_loclist(cu_die)) - return true; - - if (!dwarf_child(cu_die, &tmp_die) && optimized_target(&tmp_die)) - return true; - - if (!dwarf_siblingof(cu_die, &tmp_die) && optimized_target(&tmp_die)) - return true; - - return false; -} - -static bool get_entrypc_idx(Dwarf_Lines *lines, unsigned long nr_lines, - Dwarf_Addr pf_addr, unsigned long *entrypc_idx) -{ - unsigned long i; - Dwarf_Addr addr; - - for (i = 0; i < nr_lines; i++) { - if (dwarf_lineaddr(dwarf_onesrcline(lines, i), &addr)) - return false; - - if (addr == pf_addr) { - *entrypc_idx = i; - return true; - } - } - return false; -} - -static bool get_postprologue_addr(unsigned long entrypc_idx, - Dwarf_Lines *lines, - unsigned long nr_lines, - Dwarf_Addr highpc, - Dwarf_Addr *postprologue_addr) -{ - unsigned long i; - int entrypc_lno, lno; - Dwarf_Line *line; - Dwarf_Addr addr; - bool p_end; - - /* entrypc_lno is actual source line number */ - line = dwarf_onesrcline(lines, entrypc_idx); - if (dwarf_lineno(line, &entrypc_lno)) - return false; - - for (i = entrypc_idx; i < nr_lines; i++) { - line = dwarf_onesrcline(lines, i); - - if (dwarf_lineaddr(line, &addr) || - dwarf_lineno(line, &lno) || - dwarf_lineprologueend(line, &p_end)) - return false; - - /* highpc is exclusive. [entrypc,highpc) */ - if (addr >= highpc) - break; - - /* clang supports prologue-end marker */ - if (p_end) - break; - - /* Actual next line in source */ - if (lno != entrypc_lno) - break; - - /* - * Single source line can have multiple line records. - * For Example, - * void foo() { printf("hello\n"); } - * contains two line records. One points to declaration and - * other points to printf() line. Variable 'lno' won't get - * incremented in this case but 'i' will. - */ - if (i != entrypc_idx) - break; - } - - dwarf_lineaddr(line, postprologue_addr); - if (*postprologue_addr >= highpc) - dwarf_lineaddr(dwarf_onesrcline(lines, i - 1), - postprologue_addr); - - return true; -} - -static void __skip_prologue(Dwarf_Die *sp_die, struct probe_finder *pf) -{ - size_t nr_lines = 0; - unsigned long entrypc_idx = 0; - Dwarf_Lines *lines = NULL; - Dwarf_Addr postprologue_addr; - Dwarf_Addr highpc; - - if (dwarf_highpc(sp_die, &highpc)) - return; - - if (dwarf_getsrclines(&pf->cu_die, &lines, &nr_lines)) - return; - - if (!get_entrypc_idx(lines, nr_lines, pf->addr, &entrypc_idx)) - return; - - if (!get_postprologue_addr(entrypc_idx, lines, nr_lines, - highpc, &postprologue_addr)) - return; - - pf->addr = postprologue_addr; -} - static void skip_prologue(Dwarf_Die *sp_die, struct probe_finder *pf) { struct perf_probe_point *pp = &pf->pev->point; @@ -1048,7 +916,7 @@ static void skip_prologue(Dwarf_Die *sp_die, struct probe_finder *pf) return; /* Compiled with optimization? */ - if (optimized_target(&pf->cu_die)) + if (die_is_optimized_target(&pf->cu_die)) return; /* Don't know entrypc? 
*/ @@ -1068,7 +936,7 @@ static void skip_prologue(Dwarf_Die *sp_die, struct probe_finder *pf) "Probe on address 0x%" PRIx64 " to force probing at the function entry.\n\n", pf->addr); - __skip_prologue(sp_die, pf); + die_skip_prologue(sp_die, &pf->cu_die, &pf->addr); } static int probe_point_inline_cb(Dwarf_Die *in_die, void *data) -- cgit v0.10.2 From 68ce4a0dea168e99d422aed8f93eca5528fd0e50 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 16 Aug 2016 16:09:48 -0400 Subject: perf/x86/intel/uncore: Remove hard-coded implementation for Node ID mapping location The method to build PCI bus to socket mapping is similar among platforms. However, the PCI location which stores Node ID mapping could vary between different platforms. For example, the Node ID mapping address on Skylake server is different from the previous platform. Also, to build the mapping for the PCI bus without UBOX, it has to start from bus 0 on Skylake server. This patch removes the current hardcoded implementation and adds three parameters for snbep_pci2phy_map_init(). This way the Node ID mapping address and bus searching direction can be configured according to different platforms. Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Nilay Vaish Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Link: http://lkml.kernel.org/r/1471378190-17276-1-git-send-email-kan.liang@intel.com Signed-off-by: Ingo Molnar diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index 8aee83b..3719af5 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -1,6 +1,10 @@ /* SandyBridge-EP/IvyTown uncore support */ #include "uncore.h" +/* SNB-EP pci bus to socket mapping */ +#define SNBEP_CPUNODEID 0x40 +#define SNBEP_GIDNIDMAP 0x54 + /* SNB-EP Box level control */ #define SNBEP_PMON_BOX_CTL_RST_CTRL (1 << 0) #define SNBEP_PMON_BOX_CTL_RST_CTRS (1 << 1) @@ -1153,7 +1157,7 @@ static struct pci_driver snbep_uncore_pci_driver = { /* * build pci bus to socket mapping */ -static int snbep_pci2phy_map_init(int devid) +static int snbep_pci2phy_map_init(int devid, int nodeid_loc, int idmap_loc, bool reverse) { struct pci_dev *ubox_dev = NULL; int i, bus, nodeid, segment; @@ -1168,12 +1172,12 @@ static int snbep_pci2phy_map_init(int devid) break; bus = ubox_dev->bus->number; /* get the Node ID of the local register */ - err = pci_read_config_dword(ubox_dev, 0x40, &config); + err = pci_read_config_dword(ubox_dev, nodeid_loc, &config); if (err) break; nodeid = config; /* get the Node ID mapping */ - err = pci_read_config_dword(ubox_dev, 0x54, &config); + err = pci_read_config_dword(ubox_dev, idmap_loc, &config); if (err) break; @@ -1207,11 +1211,20 @@ static int snbep_pci2phy_map_init(int devid) raw_spin_lock(&pci2phy_map_lock); list_for_each_entry(map, &pci2phy_map_head, list) { i = -1; - for (bus = 255; bus >= 0; bus--) { - if (map->pbus_to_physid[bus] >= 0) - i = map->pbus_to_physid[bus]; - else - map->pbus_to_physid[bus] = i; + if (reverse) { + for (bus = 255; bus >= 0; bus--) { + if (map->pbus_to_physid[bus] >= 0) + i = map->pbus_to_physid[bus]; + else + map->pbus_to_physid[bus] = i; + } + } else { + for (bus = 0; bus <= 255; bus++) { + if (map->pbus_to_physid[bus] >= 0) + i = map->pbus_to_physid[bus]; + else + map->pbus_to_physid[bus] = i; + } } } raw_spin_unlock(&pci2phy_map_lock); @@ -1224,7 +1237,7 @@ static int snbep_pci2phy_map_init(int 
devid) int snbep_uncore_pci_init(void) { - int ret = snbep_pci2phy_map_init(0x3ce0); + int ret = snbep_pci2phy_map_init(0x3ce0, SNBEP_CPUNODEID, SNBEP_GIDNIDMAP, true); if (ret) return ret; uncore_pci_uncores = snbep_pci_uncores; @@ -1788,7 +1801,7 @@ static struct pci_driver ivbep_uncore_pci_driver = { int ivbep_uncore_pci_init(void) { - int ret = snbep_pci2phy_map_init(0x0e1e); + int ret = snbep_pci2phy_map_init(0x0e1e, SNBEP_CPUNODEID, SNBEP_GIDNIDMAP, true); if (ret) return ret; uncore_pci_uncores = ivbep_pci_uncores; @@ -2897,7 +2910,7 @@ static struct pci_driver hswep_uncore_pci_driver = { int hswep_uncore_pci_init(void) { - int ret = snbep_pci2phy_map_init(0x2f1e); + int ret = snbep_pci2phy_map_init(0x2f1e, SNBEP_CPUNODEID, SNBEP_GIDNIDMAP, true); if (ret) return ret; uncore_pci_uncores = hswep_pci_uncores; @@ -3186,7 +3199,7 @@ static struct pci_driver bdx_uncore_pci_driver = { int bdx_uncore_pci_init(void) { - int ret = snbep_pci2phy_map_init(0x6f1e); + int ret = snbep_pci2phy_map_init(0x6f1e, SNBEP_CPUNODEID, SNBEP_GIDNIDMAP, true); if (ret) return ret; -- cgit v0.10.2 From 24cf84672e0a1e0d13f3894b60cd820a0140342a Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Tue, 16 Aug 2016 16:09:49 -0400 Subject: perf/x86/intel/uncore: Handle non-standard counter offset The offset of the counters for UPI and M2M boxes on Skylake server is non-standard (8 bytes apart). This patch introduces a custom flag UNCORE_BOX_FLAG_CTL_OFFS8 to specially handle it. Signed-off-by: Stephane Eranian Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Vince Weaver Link: http://lkml.kernel.org/r/1471378190-17276-2-git-send-email-kan.liang@intel.com Signed-off-by: Ingo Molnar diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h index 78b9c23..a43175f 100644 --- a/arch/x86/events/intel/uncore.h +++ b/arch/x86/events/intel/uncore.h @@ -120,6 +120,7 @@ struct intel_uncore_box { }; #define UNCORE_BOX_FLAG_INITIATED 0 +#define UNCORE_BOX_FLAG_CTL_OFFS8 1 /* event config registers are 8-byte apart */ struct uncore_event_desc { struct kobj_attribute attr; @@ -172,6 +173,9 @@ static inline unsigned uncore_pci_fixed_ctr(struct intel_uncore_box *box) static inline unsigned uncore_pci_event_ctl(struct intel_uncore_box *box, int idx) { + if (test_bit(UNCORE_BOX_FLAG_CTL_OFFS8, &box->flags)) + return idx * 8 + box->pmu->type->event_ctl; + return idx * 4 + box->pmu->type->event_ctl; } -- cgit v0.10.2 From c9bbdd4830ab06288bb1d8c00ed8c8c6e80e377a Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 15 Aug 2016 11:42:45 +0100 Subject: perf/core: Don't pass PERF_EF_START to the PMU ->start callback PERF_EF_START is a flag to indicate to the PMU ->add() callback that, as well as claiming the PMU resources required by the event being added, it should also start the PMU. Passing this flag to the ->start() callback doesn't make sense, because ->start() always tries to start the PMU. Remove it. 
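To illustrate the split of responsibilities this relies on, here is a minimal driver skeleton (not part of the patch; the my_pmu_* names and the my_hw_* hardware helpers are invented, and it assumes the usual <linux/perf_event.h> definitions): only ->add() consults PERF_EF_START, while ->start() starts the counter unconditionally.

    static void my_pmu_start(struct perf_event *event, int flags)
    {
    	/* ->start() always (re)starts counting; it never checks PERF_EF_START */
    	event->hw.state = 0;
    	my_hw_enable_counter(event->hw.idx);	/* invented hardware helper */
    }

    static int my_pmu_add(struct perf_event *event, int flags)
    {
    	/* claim a hardware counter for the event */
    	event->hw.idx = my_hw_claim_counter();	/* invented hardware helper */
    	if (event->hw.idx < 0)
    		return -EAGAIN;

    	event->hw.state = PERF_HES_STOPPED;

    	/* only ->add() looks at PERF_EF_START to decide whether to start now */
    	if (flags & PERF_EF_START)
    		my_pmu_start(event, 0);

    	return 0;
    }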
Signed-off-by: Will Deacon Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: mark.rutland@arm.com Link: http://lkml.kernel.org/r/1471257765-29662-1-git-send-email-will.deacon@arm.com Signed-off-by: Ingo Molnar diff --git a/kernel/events/core.c b/kernel/events/core.c index dff00c7..74f22a9 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -2492,7 +2492,7 @@ static int __perf_event_stop(void *info) * while restarting. */ if (sd->restart) - event->pmu->start(event, PERF_EF_START); + event->pmu->start(event, 0); return 0; } -- cgit v0.10.2 From 608c34de0b3d7bd15340a95ef758b4d8b81ebfc6 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 1 Sep 2016 17:54:31 -0300 Subject: perf symbols: Mark if a symbol is idle in the library This was being done just in 'perf top', but grouping idle symbols should be useful in other places as well, so remove one more symbol_filter_t user by moving this to the symbol library. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-5r7xitjkzjr9jak1zy3d8u5l@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index e091900..6f48df1 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -679,9 +679,6 @@ static int symbol_filter(struct map *map, struct symbol *sym) strstr(name, "_text_end")) return 1; - if (symbol__is_idle(sym)) - sym->idle = 1; - return 0; } diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 295d314..bd91a4f 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -1127,7 +1127,7 @@ new_symbol: if (filter && filter(curr_map, f)) symbol__delete(f); else { - symbols__insert(&curr_dso->symbols[curr_map->type], f); + __symbols__insert(&curr_dso->symbols[curr_map->type], f, dso->kernel); nr++; } } diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 98cd503..4c5788f 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -28,6 +28,8 @@ static int dso__load_kernel_sym(struct dso *dso, struct map *map, symbol_filter_t filter); static int dso__load_guest_kernel_sym(struct dso *dso, struct map *map, symbol_filter_t filter); +static bool symbol__is_idle(const char *name); + int vmlinux_path__nr_entries; char **vmlinux_path; @@ -277,13 +279,24 @@ void symbols__delete(struct rb_root *symbols) } } -void symbols__insert(struct rb_root *symbols, struct symbol *sym) +void __symbols__insert(struct rb_root *symbols, struct symbol *sym, bool kernel) { struct rb_node **p = &symbols->rb_node; struct rb_node *parent = NULL; const u64 ip = sym->start; struct symbol *s; + if (kernel) { + const char *name = sym->name; + /* + * ppc64 uses function descriptors and appends a '.' to the + * start of every instruction address. Remove it. 
+ */ + if (name[0] == '.') + name++; + sym->idle = symbol__is_idle(name); + } + while (*p != NULL) { parent = *p; s = rb_entry(parent, struct symbol, rb_node); @@ -296,6 +309,11 @@ void symbols__insert(struct rb_root *symbols, struct symbol *sym) rb_insert_color(&sym->rb_node, symbols); } +void symbols__insert(struct rb_root *symbols, struct symbol *sym) +{ + __symbols__insert(symbols, sym, false); +} + static struct symbol *symbols__find(struct rb_root *symbols, u64 ip) { struct rb_node *n; @@ -424,7 +442,7 @@ void dso__reset_find_symbol_cache(struct dso *dso) void dso__insert_symbol(struct dso *dso, enum map_type type, struct symbol *sym) { - symbols__insert(&dso->symbols[type], sym); + __symbols__insert(&dso->symbols[type], sym, dso->kernel); /* update the symbol cache if necessary */ if (dso->last_find_result[type].addr >= sym->start && @@ -546,7 +564,7 @@ struct process_kallsyms_args { * These are symbols in the kernel image, so make sure that * sym is from a kernel DSO. */ -bool symbol__is_idle(struct symbol *sym) +static bool symbol__is_idle(const char *name) { const char * const idle_symbols[] = { "cpu_idle", @@ -563,14 +581,10 @@ bool symbol__is_idle(struct symbol *sym) "pseries_dedicated_idle_sleep", NULL }; - int i; - if (!sym) - return false; - for (i = 0; idle_symbols[i]; i++) { - if (!strcmp(idle_symbols[i], sym->name)) + if (!strcmp(idle_symbols[i], name)) return true; } @@ -599,7 +613,7 @@ static int map__process_kallsym_symbol(void *arg, const char *name, * We will pass the symbols to the filter later, in * map__split_kallsyms, when we have split the maps per module */ - symbols__insert(root, sym); + __symbols__insert(root, sym, !strchr(name, '[')); return 0; } diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index e54ee7c..72d2931 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -294,7 +294,6 @@ size_t symbol__fprintf(struct symbol *sym, FILE *fp); bool symbol_type__is_a(char symbol_type, enum map_type map_type); bool symbol__restricted_filename(const char *filename, const char *restricted_filename); -bool symbol__is_idle(struct symbol *sym); int symbol__config_symfs(const struct option *opt __maybe_unused, const char *dir, int unset __maybe_unused); @@ -304,6 +303,7 @@ int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss, int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss, struct map *map, symbol_filter_t filter); +void __symbols__insert(struct rb_root *symbols, struct symbol *sym, bool kernel); void symbols__insert(struct rb_root *symbols, struct symbol *sym); void symbols__fixup_duplicate(struct rb_root *symbols); void symbols__fixup_end(struct rb_root *symbols); -- cgit v0.10.2 From b6220212d48a9bfbc694d10b38dbdfbaab81f4a0 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 1 Sep 2016 18:47:15 -0300 Subject: perf top: Remove old kernel-only symbol filter Not needed, we already have code to prune aliases. 
Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-1ysyce7qjgui93gi1efbjwhf@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 6f48df1..4007857 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -657,31 +657,6 @@ repeat: return NULL; } -static int symbol_filter(struct map *map, struct symbol *sym) -{ - const char *name = sym->name; - - if (!__map__is_kernel(map)) - return 0; - /* - * ppc64 uses function descriptors and appends a '.' to the - * start of every instruction address. Remove it. - */ - if (name[0] == '.') - name++; - - if (!strcmp(name, "_text") || - !strcmp(name, "_etext") || - !strcmp(name, "_sinittext") || - !strncmp("init_module", name, 11) || - !strncmp("cleanup_module", name, 14) || - strstr(name, "_text_start") || - strstr(name, "_text_end")) - return 1; - - return 0; -} - static int hist_iter__top_callback(struct hist_entry_iter *iter, struct addr_location *al, bool single, void *arg) @@ -946,8 +921,6 @@ static int __cmd_top(struct perf_top *top) if (top->session == NULL) return -1; - machines__set_symbol_filter(&top->session->machines, symbol_filter); - if (!objdump_path) { ret = perf_env__lookup_objdump(&top->session->header.env); if (ret) -- cgit v0.10.2 From 0890e97c20333c439a9bda6a94dd16ed5f508429 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 1 Sep 2016 18:53:58 -0300 Subject: perf machine: Remove machine->symbol_filter and friends Including machines__set_symbol_filter(), not used anymore. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-7o1qgmrpvzuis4a9f0t8mnri@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index e20438b..2f91183 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -1286,7 +1286,7 @@ try_again: * must be done prior to using kernel maps. 
*/ if (load_map) - map__load(al->map, machine->symbol_filter); + map__load(al->map, NULL); al->addr = al->map->map_ip(al->map, al->addr); } } @@ -1297,8 +1297,7 @@ void thread__find_addr_location(struct thread *thread, { thread__find_addr_map(thread, cpumode, type, addr, al); if (al->map != NULL) - al->sym = map__find_symbol(al->map, al->addr, - thread->mg->machine->symbol_filter); + al->sym = map__find_symbol(al->map, al->addr, NULL); else al->sym = NULL; } @@ -1359,8 +1358,7 @@ int machine__resolve(struct machine *machine, struct addr_location *al, al->filtered |= (1 << HIST_FILTER__DSO); } - al->sym = map__find_symbol(al->map, al->addr, - machine->symbol_filter); + al->sym = map__find_symbol(al->map, al->addr, NULL); } if (symbol_conf.sym_list && diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c index 749e6f2..240b095 100644 --- a/tools/perf/util/intel-bts.c +++ b/tools/perf/util/intel-bts.c @@ -346,7 +346,7 @@ static int intel_bts_get_next_insn(struct intel_bts_queue *btsq, u64 ip) goto out_put; /* Load maps to ensure dso->is_64_bit has been updated */ - map__load(al.map, machine->symbol_filter); + map__load(al.map, NULL); x86_64 = al.map->dso->is_64_bit; diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index 551ff6f..d594052 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -477,7 +477,7 @@ static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn, start_ip = *ip; /* Load maps to ensure dso->is_64_bit has been updated */ - map__load(al.map, machine->symbol_filter); + map__load(al.map, NULL); x86_64 = al.map->dso->is_64_bit; @@ -1294,7 +1294,7 @@ static u64 intel_pt_switch_ip(struct intel_pt *pt, u64 *ptss_ip) if (!map) return 0; - if (map__load(map, machine->symbol_filter)) + if (map__load(map, NULL)) return 0; start = dso__first_symbol(map->dso, MAP__FUNCTION); diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index cb6388d..7940ddc 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -41,7 +41,6 @@ int machine__init(struct machine *machine, const char *root_dir, pid_t pid) machine->pid = pid; - machine->symbol_filter = NULL; machine->id_hdr_size = 0; machine->kptr_restrict_warned = false; machine->comm_exec = false; @@ -148,7 +147,6 @@ void machines__init(struct machines *machines) { machine__init(&machines->host, "", HOST_KERNEL_ID); machines->guests = RB_ROOT; - machines->symbol_filter = NULL; } void machines__exit(struct machines *machines) @@ -172,8 +170,6 @@ struct machine *machines__add(struct machines *machines, pid_t pid, return NULL; } - machine->symbol_filter = machines->symbol_filter; - while (*p != NULL) { parent = *p; pos = rb_entry(parent, struct machine, rb_node); @@ -189,21 +185,6 @@ struct machine *machines__add(struct machines *machines, pid_t pid, return machine; } -void machines__set_symbol_filter(struct machines *machines, - symbol_filter_t symbol_filter) -{ - struct rb_node *nd; - - machines->symbol_filter = symbol_filter; - machines->host.symbol_filter = symbol_filter; - - for (nd = rb_first(&machines->guests); nd; nd = rb_next(nd)) { - struct machine *machine = rb_entry(nd, struct machine, rb_node); - - machine->symbol_filter = symbol_filter; - } -} - void machines__set_comm_exec(struct machines *machines, bool comm_exec) { struct rb_node *nd; @@ -2115,7 +2096,7 @@ int machine__get_kernel_start(struct machine *machine) */ machine->kernel_start = 1ULL << 63; if (map) { - err = map__load(map, machine->symbol_filter); + err = map__load(map, 
NULL); if (map->start) machine->kernel_start = map->start; } diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index 20739f7..b9e5588 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -41,7 +41,6 @@ struct machine { struct map_groups kmaps; struct map *vmlinux_maps[MAP__NR_TYPES]; u64 kernel_start; - symbol_filter_t symbol_filter; pid_t *current_tid; union { /* Tool specific area */ void *priv; @@ -110,7 +109,6 @@ typedef void (*machine__process_t)(struct machine *machine, void *data); struct machines { struct machine host; struct rb_root guests; - symbol_filter_t symbol_filter; }; void machines__init(struct machines *machines); @@ -128,8 +126,6 @@ struct machine *machines__findnew(struct machines *machines, pid_t pid); void machines__set_id_hdr_size(struct machines *machines, u16 id_hdr_size); char *machine__mmap_name(struct machine *machine, char *bf, size_t size); -void machines__set_symbol_filter(struct machines *machines, - symbol_filter_t symbol_filter); void machines__set_comm_exec(struct machines *machines, bool comm_exec); struct machine *machine__new_host(void); -- cgit v0.10.2 From c79c809197bf4faaf109d1a7bdc27ecedac375de Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 1 Sep 2016 19:22:02 -0300 Subject: perf test vmlinux: Remove dead symbol_filter_t code We don't need to initialize that area as we're not using it afterwards, leftover, ditch it. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-jb2un8buy4rqawz73mcdm1sn@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/tests/vmlinux-kallsyms.c b/tools/perf/tests/vmlinux-kallsyms.c index 77513bf..e6925d6 100644 --- a/tools/perf/tests/vmlinux-kallsyms.c +++ b/tools/perf/tests/vmlinux-kallsyms.c @@ -8,14 +8,6 @@ #include "debug.h" #include "machine.h" -static int vmlinux_matches_kallsyms_filter(struct map *map __maybe_unused, - struct symbol *sym) -{ - bool *visited = symbol__priv(sym); - *visited = true; - return 0; -} - #define UM(x) kallsyms_map->unmap_ip(kallsyms_map, (x)) int test__vmlinux_matches_kallsyms(int subtest __maybe_unused) @@ -100,8 +92,7 @@ int test__vmlinux_matches_kallsyms(int subtest __maybe_unused) * maps__reloc_vmlinux will notice and set proper ->[un]map_ip routines * to fixup the symbols. */ - if (machine__load_vmlinux_path(&vmlinux, type, - vmlinux_matches_kallsyms_filter) <= 0) { + if (machine__load_vmlinux_path(&vmlinux, type, NULL) <= 0) { pr_debug("Couldn't find a vmlinux that matches the kernel running on this machine, skipping test\n"); err = TEST_SKIP; goto out; -- cgit v0.10.2 From be39db9f2932f0ce4e116c71ba5ae2b542e536a7 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 1 Sep 2016 19:25:52 -0300 Subject: perf symbols: Remove symbol_filter_t machinery We're not using it anymore, few users were, but we really could do without it, simplify lots of functions by removing it. 
Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-1zng8wdznn00iiz08bb7q3vn@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/arch/powerpc/util/sym-handling.c b/tools/perf/arch/powerpc/util/sym-handling.c index 35745a7..ed9d5d1 100644 --- a/tools/perf/arch/powerpc/util/sym-handling.c +++ b/tools/perf/arch/powerpc/util/sym-handling.c @@ -108,7 +108,7 @@ void arch__post_process_probe_trace_events(struct perf_probe_event *pev, int i = 0; map = get_target_map(pev->target, pev->uprobes); - if (!map || map__load(map, NULL) < 0) + if (!map || map__load(map) < 0) return; for (i = 0; i < ntevs; i++) { diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 73c1c4c..b9bc7e3 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -429,7 +429,7 @@ static int perf_event__inject_buildid(struct perf_tool *tool, if (al.map != NULL) { if (!al.map->dso->hit) { al.map->dso->hit = 1; - if (map__load(al.map, NULL) >= 0) { + if (map__load(al.map) >= 0) { dso__inject_build_id(al.map->dso, tool, machine); /* * If this fails, too bad, let the other side diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index fdde1bd..d426dcb 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -330,7 +330,7 @@ static int build_alloc_func_list(void) } kernel_map = machine__kernel_map(machine); - if (map__load(kernel_map, NULL) < 0) { + if (map__load(kernel_map) < 0) { pr_err("cannot load kernel map\n"); return -ENOENT; } @@ -979,7 +979,7 @@ static void __print_slab_result(struct rb_root *root, if (is_caller) { addr = data->call_site; if (!raw_ip) - sym = machine__find_kernel_function(machine, addr, &map, NULL); + sym = machine__find_kernel_function(machine, addr, &map); } else addr = data->ptr; @@ -1043,8 +1043,7 @@ static void __print_page_alloc_result(struct perf_session *session, int n_lines) char *caller = buf; data = rb_entry(next, struct page_stat, node); - sym = machine__find_kernel_function(machine, data->callsite, - &map, NULL); + sym = machine__find_kernel_function(machine, data->callsite, &map); if (sym && sym->name) caller = sym->name; else @@ -1086,8 +1085,7 @@ static void __print_page_caller_result(struct perf_session *session, int n_lines char *caller = buf; data = rb_entry(next, struct page_stat, node); - sym = machine__find_kernel_function(machine, data->callsite, - &map, NULL); + sym = machine__find_kernel_function(machine, data->callsite, &map); if (sym && sym->name) caller = sym->name; else diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 6b3c8b0..7228d14 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -522,11 +522,11 @@ static void print_sample_brstacksym(struct perf_sample *sample, thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, from, &alf); if (alf.map) - alf.sym = map__find_symbol(alf.map, alf.addr, NULL); + alf.sym = map__find_symbol(alf.map, alf.addr); thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, to, &alt); if (alt.map) - alt.sym = map__find_symbol(alt.map, alt.addr, NULL); + alt.sym = map__find_symbol(alt.map, alt.addr); symbol__fprintf_symname_offs(alf.sym, &alf, stdout); putchar('/'); diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c index 2af156a..ff5bc63 100644 --- a/tools/perf/tests/code-reading.c +++ b/tools/perf/tests/code-reading.c @@ -263,7 +263,7 @@ static int read_object_code(u64 addr, 
size_t len, u8 cpumode, * Converting addresses for use by objdump requires more information. * map__load() does that. See map__rip_2objdump() for details. */ - if (map__load(al.map, NULL)) + if (map__load(al.map)) return -1; /* objdump struggles with kcore - try each map only once */ @@ -511,7 +511,7 @@ static int do_test_code_reading(bool try_kcore) /* Load kernel map */ map = machine__kernel_map(machine); - ret = map__load(map, NULL); + ret = map__load(map); if (ret < 0) { pr_debug("map__load failed\n"); goto out_err; diff --git a/tools/perf/tests/vmlinux-kallsyms.c b/tools/perf/tests/vmlinux-kallsyms.c index e6925d6..a508233 100644 --- a/tools/perf/tests/vmlinux-kallsyms.c +++ b/tools/perf/tests/vmlinux-kallsyms.c @@ -54,7 +54,7 @@ int test__vmlinux_matches_kallsyms(int subtest __maybe_unused) * be compacted against the list of modules found in the "vmlinux" * code and with the one got from /proc/modules from the "kallsyms" code. */ - if (__machine__load_kallsyms(&kallsyms, "/proc/kallsyms", type, true, NULL) <= 0) { + if (__machine__load_kallsyms(&kallsyms, "/proc/kallsyms", type, true) <= 0) { pr_debug("dso__load_kallsyms "); goto out; } @@ -92,7 +92,7 @@ int test__vmlinux_matches_kallsyms(int subtest __maybe_unused) * maps__reloc_vmlinux will notice and set proper ->[un]map_ip routines * to fixup the symbols. */ - if (machine__load_vmlinux_path(&vmlinux, type, NULL) <= 0) { + if (machine__load_vmlinux_path(&vmlinux, type) <= 0) { pr_debug("Couldn't find a vmlinux that matches the kernel running on this machine, skipping test\n"); err = TEST_SKIP; goto out; @@ -118,7 +118,7 @@ int test__vmlinux_matches_kallsyms(int subtest __maybe_unused) mem_end = vmlinux_map->unmap_ip(vmlinux_map, sym->end); first_pair = machine__find_kernel_symbol(&kallsyms, type, - mem_start, NULL, NULL); + mem_start, NULL); pair = first_pair; if (pair && UM(pair->start) == mem_start) { @@ -147,7 +147,7 @@ next_pair: */ continue; } else { - pair = machine__find_kernel_symbol_by_name(&kallsyms, type, sym->name, NULL, NULL); + pair = machine__find_kernel_symbol_by_name(&kallsyms, type, sym->name, NULL); if (pair) { if (UM(pair->start) == mem_start) goto next_pair; diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 2e2d100..4c18271 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -495,7 +495,7 @@ static bool annotate_browser__callq(struct annotate_browser *browser, if (!ins__is_call(dl->ins)) return false; - if (map_groups__find_ams(&target, NULL) || + if (map_groups__find_ams(&target) || map__rip_2objdump(target.map, target.map->map_ip(target.map, target.addr)) != dl->ops.target.addr) { diff --git a/tools/perf/ui/browsers/map.c b/tools/perf/ui/browsers/map.c index 8091277..98a3466 100644 --- a/tools/perf/ui/browsers/map.c +++ b/tools/perf/ui/browsers/map.c @@ -52,9 +52,9 @@ static int map_browser__search(struct map_browser *browser) if (target[0] == '0' && tolower(target[1]) == 'x') { u64 addr = strtoull(target, NULL, 16); - sym = map__find_symbol(browser->map, addr, NULL); + sym = map__find_symbol(browser->map, addr); } else - sym = map__find_symbol_by_name(browser->map, target, NULL); + sym = map__find_symbol_by_name(browser->map, target); if (sym != NULL) { u32 *idx = symbol__browser_index(sym); diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 1b59e31..2ff6bd7 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1077,7 +1077,7 @@ static int symbol__parse_objdump_line(struct symbol *sym, 
struct map *map, .addr = dl->ops.target.addr, }; - if (!map_groups__find_ams(&target, NULL) && + if (!map_groups__find_ams(&target) && target.sym->start == target.al_addr) dl->ops.target.name = strdup(target.sym->name); } diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 2f91183..9ad7d32 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -1286,7 +1286,7 @@ try_again: * must be done prior to using kernel maps. */ if (load_map) - map__load(al->map, NULL); + map__load(al->map); al->addr = al->map->map_ip(al->map, al->addr); } } @@ -1297,7 +1297,7 @@ void thread__find_addr_location(struct thread *thread, { thread__find_addr_map(thread, cpumode, type, addr, al); if (al->map != NULL) - al->sym = map__find_symbol(al->map, al->addr, NULL); + al->sym = map__find_symbol(al->map, al->addr); else al->sym = NULL; } @@ -1358,7 +1358,7 @@ int machine__resolve(struct machine *machine, struct addr_location *al, al->filtered |= (1 << HIST_FILTER__DSO); } - al->sym = map__find_symbol(al->map, al->addr, NULL); + al->sym = map__find_symbol(al->map, al->addr); } if (symbol_conf.sym_list && @@ -1414,5 +1414,5 @@ void thread__resolve(struct thread *thread, struct addr_location *al, al->sym = NULL; if (al->map) - al->sym = map__find_symbol(al->map, al->addr, NULL); + al->sym = map__find_symbol(al->map, al->addr); } diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c index 240b095..f545ec1 100644 --- a/tools/perf/util/intel-bts.c +++ b/tools/perf/util/intel-bts.c @@ -346,7 +346,7 @@ static int intel_bts_get_next_insn(struct intel_bts_queue *btsq, u64 ip) goto out_put; /* Load maps to ensure dso->is_64_bit has been updated */ - map__load(al.map, NULL); + map__load(al.map); x86_64 = al.map->dso->is_64_bit; diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index d594052..b9cc353 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -477,7 +477,7 @@ static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn, start_ip = *ip; /* Load maps to ensure dso->is_64_bit has been updated */ - map__load(al.map, NULL); + map__load(al.map); x86_64 = al.map->dso->is_64_bit; @@ -1294,7 +1294,7 @@ static u64 intel_pt_switch_ip(struct intel_pt *pt, u64 *ptss_ip) if (!map) return 0; - if (map__load(map, NULL)) + if (map__load(map)) return 0; start = dso__first_symbol(map->dso, MAP__FUNCTION); diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 7940ddc..18e4519 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -897,10 +897,10 @@ int machines__create_kernel_maps(struct machines *machines, pid_t pid) } int __machine__load_kallsyms(struct machine *machine, const char *filename, - enum map_type type, bool no_kcore, symbol_filter_t filter) + enum map_type type, bool no_kcore) { struct map *map = machine__kernel_map(machine); - int ret = __dso__load_kallsyms(map->dso, filename, map, no_kcore, filter); + int ret = __dso__load_kallsyms(map->dso, filename, map, no_kcore); if (ret > 0) { dso__set_loaded(map->dso, type); @@ -916,16 +916,15 @@ int __machine__load_kallsyms(struct machine *machine, const char *filename, } int machine__load_kallsyms(struct machine *machine, const char *filename, - enum map_type type, symbol_filter_t filter) + enum map_type type) { - return __machine__load_kallsyms(machine, filename, type, false, filter); + return __machine__load_kallsyms(machine, filename, type, false); } -int machine__load_vmlinux_path(struct machine *machine, enum map_type type, - symbol_filter_t 
filter) +int machine__load_vmlinux_path(struct machine *machine, enum map_type type) { struct map *map = machine__kernel_map(machine); - int ret = dso__load_vmlinux_path(map->dso, map, filter); + int ret = dso__load_vmlinux_path(map->dso, map); if (ret > 0) dso__set_loaded(map->dso, type); @@ -1294,7 +1293,7 @@ static int machine__process_kernel_mmap_event(struct machine *machine, /* * preload dso of guest kernel and modules */ - dso__load(kernel, machine__kernel_map(machine), NULL); + dso__load(kernel, machine__kernel_map(machine)); } } return 0; @@ -2096,7 +2095,7 @@ int machine__get_kernel_start(struct machine *machine) */ machine->kernel_start = 1ULL << 63; if (map) { - err = map__load(map, NULL); + err = map__load(map); if (map->start) machine->kernel_start = map->start; } @@ -2112,7 +2111,7 @@ char *machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, ch { struct machine *machine = vmachine; struct map *map; - struct symbol *sym = map_groups__find_symbol(&machine->kmaps, MAP__FUNCTION, *addrp, &map, NULL); + struct symbol *sym = map_groups__find_symbol(&machine->kmaps, MAP__FUNCTION, *addrp, &map); if (sym == NULL) return NULL; diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index b9e5588..354de6e 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -174,40 +174,33 @@ size_t machine__fprintf(struct machine *machine, FILE *fp); static inline struct symbol *machine__find_kernel_symbol(struct machine *machine, enum map_type type, u64 addr, - struct map **mapp, - symbol_filter_t filter) + struct map **mapp) { - return map_groups__find_symbol(&machine->kmaps, type, addr, - mapp, filter); + return map_groups__find_symbol(&machine->kmaps, type, addr, mapp); } static inline struct symbol *machine__find_kernel_symbol_by_name(struct machine *machine, enum map_type type, const char *name, - struct map **mapp, - symbol_filter_t filter) + struct map **mapp) { - return map_groups__find_symbol_by_name(&machine->kmaps, type, name, - mapp, filter); + return map_groups__find_symbol_by_name(&machine->kmaps, type, name, mapp); } static inline struct symbol *machine__find_kernel_function(struct machine *machine, u64 addr, - struct map **mapp, - symbol_filter_t filter) + struct map **mapp) { return machine__find_kernel_symbol(machine, MAP__FUNCTION, addr, - mapp, filter); + mapp); } static inline struct symbol *machine__find_kernel_function_by_name(struct machine *machine, const char *name, - struct map **mapp, - symbol_filter_t filter) + struct map **mapp) { - return map_groups__find_function_by_name(&machine->kmaps, name, mapp, - filter); + return map_groups__find_function_by_name(&machine->kmaps, name, mapp); } struct map *machine__findnew_module_map(struct machine *machine, u64 start, @@ -215,11 +208,10 @@ struct map *machine__findnew_module_map(struct machine *machine, u64 start, int arch__fix_module_text_start(u64 *start, const char *name); int __machine__load_kallsyms(struct machine *machine, const char *filename, - enum map_type type, bool no_kcore, symbol_filter_t filter); + enum map_type type, bool no_kcore); int machine__load_kallsyms(struct machine *machine, const char *filename, - enum map_type type, symbol_filter_t filter); -int machine__load_vmlinux_path(struct machine *machine, enum map_type type, - symbol_filter_t filter); + enum map_type type); +int machine__load_vmlinux_path(struct machine *machine, enum map_type type); size_t machine__fprintf_dsos_buildid(struct machine *machine, FILE *fp, bool (skip)(struct dso *dso, int 
parm), int parm); diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 728129a..0c54adb 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -279,7 +279,7 @@ void map__fixup_end(struct map *map) #define DSO__DELETED "(deleted)" -int map__load(struct map *map, symbol_filter_t filter) +int map__load(struct map *map) { const char *name = map->dso->long_name; int nr; @@ -287,7 +287,7 @@ int map__load(struct map *map, symbol_filter_t filter) if (dso__loaded(map->dso, map->type)) return 0; - nr = dso__load(map->dso, map, filter); + nr = dso__load(map->dso, map); if (nr < 0) { if (map->dso->has_build_id) { char sbuild_id[SBUILD_ID_SIZE]; @@ -312,9 +312,6 @@ int map__load(struct map *map, symbol_filter_t filter) pr_warning("%.*s was updated (is prelink enabled?). " "Restart the long running apps that use it!\n", (int)real_len, name); - } else if (filter) { - pr_warning("no symbols passed the given filter.\n"); - return -2; /* Empty but maybe by the filter */ } else { pr_warning("no symbols found in %s, maybe install " "a debug package?\n", name); @@ -331,19 +328,17 @@ int __weak arch__compare_symbol_names(const char *namea, const char *nameb) return strcmp(namea, nameb); } -struct symbol *map__find_symbol(struct map *map, u64 addr, - symbol_filter_t filter) +struct symbol *map__find_symbol(struct map *map, u64 addr) { - if (map__load(map, filter) < 0) + if (map__load(map) < 0) return NULL; return dso__find_symbol(map->dso, map->type, addr); } -struct symbol *map__find_symbol_by_name(struct map *map, const char *name, - symbol_filter_t filter) +struct symbol *map__find_symbol_by_name(struct map *map, const char *name) { - if (map__load(map, filter) < 0) + if (map__load(map) < 0) return NULL; if (!dso__sorted_by_name(map->dso, map->type)) @@ -556,23 +551,22 @@ void map_groups__put(struct map_groups *mg) struct symbol *map_groups__find_symbol(struct map_groups *mg, enum map_type type, u64 addr, - struct map **mapp, - symbol_filter_t filter) + struct map **mapp) { struct map *map = map_groups__find(mg, type, addr); /* Ensure map is loaded before using map->map_ip */ - if (map != NULL && map__load(map, filter) >= 0) { + if (map != NULL && map__load(map) >= 0) { if (mapp != NULL) *mapp = map; - return map__find_symbol(map, map->map_ip(map, addr), filter); + return map__find_symbol(map, map->map_ip(map, addr)); } return NULL; } struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name, - struct map **mapp, symbol_filter_t filter) + struct map **mapp) { struct symbol *sym; struct rb_node *nd; @@ -582,7 +576,7 @@ struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name, for (nd = rb_first(&maps->entries); nd; nd = rb_next(nd)) { struct map *pos = rb_entry(nd, struct map, rb_node); - sym = map__find_symbol_by_name(pos, name, filter); + sym = map__find_symbol_by_name(pos, name); if (sym == NULL) continue; @@ -600,15 +594,14 @@ out: struct symbol *map_groups__find_symbol_by_name(struct map_groups *mg, enum map_type type, const char *name, - struct map **mapp, - symbol_filter_t filter) + struct map **mapp) { - struct symbol *sym = maps__find_symbol_by_name(&mg->maps[type], name, mapp, filter); + struct symbol *sym = maps__find_symbol_by_name(&mg->maps[type], name, mapp); return sym; } -int map_groups__find_ams(struct addr_map_symbol *ams, symbol_filter_t filter) +int map_groups__find_ams(struct addr_map_symbol *ams) { if (ams->addr < ams->map->start || ams->addr >= ams->map->end) { if (ams->map->groups == NULL) @@ -620,7 +613,7 @@ int 
map_groups__find_ams(struct addr_map_symbol *ams, symbol_filter_t filter) } ams->al_addr = ams->map->map_ip(ams->map, ams->addr); - ams->sym = map__find_symbol(ams->map, ams->al_addr, filter); + ams->sym = map__find_symbol(ams->map, ams->al_addr); return ams->sym ? 0 : -1; } diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h index d83396c..abdacf8 100644 --- a/tools/perf/util/map.h +++ b/tools/perf/util/map.h @@ -127,17 +127,14 @@ struct thread; * @map: the 'struct map *' in which symbols itereated * @sym_name: the symbol name * @pos: the 'struct symbol *' to use as a loop cursor - * @filter: to use when loading the DSO */ -#define __map__for_each_symbol_by_name(map, sym_name, pos, filter) \ - for (pos = map__find_symbol_by_name(map, sym_name, filter); \ +#define __map__for_each_symbol_by_name(map, sym_name, pos) \ + for (pos = map__find_symbol_by_name(map, sym_name); \ pos && arch__compare_symbol_names(pos->name, sym_name) == 0; \ pos = symbol__next_by_name(pos)) #define map__for_each_symbol_by_name(map, sym_name, pos) \ - __map__for_each_symbol_by_name(map, sym_name, (pos), NULL) - -typedef int (*symbol_filter_t)(struct map *map, struct symbol *sym); + __map__for_each_symbol_by_name(map, sym_name, (pos)) int arch__compare_symbol_names(const char *namea, const char *nameb); void map__init(struct map *map, enum map_type type, @@ -173,11 +170,9 @@ size_t map__fprintf_dsoname(struct map *map, FILE *fp); int map__fprintf_srcline(struct map *map, u64 addr, const char *prefix, FILE *fp); -int map__load(struct map *map, symbol_filter_t filter); -struct symbol *map__find_symbol(struct map *map, - u64 addr, symbol_filter_t filter); -struct symbol *map__find_symbol_by_name(struct map *map, const char *name, - symbol_filter_t filter); +int map__load(struct map *map); +struct symbol *map__find_symbol(struct map *map, u64 addr); +struct symbol *map__find_symbol_by_name(struct map *map, const char *name); void map__fixup_start(struct map *map); void map__fixup_end(struct map *map); @@ -191,7 +186,7 @@ struct map *maps__find(struct maps *maps, u64 addr); struct map *maps__first(struct maps *maps); struct map *map__next(struct map *map); struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name, - struct map **mapp, symbol_filter_t filter); + struct map **mapp); void map_groups__init(struct map_groups *mg, struct machine *machine); void map_groups__exit(struct map_groups *mg); int map_groups__clone(struct thread *thread, @@ -231,25 +226,22 @@ static inline struct map *map_groups__next(struct map *map) struct symbol *map_groups__find_symbol(struct map_groups *mg, enum map_type type, u64 addr, - struct map **mapp, - symbol_filter_t filter); + struct map **mapp); struct symbol *map_groups__find_symbol_by_name(struct map_groups *mg, enum map_type type, const char *name, - struct map **mapp, - symbol_filter_t filter); + struct map **mapp); struct addr_map_symbol; -int map_groups__find_ams(struct addr_map_symbol *ams, symbol_filter_t filter); +int map_groups__find_ams(struct addr_map_symbol *ams); static inline struct symbol *map_groups__find_function_by_name(struct map_groups *mg, - const char *name, struct map **mapp, - symbol_filter_t filter) + const char *name, struct map **mapp) { - return map_groups__find_symbol_by_name(mg, MAP__FUNCTION, name, mapp, filter); + return map_groups__find_symbol_by_name(mg, MAP__FUNCTION, name, mapp); } int map_groups__fixup_overlappings(struct map_groups *mg, struct map *map, diff --git a/tools/perf/util/probe-event.c 
b/tools/perf/util/probe-event.c index a543e9c..bc60ce4 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -110,13 +110,12 @@ void exit_probe_symbol_maps(void) static struct symbol *__find_kernel_function_by_name(const char *name, struct map **mapp) { - return machine__find_kernel_function_by_name(host_machine, name, mapp, - NULL); + return machine__find_kernel_function_by_name(host_machine, name, mapp); } static struct symbol *__find_kernel_function(u64 addr, struct map **mapp) { - return machine__find_kernel_function(host_machine, addr, mapp, NULL); + return machine__find_kernel_function(host_machine, addr, mapp); } static struct ref_reloc_sym *kernel_get_ref_reloc_sym(void) @@ -125,7 +124,7 @@ static struct ref_reloc_sym *kernel_get_ref_reloc_sym(void) struct kmap *kmap; struct map *map = machine__kernel_map(host_machine); - if (map__load(map, NULL) < 0) + if (map__load(map) < 0) return NULL; kmap = map__kmap(map); @@ -351,9 +350,9 @@ static int kernel_get_module_dso(const char *module, struct dso **pdso) vmlinux_name = symbol_conf.vmlinux_name; dso->load_errno = 0; if (vmlinux_name) - ret = dso__load_vmlinux(dso, map, vmlinux_name, false, NULL); + ret = dso__load_vmlinux(dso, map, vmlinux_name, false); else - ret = dso__load_vmlinux_path(dso, map, NULL); + ret = dso__load_vmlinux_path(dso, map); found: *pdso = dso; return ret; @@ -1999,7 +1998,7 @@ static int find_perf_probe_point_from_map(struct probe_trace_point *tp, map = dso__new_map(tp->module); if (!map) goto out; - sym = map__find_symbol(map, addr, NULL); + sym = map__find_symbol(map, addr); } else { if (tp->symbol && !addr) { if (kernel_get_symbol_address_by_name(tp->symbol, @@ -2704,7 +2703,7 @@ static int find_probe_functions(struct map *map, char *name, struct symbol *sym; struct rb_node *tmp; - if (map__load(map, NULL) < 0) + if (map__load(map) < 0) return 0; map__for_each_symbol(map, sym, tmp) { @@ -3368,7 +3367,7 @@ int show_available_funcs(const char *target, struct strfilter *_filter, return -EINVAL; } - ret = map__load(map, NULL); + ret = map__load(map); if (ret) { if (ret == -2) { char *str = strfilter__string(_filter); diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index bd91a4f..99400b0 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -254,8 +254,7 @@ static char *demangle_sym(struct dso *dso, int kmodule, const char *elf_name) * And always look at the original dso, not at debuginfo packages, that * have the PLT data stripped out (shdr_rel_plt.sh_type == SHT_NOBITS). 
*/ -int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss, struct map *map, - symbol_filter_t filter) +int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss, struct map *map) { uint32_t nr_rel_entries, idx; GElf_Sym sym; @@ -351,12 +350,8 @@ int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss, struct map * if (!f) goto out_elf_end; - if (filter && filter(map, f)) - symbol__delete(f); - else { - symbols__insert(&dso->symbols[map->type], f); - ++nr; - } + symbols__insert(&dso->symbols[map->type], f); + ++nr; } } else if (shdr_rel_plt.sh_type == SHT_REL) { GElf_Rel pos_mem, *pos; @@ -381,12 +376,8 @@ int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss, struct map * if (!f) goto out_elf_end; - if (filter && filter(map, f)) - symbol__delete(f); - else { - symbols__insert(&dso->symbols[map->type], f); - ++nr; - } + symbols__insert(&dso->symbols[map->type], f); + ++nr; } } @@ -825,9 +816,8 @@ static u64 ref_reloc(struct kmap *kmap) void __weak arch__sym_update(struct symbol *s __maybe_unused, GElf_Sym *sym __maybe_unused) { } -int dso__load_sym(struct dso *dso, struct map *map, - struct symsrc *syms_ss, struct symsrc *runtime_ss, - symbol_filter_t filter, int kmodule) +int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss, + struct symsrc *runtime_ss, int kmodule) { struct kmap *kmap = dso->kernel ? map__kmap(map) : NULL; struct map_groups *kmaps = kmap ? map__kmaps(map) : NULL; @@ -1124,12 +1114,8 @@ new_symbol: arch__sym_update(f, &sym); - if (filter && filter(curr_map, f)) - symbol__delete(f); - else { - __symbols__insert(&curr_dso->symbols[curr_map->type], f, dso->kernel); - nr++; - } + __symbols__insert(&curr_dso->symbols[curr_map->type], f, dso->kernel); + nr++; } /* diff --git a/tools/perf/util/symbol-minimal.c b/tools/perf/util/symbol-minimal.c index 4890633..11cdde9 100644 --- a/tools/perf/util/symbol-minimal.c +++ b/tools/perf/util/symbol-minimal.c @@ -287,8 +287,7 @@ void symsrc__destroy(struct symsrc *ss) int dso__synthesize_plt_symbols(struct dso *dso __maybe_unused, struct symsrc *ss __maybe_unused, - struct map *map __maybe_unused, - symbol_filter_t filter __maybe_unused) + struct map *map __maybe_unused) { return 0; } @@ -334,7 +333,6 @@ enum dso_type dso__type_fd(int fd) int dso__load_sym(struct dso *dso, struct map *map __maybe_unused, struct symsrc *ss, struct symsrc *runtime_ss __maybe_unused, - symbol_filter_t filter __maybe_unused, int kmodule __maybe_unused) { unsigned char build_id[BUILD_ID_SIZE]; diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 4c5788f..19c9c55 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -24,10 +24,8 @@ #include #include -static int dso__load_kernel_sym(struct dso *dso, struct map *map, - symbol_filter_t filter); -static int dso__load_guest_kernel_sym(struct dso *dso, struct map *map, - symbol_filter_t filter); +static int dso__load_kernel_sym(struct dso *dso, struct map *map); +static int dso__load_guest_kernel_sym(struct dso *dso, struct map *map); static bool symbol__is_idle(const char *name); int vmlinux_path__nr_entries; @@ -630,8 +628,7 @@ static int dso__load_all_kallsyms(struct dso *dso, const char *filename, return kallsyms__parse(filename, &args, map__process_kallsym_symbol); } -static int dso__split_kallsyms_for_kcore(struct dso *dso, struct map *map, - symbol_filter_t filter) +static int dso__split_kallsyms_for_kcore(struct dso *dso, struct map *map) { struct map_groups *kmaps = map__kmaps(map); struct map *curr_map; 
@@ -660,7 +657,7 @@ static int dso__split_kallsyms_for_kcore(struct dso *dso, struct map *map, curr_map = map_groups__find(kmaps, map->type, pos->start); - if (!curr_map || (filter && filter(curr_map, pos))) { + if (!curr_map) { symbol__delete(pos); continue; } @@ -683,8 +680,7 @@ static int dso__split_kallsyms_for_kcore(struct dso *dso, struct map *map, * kernel range is broken in several maps, named [kernel].N, as we don't have * the original ELF section names vmlinux have. */ -static int dso__split_kallsyms(struct dso *dso, struct map *map, u64 delta, - symbol_filter_t filter) +static int dso__split_kallsyms(struct dso *dso, struct map *map, u64 delta) { struct map_groups *kmaps = map__kmaps(map); struct machine *machine; @@ -761,7 +757,7 @@ static int dso__split_kallsyms(struct dso *dso, struct map *map, u64 delta, if (count == 0) { curr_map = map; - goto filter_symbol; + goto add_symbol; } if (dso->kernel == DSO_TYPE_GUEST_KERNEL) @@ -793,18 +789,18 @@ static int dso__split_kallsyms(struct dso *dso, struct map *map, u64 delta, pos->start -= delta; pos->end -= delta; } -filter_symbol: - if (filter && filter(curr_map, pos)) { -discard_symbol: rb_erase(&pos->rb_node, root); - symbol__delete(pos); - } else { - if (curr_map != map) { - rb_erase(&pos->rb_node, root); - symbols__insert(&curr_map->dso->symbols[curr_map->type], pos); - ++moved; - } else - ++count; - } +add_symbol: + if (curr_map != map) { + rb_erase(&pos->rb_node, root); + symbols__insert(&curr_map->dso->symbols[curr_map->type], pos); + ++moved; + } else + ++count; + + continue; +discard_symbol: + rb_erase(&pos->rb_node, root); + symbol__delete(pos); } if (curr_map != map && @@ -1244,7 +1240,7 @@ static int kallsyms__delta(struct map *map, const char *filename, u64 *delta) } int __dso__load_kallsyms(struct dso *dso, const char *filename, - struct map *map, bool no_kcore, symbol_filter_t filter) + struct map *map, bool no_kcore) { u64 delta = 0; @@ -1266,19 +1262,18 @@ int __dso__load_kallsyms(struct dso *dso, const char *filename, dso->symtab_type = DSO_BINARY_TYPE__KALLSYMS; if (!no_kcore && !dso__load_kcore(dso, map, filename)) - return dso__split_kallsyms_for_kcore(dso, map, filter); + return dso__split_kallsyms_for_kcore(dso, map); else - return dso__split_kallsyms(dso, map, delta, filter); + return dso__split_kallsyms(dso, map, delta); } int dso__load_kallsyms(struct dso *dso, const char *filename, - struct map *map, symbol_filter_t filter) + struct map *map) { - return __dso__load_kallsyms(dso, filename, map, false, filter); + return __dso__load_kallsyms(dso, filename, map, false); } -static int dso__load_perf_map(struct dso *dso, struct map *map, - symbol_filter_t filter) +static int dso__load_perf_map(struct dso *dso, struct map *map) { char *line = NULL; size_t n; @@ -1320,12 +1315,8 @@ static int dso__load_perf_map(struct dso *dso, struct map *map, if (sym == NULL) goto out_delete_line; - if (filter && filter(map, sym)) - symbol__delete(sym); - else { - symbols__insert(&dso->symbols[map->type], sym); - nr_syms++; - } + symbols__insert(&dso->symbols[map->type], sym); + nr_syms++; } free(line); @@ -1381,7 +1372,7 @@ static bool dso__is_compatible_symtab_type(struct dso *dso, bool kmod, } } -int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter) +int dso__load(struct dso *dso, struct map *map) { char *name; int ret = -1; @@ -1404,9 +1395,9 @@ int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter) if (dso->kernel) { if (dso->kernel == DSO_TYPE_KERNEL) - ret = dso__load_kernel_sym(dso, 
map, filter); + ret = dso__load_kernel_sym(dso, map); else if (dso->kernel == DSO_TYPE_GUEST_KERNEL) - ret = dso__load_guest_kernel_sym(dso, map, filter); + ret = dso__load_guest_kernel_sym(dso, map); goto out; } @@ -1430,7 +1421,7 @@ int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter) goto out; } - ret = dso__load_perf_map(dso, map, filter); + ret = dso__load_perf_map(dso, map); dso->symtab_type = ret > 0 ? DSO_BINARY_TYPE__JAVA_JIT : DSO_BINARY_TYPE__NOT_FOUND; goto out; @@ -1521,14 +1512,14 @@ int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter) kmod = true; if (syms_ss) - ret = dso__load_sym(dso, map, syms_ss, runtime_ss, filter, kmod); + ret = dso__load_sym(dso, map, syms_ss, runtime_ss, kmod); else ret = -1; if (ret > 0) { int nr_plt; - nr_plt = dso__synthesize_plt_symbols(dso, runtime_ss, map, filter); + nr_plt = dso__synthesize_plt_symbols(dso, runtime_ss, map); if (nr_plt > 0) ret += nr_plt; } @@ -1567,8 +1558,7 @@ out_unlock: } int dso__load_vmlinux(struct dso *dso, struct map *map, - const char *vmlinux, bool vmlinux_allocated, - symbol_filter_t filter) + const char *vmlinux, bool vmlinux_allocated) { int err = -1; struct symsrc ss; @@ -1588,7 +1578,7 @@ int dso__load_vmlinux(struct dso *dso, struct map *map, if (symsrc__init(&ss, dso, symfs_vmlinux, symtab_type)) return -1; - err = dso__load_sym(dso, map, &ss, &ss, filter, 0); + err = dso__load_sym(dso, map, &ss, &ss, 0); symsrc__destroy(&ss); if (err > 0) { @@ -1604,8 +1594,7 @@ int dso__load_vmlinux(struct dso *dso, struct map *map, return err; } -int dso__load_vmlinux_path(struct dso *dso, struct map *map, - symbol_filter_t filter) +int dso__load_vmlinux_path(struct dso *dso, struct map *map) { int i, err = 0; char *filename = NULL; @@ -1614,7 +1603,7 @@ int dso__load_vmlinux_path(struct dso *dso, struct map *map, vmlinux_path__nr_entries + 1); for (i = 0; i < vmlinux_path__nr_entries; ++i) { - err = dso__load_vmlinux(dso, map, vmlinux_path[i], false, filter); + err = dso__load_vmlinux(dso, map, vmlinux_path[i], false); if (err > 0) goto out; } @@ -1622,7 +1611,7 @@ int dso__load_vmlinux_path(struct dso *dso, struct map *map, if (!symbol_conf.ignore_vmlinux_buildid) filename = dso__build_id_filename(dso, NULL, 0); if (filename != NULL) { - err = dso__load_vmlinux(dso, map, filename, true, filter); + err = dso__load_vmlinux(dso, map, filename, true); if (err > 0) goto out; free(filename); @@ -1736,8 +1725,7 @@ proc_kallsyms: return strdup(path); } -static int dso__load_kernel_sym(struct dso *dso, struct map *map, - symbol_filter_t filter) +static int dso__load_kernel_sym(struct dso *dso, struct map *map) { int err; const char *kallsyms_filename = NULL; @@ -1763,12 +1751,11 @@ static int dso__load_kernel_sym(struct dso *dso, struct map *map, } if (!symbol_conf.ignore_vmlinux && symbol_conf.vmlinux_name != NULL) { - return dso__load_vmlinux(dso, map, symbol_conf.vmlinux_name, - false, filter); + return dso__load_vmlinux(dso, map, symbol_conf.vmlinux_name, false); } if (!symbol_conf.ignore_vmlinux && vmlinux_path != NULL) { - err = dso__load_vmlinux_path(dso, map, filter); + err = dso__load_vmlinux_path(dso, map); if (err > 0) return err; } @@ -1784,7 +1771,7 @@ static int dso__load_kernel_sym(struct dso *dso, struct map *map, kallsyms_filename = kallsyms_allocated_filename; do_kallsyms: - err = dso__load_kallsyms(dso, kallsyms_filename, map, filter); + err = dso__load_kallsyms(dso, kallsyms_filename, map); if (err > 0) pr_debug("Using %s for symbols\n", kallsyms_filename); 
free(kallsyms_allocated_filename); @@ -1799,8 +1786,7 @@ do_kallsyms: return err; } -static int dso__load_guest_kernel_sym(struct dso *dso, struct map *map, - symbol_filter_t filter) +static int dso__load_guest_kernel_sym(struct dso *dso, struct map *map) { int err; const char *kallsyms_filename = NULL; @@ -1822,7 +1808,7 @@ static int dso__load_guest_kernel_sym(struct dso *dso, struct map *map, if (symbol_conf.default_guest_vmlinux_name != NULL) { err = dso__load_vmlinux(dso, map, symbol_conf.default_guest_vmlinux_name, - false, filter); + false); return err; } @@ -1834,7 +1820,7 @@ static int dso__load_guest_kernel_sym(struct dso *dso, struct map *map, kallsyms_filename = path; } - err = dso__load_kallsyms(dso, kallsyms_filename, map, filter); + err = dso__load_kallsyms(dso, kallsyms_filename, map); if (err > 0) pr_debug("Using %s for symbols\n", kallsyms_filename); if (err > 0 && !dso__is_kcore(dso)) { diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 72d2931..0dacfb7 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -241,16 +241,13 @@ int symsrc__init(struct symsrc *ss, struct dso *dso, const char *name, bool symsrc__has_symtab(struct symsrc *ss); bool symsrc__possibly_runtime(struct symsrc *ss); -int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter); +int dso__load(struct dso *dso, struct map *map); int dso__load_vmlinux(struct dso *dso, struct map *map, - const char *vmlinux, bool vmlinux_allocated, - symbol_filter_t filter); -int dso__load_vmlinux_path(struct dso *dso, struct map *map, - symbol_filter_t filter); + const char *vmlinux, bool vmlinux_allocated); +int dso__load_vmlinux_path(struct dso *dso, struct map *map); int __dso__load_kallsyms(struct dso *dso, const char *filename, struct map *map, - bool no_kcore, symbol_filter_t filter); -int dso__load_kallsyms(struct dso *dso, const char *filename, struct map *map, - symbol_filter_t filter); + bool no_kcore); +int dso__load_kallsyms(struct dso *dso, const char *filename, struct map *map); void dso__insert_symbol(struct dso *dso, enum map_type type, struct symbol *sym); @@ -298,10 +295,9 @@ int symbol__config_symfs(const struct option *opt __maybe_unused, const char *dir, int unset __maybe_unused); int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss, - struct symsrc *runtime_ss, symbol_filter_t filter, - int kmodule); + struct symsrc *runtime_ss, int kmodule); int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss, - struct map *map, symbol_filter_t filter); + struct map *map); void __symbols__insert(struct rb_root *symbols, struct symbol *sym, bool kernel); void symbols__insert(struct rb_root *symbols, struct symbol *sym); -- cgit v0.10.2 From 0ac3348e502423cf2fe86beca83b8835a2e6d289 Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Tue, 6 Sep 2016 04:58:27 +0000 Subject: perf tools: Recognize hugetlb mapping as anon mapping Hugetlbfs mapping should be recognized as anon mapping so user has a chance to create /tmp/perf-.map file for symbol resolving. This patch utilizes MAP_HUGETLB to identify hugetlb mapping. After this patch, if perf is started before a program starts using huge pages (so perf gets MMAP2 events from kernel), perf is able to recognize hugetlb mapping as anon mapping. 
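For reference, the kind of mapping this targets can be produced with a plain mmap(MAP_HUGETLB) call; the program below is only an illustration of such a workload, not code from this patch. The 2MB length is an assumption (it must match the system's huge page size), and huge pages have to be reserved beforehand, e.g. via the vm.nr_hugepages sysctl, or the mmap() fails.

    #include <stdio.h>
    #include <sys/mman.h>

    #define LEN (2UL * 1024 * 1024)	/* assumes a 2MB default huge page size */

    int main(void)
    {
    	/*
    	 * Anonymous huge-page mapping; the kernel reports MAP_HUGETLB in the
    	 * flags of the corresponding MMAP2 event, which perf now treats like
    	 * an anonymous mapping.
    	 */
    	void *p = mmap(NULL, LEN, PROT_READ | PROT_WRITE,
    		       MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
    	if (p == MAP_FAILED) {
    		perror("mmap");	/* fails unless huge pages are reserved */
    		return 1;
    	}
    	munmap(p, LEN);
    	return 0;
    }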
Signed-off-by: Wang Nan Cc: He Kuang Cc: Nilay Vaish Cc: Zefan Li Link: http://lkml.kernel.org/r/1473137909-142064-2-git-send-email-wangnan0@huawei.com Signed-off-by: Hou Pengyang Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 0c54adb..d51a125 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -6,6 +6,7 @@ #include #include #include +#include #include "map.h" #include "thread.h" #include "strlist.h" @@ -24,9 +25,15 @@ const char *map_type__name[MAP__NR_TYPES] = { [MAP__VARIABLE] = "Variables", }; -static inline int is_anon_memory(const char *filename) +static inline int is_anon_memory(const char *filename, u32 flags) { - return !strcmp(filename, "//anon") || + u32 anon_flags = 0; + +#ifdef MAP_HUGETLB + anon_flags |= MAP_HUGETLB; +#endif + return flags & anon_flags || + !strcmp(filename, "//anon") || !strncmp(filename, "/dev/zero", sizeof("/dev/zero") - 1) || !strncmp(filename, "/anon_hugepage", sizeof("/anon_hugepage") - 1); } @@ -155,7 +162,7 @@ struct map *map__new(struct machine *machine, u64 start, u64 len, int anon, no_dso, vdso, android; android = is_android_lib(filename); - anon = is_anon_memory(filename); + anon = is_anon_memory(filename, flags); vdso = is_vdso_map(filename); no_dso = is_no_dso_memory(filename); -- cgit v0.10.2 From 5e7be3e1f9d71ed03fac27df25e143815be662d2 Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Tue, 6 Sep 2016 04:58:28 +0000 Subject: tools lib api fs: Add hugetlbfs filesystem detector Detect hugetlbfs. hugetlbfs__mountpoint() will be used during recording to help identifying hugetlb mmaps: which should be recognized as anon mapping. Signed-off-by: Wang Nan Reviewed-by: Nilay Vaish Cc: He Kuang Cc: Hou Pengyang Cc: Zefan Li Link: http://lkml.kernel.org/r/1473137909-142064-3-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/lib/api/fs/fs.c b/tools/lib/api/fs/fs.c index ba7094b..f99f49e4 100644 --- a/tools/lib/api/fs/fs.c +++ b/tools/lib/api/fs/fs.c @@ -34,6 +34,10 @@ #define TRACEFS_MAGIC 0x74726163 #endif +#ifndef HUGETLBFS_MAGIC +#define HUGETLBFS_MAGIC 0x958458f6 +#endif + static const char * const sysfs__fs_known_mountpoints[] = { "/sys", 0, @@ -67,6 +71,10 @@ static const char * const tracefs__known_mountpoints[] = { 0, }; +static const char * const hugetlbfs__known_mountpoints[] = { + 0, +}; + struct fs { const char *name; const char * const *mounts; @@ -80,6 +88,7 @@ enum { FS__PROCFS = 1, FS__DEBUGFS = 2, FS__TRACEFS = 3, + FS__HUGETLBFS = 4, }; #ifndef TRACEFS_MAGIC @@ -107,6 +116,11 @@ static struct fs fs__entries[] = { .mounts = tracefs__known_mountpoints, .magic = TRACEFS_MAGIC, }, + [FS__HUGETLBFS] = { + .name = "hugetlbfs", + .mounts = hugetlbfs__known_mountpoints, + .magic = HUGETLBFS_MAGIC, + }, }; static bool fs__read_mounts(struct fs *fs) @@ -265,6 +279,7 @@ FS(sysfs, FS__SYSFS); FS(procfs, FS__PROCFS); FS(debugfs, FS__DEBUGFS); FS(tracefs, FS__TRACEFS); +FS(hugetlbfs, FS__HUGETLBFS); int filename__read_int(const char *filename, int *value) { diff --git a/tools/lib/api/fs/fs.h b/tools/lib/api/fs/fs.h index 16c9c2e..a63269f 100644 --- a/tools/lib/api/fs/fs.h +++ b/tools/lib/api/fs/fs.h @@ -21,6 +21,7 @@ FS(sysfs) FS(procfs) FS(debugfs) FS(tracefs) +FS(hugetlbfs) #undef FS -- cgit v0.10.2 From d7e404af115bb4996afa4a0236020969ab007554 Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Tue, 6 Sep 2016 04:58:29 +0000 Subject: perf record: Mark MAP_HUGETLB when synthesizing mmap events When synthesizing mmap events, add MAP_HUGETLB map 
flag if the source of mapping is file in hugetlbfs. After this patch, perf can identify hugetlb mapping even if perf is started after the mapping of huge pages (like with 'perf top'). Signed-off-by: Wang Nan Reviewed-by: Nilay Vaish Cc: He Kuang Cc: Hou Pengyang Cc: Zefan Li Link: http://lkml.kernel.org/r/1473137909-142064-4-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 9ad7d32..6c30171 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -1,5 +1,6 @@ #include #include +#include #include "event.h" #include "debug.h" #include "hist.h" @@ -248,6 +249,10 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool, bool truncation = false; unsigned long long timeout = proc_map_timeout * 1000000ULL; int rc = 0; +#ifdef MAP_HUGETLB + const char *hugetlbfs_mnt = hugetlbfs__mountpoint(); + int hugetlbfs_mnt_len = hugetlbfs_mnt ? strlen(hugetlbfs_mnt) : 0; +#endif if (machine__is_default_guest(machine)) return 0; @@ -342,6 +347,12 @@ out: if (!strcmp(execname, "")) strcpy(execname, anonstr); +#ifdef MAP_HUGETLB + if (!strncmp(execname, hugetlbfs_mnt, hugetlbfs_mnt_len)) { + strcpy(execname, anonstr); + event->mmap2.flags |= MAP_HUGETLB; + } +#endif size = strlen(execname) + 1; memcpy(event->mmap2.filename, execname, size); -- cgit v0.10.2 From 70fbe0574558e934f93bde26e4949c8c206bae43 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 5 Sep 2016 16:08:12 -0300 Subject: perf annotate: Add branch stack / basic block I wanted to know the hottest path through a function and figured the branch-stack (LBR) information should be able to help out with that. The below uses the branch-stack to create basic blocks and generate statistics from them. from to branch_i * ----> * | | block v * ----> * from to branch_i+1 The blocks are broken down into non-overlapping ranges, while tracking if the start of each range is an entry point and/or the end of a range is a branch. Each block iterates all ranges it covers (while splitting where required to exactly match the block) and increments the 'coverage' count. For the range including the branch we increment the taken counter, as well as the pred counter if flags.predicted. Using these number we can find if an instruction: - had coverage; given by: br->coverage / br->sym->max_coverage This metric ensures each symbol has a 100% spot, which reflects the observation that each symbol must have a most covered/hottest block. - is a branch target: br->is_target && br->start == add - for targets, how much of a branch's coverages comes from it: target->entry / branch->coverage - is a branch: br->is_branch && br->end == addr - for branches, how often it was taken: br->taken / br->coverage after all, all execution that didn't take the branch would have incremented the coverage and continued onward to a later branch. - for branches, how often it was predicted: br->pred / br->taken The coverage percentage is used to color the address and asm sections; for low (<1%) coverage we use NORMAL (uncolored), indicating that these instructions are not 'important'. For high coverage (>75%) we color the address RED. For each branch, we add an asm comment after the instruction with information on how often it was taken and predicted. 
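As a rough, standalone restatement of those ratios (struct and function names here are illustrative sketches, not the actual perf data structures):

#include <stdio.h>

struct range_stats {
	unsigned long coverage;	/* blocks that covered this range */
	unsigned long entry;	/* entries via the range start, if it is a target */
	unsigned long taken;	/* times the ending branch was taken */
	unsigned long pred;	/* times the taken branch was predicted */
};

/* coverage relative to the symbol's hottest range (gives that range 100%) */
static double coverage_ratio(const struct range_stats *r, unsigned long sym_max_coverage)
{
	return sym_max_coverage ? (double)r->coverage / sym_max_coverage : -1.0;
}

/* fraction of covering executions that left via the ending branch */
static double taken_ratio(const struct range_stats *r)
{
	return r->coverage ? (double)r->taken / r->coverage : 0.0;
}

/* fraction of taken branches that were predicted correctly */
static double pred_ratio(const struct range_stats *r)
{
	return r->taken ? (double)r->pred / r->taken : 0.0;
}

int main(void)
{
	/* invented counts, only to exercise the formulas */
	struct range_stats r = { .coverage = 200, .entry = 99, .taken = 110, .pred = 105 };

	printf("coverage %.2f, taken %.2f%%, pred %.2f%%\n",
	       coverage_ratio(&r, 250), 100 * taken_ratio(&r), 100 * pred_ratio(&r));
	return 0;
}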
Output looks like (sans color, which does loose a lot of the information :/) $ perf record --branch-filter u,any -e cycles:p ./branches 27 $ perf annotate branches Percent | Source code & Disassembly of branches for cycles:pu (217 samples) --------------------------------------------------------------------------------- : branches(): 0.00 : 40057a: push %rbp 0.00 : 40057b: mov %rsp,%rbp 0.00 : 40057e: sub $0x20,%rsp 0.00 : 400582: mov %rdi,-0x18(%rbp) 0.00 : 400586: mov %rsi,-0x20(%rbp) 0.00 : 40058a: mov -0x18(%rbp),%rax 0.00 : 40058e: mov %rax,-0x10(%rbp) 0.00 : 400592: movq $0x0,-0x8(%rbp) 0.00 : 40059a: jmpq 400656 1.84 : 40059f: mov -0x10(%rbp),%rax # +100.00% 3.23 : 4005a3: and $0x1,%eax 1.84 : 4005a6: test %rax,%rax 0.00 : 4005a9: je 4005bf # -54.50% (p:42.00%) 0.46 : 4005ab: mov 0x200bbe(%rip),%rax # 601170 12.90 : 4005b2: add $0x1,%rax 2.30 : 4005b6: mov %rax,0x200bb3(%rip) # 601170 0.46 : 4005bd: jmp 4005d1 # -100.00% (p:100.00%) 0.92 : 4005bf: mov 0x200baa(%rip),%rax # 601170 # +49.54% 13.82 : 4005c6: sub $0x1,%rax 0.46 : 4005ca: mov %rax,0x200b9f(%rip) # 601170 2.30 : 4005d1: mov -0x10(%rbp),%rax # +50.46% 0.46 : 4005d5: mov %rax,%rdi 0.46 : 4005d8: callq 400526 # -100.00% (p:100.00%) 0.00 : 4005dd: mov %rax,-0x10(%rbp) # +100.00% 0.92 : 4005e1: mov -0x18(%rbp),%rax 0.00 : 4005e5: and $0x1,%eax 0.00 : 4005e8: test %rax,%rax 0.00 : 4005eb: je 4005ff # -100.00% (p:100.00%) 0.00 : 4005ed: mov 0x200b7c(%rip),%rax # 601170 0.00 : 4005f4: shr $0x2,%rax 0.00 : 4005f8: mov %rax,0x200b71(%rip) # 601170 0.00 : 4005ff: mov -0x10(%rbp),%rax # +100.00% 7.37 : 400603: and $0x1,%eax 3.69 : 400606: test %rax,%rax 0.00 : 400609: jne 400612 # -59.25% (p:42.99%) 1.84 : 40060b: mov $0x1,%eax 14.29 : 400610: jmp 400617 # -100.00% (p:100.00%) 1.38 : 400612: mov $0x0,%eax # +57.65% 10.14 : 400617: test %al,%al # +42.35% 0.00 : 400619: je 40062f # -57.65% (p:100.00%) 0.46 : 40061b: mov 0x200b4e(%rip),%rax # 601170 2.76 : 400622: sub $0x1,%rax 0.00 : 400626: mov %rax,0x200b43(%rip) # 601170 0.46 : 40062d: jmp 400641 # -100.00% (p:100.00%) 0.92 : 40062f: mov 0x200b3a(%rip),%rax # 601170 # +56.13% 2.30 : 400636: add $0x1,%rax 0.92 : 40063a: mov %rax,0x200b2f(%rip) # 601170 0.92 : 400641: mov -0x10(%rbp),%rax # +43.87% 2.30 : 400645: mov %rax,%rdi 0.00 : 400648: callq 400526 # -100.00% (p:100.00%) 0.00 : 40064d: mov %rax,-0x10(%rbp) # +100.00% 1.84 : 400651: addq $0x1,-0x8(%rbp) 0.92 : 400656: mov -0x8(%rbp),%rax 5.07 : 40065a: cmp -0x20(%rbp),%rax 0.00 : 40065e: jb 40059f # -100.00% (p:100.00%) 0.00 : 400664: nop 0.00 : 400665: leaveq 0.00 : 400666: retq (Note: the --branch-filter u,any was used to avoid spurious target and branch points due to interrupts/faults, they show up as very small -/+ annotations on 'weird' locations) Committer note: Please take a look at: http://vger.kernel.org/~acme/perf/annotate_basic_blocks.png To see the colors. 
Signed-off-by: Peter Zijlstra (Intel) Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Andi Kleen Cc: Anshuman Khandual Cc: David Carrillo-Cisneros Cc: Jiri Olsa Cc: Kan Liang Cc: Linus Torvalds Cc: Namhyung Kim Cc: Stephane Eranian [ Moved sym->max_coverage to 'struct annotate', aka symbol__annotate(sym) ] Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index f07b230..ebb6283 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -30,6 +30,7 @@ #include "util/tool.h" #include "util/data.h" #include "arch/common.h" +#include "util/block-range.h" #include #include @@ -46,6 +47,103 @@ struct perf_annotate { DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); }; +/* + * Given one basic block: + * + * from to branch_i + * * ----> * + * | + * | block + * v + * * ----> * + * from to branch_i+1 + * + * where the horizontal are the branches and the vertical is the executed + * block of instructions. + * + * We count, for each 'instruction', the number of blocks that covered it as + * well as count the ratio each branch is taken. + * + * We can do this without knowing the actual instruction stream by keeping + * track of the address ranges. We break down ranges such that there is no + * overlap and iterate from the start until the end. + * + * @acme: once we parse the objdump output _before_ processing the samples, + * we can easily fold the branch.cycles IPC bits in. + */ +static void process_basic_block(struct addr_map_symbol *start, + struct addr_map_symbol *end, + struct branch_flags *flags) +{ + struct symbol *sym = start->sym; + struct annotation *notes = sym ? symbol__annotation(sym) : NULL; + struct block_range_iter iter; + struct block_range *entry; + + /* + * Sanity; NULL isn't executable and the CPU cannot execute backwards + */ + if (!start->addr || start->addr > end->addr) + return; + + iter = block_range__create(start->addr, end->addr); + if (!block_range_iter__valid(&iter)) + return; + + /* + * First block in range is a branch target. + */ + entry = block_range_iter(&iter); + assert(entry->is_target); + entry->entry++; + + do { + entry = block_range_iter(&iter); + + entry->coverage++; + entry->sym = sym; + + if (notes) + notes->max_coverage = max(notes->max_coverage, entry->coverage); + + } while (block_range_iter__next(&iter)); + + /* + * Last block in rage is a branch. + */ + entry = block_range_iter(&iter); + assert(entry->is_branch); + entry->taken++; + if (flags->predicted) + entry->pred++; +} + +static void process_branch_stack(struct branch_stack *bs, struct addr_location *al, + struct perf_sample *sample) +{ + struct addr_map_symbol *prev = NULL; + struct branch_info *bi; + int i; + + if (!bs || !bs->nr) + return; + + bi = sample__resolve_bstack(sample, al); + if (!bi) + return; + + for (i = bs->nr - 1; i >= 0; i--) { + /* + * XXX filter against symbol + */ + if (prev) + process_basic_block(prev, &bi[i].from, &bi[i].flags); + prev = &bi[i].to; + } + + free(bi); +} + static int perf_evsel__add_sample(struct perf_evsel *evsel, struct perf_sample *sample, struct addr_location *al, @@ -72,6 +170,12 @@ static int perf_evsel__add_sample(struct perf_evsel *evsel, return 0; } + /* + * XXX filtered samples can still have branch entires pointing into our + * symbol and are missed. 
+ */ + process_branch_stack(sample->branch_stack, al, sample); + sample->period = 1; sample->weight = 1; diff --git a/tools/perf/util/Build b/tools/perf/util/Build index f1a6d17..96f99d6 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -1,5 +1,6 @@ libperf-y += alias.o libperf-y += annotate.o +libperf-y += block-range.o libperf-y += build-id.o libperf-y += config.o libperf-y += ctype.o diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 2ff6bd7..7a80c73 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -17,6 +17,7 @@ #include "debug.h" #include "annotate.h" #include "evsel.h" +#include "block-range.h" #include #include #include @@ -859,6 +860,89 @@ double disasm__calc_percent(struct annotation *notes, int evidx, s64 offset, return percent; } +static const char *annotate__address_color(struct block_range *br) +{ + double cov = block_range__coverage(br); + + if (cov >= 0) { + /* mark red for >75% coverage */ + if (cov > 0.75) + return PERF_COLOR_RED; + + /* mark dull for <1% coverage */ + if (cov < 0.01) + return PERF_COLOR_NORMAL; + } + + return PERF_COLOR_MAGENTA; +} + +static const char *annotate__asm_color(struct block_range *br) +{ + double cov = block_range__coverage(br); + + if (cov >= 0) { + /* mark dull for <1% coverage */ + if (cov < 0.01) + return PERF_COLOR_NORMAL; + } + + return PERF_COLOR_BLUE; +} + +static void annotate__branch_printf(struct block_range *br, u64 addr) +{ + bool emit_comment = true; + + if (!br) + return; + +#if 1 + if (br->is_target && br->start == addr) { + struct block_range *branch = br; + double p; + + /* + * Find matching branch to our target. + */ + while (!branch->is_branch) + branch = block_range__next(branch); + + p = 100 *(double)br->entry / branch->coverage; + + if (p > 0.1) { + if (emit_comment) { + emit_comment = false; + printf("\t#"); + } + + /* + * The percentage of coverage joined at this target in relation + * to the next branch. + */ + printf(" +%.2f%%", p); + } + } +#endif + if (br->is_branch && br->end == addr) { + double p = 100*(double)br->taken / br->coverage; + + if (p > 0.1) { + if (emit_comment) { + emit_comment = false; + printf("\t#"); + } + + /* + * The percentage of coverage leaving at this branch, and + * its prediction ratio. 
+ */ + printf(" -%.2f%% (p:%.2f%%)", p, 100*(double)br->pred / br->taken); + } + } +} + + static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 start, struct perf_evsel *evsel, u64 len, int min_pcnt, int printed, int max_lines, struct disasm_line *queue) @@ -878,6 +962,7 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st s64 offset = dl->offset; const u64 addr = start + offset; struct disasm_line *next; + struct block_range *br; next = disasm__get_next_ip_line(¬es->src->source, dl); @@ -947,8 +1032,12 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st } printf(" : "); - color_fprintf(stdout, PERF_COLOR_MAGENTA, " %" PRIx64 ":", addr); - color_fprintf(stdout, PERF_COLOR_BLUE, "%s\n", dl->line); + + br = block_range__find(addr); + color_fprintf(stdout, annotate__address_color(br), " %" PRIx64 ":", addr); + color_fprintf(stdout, annotate__asm_color(br), "%s", dl->line); + annotate__branch_printf(br, addr); + printf("\n"); if (ppercents != &percent) free(ppercents); diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index e96f4da..ea44e4f 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -130,6 +130,7 @@ struct annotated_source { struct annotation { pthread_mutex_t lock; + u64 max_coverage; struct annotated_source *src; }; diff --git a/tools/perf/util/block-range.c b/tools/perf/util/block-range.c new file mode 100644 index 0000000..7b3e1d7 --- /dev/null +++ b/tools/perf/util/block-range.c @@ -0,0 +1,328 @@ +#include "block-range.h" +#include "annotate.h" + +struct { + struct rb_root root; + u64 blocks; +} block_ranges; + +static void block_range__debug(void) +{ + /* + * XXX still paranoid for now; see if we can make this depend on + * DEBUG=1 builds. + */ +#if 1 + struct rb_node *rb; + u64 old = 0; /* NULL isn't executable */ + + for (rb = rb_first(&block_ranges.root); rb; rb = rb_next(rb)) { + struct block_range *entry = rb_entry(rb, struct block_range, node); + + assert(old < entry->start); + assert(entry->start <= entry->end); /* single instruction block; jump to a jump */ + + old = entry->end; + } +#endif +} + +struct block_range *block_range__find(u64 addr) +{ + struct rb_node **p = &block_ranges.root.rb_node; + struct rb_node *parent = NULL; + struct block_range *entry; + + while (*p != NULL) { + parent = *p; + entry = rb_entry(parent, struct block_range, node); + + if (addr < entry->start) + p = &parent->rb_left; + else if (addr > entry->end) + p = &parent->rb_right; + else + return entry; + } + + return NULL; +} + +static inline void rb_link_left_of_node(struct rb_node *left, struct rb_node *node) +{ + struct rb_node **p = &node->rb_left; + while (*p) { + node = *p; + p = &node->rb_right; + } + rb_link_node(left, node, p); +} + +static inline void rb_link_right_of_node(struct rb_node *right, struct rb_node *node) +{ + struct rb_node **p = &node->rb_right; + while (*p) { + node = *p; + p = &node->rb_left; + } + rb_link_node(right, node, p); +} + +/** + * block_range__create + * @start: branch target starting this basic block + * @end: branch ending this basic block + * + * Create all the required block ranges to precisely span the given range. 
+ */ +struct block_range_iter block_range__create(u64 start, u64 end) +{ + struct rb_node **p = &block_ranges.root.rb_node; + struct rb_node *n, *parent = NULL; + struct block_range *next, *entry = NULL; + struct block_range_iter iter = { NULL, NULL }; + + while (*p != NULL) { + parent = *p; + entry = rb_entry(parent, struct block_range, node); + + if (start < entry->start) + p = &parent->rb_left; + else if (start > entry->end) + p = &parent->rb_right; + else + break; + } + + /* + * Didn't find anything.. there's a hole at @start, however @end might + * be inside/behind the next range. + */ + if (!*p) { + if (!entry) /* tree empty */ + goto do_whole; + + /* + * If the last node is before, advance one to find the next. + */ + n = parent; + if (entry->end < start) { + n = rb_next(n); + if (!n) + goto do_whole; + } + next = rb_entry(n, struct block_range, node); + + if (next->start <= end) { /* add head: [start...][n->start...] */ + struct block_range *head = malloc(sizeof(struct block_range)); + if (!head) + return iter; + + *head = (struct block_range){ + .start = start, + .end = next->start - 1, + .is_target = 1, + .is_branch = 0, + }; + + rb_link_left_of_node(&head->node, &next->node); + rb_insert_color(&head->node, &block_ranges.root); + block_range__debug(); + + iter.start = head; + goto do_tail; + } + +do_whole: + /* + * The whole [start..end] range is non-overlapping. + */ + entry = malloc(sizeof(struct block_range)); + if (!entry) + return iter; + + *entry = (struct block_range){ + .start = start, + .end = end, + .is_target = 1, + .is_branch = 1, + }; + + rb_link_node(&entry->node, parent, p); + rb_insert_color(&entry->node, &block_ranges.root); + block_range__debug(); + + iter.start = entry; + iter.end = entry; + goto done; + } + + /* + * We found a range that overlapped with ours, split if needed. + */ + if (entry->start < start) { /* split: [e->start...][start...] */ + struct block_range *head = malloc(sizeof(struct block_range)); + if (!head) + return iter; + + *head = (struct block_range){ + .start = entry->start, + .end = start - 1, + .is_target = entry->is_target, + .is_branch = 0, + + .coverage = entry->coverage, + .entry = entry->entry, + }; + + entry->start = start; + entry->is_target = 1; + entry->entry = 0; + + rb_link_left_of_node(&head->node, &entry->node); + rb_insert_color(&head->node, &block_ranges.root); + block_range__debug(); + + } else if (entry->start == start) + entry->is_target = 1; + + iter.start = entry; + +do_tail: + /* + * At this point we've got: @iter.start = [@start...] but @end can still be + * inside or beyond it. + */ + entry = iter.start; + for (;;) { + /* + * If @end is inside @entry, split. 
+ */ + if (end < entry->end) { /* split: [...end][...e->end] */ + struct block_range *tail = malloc(sizeof(struct block_range)); + if (!tail) + return iter; + + *tail = (struct block_range){ + .start = end + 1, + .end = entry->end, + .is_target = 0, + .is_branch = entry->is_branch, + + .coverage = entry->coverage, + .taken = entry->taken, + .pred = entry->pred, + }; + + entry->end = end; + entry->is_branch = 1; + entry->taken = 0; + entry->pred = 0; + + rb_link_right_of_node(&tail->node, &entry->node); + rb_insert_color(&tail->node, &block_ranges.root); + block_range__debug(); + + iter.end = entry; + goto done; + } + + /* + * If @end matches @entry, done + */ + if (end == entry->end) { + entry->is_branch = 1; + iter.end = entry; + goto done; + } + + next = block_range__next(entry); + if (!next) + goto add_tail; + + /* + * If @end is in beyond @entry but not inside @next, add tail. + */ + if (end < next->start) { /* add tail: [...e->end][...end] */ + struct block_range *tail; +add_tail: + tail = malloc(sizeof(struct block_range)); + if (!tail) + return iter; + + *tail = (struct block_range){ + .start = entry->end + 1, + .end = end, + .is_target = 0, + .is_branch = 1, + }; + + rb_link_right_of_node(&tail->node, &entry->node); + rb_insert_color(&tail->node, &block_ranges.root); + block_range__debug(); + + iter.end = tail; + goto done; + } + + /* + * If there is a hole between @entry and @next, fill it. + */ + if (entry->end + 1 != next->start) { + struct block_range *hole = malloc(sizeof(struct block_range)); + if (!hole) + return iter; + + *hole = (struct block_range){ + .start = entry->end + 1, + .end = next->start - 1, + .is_target = 0, + .is_branch = 0, + }; + + rb_link_left_of_node(&hole->node, &next->node); + rb_insert_color(&hole->node, &block_ranges.root); + block_range__debug(); + } + + entry = next; + } + +done: + assert(iter.start->start == start && iter.start->is_target); + assert(iter.end->end == end && iter.end->is_branch); + + block_ranges.blocks++; + + return iter; +} + + +/* + * Compute coverage as: + * + * br->coverage / br->sym->max_coverage + * + * This ensures each symbol has a 100% spot, to reflect that each symbol has a + * most covered section. + * + * Returns [0-1] for coverage and -1 if we had no data what so ever or the + * symbol does not exist. 
+ */ +double block_range__coverage(struct block_range *br) +{ + struct symbol *sym; + + if (!br) { + if (block_ranges.blocks) + return 0; + + return -1; + } + + sym = br->sym; + if (!sym) + return -1; + + return (double)br->coverage / symbol__annotation(sym)->max_coverage; +} diff --git a/tools/perf/util/block-range.h b/tools/perf/util/block-range.h new file mode 100644 index 0000000..a8c8413 --- /dev/null +++ b/tools/perf/util/block-range.h @@ -0,0 +1,71 @@ +#ifndef __PERF_BLOCK_RANGE_H +#define __PERF_BLOCK_RANGE_H + +#include "symbol.h" + +/* + * struct block_range - non-overlapping parts of basic blocks + * @node: treenode + * @start: inclusive start of range + * @end: inclusive end of range + * @is_target: @start is a jump target + * @is_branch: @end is a branch instruction + * @coverage: number of blocks that cover this range + * @taken: number of times the branch is taken (requires @is_branch) + * @pred: number of times the taken branch was predicted + */ +struct block_range { + struct rb_node node; + + struct symbol *sym; + + u64 start; + u64 end; + + int is_target, is_branch; + + u64 coverage; + u64 entry; + u64 taken; + u64 pred; +}; + +static inline struct block_range *block_range__next(struct block_range *br) +{ + struct rb_node *n = rb_next(&br->node); + if (!n) + return NULL; + return rb_entry(n, struct block_range, node); +} + +struct block_range_iter { + struct block_range *start; + struct block_range *end; +}; + +static inline struct block_range *block_range_iter(struct block_range_iter *iter) +{ + return iter->start; +} + +static inline bool block_range_iter__next(struct block_range_iter *iter) +{ + if (iter->start == iter->end) + return false; + + iter->start = block_range__next(iter->start); + return true; +} + +static inline bool block_range_iter__valid(struct block_range_iter *iter) +{ + if (!iter->start || !iter->end) + return false; + return true; +} + +extern struct block_range *block_range__find(u64 addr); +extern struct block_range_iter block_range__create(u64 start, u64 end); +extern double block_range__coverage(struct block_range *br); + +#endif /* __PERF_BLOCK_RANGE_H */ -- cgit v0.10.2 From 9f21b815be863218192f42f9f5bf78b75f8738e0 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 8 Sep 2016 11:21:51 +0100 Subject: perf evlist: Only open events on CPUs an evsel permits In systems with heterogeneous CPU PMUs, it's possible for each evsel to cover a distinct set of CPUs, and hence the cpu_map associated with each evsel may have a distinct idx<->id mapping. Any of these may be distinct from the evlist's cpu map. Events can be tied to the same fd so long as they use the same per-cpu ringbuffer (i.e. so long as they are on the same CPU). To acquire the correct FDs, we must compare the Linux logical IDs rather than the evsel or evlist indices. This path adds logic to perf_evlist__mmap_per_evsel to handle this, translating IDs as required. As PMUs may cover a subset of CPUs from the evlist, we skip the CPUs a PMU cannot handle. Without this patch, perf record may try to mmap erroneous FDs on heterogeneous systems, and will bail out early rather than running the workload. 
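A toy model of that translation, assuming a hypothetical big.LITTLE layout; cpu_map__idx() below is a simplified stand-in for perf's helper of the same name:

#include <stdio.h>

/* Map a Linux logical CPU id to this map's index, or -1 if not covered. */
static int cpu_map__idx(const int *map, int nr, int cpu)
{
	int i;

	for (i = 0; i < nr; i++)
		if (map[i] == cpu)
			return i;
	return -1;
}

int main(void)
{
	int evlist_cpus[] = { 0, 1, 2, 3, 4, 5, 6, 7 };	/* evlist covers all CPUs */
	int little[]      = { 0, 1, 2, 3 };		/* little-core PMU evsel */
	int big[]         = { 4, 5, 6, 7 };		/* big-core PMU evsel */
	int idx;

	for (idx = 0; idx < 8; idx++) {
		int cpu = evlist_cpus[idx];	/* evlist idx -> logical id */

		printf("cpu%d: little idx %d, big idx %d\n", cpu,
		       cpu_map__idx(little, 4, cpu),
		       cpu_map__idx(big, 4, cpu));
	}
	return 0;
}

A result of -1 means the evsel's PMU cannot count on that CPU, so it is skipped rather than mmap'ed with a wrong FD.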
Signed-off-by: Mark Rutland Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Mark Rutland Cc: Peter Zijlstra Cc: Will Deacon Link: http://lkml.kernel.org/r/1473330112-28528-7-git-send-email-mark.rutland@arm.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 097b3ed..ea34c5a 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1032,16 +1032,18 @@ perf_evlist__should_poll(struct perf_evlist *evlist __maybe_unused, } static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx, - struct mmap_params *mp, int cpu, + struct mmap_params *mp, int cpu_idx, int thread, int *_output, int *_output_backward) { struct perf_evsel *evsel; int revent; + int evlist_cpu = cpu_map__cpu(evlist->cpus, cpu_idx); evlist__for_each_entry(evlist, evsel) { struct perf_mmap *maps = evlist->mmap; int *output = _output; int fd; + int cpu; if (evsel->attr.write_backward) { output = _output_backward; @@ -1060,6 +1062,10 @@ static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx, if (evsel->system_wide && thread) continue; + cpu = cpu_map__idx(evsel->cpus, evlist_cpu); + if (cpu == -1) + continue; + fd = FD(evsel, cpu, thread); if (*output == -1) { -- cgit v0.10.2 From 7e3fcffe955440101493cd8f32f75840ddf87b6f Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 8 Sep 2016 11:21:52 +0100 Subject: perf pmu: Support alternative sysfs cpumask The perf tools can read a cpumask file for a PMU, describing a subset of CPUs which that PMU covers. So far this has only been used to cater for uncore PMUs, which in practice happen to only have a single CPU described in the mask. Until recently, the perf tools only correctly handled cpumask containing a single CPU, and only when monitoring in system-wide mode. For example, prior to commit 00e727bb389359c8 ("perf stat: Balance opening and reading events"), a mask with more than a single CPU could cause perf stat to hang. When a CPU PMU covers a subset of CPUs, but lacks a cpumask, perf record will fail to open events (on the cores the PMU does not support), and gives up. For systems with heterogeneous CPUs such as ARM big.LITTLE systems, this presents a problem. We have a PMU for each microarchitecture (e.g. a big PMU and a little PMU), and would like to expose a cpumask for each (so as to allow perf record and other tools to do the right thing). However, doing so kernel-side will cause old perf binaries to not function (e.g. hitting the issue solved by 00e727bb389359c8), and thus commits the cardinal sin of breaking (existing) userspace. To address this chicken-and-egg problem, this patch adds support got a new file, cpus, which is largely identical to the existing cpumask file. A kernel can expose this file, knowing that new perf binaries will correctly support it, while old perf binaries will not look for it (and thus will not be broken). 
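A condensed sketch of the lookup order described above, assuming the standard event_source sysfs layout; error handling and the cpu-list parsing are omitted:

#include <stdio.h>
#include <sys/stat.h>

/* Try the legacy "cpumask" file first, then fall back to the new "cpus" file. */
static FILE *pmu_cpumask_file(const char *sysfs, const char *pmu_name)
{
	const char *templates[] = {
		"%s/bus/event_source/devices/%s/cpumask",
		"%s/bus/event_source/devices/%s/cpus",
		NULL,
	};
	const char **template;
	char path[4096];
	struct stat st;

	for (template = templates; *template; template++) {
		snprintf(path, sizeof(path), *template, sysfs, pmu_name);
		if (stat(path, &st) == 0)
			return fopen(path, "r");
	}
	return NULL;	/* this PMU exposes neither file */
}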
Signed-off-by: Mark Rutland Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mark Rutland Cc: Peter Zijlstra Cc: Will Deacon Link: http://lkml.kernel.org/r/1473330112-28528-8-git-send-email-mark.rutland@arm.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index ddb0261..2babcdf 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -445,14 +445,23 @@ static struct cpu_map *pmu_cpumask(const char *name) FILE *file; struct cpu_map *cpus; const char *sysfs = sysfs__mountpoint(); + const char *templates[] = { + "%s/bus/event_source/devices/%s/cpumask", + "%s/bus/event_source/devices/%s/cpus", + NULL + }; + const char **template; if (!sysfs) return NULL; - snprintf(path, PATH_MAX, - "%s/bus/event_source/devices/%s/cpumask", sysfs, name); + for (template = templates; *template; template++) { + snprintf(path, PATH_MAX, *template, sysfs, name); + if (stat(path, &st) == 0) + break; + } - if (stat(path, &st) < 0) + if (!*template) return NULL; file = fopen(path, "r"); -- cgit v0.10.2 From 25b8592e912f085ce2ff736a2927584ddeab238c Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Wed, 31 Aug 2016 13:33:11 +0530 Subject: perf powerpc: Fix build-test failure 'make -C tools/perf build-test' is failing with below log for poewrpc. In file included from /tmp/tmp.3eEwmGlYaF/perf-4.8.0-rc4/tools/perf/perf.h:15:0, from util/cpumap.h:8, from util/env.c:1: /tmp/tmp.3eEwmGlYaF/perf-4.8.0-rc4/tools/perf/perf-sys.h:23:56: fatal error: ../../arch/powerpc/include/uapi/asm/unistd.h: No such file or directory compilation terminated. I bisected it and found it's failing from commit ad430729ae00 ("Remove: kernel unistd*h files from perf's MANIFEST, not used"). Header file '../../arch/powerpc/include/uapi/asm/unistd.h' is included only for powerpc in tools/perf/perf-sys.h. By looking closly at commit history, I found little weird thing: Commit f2d9cae9ea9e ("perf powerpc: Use uapi/unistd.h to fix build error") replaced 'asm/unistd.h' with 'uapi/asm/unistd.h' Commit d2709c7ce4c5 ("perf: Make perf build for x86 with UAPI disintegration applied") removes all arch specific 'uapi/asm/unistd.h' for all archs and adds generic . Commit f0b9abfb0446 ("Merge branch 'linus' into perf/core") again includes 'uapi/asm/unistd.h' for powerpc. Don't know how exactly this happened as this change is not part of commit also. Signed-off-by: Ravi Bangoria Cc: Alexander Shishkin Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1472630591-5089-1-git-send-email-ravi.bangoria@linux.vnet.ibm.com Fixes: ad430729ae00 ("Remove: kernel unistd*h files from perf's MANIFEST, not used") Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/perf-sys.h b/tools/perf/perf-sys.h index 7ed72a4..e4b717e 100644 --- a/tools/perf/perf-sys.h +++ b/tools/perf/perf-sys.h @@ -20,7 +20,6 @@ #endif #ifdef __powerpc__ -#include "../../arch/powerpc/include/uapi/asm/unistd.h" #define CPUINFO_PROC {"cpu"} #endif -- cgit v0.10.2 From 2668c6195685f4b6f281767d10b4f4f2e32c2305 Mon Sep 17 00:00:00 2001 From: Harry Pan Date: Thu, 8 Sep 2016 17:08:57 +0800 Subject: perf/x86/rapl: Enable Apollo Lake RAPL support This patch enables RAPL counters (energy consumption counters) support for Intel Apollo Lake (Goldmont) processors (Model 92): RAPL of Goldmont, unlikes ESU increment of Silvermont/Airmont, it likes the Haswell microarchitecture in 1/2^ESU joules and supports power domains in PP0/PP1/PKG/RAM. ESU and power domains refer to Intel Software Developers' Manual, Vol. 3C, Order No. 325384, Table 35-12. 
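For a sense of scale, a small sketch of the energy unit described above; the ESU value of 14 is only an example and is not claimed to be Goldmont's actual setting (the real value is read from MSR_RAPL_POWER_UNIT at runtime):

#include <stdio.h>

int main(void)
{
	unsigned int esu = 14;	/* example only */
	double joules_per_count = 1.0 / (double)(1u << esu);	/* 1/2^ESU joules */

	printf("ESU=%u -> %.9f J per count (~%.1f uJ)\n",
	       esu, joules_per_count, joules_per_count * 1e6);
	return 0;
}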
Usage example: $ perf list $ perf stat -a -e power/energy-cores/,power/energy-pkg/ sleep 10 Signed-off-by: Harry Pan Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: bp@alien8.de Cc: gs0622@gmail.com Cc: hpa@zytor.com Cc: srinivas.pandruvada@linux.intel.com Link: http://lkml.kernel.org/r/1473325738-730-1-git-send-email-harry.pan@intel.com Signed-off-by: Ingo Molnar diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c index 62bebcc..b0f0e83 100644 --- a/arch/x86/events/intel/rapl.c +++ b/arch/x86/events/intel/rapl.c @@ -767,6 +767,8 @@ static const struct x86_cpu_id rapl_cpu_match[] __initconst = { X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_MOBILE, skl_rapl_init), X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_DESKTOP, skl_rapl_init), X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_X, hsx_rapl_init), + + X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT, hsw_rapl_init), {}, }; -- cgit v0.10.2 From cd34cd97b7b4336aa2c623c37daffab264c7c6ce Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 16 Aug 2016 16:09:50 -0400 Subject: perf/x86/intel/uncore: Add Skylake server uncore support This patch implements the uncore monitoring driver for Skylake server. The uncore subsystem in Skylake server is similar to previous server. There are some differences in config register encoding and pci device IDs. Besides, Skylake introduces many new boxes to reflect the MESH architecture changes. The control registers for IIO and UPI have been extended to 64 bit. This patch also introduces event_mask_ext to handle the high 32 bit mask. The CHA box number could vary for different machines. This patch gets the CHA box number by counting the CHA register space during initialization at runtime. 
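To illustrate the extended mask mentioned above, a small sketch of how the low and high halves combine into one 64-bit mask; the raw values are made up, and the combining expression mirrors the hw_config change in the diff that follows:

#include <stdio.h>

int main(void)
{
	unsigned int event_mask     = 0xff0fffff;	/* hypothetical low 32 bits */
	unsigned int event_mask_ext = 0x000000ff;	/* hypothetical high 32 bits */
	unsigned long long attr_config = 0x12345678aabbccddULL;

	unsigned long long mask = event_mask |
				  ((unsigned long long)event_mask_ext << 32);
	unsigned long long config = attr_config & mask;

	printf("mask   = 0x%016llx\n", mask);
	printf("config = 0x%016llx\n", config);
	return 0;
}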
Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Link: http://lkml.kernel.org/r/1471378190-17276-3-git-send-email-kan.liang@intel.com Signed-off-by: Ingo Molnar diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index 7b3cc8b..d9844cc 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -685,7 +685,8 @@ static int uncore_pmu_event_init(struct perf_event *event) /* fixed counters have event field hardcoded to zero */ hwc->config = 0ULL; } else { - hwc->config = event->attr.config & pmu->type->event_mask; + hwc->config = event->attr.config & + (pmu->type->event_mask | ((u64)pmu->type->event_mask_ext << 32)); if (pmu->type->ops->hw_config) { ret = pmu->type->ops->hw_config(box, event); if (ret) @@ -1323,6 +1324,11 @@ static const struct intel_uncore_init_fun skl_uncore_init __initconst = { .pci_init = skl_uncore_pci_init, }; +static const struct intel_uncore_init_fun skx_uncore_init __initconst = { + .cpu_init = skx_uncore_cpu_init, + .pci_init = skx_uncore_pci_init, +}; + static const struct x86_cpu_id intel_uncore_match[] __initconst = { X86_UNCORE_MODEL_MATCH(INTEL_FAM6_NEHALEM_EP, nhm_uncore_init), X86_UNCORE_MODEL_MATCH(INTEL_FAM6_NEHALEM, nhm_uncore_init), @@ -1345,6 +1351,7 @@ static const struct x86_cpu_id intel_uncore_match[] __initconst = { X86_UNCORE_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNL, knl_uncore_init), X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SKYLAKE_DESKTOP,skl_uncore_init), X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SKYLAKE_MOBILE, skl_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SKYLAKE_X, skx_uncore_init), {}, }; diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h index a43175f..ad986c1 100644 --- a/arch/x86/events/intel/uncore.h +++ b/arch/x86/events/intel/uncore.h @@ -44,6 +44,7 @@ struct intel_uncore_type { unsigned perf_ctr; unsigned event_ctl; unsigned event_mask; + unsigned event_mask_ext; unsigned fixed_ctr; unsigned fixed_ctl; unsigned box_ctl; @@ -381,6 +382,8 @@ int bdx_uncore_pci_init(void); void bdx_uncore_cpu_init(void); int knl_uncore_pci_init(void); void knl_uncore_cpu_init(void); +int skx_uncore_pci_init(void); +void skx_uncore_cpu_init(void); /* perf_event_intel_uncore_nhmex.c */ void nhmex_uncore_cpu_init(void); diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index 3719af5..2724277 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -268,15 +268,72 @@ SNBEP_PCU_MSR_PMON_CTL_OCC_INVERT | \ SNBEP_PCU_MSR_PMON_CTL_OCC_EDGE_DET) +/* SKX pci bus to socket mapping */ +#define SKX_CPUNODEID 0xc0 +#define SKX_GIDNIDMAP 0xd4 + +/* SKX CHA */ +#define SKX_CHA_MSR_PMON_BOX_FILTER_TID (0x1ffULL << 0) +#define SKX_CHA_MSR_PMON_BOX_FILTER_LINK (0xfULL << 9) +#define SKX_CHA_MSR_PMON_BOX_FILTER_STATE (0x3ffULL << 17) +#define SKX_CHA_MSR_PMON_BOX_FILTER_REM (0x1ULL << 32) +#define SKX_CHA_MSR_PMON_BOX_FILTER_LOC (0x1ULL << 33) +#define SKX_CHA_MSR_PMON_BOX_FILTER_ALL_OPC (0x1ULL << 35) +#define SKX_CHA_MSR_PMON_BOX_FILTER_NM (0x1ULL << 36) +#define SKX_CHA_MSR_PMON_BOX_FILTER_NOT_NM (0x1ULL << 37) +#define SKX_CHA_MSR_PMON_BOX_FILTER_OPC0 (0x3ffULL << 41) +#define SKX_CHA_MSR_PMON_BOX_FILTER_OPC1 (0x3ffULL << 51) +#define SKX_CHA_MSR_PMON_BOX_FILTER_C6 (0x1ULL << 61) +#define SKX_CHA_MSR_PMON_BOX_FILTER_NC (0x1ULL << 62) +#define 
SKX_CHA_MSR_PMON_BOX_FILTER_ISOC (0x1ULL << 63) + +/* SKX IIO */ +#define SKX_IIO0_MSR_PMON_CTL0 0xa48 +#define SKX_IIO0_MSR_PMON_CTR0 0xa41 +#define SKX_IIO0_MSR_PMON_BOX_CTL 0xa40 +#define SKX_IIO_MSR_OFFSET 0x20 + +#define SKX_PMON_CTL_TRESH_MASK (0xff << 24) +#define SKX_PMON_CTL_TRESH_MASK_EXT (0xf) +#define SKX_PMON_CTL_CH_MASK (0xff << 4) +#define SKX_PMON_CTL_FC_MASK (0x7 << 12) +#define SKX_IIO_PMON_RAW_EVENT_MASK (SNBEP_PMON_CTL_EV_SEL_MASK | \ + SNBEP_PMON_CTL_UMASK_MASK | \ + SNBEP_PMON_CTL_EDGE_DET | \ + SNBEP_PMON_CTL_INVERT | \ + SKX_PMON_CTL_TRESH_MASK) +#define SKX_IIO_PMON_RAW_EVENT_MASK_EXT (SKX_PMON_CTL_TRESH_MASK_EXT | \ + SKX_PMON_CTL_CH_MASK | \ + SKX_PMON_CTL_FC_MASK) + +/* SKX IRP */ +#define SKX_IRP0_MSR_PMON_CTL0 0xa5b +#define SKX_IRP0_MSR_PMON_CTR0 0xa59 +#define SKX_IRP0_MSR_PMON_BOX_CTL 0xa58 +#define SKX_IRP_MSR_OFFSET 0x20 + +/* SKX UPI */ +#define SKX_UPI_PCI_PMON_CTL0 0x350 +#define SKX_UPI_PCI_PMON_CTR0 0x318 +#define SKX_UPI_PCI_PMON_BOX_CTL 0x378 +#define SKX_PMON_CTL_UMASK_EXT 0xff + +/* SKX M2M */ +#define SKX_M2M_PCI_PMON_CTL0 0x228 +#define SKX_M2M_PCI_PMON_CTR0 0x200 +#define SKX_M2M_PCI_PMON_BOX_CTL 0x258 + DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7"); DEFINE_UNCORE_FORMAT_ATTR(event2, event, "config:0-6"); DEFINE_UNCORE_FORMAT_ATTR(event_ext, event, "config:0-7,21"); DEFINE_UNCORE_FORMAT_ATTR(use_occ_ctr, use_occ_ctr, "config:7"); DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15"); +DEFINE_UNCORE_FORMAT_ATTR(umask_ext, umask, "config:8-15,32-39"); DEFINE_UNCORE_FORMAT_ATTR(qor, qor, "config:16"); DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18"); DEFINE_UNCORE_FORMAT_ATTR(tid_en, tid_en, "config:19"); DEFINE_UNCORE_FORMAT_ATTR(inv, inv, "config:23"); +DEFINE_UNCORE_FORMAT_ATTR(thresh9, thresh, "config:24-35"); DEFINE_UNCORE_FORMAT_ATTR(thresh8, thresh, "config:24-31"); DEFINE_UNCORE_FORMAT_ATTR(thresh6, thresh, "config:24-29"); DEFINE_UNCORE_FORMAT_ATTR(thresh5, thresh, "config:24-28"); @@ -284,6 +341,8 @@ DEFINE_UNCORE_FORMAT_ATTR(occ_sel, occ_sel, "config:14-15"); DEFINE_UNCORE_FORMAT_ATTR(occ_invert, occ_invert, "config:30"); DEFINE_UNCORE_FORMAT_ATTR(occ_edge, occ_edge, "config:14-51"); DEFINE_UNCORE_FORMAT_ATTR(occ_edge_det, occ_edge_det, "config:31"); +DEFINE_UNCORE_FORMAT_ATTR(ch_mask, ch_mask, "config:36-43"); +DEFINE_UNCORE_FORMAT_ATTR(fc_mask, fc_mask, "config:44-46"); DEFINE_UNCORE_FORMAT_ATTR(filter_tid, filter_tid, "config1:0-4"); DEFINE_UNCORE_FORMAT_ATTR(filter_tid2, filter_tid, "config1:0"); DEFINE_UNCORE_FORMAT_ATTR(filter_tid3, filter_tid, "config1:0-5"); @@ -292,18 +351,26 @@ DEFINE_UNCORE_FORMAT_ATTR(filter_cid, filter_cid, "config1:5"); DEFINE_UNCORE_FORMAT_ATTR(filter_link, filter_link, "config1:5-8"); DEFINE_UNCORE_FORMAT_ATTR(filter_link2, filter_link, "config1:6-8"); DEFINE_UNCORE_FORMAT_ATTR(filter_link3, filter_link, "config1:12"); +DEFINE_UNCORE_FORMAT_ATTR(filter_link4, filter_link, "config1:9-12"); DEFINE_UNCORE_FORMAT_ATTR(filter_nid, filter_nid, "config1:10-17"); DEFINE_UNCORE_FORMAT_ATTR(filter_nid2, filter_nid, "config1:32-47"); DEFINE_UNCORE_FORMAT_ATTR(filter_state, filter_state, "config1:18-22"); DEFINE_UNCORE_FORMAT_ATTR(filter_state2, filter_state, "config1:17-22"); DEFINE_UNCORE_FORMAT_ATTR(filter_state3, filter_state, "config1:17-23"); DEFINE_UNCORE_FORMAT_ATTR(filter_state4, filter_state, "config1:18-20"); +DEFINE_UNCORE_FORMAT_ATTR(filter_state5, filter_state, "config1:17-26"); +DEFINE_UNCORE_FORMAT_ATTR(filter_rem, filter_rem, "config1:32"); +DEFINE_UNCORE_FORMAT_ATTR(filter_loc, 
filter_loc, "config1:33"); +DEFINE_UNCORE_FORMAT_ATTR(filter_nm, filter_nm, "config1:36"); +DEFINE_UNCORE_FORMAT_ATTR(filter_not_nm, filter_not_nm, "config1:37"); DEFINE_UNCORE_FORMAT_ATTR(filter_local, filter_local, "config1:33"); DEFINE_UNCORE_FORMAT_ATTR(filter_all_op, filter_all_op, "config1:35"); DEFINE_UNCORE_FORMAT_ATTR(filter_nnm, filter_nnm, "config1:37"); DEFINE_UNCORE_FORMAT_ATTR(filter_opc, filter_opc, "config1:23-31"); DEFINE_UNCORE_FORMAT_ATTR(filter_opc2, filter_opc, "config1:52-60"); DEFINE_UNCORE_FORMAT_ATTR(filter_opc3, filter_opc, "config1:41-60"); +DEFINE_UNCORE_FORMAT_ATTR(filter_opc_0, filter_opc0, "config1:41-50"); +DEFINE_UNCORE_FORMAT_ATTR(filter_opc_1, filter_opc1, "config1:51-60"); DEFINE_UNCORE_FORMAT_ATTR(filter_nc, filter_nc, "config1:62"); DEFINE_UNCORE_FORMAT_ATTR(filter_c6, filter_c6, "config1:61"); DEFINE_UNCORE_FORMAT_ATTR(filter_isoc, filter_isoc, "config1:63"); @@ -3209,3 +3276,525 @@ int bdx_uncore_pci_init(void) } /* end of BDX uncore support */ + +/* SKX uncore support */ + +static struct intel_uncore_type skx_uncore_ubox = { + .name = "ubox", + .num_counters = 2, + .num_boxes = 1, + .perf_ctr_bits = 48, + .fixed_ctr_bits = 48, + .perf_ctr = HSWEP_U_MSR_PMON_CTR0, + .event_ctl = HSWEP_U_MSR_PMON_CTL0, + .event_mask = SNBEP_U_MSR_PMON_RAW_EVENT_MASK, + .fixed_ctr = HSWEP_U_MSR_PMON_UCLK_FIXED_CTR, + .fixed_ctl = HSWEP_U_MSR_PMON_UCLK_FIXED_CTL, + .ops = &ivbep_uncore_msr_ops, + .format_group = &ivbep_uncore_ubox_format_group, +}; + +static struct attribute *skx_uncore_cha_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_tid_en.attr, + &format_attr_inv.attr, + &format_attr_thresh8.attr, + &format_attr_filter_tid4.attr, + &format_attr_filter_link4.attr, + &format_attr_filter_state5.attr, + &format_attr_filter_rem.attr, + &format_attr_filter_loc.attr, + &format_attr_filter_nm.attr, + &format_attr_filter_all_op.attr, + &format_attr_filter_not_nm.attr, + &format_attr_filter_opc_0.attr, + &format_attr_filter_opc_1.attr, + &format_attr_filter_nc.attr, + &format_attr_filter_c6.attr, + &format_attr_filter_isoc.attr, + NULL, +}; + +static struct attribute_group skx_uncore_chabox_format_group = { + .name = "format", + .attrs = skx_uncore_cha_formats_attr, +}; + +static struct event_constraint skx_uncore_chabox_constraints[] = { + UNCORE_EVENT_CONSTRAINT(0x11, 0x1), + UNCORE_EVENT_CONSTRAINT(0x36, 0x1), + EVENT_CONSTRAINT_END +}; + +static struct extra_reg skx_uncore_cha_extra_regs[] = { + SNBEP_CBO_EVENT_EXTRA_REG(0x0334, 0xffff, 0x4), + SNBEP_CBO_EVENT_EXTRA_REG(0x0534, 0xffff, 0x4), + SNBEP_CBO_EVENT_EXTRA_REG(0x0934, 0xffff, 0x4), + SNBEP_CBO_EVENT_EXTRA_REG(0x1134, 0xffff, 0x4), + SNBEP_CBO_EVENT_EXTRA_REG(0x2134, 0xffff, 0x4), + SNBEP_CBO_EVENT_EXTRA_REG(0x8134, 0xffff, 0x4), +}; + +static u64 skx_cha_filter_mask(int fields) +{ + u64 mask = 0; + + if (fields & 0x1) + mask |= SKX_CHA_MSR_PMON_BOX_FILTER_TID; + if (fields & 0x2) + mask |= SKX_CHA_MSR_PMON_BOX_FILTER_LINK; + if (fields & 0x4) + mask |= SKX_CHA_MSR_PMON_BOX_FILTER_STATE; + return mask; +} + +static struct event_constraint * +skx_cha_get_constraint(struct intel_uncore_box *box, struct perf_event *event) +{ + return __snbep_cbox_get_constraint(box, event, skx_cha_filter_mask); +} + +static int skx_cha_hw_config(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; + struct extra_reg *er; + int idx = 0; + + for (er = skx_uncore_cha_extra_regs; er->msr; er++) { + if 
(er->event != (event->hw.config & er->config_mask)) + continue; + idx |= er->idx; + } + + if (idx) { + reg1->reg = HSWEP_C0_MSR_PMON_BOX_FILTER0 + + HSWEP_CBO_MSR_OFFSET * box->pmu->pmu_idx; + reg1->config = event->attr.config1 & skx_cha_filter_mask(idx); + reg1->idx = idx; + } + return 0; +} + +static struct intel_uncore_ops skx_uncore_chabox_ops = { + /* There is no frz_en for chabox ctl */ + .init_box = ivbep_uncore_msr_init_box, + .disable_box = snbep_uncore_msr_disable_box, + .enable_box = snbep_uncore_msr_enable_box, + .disable_event = snbep_uncore_msr_disable_event, + .enable_event = hswep_cbox_enable_event, + .read_counter = uncore_msr_read_counter, + .hw_config = skx_cha_hw_config, + .get_constraint = skx_cha_get_constraint, + .put_constraint = snbep_cbox_put_constraint, +}; + +static struct intel_uncore_type skx_uncore_chabox = { + .name = "cha", + .num_counters = 4, + .perf_ctr_bits = 48, + .event_ctl = HSWEP_C0_MSR_PMON_CTL0, + .perf_ctr = HSWEP_C0_MSR_PMON_CTR0, + .event_mask = HSWEP_S_MSR_PMON_RAW_EVENT_MASK, + .box_ctl = HSWEP_C0_MSR_PMON_BOX_CTL, + .msr_offset = HSWEP_CBO_MSR_OFFSET, + .num_shared_regs = 1, + .constraints = skx_uncore_chabox_constraints, + .ops = &skx_uncore_chabox_ops, + .format_group = &skx_uncore_chabox_format_group, +}; + +static struct attribute *skx_uncore_iio_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_inv.attr, + &format_attr_thresh9.attr, + &format_attr_ch_mask.attr, + &format_attr_fc_mask.attr, + NULL, +}; + +static struct attribute_group skx_uncore_iio_format_group = { + .name = "format", + .attrs = skx_uncore_iio_formats_attr, +}; + +static struct event_constraint skx_uncore_iio_constraints[] = { + UNCORE_EVENT_CONSTRAINT(0x83, 0x3), + UNCORE_EVENT_CONSTRAINT(0x88, 0xc), + UNCORE_EVENT_CONSTRAINT(0x95, 0xc), + UNCORE_EVENT_CONSTRAINT(0xc0, 0xc), + UNCORE_EVENT_CONSTRAINT(0xc5, 0xc), + UNCORE_EVENT_CONSTRAINT(0xd4, 0xc), + EVENT_CONSTRAINT_END +}; + +static void skx_iio_enable_event(struct intel_uncore_box *box, + struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + wrmsrl(hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN); +} + +static struct intel_uncore_ops skx_uncore_iio_ops = { + .init_box = ivbep_uncore_msr_init_box, + .disable_box = snbep_uncore_msr_disable_box, + .enable_box = snbep_uncore_msr_enable_box, + .disable_event = snbep_uncore_msr_disable_event, + .enable_event = skx_iio_enable_event, + .read_counter = uncore_msr_read_counter, +}; + +static struct intel_uncore_type skx_uncore_iio = { + .name = "iio", + .num_counters = 4, + .num_boxes = 5, + .perf_ctr_bits = 48, + .event_ctl = SKX_IIO0_MSR_PMON_CTL0, + .perf_ctr = SKX_IIO0_MSR_PMON_CTR0, + .event_mask = SKX_IIO_PMON_RAW_EVENT_MASK, + .event_mask_ext = SKX_IIO_PMON_RAW_EVENT_MASK_EXT, + .box_ctl = SKX_IIO0_MSR_PMON_BOX_CTL, + .msr_offset = SKX_IIO_MSR_OFFSET, + .constraints = skx_uncore_iio_constraints, + .ops = &skx_uncore_iio_ops, + .format_group = &skx_uncore_iio_format_group, +}; + +static struct attribute *skx_uncore_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_inv.attr, + &format_attr_thresh8.attr, + NULL, +}; + +static struct attribute_group skx_uncore_format_group = { + .name = "format", + .attrs = skx_uncore_formats_attr, +}; + +static struct intel_uncore_type skx_uncore_irp = { + .name = "irp", + .num_counters = 2, + .num_boxes = 5, + .perf_ctr_bits = 48, + .event_ctl = SKX_IRP0_MSR_PMON_CTL0, + .perf_ctr = 
SKX_IRP0_MSR_PMON_CTR0, + .event_mask = SNBEP_PMON_RAW_EVENT_MASK, + .box_ctl = SKX_IRP0_MSR_PMON_BOX_CTL, + .msr_offset = SKX_IRP_MSR_OFFSET, + .ops = &skx_uncore_iio_ops, + .format_group = &skx_uncore_format_group, +}; + +static struct intel_uncore_ops skx_uncore_pcu_ops = { + IVBEP_UNCORE_MSR_OPS_COMMON_INIT(), + .hw_config = hswep_pcu_hw_config, + .get_constraint = snbep_pcu_get_constraint, + .put_constraint = snbep_pcu_put_constraint, +}; + +static struct intel_uncore_type skx_uncore_pcu = { + .name = "pcu", + .num_counters = 4, + .num_boxes = 1, + .perf_ctr_bits = 48, + .perf_ctr = HSWEP_PCU_MSR_PMON_CTR0, + .event_ctl = HSWEP_PCU_MSR_PMON_CTL0, + .event_mask = SNBEP_PCU_MSR_PMON_RAW_EVENT_MASK, + .box_ctl = HSWEP_PCU_MSR_PMON_BOX_CTL, + .num_shared_regs = 1, + .ops = &skx_uncore_pcu_ops, + .format_group = &snbep_uncore_pcu_format_group, +}; + +static struct intel_uncore_type *skx_msr_uncores[] = { + &skx_uncore_ubox, + &skx_uncore_chabox, + &skx_uncore_iio, + &skx_uncore_irp, + &skx_uncore_pcu, + NULL, +}; + +static int skx_count_chabox(void) +{ + struct pci_dev *chabox_dev = NULL; + int bus, count = 0; + + while (1) { + chabox_dev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x208d, chabox_dev); + if (!chabox_dev) + break; + if (count == 0) + bus = chabox_dev->bus->number; + if (bus != chabox_dev->bus->number) + break; + count++; + } + + pci_dev_put(chabox_dev); + return count; +} + +void skx_uncore_cpu_init(void) +{ + skx_uncore_chabox.num_boxes = skx_count_chabox(); + uncore_msr_uncores = skx_msr_uncores; +} + +static struct intel_uncore_type skx_uncore_imc = { + .name = "imc", + .num_counters = 4, + .num_boxes = 6, + .perf_ctr_bits = 48, + .fixed_ctr_bits = 48, + .fixed_ctr = SNBEP_MC_CHy_PCI_PMON_FIXED_CTR, + .fixed_ctl = SNBEP_MC_CHy_PCI_PMON_FIXED_CTL, + .event_descs = hswep_uncore_imc_events, + .perf_ctr = SNBEP_PCI_PMON_CTR0, + .event_ctl = SNBEP_PCI_PMON_CTL0, + .event_mask = SNBEP_PMON_RAW_EVENT_MASK, + .box_ctl = SNBEP_PCI_PMON_BOX_CTL, + .ops = &ivbep_uncore_pci_ops, + .format_group = &skx_uncore_format_group, +}; + +static struct attribute *skx_upi_uncore_formats_attr[] = { + &format_attr_event_ext.attr, + &format_attr_umask_ext.attr, + &format_attr_edge.attr, + &format_attr_inv.attr, + &format_attr_thresh8.attr, + NULL, +}; + +static struct attribute_group skx_upi_uncore_format_group = { + .name = "format", + .attrs = skx_upi_uncore_formats_attr, +}; + +static void skx_upi_uncore_pci_init_box(struct intel_uncore_box *box) +{ + struct pci_dev *pdev = box->pci_dev; + + __set_bit(UNCORE_BOX_FLAG_CTL_OFFS8, &box->flags); + pci_write_config_dword(pdev, SKX_UPI_PCI_PMON_BOX_CTL, IVBEP_PMON_BOX_CTL_INT); +} + +static struct intel_uncore_ops skx_upi_uncore_pci_ops = { + .init_box = skx_upi_uncore_pci_init_box, + .disable_box = snbep_uncore_pci_disable_box, + .enable_box = snbep_uncore_pci_enable_box, + .disable_event = snbep_uncore_pci_disable_event, + .enable_event = snbep_uncore_pci_enable_event, + .read_counter = snbep_uncore_pci_read_counter, +}; + +static struct intel_uncore_type skx_uncore_upi = { + .name = "upi", + .num_counters = 4, + .num_boxes = 3, + .perf_ctr_bits = 48, + .perf_ctr = SKX_UPI_PCI_PMON_CTR0, + .event_ctl = SKX_UPI_PCI_PMON_CTL0, + .event_mask = SNBEP_QPI_PCI_PMON_RAW_EVENT_MASK, + .event_mask_ext = SKX_PMON_CTL_UMASK_EXT, + .box_ctl = SKX_UPI_PCI_PMON_BOX_CTL, + .ops = &skx_upi_uncore_pci_ops, + .format_group = &skx_upi_uncore_format_group, +}; + +static void skx_m2m_uncore_pci_init_box(struct intel_uncore_box *box) +{ + struct pci_dev *pdev = box->pci_dev; 
+ + __set_bit(UNCORE_BOX_FLAG_CTL_OFFS8, &box->flags); + pci_write_config_dword(pdev, SKX_M2M_PCI_PMON_BOX_CTL, IVBEP_PMON_BOX_CTL_INT); +} + +static struct intel_uncore_ops skx_m2m_uncore_pci_ops = { + .init_box = skx_m2m_uncore_pci_init_box, + .disable_box = snbep_uncore_pci_disable_box, + .enable_box = snbep_uncore_pci_enable_box, + .disable_event = snbep_uncore_pci_disable_event, + .enable_event = snbep_uncore_pci_enable_event, + .read_counter = snbep_uncore_pci_read_counter, +}; + +static struct intel_uncore_type skx_uncore_m2m = { + .name = "m2m", + .num_counters = 4, + .num_boxes = 2, + .perf_ctr_bits = 48, + .perf_ctr = SKX_M2M_PCI_PMON_CTR0, + .event_ctl = SKX_M2M_PCI_PMON_CTL0, + .event_mask = SNBEP_PMON_RAW_EVENT_MASK, + .box_ctl = SKX_M2M_PCI_PMON_BOX_CTL, + .ops = &skx_m2m_uncore_pci_ops, + .format_group = &skx_uncore_format_group, +}; + +static struct event_constraint skx_uncore_m2pcie_constraints[] = { + UNCORE_EVENT_CONSTRAINT(0x23, 0x3), + EVENT_CONSTRAINT_END +}; + +static struct intel_uncore_type skx_uncore_m2pcie = { + .name = "m2pcie", + .num_counters = 4, + .num_boxes = 4, + .perf_ctr_bits = 48, + .constraints = skx_uncore_m2pcie_constraints, + .perf_ctr = SNBEP_PCI_PMON_CTR0, + .event_ctl = SNBEP_PCI_PMON_CTL0, + .event_mask = SNBEP_PMON_RAW_EVENT_MASK, + .box_ctl = SNBEP_PCI_PMON_BOX_CTL, + .ops = &ivbep_uncore_pci_ops, + .format_group = &skx_uncore_format_group, +}; + +static struct event_constraint skx_uncore_m3upi_constraints[] = { + UNCORE_EVENT_CONSTRAINT(0x1d, 0x1), + UNCORE_EVENT_CONSTRAINT(0x1e, 0x1), + UNCORE_EVENT_CONSTRAINT(0x40, 0x7), + UNCORE_EVENT_CONSTRAINT(0x4e, 0x7), + UNCORE_EVENT_CONSTRAINT(0x4f, 0x7), + UNCORE_EVENT_CONSTRAINT(0x50, 0x7), + UNCORE_EVENT_CONSTRAINT(0x51, 0x7), + UNCORE_EVENT_CONSTRAINT(0x52, 0x7), + EVENT_CONSTRAINT_END +}; + +static struct intel_uncore_type skx_uncore_m3upi = { + .name = "m3upi", + .num_counters = 3, + .num_boxes = 3, + .perf_ctr_bits = 48, + .constraints = skx_uncore_m3upi_constraints, + .perf_ctr = SNBEP_PCI_PMON_CTR0, + .event_ctl = SNBEP_PCI_PMON_CTL0, + .event_mask = SNBEP_PMON_RAW_EVENT_MASK, + .box_ctl = SNBEP_PCI_PMON_BOX_CTL, + .ops = &ivbep_uncore_pci_ops, + .format_group = &skx_uncore_format_group, +}; + +enum { + SKX_PCI_UNCORE_IMC, + SKX_PCI_UNCORE_M2M, + SKX_PCI_UNCORE_UPI, + SKX_PCI_UNCORE_M2PCIE, + SKX_PCI_UNCORE_M3UPI, +}; + +static struct intel_uncore_type *skx_pci_uncores[] = { + [SKX_PCI_UNCORE_IMC] = &skx_uncore_imc, + [SKX_PCI_UNCORE_M2M] = &skx_uncore_m2m, + [SKX_PCI_UNCORE_UPI] = &skx_uncore_upi, + [SKX_PCI_UNCORE_M2PCIE] = &skx_uncore_m2pcie, + [SKX_PCI_UNCORE_M3UPI] = &skx_uncore_m3upi, + NULL, +}; + +static const struct pci_device_id skx_uncore_pci_ids[] = { + { /* MC0 Channel 0 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2042), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(10, 2, SKX_PCI_UNCORE_IMC, 0), + }, + { /* MC0 Channel 1 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2046), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(10, 6, SKX_PCI_UNCORE_IMC, 1), + }, + { /* MC0 Channel 2 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204a), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(11, 2, SKX_PCI_UNCORE_IMC, 2), + }, + { /* MC1 Channel 0 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2042), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(12, 2, SKX_PCI_UNCORE_IMC, 3), + }, + { /* MC1 Channel 1 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2046), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(12, 6, SKX_PCI_UNCORE_IMC, 4), + }, + { /* MC1 Channel 2 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204a), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(13, 2, 
SKX_PCI_UNCORE_IMC, 5), + }, + { /* M2M0 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2066), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(8, 0, SKX_PCI_UNCORE_M2M, 0), + }, + { /* M2M1 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2066), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(9, 0, SKX_PCI_UNCORE_M2M, 1), + }, + { /* UPI0 Link 0 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2058), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(14, 0, SKX_PCI_UNCORE_UPI, 0), + }, + { /* UPI0 Link 1 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2058), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(15, 0, SKX_PCI_UNCORE_UPI, 1), + }, + { /* UPI1 Link 2 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2058), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(16, 0, SKX_PCI_UNCORE_UPI, 2), + }, + { /* M2PCIe 0 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2088), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(21, 1, SKX_PCI_UNCORE_M2PCIE, 0), + }, + { /* M2PCIe 1 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2088), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(22, 1, SKX_PCI_UNCORE_M2PCIE, 1), + }, + { /* M2PCIe 2 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2088), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(23, 1, SKX_PCI_UNCORE_M2PCIE, 2), + }, + { /* M2PCIe 3 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2088), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(21, 5, SKX_PCI_UNCORE_M2PCIE, 3), + }, + { /* M3UPI0 Link 0 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204C), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 0, SKX_PCI_UNCORE_M3UPI, 0), + }, + { /* M3UPI0 Link 1 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204D), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 1, SKX_PCI_UNCORE_M3UPI, 1), + }, + { /* M3UPI1 Link 2 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204C), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 4, SKX_PCI_UNCORE_M3UPI, 2), + }, + { /* end: all zeroes */ } +}; + + +static struct pci_driver skx_uncore_pci_driver = { + .name = "skx_uncore", + .id_table = skx_uncore_pci_ids, +}; + +int skx_uncore_pci_init(void) +{ + /* need to double check pci address */ + int ret = snbep_pci2phy_map_init(0x2014, SKX_CPUNODEID, SKX_GIDNIDMAP, false); + + if (ret) + return ret; + + uncore_pci_uncores = skx_pci_uncores; + uncore_pci_driver = &skx_uncore_pci_driver; + return 0; +} + +/* end of SKX uncore support */ -- cgit v0.10.2 From 7a023fd239498edd269838784f12888147d970da Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 9 Sep 2016 12:00:08 -0300 Subject: perf probe: Fix dwarf regs table for x86_64 In 293d5b439483 ("perf probe: Support probing on offline cross-arch binary") DWARF register tables were introduced for many architectures, with the one for the "dx" register being broken for x86_64, which got noticed by the 'perf test bpf' testcase, that has this difference from a successful run to one that fails, with the aforementioned patch: -Writing event: p:perf_bpf_probe/func _text+5197232 f_mode=+68(%di):x32 offset=%si:s64 orig=dx:s32 -Failed to write event: Invalid argument -bpf_probe: failed to apply perf probe eventsFailed to add events selected by BPF +Writing event: p:perf_bpf_probe/func _text+5197232 f_mode=+68(%di):x32 offset=%si:s64 orig=%dx:s32 Add the missing '%' to '%dx' to fix this. 
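A minimal illustration of how that table is consumed when composing a probe argument; the table contents are copied from the fixed version in the diff below, while the DWARF register number and the argument string are shown for illustration only:

#include <stdio.h>

static const char * const x86_64_regstr_tbl[] = {
	"%ax", "%dx", "%cx", "%bx", "%si", "%di",
	"%bp", "%sp", "%r8", "%r9", "%r10", "%r11",
	"%r12", "%r13", "%r14", "%r15",
};

int main(void)
{
	int dwarf_regnum = 1;	/* DWARF register 1 maps to %dx on x86_64 */

	/* with the broken table this printed "orig=dx:s32", which the kernel rejects */
	printf("orig=%s:s32\n", x86_64_regstr_tbl[dwarf_regnum]);
	return 0;
}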
Acked-by: Masami Hiramatsu Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Fixes: 293d5b439483 ("perf probe: Support probing on offline cross-arch binary") Link: https://lkml.kernel.org/r/20160909145955.GC32585@kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/arch/x86/include/dwarf-regs-table.h b/tools/perf/arch/x86/include/dwarf-regs-table.h index 39ac7cb..9b5e5cb 100644 --- a/tools/perf/arch/x86/include/dwarf-regs-table.h +++ b/tools/perf/arch/x86/include/dwarf-regs-table.h @@ -7,7 +7,7 @@ static const char * const x86_32_regstr_tbl[] = { }; static const char * const x86_64_regstr_tbl[] = { - "%ax", "dx", "%cx", "%bx", "%si", "%di", + "%ax", "%dx", "%cx", "%bx", "%si", "%di", "%bp", "%sp", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15", }; -- cgit v0.10.2 From d9ea48bc4e7cc297ca1073fa3f90ed80d964b7b4 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 12 Sep 2016 15:19:52 +0900 Subject: perf hists browser: Fix event group display Milian reported that the event group on TUI shows duplicated overhead. This was due to a bug on calculating hpp->buf position. The hpp_advance() was called from __hpp__slsmg_color_printf() on TUI but it's already called from the hpp__call_print_fn macro in __hpp__fmt(). The end result is that the print function returns number of bytes it printed but the buffer advanced twice of the length. This is generally not a problem since it doesn't need to access the buffer again. But with event group, overhead needs to be printed multiple times and hist_entry__snprintf_alignment() tries to fill the space with buffer after it printed. So it (brokenly) showed the last overhead again. The bug was there from the beginning, but I think it's only revealed when the alignment function was added. Reported-by: Milian Wolff Signed-off-by: Namhyung Kim Cc: Andi Kleen Cc: Jiri Olsa Cc: Peter Zijlstra Fixes: 89fee7094323 ("perf hists: Do column alignment on the format iterator") Link: http://lkml.kernel.org/r/20160912061958.16656-2-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index f0611c9..35e44b1 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -1097,7 +1097,6 @@ static int __hpp__slsmg_color_printf(struct perf_hpp *hpp, const char *fmt, ...) ret = scnprintf(hpp->buf, hpp->size, fmt, len, percent); ui_browser__printf(arg->b, "%s", hpp->buf); - advance_hpp(hpp, ret); return ret; } -- cgit v0.10.2 From f3539c12d8196ce0a1993364d30b3a18908470d1 Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Mon, 12 Sep 2016 12:54:29 +0000 Subject: tools include: Add uapi mman.h for each architecture Some mmap related macros have different values for different architectures. This patch introduces uapi mman.h for each architectures. 
Three headers are cloned from kernel include to tools/include: tools/include/uapi/asm-generic/mman-common.h tools/include/uapi/asm-generic/mman.h tools/include/uapi/linux/mman.h The main part of this patch is generated by following script: macros=`cat $0 | awk 'V==1 {print}; /^# start macro list/ {V=1}'` for arch in `ls tools/arch` do [ -d tools/arch/$arch/include/uapi/asm ] || mkdir -p tools/arch/$arch/include/uapi/asm src=arch/$arch/include/uapi/asm/mman.h target=tools/arch/$arch/include/uapi/asm/mman.h guard="TOOLS_ARCH_"`echo $arch | awk '{print toupper($0)}'`_UAPI_ASM_MMAN_FIX_H echo '#ifndef '$guard > $target echo '#define '$guard >> $target [ -f $src ] && for m in $macros do if grep '#define[ \t]*'$m $src > /dev/null 2>&1 then grep -h '#define[ \t]*'$m $src | sed 's/[ \t]*\/\*.*$//g' >> $target fi done if [ -f $src ] then grep '#include > $target else echo "#include " >> $target fi echo '#endif' >> $target echo "$target" done exit 0 # Following macros are extracted from: # tools/perf/trace/beauty/mmap.c # # start macro list MADV_DODUMP MADV_DOFORK MADV_DONTDUMP MADV_DONTFORK MADV_DONTNEED MADV_HUGEPAGE MADV_HWPOISON MADV_MERGEABLE MADV_NOHUGEPAGE MADV_NORMAL MADV_RANDOM MADV_REMOVE MADV_SEQUENTIAL MADV_SOFT_OFFLINE MADV_UNMERGEABLE MADV_WILLNEED MAP_32BIT MAP_ANONYMOUS MAP_DENYWRITE MAP_EXECUTABLE MAP_FILE MAP_FIXED MAP_GROWSDOWN MAP_HUGETLB MAP_LOCKED MAP_NONBLOCK MAP_NORESERVE MAP_POPULATE MAP_PRIVATE MAP_SHARED MAP_STACK MAP_UNINITIALIZED MREMAP_FIXED MREMAP_MAYMOVE PROT_EXEC PROT_GROWSDOWN PROT_GROWSUP PROT_NONE PROT_READ PROT_SEM PROT_WRITE Signed-off-by: Wang Nan Cc: Zefan Li Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1473684871-209320-2-git-send-email-wangnan0@huawei.com [ Added new files to tools/perf/MANIFEST to fix the detached tarball build, add mman.h for ARC ] Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/arch/alpha/include/uapi/asm/mman.h b/tools/arch/alpha/include/uapi/asm/mman.h new file mode 100644 index 0000000..6ed4ad4 --- /dev/null +++ b/tools/arch/alpha/include/uapi/asm/mman.h @@ -0,0 +1,38 @@ +#ifndef TOOLS_ARCH_ALPHA_UAPI_ASM_MMAN_FIX_H +#define TOOLS_ARCH_ALPHA_UAPI_ASM_MMAN_FIX_H +#define MADV_DODUMP 17 +#define MADV_DOFORK 11 +#define MADV_DONTDUMP 16 +#define MADV_DONTFORK 10 +#define MADV_DONTNEED 6 +#define MADV_HUGEPAGE 14 +#define MADV_MERGEABLE 12 +#define MADV_NOHUGEPAGE 15 +#define MADV_NORMAL 0 +#define MADV_RANDOM 1 +#define MADV_REMOVE 9 +#define MADV_SEQUENTIAL 2 +#define MADV_UNMERGEABLE 13 +#define MADV_WILLNEED 3 +#define MAP_ANONYMOUS 0x10 +#define MAP_DENYWRITE 0x02000 +#define MAP_EXECUTABLE 0x04000 +#define MAP_FILE 0 +#define MAP_FIXED 0x100 +#define MAP_GROWSDOWN 0x01000 +#define MAP_HUGETLB 0x100000 +#define MAP_LOCKED 0x08000 +#define MAP_NONBLOCK 0x40000 +#define MAP_NORESERVE 0x10000 +#define MAP_POPULATE 0x20000 +#define MAP_PRIVATE 0x02 +#define MAP_SHARED 0x01 +#define MAP_STACK 0x80000 +#define PROT_EXEC 0x4 +#define PROT_GROWSDOWN 0x01000000 +#define PROT_GROWSUP 0x02000000 +#define PROT_NONE 0x0 +#define PROT_READ 0x1 +#define PROT_SEM 0x8 +#define PROT_WRITE 0x2 +#endif diff --git a/tools/arch/arc/include/uapi/asm/mman.h b/tools/arch/arc/include/uapi/asm/mman.h new file mode 100644 index 0000000..f76765d --- /dev/null +++ b/tools/arch/arc/include/uapi/asm/mman.h @@ -0,0 +1,4 @@ +#ifndef TOOLS_ARCH_ARC_UAPI_ASM_MMAN_FIX_H +#define TOOLS_ARCH_ARC_UAPI_ASM_MMAN_FIX_H +#include +#endif diff --git a/tools/arch/arm/include/uapi/asm/mman.h b/tools/arch/arm/include/uapi/asm/mman.h new file mode 100644 index 
0000000..f200638 --- /dev/null +++ b/tools/arch/arm/include/uapi/asm/mman.h @@ -0,0 +1,4 @@ +#ifndef TOOLS_ARCH_ARM_UAPI_ASM_MMAN_FIX_H +#define TOOLS_ARCH_ARM_UAPI_ASM_MMAN_FIX_H +#include +#endif diff --git a/tools/arch/arm64/include/uapi/asm/mman.h b/tools/arch/arm64/include/uapi/asm/mman.h new file mode 100644 index 0000000..a7dd975 --- /dev/null +++ b/tools/arch/arm64/include/uapi/asm/mman.h @@ -0,0 +1,4 @@ +#ifndef TOOLS_ARCH_ARM64_UAPI_ASM_MMAN_FIX_H +#define TOOLS_ARCH_ARM64_UAPI_ASM_MMAN_FIX_H +#include +#endif diff --git a/tools/arch/frv/include/uapi/asm/mman.h b/tools/arch/frv/include/uapi/asm/mman.h new file mode 100644 index 0000000..99bba05 --- /dev/null +++ b/tools/arch/frv/include/uapi/asm/mman.h @@ -0,0 +1,4 @@ +#ifndef TOOLS_ARCH_FRV_UAPI_ASM_MMAN_FIX_H +#define TOOLS_ARCH_FRV_UAPI_ASM_MMAN_FIX_H +#include +#endif diff --git a/tools/arch/h8300/include/uapi/asm/mman.h b/tools/arch/h8300/include/uapi/asm/mman.h new file mode 100644 index 0000000..df95096 --- /dev/null +++ b/tools/arch/h8300/include/uapi/asm/mman.h @@ -0,0 +1,4 @@ +#ifndef TOOLS_ARCH_H8300_UAPI_ASM_MMAN_FIX_H +#define TOOLS_ARCH_H8300_UAPI_ASM_MMAN_FIX_H +#include +#endif diff --git a/tools/arch/hexagon/include/uapi/asm/mman.h b/tools/arch/hexagon/include/uapi/asm/mman.h new file mode 100644 index 0000000..f1adcce --- /dev/null +++ b/tools/arch/hexagon/include/uapi/asm/mman.h @@ -0,0 +1,4 @@ +#ifndef TOOLS_ARCH_HEXAGON_UAPI_ASM_MMAN_FIX_H +#define TOOLS_ARCH_HEXAGON_UAPI_ASM_MMAN_FIX_H +#include +#endif diff --git a/tools/arch/ia64/include/uapi/asm/mman.h b/tools/arch/ia64/include/uapi/asm/mman.h new file mode 100644 index 0000000..23420eb --- /dev/null +++ b/tools/arch/ia64/include/uapi/asm/mman.h @@ -0,0 +1,4 @@ +#ifndef TOOLS_ARCH_IA64_UAPI_ASM_MMAN_FIX_H +#define TOOLS_ARCH_IA64_UAPI_ASM_MMAN_FIX_H +#include +#endif diff --git a/tools/arch/m32r/include/uapi/asm/mman.h b/tools/arch/m32r/include/uapi/asm/mman.h new file mode 100644 index 0000000..a35ebd6 --- /dev/null +++ b/tools/arch/m32r/include/uapi/asm/mman.h @@ -0,0 +1,4 @@ +#ifndef TOOLS_ARCH_M32R_UAPI_ASM_MMAN_FIX_H +#define TOOLS_ARCH_M32R_UAPI_ASM_MMAN_FIX_H +#include +#endif diff --git a/tools/arch/microblaze/include/uapi/asm/mman.h b/tools/arch/microblaze/include/uapi/asm/mman.h new file mode 100644 index 0000000..75f460a --- /dev/null +++ b/tools/arch/microblaze/include/uapi/asm/mman.h @@ -0,0 +1,4 @@ +#ifndef TOOLS_ARCH_MICROBLAZE_UAPI_ASM_MMAN_FIX_H +#define TOOLS_ARCH_MICROBLAZE_UAPI_ASM_MMAN_FIX_H +#include +#endif diff --git a/tools/arch/mips/include/uapi/asm/mman.h b/tools/arch/mips/include/uapi/asm/mman.h new file mode 100644 index 0000000..db88fa4 --- /dev/null +++ b/tools/arch/mips/include/uapi/asm/mman.h @@ -0,0 +1,39 @@ +#ifndef TOOLS_ARCH_MIPS_UAPI_ASM_MMAN_FIX_H +#define TOOLS_ARCH_MIPS_UAPI_ASM_MMAN_FIX_H +#define MADV_DODUMP 17 +#define MADV_DOFORK 11 +#define MADV_DONTDUMP 16 +#define MADV_DONTFORK 10 +#define MADV_DONTNEED 4 +#define MADV_HUGEPAGE 14 +#define MADV_HWPOISON 100 +#define MADV_MERGEABLE 12 +#define MADV_NOHUGEPAGE 15 +#define MADV_NORMAL 0 +#define MADV_RANDOM 1 +#define MADV_REMOVE 9 +#define MADV_SEQUENTIAL 2 +#define MADV_UNMERGEABLE 13 +#define MADV_WILLNEED 3 +#define MAP_ANONYMOUS 0x0800 +#define MAP_DENYWRITE 0x2000 +#define MAP_EXECUTABLE 0x4000 +#define MAP_FILE 0 +#define MAP_FIXED 0x010 +#define MAP_GROWSDOWN 0x1000 +#define MAP_HUGETLB 0x80000 +#define MAP_LOCKED 0x8000 +#define MAP_NONBLOCK 0x20000 +#define MAP_NORESERVE 0x0400 +#define MAP_POPULATE 0x10000 +#define MAP_PRIVATE 0x002 +#define 
MAP_SHARED 0x001 +#define MAP_STACK 0x40000 +#define PROT_EXEC 0x04 +#define PROT_GROWSDOWN 0x01000000 +#define PROT_GROWSUP 0x02000000 +#define PROT_NONE 0x00 +#define PROT_READ 0x01 +#define PROT_SEM 0x10 +#define PROT_WRITE 0x02 +#endif diff --git a/tools/arch/mn10300/include/uapi/asm/mman.h b/tools/arch/mn10300/include/uapi/asm/mman.h new file mode 100644 index 0000000..81faa5d --- /dev/null +++ b/tools/arch/mn10300/include/uapi/asm/mman.h @@ -0,0 +1,4 @@ +#ifndef TOOLS_ARCH_MN10300_UAPI_ASM_MMAN_FIX_H +#define TOOLS_ARCH_MN10300_UAPI_ASM_MMAN_FIX_H +#include +#endif diff --git a/tools/arch/parisc/include/uapi/asm/mman.h b/tools/arch/parisc/include/uapi/asm/mman.h new file mode 100644 index 0000000..c4a9d9f --- /dev/null +++ b/tools/arch/parisc/include/uapi/asm/mman.h @@ -0,0 +1,38 @@ +#ifndef TOOLS_ARCH_PARISC_UAPI_ASM_MMAN_FIX_H +#define TOOLS_ARCH_PARISC_UAPI_ASM_MMAN_FIX_H +#define MADV_DODUMP 70 +#define MADV_DOFORK 11 +#define MADV_DONTDUMP 69 +#define MADV_DONTFORK 10 +#define MADV_DONTNEED 4 +#define MADV_HUGEPAGE 67 +#define MADV_MERGEABLE 65 +#define MADV_NOHUGEPAGE 68 +#define MADV_NORMAL 0 +#define MADV_RANDOM 1 +#define MADV_REMOVE 9 +#define MADV_SEQUENTIAL 2 +#define MADV_UNMERGEABLE 66 +#define MADV_WILLNEED 3 +#define MAP_ANONYMOUS 0x10 +#define MAP_DENYWRITE 0x0800 +#define MAP_EXECUTABLE 0x1000 +#define MAP_FILE 0 +#define MAP_FIXED 0x04 +#define MAP_GROWSDOWN 0x8000 +#define MAP_HUGETLB 0x80000 +#define MAP_LOCKED 0x2000 +#define MAP_NONBLOCK 0x20000 +#define MAP_NORESERVE 0x4000 +#define MAP_POPULATE 0x10000 +#define MAP_PRIVATE 0x02 +#define MAP_SHARED 0x01 +#define MAP_STACK 0x40000 +#define PROT_EXEC 0x4 +#define PROT_GROWSDOWN 0x01000000 +#define PROT_GROWSUP 0x02000000 +#define PROT_NONE 0x0 +#define PROT_READ 0x1 +#define PROT_SEM 0x8 +#define PROT_WRITE 0x2 +#endif diff --git a/tools/arch/powerpc/include/uapi/asm/mman.h b/tools/arch/powerpc/include/uapi/asm/mman.h new file mode 100644 index 0000000..7a56ab9 --- /dev/null +++ b/tools/arch/powerpc/include/uapi/asm/mman.h @@ -0,0 +1,13 @@ +#ifndef TOOLS_ARCH_POWERPC_UAPI_ASM_MMAN_FIX_H +#define TOOLS_ARCH_POWERPC_UAPI_ASM_MMAN_FIX_H +#define MAP_DENYWRITE 0x0800 +#define MAP_EXECUTABLE 0x1000 +#define MAP_GROWSDOWN 0x0100 +#define MAP_HUGETLB 0x40000 +#define MAP_LOCKED 0x80 +#define MAP_NONBLOCK 0x10000 +#define MAP_NORESERVE 0x40 +#define MAP_POPULATE 0x8000 +#define MAP_STACK 0x20000 +#include +#endif diff --git a/tools/arch/s390/include/uapi/asm/mman.h b/tools/arch/s390/include/uapi/asm/mman.h new file mode 100644 index 0000000..fe53b91 --- /dev/null +++ b/tools/arch/s390/include/uapi/asm/mman.h @@ -0,0 +1,4 @@ +#ifndef TOOLS_ARCH_S390_UAPI_ASM_MMAN_FIX_H +#define TOOLS_ARCH_S390_UAPI_ASM_MMAN_FIX_H +#include +#endif diff --git a/tools/arch/score/include/uapi/asm/mman.h b/tools/arch/score/include/uapi/asm/mman.h new file mode 100644 index 0000000..ba1ee9c --- /dev/null +++ b/tools/arch/score/include/uapi/asm/mman.h @@ -0,0 +1,4 @@ +#ifndef TOOLS_ARCH_SCORE_UAPI_ASM_MMAN_FIX_H +#define TOOLS_ARCH_SCORE_UAPI_ASM_MMAN_FIX_H +#include +#endif diff --git a/tools/arch/sh/include/uapi/asm/mman.h b/tools/arch/sh/include/uapi/asm/mman.h new file mode 100644 index 0000000..5a47d8c --- /dev/null +++ b/tools/arch/sh/include/uapi/asm/mman.h @@ -0,0 +1,4 @@ +#ifndef TOOLS_ARCH_SH_UAPI_ASM_MMAN_FIX_H +#define TOOLS_ARCH_SH_UAPI_ASM_MMAN_FIX_H +#include +#endif diff --git a/tools/arch/sparc/include/uapi/asm/mman.h b/tools/arch/sparc/include/uapi/asm/mman.h new file mode 100644 index 0000000..b88f3ac --- /dev/null +++ 
b/tools/arch/sparc/include/uapi/asm/mman.h @@ -0,0 +1,13 @@ +#ifndef TOOLS_ARCH_SPARC_UAPI_ASM_MMAN_FIX_H +#define TOOLS_ARCH_SPARC_UAPI_ASM_MMAN_FIX_H +#define MAP_DENYWRITE 0x0800 +#define MAP_EXECUTABLE 0x1000 +#define MAP_GROWSDOWN 0x0200 +#define MAP_HUGETLB 0x40000 +#define MAP_LOCKED 0x100 +#define MAP_NONBLOCK 0x10000 +#define MAP_NORESERVE 0x40 +#define MAP_POPULATE 0x8000 +#define MAP_STACK 0x20000 +#include +#endif diff --git a/tools/arch/tile/include/uapi/asm/mman.h b/tools/arch/tile/include/uapi/asm/mman.h new file mode 100644 index 0000000..b0a054f --- /dev/null +++ b/tools/arch/tile/include/uapi/asm/mman.h @@ -0,0 +1,13 @@ +#ifndef TOOLS_ARCH_TILE_UAPI_ASM_MMAN_FIX_H +#define TOOLS_ARCH_TILE_UAPI_ASM_MMAN_FIX_H +#define MAP_DENYWRITE 0x0800 +#define MAP_EXECUTABLE 0x1000 +#define MAP_GROWSDOWN 0x0100 +#define MAP_HUGETLB 0x4000 +#define MAP_LOCKED 0x0200 +#define MAP_NONBLOCK 0x0080 +#define MAP_NORESERVE 0x0400 +#define MAP_POPULATE 0x0040 +#define MAP_STACK MAP_GROWSDOWN +#include +#endif diff --git a/tools/arch/x86/include/uapi/asm/mman.h b/tools/arch/x86/include/uapi/asm/mman.h new file mode 100644 index 0000000..b73c1af --- /dev/null +++ b/tools/arch/x86/include/uapi/asm/mman.h @@ -0,0 +1,5 @@ +#ifndef TOOLS_ARCH_X86_UAPI_ASM_MMAN_FIX_H +#define TOOLS_ARCH_X86_UAPI_ASM_MMAN_FIX_H +#define MAP_32BIT 0x40 +#include +#endif diff --git a/tools/arch/xtensa/include/uapi/asm/mman.h b/tools/arch/xtensa/include/uapi/asm/mman.h new file mode 100644 index 0000000..1c89538 --- /dev/null +++ b/tools/arch/xtensa/include/uapi/asm/mman.h @@ -0,0 +1,38 @@ +#ifndef TOOLS_ARCH_XTENSA_UAPI_ASM_MMAN_FIX_H +#define TOOLS_ARCH_XTENSA_UAPI_ASM_MMAN_FIX_H +#define MADV_DODUMP 17 +#define MADV_DOFORK 11 +#define MADV_DONTDUMP 16 +#define MADV_DONTFORK 10 +#define MADV_DONTNEED 4 +#define MADV_HUGEPAGE 14 +#define MADV_MERGEABLE 12 +#define MADV_NOHUGEPAGE 15 +#define MADV_NORMAL 0 +#define MADV_RANDOM 1 +#define MADV_REMOVE 9 +#define MADV_SEQUENTIAL 2 +#define MADV_UNMERGEABLE 13 +#define MADV_WILLNEED 3 +#define MAP_ANONYMOUS 0x0800 +#define MAP_DENYWRITE 0x2000 +#define MAP_EXECUTABLE 0x4000 +#define MAP_FILE 0 +#define MAP_FIXED 0x010 +#define MAP_GROWSDOWN 0x1000 +#define MAP_HUGETLB 0x80000 +#define MAP_LOCKED 0x8000 +#define MAP_NONBLOCK 0x20000 +#define MAP_NORESERVE 0x0400 +#define MAP_POPULATE 0x10000 +#define MAP_PRIVATE 0x002 +#define MAP_SHARED 0x001 +#define MAP_STACK 0x40000 +#define PROT_EXEC 0x4 +#define PROT_GROWSDOWN 0x01000000 +#define PROT_GROWSUP 0x02000000 +#define PROT_NONE 0x0 +#define PROT_READ 0x1 +#define PROT_SEM 0x10 +#define PROT_WRITE 0x2 +#endif diff --git a/tools/include/uapi/asm-generic/mman-common.h b/tools/include/uapi/asm-generic/mman-common.h new file mode 100644 index 0000000..5827438 --- /dev/null +++ b/tools/include/uapi/asm-generic/mman-common.h @@ -0,0 +1,75 @@ +#ifndef __ASM_GENERIC_MMAN_COMMON_H +#define __ASM_GENERIC_MMAN_COMMON_H + +/* + Author: Michael S. Tsirkin , Mellanox Technologies Ltd. 
+ Based on: asm-xxx/mman.h +*/ + +#define PROT_READ 0x1 /* page can be read */ +#define PROT_WRITE 0x2 /* page can be written */ +#define PROT_EXEC 0x4 /* page can be executed */ +#define PROT_SEM 0x8 /* page may be used for atomic ops */ +#define PROT_NONE 0x0 /* page can not be accessed */ +#define PROT_GROWSDOWN 0x01000000 /* mprotect flag: extend change to start of growsdown vma */ +#define PROT_GROWSUP 0x02000000 /* mprotect flag: extend change to end of growsup vma */ + +#define MAP_SHARED 0x01 /* Share changes */ +#define MAP_PRIVATE 0x02 /* Changes are private */ +#define MAP_TYPE 0x0f /* Mask for type of mapping */ +#define MAP_FIXED 0x10 /* Interpret addr exactly */ +#define MAP_ANONYMOUS 0x20 /* don't use a file */ +#ifdef CONFIG_MMAP_ALLOW_UNINITIALIZED +# define MAP_UNINITIALIZED 0x4000000 /* For anonymous mmap, memory could be uninitialized */ +#else +# define MAP_UNINITIALIZED 0x0 /* Don't support this flag */ +#endif + +/* + * Flags for mlock + */ +#define MLOCK_ONFAULT 0x01 /* Lock pages in range after they are faulted in, do not prefault */ + +#define MS_ASYNC 1 /* sync memory asynchronously */ +#define MS_INVALIDATE 2 /* invalidate the caches */ +#define MS_SYNC 4 /* synchronous memory sync */ + +#define MADV_NORMAL 0 /* no further special treatment */ +#define MADV_RANDOM 1 /* expect random page references */ +#define MADV_SEQUENTIAL 2 /* expect sequential page references */ +#define MADV_WILLNEED 3 /* will need these pages */ +#define MADV_DONTNEED 4 /* don't need these pages */ + +/* common parameters: try to keep these consistent across architectures */ +#define MADV_FREE 8 /* free pages only if memory pressure */ +#define MADV_REMOVE 9 /* remove these pages & resources */ +#define MADV_DONTFORK 10 /* don't inherit across fork */ +#define MADV_DOFORK 11 /* do inherit across fork */ +#define MADV_HWPOISON 100 /* poison a page for testing */ +#define MADV_SOFT_OFFLINE 101 /* soft offline page for testing */ + +#define MADV_MERGEABLE 12 /* KSM may merge identical pages */ +#define MADV_UNMERGEABLE 13 /* KSM may not merge identical pages */ + +#define MADV_HUGEPAGE 14 /* Worth backing with hugepages */ +#define MADV_NOHUGEPAGE 15 /* Not worth backing with hugepages */ + +#define MADV_DONTDUMP 16 /* Explicity exclude from the core dump, + overrides the coredump filter bits */ +#define MADV_DODUMP 17 /* Clear the MADV_DONTDUMP flag */ + +/* compatibility flags */ +#define MAP_FILE 0 + +/* + * When MAP_HUGETLB is set bits [26:31] encode the log2 of the huge page size. + * This gives us 6 bits, which is enough until someone invents 128 bit address + * spaces. + * + * Assume these are all power of twos. + * When 0 use the default page size. 
+ */ +#define MAP_HUGE_SHIFT 26 +#define MAP_HUGE_MASK 0x3f + +#endif /* __ASM_GENERIC_MMAN_COMMON_H */ diff --git a/tools/include/uapi/asm-generic/mman.h b/tools/include/uapi/asm-generic/mman.h new file mode 100644 index 0000000..10fa785 --- /dev/null +++ b/tools/include/uapi/asm-generic/mman.h @@ -0,0 +1,22 @@ +#ifndef __ASM_GENERIC_MMAN_H +#define __ASM_GENERIC_MMAN_H + +#include + +#define MAP_GROWSDOWN 0x0100 /* stack-like segment */ +#define MAP_DENYWRITE 0x0800 /* ETXTBSY */ +#define MAP_EXECUTABLE 0x1000 /* mark it as an executable */ +#define MAP_LOCKED 0x2000 /* pages are locked */ +#define MAP_NORESERVE 0x4000 /* don't check for reservations */ +#define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */ +#define MAP_NONBLOCK 0x10000 /* do not block on IO */ +#define MAP_STACK 0x20000 /* give out an address that is best suited for process/thread stacks */ +#define MAP_HUGETLB 0x40000 /* create a huge page mapping */ + +/* Bits [26:31] are reserved, see mman-common.h for MAP_HUGETLB usage */ + +#define MCL_CURRENT 1 /* lock all current mappings */ +#define MCL_FUTURE 2 /* lock all future mappings */ +#define MCL_ONFAULT 4 /* lock all pages that are faulted in */ + +#endif /* __ASM_GENERIC_MMAN_H */ diff --git a/tools/include/uapi/linux/mman.h b/tools/include/uapi/linux/mman.h new file mode 100644 index 0000000..81d8edf --- /dev/null +++ b/tools/include/uapi/linux/mman.h @@ -0,0 +1,13 @@ +#ifndef _UAPI_LINUX_MMAN_H +#define _UAPI_LINUX_MMAN_H + +#include + +#define MREMAP_MAYMOVE 1 +#define MREMAP_FIXED 2 + +#define OVERCOMMIT_GUESS 0 +#define OVERCOMMIT_ALWAYS 1 +#define OVERCOMMIT_NEVER 2 + +#endif /* _UAPI_LINUX_MMAN_H */ diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST index ff200c6..0bda2cc 100644 --- a/tools/perf/MANIFEST +++ b/tools/perf/MANIFEST @@ -66,9 +66,12 @@ tools/include/linux/hash.h tools/include/linux/kernel.h tools/include/linux/list.h tools/include/linux/log2.h +tools/include/uapi/asm-generic/mman-common.h +tools/include/uapi/asm-generic/mman.h tools/include/uapi/linux/bpf.h tools/include/uapi/linux/bpf_common.h tools/include/uapi/linux/hw_breakpoint.h +tools/include/uapi/linux/mman.h tools/include/uapi/linux/perf_event.h tools/include/linux/poison.h tools/include/linux/rbtree.h @@ -79,4 +82,5 @@ tools/include/linux/types.h tools/include/linux/err.h tools/include/linux/bitmap.h tools/include/linux/time64.h +tools/arch/*/include/uapi/asm/mman.h tools/arch/*/include/uapi/asm/perf_regs.h -- cgit v0.10.2 From 277cf08f3febd9d0921f4ff1b46fd5d294fe3942 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 12 Sep 2016 16:27:32 -0300 Subject: perf trace beauty mmap: Fix defines for non !x86_64 Several defines have different values in different arches, so we can't just define it to the x86_64 value, use uapi/linux/mmap.h that was recently introduced to reliably find those, not using possibly outdated libc headers. 
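A rough sketch of the resulting pattern, assuming the perf build's -I paths so that <uapi/linux/mman.h> resolves to the tools/ clone rather than a libc header; the helper and macro names are illustrative, not the exact beautifier code. The point is that every MAP_* constant is now guaranteed to exist, so the per-flag #ifndef fallbacks can simply be deleted.

    #include <uapi/linux/mman.h>    /* the tools/ clone, pulled in via -I */
    #include <stdio.h>

    static size_t mmap_flags__snprintf(char *bf, size_t size, unsigned long flags)
    {
            size_t printed = 0;

    #define P_FLAG(n)                                               \
            do {                                                    \
                    if ((flags & MAP_##n) && printed < size)        \
                            printed += snprintf(bf + printed,       \
                                                size - printed,     \
                                                "%s%s",             \
                                                printed ? "|" : "", #n); \
            } while (0)

            P_FLAG(SHARED);
            P_FLAG(PRIVATE);
            P_FLAG(FIXED);
            P_FLAG(ANONYMOUS);
            P_FLAG(HUGETLB);        /* no #ifndef MAP_HUGETLB needed anymore */
            P_FLAG(STACK);
    #undef P_FLAG

            return printed;
    }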
Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Cc: Zefan Li Link: http://lkml.kernel.org/n/tip-4eajp5yp8i2fuw44n7jmcg5t@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/trace/beauty/mmap.c b/tools/perf/trace/beauty/mmap.c index d0a3a8e..3629b45 100644 --- a/tools/perf/trace/beauty/mmap.c +++ b/tools/perf/trace/beauty/mmap.c @@ -1,8 +1,4 @@ -#include - -#ifndef PROT_SEM -#define PROT_SEM 0x8 -#endif +#include static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size, struct syscall_arg *arg) @@ -33,31 +29,6 @@ static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size, #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot -#ifndef MAP_FIXED -#define MAP_FIXED 0x10 -#endif - -#ifndef MAP_ANONYMOUS -#define MAP_ANONYMOUS 0x20 -#endif - -#ifndef MAP_32BIT -#define MAP_32BIT 0x40 -#endif - -#ifndef MAP_STACK -#define MAP_STACK 0x20000 -#endif - -#ifndef MAP_HUGETLB -#define MAP_HUGETLB 0x40000 -#endif - -#ifndef MAP_UNINITIALIZED -#define MAP_UNINITIALIZED 0x4000000 -#endif - - static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size, struct syscall_arg *arg) { @@ -95,13 +66,6 @@ static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size, #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags -#ifndef MREMAP_MAYMOVE -#define MREMAP_MAYMOVE 1 -#endif -#ifndef MREMAP_FIXED -#define MREMAP_FIXED 2 -#endif - static size_t syscall_arg__scnprintf_mremap_flags(char *bf, size_t size, struct syscall_arg *arg) { @@ -125,39 +89,6 @@ static size_t syscall_arg__scnprintf_mremap_flags(char *bf, size_t size, #define SCA_MREMAP_FLAGS syscall_arg__scnprintf_mremap_flags -#ifndef MADV_HWPOISON -#define MADV_HWPOISON 100 -#endif - -#ifndef MADV_SOFT_OFFLINE -#define MADV_SOFT_OFFLINE 101 -#endif - -#ifndef MADV_MERGEABLE -#define MADV_MERGEABLE 12 -#endif - -#ifndef MADV_UNMERGEABLE -#define MADV_UNMERGEABLE 13 -#endif - -#ifndef MADV_HUGEPAGE -#define MADV_HUGEPAGE 14 -#endif - -#ifndef MADV_NOHUGEPAGE -#define MADV_NOHUGEPAGE 15 -#endif - -#ifndef MADV_DONTDUMP -#define MADV_DONTDUMP 16 -#endif - -#ifndef MADV_DODUMP -#define MADV_DODUMP 17 -#endif - - static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size, struct syscall_arg *arg) { -- cgit v0.10.2 From fbef103fad5009827965b10aedbecb1786904f4d Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 12 Sep 2016 16:47:57 -0300 Subject: perf tools: Do hugetlb handling in more systems The csets: 0ac3348e5024 ("perf tools: Recognize hugetlb mapping as anon mapping") d7e404af115b ("perf record: Mark MAP_HUGETLB when synthesizing mmap events") Added code conditional on MAP_HUGETLB, to make it build in older systems where that define wasn't available. Now that we grabbed copies of uapi/linux/mmap.h to have all those definitions in tools/, use it so that we can support building the tools for older systems (without the MAP_HUGETLB define in its libc headers) using new kernels that support such maps. 
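The effect can be sketched with a hypothetical helper (simplified, not the code in the diff below): since MAP_HUGETLB now always comes from the cloned uapi header, the hugetlbfs handling is compiled unconditionally instead of silently vanishing on systems whose libc lacks the define.

    #include <uapi/linux/mman.h>    /* MAP_HUGETLB even on older libc headers */
    #include <string.h>

    /* Flag a synthesized mmap record as hugetlb-backed when it maps a file
     * under the hugetlbfs mountpoint. */
    static unsigned int hugetlb_map_flags(const char *filename,
                                          const char *hugetlbfs_mnt)
    {
            size_t len = hugetlbfs_mnt ? strlen(hugetlbfs_mnt) : 0;

            if (len && !strncmp(filename, hugetlbfs_mnt, len))
                    return MAP_HUGETLB;
            return 0;
    }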
Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Cc: Zefan Li Link: http://lkml.kernel.org/n/tip-wv6oqbfkpxbix4umj2kcfmaz@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 6c30171..2880e22 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -1,5 +1,5 @@ #include -#include +#include /* To get things like MAP_HUGETLB even on older libc headers */ #include #include "event.h" #include "debug.h" @@ -249,10 +249,8 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool, bool truncation = false; unsigned long long timeout = proc_map_timeout * 1000000ULL; int rc = 0; -#ifdef MAP_HUGETLB const char *hugetlbfs_mnt = hugetlbfs__mountpoint(); int hugetlbfs_mnt_len = hugetlbfs_mnt ? strlen(hugetlbfs_mnt) : 0; -#endif if (machine__is_default_guest(machine)) return 0; @@ -347,12 +345,11 @@ out: if (!strcmp(execname, "")) strcpy(execname, anonstr); -#ifdef MAP_HUGETLB + if (!strncmp(execname, hugetlbfs_mnt, hugetlbfs_mnt_len)) { strcpy(execname, anonstr); event->mmap2.flags |= MAP_HUGETLB; } -#endif size = strlen(execname) + 1; memcpy(event->mmap2.filename, execname, size); diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index d51a125..c662fef 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -6,7 +6,7 @@ #include #include #include -#include +#include /* To get things like MAP_HUGETLB even on older libc headers */ #include "map.h" #include "thread.h" #include "strlist.h" @@ -27,12 +27,7 @@ const char *map_type__name[MAP__NR_TYPES] = { static inline int is_anon_memory(const char *filename, u32 flags) { - u32 anon_flags = 0; - -#ifdef MAP_HUGETLB - anon_flags |= MAP_HUGETLB; -#endif - return flags & anon_flags || + return flags & MAP_HUGETLB || !strcmp(filename, "//anon") || !strncmp(filename, "/dev/zero", sizeof("/dev/zero") - 1) || !strncmp(filename, "/anon_hugepage", sizeof("/anon_hugepage") - 1); -- cgit v0.10.2 From 0a4a7e435f458e996d0f21a9ebc964e0b0d64eba Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Mon, 12 Sep 2016 12:54:31 +0000 Subject: perf build: Compare mman.h related headers against kernel originals As with other cloned headers, compare the newly introduced mman related headers against their source copy in kernel tree. 
Signed-off-by: Wang Nan Cc: Zefan Li Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1473684871-209320-4-git-send-email-wangnan0@huawei.com [ Added -I to ignore the uapi/ difference ] Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 828cfd7..d710db1 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -432,6 +432,15 @@ $(PERF_IN): prepare FORCE @(test -f ../../include/linux/coresight-pmu.h && ( \ (diff -B ../include/linux/coresight-pmu.h ../../include/linux/coresight-pmu.h >/dev/null) \ || echo "Warning: tools/include/linux/coresight-pmu.h differs from kernel" >&2 )) || true + @(test -f ../../include/uapi/asm-generic/mman-common.h && ( \ + (diff -B ../include/uapi/asm-generic/mman-common.h ../../include/uapi/asm-generic/mman-common.h >/dev/null) \ + || echo "Warning: tools/include/uapi/asm-generic/mman-common.h differs from kernel" >&2 )) || true + @(test -f ../../include/uapi/asm-generic/mman.h && ( \ + (diff -B -I "^#include <\(uapi/\)*asm-generic/mman-common.h>$$" ../include/uapi/asm-generic/mman.h ../../include/uapi/asm-generic/mman.h >/dev/null) \ + || echo "Warning: tools/include/uapi/asm-generic/mman.h differs from kernel" >&2 )) || true + @(test -f ../../include/uapi/linux/mman.h && ( \ + (diff -B -I "^#include <\(uapi/\)*asm/mman.h>$$" ../include/uapi/linux/mman.h ../../include/uapi/linux/mman.h >/dev/null) \ + || echo "Warning: tools/include/uapi/linux/mman.h differs from kernel" >&2 )) || true $(Q)$(MAKE) $(build)=perf $(OUTPUT)perf: $(PERFLIBS) $(PERF_IN) $(LIBTRACEEVENT_DYNAMIC_LIST) -- cgit v0.10.2 From 09034de63e427a86ba96bedf39410eef7c9014a5 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 13 Sep 2016 16:45:46 +0900 Subject: perf hists: Introduce hists__match_hierarchy() The hists__match_hierarchy() is to find matching hist entries in a group. A matching entry has the same values for all sort keys given. With an event group (e.g.: -e "{cycles,instructions}"), a leader event should show other members in a group. So each entry in the leader should be able to find its pair entries which have same values. With hierarchy mode, it needs to search all matching children in a hierarchy. An example output looks like: # Overhead Command / Shared Object / Symbol # ...................... .................................. # 25.74% 27.18% sh 19.96% 24.14% libc-2.24.so 9.55% 14.64% [.] __strcmp_sse2 1.54% 0.00% [.] __tfind 1.07% 1.13% [.] _int_malloc ... In the above example, two overheads are shown - one for the leader and another for the other group member. They were matched since their command, dso and symbol have the same values. 
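The matching rule itself can be sketched with stand-in types (perf's real code walks rbtrees of struct hist_entry and, in hierarchy mode, repeats the test recursively on the children of every matched pair):

    struct sample_entry;    /* stand-in for perf's struct hist_entry */

    typedef int (*sort_cmp_fn)(const struct sample_entry *,
                               const struct sample_entry *);

    /* A leader entry and a member entry pair up only when every configured
     * sort key (comm, dso, symbol, ...) compares equal. */
    static int entries_match(const struct sample_entry *leader,
                             const struct sample_entry *other,
                             const sort_cmp_fn *keys, int nr_keys)
    {
            int i;

            for (i = 0; i < nr_keys; i++) {
                    if (keys[i](leader, other))
                            return 0;       /* first differing key ends the test */
            }
            return 1;
    }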
Signed-off-by: Namhyung Kim Cc: Andi Kleen Cc: Jiri Olsa Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20160913074552.13284-2-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index de15dbc..be3f5ce 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -2174,6 +2174,51 @@ static struct hist_entry *hists__find_entry(struct hists *hists, return NULL; } +static struct hist_entry *hists__find_hierarchy_entry(struct rb_root *root, + struct hist_entry *he) +{ + struct rb_node *n = root->rb_node; + + while (n) { + struct hist_entry *iter; + struct perf_hpp_fmt *fmt; + int64_t cmp = 0; + + iter = rb_entry(n, struct hist_entry, rb_node_in); + perf_hpp_list__for_each_sort_list(he->hpp_list, fmt) { + cmp = fmt->collapse(fmt, iter, he); + if (cmp) + break; + } + + if (cmp < 0) + n = n->rb_left; + else if (cmp > 0) + n = n->rb_right; + else + return iter; + } + + return NULL; +} + +static void hists__match_hierarchy(struct rb_root *leader_root, + struct rb_root *other_root) +{ + struct rb_node *nd; + struct hist_entry *pos, *pair; + + for (nd = rb_first(leader_root); nd; nd = rb_next(nd)) { + pos = rb_entry(nd, struct hist_entry, rb_node_in); + pair = hists__find_hierarchy_entry(other_root, pos); + + if (pair) { + hist_entry__add_pair(pair, pos); + hists__match_hierarchy(&pos->hroot_in, &pair->hroot_in); + } + } +} + /* * Look for pairs to link to the leader buckets (hist_entries): */ @@ -2183,6 +2228,12 @@ void hists__match(struct hists *leader, struct hists *other) struct rb_node *nd; struct hist_entry *pos, *pair; + if (symbol_conf.report_hierarchy) { + /* hierarchy report always collapses entries */ + return hists__match_hierarchy(&leader->entries_collapsed, + &other->entries_collapsed); + } + if (hists__has(leader, need_collapse)) root = &leader->entries_collapsed; else -- cgit v0.10.2 From 9d97b8f512a0dd41819b8e3d9cdc7a59199e1b0c Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 13 Sep 2016 16:45:47 +0900 Subject: perf hists: Introduce hists__link_hierarchy() The hists__link_hierarchy() is to support hierarchy reports with an event group. When it matches the leader event and the other members (using hists__match_hierarchy()), it also needs to link unmatched member entries with a dummy leader event so that it can show up in the output. 
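A toy sketch of the linking rule (hypothetical types, not perf's struct hist_entry): a member entry that found no counterpart gets a freshly allocated dummy leader whose stats are zero, so its row still appears in the group view.

    #include <stdlib.h>

    struct toy_entry {                      /* stand-in for struct hist_entry */
            const char *comm, *dso, *sym;   /* sort keys */
            unsigned long long period;      /* overhead */
            int dummy;
    };

    /* A member with no matching leader entry gets a zero-period dummy leader,
     * so the row shows up as 0.00% in the leader column. */
    static struct toy_entry *make_dummy_leader(const struct toy_entry *member)
    {
            struct toy_entry *he = malloc(sizeof(*he));

            if (!he)
                    return NULL;
            *he = *member;          /* keep the sort keys for matching */
            he->period = 0;
            he->dummy = 1;
            return he;
    }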
Signed-off-by: Namhyung Kim Cc: Andi Kleen Cc: Jiri Olsa Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20160913074552.13284-3-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index be3f5ce..702ba3a 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -2149,6 +2149,50 @@ out: return he; } +static struct hist_entry *add_dummy_hierarchy_entry(struct hists *hists, + struct rb_root *root, + struct hist_entry *pair) +{ + struct rb_node **p; + struct rb_node *parent = NULL; + struct hist_entry *he; + struct perf_hpp_fmt *fmt; + + p = &root->rb_node; + while (*p != NULL) { + int64_t cmp = 0; + + parent = *p; + he = rb_entry(parent, struct hist_entry, rb_node_in); + + perf_hpp_list__for_each_sort_list(he->hpp_list, fmt) { + cmp = fmt->collapse(fmt, he, pair); + if (cmp) + break; + } + if (!cmp) + goto out; + + if (cmp < 0) + p = &parent->rb_left; + else + p = &parent->rb_right; + } + + he = hist_entry__new(pair, true); + if (he) { + rb_link_node(&he->rb_node_in, parent, p); + rb_insert_color(&he->rb_node_in, root); + + he->dummy = true; + he->hists = hists; + memset(&he->stat, 0, sizeof(he->stat)); + hists__inc_stats(hists, he); + } +out: + return he; +} + static struct hist_entry *hists__find_entry(struct hists *hists, struct hist_entry *he) { @@ -2248,6 +2292,50 @@ void hists__match(struct hists *leader, struct hists *other) } } +static int hists__link_hierarchy(struct hists *leader_hists, + struct hist_entry *parent, + struct rb_root *leader_root, + struct rb_root *other_root) +{ + struct rb_node *nd; + struct hist_entry *pos, *leader; + + for (nd = rb_first(other_root); nd; nd = rb_next(nd)) { + pos = rb_entry(nd, struct hist_entry, rb_node_in); + + if (hist_entry__has_pairs(pos)) { + bool found = false; + + list_for_each_entry(leader, &pos->pairs.head, pairs.node) { + if (leader->hists == leader_hists) { + found = true; + break; + } + } + if (!found) + return -1; + } else { + leader = add_dummy_hierarchy_entry(leader_hists, + leader_root, pos); + if (leader == NULL) + return -1; + + /* do not point parent in the pos */ + leader->parent_he = parent; + + hist_entry__add_pair(pos, leader); + } + + if (!pos->leaf) { + if (hists__link_hierarchy(leader_hists, leader, + &leader->hroot_in, + &pos->hroot_in) < 0) + return -1; + } + } + return 0; +} + /* * Look for entries in the other hists that are not present in the leader, if * we find them, just add a dummy entry on the leader hists, with period=0, @@ -2259,6 +2347,13 @@ int hists__link(struct hists *leader, struct hists *other) struct rb_node *nd; struct hist_entry *pos, *pair; + if (symbol_conf.report_hierarchy) { + /* hierarchy report always collapses entries */ + return hists__link_hierarchy(leader, NULL, + &leader->entries_collapsed, + &other->entries_collapsed); + } + if (hists__has(other, need_collapse)) root = &other->entries_collapsed; else -- cgit v0.10.2 From d2580c7a5b4e78bffda1e53cfd583e7a2c7383a5 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 13 Sep 2016 16:45:48 +0900 Subject: perf hist: Initialize hierarchy tree explicitly The hroot_in and hroot_out are roots of hierarchy trees of hist entries. But when a hist entry is initialized by copying existing template entry, it sometimes has non-empty tree and copies it incorrectly. This is a problem especially when an event group is used since it creates dummy entries from already-processed entries in other event members. 
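The class of bug can be shown with a toy structure (illustrative only): a bulk copy of a template object also copies any embedded tree root, so the copy must reset that root explicitly, which is exactly what the RB_ROOT assignments below do for hroot_in/hroot_out.

    #include <stddef.h>

    struct toy_tree_node;

    struct toy_entry {
            int key;
            struct toy_tree_node *children; /* stands in for hroot_in/hroot_out */
    };

    /* Copying the template wholesale also copies its tree root; reset it so
     * the new entry does not appear to own the template's children. */
    static void toy_entry_init(struct toy_entry *dst, const struct toy_entry *tmpl)
    {
            *dst = *tmpl;           /* bulk copy, like hist_entry__init() */
            dst->children = NULL;   /* analogous to hroot_in/hroot_out = RB_ROOT */
    }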
Signed-off-by: Namhyung Kim Cc: Andi Kleen Cc: Jiri Olsa Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20160913074552.13284-4-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 702ba3a..37a08f2 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -417,6 +417,8 @@ static int hist_entry__init(struct hist_entry *he, } INIT_LIST_HEAD(&he->pairs.node); thread__get(he->thread); + he->hroot_in = RB_ROOT; + he->hroot_out = RB_ROOT; if (!symbol_conf.report_hierarchy) he->leaf = true; -- cgit v0.10.2 From 9a6ad25b5a2026ba1399abc879ec623957867e79 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 13 Sep 2016 16:45:49 +0900 Subject: perf ui/stdio: Always reset output width for hierarchy When the --hierarchy option is used, each entry has its own hpp_list to show the result. But it is not updating the width of each column for perf-top. The perf-report command has no problem since it resets it during header display. $ sudo perf top --hierarchy --stdio PerfTop: 160 irqs/sec kernel:38.8% exact: 100.0% [4000Hz cycles:pp], (all, 12 CPUs) ---------------------------------------------------------------------- 52.32% perf 24.74% [.] __symbols__insert 5.62% [.] rb_next 5.14% [.] dso__load_sym Move the code into hists__fprintf() so that it can be called always. Also it'd be better to put similar code together. Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Cc: Andi Kleen Cc: Jiri Olsa Cc: Peter Zijlstra Fixes: 1b2dbbf41a0f ("perf hists: Use own hpp_list for hierarchy mode") Link: http://lkml.kernel.org/r/20160913074552.13284-5-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c index 9b65f4a..18b4fd9 100644 --- a/tools/perf/ui/stdio/hist.c +++ b/tools/perf/ui/stdio/hist.c @@ -628,14 +628,6 @@ hists__fprintf_hierarchy_headers(struct hists *hists, struct perf_hpp *hpp, FILE *fp) { - struct perf_hpp_list_node *fmt_node; - struct perf_hpp_fmt *fmt; - - list_for_each_entry(fmt_node, &hists->hpp_formats, list) { - perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) - perf_hpp__reset_width(fmt, hists); - } - return print_hierarchy_header(hists, hpp, symbol_conf.field_sep, fp); } @@ -733,6 +725,7 @@ size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows, bool use_callchain) { struct perf_hpp_fmt *fmt; + struct perf_hpp_list_node *node; struct rb_node *nd; size_t ret = 0; const char *sep = symbol_conf.field_sep; @@ -745,6 +738,11 @@ size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows, hists__for_each_format(hists, fmt) perf_hpp__reset_width(fmt, hists); + /* hierarchy entries have their own hpp list */ + list_for_each_entry(node, &hists->hpp_formats, list) { + perf_hpp_list__for_each_format(&node->hpp, fmt) + perf_hpp__reset_width(fmt, hists); + } if (symbol_conf.col_width_list_str) perf_hpp__set_user_width(symbol_conf.col_width_list_str); -- cgit v0.10.2 From 195bc0f8443d8d564ae95d2e9c19ac0edfd647c3 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 13 Sep 2016 16:45:50 +0900 Subject: perf ui/stdio: Rename print_hierarchy_header() Now the hists__fprintf_hierarchy_headers() is a simple wrapper passing field separator. Let's do it directly. 
Signed-off-by: Namhyung Kim Cc: Andi Kleen Cc: Jiri Olsa Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20160913074552.13284-6-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c index 18b4fd9..a57131e 100644 --- a/tools/perf/ui/stdio/hist.c +++ b/tools/perf/ui/stdio/hist.c @@ -528,8 +528,8 @@ static int print_hierarchy_indent(const char *sep, int indent, return fprintf(fp, "%-.*s", (indent - 2) * HIERARCHY_INDENT, line); } -static int print_hierarchy_header(struct hists *hists, struct perf_hpp *hpp, - const char *sep, FILE *fp) +static int hists__fprintf_hierarchy_headers(struct hists *hists, + struct perf_hpp *hpp, FILE *fp) { bool first_node, first_col; int indent; @@ -538,6 +538,7 @@ static int print_hierarchy_header(struct hists *hists, struct perf_hpp *hpp, unsigned header_width = 0; struct perf_hpp_fmt *fmt; struct perf_hpp_list_node *fmt_node; + const char *sep = symbol_conf.field_sep; indent = hists->nr_hpp_node; @@ -623,14 +624,6 @@ static int print_hierarchy_header(struct hists *hists, struct perf_hpp *hpp, return 2; } -static int -hists__fprintf_hierarchy_headers(struct hists *hists, - struct perf_hpp *hpp, - FILE *fp) -{ - return print_hierarchy_header(hists, hpp, symbol_conf.field_sep, fp); -} - static void fprintf_line(struct hists *hists, struct perf_hpp *hpp, int line, FILE *fp) { -- cgit v0.10.2 From 30d476ae738d1ce33f170dd79398ecd211274df6 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 13 Sep 2016 16:45:52 +0900 Subject: perf report: Enable group view with hierarchy Now that all the missing pieces are implemented, let's enable it. An example output below: $ perf record -e '{cycles,instructions}' make $ perf report --hierarchy --stdio ... # Overhead Command / Shared Object / Symbol # ...................... .................................. # ... 25.74% 27.18% sh 19.96% 24.14% libc-2.24.so 9.55% 14.64% [.] __strcmp_sse2 1.54% 0.00% [.] __tfind 1.07% 1.13% [.] _int_malloc 0.95% 0.00% [.] __strchr_sse2 0.89% 1.39% [.] __tsearch 0.76% 0.00% [.] strlen ... Signed-off-by: Namhyung Kim Requested-by: Andi Kleen Tested-by: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20160913074552.13284-8-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 1a07c4c..6e88460 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -935,7 +935,6 @@ repeat: if (symbol_conf.report_hierarchy) { /* disable incompatible options */ - symbol_conf.event_group = false; symbol_conf.cumulate_callchain = false; if (field_order) { -- cgit v0.10.2 From dd60fba7324572498d91163e96b1cfe5cd5f7f3b Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Tue, 6 Sep 2016 10:37:15 -0600 Subject: perf tools: Add infrastructure for PMU specific configuration This patch adds PMU driver specific configuration to the parser infrastructure by preceding any term with the '@' letter. As such doing something like: perf record -e some_event/@cfg1,@cfg2=config/ ... will see 'cfg1' and 'cfg2=config' being added to the list of evsel config terms. Token 'cfg1' and 'cfg2=config' are not processed in user space and are meant to be interpreted by the PMU driver. First the lexer/parser are supplemented with the required definitions to recognise the driver specific configuration. From there they are simply added to the list of event terms. 
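In plain C terms the user-space handling amounts to stripping the marker and keeping the rest of the string opaque, roughly like the hypothetical helper below (the real work happens in the flex rule drv_str() added by this patch):

    #include <stdlib.h>
    #include <string.h>

    /* "@cfg2=config" -> "cfg2=config"; the remainder is kept verbatim for the
     * PMU driver to interpret, nothing is parsed in user space. */
    static char *drv_cfg_term__strip(const char *token)
    {
            if (token[0] != '@')
                    return NULL;    /* not a driver config term */
            return strdup(token + 1);
    }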
The bulk of the work is done in function "parse_events_add_pmu()" where driver config event terms are added to a new list of driver config terms, which in turn spliced with the event's new driver configuration list. Signed-off-by: Mathieu Poirier Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1473179837-3293-4-git-send-email-mathieu.poirier@linaro.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 379a2be..1a24f4d 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -60,6 +60,18 @@ OPTIONS Note: If user explicitly sets options which conflict with the params, the value set by the params will be overridden. + Also not defined in ...//format/* are PMU driver specific + configuration parameters. Any configuration parameter preceded by + the letter '@' is not interpreted in user space and sent down directly + to the PMU driver. For example: + + perf record -e some_event/@cfg1,@cfg2=config/ ... + + will see 'cfg1' and 'cfg2=config' pushed to the PMU driver associated + with the event for further processing. There is no restriction on + what the configuration parameters are, as long as their semantic is + understood and supported by the PMU driver. + - a hardware breakpoint event in the form of '\mem:addr[/len][:access]' where addr is the address in memory you want to break in. Access is the memory access type (read, write, execute) it can diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 4d44129..3238060 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -46,6 +46,7 @@ enum { PERF_EVSEL__CONFIG_TERM_INHERIT, PERF_EVSEL__CONFIG_TERM_MAX_STACK, PERF_EVSEL__CONFIG_TERM_OVERWRITE, + PERF_EVSEL__CONFIG_TERM_DRV_CFG, PERF_EVSEL__CONFIG_TERM_MAX, }; @@ -57,6 +58,7 @@ struct perf_evsel_config_term { u64 freq; bool time; char *callgraph; + char *drv_cfg; u64 stack_user; int max_stack; bool inherit; diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 6c913c3..2eb8b1e 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -904,6 +904,7 @@ static const char *config_term_names[__PARSE_EVENTS__TERM_TYPE_NR] = { [PARSE_EVENTS__TERM_TYPE_MAX_STACK] = "max-stack", [PARSE_EVENTS__TERM_TYPE_OVERWRITE] = "overwrite", [PARSE_EVENTS__TERM_TYPE_NOOVERWRITE] = "no-overwrite", + [PARSE_EVENTS__TERM_TYPE_DRV_CFG] = "driver-config", }; static bool config_term_shrinked; @@ -1034,7 +1035,8 @@ static int config_term_pmu(struct perf_event_attr *attr, struct parse_events_term *term, struct parse_events_error *err) { - if (term->type_term == PARSE_EVENTS__TERM_TYPE_USER) + if (term->type_term == PARSE_EVENTS__TERM_TYPE_USER || + term->type_term == PARSE_EVENTS__TERM_TYPE_DRV_CFG) /* * Always succeed for sysfs terms, as we dont know * at this point what type they need to have. @@ -1134,6 +1136,9 @@ do { \ case PARSE_EVENTS__TERM_TYPE_NOOVERWRITE: ADD_CONFIG_TERM(OVERWRITE, overwrite, term->val.num ? 
0 : 1); break; + case PARSE_EVENTS__TERM_TYPE_DRV_CFG: + ADD_CONFIG_TERM(DRV_CFG, drv_cfg, term->val.str); + break; default: break; } diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index d1edbf8..8d09a97 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -71,6 +71,7 @@ enum { PARSE_EVENTS__TERM_TYPE_MAX_STACK, PARSE_EVENTS__TERM_TYPE_NOOVERWRITE, PARSE_EVENTS__TERM_TYPE_OVERWRITE, + PARSE_EVENTS__TERM_TYPE_DRV_CFG, __PARSE_EVENTS__TERM_TYPE_NR, }; diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l index 7a25194..9f43fda 100644 --- a/tools/perf/util/parse-events.l +++ b/tools/perf/util/parse-events.l @@ -53,6 +53,26 @@ static int str(yyscan_t scanner, int token) return token; } +/* + * This function is called when the parser gets two kind of input: + * + * @cfg1 or @cfg2=config + * + * The leading '@' is stripped off before 'cfg1' and 'cfg2=config' are given to + * bison. In the latter case it is necessary to keep the string intact so that + * the PMU kernel driver can determine what configurable is associated to + * 'config'. + */ +static int drv_str(yyscan_t scanner, int token) +{ + YYSTYPE *yylval = parse_events_get_lval(scanner); + char *text = parse_events_get_text(scanner); + + /* Strip off the '@' */ + yylval->str = strdup(text + 1); + return token; +} + #define REWIND(__alloc) \ do { \ YYSTYPE *__yylval = parse_events_get_lval(yyscanner); \ @@ -124,6 +144,7 @@ num_hex 0x[a-fA-F0-9]+ num_raw_hex [a-fA-F0-9]+ name [a-zA-Z_*?][a-zA-Z0-9_*?.]* name_minus [a-zA-Z_*?][a-zA-Z0-9\-_*?.:]* +drv_cfg_term [a-zA-Z0-9_\.]+(=[a-zA-Z0-9_*?\.:]+)? /* If you add a modifier you need to update check_modifier() */ modifier_event [ukhpPGHSDI]+ modifier_bp [rwx]{1,3} @@ -209,6 +230,7 @@ no-overwrite { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_NOOVERWRITE); } {name_minus} { return str(yyscanner, PE_NAME); } \[all\] { return PE_ARRAY_ALL; } "[" { BEGIN(array); return '['; } +@{drv_cfg_term} { return drv_str(yyscanner, PE_DRV_CFG_TERM); } } { diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index 5be4a5f..879115f 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -49,6 +49,7 @@ static void inc_group_count(struct list_head *list, %token PE_ERROR %token PE_PMU_EVENT_PRE PE_PMU_EVENT_SUF PE_KERNEL_PMU_EVENT %token PE_ARRAY_ALL PE_ARRAY_RANGE +%token PE_DRV_CFG_TERM %type PE_VALUE %type PE_VALUE_SYM_HW %type PE_VALUE_SYM_SW @@ -63,6 +64,7 @@ static void inc_group_count(struct list_head *list, %type PE_MODIFIER_BP %type PE_EVENT_NAME %type PE_PMU_EVENT_PRE PE_PMU_EVENT_SUF PE_KERNEL_PMU_EVENT +%type PE_DRV_CFG_TERM %type value_sym %type event_config %type opt_event_config @@ -599,6 +601,15 @@ PE_NAME array '=' PE_VALUE term->array = $2; $$ = term; } +| +PE_DRV_CFG_TERM +{ + struct parse_events_term *term; + + ABORT_ON(parse_events_term__str(&term, PARSE_EVENTS__TERM_TYPE_DRV_CFG, + $1, $1, &@1, NULL)); + $$ = term; +} array: '[' array_terms ']' -- cgit v0.10.2 From f752e90e9c00da2a1994e2e1b1a04505b46a6a4c Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Wed, 14 Sep 2016 10:57:28 +0000 Subject: perf trace beauty mmap: Add missing MADV_FREE tools/perf/trace/beauty/mmap.c forgets to check MADV_FREE. This patch fixes it. Signed-off-by: Wang Nan Cc: Naveen N. 
Rao Cc: Zefan Li Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1473850649-83389-2-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/trace/beauty/mmap.c b/tools/perf/trace/beauty/mmap.c index 3629b45..fd710ab 100644 --- a/tools/perf/trace/beauty/mmap.c +++ b/tools/perf/trace/beauty/mmap.c @@ -101,6 +101,7 @@ static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size, P_MADV_BHV(SEQUENTIAL); P_MADV_BHV(WILLNEED); P_MADV_BHV(DONTNEED); + P_MADV_BHV(FREE); P_MADV_BHV(REMOVE); P_MADV_BHV(DONTFORK); P_MADV_BHV(DOFORK); -- cgit v0.10.2 From f82b77462b8680b84e8cce955b05a6629cb44b36 Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Wed, 14 Sep 2016 10:57:29 +0000 Subject: tools include: Add mman macros needed by perf for all arch Some macros required by tools/perf/trace/beauty/mmap.c is not support for all architectures. For example, MAP_32BIT is defined on x86 only, alpha doesn't define MADV_HWPOISON and MADV_SOFT_OFFLINE. This patch regenerates mman.h for each arch, defines these missing macros for perf. For missing MADV_*, fall back to asm-generic/mman-common because they are in a 'case ...' statement. For flags, define it to 0. Following is the script to generate this patch: macros=`cat $0 | awk 'V==1 {print}; /^# start macro list/ {V=1}'` rm `find ./tools/arch/ -name mman.h` for arch in `ls tools/arch` do [ -d tools/arch/$arch/include/uapi/asm ] || mkdir -p tools/arch/$arch/include/uapi/asm src=arch/$arch/include/uapi/asm/mman.h target=tools/arch/$arch/include/uapi/asm/mman.h.tmp real_target=tools/arch/$arch/include/uapi/asm/mman.h guard="TOOLS_ARCH_"`echo $arch | awk '{print toupper($0)}'`_UAPI_ASM_MMAN_FIX_H rm -f $target [ -f $src ] && for m in $macros do if grep '#define[ \t]*'$m $src > /dev/null 2>&1 then grep -h '#define[ \t]*'$m $src | sed 's/[ \t]*\/\*.*$//g' >> $target fi done if [ -f $src ] then grep '#include > $target else echo "#include " >> $target fi touch $real_target for m in $macros do if cat << EOF | gcc -Itools/arch/$arch/include -Itools/arch/$arch/include/uapi -Iinclude/ -Iinclude/uapi -E - | grep $m > /dev/null 2>&1 #include #include $m EOF then echo "Fixing $m for $arch" echo "/* $m is undefined on $arch, fix it for perf */" >> $target if echo $m | grep '^MADV_' > /dev/null 2>&1 then grep -h '#define[ \t]*'$m include/uapi/asm-generic/mman-common.h | sed 's/[ \t]*\/\*.*$//g' >> $target else echo "#define $m 0" >> $target fi fi done real_target=tools/arch/$arch/include/uapi/asm/mman.h echo '#ifndef '$guard > $real_target echo '#define '$guard >> $real_target cat $target | sed 's|asm-generic|uapi/asm-generic|g' >> $real_target echo '#endif' >> $real_target rm $target echo "$real_target" done exit 0 # Following macros are extracted from: # tools/perf/trace/beauty/mmap.c # # start macro list MADV_DODUMP MADV_DOFORK MADV_DONTDUMP MADV_DONTFORK MADV_DONTNEED MADV_FREE MADV_HUGEPAGE MADV_HWPOISON MADV_MERGEABLE MADV_NOHUGEPAGE MADV_NORMAL MADV_RANDOM MADV_REMOVE MADV_SEQUENTIAL MADV_SOFT_OFFLINE MADV_UNMERGEABLE MADV_WILLNEED MAP_32BIT MAP_ANONYMOUS MAP_DENYWRITE MAP_EXECUTABLE MAP_FILE MAP_FIXED MAP_GROWSDOWN MAP_HUGETLB MAP_LOCKED MAP_NONBLOCK MAP_NORESERVE MAP_POPULATE MAP_PRIVATE MAP_SHARED MAP_STACK MAP_UNINITIALIZED MREMAP_FIXED MREMAP_MAYMOVE PROT_EXEC PROT_GROWSDOWN PROT_GROWSUP PROT_NONE PROT_READ PROT_SEM PROT_WRITE Signed-off-by: Wang Nan Tested-by: Kim Phillips Tested-by: Naveen N. 
Rao Cc: Ravi Bangoria Cc: Zefan Li Cc: pi3orama@163.com Fixes: 277cf08f3feb ("perf trace beauty mmap: Fix defines for non !x86_64") Link: http://lkml.kernel.org/r/1473850649-83389-3-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/arch/alpha/include/uapi/asm/mman.h b/tools/arch/alpha/include/uapi/asm/mman.h index 6ed4ad4..e38b64c 100644 --- a/tools/arch/alpha/include/uapi/asm/mman.h +++ b/tools/arch/alpha/include/uapi/asm/mman.h @@ -5,6 +5,7 @@ #define MADV_DONTDUMP 16 #define MADV_DONTFORK 10 #define MADV_DONTNEED 6 +#define MADV_FREE 8 #define MADV_HUGEPAGE 14 #define MADV_MERGEABLE 12 #define MADV_NOHUGEPAGE 15 @@ -35,4 +36,12 @@ #define PROT_READ 0x1 #define PROT_SEM 0x8 #define PROT_WRITE 0x2 +/* MADV_HWPOISON is undefined on alpha, fix it for perf */ +#define MADV_HWPOISON 100 +/* MADV_SOFT_OFFLINE is undefined on alpha, fix it for perf */ +#define MADV_SOFT_OFFLINE 101 +/* MAP_32BIT is undefined on alpha, fix it for perf */ +#define MAP_32BIT 0 +/* MAP_UNINITIALIZED is undefined on alpha, fix it for perf */ +#define MAP_UNINITIALIZED 0 #endif diff --git a/tools/arch/arc/include/uapi/asm/mman.h b/tools/arch/arc/include/uapi/asm/mman.h index f76765d..aa3acd2 100644 --- a/tools/arch/arc/include/uapi/asm/mman.h +++ b/tools/arch/arc/include/uapi/asm/mman.h @@ -1,4 +1,6 @@ #ifndef TOOLS_ARCH_ARC_UAPI_ASM_MMAN_FIX_H #define TOOLS_ARCH_ARC_UAPI_ASM_MMAN_FIX_H #include +/* MAP_32BIT is undefined on arc, fix it for perf */ +#define MAP_32BIT 0 #endif diff --git a/tools/arch/arm/include/uapi/asm/mman.h b/tools/arch/arm/include/uapi/asm/mman.h index f200638..478f699 100644 --- a/tools/arch/arm/include/uapi/asm/mman.h +++ b/tools/arch/arm/include/uapi/asm/mman.h @@ -1,4 +1,6 @@ #ifndef TOOLS_ARCH_ARM_UAPI_ASM_MMAN_FIX_H #define TOOLS_ARCH_ARM_UAPI_ASM_MMAN_FIX_H #include +/* MAP_32BIT is undefined on arm, fix it for perf */ +#define MAP_32BIT 0 #endif diff --git a/tools/arch/arm64/include/uapi/asm/mman.h b/tools/arch/arm64/include/uapi/asm/mman.h index a7dd975..70fd311 100644 --- a/tools/arch/arm64/include/uapi/asm/mman.h +++ b/tools/arch/arm64/include/uapi/asm/mman.h @@ -1,4 +1,6 @@ #ifndef TOOLS_ARCH_ARM64_UAPI_ASM_MMAN_FIX_H #define TOOLS_ARCH_ARM64_UAPI_ASM_MMAN_FIX_H #include +/* MAP_32BIT is undefined on arm64, fix it for perf */ +#define MAP_32BIT 0 #endif diff --git a/tools/arch/frv/include/uapi/asm/mman.h b/tools/arch/frv/include/uapi/asm/mman.h index 99bba05..5be78ac 100644 --- a/tools/arch/frv/include/uapi/asm/mman.h +++ b/tools/arch/frv/include/uapi/asm/mman.h @@ -1,4 +1,6 @@ #ifndef TOOLS_ARCH_FRV_UAPI_ASM_MMAN_FIX_H #define TOOLS_ARCH_FRV_UAPI_ASM_MMAN_FIX_H #include +/* MAP_32BIT is undefined on frv, fix it for perf */ +#define MAP_32BIT 0 #endif diff --git a/tools/arch/h8300/include/uapi/asm/mman.h b/tools/arch/h8300/include/uapi/asm/mman.h index df95096..9d9ac54 100644 --- a/tools/arch/h8300/include/uapi/asm/mman.h +++ b/tools/arch/h8300/include/uapi/asm/mman.h @@ -1,4 +1,6 @@ #ifndef TOOLS_ARCH_H8300_UAPI_ASM_MMAN_FIX_H #define TOOLS_ARCH_H8300_UAPI_ASM_MMAN_FIX_H #include +/* MAP_32BIT is undefined on h8300, fix it for perf */ +#define MAP_32BIT 0 #endif diff --git a/tools/arch/hexagon/include/uapi/asm/mman.h b/tools/arch/hexagon/include/uapi/asm/mman.h index f1adcce..102f3fa 100644 --- a/tools/arch/hexagon/include/uapi/asm/mman.h +++ b/tools/arch/hexagon/include/uapi/asm/mman.h @@ -1,4 +1,6 @@ #ifndef TOOLS_ARCH_HEXAGON_UAPI_ASM_MMAN_FIX_H #define TOOLS_ARCH_HEXAGON_UAPI_ASM_MMAN_FIX_H #include +/* MAP_32BIT is undefined on 
hexagon, fix it for perf */ +#define MAP_32BIT 0 #endif diff --git a/tools/arch/ia64/include/uapi/asm/mman.h b/tools/arch/ia64/include/uapi/asm/mman.h index 23420eb..1d6e5ac 100644 --- a/tools/arch/ia64/include/uapi/asm/mman.h +++ b/tools/arch/ia64/include/uapi/asm/mman.h @@ -1,4 +1,6 @@ #ifndef TOOLS_ARCH_IA64_UAPI_ASM_MMAN_FIX_H #define TOOLS_ARCH_IA64_UAPI_ASM_MMAN_FIX_H #include +/* MAP_32BIT is undefined on ia64, fix it for perf */ +#define MAP_32BIT 0 #endif diff --git a/tools/arch/m32r/include/uapi/asm/mman.h b/tools/arch/m32r/include/uapi/asm/mman.h index a35ebd6..1c29635 100644 --- a/tools/arch/m32r/include/uapi/asm/mman.h +++ b/tools/arch/m32r/include/uapi/asm/mman.h @@ -1,4 +1,6 @@ #ifndef TOOLS_ARCH_M32R_UAPI_ASM_MMAN_FIX_H #define TOOLS_ARCH_M32R_UAPI_ASM_MMAN_FIX_H #include +/* MAP_32BIT is undefined on m32r, fix it for perf */ +#define MAP_32BIT 0 #endif diff --git a/tools/arch/microblaze/include/uapi/asm/mman.h b/tools/arch/microblaze/include/uapi/asm/mman.h index 75f460a..005cd50 100644 --- a/tools/arch/microblaze/include/uapi/asm/mman.h +++ b/tools/arch/microblaze/include/uapi/asm/mman.h @@ -1,4 +1,6 @@ #ifndef TOOLS_ARCH_MICROBLAZE_UAPI_ASM_MMAN_FIX_H #define TOOLS_ARCH_MICROBLAZE_UAPI_ASM_MMAN_FIX_H #include +/* MAP_32BIT is undefined on microblaze, fix it for perf */ +#define MAP_32BIT 0 #endif diff --git a/tools/arch/mips/include/uapi/asm/mman.h b/tools/arch/mips/include/uapi/asm/mman.h index db88fa4..c020529 100644 --- a/tools/arch/mips/include/uapi/asm/mman.h +++ b/tools/arch/mips/include/uapi/asm/mman.h @@ -5,6 +5,7 @@ #define MADV_DONTDUMP 16 #define MADV_DONTFORK 10 #define MADV_DONTNEED 4 +#define MADV_FREE 8 #define MADV_HUGEPAGE 14 #define MADV_HWPOISON 100 #define MADV_MERGEABLE 12 @@ -36,4 +37,10 @@ #define PROT_READ 0x01 #define PROT_SEM 0x10 #define PROT_WRITE 0x02 +/* MADV_SOFT_OFFLINE is undefined on mips, fix it for perf */ +#define MADV_SOFT_OFFLINE 101 +/* MAP_32BIT is undefined on mips, fix it for perf */ +#define MAP_32BIT 0 +/* MAP_UNINITIALIZED is undefined on mips, fix it for perf */ +#define MAP_UNINITIALIZED 0 #endif diff --git a/tools/arch/mn10300/include/uapi/asm/mman.h b/tools/arch/mn10300/include/uapi/asm/mman.h index 81faa5d..c1ea36d 100644 --- a/tools/arch/mn10300/include/uapi/asm/mman.h +++ b/tools/arch/mn10300/include/uapi/asm/mman.h @@ -1,4 +1,6 @@ #ifndef TOOLS_ARCH_MN10300_UAPI_ASM_MMAN_FIX_H #define TOOLS_ARCH_MN10300_UAPI_ASM_MMAN_FIX_H #include +/* MAP_32BIT is undefined on mn10300, fix it for perf */ +#define MAP_32BIT 0 #endif diff --git a/tools/arch/parisc/include/uapi/asm/mman.h b/tools/arch/parisc/include/uapi/asm/mman.h index c4a9d9f..03d8d5b 100644 --- a/tools/arch/parisc/include/uapi/asm/mman.h +++ b/tools/arch/parisc/include/uapi/asm/mman.h @@ -5,6 +5,7 @@ #define MADV_DONTDUMP 69 #define MADV_DONTFORK 10 #define MADV_DONTNEED 4 +#define MADV_FREE 8 #define MADV_HUGEPAGE 67 #define MADV_MERGEABLE 65 #define MADV_NOHUGEPAGE 68 @@ -35,4 +36,12 @@ #define PROT_READ 0x1 #define PROT_SEM 0x8 #define PROT_WRITE 0x2 +/* MADV_HWPOISON is undefined on parisc, fix it for perf */ +#define MADV_HWPOISON 100 +/* MADV_SOFT_OFFLINE is undefined on parisc, fix it for perf */ +#define MADV_SOFT_OFFLINE 101 +/* MAP_32BIT is undefined on parisc, fix it for perf */ +#define MAP_32BIT 0 +/* MAP_UNINITIALIZED is undefined on parisc, fix it for perf */ +#define MAP_UNINITIALIZED 0 #endif diff --git a/tools/arch/powerpc/include/uapi/asm/mman.h b/tools/arch/powerpc/include/uapi/asm/mman.h index 7a56ab9..761db43 100644 --- 
a/tools/arch/powerpc/include/uapi/asm/mman.h +++ b/tools/arch/powerpc/include/uapi/asm/mman.h @@ -10,4 +10,6 @@ #define MAP_POPULATE 0x8000 #define MAP_STACK 0x20000 #include +/* MAP_32BIT is undefined on powerpc, fix it for perf */ +#define MAP_32BIT 0 #endif diff --git a/tools/arch/s390/include/uapi/asm/mman.h b/tools/arch/s390/include/uapi/asm/mman.h index fe53b91..b03dea9 100644 --- a/tools/arch/s390/include/uapi/asm/mman.h +++ b/tools/arch/s390/include/uapi/asm/mman.h @@ -1,4 +1,6 @@ #ifndef TOOLS_ARCH_S390_UAPI_ASM_MMAN_FIX_H #define TOOLS_ARCH_S390_UAPI_ASM_MMAN_FIX_H #include +/* MAP_32BIT is undefined on s390, fix it for perf */ +#define MAP_32BIT 0 #endif diff --git a/tools/arch/score/include/uapi/asm/mman.h b/tools/arch/score/include/uapi/asm/mman.h index ba1ee9c..2f8fb89 100644 --- a/tools/arch/score/include/uapi/asm/mman.h +++ b/tools/arch/score/include/uapi/asm/mman.h @@ -1,4 +1,6 @@ #ifndef TOOLS_ARCH_SCORE_UAPI_ASM_MMAN_FIX_H #define TOOLS_ARCH_SCORE_UAPI_ASM_MMAN_FIX_H #include +/* MAP_32BIT is undefined on score, fix it for perf */ +#define MAP_32BIT 0 #endif diff --git a/tools/arch/sh/include/uapi/asm/mman.h b/tools/arch/sh/include/uapi/asm/mman.h index 5a47d8c..26504f6 100644 --- a/tools/arch/sh/include/uapi/asm/mman.h +++ b/tools/arch/sh/include/uapi/asm/mman.h @@ -1,4 +1,6 @@ #ifndef TOOLS_ARCH_SH_UAPI_ASM_MMAN_FIX_H #define TOOLS_ARCH_SH_UAPI_ASM_MMAN_FIX_H #include +/* MAP_32BIT is undefined on sh, fix it for perf */ +#define MAP_32BIT 0 #endif diff --git a/tools/arch/sparc/include/uapi/asm/mman.h b/tools/arch/sparc/include/uapi/asm/mman.h index b88f3ac..8640525 100644 --- a/tools/arch/sparc/include/uapi/asm/mman.h +++ b/tools/arch/sparc/include/uapi/asm/mman.h @@ -10,4 +10,6 @@ #define MAP_POPULATE 0x8000 #define MAP_STACK 0x20000 #include +/* MAP_32BIT is undefined on sparc, fix it for perf */ +#define MAP_32BIT 0 #endif diff --git a/tools/arch/tile/include/uapi/asm/mman.h b/tools/arch/tile/include/uapi/asm/mman.h index b0a054f..7116c4b 100644 --- a/tools/arch/tile/include/uapi/asm/mman.h +++ b/tools/arch/tile/include/uapi/asm/mman.h @@ -10,4 +10,6 @@ #define MAP_POPULATE 0x0040 #define MAP_STACK MAP_GROWSDOWN #include +/* MAP_32BIT is undefined on tile, fix it for perf */ +#define MAP_32BIT 0 #endif diff --git a/tools/arch/xtensa/include/uapi/asm/mman.h b/tools/arch/xtensa/include/uapi/asm/mman.h index 1c89538..4453195 100644 --- a/tools/arch/xtensa/include/uapi/asm/mman.h +++ b/tools/arch/xtensa/include/uapi/asm/mman.h @@ -5,6 +5,7 @@ #define MADV_DONTDUMP 16 #define MADV_DONTFORK 10 #define MADV_DONTNEED 4 +#define MADV_FREE 8 #define MADV_HUGEPAGE 14 #define MADV_MERGEABLE 12 #define MADV_NOHUGEPAGE 15 @@ -35,4 +36,12 @@ #define PROT_READ 0x1 #define PROT_SEM 0x10 #define PROT_WRITE 0x2 +/* MADV_HWPOISON is undefined on xtensa, fix it for perf */ +#define MADV_HWPOISON 100 +/* MADV_SOFT_OFFLINE is undefined on xtensa, fix it for perf */ +#define MADV_SOFT_OFFLINE 101 +/* MAP_32BIT is undefined on xtensa, fix it for perf */ +#define MAP_32BIT 0 +/* MAP_UNINITIALIZED is undefined on xtensa, fix it for perf */ +#define MAP_UNINITIALIZED 0 #endif -- cgit v0.10.2 From 8ee83b2ab3d1987cbd80c9f2c6f2b12fed87b51e Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Fri, 16 Sep 2016 16:48:19 +0300 Subject: perf/x86/intel/pt: Add support for PTWRITE and power event tracing The Intel PT facility grew some new functionality: * PTWRITE packet carries the payload of the new PTWRITE instruction that can be used to instrument Intel PT traces with user-supplied data. 
Packets of this type are only generated if 'ptwrite' capability is set and PTWEn bit is set in the event attribute's config. Flow update packets (FUP) can be generated on PTWRITE packets if FUPonPTW config bit is set. Setting these bits is not allowed if 'ptwrite' capability is not set. * PWRE, PWRX, MWAIT, EXSTOP packets communicate core power management events. These depend on 'power_event_tracing' capability and are enabled by setting PwrEvtEn bit in the event attribute. Extend the driver capabilities and provide the proper sanity checks in the event validation function. [ tglx: Massaged changelog ] Signed-off-by: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: vince@deater.net Cc: eranian@google.com Cc: Adrian Hunter Link: http://lkml.kernel.org/r/20160916134819.1978-1-alexander.shishkin@linux.intel.com Signed-off-by: Thomas Gleixner diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c index 04bb5fb..18d18fd 100644 --- a/arch/x86/events/intel/pt.c +++ b/arch/x86/events/intel/pt.c @@ -69,6 +69,8 @@ static struct pt_cap_desc { PT_CAP(psb_cyc, 0, CR_EBX, BIT(1)), PT_CAP(ip_filtering, 0, CR_EBX, BIT(2)), PT_CAP(mtc, 0, CR_EBX, BIT(3)), + PT_CAP(ptwrite, 0, CR_EBX, BIT(4)), + PT_CAP(power_event_trace, 0, CR_EBX, BIT(5)), PT_CAP(topa_output, 0, CR_ECX, BIT(0)), PT_CAP(topa_multiple_entries, 0, CR_ECX, BIT(1)), PT_CAP(single_range_output, 0, CR_ECX, BIT(2)), @@ -259,10 +261,16 @@ fail: #define RTIT_CTL_MTC (RTIT_CTL_MTC_EN | \ RTIT_CTL_MTC_RANGE) +#define RTIT_CTL_PTW (RTIT_CTL_PTW_EN | \ + RTIT_CTL_FUP_ON_PTW) + #define PT_CONFIG_MASK (RTIT_CTL_TSC_EN | \ RTIT_CTL_DISRETC | \ RTIT_CTL_CYC_PSB | \ - RTIT_CTL_MTC) + RTIT_CTL_MTC | \ + RTIT_CTL_PWR_EVT_EN | \ + RTIT_CTL_FUP_ON_PTW | \ + RTIT_CTL_PTW_EN) static bool pt_event_valid(struct perf_event *event) { @@ -311,6 +319,20 @@ static bool pt_event_valid(struct perf_event *event) return false; } + if (config & RTIT_CTL_PWR_EVT_EN && + !pt_cap_get(PT_CAP_power_event_trace)) + return false; + + if (config & RTIT_CTL_PTW) { + if (!pt_cap_get(PT_CAP_ptwrite)) + return false; + + /* FUPonPTW without PTW doesn't make sense */ + if ((config & RTIT_CTL_FUP_ON_PTW) && + !(config & RTIT_CTL_PTW_EN)) + return false; + } + return true; } diff --git a/arch/x86/events/intel/pt.h b/arch/x86/events/intel/pt.h index efffa4a..53473c2 100644 --- a/arch/x86/events/intel/pt.h +++ b/arch/x86/events/intel/pt.h @@ -26,11 +26,14 @@ #define RTIT_CTL_CYCLEACC BIT(1) #define RTIT_CTL_OS BIT(2) #define RTIT_CTL_USR BIT(3) +#define RTIT_CTL_PWR_EVT_EN BIT(4) +#define RTIT_CTL_FUP_ON_PTW BIT(5) #define RTIT_CTL_CR3EN BIT(7) #define RTIT_CTL_TOPA BIT(8) #define RTIT_CTL_MTC_EN BIT(9) #define RTIT_CTL_TSC_EN BIT(10) #define RTIT_CTL_DISRETC BIT(11) +#define RTIT_CTL_PTW_EN BIT(12) #define RTIT_CTL_BRANCH_EN BIT(13) #define RTIT_CTL_MTC_RANGE_OFFSET 14 #define RTIT_CTL_MTC_RANGE (0x0full << RTIT_CTL_MTC_RANGE_OFFSET) @@ -91,6 +94,8 @@ enum pt_capabilities { PT_CAP_psb_cyc, PT_CAP_ip_filtering, PT_CAP_mtc, + PT_CAP_ptwrite, + PT_CAP_power_event_trace, PT_CAP_topa_output, PT_CAP_topa_multiple_entries, PT_CAP_single_range_output, -- cgit v0.10.2 From f666ac0dab5afaf6ebed2c361251581bfccc4003 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 19 Sep 2016 15:10:10 +0200 Subject: perf hists: Fix width computation for srcline sort entry Adding header size to width computation for srcline sort entry, because it's possible to get empty data with ':0' which set width of 2 which is lower than width needed to display column header. 
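In other words, the rule being applied is "column width = max(widest data value, header label width)": an srcline value like ':0' is only two characters, narrower than the column header itself. A minimal sketch of that rule, using an illustrative helper name rather than the actual perf API:

#include <string.h>

/* Illustrative only, not the perf implementation: a column must be wide
 * enough for both its widest value and its header label. */
static size_t column_width(size_t cur, const char *value, const char *header)
{
	size_t need = strlen(value);		/* ":0" gives only 2 */

	if (strlen(header) > need)		/* the header may be wider */
		need = strlen(header);

	return need > cur ? need : cur;		/* never shrink the column */
}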
Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Don Zickus Cc: Joe Mario Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1474290610-23241-62-git-send-email-jolsa@kernel.org [ Added declaration to sort.h ] Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 37a08f2..b02992e 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -177,8 +177,10 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h) hists__new_col_len(hists, HISTC_LOCAL_WEIGHT, 12); hists__new_col_len(hists, HISTC_GLOBAL_WEIGHT, 12); - if (h->srcline) - hists__new_col_len(hists, HISTC_SRCLINE, strlen(h->srcline)); + if (h->srcline) { + len = MAX(strlen(h->srcline), strlen(sort_srcline.se_header)); + hists__new_col_len(hists, HISTC_SRCLINE, len); + } if (h->srcfile) hists__new_col_len(hists, HISTC_SRCFILE, strlen(h->srcfile)); diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index 28c0524..9505483 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -40,6 +40,7 @@ extern struct sort_entry sort_dso_from; extern struct sort_entry sort_dso_to; extern struct sort_entry sort_sym_from; extern struct sort_entry sort_sym_to; +extern struct sort_entry sort_srcline; extern enum sort_type sort__first_dimension; extern const char default_mem_sort_order[]; -- cgit v0.10.2 From 88a7fcf961a27fbd248e3c914fc9df8327f7cdbb Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Fri, 19 Aug 2016 18:29:35 +0530 Subject: perf annotate: Do not ignore call instruction with indirect target Do not ignore call instruction with indirect target when its already identified as a call. This is an extension of commit e8ea1561952b ("perf annotate: Use raw form for register indirect call instructions") to generalize annotation for all instructions with indirect calls. This is needed for certain powerpc call instructions that use address in a register (such as bctrl, btarl, ...). Apart from that, when kcore is used to disassemble function, all call instructions were ignored. This patch will fix it as a side effect by not ignoring them. For example, Before (with kcore): mov %r13,%rdi callq 0xffffffff811a7e70 ^ jmpq 64 mov %gs:0x7ef41a6e(%rip),%al After (with kcore): mov %r13,%rdi > callq 0xffffffff811a7e70 ^ jmpq 64 mov %gs:0x7ef41a6e(%rip),%al Suggested-by: Michael Ellerman [Suggested about 'bctrl' instruction] Signed-off-by: Ravi Bangoria Cc: Alexander Shishkin Cc: Chris Riyder Cc: Hemant Kumar Cc: Jiri Olsa Cc: Markus Trippelsdorf Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Naveen N. Rao Cc: Pawel Moll Cc: Peter Zijlstra Cc: Russell King Cc: Taeung Song Link: http://lkml.kernel.org/r/1471611578-11255-5-git-send-email-ravi.bangoria@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 7a80c73..60e915f 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -82,16 +82,12 @@ static int call__parse(struct ins_operands *ops) return ops->target.name == NULL ? 
-1 : 0; indirect_call: - tok = strchr(endptr, '('); - if (tok != NULL) { + tok = strchr(endptr, '*'); + if (tok == NULL) { ops->target.addr = 0; return 0; } - tok = strchr(endptr, '*'); - if (tok == NULL) - return -1; - ops->target.addr = strtoull(tok + 1, NULL, 16); return 0; } -- cgit v0.10.2 From bff5c3061374c37ed1262131eb333f714e5bcdf8 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 19 Sep 2016 17:18:16 -0300 Subject: perf annotate: Pass the symbol's map/dso to the instruction parsers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit So that things like: → callq 0xffffffff993e3230 found while disassembling /proc/kcore can be beautified by later patches, that will resolve that address to a function, looking it up in /proc/kallsyms. Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Chris Riyder Cc: David Ahern Cc: Hemant Kumar Cc: Jiri Olsa Cc: Markus Trippelsdorf Cc: Masami Hiramatsu Cc: Michael Ellerman Cc: Namhyung Kim Cc: Naveen N. Rao Cc: Pawel Moll Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Russell King Cc: Taeung Song Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-p76myuke4j7gplg54amaklxk@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 60e915f..aef8417 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -54,7 +54,7 @@ int ins__scnprintf(struct ins *ins, char *bf, size_t size, return ins__raw_scnprintf(ins, bf, size, ops); } -static int call__parse(struct ins_operands *ops) +static int call__parse(struct ins_operands *ops, struct map *map __maybe_unused) { char *endptr, *tok, *name; @@ -114,7 +114,7 @@ bool ins__is_call(const struct ins *ins) return ins->ops == &call_ops; } -static int jump__parse(struct ins_operands *ops) +static int jump__parse(struct ins_operands *ops, struct map *map __maybe_unused) { const char *s = strchr(ops->raw, '+'); @@ -169,7 +169,7 @@ static int comment__symbol(char *raw, char *comment, u64 *addrp, char **namep) return 0; } -static int lock__parse(struct ins_operands *ops) +static int lock__parse(struct ins_operands *ops, struct map *map) { char *name; @@ -190,7 +190,7 @@ static int lock__parse(struct ins_operands *ops) return 0; if (ops->locked.ins->ops->parse && - ops->locked.ins->ops->parse(ops->locked.ops) < 0) + ops->locked.ins->ops->parse(ops->locked.ops, map) < 0) goto out_free_ops; return 0; @@ -233,7 +233,7 @@ static struct ins_ops lock_ops = { .scnprintf = lock__scnprintf, }; -static int mov__parse(struct ins_operands *ops) +static int mov__parse(struct ins_operands *ops, struct map *map __maybe_unused) { char *s = strchr(ops->raw, ','), *target, *comment, prev; @@ -300,7 +300,7 @@ static struct ins_ops mov_ops = { .scnprintf = mov__scnprintf, }; -static int dec__parse(struct ins_operands *ops) +static int dec__parse(struct ins_operands *ops, struct map *map __maybe_unused) { char *target, *comment, *s, prev; @@ -705,7 +705,7 @@ int hist_entry__inc_addr_samples(struct hist_entry *he, int evidx, u64 ip) return symbol__inc_addr_samples(he->ms.sym, he->ms.map, evidx, ip); } -static void disasm_line__init_ins(struct disasm_line *dl) +static void disasm_line__init_ins(struct disasm_line *dl, struct map *map) { dl->ins = ins__find(dl->name); @@ -715,7 +715,7 @@ static void disasm_line__init_ins(struct disasm_line *dl) if (!dl->ins->ops) return; - if (dl->ins->ops->parse && dl->ins->ops->parse(&dl->ops) < 0) + if (dl->ins->ops->parse && dl->ins->ops->parse(&dl->ops, map) < 0) dl->ins = NULL; } 
@@ -757,7 +757,8 @@ out_free_name: } static struct disasm_line *disasm_line__new(s64 offset, char *line, - size_t privsize, int line_nr) + size_t privsize, int line_nr, + struct map *map) { struct disasm_line *dl = zalloc(sizeof(*dl) + privsize); @@ -772,7 +773,7 @@ static struct disasm_line *disasm_line__new(s64 offset, char *line, if (disasm_line__parse(dl->line, &dl->name, &dl->ops.raw) < 0) goto out_free_line; - disasm_line__init_ins(dl); + disasm_line__init_ins(dl, map); } } @@ -1144,7 +1145,7 @@ static int symbol__parse_objdump_line(struct symbol *sym, struct map *map, parsed_line = tmp2 + 1; } - dl = disasm_line__new(offset, parsed_line, privsize, *line_nr); + dl = disasm_line__new(offset, parsed_line, privsize, *line_nr, map); free(line); (*line_nr)++; diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index ea44e4f..5bbcec1 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -36,7 +36,7 @@ struct ins_operands { struct ins_ops { void (*free)(struct ins_operands *ops); - int (*parse)(struct ins_operands *ops); + int (*parse)(struct ins_operands *ops, struct map *map); int (*scnprintf)(struct ins *ins, char *bf, size_t size, struct ins_operands *ops); }; -- cgit v0.10.2 From 5f62d4fd3593bc5bb33c75238cc1d6bf0b34fac9 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 19 Sep 2016 17:26:11 -0300 Subject: perf annotate: Resolve 'call' operands to function names MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Before this patch the '_raw_spin_lock_irqsave' and 'update_rq_clock' operands were appearing just as hexadecimal numbers: update_blocked_averages /proc/kcore │ push %r12 │ push %rbx │ and $0xfffffffffffffff0,%rsp │ sub $0x40,%rsp │ add -0x662cac00(,%rdi,8),%rax │ mov %rax,%rbx │ mov %rax,%rdi │ mov %rax,0x38(%rsp) │ → callq _raw_spin_lock_irqsave │ mov %rbx,%rdi │ mov %rax,0x30(%rsp) │ → callq update_rq_clock │ mov 0x8d0(%rbx),%rax │ lea 0x8d0(%rbx),%r11 To check that all is right one can always use the 'o' hotkey and see the original objdump -dS output, that for this case is: update_blocked_averages /proc/kcore │ffffffff990d5489: push %r12 │ffffffff990d548b: push %rbx │ffffffff990d548c: and $0xfffffffffffffff0,%rsp │ffffffff990d5490: sub $0x40,%rsp │ffffffff990d5494: add -0x662cac00(,%rdi,8),%rax │ffffffff990d549c: mov %rax,%rbx │ffffffff990d549f: mov %rax,%rdi │ffffffff990d54a2: mov %rax,0x38(%rsp) │ffffffff990d54a7: → callq 0xffffffff997eb7a0 │ffffffff990d54ac: mov %rbx,%rdi │ffffffff990d54af: mov %rax,0x30(%rsp) │ffffffff990d54b4: → callq 0xffffffff990c7720 │ffffffff990d54b9: mov 0x8d0(%rbx),%rax │ffffffff990d54c0: lea 0x8d0(%rbx),%r11 Use the 'h' hotkey to see a list of available hotkeys. More work needed to cover operands for other instructions, such as 'mov', that can resolve variable names, etc. Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Chris Riyder Cc: David Ahern Cc: Hemant Kumar Cc: Jiri Olsa Cc: Markus Trippelsdorf Cc: Masami Hiramatsu Cc: Michael Ellerman Cc: Namhyung Kim Cc: Naveen N. 
Rao Cc: Pawel Moll Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Russell King Cc: Taeung Song Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-xqgtw9mzmzcjgwkis9kiiv1p@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index aef8417..aeb5a44 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -54,7 +54,7 @@ int ins__scnprintf(struct ins *ins, char *bf, size_t size, return ins__raw_scnprintf(ins, bf, size, ops); } -static int call__parse(struct ins_operands *ops, struct map *map __maybe_unused) +static int call__parse(struct ins_operands *ops, struct map *map) { char *endptr, *tok, *name; @@ -84,7 +84,11 @@ static int call__parse(struct ins_operands *ops, struct map *map __maybe_unused) indirect_call: tok = strchr(endptr, '*'); if (tok == NULL) { - ops->target.addr = 0; + struct symbol *sym = map__find_symbol(map, map->map_ip(map, ops->target.addr)); + if (sym != NULL) + ops->target.name = strdup(sym->name); + else + ops->target.addr = 0; return 0; } -- cgit v0.10.2 From 5ff3e7a224d40f9dd73625b91377787034a8b35e Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 20 Sep 2016 14:30:23 +0900 Subject: perf ui/tui: Reset output width for hierarchy MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When --hierarchy option is used, each entry has its own hpp_list to show the result. But it missed to update width of each column. Before: - 46.29% 48.12% netctl-auto + 31.44% 29.25% [kernel.vmlinux] + 8.52% 11.55% libc-2.22.so + 5.19% 6.91% bash + 10.75% 11.83% wpa_cli + 8.25% 2.23% swapper + 6.45% 5.40% tr + 4.81% 8.09% awk + 4.15% 2.85% firefox + 3.86% 2.53% sh After: - 46.29% 48.12% netctl-auto + 31.44% 29.25% [kernel.vmlinux] + 8.52% 11.55% libc-2.22.so + 5.19% 6.91% bash + 10.75% 11.83% wpa_cli + 8.25% 2.23% swapper + 6.45% 5.40% tr + 4.81% 8.09% awk + 4.15% 2.85% firefox + 3.86% 2.53% sh Committer note: Full testing instructions: 1) Record with an event group: $ perf record -e '{cycles,instructions}' make -j4 2) Use report in hierarchy mode, to get a few expanded trees on the same screen, use --percent-limit: $ perf report --hierarchy --percent-limit 0.5 Samples: 103K of event 'anon group { cycles:u, instructions:u }', Event count (approx.): 57317631725 Overhead Command / Shared Object / Symbol â—† - 58.89% 55.12% cc1 â–’ - 50.26% 48.10% cc1 â–’ 3.61% 5.13% [.] _cpp_lex_token â–’ 2.58% 0.78% [.] ht_lookup_with_hash â–’ 1.31% 1.30% [.] ggc_internal_alloc â–’ 1.08% 2.25% [.] get_combined_adhoc_loc â–’ 1.01% 1.95% [.] ira_init â–’ 0.96% 1.78% [.] linemap_position_for_column â–’ 0.65% 1.01% [.] cpp_get_token_with_location â–’ - 7.52% 6.58% libc-2.23.so â–’ 1.70% 1.78% [.] _int_malloc â–’ 0.69% 0.75% [.] _int_free â–’ 0.67% 0.42% [.] 
malloc_consolidate â–’ - 0.58% 0.42% ld-2.23.so â–’ no entry >= 0.50% â–’ - 0.52% 0.03% [kernel.vmlinux] â–’ no entry >= 0.50% â–’ Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Andi Kleen Cc: Peter Zijlstra Fixes: 1b2dbbf41a0f ("perf hists: Use own hpp_list for hierarchy mode") Link: http://lkml.kernel.org/r/20160920053025.13989-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 35e44b1..49db163 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -2067,6 +2067,7 @@ void hist_browser__init(struct hist_browser *browser, struct hists *hists) { struct perf_hpp_fmt *fmt; + struct perf_hpp_list_node *node; browser->hists = hists; browser->b.refresh = hist_browser__refresh; @@ -2079,6 +2080,11 @@ void hist_browser__init(struct hist_browser *browser, perf_hpp__reset_width(fmt, hists); ++browser->b.columns; } + /* hierarchy entries have their own hpp list */ + list_for_each_entry(node, &hists->hpp_formats, list) { + perf_hpp_list__for_each_format(&node->hpp, fmt) + perf_hpp__reset_width(fmt, hists); + } } struct hist_browser *hist_browser__new(struct hists *hists) -- cgit v0.10.2 From e3b60bc93d81e0542ac433df226b8de8b963533e Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 20 Sep 2016 14:30:24 +0900 Subject: perf hists: Factor out hists__reset_column_width() The stdio and tui has same code to reset hpp format column width. Factor it out as a new function. Suggested-and-Acked-by: Jiri Olsa Signed-off-by: Namhyung Kim Cc: Andi Kleen Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20160920053025.13989-2-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 49db163..a6d5d24 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -2067,7 +2067,6 @@ void hist_browser__init(struct hist_browser *browser, struct hists *hists) { struct perf_hpp_fmt *fmt; - struct perf_hpp_list_node *node; browser->hists = hists; browser->b.refresh = hist_browser__refresh; @@ -2076,15 +2075,10 @@ void hist_browser__init(struct hist_browser *browser, browser->b.use_navkeypressed = true; browser->show_headers = symbol_conf.show_hist_headers; - hists__for_each_format(hists, fmt) { - perf_hpp__reset_width(fmt, hists); + hists__for_each_format(hists, fmt) ++browser->b.columns; - } - /* hierarchy entries have their own hpp list */ - list_for_each_entry(node, &hists->hpp_formats, list) { - perf_hpp_list__for_each_format(&node->hpp, fmt) - perf_hpp__reset_width(fmt, hists); - } + + hists__reset_column_width(hists); } struct hist_browser *hist_browser__new(struct hists *hists) diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c index b47fafc..60c4a4d 100644 --- a/tools/perf/ui/hist.c +++ b/tools/perf/ui/hist.c @@ -699,6 +699,21 @@ void perf_hpp__reset_width(struct perf_hpp_fmt *fmt, struct hists *hists) } } +void hists__reset_column_width(struct hists *hists) +{ + struct perf_hpp_fmt *fmt; + struct perf_hpp_list_node *node; + + hists__for_each_format(hists, fmt) + perf_hpp__reset_width(fmt, hists); + + /* hierarchy entries have their own hpp list */ + list_for_each_entry(node, &hists->hpp_formats, list) { + perf_hpp_list__for_each_format(&node->hpp, fmt) + perf_hpp__reset_width(fmt, hists); + } +} + void perf_hpp__set_user_width(const char *width_list_str) { struct perf_hpp_fmt *fmt; diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c index a57131e..8e1840b 
100644 --- a/tools/perf/ui/stdio/hist.c +++ b/tools/perf/ui/stdio/hist.c @@ -717,8 +717,6 @@ size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows, int max_cols, float min_pcnt, FILE *fp, bool use_callchain) { - struct perf_hpp_fmt *fmt; - struct perf_hpp_list_node *node; struct rb_node *nd; size_t ret = 0; const char *sep = symbol_conf.field_sep; @@ -729,13 +727,7 @@ size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows, init_rem_hits(); - hists__for_each_format(hists, fmt) - perf_hpp__reset_width(fmt, hists); - /* hierarchy entries have their own hpp list */ - list_for_each_entry(node, &hists->hpp_formats, list) { - perf_hpp_list__for_each_format(&node->hpp, fmt) - perf_hpp__reset_width(fmt, hists); - } + hists__reset_column_width(hists); if (symbol_conf.col_width_list_str) perf_hpp__set_user_width(symbol_conf.col_width_list_str); diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index a002c93..defa957 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -368,6 +368,7 @@ static inline bool perf_hpp__should_skip(struct perf_hpp_fmt *format, void perf_hpp__reset_width(struct perf_hpp_fmt *fmt, struct hists *hists); void perf_hpp__reset_sort_width(struct perf_hpp_fmt *fmt, struct hists *hists); void perf_hpp__set_user_width(const char *width_list_str); +void hists__reset_column_width(struct hists *hists); typedef u64 (*hpp_field_fn)(struct hist_entry *he); typedef int (*hpp_callback_fn)(struct perf_hpp *hpp, bool front); -- cgit v0.10.2 From 3c028a0cb5b71f47d523bc8ad2c597cb257f41fb Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 20 Sep 2016 18:12:45 +0200 Subject: perf symbols: Do not open device files The dso__read_binary_type_filename gets the dso's file name to open. We need to check it for regular file before trying to open it, otherwise we might get stuck with device file. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Don Zickus Cc: Joe Mario Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20160920161245.GA8995@krava Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index 774f6ec..d2c6cdd 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -363,6 +363,9 @@ static int __open_dso(struct dso *dso, struct machine *machine) return -EINVAL; } + if (!is_regular_file(name)) + return -EINVAL; + fd = do_open(name); free(name); return fd; -- cgit v0.10.2 From 82deb8a242cd8aceaf553c9fb731f91dbdc1f9a6 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 19 Sep 2016 15:09:11 +0200 Subject: perf evsel: Remove superfluous initialization of weight Removing superfluous initialization of weight, it's already set to 0 via memset. 
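As a hedged illustration of why that assignment is dead code (the struct below is a stand-in, not the actual perf sample type): once the result struct has been cleared with memset(), every field is already zero, so only fields that receive real data need a store.

#include <string.h>

struct sample {				/* stand-in for perf's sample data */
	unsigned long long period;
	unsigned long long weight;
};

static void parse_sample(struct sample *data)
{
	memset(data, 0, sizeof(*data));	/* weight is already 0 here */
	data->period = 1;		/* only non-zero fields need stores */
	/* data->weight = 0;		   <- the kind of redundant write removed */
}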
Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Don Zickus Cc: Joe Mario Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1474290610-23241-3-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 21fd573..f3225a2 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1728,7 +1728,6 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event, data->cpu = data->pid = data->tid = -1; data->stream_id = data->id = data->time = -1ULL; data->period = evsel->attr.sample_period; - data->weight = 0; data->cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; if (event->header.type != PERF_RECORD_SAMPLE) { @@ -1935,7 +1934,6 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event, } } - data->weight = 0; if (type & PERF_SAMPLE_WEIGHT) { OVERFLOW_CHECK_u64(array); data->weight = *array; -- cgit v0.10.2 From d5278220be663753a011910c194d50758cd8dc98 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 19 Sep 2016 15:09:13 +0200 Subject: perf hists: Use bigger buffer for stdio headers With the node column on big CPU servers we can run out of stdio header space quite soon. Enlarge the header buffer. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Don Zickus Cc: Joe Mario Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1474290610-23241-5-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c index 8e1840b..c8dca34 100644 --- a/tools/perf/ui/stdio/hist.c +++ b/tools/perf/ui/stdio/hist.c @@ -698,7 +698,7 @@ hists__fprintf_standard_headers(struct hists *hists, static int hists__fprintf_headers(struct hists *hists, FILE *fp) { - char bf[96]; + char bf[1024]; struct perf_hpp dummy_hpp = { .buf = bf, .size = sizeof(bf), -- cgit v0.10.2 From 08d5204adbf845fadd936aae7639fc05d4eddee1 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Fri, 16 Sep 2016 09:49:58 -0600 Subject: perf tools: Confine __get_cpuid() to x86 architecture The __get_cpuid() test is only valid when compiling for x86. When compiling for other architectures like ARM/ARM64 the test fails even if the functionality is not required. This patch isolates the built-in feature check to the x86 platform, allowing the compilation and usage of PMUs that use the AUXTRACE infrastructure on other architectures (i.e. ARM CoreSight).
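The probe being confined is essentially a compile test of GCC's __get_cpuid() helper from <cpuid.h>, which only exists for x86 targets. A rough sketch of such a feature test (an approximation for illustration, not the exact file shipped under tools/build/feature/):

#include <cpuid.h>

int main(void)
{
	unsigned int eax = 0, ebx = 0, ecx = 0, edx = 0;

	/* Fails to compile on non-x86 targets, where <cpuid.h> is unavailable. */
	return __get_cpuid(0, &eax, &ebx, &ecx, &edx) ? 0 : 1;
}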
Signed-off-by: Mathieu Poirier Acked-by: Adrian Hunter Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Peter Zijlstra Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/1474041004-13956-2-git-send-email-mathieu.poirier@linaro.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 24803c5..72edf83 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -746,10 +746,13 @@ ifdef LIBBABELTRACE endif ifndef NO_AUXTRACE - ifeq ($(feature-get_cpuid), 0) - msg := $(warning Your gcc lacks the __get_cpuid() builtin, disables support for auxtrace/Intel PT, please install a newer gcc); - NO_AUXTRACE := 1 - else + ifeq ($(ARCH),x86) + ifeq ($(feature-get_cpuid), 0) + msg := $(warning Your gcc lacks the __get_cpuid() builtin, disables support for auxtrace/Intel PT, please install a newer gcc); + NO_AUXTRACE := 1 + endif + endif + ifndef NO_AUXTRACE $(call detected,CONFIG_AUXTRACE) CFLAGS += -DHAVE_AUXTRACE_SUPPORT endif -- cgit v0.10.2 From 7e21b0d579a481e2e7064c6383d5873d841777a8 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Fri, 16 Sep 2016 09:49:59 -0600 Subject: perf tools: Make coresight PMU listable Adding the required mechanic allowing 'perf list pmu' to discover coresight ETM/PTM tracers. Signed-off-by: Mathieu Poirier Acked-by: Adrian Hunter Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Peter Zijlstra Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/1474041004-13956-3-git-send-email-mathieu.poirier@linaro.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/MAINTAINERS b/MAINTAINERS index db814a8..7407fe7 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1123,6 +1123,7 @@ F: drivers/hwtracing/coresight/* F: Documentation/trace/coresight.txt F: Documentation/devicetree/bindings/arm/coresight.txt F: Documentation/ABI/testing/sysfs-bus-coresight-devices-* +F: tools/perf/arch/arm/util/pmu.c ARM/CORGI MACHINE SUPPORT M: Richard Purdie diff --git a/tools/perf/arch/arm/util/Build b/tools/perf/arch/arm/util/Build index f98da17..4093fd1 100644 --- a/tools/perf/arch/arm/util/Build +++ b/tools/perf/arch/arm/util/Build @@ -2,3 +2,5 @@ libperf-$(CONFIG_DWARF) += dwarf-regs.o libperf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o + +libperf-$(CONFIG_AUXTRACE) += pmu.o diff --git a/tools/perf/arch/arm/util/pmu.c b/tools/perf/arch/arm/util/pmu.c new file mode 100644 index 0000000..af9fb66 --- /dev/null +++ b/tools/perf/arch/arm/util/pmu.c @@ -0,0 +1,34 @@ +/* + * Copyright(C) 2015 Linaro Limited. All rights reserved. + * Author: Mathieu Poirier + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . 
+ */ + +#include +#include +#include + +#include "../../util/pmu.h" + +struct perf_event_attr +*perf_pmu__get_default_config(struct perf_pmu *pmu __maybe_unused) +{ +#ifdef HAVE_AUXTRACE_SUPPORT + if (!strcmp(pmu->name, CORESIGHT_ETM_PMU_NAME)) { + /* add ETM default config here */ + pmu->selectable = true; + } +#endif + return NULL; +} diff --git a/tools/perf/arch/arm64/util/Build b/tools/perf/arch/arm64/util/Build index 02f41db..3876dd0 100644 --- a/tools/perf/arch/arm64/util/Build +++ b/tools/perf/arch/arm64/util/Build @@ -1,2 +1,4 @@ libperf-$(CONFIG_DWARF) += dwarf-regs.o libperf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o + +libperf-$(CONFIG_AUXTRACE) += ../../arm/util/pmu.o -- cgit v0.10.2 From a818c563ae16640e00389a39e7b0e7ae4bd3d64c Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Fri, 16 Sep 2016 09:50:00 -0600 Subject: perf tools: Add coresight etm PMU record capabilities Coresight ETMs are IP blocks used to perform HW assisted tracing on a CPU core. This patch introduce the required auxiliary API functions allowing the perf core to interact with a tracer. Signed-off-by: Mathieu Poirier Acked-by: Adrian Hunter Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Peter Zijlstra Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/1474041004-13956-4-git-send-email-mathieu.poirier@linaro.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/MAINTAINERS b/MAINTAINERS index 7407fe7..ac29d54 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1124,6 +1124,10 @@ F: Documentation/trace/coresight.txt F: Documentation/devicetree/bindings/arm/coresight.txt F: Documentation/ABI/testing/sysfs-bus-coresight-devices-* F: tools/perf/arch/arm/util/pmu.c +F: tools/perf/arch/arm/util/auxtrace.c +F: tools/perf/arch/arm/util/cs-etm.c +F: tools/perf/arch/arm/util/cs-etm.h +F: tools/perf/util/cs-etm.h ARM/CORGI MACHINE SUPPORT M: Richard Purdie diff --git a/tools/perf/arch/arm/util/Build b/tools/perf/arch/arm/util/Build index 4093fd1..e64c5f2 100644 --- a/tools/perf/arch/arm/util/Build +++ b/tools/perf/arch/arm/util/Build @@ -3,4 +3,4 @@ libperf-$(CONFIG_DWARF) += dwarf-regs.o libperf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o -libperf-$(CONFIG_AUXTRACE) += pmu.o +libperf-$(CONFIG_AUXTRACE) += pmu.o auxtrace.o cs-etm.o diff --git a/tools/perf/arch/arm/util/auxtrace.c b/tools/perf/arch/arm/util/auxtrace.c new file mode 100644 index 0000000..8edf2cb --- /dev/null +++ b/tools/perf/arch/arm/util/auxtrace.c @@ -0,0 +1,54 @@ +/* + * Copyright(C) 2015 Linaro Limited. All rights reserved. + * Author: Mathieu Poirier + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . 
+ */ + +#include +#include + +#include "../../util/auxtrace.h" +#include "../../util/evlist.h" +#include "../../util/pmu.h" +#include "cs-etm.h" + +struct auxtrace_record +*auxtrace_record__init(struct perf_evlist *evlist, int *err) +{ + struct perf_pmu *cs_etm_pmu; + struct perf_evsel *evsel; + bool found_etm = false; + + cs_etm_pmu = perf_pmu__find(CORESIGHT_ETM_PMU_NAME); + + if (evlist) { + evlist__for_each_entry(evlist, evsel) { + if (cs_etm_pmu && + evsel->attr.type == cs_etm_pmu->type) + found_etm = true; + } + } + + if (found_etm) + return cs_etm_record_init(err); + + /* + * Clear 'err' even if we haven't found a cs_etm event - that way perf + * record can still be used even if tracers aren't present. The NULL + * return value will take care of telling the infrastructure HW tracing + * isn't available. + */ + *err = 0; + return NULL; +} diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c new file mode 100644 index 0000000..829c479 --- /dev/null +++ b/tools/perf/arch/arm/util/cs-etm.c @@ -0,0 +1,559 @@ +/* + * Copyright(C) 2015 Linaro Limited. All rights reserved. + * Author: Mathieu Poirier + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + */ + +#include +#include +#include +#include +#include +#include + +#include "cs-etm.h" +#include "../../perf.h" +#include "../../util/auxtrace.h" +#include "../../util/cpumap.h" +#include "../../util/evlist.h" +#include "../../util/pmu.h" +#include "../../util/thread_map.h" +#include "../../util/cs-etm.h" + +#include + +struct cs_etm_recording { + struct auxtrace_record itr; + struct perf_pmu *cs_etm_pmu; + struct perf_evlist *evlist; + bool snapshot_mode; + size_t snapshot_size; +}; + +static bool cs_etm_is_etmv4(struct auxtrace_record *itr, int cpu); + +static int cs_etm_parse_snapshot_options(struct auxtrace_record *itr, + struct record_opts *opts, + const char *str) +{ + struct cs_etm_recording *ptr = + container_of(itr, struct cs_etm_recording, itr); + unsigned long long snapshot_size = 0; + char *endptr; + + if (str) { + snapshot_size = strtoull(str, &endptr, 0); + if (*endptr || snapshot_size > SIZE_MAX) + return -1; + } + + opts->auxtrace_snapshot_mode = true; + opts->auxtrace_snapshot_size = snapshot_size; + ptr->snapshot_size = snapshot_size; + + return 0; +} + +static int cs_etm_recording_options(struct auxtrace_record *itr, + struct perf_evlist *evlist, + struct record_opts *opts) +{ + struct cs_etm_recording *ptr = + container_of(itr, struct cs_etm_recording, itr); + struct perf_pmu *cs_etm_pmu = ptr->cs_etm_pmu; + struct perf_evsel *evsel, *cs_etm_evsel = NULL; + const struct cpu_map *cpus = evlist->cpus; + bool privileged = (geteuid() == 0 || perf_event_paranoid() < 0); + + ptr->evlist = evlist; + ptr->snapshot_mode = opts->auxtrace_snapshot_mode; + + evlist__for_each_entry(evlist, evsel) { + if (evsel->attr.type == cs_etm_pmu->type) { + if (cs_etm_evsel) { + pr_err("There may be only one %s event\n", + CORESIGHT_ETM_PMU_NAME); + return -EINVAL; + } + evsel->attr.freq = 0; + 
evsel->attr.sample_period = 1; + cs_etm_evsel = evsel; + opts->full_auxtrace = true; + } + } + + /* no need to continue if at least one event of interest was found */ + if (!cs_etm_evsel) + return 0; + + if (opts->use_clockid) { + pr_err("Cannot use clockid (-k option) with %s\n", + CORESIGHT_ETM_PMU_NAME); + return -EINVAL; + } + + /* we are in snapshot mode */ + if (opts->auxtrace_snapshot_mode) { + /* + * No size were given to '-S' or '-m,', so go with + * the default + */ + if (!opts->auxtrace_snapshot_size && + !opts->auxtrace_mmap_pages) { + if (privileged) { + opts->auxtrace_mmap_pages = MiB(4) / page_size; + } else { + opts->auxtrace_mmap_pages = + KiB(128) / page_size; + if (opts->mmap_pages == UINT_MAX) + opts->mmap_pages = KiB(256) / page_size; + } + } else if (!opts->auxtrace_mmap_pages && !privileged && + opts->mmap_pages == UINT_MAX) { + opts->mmap_pages = KiB(256) / page_size; + } + + /* + * '-m,xyz' was specified but no snapshot size, so make the + * snapshot size as big as the auxtrace mmap area. + */ + if (!opts->auxtrace_snapshot_size) { + opts->auxtrace_snapshot_size = + opts->auxtrace_mmap_pages * (size_t)page_size; + } + + /* + * -Sxyz was specified but no auxtrace mmap area, so make the + * auxtrace mmap area big enough to fit the requested snapshot + * size. + */ + if (!opts->auxtrace_mmap_pages) { + size_t sz = opts->auxtrace_snapshot_size; + + sz = round_up(sz, page_size) / page_size; + opts->auxtrace_mmap_pages = roundup_pow_of_two(sz); + } + + /* Snapshost size can't be bigger than the auxtrace area */ + if (opts->auxtrace_snapshot_size > + opts->auxtrace_mmap_pages * (size_t)page_size) { + pr_err("Snapshot size %zu must not be greater than AUX area tracing mmap size %zu\n", + opts->auxtrace_snapshot_size, + opts->auxtrace_mmap_pages * (size_t)page_size); + return -EINVAL; + } + + /* Something went wrong somewhere - this shouldn't happen */ + if (!opts->auxtrace_snapshot_size || + !opts->auxtrace_mmap_pages) { + pr_err("Failed to calculate default snapshot size and/or AUX area tracing mmap pages\n"); + return -EINVAL; + } + } + + /* We are in full trace mode but '-m,xyz' wasn't specified */ + if (opts->full_auxtrace && !opts->auxtrace_mmap_pages) { + if (privileged) { + opts->auxtrace_mmap_pages = MiB(4) / page_size; + } else { + opts->auxtrace_mmap_pages = KiB(128) / page_size; + if (opts->mmap_pages == UINT_MAX) + opts->mmap_pages = KiB(256) / page_size; + } + + } + + /* Validate auxtrace_mmap_pages provided by user */ + if (opts->auxtrace_mmap_pages) { + unsigned int max_page = (KiB(128) / page_size); + size_t sz = opts->auxtrace_mmap_pages * (size_t)page_size; + + if (!privileged && + opts->auxtrace_mmap_pages > max_page) { + opts->auxtrace_mmap_pages = max_page; + pr_err("auxtrace too big, truncating to %d\n", + max_page); + } + + if (!is_power_of_2(sz)) { + pr_err("Invalid mmap size for %s: must be a power of 2\n", + CORESIGHT_ETM_PMU_NAME); + return -EINVAL; + } + } + + if (opts->auxtrace_snapshot_mode) + pr_debug2("%s snapshot size: %zu\n", CORESIGHT_ETM_PMU_NAME, + opts->auxtrace_snapshot_size); + + if (cs_etm_evsel) { + /* + * To obtain the auxtrace buffer file descriptor, the auxtrace + * event must come first. + */ + perf_evlist__to_front(evlist, cs_etm_evsel); + /* + * In the case of per-cpu mmaps, we need the CPU on the + * AUX event. 
+ */ + if (!cpu_map__empty(cpus)) + perf_evsel__set_sample_bit(cs_etm_evsel, CPU); + } + + /* Add dummy event to keep tracking */ + if (opts->full_auxtrace) { + struct perf_evsel *tracking_evsel; + int err; + + err = parse_events(evlist, "dummy:u", NULL); + if (err) + return err; + + tracking_evsel = perf_evlist__last(evlist); + perf_evlist__set_tracking_event(evlist, tracking_evsel); + + tracking_evsel->attr.freq = 0; + tracking_evsel->attr.sample_period = 1; + + /* In per-cpu case, always need the time of mmap events etc */ + if (!cpu_map__empty(cpus)) + perf_evsel__set_sample_bit(tracking_evsel, TIME); + } + + return 0; +} + +static u64 cs_etm_get_config(struct auxtrace_record *itr) +{ + u64 config = 0; + struct cs_etm_recording *ptr = + container_of(itr, struct cs_etm_recording, itr); + struct perf_pmu *cs_etm_pmu = ptr->cs_etm_pmu; + struct perf_evlist *evlist = ptr->evlist; + struct perf_evsel *evsel; + + evlist__for_each_entry(evlist, evsel) { + if (evsel->attr.type == cs_etm_pmu->type) { + /* + * Variable perf_event_attr::config is assigned to + * ETMv3/PTM. The bit fields have been made to match + * the ETMv3.5 ETRMCR register specification. See the + * PMU_FORMAT_ATTR() declarations in + * drivers/hwtracing/coresight/coresight-perf.c for + * details. + */ + config = evsel->attr.config; + break; + } + } + + return config; +} + +static size_t +cs_etm_info_priv_size(struct auxtrace_record *itr __maybe_unused, + struct perf_evlist *evlist __maybe_unused) +{ + int i; + int etmv3 = 0, etmv4 = 0; + const struct cpu_map *cpus = evlist->cpus; + + /* cpu map is not empty, we have specific CPUs to work with */ + if (!cpu_map__empty(cpus)) { + for (i = 0; i < cpu_map__nr(cpus); i++) { + if (cs_etm_is_etmv4(itr, cpus->map[i])) + etmv4++; + else + etmv3++; + } + } else { + /* get configuration for all CPUs in the system */ + for (i = 0; i < cpu__max_cpu(); i++) { + if (cs_etm_is_etmv4(itr, i)) + etmv4++; + else + etmv3++; + } + } + + return (CS_ETM_HEADER_SIZE + + (etmv4 * CS_ETMV4_PRIV_SIZE) + + (etmv3 * CS_ETMV3_PRIV_SIZE)); +} + +static const char *metadata_etmv3_ro[CS_ETM_PRIV_MAX] = { + [CS_ETM_ETMCCER] = "mgmt/etmccer", + [CS_ETM_ETMIDR] = "mgmt/etmidr", +}; + +static const char *metadata_etmv4_ro[CS_ETMV4_PRIV_MAX] = { + [CS_ETMV4_TRCIDR0] = "trcidr/trcidr0", + [CS_ETMV4_TRCIDR1] = "trcidr/trcidr1", + [CS_ETMV4_TRCIDR2] = "trcidr/trcidr2", + [CS_ETMV4_TRCIDR8] = "trcidr/trcidr8", + [CS_ETMV4_TRCAUTHSTATUS] = "mgmt/trcauthstatus", +}; + +static bool cs_etm_is_etmv4(struct auxtrace_record *itr, int cpu) +{ + bool ret = false; + char path[PATH_MAX]; + int scan; + unsigned int val; + struct cs_etm_recording *ptr = + container_of(itr, struct cs_etm_recording, itr); + struct perf_pmu *cs_etm_pmu = ptr->cs_etm_pmu; + + /* Take any of the RO files for ETMv4 and see if it present */ + snprintf(path, PATH_MAX, "cpu%d/%s", + cpu, metadata_etmv4_ro[CS_ETMV4_TRCIDR0]); + scan = perf_pmu__scan_file(cs_etm_pmu, path, "%x", &val); + + /* The file was read successfully, we have a winner */ + if (scan == 1) + ret = true; + + return ret; +} + +static int cs_etm_get_ro(struct perf_pmu *pmu, int cpu, const char *path) +{ + char pmu_path[PATH_MAX]; + int scan; + unsigned int val = 0; + + /* Get RO metadata from sysfs */ + snprintf(pmu_path, PATH_MAX, "cpu%d/%s", cpu, path); + + scan = perf_pmu__scan_file(pmu, pmu_path, "%x", &val); + if (scan != 1) + pr_err("%s: error reading: %s\n", __func__, pmu_path); + + return val; +} + +static void cs_etm_get_metadata(int cpu, u32 *offset, + struct auxtrace_record 
*itr, + struct auxtrace_info_event *info) +{ + u32 increment; + u64 magic; + struct cs_etm_recording *ptr = + container_of(itr, struct cs_etm_recording, itr); + struct perf_pmu *cs_etm_pmu = ptr->cs_etm_pmu; + + /* first see what kind of tracer this cpu is affined to */ + if (cs_etm_is_etmv4(itr, cpu)) { + magic = __perf_cs_etmv4_magic; + /* Get trace configuration register */ + info->priv[*offset + CS_ETMV4_TRCCONFIGR] = + cs_etm_get_config(itr); + /* Get traceID from the framework */ + info->priv[*offset + CS_ETMV4_TRCTRACEIDR] = + coresight_get_trace_id(cpu); + /* Get read-only information from sysFS */ + info->priv[*offset + CS_ETMV4_TRCIDR0] = + cs_etm_get_ro(cs_etm_pmu, cpu, + metadata_etmv4_ro[CS_ETMV4_TRCIDR0]); + info->priv[*offset + CS_ETMV4_TRCIDR1] = + cs_etm_get_ro(cs_etm_pmu, cpu, + metadata_etmv4_ro[CS_ETMV4_TRCIDR1]); + info->priv[*offset + CS_ETMV4_TRCIDR2] = + cs_etm_get_ro(cs_etm_pmu, cpu, + metadata_etmv4_ro[CS_ETMV4_TRCIDR2]); + info->priv[*offset + CS_ETMV4_TRCIDR8] = + cs_etm_get_ro(cs_etm_pmu, cpu, + metadata_etmv4_ro[CS_ETMV4_TRCIDR8]); + info->priv[*offset + CS_ETMV4_TRCAUTHSTATUS] = + cs_etm_get_ro(cs_etm_pmu, cpu, + metadata_etmv4_ro + [CS_ETMV4_TRCAUTHSTATUS]); + + /* How much space was used */ + increment = CS_ETMV4_PRIV_MAX; + } else { + magic = __perf_cs_etmv3_magic; + /* Get configuration register */ + info->priv[*offset + CS_ETM_ETMCR] = cs_etm_get_config(itr); + /* Get traceID from the framework */ + info->priv[*offset + CS_ETM_ETMTRACEIDR] = + coresight_get_trace_id(cpu); + /* Get read-only information from sysFS */ + info->priv[*offset + CS_ETM_ETMCCER] = + cs_etm_get_ro(cs_etm_pmu, cpu, + metadata_etmv3_ro[CS_ETM_ETMCCER]); + info->priv[*offset + CS_ETM_ETMIDR] = + cs_etm_get_ro(cs_etm_pmu, cpu, + metadata_etmv3_ro[CS_ETM_ETMIDR]); + + /* How much space was used */ + increment = CS_ETM_PRIV_MAX; + } + + /* Build generic header portion */ + info->priv[*offset + CS_ETM_MAGIC] = magic; + info->priv[*offset + CS_ETM_CPU] = cpu; + /* Where the next CPU entry should start from */ + *offset += increment; +} + +static int cs_etm_info_fill(struct auxtrace_record *itr, + struct perf_session *session, + struct auxtrace_info_event *info, + size_t priv_size) +{ + int i; + u32 offset; + u64 nr_cpu, type; + const struct cpu_map *cpus = session->evlist->cpus; + struct cs_etm_recording *ptr = + container_of(itr, struct cs_etm_recording, itr); + struct perf_pmu *cs_etm_pmu = ptr->cs_etm_pmu; + + if (priv_size != cs_etm_info_priv_size(itr, session->evlist)) + return -EINVAL; + + if (!session->evlist->nr_mmaps) + return -EINVAL; + + /* If the cpu_map is empty all CPUs are involved */ + nr_cpu = cpu_map__empty(cpus) ? 
cpu__max_cpu() : cpu_map__nr(cpus); + /* Get PMU type as dynamically assigned by the core */ + type = cs_etm_pmu->type; + + /* First fill out the session header */ + info->type = PERF_AUXTRACE_CS_ETM; + info->priv[CS_HEADER_VERSION_0] = 0; + info->priv[CS_PMU_TYPE_CPUS] = type << 32; + info->priv[CS_PMU_TYPE_CPUS] |= nr_cpu; + info->priv[CS_ETM_SNAPSHOT] = ptr->snapshot_mode; + + offset = CS_ETM_SNAPSHOT + 1; + + /* cpu map is not empty, we have specific CPUs to work with */ + if (!cpu_map__empty(cpus)) { + for (i = 0; i < cpu_map__nr(cpus) && offset < priv_size; i++) + cs_etm_get_metadata(cpus->map[i], &offset, itr, info); + } else { + /* get configuration for all CPUs in the system */ + for (i = 0; i < cpu__max_cpu(); i++) + cs_etm_get_metadata(i, &offset, itr, info); + } + + return 0; +} + +static int cs_etm_find_snapshot(struct auxtrace_record *itr __maybe_unused, + int idx, struct auxtrace_mmap *mm, + unsigned char *data __maybe_unused, + u64 *head, u64 *old) +{ + pr_debug3("%s: mmap index %d old head %zu new head %zu size %zu\n", + __func__, idx, (size_t)*old, (size_t)*head, mm->len); + + *old = *head; + *head += mm->len; + + return 0; +} + +static int cs_etm_snapshot_start(struct auxtrace_record *itr) +{ + struct cs_etm_recording *ptr = + container_of(itr, struct cs_etm_recording, itr); + struct perf_evsel *evsel; + + evlist__for_each_entry(ptr->evlist, evsel) { + if (evsel->attr.type == ptr->cs_etm_pmu->type) + return perf_evsel__disable(evsel); + } + return -EINVAL; +} + +static int cs_etm_snapshot_finish(struct auxtrace_record *itr) +{ + struct cs_etm_recording *ptr = + container_of(itr, struct cs_etm_recording, itr); + struct perf_evsel *evsel; + + evlist__for_each_entry(ptr->evlist, evsel) { + if (evsel->attr.type == ptr->cs_etm_pmu->type) + return perf_evsel__enable(evsel); + } + return -EINVAL; +} + +static u64 cs_etm_reference(struct auxtrace_record *itr __maybe_unused) +{ + return (((u64) rand() << 0) & 0x00000000FFFFFFFFull) | + (((u64) rand() << 32) & 0xFFFFFFFF00000000ull); +} + +static void cs_etm_recording_free(struct auxtrace_record *itr) +{ + struct cs_etm_recording *ptr = + container_of(itr, struct cs_etm_recording, itr); + free(ptr); +} + +static int cs_etm_read_finish(struct auxtrace_record *itr, int idx) +{ + struct cs_etm_recording *ptr = + container_of(itr, struct cs_etm_recording, itr); + struct perf_evsel *evsel; + + evlist__for_each_entry(ptr->evlist, evsel) { + if (evsel->attr.type == ptr->cs_etm_pmu->type) + return perf_evlist__enable_event_idx(ptr->evlist, + evsel, idx); + } + + return -EINVAL; +} + +struct auxtrace_record *cs_etm_record_init(int *err) +{ + struct perf_pmu *cs_etm_pmu; + struct cs_etm_recording *ptr; + + cs_etm_pmu = perf_pmu__find(CORESIGHT_ETM_PMU_NAME); + + if (!cs_etm_pmu) { + *err = -EINVAL; + goto out; + } + + ptr = zalloc(sizeof(struct cs_etm_recording)); + if (!ptr) { + *err = -ENOMEM; + goto out; + } + + ptr->cs_etm_pmu = cs_etm_pmu; + ptr->itr.parse_snapshot_options = cs_etm_parse_snapshot_options; + ptr->itr.recording_options = cs_etm_recording_options; + ptr->itr.info_priv_size = cs_etm_info_priv_size; + ptr->itr.info_fill = cs_etm_info_fill; + ptr->itr.find_snapshot = cs_etm_find_snapshot; + ptr->itr.snapshot_start = cs_etm_snapshot_start; + ptr->itr.snapshot_finish = cs_etm_snapshot_finish; + ptr->itr.reference = cs_etm_reference; + ptr->itr.free = cs_etm_recording_free; + ptr->itr.read_finish = cs_etm_read_finish; + + *err = 0; + return &ptr->itr; +out: + return NULL; +} diff --git a/tools/perf/arch/arm/util/cs-etm.h 
b/tools/perf/arch/arm/util/cs-etm.h new file mode 100644 index 0000000..909f486 --- /dev/null +++ b/tools/perf/arch/arm/util/cs-etm.h @@ -0,0 +1,23 @@ +/* + * Copyright(C) 2015 Linaro Limited. All rights reserved. + * Author: Mathieu Poirier + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + */ + +#ifndef INCLUDE__PERF_CS_ETM_H__ +#define INCLUDE__PERF_CS_ETM_H__ + +struct auxtrace_record *cs_etm_record_init(int *err); + +#endif diff --git a/tools/perf/arch/arm64/util/Build b/tools/perf/arch/arm64/util/Build index 3876dd0..cef6fb3 100644 --- a/tools/perf/arch/arm64/util/Build +++ b/tools/perf/arch/arm64/util/Build @@ -1,4 +1,6 @@ libperf-$(CONFIG_DWARF) += dwarf-regs.o libperf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o -libperf-$(CONFIG_AUXTRACE) += ../../arm/util/pmu.o +libperf-$(CONFIG_AUXTRACE) += ../../arm/util/pmu.o \ + ../../arm/util/auxtrace.o \ + ../../arm/util/cs-etm.o diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index c916901..c0aba8e 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -892,6 +892,7 @@ int perf_event__process_auxtrace_info(struct perf_tool *tool __maybe_unused, return intel_pt_process_auxtrace_info(event, session); case PERF_AUXTRACE_INTEL_BTS: return intel_bts_process_auxtrace_info(event, session); + case PERF_AUXTRACE_CS_ETM: case PERF_AUXTRACE_UNKNOWN: default: return -EINVAL; diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h index ac5f0d7..09286f1 100644 --- a/tools/perf/util/auxtrace.h +++ b/tools/perf/util/auxtrace.h @@ -41,6 +41,7 @@ enum auxtrace_type { PERF_AUXTRACE_UNKNOWN, PERF_AUXTRACE_INTEL_PT, PERF_AUXTRACE_INTEL_BTS, + PERF_AUXTRACE_CS_ETM, }; enum itrace_period_type { diff --git a/tools/perf/util/cs-etm.h b/tools/perf/util/cs-etm.h new file mode 100644 index 0000000..3cc6bc3 --- /dev/null +++ b/tools/perf/util/cs-etm.h @@ -0,0 +1,74 @@ +/* + * Copyright(C) 2015 Linaro Limited. All rights reserved. + * Author: Mathieu Poirier + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + */ + +#ifndef INCLUDE__UTIL_PERF_CS_ETM_H__ +#define INCLUDE__UTIL_PERF_CS_ETM_H__ + +/* Versionning header in case things need tro change in the future. That way + * decoding of old snapshot is still possible. 
+ */ +enum { + /* Starting with 0x0 */ + CS_HEADER_VERSION_0, + /* PMU->type (32 bit), total # of CPUs (32 bit) */ + CS_PMU_TYPE_CPUS, + CS_ETM_SNAPSHOT, + CS_HEADER_VERSION_0_MAX, +}; + +/* Beginning of header common to both ETMv3 and V4 */ +enum { + CS_ETM_MAGIC, + CS_ETM_CPU, +}; + +/* ETMv3/PTM metadata */ +enum { + /* Dynamic, configurable parameters */ + CS_ETM_ETMCR = CS_ETM_CPU + 1, + CS_ETM_ETMTRACEIDR, + /* RO, taken from sysFS */ + CS_ETM_ETMCCER, + CS_ETM_ETMIDR, + CS_ETM_PRIV_MAX, +}; + +/* ETMv4 metadata */ +enum { + /* Dynamic, configurable parameters */ + CS_ETMV4_TRCCONFIGR = CS_ETM_CPU + 1, + CS_ETMV4_TRCTRACEIDR, + /* RO, taken from sysFS */ + CS_ETMV4_TRCIDR0, + CS_ETMV4_TRCIDR1, + CS_ETMV4_TRCIDR2, + CS_ETMV4_TRCIDR8, + CS_ETMV4_TRCAUTHSTATUS, + CS_ETMV4_PRIV_MAX, +}; + +#define KiB(x) ((x) * 1024) +#define MiB(x) ((x) * 1024 * 1024) + +#define CS_ETM_HEADER_SIZE (CS_HEADER_VERSION_0_MAX * sizeof(u64)) + +static const u64 __perf_cs_etmv3_magic = 0x3030303030303030ULL; +static const u64 __perf_cs_etmv4_magic = 0x4040404040404040ULL; +#define CS_ETMV3_PRIV_SIZE (CS_ETM_PRIV_MAX * sizeof(u64)) +#define CS_ETMV4_PRIV_SIZE (CS_ETMV4_PRIV_MAX * sizeof(u64)) + +#endif -- cgit v0.10.2 From 859442bd3fcbe326a9c0174c6105c938eb101438 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Fri, 16 Sep 2016 09:50:02 -0600 Subject: perf pmu: Push configuration down to PMU driver This patch adds a PMU callback and the required mechanic so that drivers can process the command line configuration elements found in evsel::config_terms. Signed-off-by: Mathieu Poirier Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Peter Zijlstra Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/1474041004-13956-6-git-send-email-mathieu.poirier@linaro.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 96f99d6..eb60e61 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -86,6 +86,7 @@ libperf-y += term.o libperf-y += help-unknown-cmd.o libperf-y += mem-events.o libperf-y += vsprintf.o +libperf-y += drv_configs.o libperf-$(CONFIG_LIBBPF) += bpf-loader.o libperf-$(CONFIG_BPF_PROLOGUE) += bpf-prologue.o diff --git a/tools/perf/util/drv_configs.c b/tools/perf/util/drv_configs.c new file mode 100644 index 0000000..1647f28 --- /dev/null +++ b/tools/perf/util/drv_configs.c @@ -0,0 +1,77 @@ +/* + * drv_configs.h: Interface to apply PMU specific configuration + * Copyright (c) 2016-2018, Linaro Ltd. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ * + */ + +#include "drv_configs.h" +#include "evlist.h" +#include "evsel.h" +#include "pmu.h" + +static int +perf_evsel__apply_drv_configs(struct perf_evsel *evsel, + struct perf_evsel_config_term **err_term) +{ + bool found = false; + int err = 0; + struct perf_evsel_config_term *term; + struct perf_pmu *pmu = NULL; + + while ((pmu = perf_pmu__scan(pmu)) != NULL) + if (pmu->type == evsel->attr.type) { + found = true; + break; + } + + list_for_each_entry(term, &evsel->config_terms, list) { + if (term->type != PERF_EVSEL__CONFIG_TERM_DRV_CFG) + continue; + + /* + * We have a configuration term, report an error if we + * can't find the PMU or if the PMU driver doesn't support + * cmd line driver configuration. + */ + if (!found || !pmu->set_drv_config) { + err = -EINVAL; + *err_term = term; + break; + } + + err = pmu->set_drv_config(term); + if (err) { + *err_term = term; + break; + } + } + + return err; +} + +int perf_evlist__apply_drv_configs(struct perf_evlist *evlist, + struct perf_evsel **err_evsel, + struct perf_evsel_config_term **err_term) +{ + struct perf_evsel *evsel; + int err = 0; + + evlist__for_each_entry(evlist, evsel) { + err = perf_evsel__apply_drv_configs(evsel, err_term); + if (err) { + *err_evsel = evsel; + break; + } + } + + return err; +} diff --git a/tools/perf/util/drv_configs.h b/tools/perf/util/drv_configs.h new file mode 100644 index 0000000..32bc9ba --- /dev/null +++ b/tools/perf/util/drv_configs.h @@ -0,0 +1,26 @@ +/* + * drv_configs.h: Interface to apply PMU specific configuration + * Copyright (c) 2016-2018, Linaro Ltd. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + +#ifndef __PERF_DRV_CONFIGS_H +#define __PERF_DRV_CONFIGS_H + +#include "drv_configs.h" +#include "evlist.h" +#include "evsel.h" + +int perf_evlist__apply_drv_configs(struct perf_evlist *evlist, + struct perf_evsel **err_evsel, + struct perf_evsel_config_term **term); +#endif diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index 5d7e844..743422a 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -4,6 +4,7 @@ #include #include #include +#include "evsel.h" #include "parse-events.h" enum { @@ -25,6 +26,7 @@ struct perf_pmu { struct list_head format; /* HEAD struct perf_pmu_format -> list */ struct list_head aliases; /* HEAD struct perf_pmu_alias -> list */ struct list_head list; /* ELEM */ + int (*set_drv_config) (struct perf_evsel_config_term *term); }; struct perf_pmu_info { -- cgit v0.10.2 From 5d8bb1ec7477e0e53dbd891733682a6583d4398e Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Fri, 16 Sep 2016 09:50:03 -0600 Subject: perf tools: Add PMU configuration to tools Now that the required mechanic is there to deal with PMU specific configuration, add the functionality to the tools where events can be selected. 
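On the driver side this relies on the perf_pmu::set_drv_config callback added in the previous patch, which receives a PERF_EVSEL__CONFIG_TERM_DRV_CFG term and acts on its string value. A hypothetical callback might look like the sketch below; the function name is made up, and a real driver (such as the CoreSight sink handler in a later patch) would program sysfs or hardware instead of just printing:

#include <errno.h>
#include <stdio.h>
#include "util/evsel.h"		/* struct perf_evsel_config_term (perf-internal header) */

/* Hypothetical PMU driver hook, for illustration only. */
static int example_pmu__set_drv_config(struct perf_evsel_config_term *term)
{
	const char *cfg = term->val.drv_cfg;	/* config string from the event spec */

	if (!cfg || !*cfg)
		return -EINVAL;

	/* A real driver would validate 'cfg' and program a sysfs control file here. */
	fprintf(stderr, "applying driver config '%s'\n", cfg);
	return 0;
}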
Signed-off-by: Mathieu Poirier Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Peter Zijlstra Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/1474041004-13956-7-git-send-email-mathieu.poirier@linaro.org [ Fix the build on XSI-compliant systems, using str_error_r() to make sure we return a string, not an integer ] Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 03251c7..2d0d69b 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -22,6 +22,7 @@ #include "util/evlist.h" #include "util/evsel.h" #include "util/debug.h" +#include "util/drv_configs.h" #include "util/session.h" #include "util/tool.h" #include "util/symbol.h" @@ -383,6 +384,7 @@ static int record__open(struct record *rec) struct perf_evlist *evlist = rec->evlist; struct perf_session *session = rec->session; struct record_opts *opts = &rec->opts; + struct perf_evsel_config_term *err_term; int rc = 0; perf_evlist__config(evlist, opts, &callchain_param); @@ -412,6 +414,14 @@ try_again: goto out; } + if (perf_evlist__apply_drv_configs(evlist, &pos, &err_term)) { + error("failed to set config \"%s\" on event %s with %d (%s)\n", + err_term->val.drv_cfg, perf_evsel__name(pos), errno, + str_error_r(errno, msg, sizeof(msg))); + rc = -1; + goto out; + } + rc = record__mmap(rec); if (rc) goto out; diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 90882b1..688dea7 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -52,6 +52,7 @@ #include "util/evlist.h" #include "util/evsel.h" #include "util/debug.h" +#include "util/drv_configs.h" #include "util/color.h" #include "util/stat.h" #include "util/header.h" @@ -540,6 +541,7 @@ static int __run_perf_stat(int argc, const char **argv) int status = 0; const bool forks = (argc > 0); bool is_pipe = STAT_RECORD ? 
perf_stat.file.is_pipe : false; + struct perf_evsel_config_term *err_term; if (interval) { ts.tv_sec = interval / USEC_PER_MSEC; @@ -611,6 +613,13 @@ try_again: return -1; } + if (perf_evlist__apply_drv_configs(evsel_list, &counter, &err_term)) { + error("failed to set config \"%s\" on event %s with %d (%s)\n", + err_term->val.drv_cfg, perf_evsel__name(counter), errno, + str_error_r(errno, msg, sizeof(msg))); + return -1; + } + if (STAT_RECORD) { int err, fd = perf_data_file__fd(&perf_stat.file); diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 4007857..fe3af95 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -24,6 +24,7 @@ #include "util/annotate.h" #include "util/config.h" #include "util/color.h" +#include "util/drv_configs.h" #include "util/evlist.h" #include "util/evsel.h" #include "util/machine.h" @@ -913,6 +914,10 @@ static int callchain_param__setup_sample_type(struct callchain_param *callchain) static int __cmd_top(struct perf_top *top) { + char msg[512]; + struct perf_evsel *pos; + struct perf_evsel_config_term *err_term; + struct perf_evlist *evlist = top->evlist; struct record_opts *opts = &top->record_opts; pthread_t thread; int ret; @@ -947,6 +952,14 @@ static int __cmd_top(struct perf_top *top) if (ret) goto out_delete; + ret = perf_evlist__apply_drv_configs(evlist, &pos, &err_term); + if (ret) { + error("failed to set config \"%s\" on event %s with %d (%s)\n", + err_term->val.drv_cfg, perf_evsel__name(pos), errno, + str_error_r(errno, msg, sizeof(msg))); + goto out_delete; + } + top->session->evlist = top->evlist; perf_session__set_id_hdr_size(top->session); -- cgit v0.10.2 From 3becf4525d9c0fb9cf8ff657b2aec1c733bc742f Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Fri, 16 Sep 2016 09:50:04 -0600 Subject: perf tools: Add sink configuration for cs_etm PMU Using the PMU::set_drv_config() callback to enable the CoreSight sink that will be used for the trace session. Signed-off-by: Mathieu Poirier Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Peter Zijlstra Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/1474041004-13956-8-git-send-email-mathieu.poirier@linaro.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c index 829c479..47d584d 100644 --- a/tools/perf/arch/arm/util/cs-etm.c +++ b/tools/perf/arch/arm/util/cs-etm.c @@ -27,12 +27,16 @@ #include "../../util/auxtrace.h" #include "../../util/cpumap.h" #include "../../util/evlist.h" +#include "../../util/evsel.h" #include "../../util/pmu.h" #include "../../util/thread_map.h" #include "../../util/cs-etm.h" #include +#define ENABLE_SINK_MAX 128 +#define CS_BUS_DEVICE_PATH "/bus/coresight/devices/" + struct cs_etm_recording { struct auxtrace_record itr; struct perf_pmu *cs_etm_pmu; @@ -557,3 +561,57 @@ struct auxtrace_record *cs_etm_record_init(int *err) out: return NULL; } + +static FILE *cs_device__open_file(const char *name) +{ + struct stat st; + char path[PATH_MAX]; + const char *sysfs; + + sysfs = sysfs__mountpoint(); + if (!sysfs) + return NULL; + + snprintf(path, PATH_MAX, + "%s" CS_BUS_DEVICE_PATH "%s", sysfs, name); + + printf("path: %s\n", path); + + if (stat(path, &st) < 0) + return NULL; + + return fopen(path, "w"); + +} + +static __attribute__((format(printf, 2, 3))) +int cs_device__print_file(const char *name, const char *fmt, ...) 
+{ + va_list args; + FILE *file; + int ret = -EINVAL; + + va_start(args, fmt); + file = cs_device__open_file(name); + if (file) { + ret = vfprintf(file, fmt, args); + fclose(file); + } + va_end(args); + return ret; +} + +int cs_etm_set_drv_config(struct perf_evsel_config_term *term) +{ + int ret; + char enable_sink[ENABLE_SINK_MAX]; + + snprintf(enable_sink, ENABLE_SINK_MAX, "%s/%s", + term->val.drv_cfg, "enable_sink"); + + ret = cs_device__print_file(enable_sink, "%d", 1); + if (ret < 0) + return ret; + + return 0; +} diff --git a/tools/perf/arch/arm/util/cs-etm.h b/tools/perf/arch/arm/util/cs-etm.h index 909f486..5256741 100644 --- a/tools/perf/arch/arm/util/cs-etm.h +++ b/tools/perf/arch/arm/util/cs-etm.h @@ -18,6 +18,9 @@ #ifndef INCLUDE__PERF_CS_ETM_H__ #define INCLUDE__PERF_CS_ETM_H__ +#include "../../util/evsel.h" + struct auxtrace_record *cs_etm_record_init(int *err); +int cs_etm_set_drv_config(struct perf_evsel_config_term *term); #endif diff --git a/tools/perf/arch/arm/util/pmu.c b/tools/perf/arch/arm/util/pmu.c index af9fb66..98d6739 100644 --- a/tools/perf/arch/arm/util/pmu.c +++ b/tools/perf/arch/arm/util/pmu.c @@ -19,6 +19,7 @@ #include #include +#include "cs-etm.h" #include "../../util/pmu.h" struct perf_event_attr @@ -28,6 +29,7 @@ struct perf_event_attr if (!strcmp(pmu->name, CORESIGHT_ETM_PMU_NAME)) { /* add ETM default config here */ pmu->selectable = true; + pmu->set_drv_config = cs_etm_set_drv_config; } #endif return NULL; -- cgit v0.10.2 From 9da44db1493a9d384ddc1bcd1553a1803ff985b6 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 22 Sep 2016 17:36:29 +0200 Subject: perf hists: Add __hist_entry__snprintf function Add __hist_entry__snprintf() to take a perf_hpp_list as an argument instead of using he->hists->hpp_list. This way we can display arbitrary list of entries regardless of the hists setup, which will be useful in the upcoming c2c patch series. 
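As an aside, a hypothetical sketch (not part of the patch) of what this enables once a later patch in this series exports the function: an entry can be rendered against a column list the caller owns instead of the one attached to he->hists.

/* Hypothetical helper: format one entry with a caller-owned column list. */
static int format_entry(struct hist_entry *he, struct perf_hpp_list *cols,
			char *bf, size_t size)
{
	struct perf_hpp hpp = {
		.buf	= bf,
		.size	= size,
	};

	return __hist_entry__snprintf(he, &hpp, cols);
}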
Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Don Zickus Cc: Joe Mario Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1474558645-19956-2-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c index c8dca34..189665c 100644 --- a/tools/perf/ui/stdio/hist.c +++ b/tools/perf/ui/stdio/hist.c @@ -373,7 +373,8 @@ static size_t hist_entry_callchain__fprintf(struct hist_entry *he, return 0; } -static int hist_entry__snprintf(struct hist_entry *he, struct perf_hpp *hpp) +static int __hist_entry__snprintf(struct hist_entry *he, struct perf_hpp *hpp, + struct perf_hpp_list *hpp_list) { const char *sep = symbol_conf.field_sep; struct perf_hpp_fmt *fmt; @@ -384,7 +385,7 @@ static int hist_entry__snprintf(struct hist_entry *he, struct perf_hpp *hpp) if (symbol_conf.exclude_other && !he->parent) return 0; - hists__for_each_format(he->hists, fmt) { + perf_hpp_list__for_each_format(hpp_list, fmt) { if (perf_hpp__should_skip(fmt, he->hists)) continue; @@ -410,6 +411,11 @@ static int hist_entry__snprintf(struct hist_entry *he, struct perf_hpp *hpp) return hpp->buf - start; } +static int hist_entry__snprintf(struct hist_entry *he, struct perf_hpp *hpp) +{ + return __hist_entry__snprintf(he, hpp, he->hists->hpp_list); +} + static int hist_entry__hierarchy_fprintf(struct hist_entry *he, struct perf_hpp *hpp, struct hists *hists, -- cgit v0.10.2 From bcf98740a28579d9412afa9a72e463da386a55a6 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 22 Sep 2016 17:36:32 +0200 Subject: perf tools: Make reset_dimensions global Will be used from external places in the upcoming c2c patch series. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Don Zickus Cc: Joe Mario Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1474558645-19956-5-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 1884d7f..9e1f6f7 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -2748,7 +2748,7 @@ static int setup_output_list(struct perf_hpp_list *list, char *str) return ret; } -static void reset_dimensions(void) +void reset_dimensions(void) { unsigned int i; diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index 9505483..4efadc1 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -269,4 +269,5 @@ int report_parse_ignore_callees_opt(const struct option *opt, const char *arg, i bool is_strict_order(const char *order); int hpp_dimension__add_output(unsigned col); +void reset_dimensions(void); #endif /* __PERF_SORT_H */ -- cgit v0.10.2 From a76490e4cd5d971d6f6c22aeed0625bb352d2a08 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 22 Sep 2016 17:36:33 +0200 Subject: perf tools: Make output_field_add and sort_dimension__add global Will be used from external places in the upcoming c2c patch series. 
Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Don Zickus Cc: Joe Mario Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1474558645-19956-6-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 9e1f6f7..9f7c1ea 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -2308,9 +2308,9 @@ int hpp_dimension__add_output(unsigned col) return __hpp_dimension__add_output(&perf_hpp_list, &hpp_sort_dimensions[col]); } -static int sort_dimension__add(struct perf_hpp_list *list, const char *tok, - struct perf_evlist *evlist, - int level) +int sort_dimension__add(struct perf_hpp_list *list, const char *tok, + struct perf_evlist *evlist, + int level) { unsigned int i; @@ -2685,7 +2685,7 @@ void sort__setup_elide(FILE *output) } } -static int output_field_add(struct perf_hpp_list *list, char *tok) +int output_field_add(struct perf_hpp_list *list, char *tok) { unsigned int i; diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index 4efadc1..e93b0fa 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -270,4 +270,8 @@ bool is_strict_order(const char *order); int hpp_dimension__add_output(unsigned col); void reset_dimensions(void); +int sort_dimension__add(struct perf_hpp_list *list, const char *tok, + struct perf_evlist *evlist, + int level); +int output_field_add(struct perf_hpp_list *list, char *tok); #endif /* __PERF_SORT_H */ -- cgit v0.10.2 From 5fe7b9b47c646dbe8501378eb3684ccd802d6d25 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 22 Sep 2016 17:36:34 +0200 Subject: perf tools: Make several sorting functions global Will be used from external places in the upcoming c2c patch series. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Don Zickus Cc: Joe Mario Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1474558645-19956-7-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 9f7c1ea..452e15a 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -867,7 +867,7 @@ struct sort_entry sort_cycles = { }; /* --sort daddr_sym */ -static int64_t +int64_t sort__daddr_cmp(struct hist_entry *left, struct hist_entry *right) { uint64_t l = 0, r = 0; @@ -896,7 +896,7 @@ static int hist_entry__daddr_snprintf(struct hist_entry *he, char *bf, width); } -static int64_t +int64_t sort__iaddr_cmp(struct hist_entry *left, struct hist_entry *right) { uint64_t l = 0, r = 0; @@ -1062,7 +1062,7 @@ static int hist_entry__snoop_snprintf(struct hist_entry *he, char *bf, return repsep_snprintf(bf, size, "%-*s", width, out); } -static int64_t +int64_t sort__dcacheline_cmp(struct hist_entry *left, struct hist_entry *right) { u64 l, r; diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index e93b0fa..099c975 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -274,4 +274,10 @@ int sort_dimension__add(struct perf_hpp_list *list, const char *tok, struct perf_evlist *evlist, int level); int output_field_add(struct perf_hpp_list *list, char *tok); +int64_t +sort__iaddr_cmp(struct hist_entry *left, struct hist_entry *right); +int64_t +sort__daddr_cmp(struct hist_entry *left, struct hist_entry *right); +int64_t +sort__dcacheline_cmp(struct hist_entry *left, struct hist_entry *right); #endif /* __PERF_SORT_H */ -- cgit v0.10.2 From 98ba1609298ccc849ddfe727c675c3a7b48b8dbc Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 22 Sep 2016 
17:36:35 +0200 Subject: perf tools: Make several display functions global Will be used from external places in the upcoming c2c patch series. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Don Zickus Cc: Joe Mario Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1474558645-19956-8-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index a6d5d24..fb8e42c 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -1080,7 +1080,7 @@ struct hpp_arg { bool current_entry; }; -static int __hpp__slsmg_color_printf(struct perf_hpp *hpp, const char *fmt, ...) +int __hpp__slsmg_color_printf(struct perf_hpp *hpp, const char *fmt, ...) { struct hpp_arg *arg = hpp->ptr; int ret, len; diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c index 60c4a4d..3738839 100644 --- a/tools/perf/ui/hist.c +++ b/tools/perf/ui/hist.c @@ -237,7 +237,7 @@ static int hpp__header_fn(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, return scnprintf(hpp->buf, hpp->size, "%*s", len, fmt->name); } -static int hpp_color_scnprintf(struct perf_hpp *hpp, const char *fmt, ...) +int hpp_color_scnprintf(struct perf_hpp *hpp, const char *fmt, ...) { va_list args; ssize_t ssize = hpp->size; diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index defa957..6150b94 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -485,5 +485,7 @@ static inline struct rb_node *rb_hierarchy_next(struct rb_node *node) #define HIERARCHY_INDENT 3 bool hist_entry__has_hierarchy_children(struct hist_entry *he, float limit); +int hpp_color_scnprintf(struct perf_hpp *hpp, const char *fmt, ...); +int __hpp__slsmg_color_printf(struct perf_hpp *hpp, const char *fmt, ...); #endif /* __PERF_HIST_H */ -- cgit v0.10.2 From bd28d0c59805b88001fcc8ad5c6f913d86d8e5c2 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 22 Sep 2016 17:36:36 +0200 Subject: perf hists: Make __hist_entry__snprintf function global Will be used from external places in the upcoming c2c patch series. 
Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Don Zickus Cc: Joe Mario Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1474558645-19956-9-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c index 189665c..a2a50ef 100644 --- a/tools/perf/ui/stdio/hist.c +++ b/tools/perf/ui/stdio/hist.c @@ -373,8 +373,8 @@ static size_t hist_entry_callchain__fprintf(struct hist_entry *he, return 0; } -static int __hist_entry__snprintf(struct hist_entry *he, struct perf_hpp *hpp, - struct perf_hpp_list *hpp_list) +int __hist_entry__snprintf(struct hist_entry *he, struct perf_hpp *hpp, + struct perf_hpp_list *hpp_list) { const char *sep = symbol_conf.field_sep; struct perf_hpp_fmt *fmt; diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 6150b94..ecc4c0c 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -487,5 +487,7 @@ static inline struct rb_node *rb_hierarchy_next(struct rb_node *node) bool hist_entry__has_hierarchy_children(struct hist_entry *he, float limit); int hpp_color_scnprintf(struct perf_hpp *hpp, const char *fmt, ...); int __hpp__slsmg_color_printf(struct perf_hpp *hpp, const char *fmt, ...); +int __hist_entry__snprintf(struct hist_entry *he, struct perf_hpp *hpp, + struct perf_hpp_list *hpp_list); #endif /* __PERF_HIST_H */ -- cgit v0.10.2 From 2d831454140f28fa643b78deede4511b9e2c9e5f Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 22 Sep 2016 17:36:37 +0200 Subject: perf hists: Make hists__fprintf_headers function global Will be used from external places in the upcoming c2c patch series. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Don Zickus Cc: Joe Mario Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1474558645-19956-10-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c index a2a50ef..89d8441 100644 --- a/tools/perf/ui/stdio/hist.c +++ b/tools/perf/ui/stdio/hist.c @@ -702,7 +702,7 @@ hists__fprintf_standard_headers(struct hists *hists, return hpp_list->nr_header_lines + 2; } -static int hists__fprintf_headers(struct hists *hists, FILE *fp) +int hists__fprintf_headers(struct hists *hists, FILE *fp) { char bf[1024]; struct perf_hpp dummy_hpp = { diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index ecc4c0c..9928fed 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -489,5 +489,6 @@ int hpp_color_scnprintf(struct perf_hpp *hpp, const char *fmt, ...); int __hpp__slsmg_color_printf(struct perf_hpp *hpp, const char *fmt, ...); int __hist_entry__snprintf(struct hist_entry *he, struct perf_hpp *hpp, struct perf_hpp_list *hpp_list); +int hists__fprintf_headers(struct hists *hists, FILE *fp); #endif /* __PERF_HIST_H */ -- cgit v0.10.2 From a9e57009dacd58052755cf58463ce41a14a01db5 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 23 Sep 2016 17:38:33 +0300 Subject: perf record: Fix documentation 'event_sources' -> 'event_source' Change '/sys/bus/event_sources' to the correct path which is '/sys/bus/event_source'. 
Signed-off-by: Adrian Hunter Cc: Jiri Olsa Cc: Masami Hiramatsu Cc: Mathieu Poirier Link: http://lkml.kernel.org/r/1474641528-18776-2-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 1a24f4d..babbb63 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -35,15 +35,15 @@ OPTIONS - a symbolically formed PMU event like 'pmu/param1=0x3,param2/' where 'param1', 'param2', etc are defined as formats for the PMU in - /sys/bus/event_sources/devices//format/*. + /sys/bus/event_source/devices//format/*. - a symbolically formed event like 'pmu/config=M,config1=N,config3=K/' where M, N, K are numbers (in decimal, hex, octal format). Acceptable values for each of 'config', 'config1' and 'config2' are defined by - corresponding entries in /sys/bus/event_sources/devices//format/* + corresponding entries in /sys/bus/event_source/devices//format/* param1 and param2 are defined as formats for the PMU in: - /sys/bus/event_sources/devices//format/* + /sys/bus/event_source/devices//format/* There are also some params which are not defined in ...//format/*. These params can be used to overload default config values per event. -- cgit v0.10.2 From 973186ca7fe94d770a9847d7d530864de6ab638b Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 23 Sep 2016 17:38:34 +0300 Subject: perf tools: Fix MMAP event synthesis broken by MAP_HUGETLB change Patch "perf record: Mark MAP_HUGETLB when synthesizing mmap events") breaks MMAP event synthesis. The executable name comparison will match any name if the length is zero, resulting in all the user space maps becoming anonymous. This is particularly noticeable with system-wide traces. Example: perf record -a sleep 1 perf script --show-mmap-events Committer note: That is not the case when, say, one has a qemu instance and libvirt actually mounts hugetlbfs. 
To test this I had to first umount it: [root@jouet ~]# mount | grep hugetlbfs hugetlbfs on /dev/hugepages type hugetlbfs (rw,relatime,seclabel) [root@jouet ~]# After unmount it the error fixed by this patch manifests itself: # perf record -a sleep 1 # perf script --show-mmap-events | grep PERF_RECORD_MMAP2 | head -5 systemd 0 [000] 0.000000: PERF_RECORD_MMAP2 1/1: [0x557d47ed8000(0x167000) @ 0 fd:00 3146896 7362875424355726126]: r-xp //anon systemd 0 [000] 0.000000: PERF_RECORD_MMAP2 1/1: [0x7f96c488d000(0x4000) @ 0 fd:00 3153214 7362875424355726126]: r-xp //anon systemd 0 [000] 0.000000: PERF_RECORD_MMAP2 1/1: [0x7f96c4a92000(0x3d000) @ 0 fd:00 3159276 7362875424355726126]: r-xp //anon systemd 0 [000] 0.000000: PERF_RECORD_MMAP2 1/1: [0x7f96c4cd5000(0x15000) @ 0 fd:00 3153725 7362875424355726126]: r-xp //anon systemd 0 [000] 0.000000: PERF_RECORD_MMAP2 1/1: [0x7f96c4eeb000(0x25000) @ 0 fd:00 3153260 7362875424355726126]: r-xp //anon # Fixed version: # perf record -a sleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 1.419 MB perf.data (182 samples) ] # perf script --show-mmap-events | grep PERF_RECORD_MMAP2 | head -5 systemd 0 [000] 0.000000: PERF_RECORD_MMAP2 1/1: [0x557d47ed8000(0x167000) @ 0 fd:00 3146896 7362875424355726126]: r-xp /usr/lib/systemd/systemd systemd 0 [000] 0.000000: PERF_RECORD_MMAP2 1/1: [0x7f96c488d000(0x4000) @ 0 fd:00 3153214 7362875424355726126]: r-xp /usr/lib64/libuuid.so.1.3.0 systemd 0 [000] 0.000000: PERF_RECORD_MMAP2 1/1: [0x7f96c4a92000(0x3d000) @ 0 fd:00 3159276 7362875424355726126]: r-xp /usr/lib64/libblkid.so.1.1.0 systemd 0 [000] 0.000000: PERF_RECORD_MMAP2 1/1: [0x7f96c4cd5000(0x15000) @ 0 fd:00 3153725 7362875424355726126]: r-xp /usr/lib64/libz.so.1.2.8 systemd 0 [000] 0.000000: PERF_RECORD_MMAP2 1/1: [0x7f96c4eeb000(0x25000) @ 0 fd:00 3153260 7362875424355726126]: r-xp /usr/lib64/liblzma.so.5.2.2 [root@jouet ~]# Signed-off-by: Adrian Hunter Tested-by: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Masami Hiramatsu Cc: Mathieu Poirier Link: http://lkml.kernel.org/r/1474641528-18776-3-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 2880e22..8ab0d7d 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -346,7 +346,8 @@ out: if (!strcmp(execname, "")) strcpy(execname, anonstr); - if (!strncmp(execname, hugetlbfs_mnt, hugetlbfs_mnt_len)) { + if (hugetlbfs_mnt_len && + !strncmp(execname, hugetlbfs_mnt, hugetlbfs_mnt_len)) { strcpy(execname, anonstr); event->mmap2.flags |= MAP_HUGETLB; } -- cgit v0.10.2 From f2c8852e6e990fcab0d9e68de9d86e5fbea0b5dc Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Wed, 28 Sep 2016 03:58:46 +0000 Subject: perf data: Fix building in 32 bit platform with libbabeltrace On ARM32 building it report following error when we build with libbabeltrace: util/data-convert-bt.c: In function 'add_bpf_output_values': util/data-convert-bt.c:440:3: error: format '%lu' expects argument of type 'long unsigned int', but argument 5 has type 'unsigned int' [-Werror=format] cc1: all warnings being treated as errors Fix it by changing %lu to %zu. 
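The underlying issue: sizeof() yields a size_t, which is 'unsigned int' on 32-bit ARM but 'unsigned long' on 64-bit targets, so only the 'z' length modifier is portable. A standalone illustration (not taken from the patch):

#include <stdio.h>
#include <stddef.h>

int main(void)
{
	size_t skip = 4 * sizeof(unsigned int) - 6;

	/*
	 * %zu matches size_t on every target; %lu only matches unsigned
	 * long, so -Wformat (an error under -Werror) fires on ILP32
	 * targets where size_t is unsigned int.
	 */
	printf("skip %zu bytes\n", skip);
	return 0;
}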
Signed-off-by: Wang Nan Cc: Jiri Olsa Cc: Zefan Li Cc: pi3orama@163.com Fixes: 6122d57e9f7c ("perf data: Support converting data from bpf_perf_event_output()") Link: http://lkml.kernel.org/r/1475035126-146587-1-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c index 4f979bb..7123f4d 100644 --- a/tools/perf/util/data-convert-bt.c +++ b/tools/perf/util/data-convert-bt.c @@ -437,7 +437,7 @@ add_bpf_output_values(struct bt_ctf_event_class *event_class, int ret; if (nr_elements * sizeof(u32) != raw_size) - pr_warning("Incorrect raw_size (%u) in bpf output event, skip %lu bytes\n", + pr_warning("Incorrect raw_size (%u) in bpf output event, skip %zu bytes\n", raw_size, nr_elements * sizeof(u32) - raw_size); len_type = bt_ctf_event_class_get_field_by_name(event_class, "raw_len"); -- cgit v0.10.2 From f0bbd602268e69f4f428075c645391d64bdd6c9f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 28 Sep 2016 13:45:38 -0300 Subject: perf trace: Beautify sched_[gs]et_attr return value Both return errno, show the string associated then. More work needed to capture the sched_attr arg to beautify it in turn, probably using BPF. Before: 0.210 ( 0.001 ms): sched_setattr(uattr: 0x7ffc684f02b0) = -22 After the patch, for this sched_attr, all other parms are zero, so not shown: struct sched_attr attr = { .size = sizeof(attr), .sched_policy = SCHED_DEADLINE, .sched_runtime = 10 * USECS_PER_SEC, .sched_period = 30 * USECS_PER_SEC, .sched_deadline = attr.sched_period, }; 0.321 ( 0.002 ms): sched_setattr(uattr: 0x7ffc44116da0) = -1 EINVAL Invalid argument [root@jouet c]# perf trace -e sched_setattr ./sched_deadline Couldn't negotiate deadline: Invalid argument 0.229 ( 0.003 ms): sched_setattr(uattr: 0x7ffd8dcd8df0) = -1 EINVAL Invalid argument [root@jouet c]# Now to figure out the reason for this EINVAL. Cc: Adrian Hunter Cc: Clark Williams Cc: Daniel Bristot de Oliveira Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Steven Rostedt Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-tyot2n7e48zm8pdw8tbcm3sl@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index b4fc1ab..1a54ce9 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -742,6 +742,8 @@ static struct syscall_fmt { .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, }, { .name = "rt_tgsigqueueinfo", .errmsg = true, .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, }, + { .name = "sched_getattr", .errmsg = true, }, + { .name = "sched_setattr", .errmsg = true, }, { .name = "sched_setscheduler", .errmsg = true, .arg_scnprintf = { [1] = SCA_SCHED_POLICY, /* policy */ }, }, { .name = "seccomp", .errmsg = true, -- cgit v0.10.2 From 2acad19500c28ce0c4dc3f9bf1dcfc82040b6531 Mon Sep 17 00:00:00 2001 From: Simon Que Date: Wed, 28 Sep 2016 11:37:53 -0700 Subject: perf tools: Update documentation info about quipper The existing link is outdated. The most recent quipper code can be found at the new URL. Committer notes: Quipper is a C++ parser that can be used to convert from a perf.data file to and from a protobuf, a Chromium OS facility. 
Signed-off-by: Simon Que Acked-by: Andi Kleen Cc: Adrian Hunter Cc: Chong Jiang Link: http://lkml.kernel.org/n/tip-4q1nm7jl3vovp66p5bki20pq@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Documentation/perf.data-file-format.txt b/tools/perf/Documentation/perf.data-file-format.txt index fdc99fe..b664b18 100644 --- a/tools/perf/Documentation/perf.data-file-format.txt +++ b/tools/perf/Documentation/perf.data-file-format.txt @@ -437,6 +437,10 @@ in pmu-tools parser. This allows to read perf.data from python and dump it. quipper The quipper C++ parser is available at -https://chromium.googlesource.com/chromiumos/platform/chromiumos-wide-profiling/ +https://chromium.googlesource.com/chromiumos/platform2 + +It is under the chromiumos-wide-profiling/ subdirectory. This library can +convert a perf data file to a protobuf and vice versa. + Unfortunately this parser tends to be many versions behind and may not be able to parse data files generated by recent perf. -- cgit v0.10.2 From b15d0a4c828eafc82ea68fcf88db6fa93eeb23d7 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Fri, 16 Sep 2016 08:44:03 -0600 Subject: perf tools: Make perf_evsel__append_filter() generic By making function perf_evsel__append_filter() take a format rather than an operator it is possible to reuse the code for other purposes (ex. Intel PT and CoreSight) than tracepoints. Signed-off-by: Mathieu Poirier Acked-by: Adrian Hunter Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Peter Zijlstra Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/1474037045-31730-2-git-send-email-mathieu.poirier@linaro.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 1a54ce9..e04ba9d 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -2143,6 +2143,7 @@ out_delete_sys_enter: static int trace__set_ev_qualifier_filter(struct trace *trace) { int err = -1; + struct perf_evsel *sys_exit; char *filter = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier, trace->ev_qualifier_ids.nr, trace->ev_qualifier_ids.entries); @@ -2150,8 +2151,12 @@ static int trace__set_ev_qualifier_filter(struct trace *trace) if (filter == NULL) goto out_enomem; - if (!perf_evsel__append_filter(trace->syscalls.events.sys_enter, "&&", filter)) - err = perf_evsel__append_filter(trace->syscalls.events.sys_exit, "&&", filter); + if (!perf_evsel__append_filter(trace->syscalls.events.sys_enter, + "(%s) && (%s)", filter)) { + sys_exit = trace->syscalls.events.sys_exit; + err = perf_evsel__append_filter(sys_exit, + "(%s) && (%s)", filter); + } free(filter); out: diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index f3225a2..4f327b5 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1046,14 +1046,14 @@ int perf_evsel__set_filter(struct perf_evsel *evsel, const char *filter) } int perf_evsel__append_filter(struct perf_evsel *evsel, - const char *op, const char *filter) + const char *fmt, const char *filter) { char *new_filter; if (evsel->filter == NULL) return perf_evsel__set_filter(evsel, filter); - if (asprintf(&new_filter,"(%s) %s (%s)", evsel->filter, op, filter) > 0) { + if (asprintf(&new_filter, fmt, evsel->filter, filter) > 0) { free(evsel->filter); evsel->filter = new_filter; return 0; diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 3238060..7ef9602 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -236,7 +236,7 @@ void perf_evsel__set_sample_id(struct perf_evsel 
*evsel, int perf_evsel__set_filter(struct perf_evsel *evsel, const char *filter); int perf_evsel__append_filter(struct perf_evsel *evsel, - const char *op, const char *filter); + const char *fmt, const char *filter); int perf_evsel__apply_filter(struct perf_evsel *evsel, int ncpus, int nthreads, const char *filter); int perf_evsel__enable(struct perf_evsel *evsel); diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 2eb8b1e..b14784c 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -1767,7 +1767,7 @@ static int set_filter(struct perf_evsel *evsel, const void *arg) return -1; } - if (perf_evsel__append_filter(evsel, "&&", str) < 0) { + if (perf_evsel__append_filter(evsel, "(%s) && (%s)", str) < 0) { fprintf(stderr, "not enough memory to hold filter string\n"); return -1; @@ -1798,7 +1798,7 @@ static int add_exclude_perf_filter(struct perf_evsel *evsel, snprintf(new_filter, sizeof(new_filter), "common_pid != %d", getpid()); - if (perf_evsel__append_filter(evsel, "&&", new_filter) < 0) { + if (perf_evsel__append_filter(evsel, "(%s) && (%s)", new_filter) < 0) { fprintf(stderr, "not enough memory to hold filter string\n"); return -1; -- cgit v0.10.2 From 3541c034d9b953cbd4b961db74630fb6d72e7f37 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Fri, 16 Sep 2016 08:44:04 -0600 Subject: perf evsel: New tracepoint specific function Making function perf_evsel__append_filter() static and introducing a new tracepoint specific function to append filters. That way we eliminate redundant code and avoid formatting mistake. Signed-off-by: Mathieu Poirier Acked-by: Adrian Hunter Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Peter Zijlstra Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/1474037045-31730-3-git-send-email-mathieu.poirier@linaro.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index e04ba9d..c298bd3 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -2151,11 +2151,10 @@ static int trace__set_ev_qualifier_filter(struct trace *trace) if (filter == NULL) goto out_enomem; - if (!perf_evsel__append_filter(trace->syscalls.events.sys_enter, - "(%s) && (%s)", filter)) { + if (!perf_evsel__append_tp_filter(trace->syscalls.events.sys_enter, + filter)) { sys_exit = trace->syscalls.events.sys_exit; - err = perf_evsel__append_filter(sys_exit, - "(%s) && (%s)", filter); + err = perf_evsel__append_tp_filter(sys_exit, filter); } free(filter); diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 4f327b5..3b4e7c4 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1045,8 +1045,8 @@ int perf_evsel__set_filter(struct perf_evsel *evsel, const char *filter) return -1; } -int perf_evsel__append_filter(struct perf_evsel *evsel, - const char *fmt, const char *filter) +static int perf_evsel__append_filter(struct perf_evsel *evsel, + const char *fmt, const char *filter) { char *new_filter; @@ -1062,6 +1062,11 @@ int perf_evsel__append_filter(struct perf_evsel *evsel, return -1; } +int perf_evsel__append_tp_filter(struct perf_evsel *evsel, const char *filter) +{ + return perf_evsel__append_filter(evsel, "(%s) && (%s)", filter); +} + int perf_evsel__enable(struct perf_evsel *evsel) { int nthreads = thread_map__nr(evsel->threads); diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 7ef9602..1f8c48f 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -235,8 +235,7 @@ void 
perf_evsel__set_sample_id(struct perf_evsel *evsel, bool use_sample_identifier); int perf_evsel__set_filter(struct perf_evsel *evsel, const char *filter); -int perf_evsel__append_filter(struct perf_evsel *evsel, - const char *fmt, const char *filter); +int perf_evsel__append_tp_filter(struct perf_evsel *evsel, const char *filter); int perf_evsel__apply_filter(struct perf_evsel *evsel, int ncpus, int nthreads, const char *filter); int perf_evsel__enable(struct perf_evsel *evsel); diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index b14784c..16bf09c 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -1767,7 +1767,7 @@ static int set_filter(struct perf_evsel *evsel, const void *arg) return -1; } - if (perf_evsel__append_filter(evsel, "(%s) && (%s)", str) < 0) { + if (perf_evsel__append_tp_filter(evsel, str) < 0) { fprintf(stderr, "not enough memory to hold filter string\n"); return -1; @@ -1798,7 +1798,7 @@ static int add_exclude_perf_filter(struct perf_evsel *evsel, snprintf(new_filter, sizeof(new_filter), "common_pid != %d", getpid()); - if (perf_evsel__append_filter(evsel, "(%s) && (%s)", new_filter) < 0) { + if (perf_evsel__append_tp_filter(evsel, new_filter) < 0) { fprintf(stderr, "not enough memory to hold filter string\n"); return -1; -- cgit v0.10.2 From 1e85748437ba5fd05abe5396d67062e0b4b502f2 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Fri, 16 Sep 2016 08:44:05 -0600 Subject: perf evsel: Add support for address filters This patch makes it possible to use the current filter framework with address filters. That way address filters for HW tracers such as CoreSight and Intel PT can be communicated to the kernel drivers. Signed-off-by: Mathieu Poirier Acked-by: Adrian Hunter Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Peter Zijlstra Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/1474037045-31730-4-git-send-email-mathieu.poirier@linaro.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 3b4e7c4..380e84c 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1067,6 +1067,11 @@ int perf_evsel__append_tp_filter(struct perf_evsel *evsel, const char *filter) return perf_evsel__append_filter(evsel, "(%s) && (%s)", filter); } +int perf_evsel__append_addr_filter(struct perf_evsel *evsel, const char *filter) +{ + return perf_evsel__append_filter(evsel, "%s,%s", filter); +} + int perf_evsel__enable(struct perf_evsel *evsel) { int nthreads = thread_map__nr(evsel->threads); diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 1f8c48f..b1503b0 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -236,6 +236,8 @@ void perf_evsel__set_sample_id(struct perf_evsel *evsel, int perf_evsel__set_filter(struct perf_evsel *evsel, const char *filter); int perf_evsel__append_tp_filter(struct perf_evsel *evsel, const char *filter); +int perf_evsel__append_addr_filter(struct perf_evsel *evsel, + const char *filter); int perf_evsel__apply_filter(struct perf_evsel *evsel, int ncpus, int nthreads, const char *filter); int perf_evsel__enable(struct perf_evsel *evsel); diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 16bf09c..33546c3 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -1760,20 +1760,49 @@ foreach_evsel_in_last_glob(struct perf_evlist *evlist, static int set_filter(struct perf_evsel *evsel, const void *arg) { const char *str = arg; + bool 
found = false; + int nr_addr_filters = 0; + struct perf_pmu *pmu = NULL; - if (evsel == NULL || evsel->attr.type != PERF_TYPE_TRACEPOINT) { - fprintf(stderr, - "--filter option should follow a -e tracepoint option\n"); - return -1; + if (evsel == NULL) + goto err; + + if (evsel->attr.type == PERF_TYPE_TRACEPOINT) { + if (perf_evsel__append_tp_filter(evsel, str) < 0) { + fprintf(stderr, + "not enough memory to hold filter string\n"); + return -1; + } + + return 0; } - if (perf_evsel__append_tp_filter(evsel, str) < 0) { + while ((pmu = perf_pmu__scan(pmu)) != NULL) + if (pmu->type == evsel->attr.type) { + found = true; + break; + } + + if (found) + perf_pmu__scan_file(pmu, "nr_addr_filters", + "%d", &nr_addr_filters); + + if (!nr_addr_filters) + goto err; + + if (perf_evsel__append_addr_filter(evsel, str) < 0) { fprintf(stderr, "not enough memory to hold filter string\n"); return -1; } return 0; + +err: + fprintf(stderr, + "--filter option should follow a -e tracepoint or HW tracer option\n"); + + return -1; } int parse_filter(const struct option *opt, const char *str, -- cgit v0.10.2 From e7a06a5353574cac3a34211bd4e514bb8d00d766 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 23 Sep 2016 17:38:35 +0300 Subject: perf script: Fix vanished idle symbols Commit 608c34de0b3d ("perf symbols: Mark if a symbol is idle in the library") causes idle symbols to vanish from perf script output. That is because print functions suppress symbols marked as 'idle'. However, suppression of 'idle' functions is only used by 'perf top' and 'perf top' does not use the print functions. Consequently that functionality can simply be removed from the print functions. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Cc: Masami Hiramatsu Cc: Mathieu Poirier Fixes: 608c34de0b3d ("perf symbols: Mark if a symbol is idle in the library") Link: http://lkml.kernel.org/r/1474641528-18776-4-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/evsel_fprintf.c b/tools/perf/util/evsel_fprintf.c index 9111e06..662a0a6 100644 --- a/tools/perf/util/evsel_fprintf.c +++ b/tools/perf/util/evsel_fprintf.c @@ -122,9 +122,6 @@ int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment, if (!node) break; - if (node->sym && node->sym->idle) - goto next; - printed += fprintf(fp, "%-*.*s", left_alignment, left_alignment, " "); if (print_ip) @@ -158,7 +155,7 @@ int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment, if (!print_oneline) printed += fprintf(fp, "\n"); -next: + callchain_cursor_advance(cursor); } } @@ -181,7 +178,7 @@ int sample__fprintf_sym(struct perf_sample *sample, struct addr_location *al, if (cursor != NULL) { printed += sample__fprintf_callchain(sample, left_alignment, print_opts, cursor, fp); - } else if (!(al->sym && al->sym->idle)) { + } else { printed += fprintf(fp, "%-*.*s", left_alignment, left_alignment, " "); if (print_ip) -- cgit v0.10.2 From 394c01ed8aafdf0f14fb18c6d320d8a783da243c Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 23 Sep 2016 17:38:36 +0300 Subject: perf record: Rename label 'out_symbol_exit' In preparation for fixing the error paths, rename label 'out_symbol_exit' to be 'out' because that error path can be used irrespective of whether symbols (or anything else) has been initialized. 
Signed-off-by: Adrian Hunter Cc: Jiri Olsa Cc: Masami Hiramatsu Cc: Mathieu Poirier Link: http://lkml.kernel.org/r/1474641528-18776-5-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 2d0d69b..b32a880 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -1643,7 +1643,7 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused) if (rec->evlist->nr_entries == 0 && perf_evlist__add_default(rec->evlist) < 0) { pr_err("Not enough memory for event selector list\n"); - goto out_symbol_exit; + goto out; } if (rec->opts.target.tid && !rec->opts.no_inherit_set) @@ -1663,7 +1663,7 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused) ui__error("%s", errbuf); err = -saved_errno; - goto out_symbol_exit; + goto out; } err = -ENOMEM; @@ -1672,7 +1672,7 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused) err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts); if (err) - goto out_symbol_exit; + goto out; /* * We take all buildids when the file contains @@ -1684,11 +1684,11 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused) if (record_opts__config(&rec->opts)) { err = -EINVAL; - goto out_symbol_exit; + goto out; } err = __cmd_record(&record, argc, argv); -out_symbol_exit: +out: perf_evlist__delete(rec->evlist); symbol__exit(); auxtrace_record__free(rec->itr); -- cgit v0.10.2 From 5c01ad60b8a23f8ff59b9a5a756f07ed08f0b6d1 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 23 Sep 2016 17:38:37 +0300 Subject: perf record: Fix error paths Some error paths do not tidy-up. Fix that. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Cc: Masami Hiramatsu Cc: Mathieu Poirier Link: http://lkml.kernel.org/r/1474641528-18776-6-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index b32a880..962adcf 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -1573,23 +1573,23 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused) if (!rec->itr) { rec->itr = auxtrace_record__init(rec->evlist, &err); if (err) - return err; + goto out; } err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts, rec->opts.auxtrace_snapshot_opts); if (err) - return err; + goto out; if (dry_run) - return 0; + goto out; err = bpf__setup_stdout(rec->evlist); if (err) { bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf)); pr_err("ERROR: Setup BPF stdout failed: %s\n", errbuf); - return err; + goto out; } err = -ENOMEM; -- cgit v0.10.2 From cd67f99fe90dcf515f1c70c474b84d56b6236cbb Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 23 Sep 2016 17:38:38 +0300 Subject: perf symbols: Add dso__last_symbol() Add a function to find the last symbol in a DSO. This will be used when parsing address filters to calculate a region that includes the entire DSO. 
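For illustration, a hypothetical sketch of the intended use (the actual consumer, addr_filter__entire_dso(), arrives in the next patch): the first and last function symbols bound an address range covering the whole DSO.

/* Hypothetical sketch: derive a start/size range spanning a whole DSO. */
static int dso__func_range(struct dso *dso, u64 *start, u64 *size)
{
	struct symbol *first = dso__first_symbol(dso, MAP__FUNCTION);
	struct symbol *last = dso__last_symbol(dso, MAP__FUNCTION);

	if (!first || !last)
		return -EINVAL;	/* needs <errno.h> */

	*start = first->start;
	*size = last->end - first->start;
	return 0;
}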
Signed-off-by: Adrian Hunter Cc: Jiri Olsa Cc: Masami Hiramatsu Cc: Mathieu Poirier Link: http://lkml.kernel.org/r/1474641528-18776-7-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 19c9c55..aecff69 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -345,6 +345,16 @@ static struct symbol *symbols__first(struct rb_root *symbols) return NULL; } +static struct symbol *symbols__last(struct rb_root *symbols) +{ + struct rb_node *n = rb_last(symbols); + + if (n) + return rb_entry(n, struct symbol, rb_node); + + return NULL; +} + static struct symbol *symbols__next(struct symbol *sym) { struct rb_node *n = rb_next(&sym->rb_node); @@ -466,6 +476,11 @@ struct symbol *dso__first_symbol(struct dso *dso, enum map_type type) return symbols__first(&dso->symbols[type]); } +struct symbol *dso__last_symbol(struct dso *dso, enum map_type type) +{ + return symbols__last(&dso->symbols[type]); +} + struct symbol *dso__next_symbol(struct symbol *sym) { return symbols__next(sym); diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 0dacfb7..d964844 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -259,6 +259,7 @@ struct symbol *dso__find_symbol_by_name(struct dso *dso, enum map_type type, struct symbol *symbol__next_by_name(struct symbol *sym); struct symbol *dso__first_symbol(struct dso *dso, enum map_type type); +struct symbol *dso__last_symbol(struct dso *dso, enum map_type type); struct symbol *dso__next_symbol(struct symbol *sym); enum dso_type dso__type_fd(int fd); -- cgit v0.10.2 From 1b36c03e356936d62abbe2accaae1573d3b66f14 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 23 Sep 2016 17:38:39 +0300 Subject: perf record: Add support for using symbols in address filters Symbols come from either the DSO or /proc/kallsyms for the kernel. Details of the functionality can be found in Documentation/perf-record.txt. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Cc: Masami Hiramatsu Cc: Mathieu Poirier Link: http://lkml.kernel.org/r/1474641528-18776-8-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index babbb63..9233519 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -89,9 +89,62 @@ OPTIONS --filter=:: Event filter. This option should follow a event selector (-e) which - selects tracepoint event(s). Multiple '--filter' options are combined + selects either tracepoint event(s) or a hardware trace PMU + (e.g. Intel PT or CoreSight). + + - tracepoint filters + + In the case of tracepoints, multiple '--filter' options are combined using '&&'. + - address filters + + A hardware trace PMU advertises its ability to accept a number of + address filters by specifying a non-zero value in + /sys/bus/event_source/devices/<pmu>/nr_addr_filters. + + Address filters have the format: + + filter|start|stop|tracestop <start> [/ <size>] [@<file name>] + + Where: + - 'filter': defines a region that will be traced. + - 'start': defines an address at which tracing will begin. + - 'stop': defines an address at which tracing will stop. + - 'tracestop': defines a region in which tracing will stop. + + <file name> is the name of the object file, <start> is the offset to the + code to trace in that file, and <size> is the size of the region to + trace. 'start' and 'stop' filters need not specify a <size>.
+ + If no object file is specified then the kernel is assumed, in which case + the start address must be a current kernel memory address. + + <start> can also be specified by providing the name of a symbol. If the + symbol name is not unique, it can be disambiguated by inserting #n where + 'n' selects the n'th symbol in address order. Alternately #0, #g or #G + select only a global symbol. <size> can also be specified by providing + the name of a symbol, in which case the size is calculated to the end + of that symbol. For 'filter' and 'tracestop' filters, if <size> is + omitted and <start> is a symbol, then the size is calculated to the end + of that symbol. + + If <size> is omitted and <start> is '*', then the start and size will + be calculated from the first and last symbols, i.e. to trace the whole + file. + + If symbol names (or '*') are provided, they must be surrounded by white + space. + + The filter passed to the kernel is not necessarily the same as entered. + To see the filter that is passed, use the -v option. + + The kernel may not be able to configure a trace region if it is not + within a single mapping. MMAP events (or /proc/<pid>/maps) can be + examined to determine if that is a possibility. + + Multiple filters can be separated with space or comma. + --exclude-perf:: Don't record events issued by perf itself. This option should follow a event selector (-e) which selects tracepoint event(s). It adds a diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 962adcf..67d2a90 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -1581,6 +1581,18 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused) if (err) goto out; + + /* + * Allow aliases to facilitate the lookup of symbols for address + * filters. Refer to auxtrace_parse_filters().
+ */ + symbol_conf.allow_aliases = true; + + symbol__init(NULL); + + err = auxtrace_parse_filters(rec->evlist); + if (err) + goto out; + if (dry_run) goto out; @@ -1594,8 +1606,6 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused) err = -ENOMEM; - symbol__init(NULL); - if (symbol_conf.kptr_restrict) pr_warning( "WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n" diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index c0aba8e..c5a6e0b 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -16,6 +16,10 @@ #include #include #include +#include +#include +#include +#include #include #include @@ -35,9 +39,14 @@ #include "../perf.h" #include "util.h" #include "evlist.h" +#include "dso.h" +#include "map.h" +#include "pmu.h" +#include "evsel.h" #include "cpumap.h" #include "thread_map.h" #include "asm/bug.h" +#include "symbol/kallsyms.h" #include "auxtrace.h" #include @@ -1399,3 +1408,731 @@ void *auxtrace_cache__lookup(struct auxtrace_cache *c, u32 key) return NULL; } + +static void addr_filter__free_str(struct addr_filter *filt) +{ + free(filt->str); + filt->action = NULL; + filt->sym_from = NULL; + filt->sym_to = NULL; + filt->filename = NULL; + filt->str = NULL; +} + +static struct addr_filter *addr_filter__new(void) +{ + struct addr_filter *filt = zalloc(sizeof(*filt)); + + if (filt) + INIT_LIST_HEAD(&filt->list); + + return filt; +} + +static void addr_filter__free(struct addr_filter *filt) +{ + if (filt) + addr_filter__free_str(filt); + free(filt); +} + +static void addr_filters__add(struct addr_filters *filts, + struct addr_filter *filt) +{ + list_add_tail(&filt->list, &filts->head); + filts->cnt += 1; +} + +static void addr_filters__del(struct addr_filters *filts, + struct addr_filter *filt) +{ + list_del_init(&filt->list); + filts->cnt -= 1; +} + +void addr_filters__init(struct addr_filters *filts) +{ + INIT_LIST_HEAD(&filts->head); + filts->cnt = 0; +} + +void addr_filters__exit(struct addr_filters *filts) +{ + struct addr_filter *filt, *n; + + list_for_each_entry_safe(filt, n, &filts->head, list) { + addr_filters__del(filts, filt); + addr_filter__free(filt); + } +} + +static int parse_num_or_str(char **inp, u64 *num, const char **str, + const char *str_delim) +{ + *inp += strspn(*inp, " "); + + if (isdigit(**inp)) { + char *endptr; + + if (!num) + return -EINVAL; + errno = 0; + *num = strtoull(*inp, &endptr, 0); + if (errno) + return -errno; + if (endptr == *inp) + return -EINVAL; + *inp = endptr; + } else { + size_t n; + + if (!str) + return -EINVAL; + *inp += strspn(*inp, " "); + *str = *inp; + n = strcspn(*inp, str_delim); + if (!n) + return -EINVAL; + *inp += n; + if (**inp) { + **inp = '\0'; + *inp += 1; + } + } + return 0; +} + +static int parse_action(struct addr_filter *filt) +{ + if (!strcmp(filt->action, "filter")) { + filt->start = true; + filt->range = true; + } else if (!strcmp(filt->action, "start")) { + filt->start = true; + } else if (!strcmp(filt->action, "stop")) { + filt->start = false; + } else if (!strcmp(filt->action, "tracestop")) { + filt->start = false; + filt->range = true; + filt->action += 5; /* Change 'tracestop' to 'stop' */ + } else { + return -EINVAL; + } + return 0; +} + +static int parse_sym_idx(char **inp, int *idx) +{ + *idx = -1; + + *inp += strspn(*inp, " "); + + if (**inp != '#') + return 0; + + *inp += 1; + + if (**inp == 'g' || **inp == 'G') { + *inp += 1; + *idx = 0; + } else { + unsigned long num; + char *endptr; + + errno = 0; + num = strtoul(*inp, 
&endptr, 0); + if (errno) + return -errno; + if (endptr == *inp || num > INT_MAX) + return -EINVAL; + *inp = endptr; + *idx = num; + } + + return 0; +} + +static int parse_addr_size(char **inp, u64 *num, const char **str, int *idx) +{ + int err = parse_num_or_str(inp, num, str, " "); + + if (!err && *str) + err = parse_sym_idx(inp, idx); + + return err; +} + +static int parse_one_filter(struct addr_filter *filt, const char **filter_inp) +{ + char *fstr; + int err; + + filt->str = fstr = strdup(*filter_inp); + if (!fstr) + return -ENOMEM; + + err = parse_num_or_str(&fstr, NULL, &filt->action, " "); + if (err) + goto out_err; + + err = parse_action(filt); + if (err) + goto out_err; + + err = parse_addr_size(&fstr, &filt->addr, &filt->sym_from, + &filt->sym_from_idx); + if (err) + goto out_err; + + fstr += strspn(fstr, " "); + + if (*fstr == '/') { + fstr += 1; + err = parse_addr_size(&fstr, &filt->size, &filt->sym_to, + &filt->sym_to_idx); + if (err) + goto out_err; + filt->range = true; + } + + fstr += strspn(fstr, " "); + + if (*fstr == '@') { + fstr += 1; + err = parse_num_or_str(&fstr, NULL, &filt->filename, " ,"); + if (err) + goto out_err; + } + + fstr += strspn(fstr, " ,"); + + *filter_inp += fstr - filt->str; + + return 0; + +out_err: + addr_filter__free_str(filt); + + return err; +} + +int addr_filters__parse_bare_filter(struct addr_filters *filts, + const char *filter) +{ + struct addr_filter *filt; + const char *fstr = filter; + int err; + + while (*fstr) { + filt = addr_filter__new(); + err = parse_one_filter(filt, &fstr); + if (err) { + addr_filter__free(filt); + addr_filters__exit(filts); + return err; + } + addr_filters__add(filts, filt); + } + + return 0; +} + +struct sym_args { + const char *name; + u64 start; + u64 size; + int idx; + int cnt; + bool started; + bool global; + bool selected; + bool duplicate; + bool near; +}; + +static bool kern_sym_match(struct sym_args *args, const char *name, char type) +{ + /* A function with the same name, and global or the n'th found or any */ + return symbol_type__is_a(type, MAP__FUNCTION) && + !strcmp(name, args->name) && + ((args->global && isupper(type)) || + (args->selected && ++(args->cnt) == args->idx) || + (!args->global && !args->selected)); +} + +static int find_kern_sym_cb(void *arg, const char *name, char type, u64 start) +{ + struct sym_args *args = arg; + + if (args->started) { + if (!args->size) + args->size = start - args->start; + if (args->selected) { + if (args->size) + return 1; + } else if (kern_sym_match(args, name, type)) { + args->duplicate = true; + return 1; + } + } else if (kern_sym_match(args, name, type)) { + args->started = true; + args->start = start; + } + + return 0; +} + +static int print_kern_sym_cb(void *arg, const char *name, char type, u64 start) +{ + struct sym_args *args = arg; + + if (kern_sym_match(args, name, type)) { + pr_err("#%d\t0x%"PRIx64"\t%c\t%s\n", + ++args->cnt, start, type, name); + args->near = true; + } else if (args->near) { + args->near = false; + pr_err("\t\twhich is near\t\t%s\n", name); + } + + return 0; +} + +static int sym_not_found_error(const char *sym_name, int idx) +{ + if (idx > 0) { + pr_err("N'th occurrence (N=%d) of symbol '%s' not found.\n", + idx, sym_name); + } else if (!idx) { + pr_err("Global symbol '%s' not found.\n", sym_name); + } else { + pr_err("Symbol '%s' not found.\n", sym_name); + } + pr_err("Note that symbols must be functions.\n"); + + return -EINVAL; +} + +static int find_kern_sym(const char *sym_name, u64 *start, u64 *size, int idx) +{ + struct 
sym_args args = { + .name = sym_name, + .idx = idx, + .global = !idx, + .selected = idx > 0, + }; + int err; + + *start = 0; + *size = 0; + + err = kallsyms__parse("/proc/kallsyms", &args, find_kern_sym_cb); + if (err < 0) { + pr_err("Failed to parse /proc/kallsyms\n"); + return err; + } + + if (args.duplicate) { + pr_err("Multiple kernel symbols with name '%s'\n", sym_name); + args.cnt = 0; + kallsyms__parse("/proc/kallsyms", &args, print_kern_sym_cb); + pr_err("Disambiguate symbol name by inserting #n after the name e.g. %s #2\n", + sym_name); + pr_err("Or select a global symbol by inserting #0 or #g or #G\n"); + return -EINVAL; + } + + if (!args.started) { + pr_err("Kernel symbol lookup: "); + return sym_not_found_error(sym_name, idx); + } + + *start = args.start; + *size = args.size; + + return 0; +} + +static int find_entire_kern_cb(void *arg, const char *name __maybe_unused, + char type, u64 start) +{ + struct sym_args *args = arg; + + if (!symbol_type__is_a(type, MAP__FUNCTION)) + return 0; + + if (!args->started) { + args->started = true; + args->start = start; + } + /* Don't know exactly where the kernel ends, so we add a page */ + args->size = round_up(start, page_size) + page_size - args->start; + + return 0; +} + +static int addr_filter__entire_kernel(struct addr_filter *filt) +{ + struct sym_args args = { .started = false }; + int err; + + err = kallsyms__parse("/proc/kallsyms", &args, find_entire_kern_cb); + if (err < 0 || !args.started) { + pr_err("Failed to parse /proc/kallsyms\n"); + return err; + } + + filt->addr = args.start; + filt->size = args.size; + + return 0; +} + +static int check_end_after_start(struct addr_filter *filt, u64 start, u64 size) +{ + if (start + size >= filt->addr) + return 0; + + if (filt->sym_from) { + pr_err("Symbol '%s' (0x%"PRIx64") comes before '%s' (0x%"PRIx64")\n", + filt->sym_to, start, filt->sym_from, filt->addr); + } else { + pr_err("Symbol '%s' (0x%"PRIx64") comes before address 0x%"PRIx64")\n", + filt->sym_to, start, filt->addr); + } + + return -EINVAL; +} + +static int addr_filter__resolve_kernel_syms(struct addr_filter *filt) +{ + bool no_size = false; + u64 start, size; + int err; + + if (symbol_conf.kptr_restrict) { + pr_err("Kernel addresses are restricted. Unable to resolve kernel symbols.\n"); + return -EINVAL; + } + + if (filt->sym_from && !strcmp(filt->sym_from, "*")) + return addr_filter__entire_kernel(filt); + + if (filt->sym_from) { + err = find_kern_sym(filt->sym_from, &start, &size, + filt->sym_from_idx); + if (err) + return err; + filt->addr = start; + if (filt->range && !filt->size && !filt->sym_to) { + filt->size = size; + no_size = !!size; + } + } + + if (filt->sym_to) { + err = find_kern_sym(filt->sym_to, &start, &size, + filt->sym_to_idx); + if (err) + return err; + + err = check_end_after_start(filt, start, size); + if (err) + return err; + filt->size = start + size - filt->addr; + no_size = !!size; + } + + /* The very last symbol in kallsyms does not imply a particular size */ + if (no_size) { + pr_err("Cannot determine size of symbol '%s'\n", + filt->sym_to ? 
filt->sym_to : filt->sym_from); + return -EINVAL; + } + + return 0; +} + +static struct dso *load_dso(const char *name) +{ + struct map *map; + struct dso *dso; + + map = dso__new_map(name); + if (!map) + return NULL; + + map__load(map); + + dso = dso__get(map->dso); + + map__put(map); + + return dso; +} + +static bool dso_sym_match(struct symbol *sym, const char *name, int *cnt, + int idx) +{ + /* Same name, and global or the n'th found or any */ + return !arch__compare_symbol_names(name, sym->name) && + ((!idx && sym->binding == STB_GLOBAL) || + (idx > 0 && ++*cnt == idx) || + idx < 0); +} + +static void print_duplicate_syms(struct dso *dso, const char *sym_name) +{ + struct symbol *sym; + bool near = false; + int cnt = 0; + + pr_err("Multiple symbols with name '%s'\n", sym_name); + + sym = dso__first_symbol(dso, MAP__FUNCTION); + while (sym) { + if (dso_sym_match(sym, sym_name, &cnt, -1)) { + pr_err("#%d\t0x%"PRIx64"\t%c\t%s\n", + ++cnt, sym->start, + sym->binding == STB_GLOBAL ? 'g' : + sym->binding == STB_LOCAL ? 'l' : 'w', + sym->name); + near = true; + } else if (near) { + near = false; + pr_err("\t\twhich is near\t\t%s\n", sym->name); + } + sym = dso__next_symbol(sym); + } + + pr_err("Disambiguate symbol name by inserting #n after the name e.g. %s #2\n", + sym_name); + pr_err("Or select a global symbol by inserting #0 or #g or #G\n"); +} + +static int find_dso_sym(struct dso *dso, const char *sym_name, u64 *start, + u64 *size, int idx) +{ + struct symbol *sym; + int cnt = 0; + + *start = 0; + *size = 0; + + sym = dso__first_symbol(dso, MAP__FUNCTION); + while (sym) { + if (*start) { + if (!*size) + *size = sym->start - *start; + if (idx > 0) { + if (*size) + return 1; + } else if (dso_sym_match(sym, sym_name, &cnt, idx)) { + print_duplicate_syms(dso, sym_name); + return -EINVAL; + } + } else if (dso_sym_match(sym, sym_name, &cnt, idx)) { + *start = sym->start; + *size = sym->end - sym->start; + } + sym = dso__next_symbol(sym); + } + + if (!*start) + return sym_not_found_error(sym_name, idx); + + return 0; +} + +static int addr_filter__entire_dso(struct addr_filter *filt, struct dso *dso) +{ + struct symbol *first_sym = dso__first_symbol(dso, MAP__FUNCTION); + struct symbol *last_sym = dso__last_symbol(dso, MAP__FUNCTION); + + if (!first_sym || !last_sym) { + pr_err("Failed to determine filter for %s\nNo symbols found.\n", + filt->filename); + return -EINVAL; + } + + filt->addr = first_sym->start; + filt->size = last_sym->end - first_sym->start; + + return 0; +} + +static int addr_filter__resolve_syms(struct addr_filter *filt) +{ + u64 start, size; + struct dso *dso; + int err = 0; + + if (!filt->sym_from && !filt->sym_to) + return 0; + + if (!filt->filename) + return addr_filter__resolve_kernel_syms(filt); + + dso = load_dso(filt->filename); + if (!dso) { + pr_err("Failed to load symbols from: %s\n", filt->filename); + return -EINVAL; + } + + if (filt->sym_from && !strcmp(filt->sym_from, "*")) { + err = addr_filter__entire_dso(filt, dso); + goto put_dso; + } + + if (filt->sym_from) { + err = find_dso_sym(dso, filt->sym_from, &start, &size, + filt->sym_from_idx); + if (err) + goto put_dso; + filt->addr = start; + if (filt->range && !filt->size && !filt->sym_to) + filt->size = size; + } + + if (filt->sym_to) { + err = find_dso_sym(dso, filt->sym_to, &start, &size, + filt->sym_to_idx); + if (err) + goto put_dso; + + err = check_end_after_start(filt, start, size); + if (err) + return err; + + filt->size = start + size - filt->addr; + } + +put_dso: + dso__put(dso); + + return err; +} + 
+static char *addr_filter__to_str(struct addr_filter *filt) +{ + char filename_buf[PATH_MAX]; + const char *at = ""; + const char *fn = ""; + char *filter; + int err; + + if (filt->filename) { + at = "@"; + fn = realpath(filt->filename, filename_buf); + if (!fn) + return NULL; + } + + if (filt->range) { + err = asprintf(&filter, "%s 0x%"PRIx64"/0x%"PRIx64"%s%s", + filt->action, filt->addr, filt->size, at, fn); + } else { + err = asprintf(&filter, "%s 0x%"PRIx64"%s%s", + filt->action, filt->addr, at, fn); + } + + return err < 0 ? NULL : filter; +} + +static int parse_addr_filter(struct perf_evsel *evsel, const char *filter, + int max_nr) +{ + struct addr_filters filts; + struct addr_filter *filt; + int err; + + addr_filters__init(&filts); + + err = addr_filters__parse_bare_filter(&filts, filter); + if (err) + goto out_exit; + + if (filts.cnt > max_nr) { + pr_err("Error: number of address filters (%d) exceeds maximum (%d)\n", + filts.cnt, max_nr); + err = -EINVAL; + goto out_exit; + } + + list_for_each_entry(filt, &filts.head, list) { + char *new_filter; + + err = addr_filter__resolve_syms(filt); + if (err) + goto out_exit; + + new_filter = addr_filter__to_str(filt); + if (!new_filter) { + err = -ENOMEM; + goto out_exit; + } + + if (perf_evsel__append_addr_filter(evsel, new_filter)) { + err = -ENOMEM; + goto out_exit; + } + } + +out_exit: + addr_filters__exit(&filts); + + if (err) { + pr_err("Failed to parse address filter: '%s'\n", filter); + pr_err("Filter format is: filter|start|stop|tracestop [/ ] [@]\n"); + pr_err("Where multiple filters are separated by space or comma.\n"); + } + + return err; +} + +static struct perf_pmu *perf_evsel__find_pmu(struct perf_evsel *evsel) +{ + struct perf_pmu *pmu = NULL; + + while ((pmu = perf_pmu__scan(pmu)) != NULL) { + if (pmu->type == evsel->attr.type) + break; + } + + return pmu; +} + +static int perf_evsel__nr_addr_filter(struct perf_evsel *evsel) +{ + struct perf_pmu *pmu = perf_evsel__find_pmu(evsel); + int nr_addr_filters = 0; + + if (!pmu) + return 0; + + perf_pmu__scan_file(pmu, "nr_addr_filters", "%d", &nr_addr_filters); + + return nr_addr_filters; +} + +int auxtrace_parse_filters(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel; + char *filter; + int err, max_nr; + + evlist__for_each_entry(evlist, evsel) { + filter = evsel->filter; + max_nr = perf_evsel__nr_addr_filter(evsel); + if (!filter || !max_nr) + continue; + evsel->filter = NULL; + err = parse_addr_filter(evsel, filter, max_nr); + free(filter); + if (err) + return err; + pr_debug("Address filter: %s\n", evsel->filter); + } + + return 0; +} diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h index 09286f1..26fb1ee 100644 --- a/tools/perf/util/auxtrace.h +++ b/tools/perf/util/auxtrace.h @@ -318,6 +318,48 @@ struct auxtrace_record { unsigned int alignment; }; +/** + * struct addr_filter - address filter. 
+ * @list: list node + * @range: true if it is a range filter + * @start: true if action is 'filter' or 'start' + * @action: 'filter', 'start' or 'stop' ('tracestop' is accepted but converted + * to 'stop') + * @sym_from: symbol name for the filter address + * @sym_to: symbol name that determines the filter size + * @sym_from_idx: selects n'th from symbols with the same name (0 means global + * and less than 0 means symbol must be unique) + * @sym_to_idx: same as @sym_from_idx but for @sym_to + * @addr: filter address + * @size: filter region size (for range filters) + * @filename: DSO file name or NULL for the kernel + * @str: allocated string that contains the other string members + */ +struct addr_filter { + struct list_head list; + bool range; + bool start; + const char *action; + const char *sym_from; + const char *sym_to; + int sym_from_idx; + int sym_to_idx; + u64 addr; + u64 size; + const char *filename; + char *str; +}; + +/** + * struct addr_filters - list of address filters. + * @head: list of address filters + * @cnt: number of address filters + */ +struct addr_filters { + struct list_head head; + int cnt; +}; + #ifdef HAVE_AUXTRACE_SUPPORT /* @@ -482,6 +524,12 @@ void perf_session__auxtrace_error_inc(struct perf_session *session, union perf_event *event); void events_stats__auxtrace_error_warn(const struct events_stats *stats); +void addr_filters__init(struct addr_filters *filts); +void addr_filters__exit(struct addr_filters *filts); +int addr_filters__parse_bare_filter(struct addr_filters *filts, + const char *filter); +int auxtrace_parse_filters(struct perf_evlist *evlist); + static inline int auxtrace__process_event(struct perf_session *session, union perf_event *event, struct perf_sample *sample, @@ -640,6 +688,12 @@ void auxtrace_index__free(struct list_head *head __maybe_unused) { } +static inline +int auxtrace_parse_filters(struct perf_evlist *evlist __maybe_unused) +{ + return 0; +} + int auxtrace_mmap__mmap(struct auxtrace_mmap *mm, struct auxtrace_mmap_params *mp, void *userpg, int fd); -- cgit v0.10.2 From f9655200ecd2d6cc13900a727150177b94ca229e Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 23 Sep 2016 17:38:40 +0300 Subject: perf probe: Increase debug level of SDT debug messages Two SDT debug messages can occur for every DSO which is too noisy. Consequently, increase debug level of SDT messages. 
Signed-off-by: Adrian Hunter Acked-by: Masami Hiramatsu Cc: Jiri Olsa Cc: Mathieu Poirier Link: http://lkml.kernel.org/r/1474641528-18776-9-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index 5651f3c..e528c40 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -620,7 +620,7 @@ static int build_id_cache__add_sdt_cache(const char *sbuild_id, ret = probe_cache__scan_sdt(cache, realname); if (ret >= 0) { - pr_debug("Found %d SDTs in %s\n", ret, realname); + pr_debug4("Found %d SDTs in %s\n", ret, realname); if (probe_cache__commit(cache) < 0) ret = -1; } @@ -691,7 +691,7 @@ int build_id_cache__add_s(const char *sbuild_id, const char *name, /* Update SDT cache : error is just warned */ if (build_id_cache__add_sdt_cache(sbuild_id, realname) < 0) - pr_debug("Failed to update/scan SDT cache for %s\n", realname); + pr_debug4("Failed to update/scan SDT cache for %s\n", realname); out_free: if (!is_kallsyms) diff --git a/tools/perf/util/probe-file.c b/tools/perf/util/probe-file.c index 6f931e4..436b647 100644 --- a/tools/perf/util/probe-file.c +++ b/tools/perf/util/probe-file.c @@ -699,7 +699,7 @@ int probe_cache__scan_sdt(struct probe_cache *pcache, const char *pathname) INIT_LIST_HEAD(&sdtlist); ret = get_sdt_note_list(&sdtlist, pathname); if (ret < 0) { - pr_debug("Failed to get sdt note: %d\n", ret); + pr_debug4("Failed to get sdt note: %d\n", ret); return ret; } list_for_each_entry(note, &sdtlist, note_list) { -- cgit v0.10.2 From 810c398bc09b2f2dfde52a7d2483a710612c5fb8 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 23 Sep 2016 17:38:41 +0300 Subject: perf intel-pt: Fix snapshot overlap detection decoder errors Fix occasional decoder errors decoding trace data collected in snapshot mode. Snapshot mode can take successive snapshots of trace which might overlap. The decoder checks whether there is an overlap but only looks at the current and previous buffer. However buffers that do not contain synchronization (i.e. PSB) packets cannot be decoded or used for overlap checking. That means the decoder actually needs to check overlaps between the current buffer and the previous buffer that contained usable data. Make that change. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Cc: Masami Hiramatsu Cc: Mathieu Poirier Cc: stable@vger.kernel.org # v4.3+ Link: http://lkml.kernel.org/r/1474641528-18776-10-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index b9cc353..b744ea8 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -241,7 +241,7 @@ static int intel_pt_get_trace(struct intel_pt_buffer *b, void *data) } queue = &ptq->pt->queues.queue_array[ptq->queue_nr]; - +next: buffer = auxtrace_buffer__next(queue, buffer); if (!buffer) { if (old_buffer) @@ -264,9 +264,6 @@ static int intel_pt_get_trace(struct intel_pt_buffer *b, void *data) intel_pt_do_fix_overlap(ptq->pt, old_buffer, buffer)) return -ENOMEM; - if (old_buffer) - auxtrace_buffer__drop_data(old_buffer); - if (buffer->use_data) { b->len = buffer->use_size; b->buf = buffer->use_data; @@ -276,6 +273,16 @@ static int intel_pt_get_trace(struct intel_pt_buffer *b, void *data) } b->ref_timestamp = buffer->reference; + /* + * If in snapshot mode and the buffer has no usable data, get next + * buffer and again check overlap against old_buffer. 
+ */ + if (ptq->pt->snapshot_mode && !b->len) + goto next; + + if (old_buffer) + auxtrace_buffer__drop_data(old_buffer); + if (!old_buffer || ptq->pt->sampling_mode || (ptq->pt->snapshot_mode && !buffer->consecutive)) { b->consecutive = false; -- cgit v0.10.2 From fa8025c37454501a2df4a90ae84ff01f4aff8ba8 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 23 Sep 2016 17:38:42 +0300 Subject: perf intel-pt: Add support for recording the max non-turbo ratio Previously the maximum non-turbo ratio was calculated from TSC assuming a 100 MHz multiplier which is correct for current hardware supporting Intel PT. However more recent kernels also now export the value, so use that in preference to the calculated value. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Cc: Masami Hiramatsu Cc: Mathieu Poirier Link: http://lkml.kernel.org/r/1474641528-18776-11-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c index a2412e9..18b2151 100644 --- a/tools/perf/arch/x86/util/intel-pt.c +++ b/tools/perf/arch/x86/util/intel-pt.c @@ -302,6 +302,7 @@ static int intel_pt_info_fill(struct auxtrace_record *itr, bool cap_user_time_zero = false, per_cpu_mmaps; u64 tsc_bit, mtc_bit, mtc_freq_bits, cyc_bit, noretcomp_bit; u32 tsc_ctc_ratio_n, tsc_ctc_ratio_d; + unsigned long max_non_turbo_ratio; int err; if (priv_size != INTEL_PT_AUXTRACE_PRIV_SIZE) @@ -317,6 +318,10 @@ static int intel_pt_info_fill(struct auxtrace_record *itr, intel_pt_tsc_ctc_ratio(&tsc_ctc_ratio_n, &tsc_ctc_ratio_d); + if (perf_pmu__scan_file(intel_pt_pmu, "max_nonturbo_ratio", + "%lu", &max_non_turbo_ratio) != 1) + max_non_turbo_ratio = 0; + if (!session->evlist->nr_mmaps) return -EINVAL; @@ -351,6 +356,7 @@ static int intel_pt_info_fill(struct auxtrace_record *itr, auxtrace_info->priv[INTEL_PT_TSC_CTC_N] = tsc_ctc_ratio_n; auxtrace_info->priv[INTEL_PT_TSC_CTC_D] = tsc_ctc_ratio_d; auxtrace_info->priv[INTEL_PT_CYC_BIT] = cyc_bit; + auxtrace_info->priv[INTEL_PT_MAX_NONTURBO_RATIO] = max_non_turbo_ratio; return 0; } diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index b744ea8..77fbf02 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -2023,6 +2023,7 @@ static const char * const intel_pt_info_fmts[] = { [INTEL_PT_TSC_CTC_N] = " TSC:CTC numerator %"PRIu64"\n", [INTEL_PT_TSC_CTC_D] = " TSC:CTC denominator %"PRIu64"\n", [INTEL_PT_CYC_BIT] = " CYC bit %#"PRIx64"\n", + [INTEL_PT_MAX_NONTURBO_RATIO] = " Max non-turbo ratio %"PRIu64"\n", }; static void intel_pt_print_info(u64 *arr, int start, int finish) @@ -2087,6 +2088,15 @@ int intel_pt_process_auxtrace_info(union perf_event *event, INTEL_PT_CYC_BIT); } + if (auxtrace_info->header.size >= sizeof(struct auxtrace_info_event) + + (sizeof(u64) * INTEL_PT_MAX_NONTURBO_RATIO)) { + pt->max_non_turbo_ratio = + auxtrace_info->priv[INTEL_PT_MAX_NONTURBO_RATIO]; + intel_pt_print_info(&auxtrace_info->priv[0], + INTEL_PT_MAX_NONTURBO_RATIO, + INTEL_PT_MAX_NONTURBO_RATIO); + } + pt->timeless_decoding = intel_pt_timeless_decoding(pt); pt->have_tsc = intel_pt_have_tsc(pt); pt->sampling_mode = false; @@ -2156,7 +2166,9 @@ int intel_pt_process_auxtrace_info(union perf_event *event, if (pt->tc.time_mult) { u64 tsc_freq = intel_pt_ns_to_ticks(pt, 1000000000); - pt->max_non_turbo_ratio = (tsc_freq + 50000000) / 100000000; + if (!pt->max_non_turbo_ratio) + pt->max_non_turbo_ratio = + (tsc_freq + 50000000) / 100000000; intel_pt_log("TSC frequency %"PRIu64"\n", tsc_freq); 
intel_pt_log("Maximum non-turbo ratio %u\n", pt->max_non_turbo_ratio); diff --git a/tools/perf/util/intel-pt.h b/tools/perf/util/intel-pt.h index 0065949..8b83562 100644 --- a/tools/perf/util/intel-pt.h +++ b/tools/perf/util/intel-pt.h @@ -34,6 +34,7 @@ enum { INTEL_PT_TSC_CTC_N, INTEL_PT_TSC_CTC_D, INTEL_PT_CYC_BIT, + INTEL_PT_MAX_NONTURBO_RATIO, INTEL_PT_AUXTRACE_PRIV_MAX, }; -- cgit v0.10.2 From 4d34e10a9f9a38c611cac0deda8f91b064282747 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 23 Sep 2016 17:38:43 +0300 Subject: perf intel-pt: Fix missing error codes processing auxtrace_info Fix 2 places where the err variable was not being set. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Cc: Masami Hiramatsu Cc: Mathieu Poirier Link: http://lkml.kernel.org/r/1474641528-18776-12-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index 77fbf02..96519e8 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -2138,11 +2138,13 @@ int intel_pt_process_auxtrace_info(union perf_event *event, pt->switch_evsel = intel_pt_find_sched_switch(session->evlist); if (!pt->switch_evsel) { pr_err("%s: missing sched_switch event\n", __func__); + err = -EINVAL; goto err_delete_thread; } } else if (pt->have_sched_switch == 2 && !intel_pt_find_switch(session->evlist)) { pr_err("%s: missing context_switch attribute flag\n", __func__); + err = -EINVAL; goto err_delete_thread; } -- cgit v0.10.2 From 40b746a06332799786ba557fe84184428bef62fb Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 23 Sep 2016 17:38:44 +0300 Subject: perf intel-pt: Add a helper function for processing AUXTRACE_INFO Add a helper function 'intel_pt_has()' to make it easier to determine which members the AUXTRACE_INFO event contains. 
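The helper just checks that the event is big enough to contain the priv[] slot
being read, i.e. that header.size covers (pos + 1) u64 members past the fixed
part of struct auxtrace_info_event. As a rough illustration (names as in the
patch below), a caller gates each optional member on it like this:

----
	if (intel_pt_has(auxtrace_info, INTEL_PT_MAX_NONTURBO_RATIO))
		pt->max_non_turbo_ratio =
			auxtrace_info->priv[INTEL_PT_MAX_NONTURBO_RATIO];
----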
Signed-off-by: Adrian Hunter Cc: Jiri Olsa Cc: Masami Hiramatsu Cc: Mathieu Poirier Link: http://lkml.kernel.org/r/1474641528-18776-13-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index 96519e8..f16b00f 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -2037,6 +2037,12 @@ static void intel_pt_print_info(u64 *arr, int start, int finish) fprintf(stdout, intel_pt_info_fmts[i], arr[i]); } +static bool intel_pt_has(struct auxtrace_info_event *auxtrace_info, int pos) +{ + return auxtrace_info->header.size >= + sizeof(struct auxtrace_info_event) + (sizeof(u64) * (pos + 1)); +} + int intel_pt_process_auxtrace_info(union perf_event *event, struct perf_session *session) { @@ -2077,8 +2083,7 @@ int intel_pt_process_auxtrace_info(union perf_event *event, intel_pt_print_info(&auxtrace_info->priv[0], INTEL_PT_PMU_TYPE, INTEL_PT_PER_CPU_MMAPS); - if (auxtrace_info->header.size >= sizeof(struct auxtrace_info_event) + - (sizeof(u64) * INTEL_PT_CYC_BIT)) { + if (intel_pt_has(auxtrace_info, INTEL_PT_CYC_BIT)) { pt->mtc_bit = auxtrace_info->priv[INTEL_PT_MTC_BIT]; pt->mtc_freq_bits = auxtrace_info->priv[INTEL_PT_MTC_FREQ_BITS]; pt->tsc_ctc_ratio_n = auxtrace_info->priv[INTEL_PT_TSC_CTC_N]; @@ -2088,8 +2093,7 @@ int intel_pt_process_auxtrace_info(union perf_event *event, INTEL_PT_CYC_BIT); } - if (auxtrace_info->header.size >= sizeof(struct auxtrace_info_event) + - (sizeof(u64) * INTEL_PT_MAX_NONTURBO_RATIO)) { + if (intel_pt_has(auxtrace_info, INTEL_PT_MAX_NONTURBO_RATIO)) { pt->max_non_turbo_ratio = auxtrace_info->priv[INTEL_PT_MAX_NONTURBO_RATIO]; intel_pt_print_info(&auxtrace_info->priv[0], -- cgit v0.10.2 From c093f308cea3eb4be3ed6e7e9ad54bf67a26abe4 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 23 Sep 2016 17:38:45 +0300 Subject: perf intel-pt: Record address filter in AUXTRACE_INFO event The address filter is needed to help decode the trace, so store it in the AUXTRACE_INFO event. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Cc: Masami Hiramatsu Cc: Mathieu Poirier Link: http://lkml.kernel.org/r/1474641528-18776-14-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c index 18b2151..90fa228 100644 --- a/tools/perf/arch/x86/util/intel-pt.c +++ b/tools/perf/arch/x86/util/intel-pt.c @@ -62,6 +62,7 @@ struct intel_pt_recording { size_t snapshot_ref_buf_size; int snapshot_ref_cnt; struct intel_pt_snapshot_ref *snapshot_refs; + size_t priv_size; }; static int intel_pt_parse_terms_with_default(struct list_head *formats, @@ -273,11 +274,37 @@ intel_pt_pmu_default_config(struct perf_pmu *intel_pt_pmu) return attr; } +static const char *intel_pt_find_filter(struct perf_evlist *evlist, + struct perf_pmu *intel_pt_pmu) +{ + struct perf_evsel *evsel; + + evlist__for_each_entry(evlist, evsel) { + if (evsel->attr.type == intel_pt_pmu->type) + return evsel->filter; + } + + return NULL; +} + +static size_t intel_pt_filter_bytes(const char *filter) +{ + size_t len = filter ? strlen(filter) : 0; + + return len ? 
roundup(len + 1, 8) : 0; +} + static size_t -intel_pt_info_priv_size(struct auxtrace_record *itr __maybe_unused, - struct perf_evlist *evlist __maybe_unused) +intel_pt_info_priv_size(struct auxtrace_record *itr, struct perf_evlist *evlist) { - return INTEL_PT_AUXTRACE_PRIV_SIZE; + struct intel_pt_recording *ptr = + container_of(itr, struct intel_pt_recording, itr); + const char *filter = intel_pt_find_filter(evlist, ptr->intel_pt_pmu); + + ptr->priv_size = (INTEL_PT_AUXTRACE_PRIV_MAX * sizeof(u64)) + + intel_pt_filter_bytes(filter); + + return ptr->priv_size; } static void intel_pt_tsc_ctc_ratio(u32 *n, u32 *d) @@ -303,9 +330,12 @@ static int intel_pt_info_fill(struct auxtrace_record *itr, u64 tsc_bit, mtc_bit, mtc_freq_bits, cyc_bit, noretcomp_bit; u32 tsc_ctc_ratio_n, tsc_ctc_ratio_d; unsigned long max_non_turbo_ratio; + size_t filter_str_len; + const char *filter; + u64 *info; int err; - if (priv_size != INTEL_PT_AUXTRACE_PRIV_SIZE) + if (priv_size != ptr->priv_size) return -EINVAL; intel_pt_parse_terms(&intel_pt_pmu->format, "tsc", &tsc_bit); @@ -322,6 +352,9 @@ static int intel_pt_info_fill(struct auxtrace_record *itr, "%lu", &max_non_turbo_ratio) != 1) max_non_turbo_ratio = 0; + filter = intel_pt_find_filter(session->evlist, ptr->intel_pt_pmu); + filter_str_len = filter ? strlen(filter) : 0; + if (!session->evlist->nr_mmaps) return -EINVAL; @@ -357,6 +390,16 @@ static int intel_pt_info_fill(struct auxtrace_record *itr, auxtrace_info->priv[INTEL_PT_TSC_CTC_D] = tsc_ctc_ratio_d; auxtrace_info->priv[INTEL_PT_CYC_BIT] = cyc_bit; auxtrace_info->priv[INTEL_PT_MAX_NONTURBO_RATIO] = max_non_turbo_ratio; + auxtrace_info->priv[INTEL_PT_FILTER_STR_LEN] = filter_str_len; + + info = &auxtrace_info->priv[INTEL_PT_FILTER_STR_LEN] + 1; + + if (filter_str_len) { + size_t len = intel_pt_filter_bytes(filter); + + strncpy((char *)info, filter, len); + info += len >> 3; + } return 0; } diff --git a/tools/perf/util/intel-pt.h b/tools/perf/util/intel-pt.h index 8b83562..e13b14e 100644 --- a/tools/perf/util/intel-pt.h +++ b/tools/perf/util/intel-pt.h @@ -35,11 +35,10 @@ enum { INTEL_PT_TSC_CTC_D, INTEL_PT_CYC_BIT, INTEL_PT_MAX_NONTURBO_RATIO, + INTEL_PT_FILTER_STR_LEN, INTEL_PT_AUXTRACE_PRIV_MAX, }; -#define INTEL_PT_AUXTRACE_PRIV_SIZE (INTEL_PT_AUXTRACE_PRIV_MAX * sizeof(u64)) - struct auxtrace_record; struct perf_tool; union perf_event; -- cgit v0.10.2 From 2b9e32c47fd3edb0373067de7a151775b0e005c2 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 23 Sep 2016 17:38:46 +0300 Subject: perf intel-pt: Read address filter from AUXTRACE_INFO event Read the address filter from the AUXTRACE_INFO event in preparation for using it to assist in decoding. 
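The layout, as established by the previous patch, is: priv[INTEL_PT_FILTER_STR_LEN]
holds strlen() of the filter string, and the string itself, NUL-terminated and
zero-padded to a multiple of 8 bytes, occupies the u64 slots immediately after
that entry. A minimal sketch of the read side (the patch below additionally
bounds-checks the padded length against header.size, byte-swaps the copy when
the file needs swapping, and verifies the NUL terminator):

----
	u64 *info = &auxtrace_info->priv[INTEL_PT_FILTER_STR_LEN] + 1;
	size_t len = auxtrace_info->priv[INTEL_PT_FILTER_STR_LEN];

	if (len) {
		len = roundup(len + 1, 8);	/* same padding the writer used */
		pt->filter = memdup((const char *)info, len);
	}
----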
Signed-off-by: Adrian Hunter Cc: Jiri Olsa Cc: Masami Hiramatsu Cc: Mathieu Poirier Link: http://lkml.kernel.org/r/1474641528-18776-15-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index f16b00f..c9fec19 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -103,6 +103,8 @@ struct intel_pt { unsigned max_non_turbo_ratio; unsigned long num_events; + + char *filter; }; enum switch_state { @@ -1774,6 +1776,7 @@ static void intel_pt_free(struct perf_session *session) intel_pt_free_events(session); session->auxtrace = NULL; thread__put(pt->unknown_thread); + zfree(&pt->filter); free(pt); } @@ -2024,6 +2027,7 @@ static const char * const intel_pt_info_fmts[] = { [INTEL_PT_TSC_CTC_D] = " TSC:CTC denominator %"PRIu64"\n", [INTEL_PT_CYC_BIT] = " CYC bit %#"PRIx64"\n", [INTEL_PT_MAX_NONTURBO_RATIO] = " Max non-turbo ratio %"PRIu64"\n", + [INTEL_PT_FILTER_STR_LEN] = " Filter string len. %"PRIu64"\n", }; static void intel_pt_print_info(u64 *arr, int start, int finish) @@ -2037,6 +2041,14 @@ static void intel_pt_print_info(u64 *arr, int start, int finish) fprintf(stdout, intel_pt_info_fmts[i], arr[i]); } +static void intel_pt_print_info_str(const char *name, const char *str) +{ + if (!dump_trace) + return; + + fprintf(stdout, " %-20s%s\n", name, str ? str : ""); +} + static bool intel_pt_has(struct auxtrace_info_event *auxtrace_info, int pos) { return auxtrace_info->header.size >= @@ -2049,6 +2061,8 @@ int intel_pt_process_auxtrace_info(union perf_event *event, struct auxtrace_info_event *auxtrace_info = &event->auxtrace_info; size_t min_sz = sizeof(u64) * INTEL_PT_PER_CPU_MMAPS; struct intel_pt *pt; + void *info_end; + u64 *info; int err; if (auxtrace_info->header.size < sizeof(struct auxtrace_info_event) + @@ -2101,6 +2115,42 @@ int intel_pt_process_auxtrace_info(union perf_event *event, INTEL_PT_MAX_NONTURBO_RATIO); } + info = &auxtrace_info->priv[INTEL_PT_FILTER_STR_LEN] + 1; + info_end = (void *)info + auxtrace_info->header.size; + + if (intel_pt_has(auxtrace_info, INTEL_PT_FILTER_STR_LEN)) { + size_t len; + + len = auxtrace_info->priv[INTEL_PT_FILTER_STR_LEN]; + intel_pt_print_info(&auxtrace_info->priv[0], + INTEL_PT_FILTER_STR_LEN, + INTEL_PT_FILTER_STR_LEN); + if (len) { + const char *filter = (const char *)info; + + len = roundup(len + 1, 8); + info += len >> 3; + if ((void *)info > info_end) { + pr_err("%s: bad filter string length\n", __func__); + err = -EINVAL; + goto err_free_queues; + } + pt->filter = memdup(filter, len); + if (!pt->filter) { + err = -ENOMEM; + goto err_free_queues; + } + if (session->header.needs_swap) + mem_bswap_64(pt->filter, len); + if (pt->filter[len - 1]) { + pr_err("%s: filter string not null terminated\n", __func__); + err = -EINVAL; + goto err_free_queues; + } + } + intel_pt_print_info_str("Filter string", pt->filter); + } + pt->timeless_decoding = intel_pt_timeless_decoding(pt); pt->have_tsc = intel_pt_have_tsc(pt); pt->sampling_mode = false; @@ -2218,6 +2268,7 @@ err_free_queues: auxtrace_queues__free(&pt->queues); session->auxtrace = NULL; err_free: + zfree(&pt->filter); free(pt); return err; } -- cgit v0.10.2 From 9f1d122b528ef3ffcef1bdcf6a3dddf9450a864e Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 23 Sep 2016 17:38:47 +0300 Subject: perf intel-pt: Enable decoder to handle TIP.PGD with missing IP When address filters are used, the decoder must detect the end of a filter region (or a branch into a tracestop region) by matching 
Packet Generation Disabled (TIP.PGD) packets against the object code using the IP given in the packet. However, due to errata SKL014 "Intel PT TIP.PGD May Not Have Target IP Payload", that IP may not be present. Enable the decoder to handle that by adding a new callback function 'pgd_ip()' which indicates whether the IP is not traced, in which case that is the point where the trace was disabled. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Cc: Masami Hiramatsu Cc: Mathieu Poirier Link: http://lkml.kernel.org/r/1474641528-18776-16-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c index 8ff6c6a..7591a0c 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -80,6 +80,7 @@ struct intel_pt_decoder { int (*walk_insn)(struct intel_pt_insn *intel_pt_insn, uint64_t *insn_cnt_ptr, uint64_t *ip, uint64_t to_ip, uint64_t max_insn_cnt, void *data); + bool (*pgd_ip)(uint64_t ip, void *data); void *data; struct intel_pt_state state; const unsigned char *buf; @@ -186,6 +187,7 @@ struct intel_pt_decoder *intel_pt_decoder_new(struct intel_pt_params *params) decoder->get_trace = params->get_trace; decoder->walk_insn = params->walk_insn; + decoder->pgd_ip = params->pgd_ip; decoder->data = params->data; decoder->return_compression = params->return_compression; @@ -1008,6 +1010,19 @@ static int intel_pt_walk_tip(struct intel_pt_decoder *decoder) int err; err = intel_pt_walk_insn(decoder, &intel_pt_insn, 0); + if (err == INTEL_PT_RETURN && + decoder->pgd_ip && + decoder->pkt_state == INTEL_PT_STATE_TIP_PGD && + (decoder->state.type & INTEL_PT_BRANCH) && + decoder->pgd_ip(decoder->state.to_ip, decoder->data)) { + /* Unconditional branch leaving filter region */ + decoder->no_progress = 0; + decoder->pge = false; + decoder->continuous_period = false; + decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; + decoder->state.to_ip = 0; + return 0; + } if (err == INTEL_PT_RETURN) return 0; if (err) @@ -1036,6 +1051,21 @@ static int intel_pt_walk_tip(struct intel_pt_decoder *decoder) } if (intel_pt_insn.branch == INTEL_PT_BR_CONDITIONAL) { + uint64_t to_ip = decoder->ip + intel_pt_insn.length + + intel_pt_insn.rel; + + if (decoder->pgd_ip && + decoder->pkt_state == INTEL_PT_STATE_TIP_PGD && + decoder->pgd_ip(to_ip, decoder->data)) { + /* Conditional branch leaving filter region */ + decoder->pge = false; + decoder->continuous_period = false; + decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; + decoder->ip = to_ip; + decoder->state.from_ip = decoder->ip; + decoder->state.to_ip = 0; + return 0; + } intel_pt_log_at("ERROR: Conditional branch when expecting indirect branch", decoder->ip); decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC; diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h index 02c38fe..8939998 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h @@ -83,6 +83,7 @@ struct intel_pt_params { int (*walk_insn)(struct intel_pt_insn *intel_pt_insn, uint64_t *insn_cnt_ptr, uint64_t *ip, uint64_t to_ip, uint64_t max_insn_cnt, void *data); + bool (*pgd_ip)(uint64_t ip, void *data); void *data; bool return_compression; uint64_t period; -- cgit v0.10.2 From 2acee108f58045d07475516852d4282ab73904dc Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 23 Sep 2016 17:38:48 +0300 
Subject: perf intel-pt: Fix decoding when there are address filters Due to errata SKL014 "Intel PT TIP.PGD May Not Have Target IP Payload", the Intel PT decoder needs to match address filters against TIP.PGD packets. Parse the address filters and implement the decoder's 'pgd_ip()' callback to match the IP against the filter regions. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Cc: Masami Hiramatsu Cc: Mathieu Poirier Link: http://lkml.kernel.org/r/1474641528-18776-17-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index c9fec19..dc041d4 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -105,6 +105,7 @@ struct intel_pt { unsigned long num_events; char *filter; + struct addr_filters filts; }; enum switch_state { @@ -550,6 +551,76 @@ out_no_cache: return 0; } +static bool intel_pt_match_pgd_ip(struct intel_pt *pt, uint64_t ip, + uint64_t offset, const char *filename) +{ + struct addr_filter *filt; + bool have_filter = false; + bool hit_tracestop = false; + bool hit_filter = false; + + list_for_each_entry(filt, &pt->filts.head, list) { + if (filt->start) + have_filter = true; + + if ((filename && !filt->filename) || + (!filename && filt->filename) || + (filename && strcmp(filename, filt->filename))) + continue; + + if (!(offset >= filt->addr && offset < filt->addr + filt->size)) + continue; + + intel_pt_log("TIP.PGD ip %#"PRIx64" offset %#"PRIx64" in %s hit filter: %s offset %#"PRIx64" size %#"PRIx64"\n", + ip, offset, filename ? filename : "[kernel]", + filt->start ? "filter" : "stop", + filt->addr, filt->size); + + if (filt->start) + hit_filter = true; + else + hit_tracestop = true; + } + + if (!hit_tracestop && !hit_filter) + intel_pt_log("TIP.PGD ip %#"PRIx64" offset %#"PRIx64" in %s is not in a filter region\n", + ip, offset, filename ? 
filename : "[kernel]"); + + return hit_tracestop || (have_filter && !hit_filter); +} + +static int __intel_pt_pgd_ip(uint64_t ip, void *data) +{ + struct intel_pt_queue *ptq = data; + struct thread *thread; + struct addr_location al; + u8 cpumode; + u64 offset; + + if (ip >= ptq->pt->kernel_start) + return intel_pt_match_pgd_ip(ptq->pt, ip, ip, NULL); + + cpumode = PERF_RECORD_MISC_USER; + + thread = ptq->thread; + if (!thread) + return -EINVAL; + + thread__find_addr_map(thread, cpumode, MAP__FUNCTION, ip, &al); + if (!al.map || !al.map->dso) + return -EINVAL; + + offset = al.map->map_ip(al.map, ip); + + return intel_pt_match_pgd_ip(ptq->pt, ip, offset, + al.map->dso->long_name); +} + +static bool intel_pt_pgd_ip(uint64_t ip, void *data) +{ + return __intel_pt_pgd_ip(ip, data) > 0; +} + static bool intel_pt_get_config(struct intel_pt *pt, struct perf_event_attr *attr, u64 *config) { @@ -726,6 +797,9 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt, params.tsc_ctc_ratio_n = pt->tsc_ctc_ratio_n; params.tsc_ctc_ratio_d = pt->tsc_ctc_ratio_d; + if (pt->filts.cnt > 0) + params.pgd_ip = intel_pt_pgd_ip; + if (pt->synth_opts.instructions) { if (pt->synth_opts.period) { switch (pt->synth_opts.period_type) { @@ -1776,6 +1850,7 @@ static void intel_pt_free(struct perf_session *session) intel_pt_free_events(session); session->auxtrace = NULL; thread__put(pt->unknown_thread); + addr_filters__exit(&pt->filts); zfree(&pt->filter); free(pt); } @@ -2073,6 +2148,8 @@ int intel_pt_process_auxtrace_info(union perf_event *event, if (!pt) return -ENOMEM; + addr_filters__init(&pt->filts); + perf_config(intel_pt_perf_config, pt); err = auxtrace_queues__init(&pt->queues); @@ -2147,6 +2224,10 @@ int intel_pt_process_auxtrace_info(union perf_event *event, err = -EINVAL; goto err_free_queues; } + err = addr_filters__parse_bare_filter(&pt->filts, + filter); + if (err) + goto err_free_queues; } intel_pt_print_info_str("Filter string", pt->filter); } @@ -2268,6 +2349,7 @@ err_free_queues: auxtrace_queues__free(&pt->queues); session->auxtrace = NULL; err_free: + addr_filters__exit(&pt->filts); zfree(&pt->filter); free(pt); return err; -- cgit v0.10.2 From f8da4b5155ed9a639ee4250746b5f7ffa6302bf6 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 24 Sep 2016 00:34:57 +0900 Subject: perf probe: Ignore the error of finding inline instance Ignore the error when the perf probe failed to find inline function instances. This can happen when we search a method in C++ debuginfo. If there is completely no instance in target, perf probe can return an error. E.g. without this fix: ---- $ perf probe -x /usr/lib64/libstdc++.so.6 -vD showmanyc probe-definition(0): showmanyc symbol:showmanyc file:(null) line:0 offset:0 return:0 lazy:(null) 0 arguments symbol:catch file:(null) line:0 offset:0 return:0 lazy:(null) symbol:throw file:(null) line:0 offset:0 return:0 lazy:(null) symbol:rethrow file:(null) line:0 offset:0 return:0 lazy:(null) Open Debuginfo file: /usr/lib/debug/usr/lib64/libstdc++.so.6.0.22.debug Try to find probe point from debuginfo. Matched function: showmanyc An error occurred in debuginfo analysis (-2). Trying to use symbols. Failed to find symbol showmanyc in /usr/lib64/libstdc++.so.6.0.22 Error: Failed to add events. Reason: No such file or directory (Code: -2) ---- This is because one of showmanyc is defined as inline but no instance found. With this fix, it is succeeded to show as below. 
---- $ perf probe -x /usr/lib64/libstdc++.so.6 -D showmanyc p:probe_libstdc++/showmanyc /usr/lib64/libstdc++.so.6.0.22:0xb0e50 p:probe_libstdc++/showmanyc_1 /usr/lib64/libstdc++.so.6.0.22:0xc7c40 p:probe_libstdc++/showmanyc_2 /usr/lib64/libstdc++.so.6.0.22:0xecfa0 p:probe_libstdc++/showmanyc_3 /usr/lib64/libstdc++.so.6.0.22:0x115fc0 p:probe_libstdc++/showmanyc_4 /usr/lib64/libstdc++.so.6.0.22:0x121a90 ---- Signed-off-by: Masami Hiramatsu Tested-by: Arnaldo Carvalho de Melo Tested-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/147464489775.29804.3190419491209875936.stgit@devbox Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index 8daca4f..5fe8325 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -988,7 +988,8 @@ static int probe_point_search_cb(Dwarf_Die *sp_die, void *data) if (pp->file && strtailcmp(pp->file, dwarf_decl_file(sp_die))) return DWARF_CB_OK; - pr_debug("Matched function: %s\n", dwarf_diename(sp_die)); + pr_debug("Matched function: %s [%lx]\n", dwarf_diename(sp_die), + (unsigned long)dwarf_dieoffset(sp_die)); pf->fname = dwarf_decl_file(sp_die); if (pp->line) { /* Function relative line */ dwarf_decl_line(sp_die, &pf->lno); @@ -1011,7 +1012,7 @@ static int probe_point_search_cb(Dwarf_Die *sp_die, void *data) param->retval = die_walk_instances(sp_die, probe_point_inline_cb, (void *)pf); /* This could be a non-existed inline definition */ - if (param->retval == -ENOENT && strisglob(pp->function)) + if (param->retval == -ENOENT) param->retval = 0; } -- cgit v0.10.2 From 0ad45b33c58dca60dec7e1fb44766753bc4a7a38 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 24 Sep 2016 00:35:07 +0900 Subject: perf probe: Skip if the function address is 0 Skip probes if the entry address of the target function is 0. This can happen when we're handling C++ debuginfo files. E.g. without this fix, below case still fail. ---- $ ./perf probe -x /usr/lib64/libstdc++.so.6 -vD is_open probe-definition(0): is_open symbol:is_open file:(null) line:0 offset:0 return:0 lazy:(null) 0 arguments symbol:catch file:(null) line:0 offset:0 return:0 lazy:(null) symbol:throw file:(null) line:0 offset:0 return:0 lazy:(null) symbol:rethrow file:(null) line:0 offset:0 return:0 lazy:(null) Open Debuginfo file: /usr/lib/debug/usr/lib64/libstdc++.so.6.0.22.debug Try to find probe point from debuginfo. Matched function: is_open [295df] found inline addr: 0x8ca80 Probe point found: is_open+0 found inline addr: 0x8ca70 Probe point found: is_open+0 found inline addr: 0x8ca60 Probe point found: is_open+0 Matched function: is_open [6527f] Matched function: is_open [9fe8a] Probe point found: is_open+0 Matched function: is_open [19710b] found inline addr: 0xecca9 Probe point found: stdio_filebuf+57 found inline addr: 0x0 Probe point found: swap+0 Matched function: is_open [19fc9d] Probe point found: is_open+0 Found 7 probe_trace_events. p:probe_libstdc++/is_open /usr/lib64/libstdc++.so.6.0.22:0x8ca80 p:probe_libstdc++/is_open_1 /usr/lib64/libstdc++.so.6.0.22:0x8ca70 p:probe_libstdc++/is_open_2 /usr/lib64/libstdc++.so.6.0.22:0x8ca60 p:probe_libstdc++/is_open_3 /usr/lib64/libstdc++.so.6.0.22:0xb0ad0 p:probe_libstdc++/is_open_4 /usr/lib64/libstdc++.so.6.0.22:0xecca9 Failed to synthesize probe trace event. Error: Failed to add events. Reason: Invalid argument (Code: -22) ---- This is because some instances have entry_pc == 0 (see 19710b and 19fc9d). 
With this fix, those are skipped. ---- $ ./perf probe -x /usr/lib64/libstdc++.so.6 -D is_open p:probe_libstdc++/is_open /usr/lib64/libstdc++.so.6.0.22:0x8ca80 p:probe_libstdc++/is_open_1 /usr/lib64/libstdc++.so.6.0.22:0x8ca70 p:probe_libstdc++/is_open_2 /usr/lib64/libstdc++.so.6.0.22:0x8ca60 p:probe_libstdc++/is_open_3 /usr/lib64/libstdc++.so.6.0.22:0xb0ad0 p:probe_libstdc++/is_open_4 /usr/lib64/libstdc++.so.6.0.22:0xecca9 ---- Signed-off-by: Masami Hiramatsu Tested-by: Arnaldo Carvalho de Melo Tested-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/147464490707.29804.14277897643725143867.stgit@devbox Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index 5fe8325..df4debe 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -955,6 +955,11 @@ static int probe_point_inline_cb(Dwarf_Die *in_die, void *data) dwarf_diename(in_die)); return -ENOENT; } + if (addr == 0) { + pr_debug("%s has no valid entry address. skipped.\n", + dwarf_diename(in_die)); + return -ENOENT; + } pf->addr = addr; pf->addr += pp->offset; pr_debug("found inline addr: 0x%jx\n", @@ -998,8 +1003,13 @@ static int probe_point_search_cb(Dwarf_Die *sp_die, void *data) } else if (die_is_func_instance(sp_die)) { /* Instances always have the entry address */ dwarf_entrypc(sp_die, &pf->addr); + /* But in some case the entry address is 0 */ + if (pf->addr == 0) { + pr_debug("%s has no entry PC. Skipped\n", + dwarf_diename(sp_die)); + param->retval = 0; /* Real function */ - if (pp->lazy_line) + } else if (pp->lazy_line) param->retval = find_probe_point_lazy(sp_die, pf); else { skip_prologue(sp_die, pf); -- cgit v0.10.2 From 35726d3a4ca9ce10811032baf6fa0b3529aac087 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 24 Sep 2016 00:35:16 +0900 Subject: perf probe: Fix to cut off incompatible chars from group name Cut off the characters which can not use for group name of uprobes when making it based on executable filename. For example, if the exec name is libstdc++.so, without this fix perf probe generates "probe_libstdc++" as the group name, but it is failed to set because '+' can not be used for group name. With this fix perf accepts only alphabet, number or '_' for group name, thus perf generates "probe_libstdc" as the group name. E.g. with this fix, you can see the event name has no "+". ---- $ ./perf probe -x /usr/lib64/libstdc++.so.6 -D is_open p:probe_libstdc/is_open /usr/lib64/libstdc++.so.6.0.22:0x8ca80 p:probe_libstdc/is_open_1 /usr/lib64/libstdc++.so.6.0.22:0x8ca70 p:probe_libstdc/is_open_2 /usr/lib64/libstdc++.so.6.0.22:0x8ca60 p:probe_libstdc/is_open_3 /usr/lib64/libstdc++.so.6.0.22:0xb0ad0 p:probe_libstdc/is_open_4 /usr/lib64/libstdc++.so.6.0.22:0xecca9 ---- Committer note: Before this fix: # perf probe -x /usr/lib64/libstdc++.so.6 is_open Failed to write event: Invalid argument Error: Failed to add events. 
# After the fix: # perf probe -x /usr/lib64/libstdc++.so.6 is_open Added new events: probe_libstdc:is_open (on is_open in /usr/lib64/libstdc++.so.6.0.22) probe_libstdc:is_open_1 (on is_open in /usr/lib64/libstdc++.so.6.0.22) probe_libstdc:is_open_2 (on is_open in /usr/lib64/libstdc++.so.6.0.22) probe_libstdc:is_open_3 (on is_open in /usr/lib64/libstdc++.so.6.0.22) probe_libstdc:is_open_4 (on is_open in /usr/lib64/libstdc++.so.6.0.22) You can now use it in all perf tools, such as: perf record -e probe_libstdc:is_open_4 -aR sleep 1 # perf probe -l probe_libstdc:* probe_libstdc:is_open (on is_open@libstdc++-v3/include/fstream in /usr/lib64/libstdc++.so.6.0.22) probe_libstdc:is_open_1 (on is_open@libstdc++-v3/include/fstream in /usr/lib64/libstdc++.so.6.0.22) probe_libstdc:is_open_2 (on is_open@libstdc++-v3/include/fstream in /usr/lib64/libstdc++.so.6.0.22) probe_libstdc:is_open_3 (on is_open@src/c++98/basic_file.cc in /usr/lib64/libstdc++.so.6.0.22) probe_libstdc:is_open_4 (on stdio_filebuf:5@include/ext/stdio_filebuf.h in /usr/lib64/libstdc++.so.6.0.22) # Signed-off-by: Masami Hiramatsu Tested-by: Arnaldo Carvalho de Melo Tested-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/147464491667.29804.9553638175441827970.stgit@devbox Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index bc60ce4..fcfbef0 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -213,9 +213,13 @@ static int convert_exec_to_group(const char *exec, char **result) goto out; } - ptr2 = strpbrk(ptr1, "-._"); - if (ptr2) - *ptr2 = '\0'; + for (ptr2 = ptr1; ptr2 != '\0'; ptr2++) { + if (!isalnum(*ptr2) && *ptr2 != '_') { + *ptr2 = '\0'; + break; + } + } + ret = e_snprintf(buf, 64, "%s_%s", PERFPROBE_GROUP, ptr1); if (ret < 0) goto out; -- cgit v0.10.2 From d5a00296a63fdd049273f86d0a0cdef6b230f8e6 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 24 Sep 2016 00:35:31 +0900 Subject: perf probe: Match linkage name with mangled name Match linkage name with mangled name if exists. The linkage_name is used for storing mangled name of the object. Thus, this allows 'perf probe' to find appropriate probe point from mangled symbol as below. E.g. without this fix: ---- $ perf probe -x /usr/lib64/libstdc++.so.6 \ -D _ZNKSt15basic_fstreamXXIwSt11char_traitsIwEE7is_openEv Probe point '_ZNKSt15basic_fstreamXXIwSt11char_traitsIwEE7is_openEv' not found. Error: Failed to add events. ---- With this fix, perf probe can find the correct one. 
---- $ perf probe -x /usr/lib64/libstdc++.so.6 \ -D _ZNKSt15basic_fstreamXXIwSt11char_traitsIwEE7is_openEv p:probe_libstdc/_ZNKSt15basic_fstreamXXIwSt11char_traitsIwEE7is_openEv /usr/lib64/libstdc++.so.6.0.22:0x8ca60 ---- Committer notes: After the fix, setting it for real (no -D/--definition, that amounts to a --dry-run): # perf probe -x /usr/lib64/libstdc++.so.6 _ZNKSt15basic_fstreamXXIwSt11char_traitsIwEE7is_openEv Added new event: probe_libstdc:_ZNKSt15basic_fstreamXXIwSt11char_traitsIwEE7is_openEv (on _ZNKSt15basic_fstreamXXIwSt11char_traitsIwEE7is_openEv in /usr/lib64/libstdc++.so.6.0.22) You can now use it in all perf tools, such as: perf record -e probe_libstdc:_ZNKSt15basic_fstreamXXIwSt11char_traitsIwEE7is_openEv -aR sleep 1 # perf probe -l probe_libstdc:* probe_libstdc:_ZNKSt15basic_fstreamXXIwSt11char_traitsIwEE7is_openEv (on is_open@libstdc++-v3/include/fstream in /usr/lib64/libstdc++.so.6.0.22) # Reported-by: Thomas Gleixner Signed-off-by: Masami Hiramatsu Tested-by: Arnaldo Carvalho de Melo Tested-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/147464493162.29804.16715053505069382443.stgit@devbox Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c index faec899..41e068e 100644 --- a/tools/perf/util/dwarf-aux.c +++ b/tools/perf/util/dwarf-aux.c @@ -130,6 +130,22 @@ int cu_walk_functions_at(Dwarf_Die *cu_die, Dwarf_Addr addr, } /** + * die_get_linkage_name - Get the linkage name of the object + * @dw_die: A DIE of the object + * + * Get the linkage name attiribute of given @dw_die. + * For C++ binary, the linkage name will be the mangled symbol. + */ +const char *die_get_linkage_name(Dwarf_Die *dw_die) +{ + Dwarf_Attribute attr; + + if (dwarf_attr_integrate(dw_die, DW_AT_linkage_name, &attr) == NULL) + return NULL; + return dwarf_formstring(&attr); +} + +/** * die_compare_name - Compare diename and tname * @dw_die: a DIE * @tname: a string of target name @@ -145,18 +161,26 @@ bool die_compare_name(Dwarf_Die *dw_die, const char *tname) } /** - * die_match_name - Match diename and glob + * die_match_name - Match diename/linkage name and glob * @dw_die: a DIE * @glob: a string of target glob pattern * * Glob matching the name of @dw_die and @glob. Return false if matching fail. + * This also match linkage name. */ bool die_match_name(Dwarf_Die *dw_die, const char *glob) { const char *name; name = dwarf_diename(dw_die); - return name ? 
strglobmatch(name, glob) : false; + if (name && strglobmatch(name, glob)) + return true; + /* fall back to check linkage name */ + name = die_get_linkage_name(dw_die); + if (name && strglobmatch(name, glob)) + return true; + + return false; } /** diff --git a/tools/perf/util/dwarf-aux.h b/tools/perf/util/dwarf-aux.h index 8b6d2f8..8ac53bf 100644 --- a/tools/perf/util/dwarf-aux.h +++ b/tools/perf/util/dwarf-aux.h @@ -38,6 +38,9 @@ int cu_find_lineinfo(Dwarf_Die *cudie, unsigned long addr, int cu_walk_functions_at(Dwarf_Die *cu_die, Dwarf_Addr addr, int (*callback)(Dwarf_Die *, void *), void *data); +/* Get DW_AT_linkage_name (should be NULL for C binary) */ +const char *die_get_linkage_name(Dwarf_Die *dw_die); + /* Ensure that this DIE is a subprogram and definition (not declaration) */ bool die_is_func_def(Dwarf_Die *dw_die); -- cgit v0.10.2 From d18019a53a07e009899ff6b8dc5ec30f249360d9 Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Mon, 19 Sep 2016 02:38:20 -0400 Subject: perf tests: Add dwarf unwind test for powerpc The user stack dump feature was recently added for powerpc. But there was no test case available to test it. This test works same as on other architectures by preparing a stack frame on the perf test thread and comparing each frame by unwinding it. $ ./perf test 50 50: Test dwarf unwind : Ok User stack dump for powerpc: https://lkml.org/lkml/2016/4/28/482 Signed-off-by: Ravi Bangoria Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Anju T Sudhakar Cc: Josh Poimboeuf Cc: Masami Hiramatsu Cc: Matt Fleming Cc: Michael Ellerman Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wang Nan Cc: linuxppc-dev@lists.ozlabs.org Link: http://lkml.kernel.org/r/1474267100-31079-1-git-send-email-ravi.bangoria@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/arch/powerpc/Build b/tools/perf/arch/powerpc/Build index 54afe4a..db52fa2 100644 --- a/tools/perf/arch/powerpc/Build +++ b/tools/perf/arch/powerpc/Build @@ -1 +1,2 @@ libperf-y += util/ +libperf-y += tests/ diff --git a/tools/perf/arch/powerpc/include/arch-tests.h b/tools/perf/arch/powerpc/include/arch-tests.h new file mode 100644 index 0000000..84d8ded --- /dev/null +++ b/tools/perf/arch/powerpc/include/arch-tests.h @@ -0,0 +1,13 @@ +#ifndef ARCH_TESTS_H +#define ARCH_TESTS_H + +#ifdef HAVE_DWARF_UNWIND_SUPPORT +struct thread; +struct perf_sample; +int test__arch_unwind_sample(struct perf_sample *sample, + struct thread *thread); +#endif + +extern struct test arch_tests[]; + +#endif diff --git a/tools/perf/arch/powerpc/include/perf_regs.h b/tools/perf/arch/powerpc/include/perf_regs.h index 75de0e9..c12f4e8 100644 --- a/tools/perf/arch/powerpc/include/perf_regs.h +++ b/tools/perf/arch/powerpc/include/perf_regs.h @@ -5,6 +5,8 @@ #include #include +void perf_regs_load(u64 *regs); + #define PERF_REGS_MASK ((1ULL << PERF_REG_POWERPC_MAX) - 1) #define PERF_REGS_MAX PERF_REG_POWERPC_MAX #ifdef __powerpc64__ diff --git a/tools/perf/arch/powerpc/tests/Build b/tools/perf/arch/powerpc/tests/Build new file mode 100644 index 0000000..d827ef3 --- /dev/null +++ b/tools/perf/arch/powerpc/tests/Build @@ -0,0 +1,4 @@ +libperf-$(CONFIG_DWARF_UNWIND) += regs_load.o +libperf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o + +libperf-y += arch-tests.o diff --git a/tools/perf/arch/powerpc/tests/arch-tests.c b/tools/perf/arch/powerpc/tests/arch-tests.c new file mode 100644 index 0000000..e24f462 --- /dev/null +++ b/tools/perf/arch/powerpc/tests/arch-tests.c @@ -0,0 +1,15 @@ +#include +#include "tests/tests.h" +#include "arch-tests.h" + +struct 
test arch_tests[] = { +#ifdef HAVE_DWARF_UNWIND_SUPPORT + { + .desc = "Test dwarf unwind", + .func = test__dwarf_unwind, + }, +#endif + { + .func = NULL, + }, +}; diff --git a/tools/perf/arch/powerpc/tests/dwarf-unwind.c b/tools/perf/arch/powerpc/tests/dwarf-unwind.c new file mode 100644 index 0000000..0bac313 --- /dev/null +++ b/tools/perf/arch/powerpc/tests/dwarf-unwind.c @@ -0,0 +1,62 @@ +#include +#include "perf_regs.h" +#include "thread.h" +#include "map.h" +#include "event.h" +#include "debug.h" +#include "tests/tests.h" +#include "arch-tests.h" + +#define STACK_SIZE 8192 + +static int sample_ustack(struct perf_sample *sample, + struct thread *thread, u64 *regs) +{ + struct stack_dump *stack = &sample->user_stack; + struct map *map; + unsigned long sp; + u64 stack_size, *buf; + + buf = malloc(STACK_SIZE); + if (!buf) { + pr_debug("failed to allocate sample uregs data\n"); + return -1; + } + + sp = (unsigned long) regs[PERF_REG_POWERPC_R1]; + + map = map_groups__find(thread->mg, MAP__VARIABLE, (u64) sp); + if (!map) { + pr_debug("failed to get stack map\n"); + free(buf); + return -1; + } + + stack_size = map->end - sp; + stack_size = stack_size > STACK_SIZE ? STACK_SIZE : stack_size; + + memcpy(buf, (void *) sp, stack_size); + stack->data = (char *) buf; + stack->size = stack_size; + return 0; +} + +int test__arch_unwind_sample(struct perf_sample *sample, + struct thread *thread) +{ + struct regs_dump *regs = &sample->user_regs; + u64 *buf; + + buf = calloc(1, sizeof(u64) * PERF_REGS_MAX); + if (!buf) { + pr_debug("failed to allocate sample uregs data\n"); + return -1; + } + + perf_regs_load(buf); + regs->abi = PERF_SAMPLE_REGS_ABI; + regs->regs = buf; + regs->mask = PERF_REGS_MASK; + + return sample_ustack(sample, thread, buf); +} diff --git a/tools/perf/arch/powerpc/tests/regs_load.S b/tools/perf/arch/powerpc/tests/regs_load.S new file mode 100644 index 0000000..d76c9a3 --- /dev/null +++ b/tools/perf/arch/powerpc/tests/regs_load.S @@ -0,0 +1,94 @@ +#include + +/* Offset is based on macros from arch/powerpc/include/uapi/asm/ptrace.h. 
*/ +#define R0 0 +#define R1 1 * 8 +#define R2 2 * 8 +#define R3 3 * 8 +#define R4 4 * 8 +#define R5 5 * 8 +#define R6 6 * 8 +#define R7 7 * 8 +#define R8 8 * 8 +#define R9 9 * 8 +#define R10 10 * 8 +#define R11 11 * 8 +#define R12 12 * 8 +#define R13 13 * 8 +#define R14 14 * 8 +#define R15 15 * 8 +#define R16 16 * 8 +#define R17 17 * 8 +#define R18 18 * 8 +#define R19 19 * 8 +#define R20 20 * 8 +#define R21 21 * 8 +#define R22 22 * 8 +#define R23 23 * 8 +#define R24 24 * 8 +#define R25 25 * 8 +#define R26 26 * 8 +#define R27 27 * 8 +#define R28 28 * 8 +#define R29 29 * 8 +#define R30 30 * 8 +#define R31 31 * 8 +#define NIP 32 * 8 +#define CTR 35 * 8 +#define LINK 36 * 8 +#define XER 37 * 8 + +.globl perf_regs_load +perf_regs_load: + std 0, R0(3) + std 1, R1(3) + std 2, R2(3) + std 3, R3(3) + std 4, R4(3) + std 5, R5(3) + std 6, R6(3) + std 7, R7(3) + std 8, R8(3) + std 9, R9(3) + std 10, R10(3) + std 11, R11(3) + std 12, R12(3) + std 13, R13(3) + std 14, R14(3) + std 15, R15(3) + std 16, R16(3) + std 17, R17(3) + std 18, R18(3) + std 19, R19(3) + std 20, R20(3) + std 21, R21(3) + std 22, R22(3) + std 23, R23(3) + std 24, R24(3) + std 25, R25(3) + std 26, R26(3) + std 27, R27(3) + std 28, R28(3) + std 29, R29(3) + std 30, R30(3) + std 31, R31(3) + + /* store NIP */ + mflr 4 + std 4, NIP(3) + + /* Store LR */ + std 4, LINK(3) + + /* Store XER */ + mfxer 4 + std 4, XER(3) + + /* Store CTR */ + mfctr 4 + std 4, CTR(3) + + /* Restore original value of r4 */ + ld 4, R4(3) + + blr diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build index dc51bc5..8a4ce49 100644 --- a/tools/perf/tests/Build +++ b/tools/perf/tests/Build @@ -71,7 +71,7 @@ $(OUTPUT)tests/llvm-src-relocation.c: tests/bpf-script-test-relocation.c tests/B $(Q)sed -e 's/"/\\"/g' -e 's/\(.*\)/"\1\\n"/g' $< >> $@ $(Q)echo ';' >> $@ -ifeq ($(ARCH),$(filter $(ARCH),x86 arm arm64)) +ifeq ($(ARCH),$(filter $(ARCH),x86 arm arm64 powerpc)) perf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o endif diff --git a/tools/perf/tests/dwarf-unwind.c b/tools/perf/tests/dwarf-unwind.c index 8f6eb85..1046491 100644 --- a/tools/perf/tests/dwarf-unwind.c +++ b/tools/perf/tests/dwarf-unwind.c @@ -11,7 +11,7 @@ #include "thread.h" #include "callchain.h" -#if defined (__x86_64__) || defined (__i386__) +#if defined (__x86_64__) || defined (__i386__) || defined (__powerpc__) #include "arch-tests.h" #endif -- cgit v0.10.2