author     Scott Wood <scottwood@freescale.com>   2014-04-07 23:49:35 (GMT)
committer  Scott Wood <scottwood@freescale.com>   2014-04-07 23:49:35 (GMT)
commit     62b8c978ee6b8d135d9e7953221de58000dba986 (patch)
tree       683b04b2e627f6710c22c151b23c8cc9a165315e /kernel/events/core.c
parent     78fd82238d0e5716578c326404184a27ba67fd6e (diff)
download   linux-fsl-qoriq-62b8c978ee6b8d135d9e7953221de58000dba986.tar.xz
Rewind v3.13-rc3+ (78fd82238d0e5716) to v3.12
Diffstat (limited to 'kernel/events/core.c')
-rw-r--r--  kernel/events/core.c  180
1 file changed, 75 insertions(+), 105 deletions(-)
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 72348dc..953c143 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -175,8 +175,8 @@ int sysctl_perf_event_sample_rate __read_mostly = DEFAULT_MAX_SAMPLE_RATE;
 static int max_samples_per_tick __read_mostly =
 	DIV_ROUND_UP(DEFAULT_MAX_SAMPLE_RATE, HZ);
 static int perf_sample_period_ns __read_mostly = DEFAULT_SAMPLE_PERIOD_NS;
-static int perf_sample_allowed_ns __read_mostly =
-	DEFAULT_SAMPLE_PERIOD_NS * DEFAULT_CPU_TIME_MAX_PERCENT / 100;
+static atomic_t perf_sample_allowed_ns __read_mostly =
+	ATOMIC_INIT( DEFAULT_SAMPLE_PERIOD_NS * DEFAULT_CPU_TIME_MAX_PERCENT / 100);
 
 void update_perf_cpu_limits(void)
 {
@@ -184,7 +184,7 @@ void update_perf_cpu_limits(void)
 	tmp *= sysctl_perf_cpu_time_max_percent;
 	do_div(tmp, 100);
 
-	ACCESS_ONCE(perf_sample_allowed_ns) = tmp;
+	atomic_set(&perf_sample_allowed_ns, tmp);
 }
 
 static int perf_rotate_context(struct perf_cpu_context *cpuctx);
@@ -193,7 +193,7 @@ int perf_proc_update_handler(struct ctl_table *table, int write,
 		void __user *buffer, size_t *lenp,
 		loff_t *ppos)
 {
-	int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+	int ret = proc_dointvec(table, write, buffer, lenp, ppos);
 
 	if (ret || !write)
 		return ret;
@@ -228,15 +228,14 @@ int perf_cpu_time_max_percent_handler(struct ctl_table *table, int write,
  * we detect that events are taking too long.
  */
 #define NR_ACCUMULATED_SAMPLES 128
-static DEFINE_PER_CPU(u64, running_sample_length);
+DEFINE_PER_CPU(u64, running_sample_length);
 
 void perf_sample_event_took(u64 sample_len_ns)
 {
 	u64 avg_local_sample_len;
 	u64 local_samples_len;
-	u64 allowed_ns = ACCESS_ONCE(perf_sample_allowed_ns);
 
-	if (allowed_ns == 0)
+	if (atomic_read(&perf_sample_allowed_ns) == 0)
 		return;
 
 	/* decay the counter by 1 average sample */
@@ -252,7 +251,7 @@ void perf_sample_event_took(u64 sample_len_ns)
 	 */
 	avg_local_sample_len = local_samples_len/NR_ACCUMULATED_SAMPLES;
 
-	if (avg_local_sample_len <= allowed_ns)
+	if (avg_local_sample_len <= atomic_read(&perf_sample_allowed_ns))
 		return;
 
 	if (max_samples_per_tick <= 1)
@@ -263,9 +262,10 @@ void perf_sample_event_took(u64 sample_len_ns)
 	perf_sample_period_ns = NSEC_PER_SEC / sysctl_perf_event_sample_rate;
 
 	printk_ratelimited(KERN_WARNING
-			"perf samples too long (%lld > %lld), lowering "
+			"perf samples too long (%lld > %d), lowering "
 			"kernel.perf_event_max_sample_rate to %d\n",
-			avg_local_sample_len, allowed_ns,
+			avg_local_sample_len,
+			atomic_read(&perf_sample_allowed_ns),
 			sysctl_perf_event_sample_rate);
 
 	update_perf_cpu_limits();
@@ -899,7 +899,6 @@ static void unclone_ctx(struct perf_event_context *ctx)
 		put_ctx(ctx->parent_ctx);
 		ctx->parent_ctx = NULL;
 	}
-	ctx->generation++;
 }
 
 static u32 perf_event_pid(struct perf_event *event, struct task_struct *p)
@@ -1137,8 +1136,6 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)
 	ctx->nr_events++;
 	if (event->attr.inherit_stat)
 		ctx->nr_stat++;
-
-	ctx->generation++;
 }
 
 /*
@@ -1204,9 +1201,6 @@ static void perf_event__header_size(struct perf_event *event)
 	if (sample_type & PERF_SAMPLE_DATA_SRC)
 		size += sizeof(data->data_src.val);
 
-	if (sample_type & PERF_SAMPLE_TRANSACTION)
-		size += sizeof(data->txn);
-
 	event->header_size = size;
 }
@@ -1316,8 +1310,6 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx)
 	 */
 	if (event->state > PERF_EVENT_STATE_OFF)
 		event->state = PERF_EVENT_STATE_OFF;
-
-	ctx->generation++;
 }
 
 static void perf_group_detach(struct perf_event *event)
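The @@ -228 and @@ -263 hunks above swap the throttle's storage back to an atomic_t, but both versions share the same decay-and-compare idea: running_sample_length approximates the sum of the last NR_ACCUMULATED_SAMPLES sample lengths by shedding one average per new sample, and the sample rate is lowered once the average exceeds the allowed budget. A minimal userspace sketch of that scheme with a hypothetical test harness (sample_event_took() here is illustrative, not the kernel function):

#include <stdint.h>
#include <stdio.h>

#define NR_ACCUMULATED_SAMPLES 128

/* Per-CPU in the kernel; a single global is enough for the sketch. */
static uint64_t running_sample_length;

/* Returns nonzero when the decayed average exceeds allowed_ns. */
static int sample_event_took(uint64_t sample_len_ns, uint64_t allowed_ns)
{
	/* decay the counter by one average sample, then add the new one */
	running_sample_length -= running_sample_length / NR_ACCUMULATED_SAMPLES;
	running_sample_length += sample_len_ns;

	return running_sample_length / NR_ACCUMULATED_SAMPLES > allowed_ns;
}

int main(void)
{
	/* Feed 10us samples against a 5us budget; the average crosses the
	 * budget after roughly 89 samples, once 1 - (127/128)^n > 0.5. */
	for (int i = 1; i <= 256; i++) {
		if (sample_event_took(10000, 5000)) {
			printf("sample %d: avg too long, would lower rate\n", i);
			break;
		}
	}
	return 0;
}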
@@ -2154,38 +2146,22 @@ static void ctx_sched_out(struct perf_event_context *ctx,
 }
 
 /*
- * Test whether two contexts are equivalent, i.e. whether they have both been
- * cloned from the same version of the same context.
- *
- * Equivalence is measured using a generation number in the context that is
- * incremented on each modification to it; see unclone_ctx(), list_add_event()
- * and list_del_event().
+ * Test whether two contexts are equivalent, i.e. whether they
+ * have both been cloned from the same version of the same context
+ * and they both have the same number of enabled events.
+ * If the number of enabled events is the same, then the set
+ * of enabled events should be the same, because these are both
+ * inherited contexts, therefore we can't access individual events
+ * in them directly with an fd; we can only enable/disable all
+ * events via prctl, or enable/disable all events in a family
+ * via ioctl, which will have the same effect on both contexts.
  */
 static int context_equiv(struct perf_event_context *ctx1,
 			 struct perf_event_context *ctx2)
 {
-	/* Pinning disables the swap optimization */
-	if (ctx1->pin_count || ctx2->pin_count)
-		return 0;
-
-	/* If ctx1 is the parent of ctx2 */
-	if (ctx1 == ctx2->parent_ctx && ctx1->generation == ctx2->parent_gen)
-		return 1;
-
-	/* If ctx2 is the parent of ctx1 */
-	if (ctx1->parent_ctx == ctx2 && ctx1->parent_gen == ctx2->generation)
-		return 1;
-
-	/*
-	 * If ctx1 and ctx2 have the same parent; we flatten the parent
-	 * hierarchy, see perf_event_init_context().
-	 */
-	if (ctx1->parent_ctx && ctx1->parent_ctx == ctx2->parent_ctx &&
-			ctx1->parent_gen == ctx2->parent_gen)
-		return 1;
-
-	/* Unmatched */
-	return 0;
+	return ctx1->parent_ctx && ctx1->parent_ctx == ctx2->parent_ctx
+		&& ctx1->parent_gen == ctx2->parent_gen
+		&& !ctx1->pin_count && !ctx2->pin_count;
 }
 
 static void __perf_event_sync_stat(struct perf_event *event,
@@ -2234,6 +2210,9 @@ static void __perf_event_sync_stat(struct perf_event *event,
 	perf_event_update_userpage(next_event);
 }
 
+#define list_next_entry(pos, member) \
+	list_entry(pos->member.next, typeof(*pos), member)
+
 static void perf_event_sync_stat(struct perf_event_context *ctx,
 				   struct perf_event_context *next_ctx)
 {
@@ -2265,7 +2244,7 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn,
 {
 	struct perf_event_context *ctx = task->perf_event_ctxp[ctxn];
 	struct perf_event_context *next_ctx;
-	struct perf_event_context *parent, *next_parent;
+	struct perf_event_context *parent;
 	struct perf_cpu_context *cpuctx;
 	int do_switch = 1;
 
@@ -2277,18 +2256,10 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn,
 		return;
 
 	rcu_read_lock();
-	next_ctx = next->perf_event_ctxp[ctxn];
-	if (!next_ctx)
-		goto unlock;
-
 	parent = rcu_dereference(ctx->parent_ctx);
-	next_parent = rcu_dereference(next_ctx->parent_ctx);
-
-	/* If neither context have a parent context; they cannot be clones. */
-	if (!parent && !next_parent)
-		goto unlock;
-
-	if (next_parent == ctx || next_ctx == parent || next_parent == parent) {
+	next_ctx = next->perf_event_ctxp[ctxn];
+	if (parent && next_ctx &&
+	    rcu_dereference(next_ctx->parent_ctx) == parent) {
 		/*
 		 * Looks like the two contexts are clones, so we might be
		 * able to optimize the context switch.  We lock both
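The @@ -2234 hunk reintroduces a local list_next_entry() helper: given a struct that embeds a list_head, it follows the node's next pointer and recovers the enclosing struct with container_of() pointer arithmetic. A self-contained sketch, assuming a GNU-C compiler for typeof (the minimal list types below stand in for <linux/list.h>):

#include <stddef.h>
#include <stdio.h>

struct list_head { struct list_head *next, *prev; };

/* Subtract the member offset to get back to the containing struct. */
#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))
#define list_entry(ptr, type, member) container_of(ptr, type, member)
#define list_next_entry(pos, member) \
	list_entry((pos)->member.next, typeof(*(pos)), member)

struct event { int id; struct list_head entry; };

int main(void)
{
	struct event a = { .id = 1 }, b = { .id = 2 };

	/* hand-link a <-> b into a two-node circular list */
	a.entry.next = &b.entry; a.entry.prev = &b.entry;
	b.entry.next = &a.entry; b.entry.prev = &a.entry;

	struct event *n = list_next_entry(&a, entry);
	printf("next of %d is %d\n", a.id, n->id);	/* next of 1 is 2 */
	return 0;
}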
@@ -2316,7 +2287,6 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn,
 		raw_spin_unlock(&next_ctx->lock);
 		raw_spin_unlock(&ctx->lock);
 	}
-unlock:
 	rcu_read_unlock();
 
 	if (do_switch) {
@@ -4602,9 +4572,6 @@ void perf_output_sample(struct perf_output_handle *handle,
 	if (sample_type & PERF_SAMPLE_DATA_SRC)
 		perf_output_put(handle, data->data_src.val);
 
-	if (sample_type & PERF_SAMPLE_TRANSACTION)
-		perf_output_put(handle, data->txn);
-
 	if (!event->attr.watermark) {
 		int wakeup_events = event->attr.wakeup_events;
 
@@ -5133,26 +5100,27 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
 	unsigned int size;
 	char tmp[16];
 	char *buf = NULL;
-	char *name;
+	const char *name;
+
+	memset(tmp, 0, sizeof(tmp));
 
 	if (file) {
 		struct inode *inode;
 		dev_t dev;
-
-		buf = kmalloc(PATH_MAX, GFP_KERNEL);
-		if (!buf) {
-			name = "//enomem";
-			goto cpy_name;
-		}
 		/*
-		 * d_path() works from the end of the rb backwards, so we
+		 * d_path works from the end of the rb backwards, so we
 		 * need to add enough zero bytes after the string to handle
 		 * the 64bit alignment we do later.
 		 */
-		name = d_path(&file->f_path, buf, PATH_MAX - sizeof(u64));
+		buf = kzalloc(PATH_MAX + sizeof(u64), GFP_KERNEL);
+		if (!buf) {
+			name = strncpy(tmp, "//enomem", sizeof(tmp));
+			goto got_name;
+		}
+		name = d_path(&file->f_path, buf, PATH_MAX);
 		if (IS_ERR(name)) {
-			name = "//toolong";
-			goto cpy_name;
+			name = strncpy(tmp, "//toolong", sizeof(tmp));
+			goto got_name;
 		}
 		inode = file_inode(vma->vm_file);
 		dev = inode->i_sb->s_dev;
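The comment preserved in the hunk above explains why the restored code kzallocs PATH_MAX + sizeof(u64) instead of kmallocing PATH_MAX: d_path() writes the path at the tail of the buffer, and the record size is later rounded up to a multiple of sizeof(u64), so the padding bytes between the terminating NUL and the aligned end reach userspace and must already be zero. A small userspace sketch of that rounding (names hypothetical, not kernel code):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Same rounding as the kernel's ALIGN(len, sizeof(u64)). */
#define ALIGN_UP(x, a) (((x) + (a) - 1) & ~((uint64_t)(a) - 1))

int main(void)
{
	const char *name = "/usr/lib/libfoo.so";
	uint64_t len = strlen(name) + 1;		/* string plus NUL: 19 */
	uint64_t size = ALIGN_UP(len, sizeof(uint64_t));

	/* 19 bytes round up to 24; the 5 padding bytes must be zero,
	 * or stale heap contents would leak into the ring buffer. */
	printf("len=%llu aligned=%llu padding=%llu\n",
	       (unsigned long long)len, (unsigned long long)size,
	       (unsigned long long)(size - len));
	return 0;
}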
@@ -5160,39 +5128,34 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
 		gen = inode->i_generation;
 		maj = MAJOR(dev);
 		min = MINOR(dev);
-		goto got_name;
+
 	} else {
-		name = (char *)arch_vma_name(vma);
-		if (name)
-			goto cpy_name;
+		if (arch_vma_name(mmap_event->vma)) {
+			name = strncpy(tmp, arch_vma_name(mmap_event->vma),
+				       sizeof(tmp) - 1);
+			tmp[sizeof(tmp) - 1] = '\0';
+			goto got_name;
+		}
 
-		if (vma->vm_start <= vma->vm_mm->start_brk &&
+		if (!vma->vm_mm) {
+			name = strncpy(tmp, "[vdso]", sizeof(tmp));
+			goto got_name;
+		} else if (vma->vm_start <= vma->vm_mm->start_brk &&
 				vma->vm_end >= vma->vm_mm->brk) {
-			name = "[heap]";
-			goto cpy_name;
-		}
-		if (vma->vm_start <= vma->vm_mm->start_stack &&
+			name = strncpy(tmp, "[heap]", sizeof(tmp));
+			goto got_name;
+		} else if (vma->vm_start <= vma->vm_mm->start_stack &&
 				vma->vm_end >= vma->vm_mm->start_stack) {
-			name = "[stack]";
-			goto cpy_name;
+			name = strncpy(tmp, "[stack]", sizeof(tmp));
+			goto got_name;
 		}
 
-		name = "//anon";
-		goto cpy_name;
+		name = strncpy(tmp, "//anon", sizeof(tmp));
+		goto got_name;
 	}
 
-cpy_name:
-	strlcpy(tmp, name, sizeof(tmp));
-	name = tmp;
 got_name:
-	/*
-	 * Since our buffer works in 8 byte units we need to align our string
-	 * size to a multiple of 8. However, we must guarantee the tail end is
-	 * zero'd out to avoid leaking random bits to userspace.
-	 */
-	size = strlen(name)+1;
-	while (!IS_ALIGNED(size, sizeof(u64)))
-		name[size++] = '\0';
+	size = ALIGN(strlen(name)+1, sizeof(u64));
 
 	mmap_event->file_name = name;
 	mmap_event->file_size = size;
@@ -5680,6 +5643,11 @@ static void swevent_hlist_put(struct perf_event *event)
 {
 	int cpu;
 
+	if (event->cpu != -1) {
+		swevent_hlist_put_cpu(event, event->cpu);
+		return;
+	}
+
 	for_each_possible_cpu(cpu)
 		swevent_hlist_put_cpu(event, cpu);
 }
@@ -5713,6 +5681,9 @@ static int swevent_hlist_get(struct perf_event *event)
 	int err;
 	int cpu, failed_cpu;
 
+	if (event->cpu != -1)
+		return swevent_hlist_get_cpu(event, event->cpu);
+
 	get_online_cpus();
 	for_each_possible_cpu(cpu) {
 		err = swevent_hlist_get_cpu(event, cpu);
@@ -6321,7 +6292,6 @@ type_show(struct device *dev, struct device_attribute *attr, char *page)
 
 	return snprintf(page, PAGE_SIZE-1, "%d\n", pmu->type);
 }
-static DEVICE_ATTR_RO(type);
 
 static ssize_t
 perf_event_mux_interval_ms_show(struct device *dev,
@@ -6366,19 +6336,17 @@ perf_event_mux_interval_ms_store(struct device *dev,
 
 	return count;
 }
-static DEVICE_ATTR_RW(perf_event_mux_interval_ms);
 
-static struct attribute *pmu_dev_attrs[] = {
-	&dev_attr_type.attr,
-	&dev_attr_perf_event_mux_interval_ms.attr,
-	NULL,
+static struct device_attribute pmu_dev_attrs[] = {
+	__ATTR_RO(type),
+	__ATTR_RW(perf_event_mux_interval_ms),
+	__ATTR_NULL,
 };
-ATTRIBUTE_GROUPS(pmu_dev);
 
 static int pmu_bus_running;
 
 static struct bus_type pmu_bus = {
 	.name		= "event_source",
-	.dev_groups	= pmu_dev_groups,
+	.dev_attrs	= pmu_dev_attrs,
 };
 
 static void pmu_dev_release(struct device *dev)
@@ -7158,6 +7126,7 @@ SYSCALL_DEFINE5(perf_event_open,
 	}
 
 	perf_install_in_context(ctx, event, event->cpu);
+	++ctx->generation;
 	perf_unpin_context(ctx);
 	mutex_unlock(&ctx->mutex);
 
@@ -7240,6 +7209,7 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
 	WARN_ON_ONCE(ctx->parent_ctx);
 	mutex_lock(&ctx->mutex);
 	perf_install_in_context(ctx, event, cpu);
+	++ctx->generation;
 	perf_unpin_context(ctx);
 	mutex_unlock(&ctx->mutex);
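The @@ -5680 and @@ -5713 hunks restore the v3.12 fast path that touches only the target CPU's hlist when an event is bound to one CPU; in the event->cpu == -1 case, the loop visible as context still takes a reference on every possible CPU and unwinds the ones already taken if any acquisition fails. A runnable userspace sketch of that acquire-all-or-roll-back shape, with a fake per-CPU table and an injected failure (all names hypothetical):

#include <errno.h>
#include <stdio.h>

#define NR_CPUS 4

static int refcount[NR_CPUS];

static int hlist_get_cpu(int cpu)
{
	if (cpu == 2)		/* inject a failure on CPU 2 */
		return -ENOMEM;
	refcount[cpu]++;
	return 0;
}

static void hlist_put_cpu(int cpu)
{
	refcount[cpu]--;
}

static int hlist_get(void)
{
	int cpu, failed_cpu, err;

	for (cpu = 0; cpu < NR_CPUS; cpu++) {
		err = hlist_get_cpu(cpu);
		if (err) {
			failed_cpu = cpu;
			goto fail;
		}
	}
	return 0;
fail:
	/* unwind only the CPUs acquired before the failure */
	for (cpu = 0; cpu < failed_cpu; cpu++)
		hlist_put_cpu(cpu);
	return err;
}

int main(void)
{
	printf("hlist_get() = %d\n", hlist_get());	/* -12 (ENOMEM) */
	for (int c = 0; c < NR_CPUS; c++)
		printf("cpu%d refcount = %d\n", c, refcount[c]);	/* all 0 */
	return 0;
}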