author    Scott Wood <scottwood@freescale.com>  2014-04-08 01:00:49 (GMT)
committer Scott Wood <scottwood@freescale.com>  2014-04-08 19:58:35 (GMT)
commit    47d2261a3fa71cde24263559a4219a25e50d8c89 (patch)
tree      28774d5b330ccf1b777a3af222d8356918328013 /kernel/events
parent    fb7f27080adc65cd5f341bdf56a1d0c14f316c1b (diff)
parent    5fb9d37f27351e42f002e372074249f92cbdf815 (diff)
download  linux-fsl-qoriq-47d2261a3fa71cde24263559a4219a25e50d8c89.tar.xz
Merge branch 'merge' into sdk-v1.6.x
This reverts v3.13-rc3+ (78fd82238d0e5716) to v3.12, except for commits
which I noticed which appear relevant to the SDK.

Signed-off-by: Scott Wood <scottwood@freescale.com>

Conflicts:
	arch/powerpc/include/asm/kvm_host.h
	arch/powerpc/kvm/book3s_hv_rmhandlers.S
	arch/powerpc/kvm/book3s_interrupts.S
	arch/powerpc/kvm/e500.c
	arch/powerpc/kvm/e500mc.c
	arch/powerpc/sysdev/fsl_soc.h
	drivers/Kconfig
	drivers/cpufreq/ppc-corenet-cpufreq.c
	drivers/dma/fsldma.c
	drivers/dma/s3c24xx-dma.c
	drivers/misc/Makefile
	drivers/mmc/host/sdhci-of-esdhc.c
	drivers/mtd/devices/m25p80.c
	drivers/net/ethernet/freescale/gianfar.h
	drivers/platform/Kconfig
	drivers/platform/Makefile
	drivers/spi/spi-fsl-espi.c
	include/crypto/algapi.h
	include/linux/netdev_features.h
	include/linux/skbuff.h
	include/net/ip.h
	net/core/ethtool.c
Diffstat (limited to 'kernel/events')
-rw-r--r--  kernel/events/core.c         180
-rw-r--r--  kernel/events/internal.h      35
-rw-r--r--  kernel/events/ring_buffer.c  101
-rw-r--r--  kernel/events/uprobes.c      223
4 files changed, 219 insertions(+), 320 deletions(-)
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 622e1ed..e0fd51b 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -175,8 +175,8 @@ int sysctl_perf_event_sample_rate __read_mostly = DEFAULT_MAX_SAMPLE_RATE;
static int max_samples_per_tick __read_mostly = DIV_ROUND_UP(DEFAULT_MAX_SAMPLE_RATE, HZ);
static int perf_sample_period_ns __read_mostly = DEFAULT_SAMPLE_PERIOD_NS;
-static int perf_sample_allowed_ns __read_mostly =
- DEFAULT_SAMPLE_PERIOD_NS * DEFAULT_CPU_TIME_MAX_PERCENT / 100;
+static atomic_t perf_sample_allowed_ns __read_mostly =
+ ATOMIC_INIT( DEFAULT_SAMPLE_PERIOD_NS * DEFAULT_CPU_TIME_MAX_PERCENT / 100);
void update_perf_cpu_limits(void)
{
@@ -184,7 +184,7 @@ void update_perf_cpu_limits(void)
tmp *= sysctl_perf_cpu_time_max_percent;
do_div(tmp, 100);
- ACCESS_ONCE(perf_sample_allowed_ns) = tmp;
+ atomic_set(&perf_sample_allowed_ns, tmp);
}
static int perf_rotate_context(struct perf_cpu_context *cpuctx);
@@ -193,7 +193,7 @@ int perf_proc_update_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp,
loff_t *ppos)
{
- int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+ int ret = proc_dointvec(table, write, buffer, lenp, ppos);
if (ret || !write)
return ret;
@@ -228,15 +228,14 @@ int perf_cpu_time_max_percent_handler(struct ctl_table *table, int write,
* we detect that events are taking too long.
*/
#define NR_ACCUMULATED_SAMPLES 128
-static DEFINE_PER_CPU(u64, running_sample_length);
+DEFINE_PER_CPU(u64, running_sample_length);
void perf_sample_event_took(u64 sample_len_ns)
{
u64 avg_local_sample_len;
u64 local_samples_len;
- u64 allowed_ns = ACCESS_ONCE(perf_sample_allowed_ns);
- if (allowed_ns == 0)
+ if (atomic_read(&perf_sample_allowed_ns) == 0)
return;
/* decay the counter by 1 average sample */
@@ -252,7 +251,7 @@ void perf_sample_event_took(u64 sample_len_ns)
*/
avg_local_sample_len = local_samples_len/NR_ACCUMULATED_SAMPLES;
- if (avg_local_sample_len <= allowed_ns)
+ if (avg_local_sample_len <= atomic_read(&perf_sample_allowed_ns))
return;
if (max_samples_per_tick <= 1)
@@ -263,9 +262,10 @@ void perf_sample_event_took(u64 sample_len_ns)
perf_sample_period_ns = NSEC_PER_SEC / sysctl_perf_event_sample_rate;
printk_ratelimited(KERN_WARNING
- "perf samples too long (%lld > %lld), lowering "
+ "perf samples too long (%lld > %d), lowering "
"kernel.perf_event_max_sample_rate to %d\n",
- avg_local_sample_len, allowed_ns,
+ avg_local_sample_len,
+ atomic_read(&perf_sample_allowed_ns),
sysctl_perf_event_sample_rate);
update_perf_cpu_limits();
@@ -901,7 +901,6 @@ static void unclone_ctx(struct perf_event_context *ctx)
put_ctx(ctx->parent_ctx);
ctx->parent_ctx = NULL;
}
- ctx->generation++;
}
static u32 perf_event_pid(struct perf_event *event, struct task_struct *p)
@@ -1139,8 +1138,6 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)
ctx->nr_events++;
if (event->attr.inherit_stat)
ctx->nr_stat++;
-
- ctx->generation++;
}
/*
@@ -1206,9 +1203,6 @@ static void perf_event__header_size(struct perf_event *event)
if (sample_type & PERF_SAMPLE_DATA_SRC)
size += sizeof(data->data_src.val);
- if (sample_type & PERF_SAMPLE_TRANSACTION)
- size += sizeof(data->txn);
-
event->header_size = size;
}
@@ -1318,8 +1312,6 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx)
*/
if (event->state > PERF_EVENT_STATE_OFF)
event->state = PERF_EVENT_STATE_OFF;
-
- ctx->generation++;
}
static void perf_group_detach(struct perf_event *event)
@@ -2156,38 +2148,22 @@ static void ctx_sched_out(struct perf_event_context *ctx,
}
/*
- * Test whether two contexts are equivalent, i.e. whether they have both been
- * cloned from the same version of the same context.
- *
- * Equivalence is measured using a generation number in the context that is
- * incremented on each modification to it; see unclone_ctx(), list_add_event()
- * and list_del_event().
+ * Test whether two contexts are equivalent, i.e. whether they
+ * have both been cloned from the same version of the same context
+ * and they both have the same number of enabled events.
+ * If the number of enabled events is the same, then the set
+ * of enabled events should be the same, because these are both
+ * inherited contexts, therefore we can't access individual events
+ * in them directly with an fd; we can only enable/disable all
+ * events via prctl, or enable/disable all events in a family
+ * via ioctl, which will have the same effect on both contexts.
*/
static int context_equiv(struct perf_event_context *ctx1,
struct perf_event_context *ctx2)
{
- /* Pinning disables the swap optimization */
- if (ctx1->pin_count || ctx2->pin_count)
- return 0;
-
- /* If ctx1 is the parent of ctx2 */
- if (ctx1 == ctx2->parent_ctx && ctx1->generation == ctx2->parent_gen)
- return 1;
-
- /* If ctx2 is the parent of ctx1 */
- if (ctx1->parent_ctx == ctx2 && ctx1->parent_gen == ctx2->generation)
- return 1;
-
- /*
- * If ctx1 and ctx2 have the same parent; we flatten the parent
- * hierarchy, see perf_event_init_context().
- */
- if (ctx1->parent_ctx && ctx1->parent_ctx == ctx2->parent_ctx &&
- ctx1->parent_gen == ctx2->parent_gen)
- return 1;
-
- /* Unmatched */
- return 0;
+ return ctx1->parent_ctx && ctx1->parent_ctx == ctx2->parent_ctx
+ && ctx1->parent_gen == ctx2->parent_gen
+ && !ctx1->pin_count && !ctx2->pin_count;
}
static void __perf_event_sync_stat(struct perf_event *event,
@@ -2236,6 +2212,9 @@ static void __perf_event_sync_stat(struct perf_event *event,
perf_event_update_userpage(next_event);
}
+#define list_next_entry(pos, member) \
+ list_entry(pos->member.next, typeof(*pos), member)
+
static void perf_event_sync_stat(struct perf_event_context *ctx,
struct perf_event_context *next_ctx)
{
@@ -2267,7 +2246,7 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn,
{
struct perf_event_context *ctx = task->perf_event_ctxp[ctxn];
struct perf_event_context *next_ctx;
- struct perf_event_context *parent, *next_parent;
+ struct perf_event_context *parent;
struct perf_cpu_context *cpuctx;
int do_switch = 1;
@@ -2279,18 +2258,10 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn,
return;
rcu_read_lock();
- next_ctx = next->perf_event_ctxp[ctxn];
- if (!next_ctx)
- goto unlock;
-
parent = rcu_dereference(ctx->parent_ctx);
- next_parent = rcu_dereference(next_ctx->parent_ctx);
-
- /* If neither context have a parent context; they cannot be clones. */
- if (!parent && !next_parent)
- goto unlock;
-
- if (next_parent == ctx || next_ctx == parent || next_parent == parent) {
+ next_ctx = next->perf_event_ctxp[ctxn];
+ if (parent && next_ctx &&
+ rcu_dereference(next_ctx->parent_ctx) == parent) {
/*
* Looks like the two contexts are clones, so we might be
* able to optimize the context switch. We lock both
@@ -2318,7 +2289,6 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn,
raw_spin_unlock(&next_ctx->lock);
raw_spin_unlock(&ctx->lock);
}
-unlock:
rcu_read_unlock();
if (do_switch) {
@@ -4605,9 +4575,6 @@ void perf_output_sample(struct perf_output_handle *handle,
if (sample_type & PERF_SAMPLE_DATA_SRC)
perf_output_put(handle, data->data_src.val);
- if (sample_type & PERF_SAMPLE_TRANSACTION)
- perf_output_put(handle, data->txn);
-
if (!event->attr.watermark) {
int wakeup_events = event->attr.wakeup_events;
@@ -5136,26 +5103,27 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
unsigned int size;
char tmp[16];
char *buf = NULL;
- char *name;
+ const char *name;
+
+ memset(tmp, 0, sizeof(tmp));
if (file) {
struct inode *inode;
dev_t dev;
-
- buf = kmalloc(PATH_MAX, GFP_KERNEL);
- if (!buf) {
- name = "//enomem";
- goto cpy_name;
- }
/*
- * d_path() works from the end of the rb backwards, so we
+ * d_path works from the end of the rb backwards, so we
* need to add enough zero bytes after the string to handle
* the 64bit alignment we do later.
*/
- name = d_path(&file->f_path, buf, PATH_MAX - sizeof(u64));
+ buf = kzalloc(PATH_MAX + sizeof(u64), GFP_KERNEL);
+ if (!buf) {
+ name = strncpy(tmp, "//enomem", sizeof(tmp));
+ goto got_name;
+ }
+ name = d_path(&file->f_path, buf, PATH_MAX);
if (IS_ERR(name)) {
- name = "//toolong";
- goto cpy_name;
+ name = strncpy(tmp, "//toolong", sizeof(tmp));
+ goto got_name;
}
inode = file_inode(vma->vm_file);
dev = inode->i_sb->s_dev;
@@ -5163,39 +5131,34 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
gen = inode->i_generation;
maj = MAJOR(dev);
min = MINOR(dev);
- goto got_name;
+
} else {
- name = (char *)arch_vma_name(vma);
- if (name)
- goto cpy_name;
+ if (arch_vma_name(mmap_event->vma)) {
+ name = strncpy(tmp, arch_vma_name(mmap_event->vma),
+ sizeof(tmp) - 1);
+ tmp[sizeof(tmp) - 1] = '\0';
+ goto got_name;
+ }
- if (vma->vm_start <= vma->vm_mm->start_brk &&
+ if (!vma->vm_mm) {
+ name = strncpy(tmp, "[vdso]", sizeof(tmp));
+ goto got_name;
+ } else if (vma->vm_start <= vma->vm_mm->start_brk &&
vma->vm_end >= vma->vm_mm->brk) {
- name = "[heap]";
- goto cpy_name;
- }
- if (vma->vm_start <= vma->vm_mm->start_stack &&
+ name = strncpy(tmp, "[heap]", sizeof(tmp));
+ goto got_name;
+ } else if (vma->vm_start <= vma->vm_mm->start_stack &&
vma->vm_end >= vma->vm_mm->start_stack) {
- name = "[stack]";
- goto cpy_name;
+ name = strncpy(tmp, "[stack]", sizeof(tmp));
+ goto got_name;
}
- name = "//anon";
- goto cpy_name;
+ name = strncpy(tmp, "//anon", sizeof(tmp));
+ goto got_name;
}
-cpy_name:
- strlcpy(tmp, name, sizeof(tmp));
- name = tmp;
got_name:
- /*
- * Since our buffer works in 8 byte units we need to align our string
- * size to a multiple of 8. However, we must guarantee the tail end is
- * zero'd out to avoid leaking random bits to userspace.
- */
- size = strlen(name)+1;
- while (!IS_ALIGNED(size, sizeof(u64)))
- name[size++] = '\0';
+ size = ALIGN(strlen(name)+1, sizeof(u64));
mmap_event->file_name = name;
mmap_event->file_size = size;
@@ -5684,6 +5647,11 @@ static void swevent_hlist_put(struct perf_event *event)
{
int cpu;
+ if (event->cpu != -1) {
+ swevent_hlist_put_cpu(event, event->cpu);
+ return;
+ }
+
for_each_possible_cpu(cpu)
swevent_hlist_put_cpu(event, cpu);
}
@@ -5717,6 +5685,9 @@ static int swevent_hlist_get(struct perf_event *event)
int err;
int cpu, failed_cpu;
+ if (event->cpu != -1)
+ return swevent_hlist_get_cpu(event, event->cpu);
+
get_online_cpus();
for_each_possible_cpu(cpu) {
err = swevent_hlist_get_cpu(event, cpu);
@@ -6325,7 +6296,6 @@ type_show(struct device *dev, struct device_attribute *attr, char *page)
return snprintf(page, PAGE_SIZE-1, "%d\n", pmu->type);
}
-static DEVICE_ATTR_RO(type);
static ssize_t
perf_event_mux_interval_ms_show(struct device *dev,
@@ -6370,19 +6340,17 @@ perf_event_mux_interval_ms_store(struct device *dev,
return count;
}
-static DEVICE_ATTR_RW(perf_event_mux_interval_ms);
-static struct attribute *pmu_dev_attrs[] = {
- &dev_attr_type.attr,
- &dev_attr_perf_event_mux_interval_ms.attr,
- NULL,
+static struct device_attribute pmu_dev_attrs[] = {
+ __ATTR_RO(type),
+ __ATTR_RW(perf_event_mux_interval_ms),
+ __ATTR_NULL,
};
-ATTRIBUTE_GROUPS(pmu_dev);
static int pmu_bus_running;
static struct bus_type pmu_bus = {
.name = "event_source",
- .dev_groups = pmu_dev_groups,
+ .dev_attrs = pmu_dev_attrs,
};
static void pmu_dev_release(struct device *dev)
@@ -7164,6 +7132,7 @@ SYSCALL_DEFINE5(perf_event_open,
}
perf_install_in_context(ctx, event, event->cpu);
+ ++ctx->generation;
perf_unpin_context(ctx);
mutex_unlock(&ctx->mutex);
@@ -7246,6 +7215,7 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
WARN_ON_ONCE(ctx->parent_ctx);
mutex_lock(&ctx->mutex);
perf_install_in_context(ctx, event, cpu);
+ ++ctx->generation;
perf_unpin_context(ctx);
mutex_unlock(&ctx->mutex);
diff --git a/kernel/events/internal.h b/kernel/events/internal.h
index 569b2187..ca65997 100644
--- a/kernel/events/internal.h
+++ b/kernel/events/internal.h
@@ -82,16 +82,16 @@ static inline unsigned long perf_data_size(struct ring_buffer *rb)
}
#define DEFINE_OUTPUT_COPY(func_name, memcpy_func) \
-static inline unsigned long \
+static inline unsigned int \
func_name(struct perf_output_handle *handle, \
- const void *buf, unsigned long len) \
+ const void *buf, unsigned int len) \
{ \
unsigned long size, written; \
\
do { \
- size = min(handle->size, len); \
+ size = min_t(unsigned long, handle->size, len); \
+ \
written = memcpy_func(handle->addr, buf, size); \
- written = size - written; \
\
len -= written; \
handle->addr += written; \
@@ -110,37 +110,20 @@ func_name(struct perf_output_handle *handle, \
return len; \
}
-static inline unsigned long
-memcpy_common(void *dst, const void *src, unsigned long n)
+static inline int memcpy_common(void *dst, const void *src, size_t n)
{
memcpy(dst, src, n);
- return 0;
+ return n;
}
DEFINE_OUTPUT_COPY(__output_copy, memcpy_common)
-static inline unsigned long
-memcpy_skip(void *dst, const void *src, unsigned long n)
-{
- return 0;
-}
+#define MEMCPY_SKIP(dst, src, n) (n)
-DEFINE_OUTPUT_COPY(__output_skip, memcpy_skip)
+DEFINE_OUTPUT_COPY(__output_skip, MEMCPY_SKIP)
#ifndef arch_perf_out_copy_user
-#define arch_perf_out_copy_user arch_perf_out_copy_user
-
-static inline unsigned long
-arch_perf_out_copy_user(void *dst, const void *src, unsigned long n)
-{
- unsigned long ret;
-
- pagefault_disable();
- ret = __copy_from_user_inatomic(dst, src, n);
- pagefault_enable();
-
- return ret;
-}
+#define arch_perf_out_copy_user __copy_from_user_inatomic
#endif
DEFINE_OUTPUT_COPY(__output_copy_user, arch_perf_out_copy_user)
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index e8b168a..9c2ddfb 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -12,10 +12,40 @@
#include <linux/perf_event.h>
#include <linux/vmalloc.h>
#include <linux/slab.h>
-#include <linux/circ_buf.h>
#include "internal.h"
+static bool perf_output_space(struct ring_buffer *rb, unsigned long tail,
+ unsigned long offset, unsigned long head)
+{
+ unsigned long sz = perf_data_size(rb);
+ unsigned long mask = sz - 1;
+
+ /*
+ * check if user-writable
+ * overwrite : over-write its own tail
+ * !overwrite: buffer possibly drops events.
+ */
+ if (rb->overwrite)
+ return true;
+
+ /*
+ * verify that payload is not bigger than buffer
+ * otherwise masking logic may fail to detect
+ * the "not enough space" condition
+ */
+ if ((head - offset) > sz)
+ return false;
+
+ offset = (offset - tail) & mask;
+ head = (head - tail) & mask;
+
+ if ((int)(head - offset) < 0)
+ return false;
+
+ return true;
+}
+
static void perf_output_wakeup(struct perf_output_handle *handle)
{
atomic_set(&handle->rb->poll, POLL_IN);
@@ -85,8 +115,8 @@ again:
rb->user_page->data_head = head;
/*
- * Now check if we missed an update -- rely on previous implied
- * compiler barriers to force a re-read.
+ * Now check if we missed an update, rely on the (compiler)
+ * barrier in atomic_dec_and_test() to re-read rb->head.
*/
if (unlikely(head != local_read(&rb->head))) {
local_inc(&rb->nest);
@@ -105,7 +135,8 @@ int perf_output_begin(struct perf_output_handle *handle,
{
struct ring_buffer *rb;
unsigned long tail, offset, head;
- int have_lost, page_shift;
+ int have_lost;
+ struct perf_sample_data sample_data;
struct {
struct perf_event_header header;
u64 id;
@@ -120,63 +151,57 @@ int perf_output_begin(struct perf_output_handle *handle,
event = event->parent;
rb = rcu_dereference(event->rb);
- if (unlikely(!rb))
+ if (!rb)
goto out;
- if (unlikely(!rb->nr_pages))
- goto out;
+ handle->rb = rb;
+ handle->event = event;
- handle->rb = rb;
- handle->event = event;
+ if (!rb->nr_pages)
+ goto out;
have_lost = local_read(&rb->lost);
- if (unlikely(have_lost)) {
- size += sizeof(lost_event);
- if (event->attr.sample_id_all)
- size += event->id_header_size;
+ if (have_lost) {
+ lost_event.header.size = sizeof(lost_event);
+ perf_event_header__init_id(&lost_event.header, &sample_data,
+ event);
+ size += lost_event.header.size;
}
perf_output_get_handle(handle);
do {
+ /*
+ * Userspace could choose to issue a mb() before updating the
+ * tail pointer. So that all reads will be completed before the
+ * write is issued.
+ *
+ * See perf_output_put_handle().
+ */
tail = ACCESS_ONCE(rb->user_page->data_tail);
+ smp_mb();
offset = head = local_read(&rb->head);
- if (!rb->overwrite &&
- unlikely(CIRC_SPACE(head, tail, perf_data_size(rb)) < size))
- goto fail;
head += size;
+ if (unlikely(!perf_output_space(rb, tail, offset, head)))
+ goto fail;
} while (local_cmpxchg(&rb->head, offset, head) != offset);
- /*
- * Separate the userpage->tail read from the data stores below.
- * Matches the MB userspace SHOULD issue after reading the data
- * and before storing the new tail position.
- *
- * See perf_output_put_handle().
- */
- smp_mb();
-
- if (unlikely(head - local_read(&rb->wakeup) > rb->watermark))
+ if (head - local_read(&rb->wakeup) > rb->watermark)
local_add(rb->watermark, &rb->wakeup);
- page_shift = PAGE_SHIFT + page_order(rb);
+ handle->page = offset >> (PAGE_SHIFT + page_order(rb));
+ handle->page &= rb->nr_pages - 1;
+ handle->size = offset & ((PAGE_SIZE << page_order(rb)) - 1);
+ handle->addr = rb->data_pages[handle->page];
+ handle->addr += handle->size;
+ handle->size = (PAGE_SIZE << page_order(rb)) - handle->size;
- handle->page = (offset >> page_shift) & (rb->nr_pages - 1);
- offset &= (1UL << page_shift) - 1;
- handle->addr = rb->data_pages[handle->page] + offset;
- handle->size = (1UL << page_shift) - offset;
-
- if (unlikely(have_lost)) {
- struct perf_sample_data sample_data;
-
- lost_event.header.size = sizeof(lost_event);
+ if (have_lost) {
lost_event.header.type = PERF_RECORD_LOST;
lost_event.header.misc = 0;
lost_event.id = event->id;
lost_event.lost = local_xchg(&rb->lost, 0);
- perf_event_header__init_id(&lost_event.header,
- &sample_data, event);
perf_output_put(handle, lost_event);
perf_event__output_id_sample(event, handle, &sample_data);
}
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 24b7d6c..ad8e1bd 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -35,7 +35,6 @@
#include <linux/kdebug.h> /* notifier mechanism */
#include "../../mm/internal.h" /* munlock_vma_page */
#include <linux/percpu-rwsem.h>
-#include <linux/task_work.h>
#include <linux/uprobes.h>
@@ -245,12 +244,12 @@ static int verify_opcode(struct page *page, unsigned long vaddr, uprobe_opcode_t
* the architecture. If an arch has variable length instruction and the
* breakpoint instruction is not of the smallest length instruction
* supported by that architecture then we need to modify is_trap_at_addr and
- * uprobe_write_opcode accordingly. This would never be a problem for archs
- * that have fixed length instructions.
+ * write_opcode accordingly. This would never be a problem for archs that
+ * have fixed length instructions.
*/
/*
- * uprobe_write_opcode - write the opcode at a given virtual address.
+ * write_opcode - write the opcode at a given virtual address.
* @mm: the probed process address space.
* @vaddr: the virtual address to store the opcode.
* @opcode: opcode to be written at @vaddr.
@@ -261,7 +260,7 @@ static int verify_opcode(struct page *page, unsigned long vaddr, uprobe_opcode_t
* For mm @mm, write the opcode at @vaddr.
* Return 0 (success) or a negative errno.
*/
-int uprobe_write_opcode(struct mm_struct *mm, unsigned long vaddr,
+static int write_opcode(struct mm_struct *mm, unsigned long vaddr,
uprobe_opcode_t opcode)
{
struct page *old_page, *new_page;
@@ -315,7 +314,7 @@ put_old:
*/
int __weak set_swbp(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr)
{
- return uprobe_write_opcode(mm, vaddr, UPROBE_SWBP_INSN);
+ return write_opcode(mm, vaddr, UPROBE_SWBP_INSN);
}
/**
@@ -330,7 +329,7 @@ int __weak set_swbp(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned
int __weak
set_orig_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr)
{
- return uprobe_write_opcode(mm, vaddr, *(uprobe_opcode_t *)auprobe->insn);
+ return write_opcode(mm, vaddr, *(uprobe_opcode_t *)auprobe->insn);
}
static int match_uprobe(struct uprobe *l, struct uprobe *r)
@@ -504,8 +503,9 @@ static bool consumer_del(struct uprobe *uprobe, struct uprobe_consumer *uc)
return ret;
}
-static int __copy_insn(struct address_space *mapping, struct file *filp,
- void *insn, int nbytes, loff_t offset)
+static int
+__copy_insn(struct address_space *mapping, struct file *filp, char *insn,
+ unsigned long nbytes, loff_t offset)
{
struct page *page;
@@ -527,28 +527,28 @@ static int __copy_insn(struct address_space *mapping, struct file *filp,
static int copy_insn(struct uprobe *uprobe, struct file *filp)
{
- struct address_space *mapping = uprobe->inode->i_mapping;
- loff_t offs = uprobe->offset;
- void *insn = uprobe->arch.insn;
- int size = MAX_UINSN_BYTES;
- int len, err = -EIO;
+ struct address_space *mapping;
+ unsigned long nbytes;
+ int bytes;
- /* Copy only available bytes, -EIO if nothing was read */
- do {
- if (offs >= i_size_read(uprobe->inode))
- break;
+ nbytes = PAGE_SIZE - (uprobe->offset & ~PAGE_MASK);
+ mapping = uprobe->inode->i_mapping;
- len = min_t(int, size, PAGE_SIZE - (offs & ~PAGE_MASK));
- err = __copy_insn(mapping, filp, insn, len, offs);
- if (err)
- break;
-
- insn += len;
- offs += len;
- size -= len;
- } while (size);
+ /* Instruction at end of binary; copy only available bytes */
+ if (uprobe->offset + MAX_UINSN_BYTES > uprobe->inode->i_size)
+ bytes = uprobe->inode->i_size - uprobe->offset;
+ else
+ bytes = MAX_UINSN_BYTES;
- return err;
+ /* Instruction at the page-boundary; copy bytes in second page */
+ if (nbytes < bytes) {
+ int err = __copy_insn(mapping, filp, uprobe->arch.insn + nbytes,
+ bytes - nbytes, uprobe->offset + nbytes);
+ if (err)
+ return err;
+ bytes = nbytes;
+ }
+ return __copy_insn(mapping, filp, uprobe->arch.insn, bytes, uprobe->offset);
}
static int prepare_uprobe(struct uprobe *uprobe, struct file *file,
@@ -576,7 +576,7 @@ static int prepare_uprobe(struct uprobe *uprobe, struct file *file,
if (ret)
goto out;
- /* uprobe_write_opcode() assumes we don't cross page boundary */
+ /* write_opcode() assumes we don't cross page boundary */
BUG_ON((uprobe->offset & ~PAGE_MASK) +
UPROBE_SWBP_INSN_SIZE > PAGE_SIZE);
@@ -1096,22 +1096,21 @@ void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned lon
}
/* Slot allocation for XOL */
-static int xol_add_vma(struct mm_struct *mm, struct xol_area *area)
+static int xol_add_vma(struct xol_area *area)
{
+ struct mm_struct *mm = current->mm;
int ret = -EALREADY;
down_write(&mm->mmap_sem);
if (mm->uprobes_state.xol_area)
goto fail;
- if (!area->vaddr) {
- /* Try to map as high as possible, this is only a hint. */
- area->vaddr = get_unmapped_area(NULL, TASK_SIZE - PAGE_SIZE,
- PAGE_SIZE, 0, 0);
- if (area->vaddr & ~PAGE_MASK) {
- ret = area->vaddr;
- goto fail;
- }
+ ret = -ENOMEM;
+ /* Try to map as high as possible, this is only a hint. */
+ area->vaddr = get_unmapped_area(NULL, TASK_SIZE - PAGE_SIZE, PAGE_SIZE, 0, 0);
+ if (area->vaddr & ~PAGE_MASK) {
+ ret = area->vaddr;
+ goto fail;
}
ret = install_special_mapping(mm, area->vaddr, PAGE_SIZE,
@@ -1121,19 +1120,30 @@ static int xol_add_vma(struct mm_struct *mm, struct xol_area *area)
smp_wmb(); /* pairs with get_xol_area() */
mm->uprobes_state.xol_area = area;
+ ret = 0;
fail:
up_write(&mm->mmap_sem);
return ret;
}
-static struct xol_area *__create_xol_area(unsigned long vaddr)
+/*
+ * get_xol_area - Allocate process's xol_area if necessary.
+ * This area will be used for storing instructions for execution out of line.
+ *
+ * Returns the allocated area or NULL.
+ */
+static struct xol_area *get_xol_area(void)
{
struct mm_struct *mm = current->mm;
- uprobe_opcode_t insn = UPROBE_SWBP_INSN;
struct xol_area *area;
+ uprobe_opcode_t insn = UPROBE_SWBP_INSN;
+
+ area = mm->uprobes_state.xol_area;
+ if (area)
+ goto ret;
- area = kmalloc(sizeof(*area), GFP_KERNEL);
+ area = kzalloc(sizeof(*area), GFP_KERNEL);
if (unlikely(!area))
goto out;
@@ -1145,14 +1155,13 @@ static struct xol_area *__create_xol_area(unsigned long vaddr)
if (!area->page)
goto free_bitmap;
- area->vaddr = vaddr;
- init_waitqueue_head(&area->wq);
- /* Reserve the 1st slot for get_trampoline_vaddr() */
+ /* allocate first slot of task's xol_area for the return probes */
set_bit(0, area->bitmap);
- atomic_set(&area->slot_count, 1);
copy_to_page(area->page, 0, &insn, UPROBE_SWBP_INSN_SIZE);
+ atomic_set(&area->slot_count, 1);
+ init_waitqueue_head(&area->wq);
- if (!xol_add_vma(mm, area))
+ if (!xol_add_vma(area))
return area;
__free_page(area->page);
@@ -1161,25 +1170,9 @@ static struct xol_area *__create_xol_area(unsigned long vaddr)
free_area:
kfree(area);
out:
- return NULL;
-}
-
-/*
- * get_xol_area - Allocate process's xol_area if necessary.
- * This area will be used for storing instructions for execution out of line.
- *
- * Returns the allocated area or NULL.
- */
-static struct xol_area *get_xol_area(void)
-{
- struct mm_struct *mm = current->mm;
- struct xol_area *area;
-
- if (!mm->uprobes_state.xol_area)
- __create_xol_area(0);
-
area = mm->uprobes_state.xol_area;
- smp_read_barrier_depends(); /* pairs with wmb in xol_add_vma() */
+ ret:
+ smp_read_barrier_depends(); /* pairs with wmb in xol_add_vma() */
return area;
}
@@ -1263,8 +1256,7 @@ static unsigned long xol_get_insn_slot(struct uprobe *uprobe)
return 0;
/* Initialize the slot */
- copy_to_page(area->page, xol_vaddr,
- uprobe->arch.ixol, sizeof(uprobe->arch.ixol));
+ copy_to_page(area->page, xol_vaddr, uprobe->arch.insn, MAX_UINSN_BYTES);
/*
* We probably need flush_icache_user_range() but it needs vma.
* This should work on supported architectures too.
@@ -1353,6 +1345,14 @@ void uprobe_free_utask(struct task_struct *t)
}
/*
+ * Called in context of a new clone/fork from copy_process.
+ */
+void uprobe_copy_process(struct task_struct *t)
+{
+ t->utask = NULL;
+}
+
+/*
* Allocate a uprobe_task object for the task if if necessary.
* Called when the thread hits a breakpoint.
*
@@ -1367,90 +1367,6 @@ static struct uprobe_task *get_utask(void)
return current->utask;
}
-static int dup_utask(struct task_struct *t, struct uprobe_task *o_utask)
-{
- struct uprobe_task *n_utask;
- struct return_instance **p, *o, *n;
-
- n_utask = kzalloc(sizeof(struct uprobe_task), GFP_KERNEL);
- if (!n_utask)
- return -ENOMEM;
- t->utask = n_utask;
-
- p = &n_utask->return_instances;
- for (o = o_utask->return_instances; o; o = o->next) {
- n = kmalloc(sizeof(struct return_instance), GFP_KERNEL);
- if (!n)
- return -ENOMEM;
-
- *n = *o;
- atomic_inc(&n->uprobe->ref);
- n->next = NULL;
-
- *p = n;
- p = &n->next;
- n_utask->depth++;
- }
-
- return 0;
-}
-
-static void uprobe_warn(struct task_struct *t, const char *msg)
-{
- pr_warn("uprobe: %s:%d failed to %s\n",
- current->comm, current->pid, msg);
-}
-
-static void dup_xol_work(struct callback_head *work)
-{
- kfree(work);
-
- if (current->flags & PF_EXITING)
- return;
-
- if (!__create_xol_area(current->utask->vaddr))
- uprobe_warn(current, "dup xol area");
-}
-
-/*
- * Called in context of a new clone/fork from copy_process.
- */
-void uprobe_copy_process(struct task_struct *t, unsigned long flags)
-{
- struct uprobe_task *utask = current->utask;
- struct mm_struct *mm = current->mm;
- struct callback_head *work;
- struct xol_area *area;
-
- t->utask = NULL;
-
- if (!utask || !utask->return_instances)
- return;
-
- if (mm == t->mm && !(flags & CLONE_VFORK))
- return;
-
- if (dup_utask(t, utask))
- return uprobe_warn(t, "dup ret instances");
-
- /* The task can fork() after dup_xol_work() fails */
- area = mm->uprobes_state.xol_area;
- if (!area)
- return uprobe_warn(t, "dup xol area");
-
- if (mm == t->mm)
- return;
-
- /* TODO: move it into the union in uprobe_task */
- work = kmalloc(sizeof(*work), GFP_KERNEL);
- if (!work)
- return uprobe_warn(t, "dup xol area");
-
- t->utask->vaddr = area->vaddr;
- init_task_work(work, dup_xol_work);
- task_work_add(t, work, true);
-}
-
/*
* Current area->vaddr notion assume the trampoline address is always
* equal area->vaddr.
@@ -1941,4 +1857,9 @@ static int __init init_uprobes(void)
return register_die_notifier(&uprobe_exception_nb);
}
-__initcall(init_uprobes);
+module_init(init_uprobes);
+
+static void __exit exit_uprobes(void)
+{
+}
+module_exit(exit_uprobes);