summaryrefslogtreecommitdiff
path: root/kernel/events/core.c
diff options
context:
space:
mode:
authorPeter Zijlstra <peterz@infradead.org>2015-01-14 12:18:11 (GMT)
committerIngo Molnar <mingo@kernel.org>2015-04-02 15:13:46 (GMT)
commit45bfb2e50471abbbfd83d40d28c986078b0d24ff (patch)
treef06f2176a2ef51315387f492a3f1b85efe91f2bb /kernel/events/core.c
parente8c6deac69629c0cb97c3d3272f8631ef17f8f0f (diff)
downloadlinux-45bfb2e50471abbbfd83d40d28c986078b0d24ff.tar.xz
perf: Add AUX area to ring buffer for raw data streams
This patch introduces "AUX space" in the perf mmap buffer, intended for exporting high bandwidth data streams to userspace, such as instruction flow traces. AUX space is a ring buffer, defined by aux_{offset,size} fields in the user_page structure, and read/write pointers aux_{head,tail}, which abide by the same rules as data_* counterparts of the main perf buffer. In order to allocate/mmap AUX, userspace needs to set up aux_offset to such an offset that will be greater than data_offset+data_size and aux_size to be the desired buffer size. Both need to be page aligned. Then, same aux_offset and aux_size should be passed to mmap() call and if everything adds up, you should have an AUX buffer as a result. Pages that are mapped into this buffer also come out of user's mlock rlimit plus perf_event_mlock_kb allowance. Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Acked-by: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Borislav Petkov <bp@alien8.de> Cc: Frederic Weisbecker <fweisbec@gmail.com> Cc: H. Peter Anvin <hpa@zytor.com> Cc: Kaixu Xia <kaixu.xia@linaro.org> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Mike Galbraith <efault@gmx.de> Cc: Paul Mackerras <paulus@samba.org> Cc: Robert Richter <rric@kernel.org> Cc: Stephane Eranian <eranian@google.com> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: acme@infradead.org Cc: adrian.hunter@intel.com Cc: kan.liang@intel.com Cc: markus.t.metzger@intel.com Cc: mathieu.poirier@linaro.org Link: http://lkml.kernel.org/r/1421237903-181015-3-git-send-email-alexander.shishkin@linux.intel.com Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel/events/core.c')
-rw-r--r--kernel/events/core.c141
1 files changed, 116 insertions, 25 deletions
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 6efa516..da51128 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -4306,6 +4306,9 @@ static void perf_mmap_open(struct vm_area_struct *vma)
atomic_inc(&event->mmap_count);
atomic_inc(&event->rb->mmap_count);
+ if (vma->vm_pgoff)
+ atomic_inc(&event->rb->aux_mmap_count);
+
if (event->pmu->event_mapped)
event->pmu->event_mapped(event);
}
@@ -4330,6 +4333,20 @@ static void perf_mmap_close(struct vm_area_struct *vma)
if (event->pmu->event_unmapped)
event->pmu->event_unmapped(event);
+ /*
+ * rb->aux_mmap_count will always drop before rb->mmap_count and
+ * event->mmap_count, so it is ok to use event->mmap_mutex to
+ * serialize with perf_mmap here.
+ */
+ if (rb_has_aux(rb) && vma->vm_pgoff == rb->aux_pgoff &&
+ atomic_dec_and_mutex_lock(&rb->aux_mmap_count, &event->mmap_mutex)) {
+ atomic_long_sub(rb->aux_nr_pages, &mmap_user->locked_vm);
+ vma->vm_mm->pinned_vm -= rb->aux_mmap_locked;
+
+ rb_free_aux(rb);
+ mutex_unlock(&event->mmap_mutex);
+ }
+
atomic_dec(&rb->mmap_count);
if (!atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex))
@@ -4403,7 +4420,7 @@ out_put:
static const struct vm_operations_struct perf_mmap_vmops = {
.open = perf_mmap_open,
- .close = perf_mmap_close,
+ .close = perf_mmap_close, /* non mergable */
.fault = perf_mmap_fault,
.page_mkwrite = perf_mmap_fault,
};
@@ -4414,10 +4431,10 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
unsigned long user_locked, user_lock_limit;
struct user_struct *user = current_user();
unsigned long locked, lock_limit;
- struct ring_buffer *rb;
+ struct ring_buffer *rb = NULL;
unsigned long vma_size;
unsigned long nr_pages;
- long user_extra, extra;
+ long user_extra = 0, extra = 0;
int ret = 0, flags = 0;
/*
@@ -4432,7 +4449,66 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
return -EINVAL;
vma_size = vma->vm_end - vma->vm_start;
- nr_pages = (vma_size / PAGE_SIZE) - 1;
+
+ if (vma->vm_pgoff == 0) {
+ nr_pages = (vma_size / PAGE_SIZE) - 1;
+ } else {
+ /*
+ * AUX area mapping: if rb->aux_nr_pages != 0, it's already
+ * mapped, all subsequent mappings should have the same size
+ * and offset. Must be above the normal perf buffer.
+ */
+ u64 aux_offset, aux_size;
+
+ if (!event->rb)
+ return -EINVAL;
+
+ nr_pages = vma_size / PAGE_SIZE;
+
+ mutex_lock(&event->mmap_mutex);
+ ret = -EINVAL;
+
+ rb = event->rb;
+ if (!rb)
+ goto aux_unlock;
+
+ aux_offset = ACCESS_ONCE(rb->user_page->aux_offset);
+ aux_size = ACCESS_ONCE(rb->user_page->aux_size);
+
+ if (aux_offset < perf_data_size(rb) + PAGE_SIZE)
+ goto aux_unlock;
+
+ if (aux_offset != vma->vm_pgoff << PAGE_SHIFT)
+ goto aux_unlock;
+
+ /* already mapped with a different offset */
+ if (rb_has_aux(rb) && rb->aux_pgoff != vma->vm_pgoff)
+ goto aux_unlock;
+
+ if (aux_size != vma_size || aux_size != nr_pages * PAGE_SIZE)
+ goto aux_unlock;
+
+ /* already mapped with a different size */
+ if (rb_has_aux(rb) && rb->aux_nr_pages != nr_pages)
+ goto aux_unlock;
+
+ if (!is_power_of_2(nr_pages))
+ goto aux_unlock;
+
+ if (!atomic_inc_not_zero(&rb->mmap_count))
+ goto aux_unlock;
+
+ if (rb_has_aux(rb)) {
+ atomic_inc(&rb->aux_mmap_count);
+ ret = 0;
+ goto unlock;
+ }
+
+ atomic_set(&rb->aux_mmap_count, 1);
+ user_extra = nr_pages;
+
+ goto accounting;
+ }
/*
* If we have rb pages ensure they're a power-of-two number, so we
@@ -4444,9 +4520,6 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
if (vma_size != PAGE_SIZE * (1 + nr_pages))
return -EINVAL;
- if (vma->vm_pgoff != 0)
- return -EINVAL;
-
WARN_ON_ONCE(event->ctx->parent_ctx);
again:
mutex_lock(&event->mmap_mutex);
@@ -4470,6 +4543,8 @@ again:
}
user_extra = nr_pages + 1;
+
+accounting:
user_lock_limit = sysctl_perf_event_mlock >> (PAGE_SHIFT - 10);
/*
@@ -4479,7 +4554,6 @@ again:
user_locked = atomic_long_read(&user->locked_vm) + user_extra;
- extra = 0;
if (user_locked > user_lock_limit)
extra = user_locked - user_lock_limit;
@@ -4493,35 +4567,45 @@ again:
goto unlock;
}
- WARN_ON(event->rb);
+ WARN_ON(!rb && event->rb);
if (vma->vm_flags & VM_WRITE)
flags |= RING_BUFFER_WRITABLE;
- rb = rb_alloc(nr_pages,
- event->attr.watermark ? event->attr.wakeup_watermark : 0,
- event->cpu, flags);
-
if (!rb) {
- ret = -ENOMEM;
- goto unlock;
- }
+ rb = rb_alloc(nr_pages,
+ event->attr.watermark ? event->attr.wakeup_watermark : 0,
+ event->cpu, flags);
- atomic_set(&rb->mmap_count, 1);
- rb->mmap_locked = extra;
- rb->mmap_user = get_current_user();
+ if (!rb) {
+ ret = -ENOMEM;
+ goto unlock;
+ }
- atomic_long_add(user_extra, &user->locked_vm);
- vma->vm_mm->pinned_vm += extra;
+ atomic_set(&rb->mmap_count, 1);
+ rb->mmap_user = get_current_user();
+ rb->mmap_locked = extra;
- ring_buffer_attach(event, rb);
+ ring_buffer_attach(event, rb);
- perf_event_init_userpage(event);
- perf_event_update_userpage(event);
+ perf_event_init_userpage(event);
+ perf_event_update_userpage(event);
+ } else {
+ ret = rb_alloc_aux(rb, event, vma->vm_pgoff, nr_pages, flags);
+ if (!ret)
+ rb->aux_mmap_locked = extra;
+ }
unlock:
- if (!ret)
+ if (!ret) {
+ atomic_long_add(user_extra, &user->locked_vm);
+ vma->vm_mm->pinned_vm += extra;
+
atomic_inc(&event->mmap_count);
+ } else if (rb) {
+ atomic_dec(&rb->mmap_count);
+ }
+aux_unlock:
mutex_unlock(&event->mmap_mutex);
/*
@@ -7506,6 +7590,13 @@ perf_event_set_output(struct perf_event *event, struct perf_event *output_event)
if (output_event->clock != event->clock)
goto out;
+ /*
+ * If both events generate aux data, they must be on the same PMU
+ */
+ if (has_aux(event) && has_aux(output_event) &&
+ event->pmu != output_event->pmu)
+ goto out;
+
set:
mutex_lock(&event->mmap_mutex);
/* Can't redirect output if we've got an active mmap() */