author		Scott Wood <scottwood@freescale.com>	2014-04-08 01:00:49 (GMT)
committer	Scott Wood <scottwood@freescale.com>	2014-04-08 19:58:35 (GMT)
commit		47d2261a3fa71cde24263559a4219a25e50d8c89 (patch)
tree		28774d5b330ccf1b777a3af222d8356918328013 /kernel/events
parent		fb7f27080adc65cd5f341bdf56a1d0c14f316c1b (diff)
parent		5fb9d37f27351e42f002e372074249f92cbdf815 (diff)
download	linux-fsl-qoriq-47d2261a3fa71cde24263559a4219a25e50d8c89.tar.xz
Merge branch 'merge' into sdk-v1.6.x
This reverts v3.13-rc3+ (78fd82238d0e5716) to v3.12, except for
commits I noticed that appear relevant to the SDK.
Signed-off-by: Scott Wood <scottwood@freescale.com>
Conflicts:
arch/powerpc/include/asm/kvm_host.h
arch/powerpc/kvm/book3s_hv_rmhandlers.S
arch/powerpc/kvm/book3s_interrupts.S
arch/powerpc/kvm/e500.c
arch/powerpc/kvm/e500mc.c
arch/powerpc/sysdev/fsl_soc.h
drivers/Kconfig
drivers/cpufreq/ppc-corenet-cpufreq.c
drivers/dma/fsldma.c
drivers/dma/s3c24xx-dma.c
drivers/misc/Makefile
drivers/mmc/host/sdhci-of-esdhc.c
drivers/mtd/devices/m25p80.c
drivers/net/ethernet/freescale/gianfar.h
drivers/platform/Kconfig
drivers/platform/Makefile
drivers/spi/spi-fsl-espi.c
include/crypto/algapi.h
include/linux/netdev_features.h
include/linux/skbuff.h
include/net/ip.h
net/core/ethtool.c
Diffstat (limited to 'kernel/events')
-rw-r--r--	kernel/events/core.c		| 180
-rw-r--r--	kernel/events/internal.h	|  35
-rw-r--r--	kernel/events/ring_buffer.c	| 101
-rw-r--r--	kernel/events/uprobes.c		| 223
4 files changed, 219 insertions, 320 deletions
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 622e1ed..e0fd51b 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -175,8 +175,8 @@ int sysctl_perf_event_sample_rate __read_mostly = DEFAULT_MAX_SAMPLE_RATE;
 static int max_samples_per_tick __read_mostly =
 	DIV_ROUND_UP(DEFAULT_MAX_SAMPLE_RATE, HZ);
 static int perf_sample_period_ns __read_mostly = DEFAULT_SAMPLE_PERIOD_NS;
-static int perf_sample_allowed_ns __read_mostly =
-	DEFAULT_SAMPLE_PERIOD_NS * DEFAULT_CPU_TIME_MAX_PERCENT / 100;
+static atomic_t perf_sample_allowed_ns __read_mostly =
+	ATOMIC_INIT( DEFAULT_SAMPLE_PERIOD_NS * DEFAULT_CPU_TIME_MAX_PERCENT / 100);
 
 void update_perf_cpu_limits(void)
 {
@@ -184,7 +184,7 @@ void update_perf_cpu_limits(void)
 
 	tmp *= sysctl_perf_cpu_time_max_percent;
 	do_div(tmp, 100);
-	ACCESS_ONCE(perf_sample_allowed_ns) = tmp;
+	atomic_set(&perf_sample_allowed_ns, tmp);
 }
 
 static int perf_rotate_context(struct perf_cpu_context *cpuctx);
@@ -193,7 +193,7 @@ int perf_proc_update_handler(struct ctl_table *table, int write,
 		void __user *buffer, size_t *lenp,
 		loff_t *ppos)
 {
-	int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+	int ret = proc_dointvec(table, write, buffer, lenp, ppos);
 
 	if (ret || !write)
 		return ret;
@@ -228,15 +228,14 @@ int perf_cpu_time_max_percent_handler(struct ctl_table *table, int write,
  * we detect that events are taking too long.
  */
 #define NR_ACCUMULATED_SAMPLES 128
-static DEFINE_PER_CPU(u64, running_sample_length);
+DEFINE_PER_CPU(u64, running_sample_length);
 
 void perf_sample_event_took(u64 sample_len_ns)
 {
 	u64 avg_local_sample_len;
 	u64 local_samples_len;
-	u64 allowed_ns = ACCESS_ONCE(perf_sample_allowed_ns);
 
-	if (allowed_ns == 0)
+	if (atomic_read(&perf_sample_allowed_ns) == 0)
 		return;
 
 	/* decay the counter by 1 average sample */
@@ -252,7 +251,7 @@ void perf_sample_event_took(u64 sample_len_ns)
 	 */
 	avg_local_sample_len = local_samples_len/NR_ACCUMULATED_SAMPLES;
 
-	if (avg_local_sample_len <= allowed_ns)
+	if (avg_local_sample_len <= atomic_read(&perf_sample_allowed_ns))
 		return;
 
 	if (max_samples_per_tick <= 1)
@@ -263,9 +262,10 @@ void perf_sample_event_took(u64 sample_len_ns)
 	perf_sample_period_ns = NSEC_PER_SEC / sysctl_perf_event_sample_rate;
 
 	printk_ratelimited(KERN_WARNING
-			"perf samples too long (%lld > %lld), lowering "
+			"perf samples too long (%lld > %d), lowering "
 			"kernel.perf_event_max_sample_rate to %d\n",
-			avg_local_sample_len, allowed_ns,
+			avg_local_sample_len,
+			atomic_read(&perf_sample_allowed_ns),
 			sysctl_perf_event_sample_rate);
 
 	update_perf_cpu_limits();
@@ -901,7 +901,6 @@ static void unclone_ctx(struct perf_event_context *ctx)
 		put_ctx(ctx->parent_ctx);
 		ctx->parent_ctx = NULL;
 	}
-	ctx->generation++;
 }
 
 static u32 perf_event_pid(struct perf_event *event, struct task_struct *p)
@@ -1139,8 +1138,6 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)
 	ctx->nr_events++;
 	if (event->attr.inherit_stat)
 		ctx->nr_stat++;
-
-	ctx->generation++;
 }
 
 /*
@@ -1206,9 +1203,6 @@ static void perf_event__header_size(struct perf_event *event)
 	if (sample_type & PERF_SAMPLE_DATA_SRC)
 		size += sizeof(data->data_src.val);
 
-	if (sample_type & PERF_SAMPLE_TRANSACTION)
-		size += sizeof(data->txn);
-
 	event->header_size = size;
 }
 
@@ -1318,8 +1312,6 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx)
 	 */
 	if (event->state > PERF_EVENT_STATE_OFF)
 		event->state = PERF_EVENT_STATE_OFF;
-
-	ctx->generation++;
 }
 
 static void perf_group_detach(struct perf_event *event)
@@ -2156,38 +2148,22 @@ static void ctx_sched_out(struct perf_event_context *ctx,
 }
 
 /*
- * Test whether two contexts are equivalent, i.e. whether they have both been
- * cloned from the same version of the same context.
- *
- * Equivalence is measured using a generation number in the context that is
- * incremented on each modification to it; see unclone_ctx(), list_add_event()
- * and list_del_event().
+ * Test whether two contexts are equivalent, i.e. whether they
+ * have both been cloned from the same version of the same context
+ * and they both have the same number of enabled events.
+ * If the number of enabled events is the same, then the set
+ * of enabled events should be the same, because these are both
+ * inherited contexts, therefore we can't access individual events
+ * in them directly with an fd; we can only enable/disable all
+ * events via prctl, or enable/disable all events in a family
+ * via ioctl, which will have the same effect on both contexts.
 */
 static int context_equiv(struct perf_event_context *ctx1,
 			 struct perf_event_context *ctx2)
 {
-	/* Pinning disables the swap optimization */
-	if (ctx1->pin_count || ctx2->pin_count)
-		return 0;
-
-	/* If ctx1 is the parent of ctx2 */
-	if (ctx1 == ctx2->parent_ctx && ctx1->generation == ctx2->parent_gen)
-		return 1;
-
-	/* If ctx2 is the parent of ctx1 */
-	if (ctx1->parent_ctx == ctx2 && ctx1->parent_gen == ctx2->generation)
-		return 1;
-
-	/*
-	 * If ctx1 and ctx2 have the same parent; we flatten the parent
-	 * hierarchy, see perf_event_init_context().
-	 */
-	if (ctx1->parent_ctx && ctx1->parent_ctx == ctx2->parent_ctx &&
-	    ctx1->parent_gen == ctx2->parent_gen)
-		return 1;
-
-	/* Unmatched */
-	return 0;
+	return ctx1->parent_ctx && ctx1->parent_ctx == ctx2->parent_ctx
+		&& ctx1->parent_gen == ctx2->parent_gen
+		&& !ctx1->pin_count && !ctx2->pin_count;
 }
 
 static void __perf_event_sync_stat(struct perf_event *event,
@@ -2236,6 +2212,9 @@ static void __perf_event_sync_stat(struct perf_event *event,
 	perf_event_update_userpage(next_event);
 }
 
+#define list_next_entry(pos, member) \
+	list_entry(pos->member.next, typeof(*pos), member)
+
 static void perf_event_sync_stat(struct perf_event_context *ctx,
 				 struct perf_event_context *next_ctx)
 {
@@ -2267,7 +2246,7 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn,
 {
 	struct perf_event_context *ctx = task->perf_event_ctxp[ctxn];
 	struct perf_event_context *next_ctx;
-	struct perf_event_context *parent, *next_parent;
+	struct perf_event_context *parent;
 	struct perf_cpu_context *cpuctx;
 	int do_switch = 1;
 
@@ -2279,18 +2258,10 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn,
 		return;
 
 	rcu_read_lock();
-	next_ctx = next->perf_event_ctxp[ctxn];
-	if (!next_ctx)
-		goto unlock;
-
 	parent = rcu_dereference(ctx->parent_ctx);
-	next_parent = rcu_dereference(next_ctx->parent_ctx);
-
-	/* If neither context have a parent context; they cannot be clones. */
-	if (!parent && !next_parent)
-		goto unlock;
-
-	if (next_parent == ctx || next_ctx == parent || next_parent == parent) {
+	next_ctx = next->perf_event_ctxp[ctxn];
+	if (parent && next_ctx &&
+	    rcu_dereference(next_ctx->parent_ctx) == parent) {
 		/*
 		 * Looks like the two contexts are clones, so we might be
 		 * able to optimize the context switch.  We lock both
@@ -2318,7 +2289,6 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn,
 		raw_spin_unlock(&next_ctx->lock);
 		raw_spin_unlock(&ctx->lock);
 	}
-unlock:
 	rcu_read_unlock();
 
 	if (do_switch) {
@@ -4605,9 +4575,6 @@ void perf_output_sample(struct perf_output_handle *handle,
 	if (sample_type & PERF_SAMPLE_DATA_SRC)
 		perf_output_put(handle, data->data_src.val);
 
-	if (sample_type & PERF_SAMPLE_TRANSACTION)
-		perf_output_put(handle, data->txn);
-
 	if (!event->attr.watermark) {
 		int wakeup_events = event->attr.wakeup_events;
 
@@ -5136,26 +5103,27 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
 	unsigned int size;
 	char tmp[16];
 	char *buf = NULL;
-	char *name;
+	const char *name;
+
+	memset(tmp, 0, sizeof(tmp));
 
 	if (file) {
 		struct inode *inode;
 		dev_t dev;
-
-		buf = kmalloc(PATH_MAX, GFP_KERNEL);
-		if (!buf) {
-			name = "//enomem";
-			goto cpy_name;
-		}
 		/*
-		 * d_path() works from the end of the rb backwards, so we
+		 * d_path works from the end of the rb backwards, so we
		 * need to add enough zero bytes after the string to handle
		 * the 64bit alignment we do later.
		 */
-		name = d_path(&file->f_path, buf, PATH_MAX - sizeof(u64));
+		buf = kzalloc(PATH_MAX + sizeof(u64), GFP_KERNEL);
+		if (!buf) {
+			name = strncpy(tmp, "//enomem", sizeof(tmp));
+			goto got_name;
+		}
+		name = d_path(&file->f_path, buf, PATH_MAX);
 		if (IS_ERR(name)) {
-			name = "//toolong";
-			goto cpy_name;
+			name = strncpy(tmp, "//toolong", sizeof(tmp));
+			goto got_name;
 		}
 		inode = file_inode(vma->vm_file);
 		dev = inode->i_sb->s_dev;
@@ -5163,39 +5131,34 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
 		gen = inode->i_generation;
 		maj = MAJOR(dev);
 		min = MINOR(dev);
-		goto got_name;
+
 	} else {
-		name = (char *)arch_vma_name(vma);
-		if (name)
-			goto cpy_name;
+		if (arch_vma_name(mmap_event->vma)) {
+			name = strncpy(tmp, arch_vma_name(mmap_event->vma),
+				       sizeof(tmp) - 1);
+			tmp[sizeof(tmp) - 1] = '\0';
+			goto got_name;
+		}
 
-		if (vma->vm_start <= vma->vm_mm->start_brk &&
+		if (!vma->vm_mm) {
+			name = strncpy(tmp, "[vdso]", sizeof(tmp));
+			goto got_name;
+		} else if (vma->vm_start <= vma->vm_mm->start_brk &&
 				vma->vm_end >= vma->vm_mm->brk) {
-			name = "[heap]";
-			goto cpy_name;
-		}
-		if (vma->vm_start <= vma->vm_mm->start_stack &&
+			name = strncpy(tmp, "[heap]", sizeof(tmp));
+			goto got_name;
+		} else if (vma->vm_start <= vma->vm_mm->start_stack &&
 				vma->vm_end >= vma->vm_mm->start_stack) {
-			name = "[stack]";
-			goto cpy_name;
+			name = strncpy(tmp, "[stack]", sizeof(tmp));
+			goto got_name;
 		}
 
-		name = "//anon";
-		goto cpy_name;
+		name = strncpy(tmp, "//anon", sizeof(tmp));
+		goto got_name;
 	}
 
-cpy_name:
-	strlcpy(tmp, name, sizeof(tmp));
-	name = tmp;
 got_name:
-	/*
-	 * Since our buffer works in 8 byte units we need to align our string
-	 * size to a multiple of 8. However, we must guarantee the tail end is
-	 * zero'd out to avoid leaking random bits to userspace.
-	 */
-	size = strlen(name)+1;
-	while (!IS_ALIGNED(size, sizeof(u64)))
-		name[size++] = '\0';
+	size = ALIGN(strlen(name)+1, sizeof(u64));
 
 	mmap_event->file_name = name;
 	mmap_event->file_size = size;
@@ -5684,6 +5647,11 @@ static void swevent_hlist_put(struct perf_event *event)
 {
 	int cpu;
 
+	if (event->cpu != -1) {
+		swevent_hlist_put_cpu(event, event->cpu);
+		return;
+	}
+
 	for_each_possible_cpu(cpu)
 		swevent_hlist_put_cpu(event, cpu);
 }
@@ -5717,6 +5685,9 @@ static int swevent_hlist_get(struct perf_event *event)
 	int err;
 	int cpu, failed_cpu;
 
+	if (event->cpu != -1)
+		return swevent_hlist_get_cpu(event, event->cpu);
+
 	get_online_cpus();
 	for_each_possible_cpu(cpu) {
 		err = swevent_hlist_get_cpu(event, cpu);
@@ -6325,7 +6296,6 @@ type_show(struct device *dev, struct device_attribute *attr, char *page)
 
 	return snprintf(page, PAGE_SIZE-1, "%d\n", pmu->type);
 }
-static DEVICE_ATTR_RO(type);
 
 static ssize_t
 perf_event_mux_interval_ms_show(struct device *dev,
@@ -6370,19 +6340,17 @@ perf_event_mux_interval_ms_store(struct device *dev,
 
 	return count;
 }
-static DEVICE_ATTR_RW(perf_event_mux_interval_ms);
 
-static struct attribute *pmu_dev_attrs[] = {
-	&dev_attr_type.attr,
-	&dev_attr_perf_event_mux_interval_ms.attr,
-	NULL,
+static struct device_attribute pmu_dev_attrs[] = {
+	__ATTR_RO(type),
+	__ATTR_RW(perf_event_mux_interval_ms),
+	__ATTR_NULL,
 };
-ATTRIBUTE_GROUPS(pmu_dev);
 
 static int pmu_bus_running;
 static struct bus_type pmu_bus = {
 	.name		= "event_source",
-	.dev_groups	= pmu_dev_groups,
+	.dev_attrs	= pmu_dev_attrs,
 };
 
 static void pmu_dev_release(struct device *dev)
@@ -7164,6 +7132,7 @@ SYSCALL_DEFINE5(perf_event_open,
 	}
 
 	perf_install_in_context(ctx, event, event->cpu);
+	++ctx->generation;
 	perf_unpin_context(ctx);
 	mutex_unlock(&ctx->mutex);
 
@@ -7246,6 +7215,7 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
 	WARN_ON_ONCE(ctx->parent_ctx);
 	mutex_lock(&ctx->mutex);
 	perf_install_in_context(ctx, event, cpu);
+	++ctx->generation;
 	perf_unpin_context(ctx);
 	mutex_unlock(&ctx->mutex);
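Note on the core.c hunks above: the removed v3.13 context_equiv() decides whether two inherited contexts are clones by comparing per-context generation counters that unclone_ctx(), list_add_event() and list_del_event() bump on every modification, while the restored v3.12 version only accepts two contexts cloned from the same parent at the same parent_gen snapshot; that is also why the SYSCALL_DEFINE5 and perf_event_create_kernel_counter hunks above re-add the explicit ++ctx->generation after perf_install_in_context(). A minimal standalone sketch of the two rules, with the context reduced to just the fields the check touches (illustrative types, not the kernel's):

#include <stdbool.h>

/* Reduced stand-in for struct perf_event_context (illustration only). */
struct ctx {
	struct ctx *parent_ctx;         /* context we were cloned from, or NULL */
	unsigned long long generation;  /* bumped on every add/del/unclone      */
	unsigned long long parent_gen;  /* parent's generation at clone time    */
	int pin_count;
};

/* v3.12 rule (what this tree reverts to): same parent, same snapshot. */
static bool equiv_v312(const struct ctx *a, const struct ctx *b)
{
	return a->parent_ctx && a->parent_ctx == b->parent_ctx
		&& a->parent_gen == b->parent_gen
		&& !a->pin_count && !b->pin_count;
}

/* v3.13 rule (what is being reverted): also accepts a direct parent/child
 * pair whose generation still matches the recorded snapshot. */
static bool equiv_v313(const struct ctx *a, const struct ctx *b)
{
	if (a->pin_count || b->pin_count)       /* pinning disables the swap */
		return false;
	if (a == b->parent_ctx && a->generation == b->parent_gen)
		return true;
	if (a->parent_ctx == b && a->parent_gen == b->generation)
		return true;
	return a->parent_ctx && a->parent_ctx == b->parent_ctx &&
	       a->parent_gen == b->parent_gen;  /* flattened hierarchy case */
}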
diff --git a/kernel/events/internal.h b/kernel/events/internal.h
index 569b2187..ca65997 100644
--- a/kernel/events/internal.h
+++ b/kernel/events/internal.h
@@ -82,16 +82,16 @@ static inline unsigned long perf_data_size(struct ring_buffer *rb)
 }
 
 #define DEFINE_OUTPUT_COPY(func_name, memcpy_func)			\
-static inline unsigned long						\
+static inline unsigned int						\
 func_name(struct perf_output_handle *handle,				\
-	  const void *buf, unsigned long len)				\
+	  const void *buf, unsigned int len)				\
 {									\
 	unsigned long size, written;					\
 									\
 	do {								\
-		size = min(handle->size, len);				\
+		size = min_t(unsigned long, handle->size, len);		\
+									\
 		written = memcpy_func(handle->addr, buf, size);		\
-		written = size - written;				\
 									\
 		len -= written;						\
 		handle->addr += written;				\
@@ -110,37 +110,20 @@ func_name(struct perf_output_handle *handle,			\
 	return len;							\
 }
 
-static inline unsigned long
-memcpy_common(void *dst, const void *src, unsigned long n)
+static inline int memcpy_common(void *dst, const void *src, size_t n)
 {
 	memcpy(dst, src, n);
-	return 0;
+	return n;
 }
 
 DEFINE_OUTPUT_COPY(__output_copy, memcpy_common)
 
-static inline unsigned long
-memcpy_skip(void *dst, const void *src, unsigned long n)
-{
-	return 0;
-}
+#define MEMCPY_SKIP(dst, src, n) (n)
 
-DEFINE_OUTPUT_COPY(__output_skip, memcpy_skip)
+DEFINE_OUTPUT_COPY(__output_skip, MEMCPY_SKIP)
 
 #ifndef arch_perf_out_copy_user
-#define arch_perf_out_copy_user arch_perf_out_copy_user
-
-static inline unsigned long
-arch_perf_out_copy_user(void *dst, const void *src, unsigned long n)
-{
-	unsigned long ret;
-
-	pagefault_disable();
-	ret = __copy_from_user_inatomic(dst, src, n);
-	pagefault_enable();
-
-	return ret;
-}
+#define arch_perf_out_copy_user __copy_from_user_inatomic
 #endif
 
 DEFINE_OUTPUT_COPY(__output_copy_user, arch_perf_out_copy_user)
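Note on the internal.h hunks: the revert flips the contract of the DEFINE_OUTPUT_COPY helpers. The removed v3.13 helpers return the number of bytes *not* copied (hence the dropped `written = size - written;` fix-up in the macro), while the restored v3.12 helpers return the bytes actually copied. A userspace-compilable sketch of the restored loop over a single flat buffer (the real macro also advances handle->page through the ring's page array when a page fills up; that step is omitted here):

#include <string.h>

/* Reduced output handle: one flat buffer instead of the rb page array. */
struct out_handle {
	char *addr;          /* next write position            */
	unsigned long size;  /* bytes left in the current page */
};

/* v3.12-style helper: returns bytes copied (v3.13 returned bytes NOT copied). */
static int memcpy_common(void *dst, const void *src, size_t n)
{
	memcpy(dst, src, n);
	return n;
}

/* Body generated by DEFINE_OUTPUT_COPY, minus the page-advance step. */
static unsigned int output_copy(struct out_handle *h, const void *buf,
				unsigned int len)
{
	unsigned long size, written;

	do {
		size = len < h->size ? len : h->size;   /* min_t() */
		written = memcpy_common(h->addr, buf, size);

		len     -= written;
		h->addr += written;
		h->size -= written;
		buf      = (const char *)buf + written;
	} while (len && written == size && h->size);

	return len;   /* bytes that could not be written into this buffer */
}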
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index e8b168a..9c2ddfb 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -12,10 +12,40 @@
 #include <linux/perf_event.h>
 #include <linux/vmalloc.h>
 #include <linux/slab.h>
-#include <linux/circ_buf.h>
 
 #include "internal.h"
 
+static bool perf_output_space(struct ring_buffer *rb, unsigned long tail,
+			      unsigned long offset, unsigned long head)
+{
+	unsigned long sz = perf_data_size(rb);
+	unsigned long mask = sz - 1;
+
+	/*
+	 * check if user-writable
+	 * overwrite : over-write its own tail
+	 * !overwrite: buffer possibly drops events.
+	 */
+	if (rb->overwrite)
+		return true;
+
+	/*
+	 * verify that payload is not bigger than buffer
+	 * otherwise masking logic may fail to detect
+	 * the "not enough space" condition
+	 */
+	if ((head - offset) > sz)
+		return false;
+
+	offset = (offset - tail) & mask;
+	head   = (head   - tail) & mask;
+
+	if ((int)(head - offset) < 0)
+		return false;
+
+	return true;
+}
+
 static void perf_output_wakeup(struct perf_output_handle *handle)
 {
 	atomic_set(&handle->rb->poll, POLL_IN);
@@ -85,8 +115,8 @@ again:
 	rb->user_page->data_head = head;
 
 	/*
-	 * Now check if we missed an update -- rely on previous implied
-	 * compiler barriers to force a re-read.
+	 * Now check if we missed an update, rely on the (compiler)
+	 * barrier in atomic_dec_and_test() to re-read rb->head.
 	 */
 	if (unlikely(head != local_read(&rb->head))) {
 		local_inc(&rb->nest);
@@ -105,7 +135,8 @@ int perf_output_begin(struct perf_output_handle *handle,
 {
 	struct ring_buffer *rb;
 	unsigned long tail, offset, head;
-	int have_lost, page_shift;
+	int have_lost;
+	struct perf_sample_data sample_data;
 	struct {
 		struct perf_event_header header;
 		u64			 id;
@@ -120,63 +151,57 @@ int perf_output_begin(struct perf_output_handle *handle,
 		event = event->parent;
 
 	rb = rcu_dereference(event->rb);
-	if (unlikely(!rb))
+	if (!rb)
 		goto out;
 
-	if (unlikely(!rb->nr_pages))
-		goto out;
+	handle->rb	= rb;
+	handle->event	= event;
 
-	handle->rb    = rb;
-	handle->event = event;
+	if (!rb->nr_pages)
+		goto out;
 
 	have_lost = local_read(&rb->lost);
-	if (unlikely(have_lost)) {
-		size += sizeof(lost_event);
-		if (event->attr.sample_id_all)
-			size += event->id_header_size;
+	if (have_lost) {
+		lost_event.header.size = sizeof(lost_event);
+		perf_event_header__init_id(&lost_event.header, &sample_data,
+					   event);
+		size += lost_event.header.size;
 	}
 
 	perf_output_get_handle(handle);
 
 	do {
+		/*
+		 * Userspace could choose to issue a mb() before updating the
+		 * tail pointer. So that all reads will be completed before the
+		 * write is issued.
+		 *
+		 * See perf_output_put_handle().
+		 */
 		tail = ACCESS_ONCE(rb->user_page->data_tail);
+		smp_mb();
 		offset = head = local_read(&rb->head);
-		if (!rb->overwrite &&
-		    unlikely(CIRC_SPACE(head, tail, perf_data_size(rb)) < size))
-			goto fail;
 		head += size;
+		if (unlikely(!perf_output_space(rb, tail, offset, head)))
+			goto fail;
 	} while (local_cmpxchg(&rb->head, offset, head) != offset);
 
-	/*
-	 * Separate the userpage->tail read from the data stores below.
-	 * Matches the MB userspace SHOULD issue after reading the data
-	 * and before storing the new tail position.
-	 *
-	 * See perf_output_put_handle().
-	 */
-	smp_mb();
-
-	if (unlikely(head - local_read(&rb->wakeup) > rb->watermark))
+	if (head - local_read(&rb->wakeup) > rb->watermark)
 		local_add(rb->watermark, &rb->wakeup);
 
-	page_shift = PAGE_SHIFT + page_order(rb);
+	handle->page = offset >> (PAGE_SHIFT + page_order(rb));
+	handle->page &= rb->nr_pages - 1;
+	handle->size = offset & ((PAGE_SIZE << page_order(rb)) - 1);
+	handle->addr = rb->data_pages[handle->page];
+	handle->addr += handle->size;
+	handle->size = (PAGE_SIZE << page_order(rb)) - handle->size;
 
-	handle->page = (offset >> page_shift) & (rb->nr_pages - 1);
-	offset &= (1UL << page_shift) - 1;
-	handle->addr = rb->data_pages[handle->page] + offset;
-	handle->size = (1UL << page_shift) - offset;
-
-	if (unlikely(have_lost)) {
-		struct perf_sample_data sample_data;
-
-		lost_event.header.size = sizeof(lost_event);
+	if (have_lost) {
 		lost_event.header.type = PERF_RECORD_LOST;
 		lost_event.header.misc = 0;
 		lost_event.id          = event->id;
 		lost_event.lost        = local_xchg(&rb->lost, 0);
-		perf_event_header__init_id(&lost_event.header,
-					   &sample_data, event);
 		perf_output_put(handle, lost_event);
 		perf_event__output_id_sample(event, handle, &sample_data);
 	}
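Note on the ring_buffer.c hunks: the revert trades the v3.13 CIRC_SPACE() reservation test for the older perf_output_space(), and moves the smp_mb() back between the data_tail read and the head update. Both space tests answer the same question for a power-of-two buffer: can the new record fit between head and the reader's tail without clobbering unread data. A standalone model of the two checks (assumes sz is a power of two well below 2^31; a simplification of the code above, not the kernel source):

#include <stdbool.h>

/* Linux's CIRC_SPACE() for a power-of-two buffer of size `size`. */
#define CIRC_SPACE(head, tail, size) \
	(((tail) - ((head) + 1)) & ((size) - 1))

/* The restored check, modeled on perf_output_space() above. */
static bool output_space(bool overwrite, unsigned long sz,
			 unsigned long tail, unsigned long offset,
			 unsigned long head)
{
	unsigned long mask = sz - 1;

	if (overwrite)                /* writer may clobber its own tail */
		return true;
	if ((head - offset) > sz)     /* payload larger than the buffer  */
		return false;

	offset = (offset - tail) & mask;
	head   = (head   - tail) & mask;

	/* signed compare, as in the kernel's (int) cast */
	return (int)(head - offset) >= 0;
}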
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 24b7d6c..ad8e1bd 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -35,7 +35,6 @@
 #include <linux/kdebug.h>	/* notifier mechanism */
 #include "../../mm/internal.h"	/* munlock_vma_page */
 #include <linux/percpu-rwsem.h>
-#include <linux/task_work.h>
 
 #include <linux/uprobes.h>
 
@@ -245,12 +244,12 @@ static int verify_opcode(struct page *page, unsigned long vaddr, uprobe_opcode_t
 * the architecture. If an arch has variable length instruction and the
 * breakpoint instruction is not of the smallest length instruction
 * supported by that architecture then we need to modify is_trap_at_addr and
- * uprobe_write_opcode accordingly. This would never be a problem for archs
- * that have fixed length instructions.
+ * write_opcode accordingly. This would never be a problem for archs that
+ * have fixed length instructions.
 */
 
 /*
- * uprobe_write_opcode - write the opcode at a given virtual address.
+ * write_opcode - write the opcode at a given virtual address.
 * @mm: the probed process address space.
 * @vaddr: the virtual address to store the opcode.
 * @opcode: opcode to be written at @vaddr.
@@ -261,7 +260,7 @@ static int verify_opcode(struct page *page, unsigned long vaddr, uprobe_opcode_t
 * For mm @mm, write the opcode at @vaddr.
 * Return 0 (success) or a negative errno.
 */
-int uprobe_write_opcode(struct mm_struct *mm, unsigned long vaddr,
+static int write_opcode(struct mm_struct *mm, unsigned long vaddr,
 			uprobe_opcode_t opcode)
 {
 	struct page *old_page, *new_page;
@@ -315,7 +314,7 @@ put_old:
 */
 int __weak set_swbp(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr)
 {
-	return uprobe_write_opcode(mm, vaddr, UPROBE_SWBP_INSN);
+	return write_opcode(mm, vaddr, UPROBE_SWBP_INSN);
 }
 
 /**
@@ -330,7 +329,7 @@ int __weak set_swbp(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned
 int __weak
 set_orig_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr)
 {
-	return uprobe_write_opcode(mm, vaddr, *(uprobe_opcode_t *)auprobe->insn);
+	return write_opcode(mm, vaddr, *(uprobe_opcode_t *)auprobe->insn);
 }
 
 static int match_uprobe(struct uprobe *l, struct uprobe *r)
@@ -504,8 +503,9 @@ static bool consumer_del(struct uprobe *uprobe, struct uprobe_consumer *uc)
 	return ret;
 }
 
-static int __copy_insn(struct address_space *mapping, struct file *filp,
-			void *insn, int nbytes, loff_t offset)
+static int
+__copy_insn(struct address_space *mapping, struct file *filp, char *insn,
+	unsigned long nbytes, loff_t offset)
 {
 	struct page *page;
 
@@ -527,28 +527,28 @@ static int __copy_insn(struct address_space *mapping, struct file *filp,
 
 static int copy_insn(struct uprobe *uprobe, struct file *filp)
 {
-	struct address_space *mapping = uprobe->inode->i_mapping;
-	loff_t offs = uprobe->offset;
-	void *insn = uprobe->arch.insn;
-	int size = MAX_UINSN_BYTES;
-	int len, err = -EIO;
+	struct address_space *mapping;
+	unsigned long nbytes;
+	int bytes;
 
-	/* Copy only available bytes, -EIO if nothing was read */
-	do {
-		if (offs >= i_size_read(uprobe->inode))
-			break;
+	nbytes = PAGE_SIZE - (uprobe->offset & ~PAGE_MASK);
+	mapping = uprobe->inode->i_mapping;
 
-		len = min_t(int, size, PAGE_SIZE - (offs & ~PAGE_MASK));
-		err = __copy_insn(mapping, filp, insn, len, offs);
-		if (err)
-			break;
-
-		insn += len;
-		offs += len;
-		size -= len;
-	} while (size);
+	/* Instruction at end of binary; copy only available bytes */
+	if (uprobe->offset + MAX_UINSN_BYTES > uprobe->inode->i_size)
+		bytes = uprobe->inode->i_size - uprobe->offset;
+	else
+		bytes = MAX_UINSN_BYTES;
 
-	return err;
+	/* Instruction at the page-boundary; copy bytes in second page */
+	if (nbytes < bytes) {
+		int err = __copy_insn(mapping, filp, uprobe->arch.insn + nbytes,
+				bytes - nbytes, uprobe->offset + nbytes);
+		if (err)
+			return err;
+		bytes = nbytes;
+	}
+	return __copy_insn(mapping, filp, uprobe->arch.insn, bytes, uprobe->offset);
 }
 
 static int prepare_uprobe(struct uprobe *uprobe, struct file *file,
@@ -576,7 +576,7 @@ static int prepare_uprobe(struct uprobe *uprobe, struct file *file,
 	if (ret)
 		goto out;
 
-	/* uprobe_write_opcode() assumes we don't cross page boundary */
+	/* write_opcode() assumes we don't cross page boundary */
 	BUG_ON((uprobe->offset & ~PAGE_MASK) +
 			UPROBE_SWBP_INSN_SIZE > PAGE_SIZE);
 
@@ -1096,22 +1096,21 @@ void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned lon
 }
 
 /* Slot allocation for XOL */
-static int xol_add_vma(struct mm_struct *mm, struct xol_area *area)
+static int xol_add_vma(struct xol_area *area)
 {
+	struct mm_struct *mm = current->mm;
 	int ret = -EALREADY;
 
 	down_write(&mm->mmap_sem);
 	if (mm->uprobes_state.xol_area)
 		goto fail;
 
-	if (!area->vaddr) {
-		/* Try to map as high as possible, this is only a hint. */
-		area->vaddr = get_unmapped_area(NULL, TASK_SIZE - PAGE_SIZE,
-						PAGE_SIZE, 0, 0);
-		if (area->vaddr & ~PAGE_MASK) {
-			ret = area->vaddr;
-			goto fail;
-		}
+	ret = -ENOMEM;
+	/* Try to map as high as possible, this is only a hint. */
+	area->vaddr = get_unmapped_area(NULL, TASK_SIZE - PAGE_SIZE, PAGE_SIZE, 0, 0);
+	if (area->vaddr & ~PAGE_MASK) {
+		ret = area->vaddr;
+		goto fail;
 	}
 
 	ret = install_special_mapping(mm, area->vaddr, PAGE_SIZE,
@@ -1121,19 +1120,30 @@ static int xol_add_vma(struct mm_struct *mm, struct xol_area *area)
 
 	smp_wmb();	/* pairs with get_xol_area() */
 	mm->uprobes_state.xol_area = area;
+	ret = 0;
 fail:
 	up_write(&mm->mmap_sem);
 
 	return ret;
 }
 
-static struct xol_area *__create_xol_area(unsigned long vaddr)
+/*
+ * get_xol_area - Allocate process's xol_area if necessary.
+ * This area will be used for storing instructions for execution out of line.
+ *
+ * Returns the allocated area or NULL.
+ */
+static struct xol_area *get_xol_area(void)
 {
 	struct mm_struct *mm = current->mm;
-	uprobe_opcode_t insn = UPROBE_SWBP_INSN;
 	struct xol_area *area;
+	uprobe_opcode_t insn = UPROBE_SWBP_INSN;
+
+	area = mm->uprobes_state.xol_area;
+	if (area)
+		goto ret;
 
-	area = kmalloc(sizeof(*area), GFP_KERNEL);
+	area = kzalloc(sizeof(*area), GFP_KERNEL);
 	if (unlikely(!area))
 		goto out;
 
@@ -1145,14 +1155,13 @@ static struct xol_area *__create_xol_area(unsigned long vaddr)
 	if (!area->page)
 		goto free_bitmap;
 
-	area->vaddr = vaddr;
-	init_waitqueue_head(&area->wq);
-	/* Reserve the 1st slot for get_trampoline_vaddr() */
+	/* allocate first slot of task's xol_area for the return probes */
 	set_bit(0, area->bitmap);
-	atomic_set(&area->slot_count, 1);
 	copy_to_page(area->page, 0, &insn, UPROBE_SWBP_INSN_SIZE);
+	atomic_set(&area->slot_count, 1);
+	init_waitqueue_head(&area->wq);
 
-	if (!xol_add_vma(mm, area))
+	if (!xol_add_vma(area))
 		return area;
 
 	__free_page(area->page);
@@ -1161,25 +1170,9 @@ static struct xol_area *__create_xol_area(unsigned long vaddr)
 free_area:
 	kfree(area);
 out:
-	return NULL;
-}
-
-/*
- * get_xol_area - Allocate process's xol_area if necessary.
- * This area will be used for storing instructions for execution out of line.
- *
- * Returns the allocated area or NULL.
- */
-static struct xol_area *get_xol_area(void)
-{
-	struct mm_struct *mm = current->mm;
-	struct xol_area *area;
-
-	if (!mm->uprobes_state.xol_area)
-		__create_xol_area(0);
-
 	area = mm->uprobes_state.xol_area;
-	smp_read_barrier_depends();	/* pairs with wmb in xol_add_vma() */
+ ret:
+	smp_read_barrier_depends();	/* pairs with wmb in xol_add_vma() */
 	return area;
 }
 
@@ -1263,8 +1256,7 @@ static unsigned long xol_get_insn_slot(struct uprobe *uprobe)
 		return 0;
 
 	/* Initialize the slot */
-	copy_to_page(area->page, xol_vaddr,
-		     uprobe->arch.ixol, sizeof(uprobe->arch.ixol));
+	copy_to_page(area->page, xol_vaddr, uprobe->arch.insn, MAX_UINSN_BYTES);
 	/*
 	 * We probably need flush_icache_user_range() but it needs vma.
 	 * This should work on supported architectures too.
@@ -1353,6 +1345,14 @@ void uprobe_free_utask(struct task_struct *t)
 }
 
 /*
+ * Called in context of a new clone/fork from copy_process.
+ */
+void uprobe_copy_process(struct task_struct *t)
+{
+	t->utask = NULL;
+}
+
+/*
 * Allocate a uprobe_task object for the task if if necessary.
 * Called when the thread hits a breakpoint.
 *
@@ -1367,90 +1367,6 @@ static struct uprobe_task *get_utask(void)
 	return current->utask;
 }
 
-static int dup_utask(struct task_struct *t, struct uprobe_task *o_utask)
-{
-	struct uprobe_task *n_utask;
-	struct return_instance **p, *o, *n;
-
-	n_utask = kzalloc(sizeof(struct uprobe_task), GFP_KERNEL);
-	if (!n_utask)
-		return -ENOMEM;
-	t->utask = n_utask;
-
-	p = &n_utask->return_instances;
-	for (o = o_utask->return_instances; o; o = o->next) {
-		n = kmalloc(sizeof(struct return_instance), GFP_KERNEL);
-		if (!n)
-			return -ENOMEM;
-
-		*n = *o;
-		atomic_inc(&n->uprobe->ref);
-		n->next = NULL;
-
-		*p = n;
-		p = &n->next;
-		n_utask->depth++;
-	}
-
-	return 0;
-}
-
-static void uprobe_warn(struct task_struct *t, const char *msg)
-{
-	pr_warn("uprobe: %s:%d failed to %s\n",
-			current->comm, current->pid, msg);
-}
-
-static void dup_xol_work(struct callback_head *work)
-{
-	kfree(work);
-
-	if (current->flags & PF_EXITING)
-		return;
-
-	if (!__create_xol_area(current->utask->vaddr))
-		uprobe_warn(current, "dup xol area");
-}
-
-/*
- * Called in context of a new clone/fork from copy_process.
- */
-void uprobe_copy_process(struct task_struct *t, unsigned long flags)
-{
-	struct uprobe_task *utask = current->utask;
-	struct mm_struct *mm = current->mm;
-	struct callback_head *work;
-	struct xol_area *area;
-
-	t->utask = NULL;
-
-	if (!utask || !utask->return_instances)
-		return;
-
-	if (mm == t->mm && !(flags & CLONE_VFORK))
-		return;
-
-	if (dup_utask(t, utask))
-		return uprobe_warn(t, "dup ret instances");
-
-	/* The task can fork() after dup_xol_work() fails */
-	area = mm->uprobes_state.xol_area;
-	if (!area)
-		return uprobe_warn(t, "dup xol area");
-
-	if (mm == t->mm)
-		return;
-
-	/* TODO: move it into the union in uprobe_task */
-	work = kmalloc(sizeof(*work), GFP_KERNEL);
-	if (!work)
-		return uprobe_warn(t, "dup xol area");
-
-	t->utask->vaddr = area->vaddr;
-	init_task_work(work, dup_xol_work);
-	task_work_add(t, work, true);
-}
-
 /*
 * Current area->vaddr notion assume the trampoline address is always
 * equal area->vaddr.
@@ -1941,4 +1857,9 @@ static int __init init_uprobes(void)
 
 	return register_die_notifier(&uprobe_exception_nb);
 }
-__initcall(init_uprobes);
+module_init(init_uprobes);
+
+static void __exit exit_uprobes(void)
+{
+}
+module_exit(exit_uprobes);
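Note on the uprobes.c hunks: the restored copy_insn() reads at most MAX_UINSN_BYTES, clamps that to the end of the backing file, and when the probed instruction straddles a page boundary it copies the tail from the second page before copying the head from the first. A small standalone sketch of just that split arithmetic (printf stand-ins for __copy_insn(); the MAX_UINSN_BYTES value here is illustrative, it is arch-dependent in the kernel):

#include <stdio.h>

#define PAGE_SIZE	4096UL
#define PAGE_MASK	(~(PAGE_SIZE - 1))
#define MAX_UINSN_BYTES	16	/* illustrative; arch-dependent in the kernel */

/* How the restored copy_insn() splits a copy that may cross a page. */
static void split_copy(unsigned long long offset, unsigned long long file_size)
{
	/* room left in the page containing `offset` */
	unsigned long nbytes = PAGE_SIZE - (offset & ~PAGE_MASK);
	unsigned long bytes  = MAX_UINSN_BYTES;

	if (offset + MAX_UINSN_BYTES > file_size)	/* end of binary */
		bytes = file_size - offset;

	if (nbytes < bytes) {				/* crosses a page */
		printf("second page: copy %lu bytes at offset %llu\n",
		       bytes - nbytes, offset + nbytes);
		bytes = nbytes;				/* first page gets the rest */
	}
	printf("first page:  copy %lu bytes at offset %llu\n", bytes, offset);
}

int main(void)
{
	split_copy(4090, 1 << 20);	/* crosses a page: 6 bytes + 10 bytes */
	split_copy(100,  1 << 20);	/* fits entirely in one page */
	return 0;
}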