summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
authorIngo Molnar <mingo@elte.hu>2009-07-18 13:50:22 (GMT)
committerIngo Molnar <mingo@elte.hu>2009-07-18 13:50:40 (GMT)
commit5304d5fc74a269cc6c3e70f9713684ca729abdf0 (patch)
tree6a5db62915abd260241a2b9aee34086c93293ca6 /kernel
parent54d35f29f49224d86b994acb6e5969b9ba09022d (diff)
parent78af08d90b8f745044b1274430bc4bc6b2b27aca (diff)
downloadlinux-fsl-qoriq-5304d5fc74a269cc6c3e70f9713684ca729abdf0.tar.xz
Merge branch 'linus' into sched/core
Merge reason: branch had an old upstream base (-rc1-ish), but also merge to avoid a conflict. Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel')
-rw-r--r--kernel/acct.c6
-rw-r--r--kernel/exit.c1
-rw-r--r--kernel/fork.c1
-rw-r--r--kernel/futex.c1
-rw-r--r--kernel/hrtimer.c110
-rw-r--r--kernel/kmod.c1
-rw-r--r--kernel/kprobes.c6
-rw-r--r--kernel/module.c6
-rw-r--r--kernel/perf_counter.c322
-rw-r--r--kernel/power/user.c1
-rw-r--r--kernel/ptrace.c4
-rw-r--r--kernel/rcutree.c3
-rw-r--r--kernel/resource.c2
-rw-r--r--kernel/sched.c57
-rw-r--r--kernel/sched_fair.c3
-rw-r--r--kernel/sched_rt.c18
-rw-r--r--kernel/time/clockevents.c11
-rw-r--r--kernel/trace/Kconfig6
-rw-r--r--kernel/trace/blktrace.c1
-rw-r--r--kernel/trace/ftrace.c9
-rw-r--r--kernel/trace/trace.c1
-rw-r--r--kernel/trace/trace_event_types.h3
-rw-r--r--kernel/trace/trace_functions.c2
-rw-r--r--kernel/trace/trace_output.c3
-rw-r--r--kernel/trace/trace_stack.c4
25 files changed, 426 insertions, 156 deletions
diff --git a/kernel/acct.c b/kernel/acct.c
index 7afa315..9f33910 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -215,6 +215,7 @@ static void acct_file_reopen(struct bsd_acct_struct *acct, struct file *file,
static int acct_on(char *name)
{
struct file *file;
+ struct vfsmount *mnt;
int error;
struct pid_namespace *ns;
struct bsd_acct_struct *acct = NULL;
@@ -256,11 +257,12 @@ static int acct_on(char *name)
acct = NULL;
}
- mnt_pin(file->f_path.mnt);
+ mnt = file->f_path.mnt;
+ mnt_pin(mnt);
acct_file_reopen(ns->bacct, file, ns);
spin_unlock(&acct_lock);
- mntput(file->f_path.mnt); /* it's pinned, now give up active reference */
+ mntput(mnt); /* it's pinned, now give up active reference */
kfree(acct);
return 0;
diff --git a/kernel/exit.c b/kernel/exit.c
index 628d41f..869dc22 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -12,7 +12,6 @@
#include <linux/completion.h>
#include <linux/personality.h>
#include <linux/tty.h>
-#include <linux/mnt_namespace.h>
#include <linux/iocontext.h>
#include <linux/key.h>
#include <linux/security.h>
diff --git a/kernel/fork.c b/kernel/fork.c
index 467746b..bd29592 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -17,7 +17,6 @@
#include <linux/module.h>
#include <linux/vmalloc.h>
#include <linux/completion.h>
-#include <linux/mnt_namespace.h>
#include <linux/personality.h>
#include <linux/mempolicy.h>
#include <linux/sem.h>
diff --git a/kernel/futex.c b/kernel/futex.c
index 794c862..0672ff8 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -247,6 +247,7 @@ again:
if (err < 0)
return err;
+ page = compound_head(page);
lock_page(page);
if (!page->mapping) {
unlock_page(page);
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 9002958..49da79a 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -191,6 +191,46 @@ struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer,
}
}
+
+/*
+ * Get the preferred target CPU for NOHZ
+ */
+static int hrtimer_get_target(int this_cpu, int pinned)
+{
+#ifdef CONFIG_NO_HZ
+ if (!pinned && get_sysctl_timer_migration() && idle_cpu(this_cpu)) {
+ int preferred_cpu = get_nohz_load_balancer();
+
+ if (preferred_cpu >= 0)
+ return preferred_cpu;
+ }
+#endif
+ return this_cpu;
+}
+
+/*
+ * With HIGHRES=y we do not migrate the timer when it is expiring
+ * before the next event on the target cpu because we cannot reprogram
+ * the target cpu hardware and we would cause it to fire late.
+ *
+ * Called with cpu_base->lock of target cpu held.
+ */
+static int
+hrtimer_check_target(struct hrtimer *timer, struct hrtimer_clock_base *new_base)
+{
+#ifdef CONFIG_HIGH_RES_TIMERS
+ ktime_t expires;
+
+ if (!new_base->cpu_base->hres_active)
+ return 0;
+
+ expires = ktime_sub(hrtimer_get_expires(timer), new_base->offset);
+ return expires.tv64 <= new_base->cpu_base->expires_next.tv64;
+#else
+ return 0;
+#endif
+}
+
/*
* Switch the timer base to the current CPU when possible.
*/
@@ -200,16 +240,8 @@ switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base,
{
struct hrtimer_clock_base *new_base;
struct hrtimer_cpu_base *new_cpu_base;
- int cpu, preferred_cpu = -1;
-
- cpu = smp_processor_id();
-#if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP)
- if (!pinned && get_sysctl_timer_migration() && idle_cpu(cpu)) {
- preferred_cpu = get_nohz_load_balancer();
- if (preferred_cpu >= 0)
- cpu = preferred_cpu;
- }
-#endif
+ int this_cpu = smp_processor_id();
+ int cpu = hrtimer_get_target(this_cpu, pinned);
again:
new_cpu_base = &per_cpu(hrtimer_bases, cpu);
@@ -217,7 +249,7 @@ again:
if (base != new_base) {
/*
- * We are trying to schedule the timer on the local CPU.
+ * We are trying to move timer to new_base.
* However we can't change timer's base while it is running,
* so we keep it on the same CPU. No hassle vs. reprogramming
* the event source in the high resolution case. The softirq
@@ -233,38 +265,12 @@ again:
spin_unlock(&base->cpu_base->lock);
spin_lock(&new_base->cpu_base->lock);
- /* Optimized away for NOHZ=n SMP=n */
- if (cpu == preferred_cpu) {
- /* Calculate clock monotonic expiry time */
-#ifdef CONFIG_HIGH_RES_TIMERS
- ktime_t expires = ktime_sub(hrtimer_get_expires(timer),
- new_base->offset);
-#else
- ktime_t expires = hrtimer_get_expires(timer);
-#endif
-
- /*
- * Get the next event on target cpu from the
- * clock events layer.
- * This covers the highres=off nohz=on case as well.
- */
- ktime_t next = clockevents_get_next_event(cpu);
-
- ktime_t delta = ktime_sub(expires, next);
-
- /*
- * We do not migrate the timer when it is expiring
- * before the next event on the target cpu because
- * we cannot reprogram the target cpu hardware and
- * we would cause it to fire late.
- */
- if (delta.tv64 < 0) {
- cpu = smp_processor_id();
- spin_unlock(&new_base->cpu_base->lock);
- spin_lock(&base->cpu_base->lock);
- timer->base = base;
- goto again;
- }
+ if (cpu != this_cpu && hrtimer_check_target(timer, new_base)) {
+ cpu = this_cpu;
+ spin_unlock(&new_base->cpu_base->lock);
+ spin_lock(&base->cpu_base->lock);
+ timer->base = base;
+ goto again;
}
timer->base = new_base;
}
@@ -1276,14 +1282,22 @@ void hrtimer_interrupt(struct clock_event_device *dev)
expires_next.tv64 = KTIME_MAX;
+ spin_lock(&cpu_base->lock);
+ /*
+ * We set expires_next to KTIME_MAX here with cpu_base->lock
+ * held to prevent that a timer is enqueued in our queue via
+ * the migration code. This does not affect enqueueing of
+ * timers which run their callback and need to be requeued on
+ * this CPU.
+ */
+ cpu_base->expires_next.tv64 = KTIME_MAX;
+
base = cpu_base->clock_base;
for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
ktime_t basenow;
struct rb_node *node;
- spin_lock(&cpu_base->lock);
-
basenow = ktime_add(now, base->offset);
while ((node = base->first)) {
@@ -1316,11 +1330,15 @@ void hrtimer_interrupt(struct clock_event_device *dev)
__run_hrtimer(timer);
}
- spin_unlock(&cpu_base->lock);
base++;
}
+ /*
+ * Store the new expiry value so the migration code can verify
+ * against it.
+ */
cpu_base->expires_next = expires_next;
+ spin_unlock(&cpu_base->lock);
/* Reprogramming necessary ? */
if (expires_next.tv64 != KTIME_MAX) {
diff --git a/kernel/kmod.c b/kernel/kmod.c
index 7e95bed..385c31a 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -24,7 +24,6 @@
#include <linux/unistd.h>
#include <linux/kmod.h>
#include <linux/slab.h>
-#include <linux/mnt_namespace.h>
#include <linux/completion.h>
#include <linux/file.h>
#include <linux/fdtable.h>
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index c0fa54b..16b5739 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -237,13 +237,9 @@ static int __kprobes collect_garbage_slots(void)
{
struct kprobe_insn_page *kip;
struct hlist_node *pos, *next;
- int safety;
/* Ensure no-one is preepmted on the garbages */
- mutex_unlock(&kprobe_insn_mutex);
- safety = check_safety();
- mutex_lock(&kprobe_insn_mutex);
- if (safety != 0)
+ if (check_safety())
return -EAGAIN;
hlist_for_each_entry_safe(kip, pos, next, &kprobe_insn_pages, hlist) {
diff --git a/kernel/module.c b/kernel/module.c
index 38928fc..0a04983 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2451,9 +2451,9 @@ SYSCALL_DEFINE3(init_module, void __user *, umod,
return ret;
}
if (ret > 0) {
- printk(KERN_WARNING "%s: '%s'->init suspiciously returned %d, "
- "it should follow 0/-E convention\n"
- KERN_WARNING "%s: loading module anyway...\n",
+ printk(KERN_WARNING
+"%s: '%s'->init suspiciously returned %d, it should follow 0/-E convention\n"
+"%s: loading module anyway...\n",
__func__, mod->name, ret,
__func__);
dump_stack();
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 1a933a2..a641eb7 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -236,6 +236,8 @@ list_add_counter(struct perf_counter *counter, struct perf_counter_context *ctx)
list_add_rcu(&counter->event_entry, &ctx->event_list);
ctx->nr_counters++;
+ if (counter->attr.inherit_stat)
+ ctx->nr_stat++;
}
/*
@@ -250,6 +252,8 @@ list_del_counter(struct perf_counter *counter, struct perf_counter_context *ctx)
if (list_empty(&counter->list_entry))
return;
ctx->nr_counters--;
+ if (counter->attr.inherit_stat)
+ ctx->nr_stat--;
list_del_init(&counter->list_entry);
list_del_rcu(&counter->event_entry);
@@ -1006,6 +1010,81 @@ static int context_equiv(struct perf_counter_context *ctx1,
&& !ctx1->pin_count && !ctx2->pin_count;
}
+static void __perf_counter_read(void *counter);
+
+static void __perf_counter_sync_stat(struct perf_counter *counter,
+ struct perf_counter *next_counter)
+{
+ u64 value;
+
+ if (!counter->attr.inherit_stat)
+ return;
+
+ /*
+ * Update the counter value, we cannot use perf_counter_read()
+ * because we're in the middle of a context switch and have IRQs
+ * disabled, which upsets smp_call_function_single(), however
+ * we know the counter must be on the current CPU, therefore we
+ * don't need to use it.
+ */
+ switch (counter->state) {
+ case PERF_COUNTER_STATE_ACTIVE:
+ __perf_counter_read(counter);
+ break;
+
+ case PERF_COUNTER_STATE_INACTIVE:
+ update_counter_times(counter);
+ break;
+
+ default:
+ break;
+ }
+
+ /*
+ * In order to keep per-task stats reliable we need to flip the counter
+ * values when we flip the contexts.
+ */
+ value = atomic64_read(&next_counter->count);
+ value = atomic64_xchg(&counter->count, value);
+ atomic64_set(&next_counter->count, value);
+
+ swap(counter->total_time_enabled, next_counter->total_time_enabled);
+ swap(counter->total_time_running, next_counter->total_time_running);
+
+ /*
+ * Since we swizzled the values, update the user visible data too.
+ */
+ perf_counter_update_userpage(counter);
+ perf_counter_update_userpage(next_counter);
+}
+
+#define list_next_entry(pos, member) \
+ list_entry(pos->member.next, typeof(*pos), member)
+
+static void perf_counter_sync_stat(struct perf_counter_context *ctx,
+ struct perf_counter_context *next_ctx)
+{
+ struct perf_counter *counter, *next_counter;
+
+ if (!ctx->nr_stat)
+ return;
+
+ counter = list_first_entry(&ctx->event_list,
+ struct perf_counter, event_entry);
+
+ next_counter = list_first_entry(&next_ctx->event_list,
+ struct perf_counter, event_entry);
+
+ while (&counter->event_entry != &ctx->event_list &&
+ &next_counter->event_entry != &next_ctx->event_list) {
+
+ __perf_counter_sync_stat(counter, next_counter);
+
+ counter = list_next_entry(counter, event_entry);
+ next_counter = list_next_entry(counter, event_entry);
+ }
+}
+
/*
* Called from scheduler to remove the counters of the current task,
* with interrupts disabled.
@@ -1061,6 +1140,8 @@ void perf_counter_task_sched_out(struct task_struct *task,
ctx->task = next;
next_ctx->task = task;
do_switch = 0;
+
+ perf_counter_sync_stat(ctx, next_ctx);
}
spin_unlock(&next_ctx->lock);
spin_unlock(&ctx->lock);
@@ -1348,9 +1429,56 @@ void perf_counter_task_tick(struct task_struct *curr, int cpu)
}
/*
+ * Enable all of a task's counters that have been marked enable-on-exec.
+ * This expects task == current.
+ */
+static void perf_counter_enable_on_exec(struct task_struct *task)
+{
+ struct perf_counter_context *ctx;
+ struct perf_counter *counter;
+ unsigned long flags;
+ int enabled = 0;
+
+ local_irq_save(flags);
+ ctx = task->perf_counter_ctxp;
+ if (!ctx || !ctx->nr_counters)
+ goto out;
+
+ __perf_counter_task_sched_out(ctx);
+
+ spin_lock(&ctx->lock);
+
+ list_for_each_entry(counter, &ctx->counter_list, list_entry) {
+ if (!counter->attr.enable_on_exec)
+ continue;
+ counter->attr.enable_on_exec = 0;
+ if (counter->state >= PERF_COUNTER_STATE_INACTIVE)
+ continue;
+ counter->state = PERF_COUNTER_STATE_INACTIVE;
+ counter->tstamp_enabled =
+ ctx->time - counter->total_time_enabled;
+ enabled = 1;
+ }
+
+ /*
+ * Unclone this context if we enabled any counter.
+ */
+ if (enabled && ctx->parent_ctx) {
+ put_ctx(ctx->parent_ctx);
+ ctx->parent_ctx = NULL;
+ }
+
+ spin_unlock(&ctx->lock);
+
+ perf_counter_task_sched_in(task, smp_processor_id());
+ out:
+ local_irq_restore(flags);
+}
+
+/*
* Cross CPU call to read the hardware counter
*/
-static void __read(void *info)
+static void __perf_counter_read(void *info)
{
struct perf_counter *counter = info;
struct perf_counter_context *ctx = counter->ctx;
@@ -1372,7 +1500,7 @@ static u64 perf_counter_read(struct perf_counter *counter)
*/
if (counter->state == PERF_COUNTER_STATE_ACTIVE) {
smp_call_function_single(counter->oncpu,
- __read, counter, 1);
+ __perf_counter_read, counter, 1);
} else if (counter->state == PERF_COUNTER_STATE_INACTIVE) {
update_counter_times(counter);
}
@@ -1508,11 +1636,13 @@ static void free_counter(struct perf_counter *counter)
{
perf_pending_sync(counter);
- atomic_dec(&nr_counters);
- if (counter->attr.mmap)
- atomic_dec(&nr_mmap_counters);
- if (counter->attr.comm)
- atomic_dec(&nr_comm_counters);
+ if (!counter->parent) {
+ atomic_dec(&nr_counters);
+ if (counter->attr.mmap)
+ atomic_dec(&nr_mmap_counters);
+ if (counter->attr.comm)
+ atomic_dec(&nr_comm_counters);
+ }
if (counter->destroy)
counter->destroy(counter);
@@ -1751,6 +1881,14 @@ int perf_counter_task_disable(void)
return 0;
}
+static int perf_counter_index(struct perf_counter *counter)
+{
+ if (counter->state != PERF_COUNTER_STATE_ACTIVE)
+ return 0;
+
+ return counter->hw.idx + 1 - PERF_COUNTER_INDEX_OFFSET;
+}
+
/*
* Callers need to ensure there can be no nesting of this function, otherwise
* the seqlock logic goes bad. We can not serialize this because the arch
@@ -1775,11 +1913,17 @@ void perf_counter_update_userpage(struct perf_counter *counter)
preempt_disable();
++userpg->lock;
barrier();
- userpg->index = counter->hw.idx;
+ userpg->index = perf_counter_index(counter);
userpg->offset = atomic64_read(&counter->count);
if (counter->state == PERF_COUNTER_STATE_ACTIVE)
userpg->offset -= atomic64_read(&counter->hw.prev_count);
+ userpg->time_enabled = counter->total_time_enabled +
+ atomic64_read(&counter->child_total_time_enabled);
+
+ userpg->time_running = counter->total_time_running +
+ atomic64_read(&counter->child_total_time_running);
+
barrier();
++userpg->lock;
preempt_enable();
@@ -1876,7 +2020,7 @@ fail:
static void perf_mmap_free_page(unsigned long addr)
{
- struct page *page = virt_to_page(addr);
+ struct page *page = virt_to_page((void *)addr);
page->mapping = NULL;
__free_page(page);
@@ -2483,15 +2627,14 @@ static void perf_counter_output(struct perf_counter *counter, int nmi,
u32 cpu, reserved;
} cpu_entry;
- header.type = 0;
+ header.type = PERF_EVENT_SAMPLE;
header.size = sizeof(header);
- header.misc = PERF_EVENT_MISC_OVERFLOW;
+ header.misc = 0;
header.misc |= perf_misc_flags(data->regs);
if (sample_type & PERF_SAMPLE_IP) {
ip = perf_instruction_pointer(data->regs);
- header.type |= PERF_SAMPLE_IP;
header.size += sizeof(ip);
}
@@ -2500,7 +2643,6 @@ static void perf_counter_output(struct perf_counter *counter, int nmi,
tid_entry.pid = perf_counter_pid(counter, current);
tid_entry.tid = perf_counter_tid(counter, current);
- header.type |= PERF_SAMPLE_TID;
header.size += sizeof(tid_entry);
}
@@ -2510,34 +2652,25 @@ static void perf_counter_output(struct perf_counter *counter, int nmi,
*/
time = sched_clock();
- header.type |= PERF_SAMPLE_TIME;
header.size += sizeof(u64);
}
- if (sample_type & PERF_SAMPLE_ADDR) {
- header.type |= PERF_SAMPLE_ADDR;
+ if (sample_type & PERF_SAMPLE_ADDR)
header.size += sizeof(u64);
- }
- if (sample_type & PERF_SAMPLE_ID) {
- header.type |= PERF_SAMPLE_ID;
+ if (sample_type & PERF_SAMPLE_ID)
header.size += sizeof(u64);
- }
if (sample_type & PERF_SAMPLE_CPU) {
- header.type |= PERF_SAMPLE_CPU;
header.size += sizeof(cpu_entry);
cpu_entry.cpu = raw_smp_processor_id();
}
- if (sample_type & PERF_SAMPLE_PERIOD) {
- header.type |= PERF_SAMPLE_PERIOD;
+ if (sample_type & PERF_SAMPLE_PERIOD)
header.size += sizeof(u64);
- }
if (sample_type & PERF_SAMPLE_GROUP) {
- header.type |= PERF_SAMPLE_GROUP;
header.size += sizeof(u64) +
counter->nr_siblings * sizeof(group_entry);
}
@@ -2547,10 +2680,9 @@ static void perf_counter_output(struct perf_counter *counter, int nmi,
if (callchain) {
callchain_size = (1 + callchain->nr) * sizeof(u64);
-
- header.type |= PERF_SAMPLE_CALLCHAIN;
header.size += callchain_size;
- }
+ } else
+ header.size += sizeof(u64);
}
ret = perf_output_begin(&handle, counter, header.size, nmi, 1);
@@ -2601,13 +2733,79 @@ static void perf_counter_output(struct perf_counter *counter, int nmi,
}
}
- if (callchain)
- perf_output_copy(&handle, callchain, callchain_size);
+ if (sample_type & PERF_SAMPLE_CALLCHAIN) {
+ if (callchain)
+ perf_output_copy(&handle, callchain, callchain_size);
+ else {
+ u64 nr = 0;
+ perf_output_put(&handle, nr);
+ }
+ }
perf_output_end(&handle);
}
/*
+ * read event
+ */
+
+struct perf_read_event {
+ struct perf_event_header header;
+
+ u32 pid;
+ u32 tid;
+ u64 value;
+ u64 format[3];
+};
+
+static void
+perf_counter_read_event(struct perf_counter *counter,
+ struct task_struct *task)
+{
+ struct perf_output_handle handle;
+ struct perf_read_event event = {
+ .header = {
+ .type = PERF_EVENT_READ,
+ .misc = 0,
+ .size = sizeof(event) - sizeof(event.format),
+ },
+ .pid = perf_counter_pid(counter, task),
+ .tid = perf_counter_tid(counter, task),
+ .value = atomic64_read(&counter->count),
+ };
+ int ret, i = 0;
+
+ if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
+ event.header.size += sizeof(u64);
+ event.format[i++] = counter->total_time_enabled;
+ }
+
+ if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
+ event.header.size += sizeof(u64);
+ event.format[i++] = counter->total_time_running;
+ }
+
+ if (counter->attr.read_format & PERF_FORMAT_ID) {
+ u64 id;
+
+ event.header.size += sizeof(u64);
+ if (counter->parent)
+ id = counter->parent->id;
+ else
+ id = counter->id;
+
+ event.format[i++] = id;
+ }
+
+ ret = perf_output_begin(&handle, counter, event.header.size, 0, 0);
+ if (ret)
+ return;
+
+ perf_output_copy(&handle, &event, event.header.size);
+ perf_output_end(&handle);
+}
+
+/*
* fork tracking
*/
@@ -2798,6 +2996,9 @@ void perf_counter_comm(struct task_struct *task)
{
struct perf_comm_event comm_event;
+ if (task->perf_counter_ctxp)
+ perf_counter_enable_on_exec(task);
+
if (!atomic_read(&nr_comm_counters))
return;
@@ -3317,8 +3518,8 @@ out:
put_cpu_var(perf_cpu_context);
}
-void
-perf_swcounter_event(u32 event, u64 nr, int nmi, struct pt_regs *regs, u64 addr)
+void __perf_swcounter_event(u32 event, u64 nr, int nmi,
+ struct pt_regs *regs, u64 addr)
{
struct perf_sample_data data = {
.regs = regs,
@@ -3509,9 +3710,21 @@ static const struct pmu *tp_perf_counter_init(struct perf_counter *counter)
}
#endif
+atomic_t perf_swcounter_enabled[PERF_COUNT_SW_MAX];
+
+static void sw_perf_counter_destroy(struct perf_counter *counter)
+{
+ u64 event = counter->attr.config;
+
+ WARN_ON(counter->parent);
+
+ atomic_dec(&perf_swcounter_enabled[event]);
+}
+
static const struct pmu *sw_perf_counter_init(struct perf_counter *counter)
{
const struct pmu *pmu = NULL;
+ u64 event = counter->attr.config;
/*
* Software counters (currently) can't in general distinguish
@@ -3520,7 +3733,7 @@ static const struct pmu *sw_perf_counter_init(struct perf_counter *counter)
* to be kernel events, and page faults are never hypervisor
* events.
*/
- switch (counter->attr.config) {
+ switch (event) {
case PERF_COUNT_SW_CPU_CLOCK:
pmu = &perf_ops_cpu_clock;
@@ -3541,6 +3754,10 @@ static const struct pmu *sw_perf_counter_init(struct perf_counter *counter)
case PERF_COUNT_SW_PAGE_FAULTS_MAJ:
case PERF_COUNT_SW_CONTEXT_SWITCHES:
case PERF_COUNT_SW_CPU_MIGRATIONS:
+ if (!counter->parent) {
+ atomic_inc(&perf_swcounter_enabled[event]);
+ counter->destroy = sw_perf_counter_destroy;
+ }
pmu = &perf_ops_generic;
break;
}
@@ -3556,6 +3773,7 @@ perf_counter_alloc(struct perf_counter_attr *attr,
int cpu,
struct perf_counter_context *ctx,
struct perf_counter *group_leader,
+ struct perf_counter *parent_counter,
gfp_t gfpflags)
{
const struct pmu *pmu;
@@ -3591,6 +3809,8 @@ perf_counter_alloc(struct perf_counter_attr *attr,
counter->ctx = ctx;
counter->oncpu = -1;
+ counter->parent = parent_counter;
+
counter->ns = get_pid_ns(current->nsproxy->pid_ns);
counter->id = atomic64_inc_return(&perf_counter_id);
@@ -3648,11 +3868,13 @@ done:
counter->pmu = pmu;
- atomic_inc(&nr_counters);
- if (counter->attr.mmap)
- atomic_inc(&nr_mmap_counters);
- if (counter->attr.comm)
- atomic_inc(&nr_comm_counters);
+ if (!counter->parent) {
+ atomic_inc(&nr_counters);
+ if (counter->attr.mmap)
+ atomic_inc(&nr_mmap_counters);
+ if (counter->attr.comm)
+ atomic_inc(&nr_comm_counters);
+ }
return counter;
}
@@ -3815,7 +4037,7 @@ SYSCALL_DEFINE5(perf_counter_open,
}
counter = perf_counter_alloc(&attr, cpu, ctx, group_leader,
- GFP_KERNEL);
+ NULL, GFP_KERNEL);
ret = PTR_ERR(counter);
if (IS_ERR(counter))
goto err_put_context;
@@ -3881,7 +4103,8 @@ inherit_counter(struct perf_counter *parent_counter,
child_counter = perf_counter_alloc(&parent_counter->attr,
parent_counter->cpu, child_ctx,
- group_leader, GFP_KERNEL);
+ group_leader, parent_counter,
+ GFP_KERNEL);
if (IS_ERR(child_counter))
return child_counter;
get_ctx(child_ctx);
@@ -3904,12 +4127,6 @@ inherit_counter(struct perf_counter *parent_counter,
*/
add_counter_to_ctx(child_counter, child_ctx);
- child_counter->parent = parent_counter;
- /*
- * inherit into child's child as well:
- */
- child_counter->attr.inherit = 1;
-
/*
* Get a reference to the parent filp - we will fput it
* when the child counter exits. This is safe to do because
@@ -3953,10 +4170,14 @@ static int inherit_group(struct perf_counter *parent_counter,
}
static void sync_child_counter(struct perf_counter *child_counter,
- struct perf_counter *parent_counter)
+ struct task_struct *child)
{
+ struct perf_counter *parent_counter = child_counter->parent;
u64 child_val;
+ if (child_counter->attr.inherit_stat)
+ perf_counter_read_event(child_counter, child);
+
child_val = atomic64_read(&child_counter->count);
/*
@@ -3985,7 +4206,8 @@ static void sync_child_counter(struct perf_counter *child_counter,
static void
__perf_counter_exit_task(struct perf_counter *child_counter,
- struct perf_counter_context *child_ctx)
+ struct perf_counter_context *child_ctx,
+ struct task_struct *child)
{
struct perf_counter *parent_counter;
@@ -3999,7 +4221,7 @@ __perf_counter_exit_task(struct perf_counter *child_counter,
* counters need to be zapped - but otherwise linger.
*/
if (parent_counter) {
- sync_child_counter(child_counter, parent_counter);
+ sync_child_counter(child_counter, child);
free_counter(child_counter);
}
}
@@ -4061,7 +4283,7 @@ void perf_counter_exit_task(struct task_struct *child)
again:
list_for_each_entry_safe(child_counter, tmp, &child_ctx->counter_list,
list_entry)
- __perf_counter_exit_task(child_counter, child_ctx);
+ __perf_counter_exit_task(child_counter, child_ctx, child);
/*
* If the last counter was a group counter, it will have appended all
diff --git a/kernel/power/user.c b/kernel/power/user.c
index ed97375..bf0014d 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -23,7 +23,6 @@
#include <linux/console.h>
#include <linux/cpu.h>
#include <linux/freezer.h>
-#include <linux/smp_lock.h>
#include <scsi/scsi_scan.h>
#include <asm/uaccess.h>
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 61c78b2..082c320 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -181,8 +181,8 @@ int ptrace_attach(struct task_struct *task)
* interference; SUID, SGID and LSM creds get determined differently
* under ptrace.
*/
- retval = mutex_lock_interruptible(&task->cred_guard_mutex);
- if (retval < 0)
+ retval = -ERESTARTNOINTR;
+ if (mutex_lock_interruptible(&task->cred_guard_mutex))
goto out;
task_lock(task);
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 0dccfbb..7717b95 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -1533,7 +1533,7 @@ void __init __rcu_init(void)
int j;
struct rcu_node *rnp;
- printk(KERN_WARNING "Experimental hierarchical RCU implementation.\n");
+ printk(KERN_INFO "Hierarchical RCU implementation.\n");
#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
printk(KERN_INFO "RCU-based detection of stalled CPUs is enabled.\n");
#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
@@ -1546,7 +1546,6 @@ void __init __rcu_init(void)
rcu_cpu_notify(&rcu_nb, CPU_UP_PREPARE, (void *)(long)i);
/* Register notifier for non-boot CPUs */
register_cpu_notifier(&rcu_nb);
- printk(KERN_WARNING "Experimental hierarchical RCU init done.\n");
}
module_param(blimit, int, 0);
diff --git a/kernel/resource.c b/kernel/resource.c
index ac5f3a3..78b0872 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -787,7 +787,7 @@ static int __init reserve_setup(char *str)
static struct resource reserve[MAXRESERVE];
for (;;) {
- int io_start, io_num;
+ unsigned int io_start, io_num;
int x = reserved;
if (get_option (&str, &io_start) != 2)
diff --git a/kernel/sched.c b/kernel/sched.c
index ebc5151..03f7e3f 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -493,6 +493,7 @@ struct rt_rq {
#endif
#ifdef CONFIG_SMP
unsigned long rt_nr_migratory;
+ unsigned long rt_nr_total;
int overloaded;
struct plist_head pushable_tasks;
#endif
@@ -2572,15 +2573,37 @@ static void __sched_fork(struct task_struct *p)
p->se.avg_wakeup = sysctl_sched_wakeup_granularity;
#ifdef CONFIG_SCHEDSTATS
- p->se.wait_start = 0;
- p->se.sum_sleep_runtime = 0;
- p->se.sleep_start = 0;
- p->se.block_start = 0;
- p->se.sleep_max = 0;
- p->se.block_max = 0;
- p->se.exec_max = 0;
- p->se.slice_max = 0;
- p->se.wait_max = 0;
+ p->se.wait_start = 0;
+ p->se.wait_max = 0;
+ p->se.wait_count = 0;
+ p->se.wait_sum = 0;
+
+ p->se.sleep_start = 0;
+ p->se.sleep_max = 0;
+ p->se.sum_sleep_runtime = 0;
+
+ p->se.block_start = 0;
+ p->se.block_max = 0;
+ p->se.exec_max = 0;
+ p->se.slice_max = 0;
+
+ p->se.nr_migrations_cold = 0;
+ p->se.nr_failed_migrations_affine = 0;
+ p->se.nr_failed_migrations_running = 0;
+ p->se.nr_failed_migrations_hot = 0;
+ p->se.nr_forced_migrations = 0;
+ p->se.nr_forced2_migrations = 0;
+
+ p->se.nr_wakeups = 0;
+ p->se.nr_wakeups_sync = 0;
+ p->se.nr_wakeups_migrate = 0;
+ p->se.nr_wakeups_local = 0;
+ p->se.nr_wakeups_remote = 0;
+ p->se.nr_wakeups_affine = 0;
+ p->se.nr_wakeups_affine_attempts = 0;
+ p->se.nr_wakeups_passive = 0;
+ p->se.nr_wakeups_idle = 0;
+
#endif
INIT_LIST_HEAD(&p->rt.run_list);
@@ -6580,6 +6603,11 @@ SYSCALL_DEFINE0(sched_yield)
return 0;
}
+static inline int should_resched(void)
+{
+ return need_resched() && !(preempt_count() & PREEMPT_ACTIVE);
+}
+
static void __cond_resched(void)
{
#ifdef CONFIG_DEBUG_SPINLOCK_SLEEP
@@ -6599,8 +6627,7 @@ static void __cond_resched(void)
int __sched _cond_resched(void)
{
- if (need_resched() && !(preempt_count() & PREEMPT_ACTIVE) &&
- system_state == SYSTEM_RUNNING) {
+ if (should_resched()) {
__cond_resched();
return 1;
}
@@ -6618,12 +6645,12 @@ EXPORT_SYMBOL(_cond_resched);
*/
int cond_resched_lock(spinlock_t *lock)
{
- int resched = need_resched() && system_state == SYSTEM_RUNNING;
+ int resched = should_resched();
int ret = 0;
if (spin_needbreak(lock) || resched) {
spin_unlock(lock);
- if (resched && need_resched())
+ if (resched)
__cond_resched();
else
cpu_relax();
@@ -6638,7 +6665,7 @@ int __sched cond_resched_softirq(void)
{
BUG_ON(!in_softirq());
- if (need_resched() && system_state == SYSTEM_RUNNING) {
+ if (should_resched()) {
local_bh_enable();
__cond_resched();
local_bh_disable();
@@ -9109,7 +9136,7 @@ static void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq)
#ifdef CONFIG_SMP
rt_rq->rt_nr_migratory = 0;
rt_rq->overloaded = 0;
- plist_head_init(&rq->rt.pushable_tasks, &rq->lock);
+ plist_head_init(&rt_rq->pushable_tasks, &rq->lock);
#endif
rt_rq->rt_time = 0;
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index ba7fd6e..7c248dc 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -687,7 +687,8 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
* all of which have the same weight.
*/
if (sched_feat(NORMALIZED_SLEEPER) &&
- task_of(se)->policy != SCHED_IDLE)
+ (!entity_is_task(se) ||
+ task_of(se)->policy != SCHED_IDLE))
thresh = calc_delta_fair(thresh, se);
vruntime -= thresh;
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 9bf0d2a..3918e01 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -10,6 +10,8 @@ static inline struct task_struct *rt_task_of(struct sched_rt_entity *rt_se)
#ifdef CONFIG_RT_GROUP_SCHED
+#define rt_entity_is_task(rt_se) (!(rt_se)->my_q)
+
static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq)
{
return rt_rq->rq;
@@ -22,6 +24,8 @@ static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se)
#else /* CONFIG_RT_GROUP_SCHED */
+#define rt_entity_is_task(rt_se) (1)
+
static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq)
{
return container_of(rt_rq, struct rq, rt);
@@ -73,7 +77,7 @@ static inline void rt_clear_overload(struct rq *rq)
static void update_rt_migration(struct rt_rq *rt_rq)
{
- if (rt_rq->rt_nr_migratory && (rt_rq->rt_nr_running > 1)) {
+ if (rt_rq->rt_nr_migratory && rt_rq->rt_nr_total > 1) {
if (!rt_rq->overloaded) {
rt_set_overload(rq_of_rt_rq(rt_rq));
rt_rq->overloaded = 1;
@@ -86,6 +90,12 @@ static void update_rt_migration(struct rt_rq *rt_rq)
static void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
+ if (!rt_entity_is_task(rt_se))
+ return;
+
+ rt_rq = &rq_of_rt_rq(rt_rq)->rt;
+
+ rt_rq->rt_nr_total++;
if (rt_se->nr_cpus_allowed > 1)
rt_rq->rt_nr_migratory++;
@@ -94,6 +104,12 @@ static void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
static void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
+ if (!rt_entity_is_task(rt_se))
+ return;
+
+ rt_rq = &rq_of_rt_rq(rt_rq)->rt;
+
+ rt_rq->rt_nr_total--;
if (rt_se->nr_cpus_allowed > 1)
rt_rq->rt_nr_migratory--;
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index 1ad6dd4..a6dcd67 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -254,15 +254,4 @@ void clockevents_notify(unsigned long reason, void *arg)
spin_unlock(&clockevents_lock);
}
EXPORT_SYMBOL_GPL(clockevents_notify);
-
-ktime_t clockevents_get_next_event(int cpu)
-{
- struct tick_device *td;
- struct clock_event_device *dev;
-
- td = &per_cpu(tick_cpu_device, cpu);
- dev = td->evtdev;
-
- return dev->next_event;
-}
#endif
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 1551f47..019f380 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -226,13 +226,13 @@ config BOOT_TRACER
the timings of the initcalls and traces key events and the identity
of tasks that can cause boot delays, such as context-switches.
- Its aim is to be parsed by the /scripts/bootgraph.pl tool to
+ Its aim is to be parsed by the scripts/bootgraph.pl tool to
produce pretty graphics about boot inefficiencies, giving a visual
representation of the delays during initcalls - but the raw
/debug/tracing/trace text output is readable too.
- You must pass in ftrace=initcall to the kernel command line
- to enable this on bootup.
+ You must pass in initcall_debug and ftrace=initcall to the kernel
+ command line to enable this on bootup.
config TRACE_BRANCH_PROFILING
bool
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 39af8af..1090b0a 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -22,6 +22,7 @@
#include <linux/init.h>
#include <linux/mutex.h>
#include <linux/debugfs.h>
+#include <linux/smp_lock.h>
#include <linux/time.h>
#include <linux/uaccess.h>
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index f3716bf..4521c77 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -768,7 +768,7 @@ static struct tracer_stat function_stats __initdata = {
.stat_show = function_stat_show
};
-static void ftrace_profile_debugfs(struct dentry *d_tracer)
+static __init void ftrace_profile_debugfs(struct dentry *d_tracer)
{
struct ftrace_profile_stat *stat;
struct dentry *entry;
@@ -786,7 +786,6 @@ static void ftrace_profile_debugfs(struct dentry *d_tracer)
* The files created are permanent, if something happens
* we still do not free memory.
*/
- kfree(stat);
WARN(1,
"Could not allocate stat file for cpu %d\n",
cpu);
@@ -813,7 +812,7 @@ static void ftrace_profile_debugfs(struct dentry *d_tracer)
}
#else /* CONFIG_FUNCTION_PROFILER */
-static void ftrace_profile_debugfs(struct dentry *d_tracer)
+static __init void ftrace_profile_debugfs(struct dentry *d_tracer)
{
}
#endif /* CONFIG_FUNCTION_PROFILER */
@@ -3160,10 +3159,10 @@ ftrace_enable_sysctl(struct ctl_table *table, int write,
ret = proc_dointvec(table, write, file, buffer, lenp, ppos);
- if (ret || !write || (last_ftrace_enabled == ftrace_enabled))
+ if (ret || !write || (last_ftrace_enabled == !!ftrace_enabled))
goto out;
- last_ftrace_enabled = ftrace_enabled;
+ last_ftrace_enabled = !!ftrace_enabled;
if (ftrace_enabled) {
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 3aa0a0d..8bc8d8a 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -17,6 +17,7 @@
#include <linux/writeback.h>
#include <linux/kallsyms.h>
#include <linux/seq_file.h>
+#include <linux/smp_lock.h>
#include <linux/notifier.h>
#include <linux/irqflags.h>
#include <linux/debugfs.h>
diff --git a/kernel/trace/trace_event_types.h b/kernel/trace/trace_event_types.h
index 5e32e37..6db005e 100644
--- a/kernel/trace/trace_event_types.h
+++ b/kernel/trace/trace_event_types.h
@@ -26,6 +26,9 @@ TRACE_EVENT_FORMAT(funcgraph_exit, TRACE_GRAPH_RET,
ftrace_graph_ret_entry, ignore,
TRACE_STRUCT(
TRACE_FIELD(unsigned long, ret.func, func)
+ TRACE_FIELD(unsigned long long, ret.calltime, calltime)
+ TRACE_FIELD(unsigned long long, ret.rettime, rettime)
+ TRACE_FIELD(unsigned long, ret.overrun, overrun)
TRACE_FIELD(int, ret.depth, depth)
),
TP_RAW_FMT("<-- %lx (%d)")
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index 7402144..75ef000 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -363,7 +363,7 @@ ftrace_trace_onoff_callback(char *glob, char *cmd, char *param, int enable)
out_reg:
ret = register_ftrace_function_probe(glob, ops, count);
- return ret;
+ return ret < 0 ? ret : 0;
}
static struct ftrace_func_command ftrace_traceon_cmd = {
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 7938f3a..e0c2545 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -27,8 +27,7 @@ void trace_print_seq(struct seq_file *m, struct trace_seq *s)
{
int len = s->len >= PAGE_SIZE ? PAGE_SIZE - 1 : s->len;
- s->buffer[len] = 0;
- seq_puts(m, s->buffer);
+ seq_write(m, s->buffer, len);
trace_seq_init(s);
}
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index 2d7aebd..e644af9 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -326,10 +326,10 @@ stack_trace_sysctl(struct ctl_table *table, int write,
ret = proc_dointvec(table, write, file, buffer, lenp, ppos);
if (ret || !write ||
- (last_stack_tracer_enabled == stack_tracer_enabled))
+ (last_stack_tracer_enabled == !!stack_tracer_enabled))
goto out;
- last_stack_tracer_enabled = stack_tracer_enabled;
+ last_stack_tracer_enabled = !!stack_tracer_enabled;
if (stack_tracer_enabled)
register_ftrace_function(&trace_ops);