summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/Makefile3
-rw-r--r--kernel/cred.c6
-rw-r--r--kernel/events/core.c168
-rw-r--r--kernel/fork.c1
-rw-r--r--kernel/irq/chip.c2
-rw-r--r--kernel/irq/generic-chip.c4
-rw-r--r--kernel/irq/irqdesc.c37
-rw-r--r--kernel/irq/irqdomain.c6
-rw-r--r--kernel/irq/manage.c17
-rw-r--r--kernel/lockdep.c8
-rw-r--r--kernel/posix-cpu-timers.c5
-rw-r--r--kernel/power/Kconfig4
-rw-r--r--kernel/printk.c8
-rw-r--r--kernel/ptrace.c23
-rw-r--r--kernel/resource.c7
-rw-r--r--kernel/sched.c67
-rw-r--r--kernel/sched_rt.c4
-rw-r--r--kernel/sys.c53
-rw-r--r--kernel/sys_ni.c1
-rw-r--r--kernel/sysctl_binary.c2
-rw-r--r--kernel/sysctl_check.c2
-rw-r--r--kernel/taskstats.c1
-rw-r--r--kernel/time/alarmtimer.c18
-rw-r--r--kernel/trace/Kconfig2
-rw-r--r--kernel/trace/blktrace.c21
-rw-r--r--kernel/trace/trace_kprobe.c58
-rw-r--r--kernel/tsacct.c15
-rw-r--r--kernel/watchdog.c7
-rw-r--r--kernel/workqueue.c7
29 files changed, 416 insertions, 141 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index d06467f..eca595e 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -10,7 +10,7 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o \
kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \
notifier.o ksysfs.o pm_qos_params.o sched_clock.o cred.o \
- async.o range.o jump_label.o
+ async.o range.o
obj-y += groups.o
ifdef CONFIG_FUNCTION_TRACER
@@ -107,6 +107,7 @@ obj-$(CONFIG_PERF_EVENTS) += events/
obj-$(CONFIG_USER_RETURN_NOTIFIER) += user-return-notifier.o
obj-$(CONFIG_PADATA) += padata.o
obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
+obj-$(CONFIG_JUMP_LABEL) += jump_label.o
ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y)
# According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
diff --git a/kernel/cred.c b/kernel/cred.c
index 174fa84..8ef31f5 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -508,10 +508,8 @@ int commit_creds(struct cred *new)
key_fsgid_changed(task);
/* do it
- * - What if a process setreuid()'s and this brings the
- * new uid over his NPROC rlimit? We can check this now
- * cheaply with the new uid cache, so if it matters
- * we should be checking for it. -DaveM
+ * RLIMIT_NPROC limits on user->processes have already been checked
+ * in set_user().
*/
alter_cred_subscribers(new, 2);
if (new->user != old->user)
diff --git a/kernel/events/core.c b/kernel/events/core.c
index b8785e2..d1a1bee 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -29,6 +29,7 @@
#include <linux/hardirq.h>
#include <linux/rculist.h>
#include <linux/uaccess.h>
+#include <linux/suspend.h>
#include <linux/syscalls.h>
#include <linux/anon_inodes.h>
#include <linux/kernel_stat.h>
@@ -399,14 +400,54 @@ void perf_cgroup_switch(struct task_struct *task, int mode)
local_irq_restore(flags);
}
-static inline void perf_cgroup_sched_out(struct task_struct *task)
+static inline void perf_cgroup_sched_out(struct task_struct *task,
+ struct task_struct *next)
{
- perf_cgroup_switch(task, PERF_CGROUP_SWOUT);
+ struct perf_cgroup *cgrp1;
+ struct perf_cgroup *cgrp2 = NULL;
+
+ /*
+ * we come here when we know perf_cgroup_events > 0
+ */
+ cgrp1 = perf_cgroup_from_task(task);
+
+ /*
+ * next is NULL when called from perf_event_enable_on_exec()
+ * that will systematically cause a cgroup_switch()
+ */
+ if (next)
+ cgrp2 = perf_cgroup_from_task(next);
+
+ /*
+ * only schedule out current cgroup events if we know
+ * that we are switching to a different cgroup. Otherwise,
+ * do no touch the cgroup events.
+ */
+ if (cgrp1 != cgrp2)
+ perf_cgroup_switch(task, PERF_CGROUP_SWOUT);
}
-static inline void perf_cgroup_sched_in(struct task_struct *task)
+static inline void perf_cgroup_sched_in(struct task_struct *prev,
+ struct task_struct *task)
{
- perf_cgroup_switch(task, PERF_CGROUP_SWIN);
+ struct perf_cgroup *cgrp1;
+ struct perf_cgroup *cgrp2 = NULL;
+
+ /*
+ * we come here when we know perf_cgroup_events > 0
+ */
+ cgrp1 = perf_cgroup_from_task(task);
+
+ /* prev can never be NULL */
+ cgrp2 = perf_cgroup_from_task(prev);
+
+ /*
+ * only need to schedule in cgroup events if we are changing
+ * cgroup during ctxsw. Cgroup events were not scheduled
+ * out of ctxsw out if that was not the case.
+ */
+ if (cgrp1 != cgrp2)
+ perf_cgroup_switch(task, PERF_CGROUP_SWIN);
}
static inline int perf_cgroup_connect(int fd, struct perf_event *event,
@@ -518,11 +559,13 @@ static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx)
{
}
-static inline void perf_cgroup_sched_out(struct task_struct *task)
+static inline void perf_cgroup_sched_out(struct task_struct *task,
+ struct task_struct *next)
{
}
-static inline void perf_cgroup_sched_in(struct task_struct *task)
+static inline void perf_cgroup_sched_in(struct task_struct *prev,
+ struct task_struct *task)
{
}
@@ -1988,7 +2031,7 @@ void __perf_event_task_sched_out(struct task_struct *task,
* cgroup event are system-wide mode only
*/
if (atomic_read(&__get_cpu_var(perf_cgroup_events)))
- perf_cgroup_sched_out(task);
+ perf_cgroup_sched_out(task, next);
}
static void task_ctx_sched_out(struct perf_event_context *ctx)
@@ -2153,7 +2196,8 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx,
* accessing the event control register. If a NMI hits, then it will
* keep the event running.
*/
-void __perf_event_task_sched_in(struct task_struct *task)
+void __perf_event_task_sched_in(struct task_struct *prev,
+ struct task_struct *task)
{
struct perf_event_context *ctx;
int ctxn;
@@ -2171,7 +2215,7 @@ void __perf_event_task_sched_in(struct task_struct *task)
* cgroup event are system-wide mode only
*/
if (atomic_read(&__get_cpu_var(perf_cgroup_events)))
- perf_cgroup_sched_in(task);
+ perf_cgroup_sched_in(prev, task);
}
static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count)
@@ -2427,7 +2471,7 @@ static void perf_event_enable_on_exec(struct perf_event_context *ctx)
* ctxswin cgroup events which are already scheduled
* in.
*/
- perf_cgroup_sched_out(current);
+ perf_cgroup_sched_out(current, NULL);
raw_spin_lock(&ctx->lock);
task_ctx_sched_out(ctx);
@@ -3353,8 +3397,8 @@ static int perf_event_index(struct perf_event *event)
}
static void calc_timer_values(struct perf_event *event,
- u64 *running,
- u64 *enabled)
+ u64 *enabled,
+ u64 *running)
{
u64 now, ctx_time;
@@ -5715,6 +5759,7 @@ struct pmu *perf_init_event(struct perf_event *event)
pmu = idr_find(&pmu_idr, event->attr.type);
rcu_read_unlock();
if (pmu) {
+ event->pmu = pmu;
ret = pmu->event_init(event);
if (ret)
pmu = ERR_PTR(ret);
@@ -5722,6 +5767,7 @@ struct pmu *perf_init_event(struct perf_event *event)
}
list_for_each_entry_rcu(pmu, &pmus, entry) {
+ event->pmu = pmu;
ret = pmu->event_init(event);
if (!ret)
goto unlock;
@@ -5848,8 +5894,6 @@ done:
return ERR_PTR(err);
}
- event->pmu = pmu;
-
if (!event->parent) {
if (event->attach_state & PERF_ATTACH_TASK)
jump_label_inc(&perf_sched_events);
@@ -6809,7 +6853,7 @@ static void __cpuinit perf_event_init_cpu(int cpu)
struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu);
mutex_lock(&swhash->hlist_mutex);
- if (swhash->hlist_refcount > 0) {
+ if (swhash->hlist_refcount > 0 && !swhash->swevent_hlist) {
struct swevent_hlist *hlist;
hlist = kzalloc_node(sizeof(*hlist), GFP_KERNEL, cpu_to_node(cpu));
@@ -6898,7 +6942,14 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
{
unsigned int cpu = (long)hcpu;
- switch (action & ~CPU_TASKS_FROZEN) {
+ /*
+ * Ignore suspend/resume action, the perf_pm_notifier will
+ * take care of that.
+ */
+ if (action & CPU_TASKS_FROZEN)
+ return NOTIFY_OK;
+
+ switch (action) {
case CPU_UP_PREPARE:
case CPU_DOWN_FAILED:
@@ -6917,6 +6968,90 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
return NOTIFY_OK;
}
+static void perf_pm_resume_cpu(void *unused)
+{
+ struct perf_cpu_context *cpuctx;
+ struct perf_event_context *ctx;
+ struct pmu *pmu;
+ int idx;
+
+ idx = srcu_read_lock(&pmus_srcu);
+ list_for_each_entry_rcu(pmu, &pmus, entry) {
+ cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
+ ctx = cpuctx->task_ctx;
+
+ perf_ctx_lock(cpuctx, ctx);
+ perf_pmu_disable(cpuctx->ctx.pmu);
+
+ cpu_ctx_sched_out(cpuctx, EVENT_ALL);
+ if (ctx)
+ ctx_sched_out(ctx, cpuctx, EVENT_ALL);
+
+ perf_pmu_enable(cpuctx->ctx.pmu);
+ perf_ctx_unlock(cpuctx, ctx);
+ }
+ srcu_read_unlock(&pmus_srcu, idx);
+}
+
+static void perf_pm_suspend_cpu(void *unused)
+{
+ struct perf_cpu_context *cpuctx;
+ struct perf_event_context *ctx;
+ struct pmu *pmu;
+ int idx;
+
+ idx = srcu_read_lock(&pmus_srcu);
+ list_for_each_entry_rcu(pmu, &pmus, entry) {
+ cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
+ ctx = cpuctx->task_ctx;
+
+ perf_ctx_lock(cpuctx, ctx);
+ perf_pmu_disable(cpuctx->ctx.pmu);
+
+ perf_event_sched_in(cpuctx, ctx, current);
+
+ perf_pmu_enable(cpuctx->ctx.pmu);
+ perf_ctx_unlock(cpuctx, ctx);
+ }
+ srcu_read_unlock(&pmus_srcu, idx);
+}
+
+static int perf_resume(void)
+{
+ get_online_cpus();
+ smp_call_function(perf_pm_resume_cpu, NULL, 1);
+ put_online_cpus();
+
+ return NOTIFY_OK;
+}
+
+static int perf_suspend(void)
+{
+ get_online_cpus();
+ smp_call_function(perf_pm_suspend_cpu, NULL, 1);
+ put_online_cpus();
+
+ return NOTIFY_OK;
+}
+
+static int perf_pm(struct notifier_block *self, unsigned long action, void *ptr)
+{
+ switch (action) {
+ case PM_POST_HIBERNATION:
+ case PM_POST_SUSPEND:
+ return perf_resume();
+ case PM_HIBERNATION_PREPARE:
+ case PM_SUSPEND_PREPARE:
+ return perf_suspend();
+ default:
+ return NOTIFY_DONE;
+ }
+}
+
+static struct notifier_block perf_pm_notifier = {
+ .notifier_call = perf_pm,
+};
+
void __init perf_event_init(void)
{
int ret;
@@ -6931,6 +7066,7 @@ void __init perf_event_init(void)
perf_tp_register();
perf_cpu_notifier(perf_cpu_notify);
register_reboot_notifier(&perf_reboot_notifier);
+ register_pm_notifier(&perf_pm_notifier);
ret = init_hw_breakpoint();
WARN(ret, "hw_breakpoint initialization failed with: %d", ret);
diff --git a/kernel/fork.c b/kernel/fork.c
index e7ceaca..8e6b6f4 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1111,6 +1111,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
p->real_cred->user != INIT_USER)
goto bad_fork_free;
}
+ current->flags &= ~PF_NPROC_EXCEEDED;
retval = copy_creds(p, clone_flags);
if (retval < 0)
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index d5a3009..dc5114b 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -178,7 +178,7 @@ void irq_shutdown(struct irq_desc *desc)
desc->depth = 1;
if (desc->irq_data.chip->irq_shutdown)
desc->irq_data.chip->irq_shutdown(&desc->irq_data);
- if (desc->irq_data.chip->irq_disable)
+ else if (desc->irq_data.chip->irq_disable)
desc->irq_data.chip->irq_disable(&desc->irq_data);
else
desc->irq_data.chip->irq_mask(&desc->irq_data);
diff --git a/kernel/irq/generic-chip.c b/kernel/irq/generic-chip.c
index 3a2cab4..e38544d 100644
--- a/kernel/irq/generic-chip.c
+++ b/kernel/irq/generic-chip.c
@@ -246,7 +246,7 @@ void irq_setup_generic_chip(struct irq_chip_generic *gc, u32 msk,
gc->mask_cache = irq_reg_readl(gc->reg_base + ct->regs.mask);
for (i = gc->irq_base; msk; msk >>= 1, i++) {
- if (!msk & 0x01)
+ if (!(msk & 0x01))
continue;
if (flags & IRQ_GC_INIT_NESTED_LOCK)
@@ -301,7 +301,7 @@ void irq_remove_generic_chip(struct irq_chip_generic *gc, u32 msk,
raw_spin_unlock(&gc_lock);
for (; msk; msk >>= 1, i++) {
- if (!msk & 0x01)
+ if (!(msk & 0x01))
continue;
/* Remove handler first. That will mask the irq line */
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index 4c60a50..039b889 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -70,7 +70,8 @@ static inline void desc_smp_init(struct irq_desc *desc, int node) { }
static inline int desc_node(struct irq_desc *desc) { return 0; }
#endif
-static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node)
+static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node,
+ struct module *owner)
{
int cpu;
@@ -86,6 +87,7 @@ static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node)
desc->irq_count = 0;
desc->irqs_unhandled = 0;
desc->name = NULL;
+ desc->owner = owner;
for_each_possible_cpu(cpu)
*per_cpu_ptr(desc->kstat_irqs, cpu) = 0;
desc_smp_init(desc, node);
@@ -128,7 +130,7 @@ static void free_masks(struct irq_desc *desc)
static inline void free_masks(struct irq_desc *desc) { }
#endif
-static struct irq_desc *alloc_desc(int irq, int node)
+static struct irq_desc *alloc_desc(int irq, int node, struct module *owner)
{
struct irq_desc *desc;
gfp_t gfp = GFP_KERNEL;
@@ -147,7 +149,7 @@ static struct irq_desc *alloc_desc(int irq, int node)
raw_spin_lock_init(&desc->lock);
lockdep_set_class(&desc->lock, &irq_desc_lock_class);
- desc_set_defaults(irq, desc, node);
+ desc_set_defaults(irq, desc, node, owner);
return desc;
@@ -173,13 +175,14 @@ static void free_desc(unsigned int irq)
kfree(desc);
}
-static int alloc_descs(unsigned int start, unsigned int cnt, int node)
+static int alloc_descs(unsigned int start, unsigned int cnt, int node,
+ struct module *owner)
{
struct irq_desc *desc;
int i;
for (i = 0; i < cnt; i++) {
- desc = alloc_desc(start + i, node);
+ desc = alloc_desc(start + i, node, owner);
if (!desc)
goto err;
mutex_lock(&sparse_irq_lock);
@@ -227,7 +230,7 @@ int __init early_irq_init(void)
nr_irqs = initcnt;
for (i = 0; i < initcnt; i++) {
- desc = alloc_desc(i, node);
+ desc = alloc_desc(i, node, NULL);
set_bit(i, allocated_irqs);
irq_insert_desc(i, desc);
}
@@ -261,7 +264,7 @@ int __init early_irq_init(void)
alloc_masks(&desc[i], GFP_KERNEL, node);
raw_spin_lock_init(&desc[i].lock);
lockdep_set_class(&desc[i].lock, &irq_desc_lock_class);
- desc_set_defaults(i, &desc[i], node);
+ desc_set_defaults(i, &desc[i], node, NULL);
}
return arch_early_irq_init();
}
@@ -276,8 +279,16 @@ static void free_desc(unsigned int irq)
dynamic_irq_cleanup(irq);
}
-static inline int alloc_descs(unsigned int start, unsigned int cnt, int node)
+static inline int alloc_descs(unsigned int start, unsigned int cnt, int node,
+ struct module *owner)
{
+ u32 i;
+
+ for (i = 0; i < cnt; i++) {
+ struct irq_desc *desc = irq_to_desc(start + i);
+
+ desc->owner = owner;
+ }
return start;
}
@@ -333,11 +344,13 @@ EXPORT_SYMBOL_GPL(irq_free_descs);
* @from: Start the search from this irq number
* @cnt: Number of consecutive irqs to allocate.
* @node: Preferred node on which the irq descriptor should be allocated
+ * @owner: Owning module (can be NULL)
*
* Returns the first irq number or error code
*/
int __ref
-irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node)
+__irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node,
+ struct module *owner)
{
int start, ret;
@@ -366,13 +379,13 @@ irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node)
bitmap_set(allocated_irqs, start, cnt);
mutex_unlock(&sparse_irq_lock);
- return alloc_descs(start, cnt, node);
+ return alloc_descs(start, cnt, node, owner);
err:
mutex_unlock(&sparse_irq_lock);
return ret;
}
-EXPORT_SYMBOL_GPL(irq_alloc_descs);
+EXPORT_SYMBOL_GPL(__irq_alloc_descs);
/**
* irq_reserve_irqs - mark irqs allocated
@@ -440,7 +453,7 @@ void dynamic_irq_cleanup(unsigned int irq)
unsigned long flags;
raw_spin_lock_irqsave(&desc->lock, flags);
- desc_set_defaults(irq, desc, desc_node(desc));
+ desc_set_defaults(irq, desc, desc_node(desc), NULL);
raw_spin_unlock_irqrestore(&desc->lock, flags);
}
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index d5828da..b57a377 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -29,7 +29,11 @@ void irq_domain_add(struct irq_domain *domain)
*/
for (hwirq = 0; hwirq < domain->nr_irq; hwirq++) {
d = irq_get_irq_data(irq_domain_to_irq(domain, hwirq));
- if (d || d->domain) {
+ if (!d) {
+ WARN(1, "error: assigning domain to non existant irq_desc");
+ return;
+ }
+ if (d->domain) {
/* things are broken; just report, don't clean up */
WARN(1, "error: irq_desc already assigned to a domain");
return;
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 0a7840ae..9b956fa 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -883,6 +883,8 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
if (desc->irq_data.chip == &no_irq_chip)
return -ENOSYS;
+ if (!try_module_get(desc->owner))
+ return -ENODEV;
/*
* Some drivers like serial.c use request_irq() heavily,
* so we have to be careful not to interfere with a
@@ -906,8 +908,10 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
*/
nested = irq_settings_is_nested_thread(desc);
if (nested) {
- if (!new->thread_fn)
- return -EINVAL;
+ if (!new->thread_fn) {
+ ret = -EINVAL;
+ goto out_mput;
+ }
/*
* Replace the primary handler which was provided from
* the driver for non nested interrupt handling by the
@@ -929,8 +933,10 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
t = kthread_create(irq_thread, new, "irq/%d-%s", irq,
new->name);
- if (IS_ERR(t))
- return PTR_ERR(t);
+ if (IS_ERR(t)) {
+ ret = PTR_ERR(t);
+ goto out_mput;
+ }
/*
* We keep the reference to the task struct even if
* the thread dies to avoid that the interrupt code
@@ -1095,6 +1101,8 @@ out_thread:
kthread_stop(t);
put_task_struct(t);
}
+out_mput:
+ module_put(desc->owner);
return ret;
}
@@ -1203,6 +1211,7 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
put_task_struct(action->thread);
}
+ module_put(desc->owner);
return action;
}
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 8c24294..91d67ce 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -3111,7 +3111,13 @@ static int match_held_lock(struct held_lock *hlock, struct lockdep_map *lock)
if (!class)
class = look_up_lock_class(lock, 0);
- if (DEBUG_LOCKS_WARN_ON(!class))
+ /*
+ * If look_up_lock_class() failed to find a class, we're trying
+ * to test if we hold a lock that has never yet been acquired.
+ * Clearly if the lock hasn't been acquired _ever_, we're not
+ * holding it either, so report failure.
+ */
+ if (!class)
return 0;
if (DEBUG_LOCKS_WARN_ON(!hlock->nest_lock))
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index 58f405b..c8008dd 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -250,7 +250,7 @@ void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
do {
times->utime = cputime_add(times->utime, t->utime);
times->stime = cputime_add(times->stime, t->stime);
- times->sum_exec_runtime += t->se.sum_exec_runtime;
+ times->sum_exec_runtime += task_sched_runtime(t);
} while_each_thread(tsk, t);
out:
rcu_read_unlock();
@@ -312,7 +312,8 @@ static int cpu_clock_sample_group(const clockid_t which_clock,
cpu->cpu = cputime.utime;
break;
case CPUCLOCK_SCHED:
- cpu->sched = thread_group_sched_runtime(p);
+ thread_group_cputime(p, &cputime);
+ cpu->sched = cputime.sum_exec_runtime;
break;
}
return 0;
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index b1914cb9..3744c59 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -231,3 +231,7 @@ config PM_CLK
config PM_GENERIC_DOMAINS
bool
depends on PM
+
+config PM_GENERIC_DOMAINS_RUNTIME
+ def_bool y
+ depends on PM_RUNTIME && PM_GENERIC_DOMAINS
diff --git a/kernel/printk.c b/kernel/printk.c
index 37dff34..28a40d8 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -318,8 +318,10 @@ static int check_syslog_permissions(int type, bool from_file)
return 0;
/* For historical reasons, accept CAP_SYS_ADMIN too, with a warning */
if (capable(CAP_SYS_ADMIN)) {
- WARN_ONCE(1, "Attempt to access syslog with CAP_SYS_ADMIN "
- "but no CAP_SYSLOG (deprecated).\n");
+ printk_once(KERN_WARNING "%s (%d): "
+ "Attempt to access syslog with CAP_SYS_ADMIN "
+ "but no CAP_SYSLOG (deprecated).\n",
+ current->comm, task_pid_nr(current));
return 0;
}
return -EPERM;
@@ -1602,7 +1604,7 @@ static int __init printk_late_init(void)
struct console *con;
for_each_console(con) {
- if (con->flags & CON_BOOT) {
+ if (!keep_bootcon && con->flags & CON_BOOT) {
printk(KERN_INFO "turn off boot console %s%d\n",
con->name, con->index);
unregister_console(con);
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 9de3ecf..a70d2a5 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -744,20 +744,17 @@ int ptrace_request(struct task_struct *child, long request,
break;
si = child->last_siginfo;
- if (unlikely(!si || si->si_code >> 8 != PTRACE_EVENT_STOP))
- break;
-
- child->jobctl |= JOBCTL_LISTENING;
-
- /*
- * If NOTIFY is set, it means event happened between start
- * of this trap and now. Trigger re-trap immediately.
- */
- if (child->jobctl & JOBCTL_TRAP_NOTIFY)
- signal_wake_up(child, true);
-
+ if (likely(si && (si->si_code >> 8) == PTRACE_EVENT_STOP)) {
+ child->jobctl |= JOBCTL_LISTENING;
+ /*
+ * If NOTIFY is set, it means event happened between
+ * start of this trap and now. Trigger re-trap.
+ */
+ if (child->jobctl & JOBCTL_TRAP_NOTIFY)
+ signal_wake_up(child, true);
+ ret = 0;
+ }
unlock_task_sighand(child, &flags);
- ret = 0;
break;
case PTRACE_DETACH: /* detach a process that was attached. */
diff --git a/kernel/resource.c b/kernel/resource.c
index 3b3cedc..c8dc249 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -419,6 +419,9 @@ static int __find_resource(struct resource *root, struct resource *old,
else
tmp.end = root->end;
+ if (tmp.end < tmp.start)
+ goto next;
+
resource_clip(&tmp, constraint->min, constraint->max);
arch_remove_reservations(&tmp);
@@ -436,8 +439,10 @@ static int __find_resource(struct resource *root, struct resource *old,
return 0;
}
}
- if (!this)
+
+next: if (!this || this->end == root->end)
break;
+
if (this != old)
tmp.start = this->end + 1;
this = this->sibling;
diff --git a/kernel/sched.c b/kernel/sched.c
index ccacdbd..b50b0f0 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -3065,7 +3065,7 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
local_irq_disable();
#endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */
- perf_event_task_sched_in(current);
+ perf_event_task_sched_in(prev, current);
#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
local_irq_enable();
#endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */
@@ -3725,30 +3725,6 @@ unsigned long long task_sched_runtime(struct task_struct *p)
}
/*
- * Return sum_exec_runtime for the thread group.
- * In case the task is currently running, return the sum plus current's
- * pending runtime that have not been accounted yet.
- *
- * Note that the thread group might have other running tasks as well,
- * so the return value not includes other pending runtime that other
- * running tasks might have.
- */
-unsigned long long thread_group_sched_runtime(struct task_struct *p)
-{
- struct task_cputime totals;
- unsigned long flags;
- struct rq *rq;
- u64 ns;
-
- rq = task_rq_lock(p, &flags);
- thread_group_cputime(p, &totals);
- ns = totals.sum_exec_runtime + do_task_delta_exec(p, rq);
- task_rq_unlock(rq, p, &flags);
-
- return ns;
-}
-
-/*
* Account user cpu time to a process.
* @p: the process that the cpu time gets accounted to
* @cputime: the cpu time spent in user space since the last update
@@ -4279,9 +4255,9 @@ pick_next_task(struct rq *rq)
}
/*
- * schedule() is the main scheduler function.
+ * __schedule() is the main scheduler function.
*/
-asmlinkage void __sched schedule(void)
+static void __sched __schedule(void)
{
struct task_struct *prev, *next;
unsigned long *switch_count;
@@ -4322,16 +4298,6 @@ need_resched:
if (to_wakeup)
try_to_wake_up_local(to_wakeup);
}
-
- /*
- * If we are going to sleep and we have plugged IO
- * queued, make sure to submit it to avoid deadlocks.
- */
- if (blk_needs_flush_plug(prev)) {
- raw_spin_unlock(&rq->lock);
- blk_schedule_flush_plug(prev);
- raw_spin_lock(&rq->lock);
- }
}
switch_count = &prev->nvcsw;
}
@@ -4369,6 +4335,26 @@ need_resched:
if (need_resched())
goto need_resched;
}
+
+static inline void sched_submit_work(struct task_struct *tsk)
+{
+ if (!tsk->state)
+ return;
+ /*
+ * If we are going to sleep and we have plugged IO queued,
+ * make sure to submit it to avoid deadlocks.
+ */
+ if (blk_needs_flush_plug(tsk))
+ blk_schedule_flush_plug(tsk);
+}
+
+asmlinkage void __sched schedule(void)
+{
+ struct task_struct *tsk = current;
+
+ sched_submit_work(tsk);
+ __schedule();
+}
EXPORT_SYMBOL(schedule);
#ifdef CONFIG_MUTEX_SPIN_ON_OWNER
@@ -4435,7 +4421,7 @@ asmlinkage void __sched notrace preempt_schedule(void)
do {
add_preempt_count_notrace(PREEMPT_ACTIVE);
- schedule();
+ __schedule();
sub_preempt_count_notrace(PREEMPT_ACTIVE);
/*
@@ -4463,7 +4449,7 @@ asmlinkage void __sched preempt_schedule_irq(void)
do {
add_preempt_count(PREEMPT_ACTIVE);
local_irq_enable();
- schedule();
+ __schedule();
local_irq_disable();
sub_preempt_count(PREEMPT_ACTIVE);
@@ -5588,7 +5574,7 @@ static inline int should_resched(void)
static void __cond_resched(void)
{
add_preempt_count(PREEMPT_ACTIVE);
- schedule();
+ __schedule();
sub_preempt_count(PREEMPT_ACTIVE);
}
@@ -7443,6 +7429,7 @@ static void __sdt_free(const struct cpumask *cpu_map)
struct sched_domain *sd = *per_cpu_ptr(sdd->sd, j);
if (sd && (sd->flags & SD_OVERLAP))
free_sched_groups(sd->groups, 0);
+ kfree(*per_cpu_ptr(sdd->sd, j));
kfree(*per_cpu_ptr(sdd->sg, j));
kfree(*per_cpu_ptr(sdd->sgp, j));
}
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 97540f0..af11778 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -1050,7 +1050,7 @@ select_task_rq_rt(struct task_struct *p, int sd_flag, int flags)
*/
if (curr && unlikely(rt_task(curr)) &&
(curr->rt.nr_cpus_allowed < 2 ||
- curr->prio < p->prio) &&
+ curr->prio <= p->prio) &&
(p->rt.nr_cpus_allowed > 1)) {
int target = find_lowest_rq(p);
@@ -1581,7 +1581,7 @@ static void task_woken_rt(struct rq *rq, struct task_struct *p)
p->rt.nr_cpus_allowed > 1 &&
rt_task(rq->curr) &&
(rq->curr->rt.nr_cpus_allowed < 2 ||
- rq->curr->prio < p->prio))
+ rq->curr->prio <= p->prio))
push_rt_tasks(rq);
}
diff --git a/kernel/sys.c b/kernel/sys.c
index a101ba3..18ee1d2 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -37,6 +37,8 @@
#include <linux/fs_struct.h>
#include <linux/gfp.h>
#include <linux/syscore_ops.h>
+#include <linux/version.h>
+#include <linux/ctype.h>
#include <linux/compat.h>
#include <linux/syscalls.h>
@@ -44,6 +46,8 @@
#include <linux/user_namespace.h>
#include <linux/kmsg_dump.h>
+/* Move somewhere else to avoid recompiling? */
+#include <generated/utsrelease.h>
#include <asm/uaccess.h>
#include <asm/io.h>
@@ -621,11 +625,18 @@ static int set_user(struct cred *new)
if (!new_user)
return -EAGAIN;
+ /*
+ * We don't fail in case of NPROC limit excess here because too many
+ * poorly written programs don't check set*uid() return code, assuming
+ * it never fails if called by root. We may still enforce NPROC limit
+ * for programs doing set*uid()+execve() by harmlessly deferring the
+ * failure to the execve() stage.
+ */
if (atomic_read(&new_user->processes) >= rlimit(RLIMIT_NPROC) &&
- new_user != INIT_USER) {
- free_uid(new_user);
- return -EAGAIN;
- }
+ new_user != INIT_USER)
+ current->flags |= PF_NPROC_EXCEEDED;
+ else
+ current->flags &= ~PF_NPROC_EXCEEDED;
free_uid(new->user);
new->user = new_user;
@@ -1154,6 +1165,34 @@ DECLARE_RWSEM(uts_sem);
#define override_architecture(name) 0
#endif
+/*
+ * Work around broken programs that cannot handle "Linux 3.0".
+ * Instead we map 3.x to 2.6.40+x, so e.g. 3.0 would be 2.6.40
+ */
+static int override_release(char __user *release, int len)
+{
+ int ret = 0;
+ char buf[len];
+
+ if (current->personality & UNAME26) {
+ char *rest = UTS_RELEASE;
+ int ndots = 0;
+ unsigned v;
+
+ while (*rest) {
+ if (*rest == '.' && ++ndots >= 3)
+ break;
+ if (!isdigit(*rest) && *rest != '.')
+ break;
+ rest++;
+ }
+ v = ((LINUX_VERSION_CODE >> 8) & 0xff) + 40;
+ snprintf(buf, len, "2.6.%u%s", v, rest);
+ ret = copy_to_user(release, buf, len);
+ }
+ return ret;
+}
+
SYSCALL_DEFINE1(newuname, struct new_utsname __user *, name)
{
int errno = 0;
@@ -1163,6 +1202,8 @@ SYSCALL_DEFINE1(newuname, struct new_utsname __user *, name)
errno = -EFAULT;
up_read(&uts_sem);
+ if (!errno && override_release(name->release, sizeof(name->release)))
+ errno = -EFAULT;
if (!errno && override_architecture(name))
errno = -EFAULT;
return errno;
@@ -1184,6 +1225,8 @@ SYSCALL_DEFINE1(uname, struct old_utsname __user *, name)
error = -EFAULT;
up_read(&uts_sem);
+ if (!error && override_release(name->release, sizeof(name->release)))
+ error = -EFAULT;
if (!error && override_architecture(name))
error = -EFAULT;
return error;
@@ -1218,6 +1261,8 @@ SYSCALL_DEFINE1(olduname, struct oldold_utsname __user *, name)
if (!error && override_architecture(name))
error = -EFAULT;
+ if (!error && override_release(name->release, sizeof(name->release)))
+ error = -EFAULT;
return error ? -EFAULT : 0;
}
#endif
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 62cbc88..a9a5de0 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -16,7 +16,6 @@ asmlinkage long sys_ni_syscall(void)
return -ENOSYS;
}
-cond_syscall(sys_nfsservctl);
cond_syscall(sys_quotactl);
cond_syscall(sys32_quotactl);
cond_syscall(sys_acct);
diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c
index 3b8e028..e8bffbe 100644
--- a/kernel/sysctl_binary.c
+++ b/kernel/sysctl_binary.c
@@ -1,6 +1,6 @@
#include <linux/stat.h>
#include <linux/sysctl.h>
-#include "../fs/xfs/linux-2.6/xfs_sysctl.h"
+#include "../fs/xfs/xfs_sysctl.h"
#include <linux/sunrpc/debug.h>
#include <linux/string.h>
#include <net/ip_vs.h>
diff --git a/kernel/sysctl_check.c b/kernel/sysctl_check.c
index 4e4932a..362da65 100644
--- a/kernel/sysctl_check.c
+++ b/kernel/sysctl_check.c
@@ -1,6 +1,6 @@
#include <linux/stat.h>
#include <linux/sysctl.h>
-#include "../fs/xfs/linux-2.6/xfs_sysctl.h"
+#include "../fs/xfs/xfs_sysctl.h"
#include <linux/sunrpc/debug.h>
#include <linux/string.h>
#include <net/ip_vs.h>
diff --git a/kernel/taskstats.c b/kernel/taskstats.c
index e19ce14..e660464 100644
--- a/kernel/taskstats.c
+++ b/kernel/taskstats.c
@@ -655,6 +655,7 @@ static struct genl_ops taskstats_ops = {
.cmd = TASKSTATS_CMD_GET,
.doit = taskstats_user_cmd,
.policy = taskstats_cmd_get_policy,
+ .flags = GENL_ADMIN_PERM,
};
static struct genl_ops cgroupstats_ops = {
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index 59f369f..ea5e1a9 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -441,6 +441,8 @@ static int alarm_timer_create(struct k_itimer *new_timer)
static void alarm_timer_get(struct k_itimer *timr,
struct itimerspec *cur_setting)
{
+ memset(cur_setting, 0, sizeof(struct itimerspec));
+
cur_setting->it_interval =
ktime_to_timespec(timr->it.alarmtimer.period);
cur_setting->it_value =
@@ -479,11 +481,17 @@ static int alarm_timer_set(struct k_itimer *timr, int flags,
if (!rtcdev)
return -ENOTSUPP;
- /* Save old values */
- old_setting->it_interval =
- ktime_to_timespec(timr->it.alarmtimer.period);
- old_setting->it_value =
- ktime_to_timespec(timr->it.alarmtimer.node.expires);
+ /*
+ * XXX HACK! Currently we can DOS a system if the interval
+ * period on alarmtimers is too small. Cap the interval here
+ * to 100us and solve this properly in a future patch! -jstultz
+ */
+ if ((new_setting->it_interval.tv_sec == 0) &&
+ (new_setting->it_interval.tv_nsec < 100000))
+ new_setting->it_interval.tv_nsec = 100000;
+
+ if (old_setting)
+ alarm_timer_get(timr, old_setting);
/* If the timer was already set, cancel it */
alarm_cancel(&timr->it.alarmtimer);
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 2ad39e5..cd31345 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -82,7 +82,7 @@ config EVENT_POWER_TRACING_DEPRECATED
power:power_frequency
This is for userspace compatibility
and will vanish after 5 kernel iterations,
- namely 2.6.41.
+ namely 3.1.
config CONTEXT_SWITCH_TRACER
bool
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 6957aa2..7c910a5 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -206,6 +206,8 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
what |= MASK_TC_BIT(rw, RAHEAD);
what |= MASK_TC_BIT(rw, META);
what |= MASK_TC_BIT(rw, DISCARD);
+ what |= MASK_TC_BIT(rw, FLUSH);
+ what |= MASK_TC_BIT(rw, FUA);
pid = tsk->pid;
if (act_log_check(bt, what, sector, pid))
@@ -1054,6 +1056,9 @@ static void fill_rwbs(char *rwbs, const struct blk_io_trace *t)
goto out;
}
+ if (tc & BLK_TC_FLUSH)
+ rwbs[i++] = 'F';
+
if (tc & BLK_TC_DISCARD)
rwbs[i++] = 'D';
else if (tc & BLK_TC_WRITE)
@@ -1063,10 +1068,10 @@ static void fill_rwbs(char *rwbs, const struct blk_io_trace *t)
else
rwbs[i++] = 'N';
+ if (tc & BLK_TC_FUA)
+ rwbs[i++] = 'F';
if (tc & BLK_TC_AHEAD)
rwbs[i++] = 'A';
- if (tc & BLK_TC_BARRIER)
- rwbs[i++] = 'B';
if (tc & BLK_TC_SYNC)
rwbs[i++] = 'S';
if (tc & BLK_TC_META)
@@ -1132,7 +1137,7 @@ typedef int (blk_log_action_t) (struct trace_iterator *iter, const char *act);
static int blk_log_action_classic(struct trace_iterator *iter, const char *act)
{
- char rwbs[6];
+ char rwbs[RWBS_LEN];
unsigned long long ts = iter->ts;
unsigned long nsec_rem = do_div(ts, NSEC_PER_SEC);
unsigned secs = (unsigned long)ts;
@@ -1148,7 +1153,7 @@ static int blk_log_action_classic(struct trace_iterator *iter, const char *act)
static int blk_log_action(struct trace_iterator *iter, const char *act)
{
- char rwbs[6];
+ char rwbs[RWBS_LEN];
const struct blk_io_trace *t = te_blk_io_trace(iter->ent);
fill_rwbs(rwbs, t);
@@ -1561,7 +1566,7 @@ static const struct {
} mask_maps[] = {
{ BLK_TC_READ, "read" },
{ BLK_TC_WRITE, "write" },
- { BLK_TC_BARRIER, "barrier" },
+ { BLK_TC_FLUSH, "flush" },
{ BLK_TC_SYNC, "sync" },
{ BLK_TC_QUEUE, "queue" },
{ BLK_TC_REQUEUE, "requeue" },
@@ -1573,6 +1578,7 @@ static const struct {
{ BLK_TC_META, "meta" },
{ BLK_TC_DISCARD, "discard" },
{ BLK_TC_DRV_DATA, "drv_data" },
+ { BLK_TC_FUA, "fua" },
};
static int blk_trace_str2mask(const char *str)
@@ -1788,6 +1794,9 @@ void blk_fill_rwbs(char *rwbs, u32 rw, int bytes)
{
int i = 0;
+ if (rw & REQ_FLUSH)
+ rwbs[i++] = 'F';
+
if (rw & WRITE)
rwbs[i++] = 'W';
else if (rw & REQ_DISCARD)
@@ -1797,6 +1806,8 @@ void blk_fill_rwbs(char *rwbs, u32 rw, int bytes)
else
rwbs[i++] = 'N';
+ if (rw & REQ_FUA)
+ rwbs[i++] = 'F';
if (rw & REQ_RAHEAD)
rwbs[i++] = 'A';
if (rw & REQ_SYNC)
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 5fb3697..00d527c 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -836,11 +836,17 @@ static void __unregister_trace_probe(struct trace_probe *tp)
}
/* Unregister a trace_probe and probe_event: call with locking probe_lock */
-static void unregister_trace_probe(struct trace_probe *tp)
+static int unregister_trace_probe(struct trace_probe *tp)
{
+ /* Enabled event can not be unregistered */
+ if (trace_probe_is_enabled(tp))
+ return -EBUSY;
+
__unregister_trace_probe(tp);
list_del(&tp->list);
unregister_probe_event(tp);
+
+ return 0;
}
/* Register a trace_probe and probe_event */
@@ -854,7 +860,9 @@ static int register_trace_probe(struct trace_probe *tp)
/* Delete old (same name) event if exist */
old_tp = find_trace_probe(tp->call.name, tp->call.class->system);
if (old_tp) {
- unregister_trace_probe(old_tp);
+ ret = unregister_trace_probe(old_tp);
+ if (ret < 0)
+ goto end;
free_trace_probe(old_tp);
}
@@ -892,6 +900,7 @@ static int trace_probe_module_callback(struct notifier_block *nb,
mutex_lock(&probe_lock);
list_for_each_entry(tp, &probe_list, list) {
if (trace_probe_within_module(tp, mod)) {
+ /* Don't need to check busy - this should have gone. */
__unregister_trace_probe(tp);
ret = __register_trace_probe(tp);
if (ret)
@@ -1205,10 +1214,11 @@ static int create_trace_probe(int argc, char **argv)
return -ENOENT;
}
/* delete an event */
- unregister_trace_probe(tp);
- free_trace_probe(tp);
+ ret = unregister_trace_probe(tp);
+ if (ret == 0)
+ free_trace_probe(tp);
mutex_unlock(&probe_lock);
- return 0;
+ return ret;
}
if (argc < 2) {
@@ -1317,18 +1327,29 @@ error:
return ret;
}
-static void release_all_trace_probes(void)
+static int release_all_trace_probes(void)
{
struct trace_probe *tp;
+ int ret = 0;
mutex_lock(&probe_lock);
+ /* Ensure no probe is in use. */
+ list_for_each_entry(tp, &probe_list, list)
+ if (trace_probe_is_enabled(tp)) {
+ ret = -EBUSY;
+ goto end;
+ }
/* TODO: Use batch unregistration */
while (!list_empty(&probe_list)) {
tp = list_entry(probe_list.next, struct trace_probe, list);
unregister_trace_probe(tp);
free_trace_probe(tp);
}
+
+end:
mutex_unlock(&probe_lock);
+
+ return ret;
}
/* Probes listing interfaces */
@@ -1380,9 +1401,13 @@ static const struct seq_operations probes_seq_op = {
static int probes_open(struct inode *inode, struct file *file)
{
- if ((file->f_mode & FMODE_WRITE) &&
- (file->f_flags & O_TRUNC))
- release_all_trace_probes();
+ int ret;
+
+ if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
+ ret = release_all_trace_probes();
+ if (ret < 0)
+ return ret;
+ }
return seq_open(file, &probes_seq_op);
}
@@ -2055,6 +2080,21 @@ static __init int kprobe_trace_self_tests_init(void)
ret = target(1, 2, 3, 4, 5, 6);
+ /* Disable trace points before removing it */
+ tp = find_trace_probe("testprobe", KPROBE_EVENT_SYSTEM);
+ if (WARN_ON_ONCE(tp == NULL)) {
+ pr_warning("error on getting test probe.\n");
+ warn++;
+ } else
+ disable_trace_probe(tp, TP_FLAG_TRACE);
+
+ tp = find_trace_probe("testprobe2", KPROBE_EVENT_SYSTEM);
+ if (WARN_ON_ONCE(tp == NULL)) {
+ pr_warning("error on getting 2nd test probe.\n");
+ warn++;
+ } else
+ disable_trace_probe(tp, TP_FLAG_TRACE);
+
ret = command_trace_probe("-:testprobe");
if (WARN_ON_ONCE(ret)) {
pr_warning("error on deleting a probe.\n");
diff --git a/kernel/tsacct.c b/kernel/tsacct.c
index 24dc60d..5bbfac8 100644
--- a/kernel/tsacct.c
+++ b/kernel/tsacct.c
@@ -78,6 +78,7 @@ void bacct_add_tsk(struct taskstats *stats, struct task_struct *tsk)
#define KB 1024
#define MB (1024*KB)
+#define KB_MASK (~(KB-1))
/*
* fill in extended accounting fields
*/
@@ -95,14 +96,14 @@ void xacct_add_tsk(struct taskstats *stats, struct task_struct *p)
stats->hiwater_vm = get_mm_hiwater_vm(mm) * PAGE_SIZE / KB;
mmput(mm);
}
- stats->read_char = p->ioac.rchar;
- stats->write_char = p->ioac.wchar;
- stats->read_syscalls = p->ioac.syscr;
- stats->write_syscalls = p->ioac.syscw;
+ stats->read_char = p->ioac.rchar & KB_MASK;
+ stats->write_char = p->ioac.wchar & KB_MASK;
+ stats->read_syscalls = p->ioac.syscr & KB_MASK;
+ stats->write_syscalls = p->ioac.syscw & KB_MASK;
#ifdef CONFIG_TASK_IO_ACCOUNTING
- stats->read_bytes = p->ioac.read_bytes;
- stats->write_bytes = p->ioac.write_bytes;
- stats->cancelled_write_bytes = p->ioac.cancelled_write_bytes;
+ stats->read_bytes = p->ioac.read_bytes & KB_MASK;
+ stats->write_bytes = p->ioac.write_bytes & KB_MASK;
+ stats->cancelled_write_bytes = p->ioac.cancelled_write_bytes & KB_MASK;
#else
stats->read_bytes = 0;
stats->write_bytes = 0;
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 36491cd..d680381 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -321,7 +321,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
*/
static int watchdog(void *unused)
{
- static struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
+ struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
struct hrtimer *hrtimer = &__raw_get_cpu_var(watchdog_hrtimer);
sched_setscheduler(current, SCHED_FIFO, &param);
@@ -350,7 +350,8 @@ static int watchdog(void *unused)
set_current_state(TASK_INTERRUPTIBLE);
}
__set_current_state(TASK_RUNNING);
-
+ param.sched_priority = 0;
+ sched_setscheduler(current, SCHED_NORMAL, &param);
return 0;
}
@@ -438,7 +439,7 @@ static int watchdog_enable(int cpu)
/* create the watchdog thread */
if (!p) {
- p = kthread_create(watchdog, (void *)(unsigned long)cpu, "watchdog/%d", cpu);
+ p = kthread_create_on_node(watchdog, NULL, cpu_to_node(cpu), "watchdog/%d", cpu);
if (IS_ERR(p)) {
printk(KERN_ERR "softlockup watchdog for %i failed\n", cpu);
if (!err) {
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 25fb1b0..1783aab 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -2412,8 +2412,13 @@ reflush:
for_each_cwq_cpu(cpu, wq) {
struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
+ bool drained;
- if (!cwq->nr_active && list_empty(&cwq->delayed_works))
+ spin_lock_irq(&cwq->gcwq->lock);
+ drained = !cwq->nr_active && list_empty(&cwq->delayed_works);
+ spin_unlock_irq(&cwq->gcwq->lock);
+
+ if (drained)
continue;
if (++flush_cnt == 10 ||