summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/cpuset.c35
-rw-r--r--kernel/fork.c9
-rw-r--r--kernel/hrtimer.c13
-rw-r--r--kernel/power/snapshot.c4
-rw-r--r--kernel/power/swsusp.c4
-rw-r--r--kernel/ptrace.c28
-rw-r--r--kernel/sched.c23
-rw-r--r--kernel/sysctl.c2
-rw-r--r--kernel/timer.c39
9 files changed, 115 insertions, 42 deletions
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index ba42b0a..12815d3 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1977,6 +1977,39 @@ void cpuset_fork(struct task_struct *child)
* We don't need to task_lock() this reference to tsk->cpuset,
* because tsk is already marked PF_EXITING, so attach_task() won't
* mess with it, or task is a failed fork, never visible to attach_task.
+ *
+ * Hack:
+ *
+ * Set the exiting tasks cpuset to the root cpuset (top_cpuset).
+ *
+ * Don't leave a task unable to allocate memory, as that is an
+ * accident waiting to happen should someone add a callout in
+ * do_exit() after the cpuset_exit() call that might allocate.
+ * If a task tries to allocate memory with an invalid cpuset,
+ * it will oops in cpuset_update_task_memory_state().
+ *
+ * We call cpuset_exit() while the task is still competent to
+ * handle notify_on_release(), then leave the task attached to
+ * the root cpuset (top_cpuset) for the remainder of its exit.
+ *
+ * To do this properly, we would increment the reference count on
+ * top_cpuset, and near the very end of the kernel/exit.c do_exit()
+ * code we would add a second cpuset function call, to drop that
+ * reference. This would just create an unnecessary hot spot on
+ * the top_cpuset reference count, to no avail.
+ *
+ * Normally, holding a reference to a cpuset without bumping its
+ * count is unsafe. The cpuset could go away, or someone could
+ * attach us to a different cpuset, decrementing the count on
+ * the first cpuset that we never incremented. But in this case,
+ * top_cpuset isn't going away, and either task has PF_EXITING set,
+ * which wards off any attach_task() attempts, or task is a failed
+ * fork, never visible to attach_task.
+ *
+ * Another way to do this would be to set the cpuset pointer
+ * to NULL here, and check in cpuset_update_task_memory_state()
+ * for a NULL pointer. This hack avoids that NULL check, for no
+ * cost (other than this way too long comment ;).
**/
void cpuset_exit(struct task_struct *tsk)
@@ -1984,7 +2017,7 @@ void cpuset_exit(struct task_struct *tsk)
struct cpuset *cs;
cs = tsk->cpuset;
- tsk->cpuset = NULL;
+ tsk->cpuset = &top_cpuset; /* Hack - see comment above */
if (notify_on_release(cs)) {
char *pathbuf = NULL;
diff --git a/kernel/fork.c b/kernel/fork.c
index 8e88b37..fbea12d 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1123,8 +1123,8 @@ static task_t *copy_process(unsigned long clone_flags,
p->real_parent = current;
p->parent = p->real_parent;
+ spin_lock(&current->sighand->siglock);
if (clone_flags & CLONE_THREAD) {
- spin_lock(&current->sighand->siglock);
/*
* Important: if an exit-all has been started then
* do not create this new thread - the whole thread
@@ -1162,8 +1162,6 @@ static task_t *copy_process(unsigned long clone_flags,
*/
p->it_prof_expires = jiffies_to_cputime(1);
}
-
- spin_unlock(&current->sighand->siglock);
}
/*
@@ -1175,8 +1173,6 @@ static task_t *copy_process(unsigned long clone_flags,
if (unlikely(p->ptrace & PT_PTRACED))
__ptrace_link(p, current->parent);
- attach_pid(p, PIDTYPE_PID, p->pid);
- attach_pid(p, PIDTYPE_TGID, p->tgid);
if (thread_group_leader(p)) {
p->signal->tty = current->signal->tty;
p->signal->pgrp = process_group(current);
@@ -1186,9 +1182,12 @@ static task_t *copy_process(unsigned long clone_flags,
if (p->pid)
__get_cpu_var(process_counts)++;
}
+ attach_pid(p, PIDTYPE_TGID, p->tgid);
+ attach_pid(p, PIDTYPE_PID, p->pid);
nr_threads++;
total_forks++;
+ spin_unlock(&current->sighand->siglock);
write_unlock_irq(&tasklist_lock);
proc_fork_connector(p);
return p;
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 2b6e175..5ae51f1 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -418,8 +418,19 @@ hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode)
/* Switch the timer base, if necessary: */
new_base = switch_hrtimer_base(timer, base);
- if (mode == HRTIMER_REL)
+ if (mode == HRTIMER_REL) {
tim = ktime_add(tim, new_base->get_time());
+ /*
+ * CONFIG_TIME_LOW_RES is a temporary way for architectures
+ * to signal that they simply return xtime in
+ * do_gettimeoffset(). In this case we want to round up by
+ * resolution when starting a relative timer, to avoid short
+ * timeouts. This will go away with the GTOD framework.
+ */
+#ifdef CONFIG_TIME_LOW_RES
+ tim = ktime_add(tim, base->resolution);
+#endif
+ }
timer->expires = tim;
enqueue_hrtimer(timer, new_base);
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 41f6636..8d5a598 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -91,10 +91,8 @@ static int save_highmem_zone(struct zone *zone)
* corrected eventually when the cases giving rise to this
* are better understood.
*/
- if (PageReserved(page)) {
- printk("highmem reserved page?!\n");
+ if (PageReserved(page))
continue;
- }
BUG_ON(PageNosave(page));
if (PageNosaveFree(page))
continue;
diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c
index 4e90905..2d9d08f 100644
--- a/kernel/power/swsusp.c
+++ b/kernel/power/swsusp.c
@@ -153,13 +153,11 @@ static int swsusp_swap_check(void) /* This is called before saving image */
{
int i;
- if (!swsusp_resume_device)
- return -ENODEV;
spin_lock(&swap_lock);
for (i = 0; i < MAX_SWAPFILES; i++) {
if (!(swap_info[i].flags & SWP_WRITEOK))
continue;
- if (is_resume_device(swap_info + i)) {
+ if (!swsusp_resume_device || is_resume_device(swap_info + i)) {
spin_unlock(&swap_lock);
root_swap = i;
return 0;
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 5f33cdb..d95a72c 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -72,8 +72,8 @@ void ptrace_untrace(task_t *child)
*/
void __ptrace_unlink(task_t *child)
{
- if (!child->ptrace)
- BUG();
+ BUG_ON(!child->ptrace);
+
child->ptrace = 0;
if (!list_empty(&child->ptrace_list)) {
list_del_init(&child->ptrace_list);
@@ -184,22 +184,27 @@ bad:
return retval;
}
+void __ptrace_detach(struct task_struct *child, unsigned int data)
+{
+ child->exit_code = data;
+ /* .. re-parent .. */
+ __ptrace_unlink(child);
+ /* .. and wake it up. */
+ if (child->exit_state != EXIT_ZOMBIE)
+ wake_up_process(child);
+}
+
int ptrace_detach(struct task_struct *child, unsigned int data)
{
if (!valid_signal(data))
- return -EIO;
+ return -EIO;
/* Architecture-specific hardware disable .. */
ptrace_disable(child);
- /* .. re-parent .. */
- child->exit_code = data;
-
write_lock_irq(&tasklist_lock);
- __ptrace_unlink(child);
- /* .. and wake it up. */
- if (child->exit_state != EXIT_ZOMBIE)
- wake_up_process(child);
+ if (child->ptrace)
+ __ptrace_detach(child, data);
write_unlock_irq(&tasklist_lock);
return 0;
@@ -242,8 +247,7 @@ int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, in
if (write) {
copy_to_user_page(vma, page, addr,
maddr + offset, buf, bytes);
- if (!PageCompound(page))
- set_page_dirty_lock(page);
+ set_page_dirty_lock(page);
} else {
copy_from_user_page(vma, page, addr,
buf, maddr + offset, bytes);
diff --git a/kernel/sched.c b/kernel/sched.c
index 87d93be..12d291b 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1204,9 +1204,6 @@ static int try_to_wake_up(task_t *p, unsigned int state, int sync)
}
}
- if (p->last_waker_cpu != this_cpu)
- goto out_set_cpu;
-
if (unlikely(!cpu_isset(this_cpu, p->cpus_allowed)))
goto out_set_cpu;
@@ -1277,8 +1274,6 @@ out_set_cpu:
cpu = task_cpu(p);
}
- p->last_waker_cpu = this_cpu;
-
out_activate:
#endif /* CONFIG_SMP */
if (old_state == TASK_UNINTERRUPTIBLE) {
@@ -1360,12 +1355,9 @@ void fastcall sched_fork(task_t *p, int clone_flags)
#ifdef CONFIG_SCHEDSTATS
memset(&p->sched_info, 0, sizeof(p->sched_info));
#endif
-#if defined(CONFIG_SMP)
- p->last_waker_cpu = cpu;
-#if defined(__ARCH_WANT_UNLOCKED_CTXSW)
+#if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW)
p->oncpu = 0;
#endif
-#endif
#ifdef CONFIG_PREEMPT
/* Want to start with kernel preemption disabled. */
task_thread_info(p)->preempt_count = 1;
@@ -5066,7 +5058,18 @@ static void init_sched_build_groups(struct sched_group groups[], cpumask_t span,
#define MAX_DOMAIN_DISTANCE 32
static unsigned long long migration_cost[MAX_DOMAIN_DISTANCE] =
- { [ 0 ... MAX_DOMAIN_DISTANCE-1 ] = -1LL };
+ { [ 0 ... MAX_DOMAIN_DISTANCE-1 ] =
+/*
+ * Architectures may override the migration cost and thus avoid
+ * boot-time calibration. Unit is nanoseconds. Mostly useful for
+ * virtualized hardware:
+ */
+#ifdef CONFIG_DEFAULT_MIGRATION_COST
+ CONFIG_DEFAULT_MIGRATION_COST
+#else
+ -1LL
+#endif
+};
/*
* Allow override of migration cost - in units of microseconds.
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 71dd6f6..7654d55 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -126,8 +126,6 @@ extern int sysctl_hz_timer;
extern int acct_parm[];
#endif
-int randomize_va_space = 1;
-
static int parse_table(int __user *, int, void __user *, size_t __user *, void __user *, size_t,
ctl_table *, void **);
static int proc_doutsstring(ctl_table *table, int write, struct file *filp,
diff --git a/kernel/timer.c b/kernel/timer.c
index b9dad39..fe3a9a9 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -717,12 +717,16 @@ static void second_overflow(void)
#endif
}
-/* in the NTP reference this is called "hardclock()" */
-static void update_wall_time_one_tick(void)
+/*
+ * Returns how many microseconds we need to add to xtime this tick
+ * in doing an adjustment requested with adjtime.
+ */
+static long adjtime_adjustment(void)
{
- long time_adjust_step, delta_nsec;
+ long time_adjust_step;
- if ((time_adjust_step = time_adjust) != 0 ) {
+ time_adjust_step = time_adjust;
+ if (time_adjust_step) {
/*
* We are doing an adjtime thing. Prepare time_adjust_step to
* be within bounds. Note that a positive time_adjust means we
@@ -733,10 +737,19 @@ static void update_wall_time_one_tick(void)
*/
time_adjust_step = min(time_adjust_step, (long)tickadj);
time_adjust_step = max(time_adjust_step, (long)-tickadj);
+ }
+ return time_adjust_step;
+}
+/* in the NTP reference this is called "hardclock()" */
+static void update_wall_time_one_tick(void)
+{
+ long time_adjust_step, delta_nsec;
+
+ time_adjust_step = adjtime_adjustment();
+ if (time_adjust_step)
/* Reduce by this step the amount of time left */
time_adjust -= time_adjust_step;
- }
delta_nsec = tick_nsec + time_adjust_step * 1000;
/*
* Advance the phase, once it gets to one microsecond, then
@@ -759,6 +772,22 @@ static void update_wall_time_one_tick(void)
}
/*
+ * Return how long ticks are at the moment, that is, how much time
+ * update_wall_time_one_tick will add to xtime next time we call it
+ * (assuming no calls to do_adjtimex in the meantime).
+ * The return value is in fixed-point nanoseconds with SHIFT_SCALE-10
+ * bits to the right of the binary point.
+ * This function has no side-effects.
+ */
+u64 current_tick_length(void)
+{
+ long delta_nsec;
+
+ delta_nsec = tick_nsec + adjtime_adjustment() * 1000;
+ return ((u64) delta_nsec << (SHIFT_SCALE - 10)) + time_adj;
+}
+
+/*
* Using a loop looks inefficient, but "ticks" is
* usually just one (we shouldn't be losing ticks,
* we're doing this this way mainly for interrupt