From 019a1369667c3978f9644982ebe6d261dd2bbc40 Mon Sep 17 00:00:00 2001
From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Date: Thu, 29 Jan 2009 11:49:18 -0800
Subject: x86: uaccess: fix compilation error on CONFIG_M386

In case of !CONFIG_X86_WP_WORKS_OK, __put_user_size_ex() is not defined.
Add macros for !CONFIG_X86_WP_WORKS_OK case.

Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 0ec6de4..b9a2415 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -525,8 +525,6 @@ struct __large_struct { unsigned long buf[100]; };
  */
 #define get_user_try		uaccess_try
 #define get_user_catch(err)	uaccess_catch(err)
-#define put_user_try		uaccess_try
-#define put_user_catch(err)	uaccess_catch(err)
 
 #define get_user_ex(x, ptr)	do {					\
 	unsigned long __gue_val;					\
@@ -534,9 +532,29 @@ struct __large_struct { unsigned long buf[100]; };
 	(x) = (__force __typeof__(*(ptr)))__gue_val;			\
 } while (0)
 
+#ifdef CONFIG_X86_WP_WORKS_OK
+
+#define put_user_try		uaccess_try
+#define put_user_catch(err)	uaccess_catch(err)
+
 #define put_user_ex(x, ptr)						\
 	__put_user_size_ex((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)))
 
+#else /* !CONFIG_X86_WP_WORKS_OK */
+
+#define put_user_try		do {		\
+	int __uaccess_err = 0;
+
+#define put_user_catch(err)			\
+	(err) |= __uaccess_err;			\
+} while (0)
+
+#define put_user_ex(x, ptr)	do {		\
+	__uaccess_err |= __put_user(x, ptr);	\
+} while (0)
+
+#endif /* CONFIG_X86_WP_WORKS_OK */
+
 /*
  * movsl can be slow when source and dest are not both 8-byte aligned
  */
-- 
cgit v0.10.2


From 18114f61359ac05e3aa797d53d63f40db41f798d Mon Sep 17 00:00:00 2001
From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Date: Fri, 30 Jan 2009 18:16:46 -0800
Subject: x86: uaccess: use errret as error value in __put_user_size()

Impact: cleanup

In __put_user_size() macro errret is used for error value.
But if size is 8, errret isn't passed to__put_user_asm_u64().
This behavior is inconsistent.

Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>

diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index b9a2415..b685ece 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -186,7 +186,7 @@ extern int __get_user_bad(void);
 
 
 #ifdef CONFIG_X86_32
-#define __put_user_asm_u64(x, addr, err)				\
+#define __put_user_asm_u64(x, addr, err, errret)			\
 	asm volatile("1:	movl %%eax,0(%2)\n"			\
 		     "2:	movl %%edx,4(%2)\n"			\
 		     "3:\n"						\
@@ -197,7 +197,7 @@ extern int __get_user_bad(void);
 		     _ASM_EXTABLE(1b, 4b)				\
 		     _ASM_EXTABLE(2b, 4b)				\
 		     : "=r" (err)					\
-		     : "A" (x), "r" (addr), "i" (-EFAULT), "0" (err))
+		     : "A" (x), "r" (addr), "i" (errret), "0" (err))
 
 #define __put_user_asm_ex_u64(x, addr)					\
 	asm volatile("1:	movl %%eax,0(%1)\n"			\
@@ -211,8 +211,8 @@ extern int __get_user_bad(void);
 	asm volatile("call __put_user_8" : "=a" (__ret_pu)	\
 		     : "A" ((typeof(*(ptr)))(x)), "c" (ptr) : "ebx")
 #else
-#define __put_user_asm_u64(x, ptr, retval) \
-	__put_user_asm(x, ptr, retval, "q", "", "Zr", -EFAULT)
+#define __put_user_asm_u64(x, ptr, retval, errret) \
+	__put_user_asm(x, ptr, retval, "q", "", "Zr", errret)
 #define __put_user_asm_ex_u64(x, addr)	\
 	__put_user_asm_ex(x, addr, "q", "", "Zr")
 #define __put_user_x8(x, ptr, __ret_pu) __put_user_x(8, x, ptr, __ret_pu)
@@ -289,7 +289,8 @@ do {									\
 		__put_user_asm(x, ptr, retval, "l", "k", "ir", errret);	\
 		break;							\
 	case 8:								\
-		__put_user_asm_u64((__typeof__(*ptr))(x), ptr, retval);	\
+		__put_user_asm_u64((__typeof__(*ptr))(x), ptr, retval,	\
+				   errret);				\
 		break;							\
 	default:							\
 		__put_user_bad();					\
-- 
cgit v0.10.2


From 130ace11a9dc682541336d1fe5cb3bc7771a149e Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 6 Feb 2009 00:57:48 +0900
Subject: x86: style cleanups for xen assemblies

Make the following style cleanups:

* drop unnecessary //#include from xen-asm_32.S
* compulsive adding of space after comma
* reformat multiline comments

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S
index 4c6f967..79d7362 100644
--- a/arch/x86/xen/xen-asm.S
+++ b/arch/x86/xen/xen-asm.S
@@ -1,14 +1,14 @@
 /*
-	Asm versions of Xen pv-ops, suitable for either direct use or inlining.
-	The inline versions are the same as the direct-use versions, with the
-	pre- and post-amble chopped off.
-
-	This code is encoded for size rather than absolute efficiency,
-	with a view to being able to inline as much as possible.
-
-	We only bother with direct forms (ie, vcpu in percpu data) of
-	the operations here; the indirect forms are better handled in
-	C, since they're generally too large to inline anyway.
+ * Asm versions of Xen pv-ops, suitable for either direct use or
+ * inlining.  The inline versions are the same as the direct-use
+ * versions, with the pre- and post-amble chopped off.
+ *
+ * This code is encoded for size rather than absolute efficiency, with
+ * a view to being able to inline as much as possible.
+ *
+ * We only bother with direct forms (ie, vcpu in percpu data) of the
+ * operations here; the indirect forms are better handled in C, since
+ * they're generally too large to inline anyway.
  */
 
 #include <asm/asm-offsets.h>
@@ -18,17 +18,19 @@
 #include "xen-asm.h"
 
 /*
-	Enable events.  This clears the event mask and tests the pending
-	event status with one and operation.  If there are pending
-	events, then enter the hypervisor to get them handled.
+ * Enable events.  This clears the event mask and tests the pending
+ * event status with one and operation.  If there are pending events,
+ * then enter the hypervisor to get them handled.
  */
 ENTRY(xen_irq_enable_direct)
 	/* Unmask events */
 	movb $0, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
 
-	/* Preempt here doesn't matter because that will deal with
-	   any pending interrupts.  The pending check may end up being
-	   run on the wrong CPU, but that doesn't hurt. */
+	/*
+	 * Preempt here doesn't matter because that will deal with any
+	 * pending interrupts.  The pending check may end up being run
+	 * on the wrong CPU, but that doesn't hurt.
+	 */
 
 	/* Test for pending */
 	testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending
@@ -43,8 +45,8 @@ ENDPATCH(xen_irq_enable_direct)
 
 
 /*
-	Disabling events is simply a matter of making the event mask
-	non-zero.
+ * Disabling events is simply a matter of making the event mask
+ * non-zero.
  */
 ENTRY(xen_irq_disable_direct)
 	movb $1, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
@@ -54,18 +56,18 @@ ENDPATCH(xen_irq_disable_direct)
 	RELOC(xen_irq_disable_direct, 0)
 
 /*
-	(xen_)save_fl is used to get the current interrupt enable status.
-	Callers expect the status to be in X86_EFLAGS_IF, and other bits
-	may be set in the return value.  We take advantage of this by
-	making sure that X86_EFLAGS_IF has the right value (and other bits
-	in that byte are 0), but other bits in the return value are
-	undefined.  We need to toggle the state of the bit, because
-	Xen and x86 use opposite senses (mask vs enable).
+ * (xen_)save_fl is used to get the current interrupt enable status.
+ * Callers expect the status to be in X86_EFLAGS_IF, and other bits
+ * may be set in the return value.  We take advantage of this by
+ * making sure that X86_EFLAGS_IF has the right value (and other bits
+ * in that byte are 0), but other bits in the return value are
+ * undefined.  We need to toggle the state of the bit, because Xen and
+ * x86 use opposite senses (mask vs enable).
  */
 ENTRY(xen_save_fl_direct)
 	testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
 	setz %ah
-	addb %ah,%ah
+	addb %ah, %ah
 ENDPATCH(xen_save_fl_direct)
 	ret
 	ENDPROC(xen_save_fl_direct)
@@ -73,12 +75,11 @@ ENDPATCH(xen_save_fl_direct)
 
 
 /*
-	In principle the caller should be passing us a value return
-	from xen_save_fl_direct, but for robustness sake we test only
-	the X86_EFLAGS_IF flag rather than the whole byte. After
-	setting the interrupt mask state, it checks for unmasked
-	pending events and enters the hypervisor to get them delivered
-	if so.
+ * In principle the caller should be passing us a value return from
+ * xen_save_fl_direct, but for robustness sake we test only the
+ * X86_EFLAGS_IF flag rather than the whole byte. After setting the
+ * interrupt mask state, it checks for unmasked pending events and
+ * enters the hypervisor to get them delivered if so.
  */
 ENTRY(xen_restore_fl_direct)
 #ifdef CONFIG_X86_64
@@ -87,9 +88,11 @@ ENTRY(xen_restore_fl_direct)
 	testb $X86_EFLAGS_IF>>8, %ah
 #endif
 	setz PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
-	/* Preempt here doesn't matter because that will deal with
-	   any pending interrupts.  The pending check may end up being
-	   run on the wrong CPU, but that doesn't hurt. */
+	/*
+	 * Preempt here doesn't matter because that will deal with any
+	 * pending interrupts.  The pending check may end up being run
+	 * on the wrong CPU, but that doesn't hurt.
+	 */
 
 	/* check for unmasked and pending */
 	cmpw $0x0001, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending
@@ -103,8 +106,8 @@ ENDPATCH(xen_restore_fl_direct)
 
 
 /*
-	Force an event check by making a hypercall,
-	but preserve regs before making the call.
+ * Force an event check by making a hypercall, but preserve regs
+ * before making the call.
  */
 check_events:
 #ifdef CONFIG_X86_32
@@ -137,4 +140,3 @@ check_events:
 	pop %rax
 #endif
 	ret
-
diff --git a/arch/x86/xen/xen-asm_32.S b/arch/x86/xen/xen-asm_32.S
index 082d173..88e15de 100644
--- a/arch/x86/xen/xen-asm_32.S
+++ b/arch/x86/xen/xen-asm_32.S
@@ -1,17 +1,16 @@
 /*
-	Asm versions of Xen pv-ops, suitable for either direct use or inlining.
-	The inline versions are the same as the direct-use versions, with the
-	pre- and post-amble chopped off.
-
-	This code is encoded for size rather than absolute efficiency,
-	with a view to being able to inline as much as possible.
-
-	We only bother with direct forms (ie, vcpu in pda) of the operations
-	here; the indirect forms are better handled in C, since they're
-	generally too large to inline anyway.
+ * Asm versions of Xen pv-ops, suitable for either direct use or
+ * inlining.  The inline versions are the same as the direct-use
+ * versions, with the pre- and post-amble chopped off.
+ *
+ * This code is encoded for size rather than absolute efficiency, with
+ * a view to being able to inline as much as possible.
+ *
+ * We only bother with direct forms (ie, vcpu in pda) of the
+ * operations here; the indirect forms are better handled in C, since
+ * they're generally too large to inline anyway.
  */
 
-//#include <asm/asm-offsets.h>
 #include <asm/thread_info.h>
 #include <asm/processor-flags.h>
 #include <asm/segment.h>
@@ -21,8 +20,8 @@
 #include "xen-asm.h"
 
 /*
-	Force an event check by making a hypercall,
-	but preserve regs before making the call.
+ * Force an event check by making a hypercall, but preserve regs
+ * before making the call.
  */
 check_events:
 	push %eax
@@ -35,10 +34,10 @@ check_events:
 	ret
 
 /*
-	We can't use sysexit directly, because we're not running in ring0.
-	But we can easily fake it up using iret.  Assuming xen_sysexit
-	is jumped to with a standard stack frame, we can just strip it
-	back to a standard iret frame and use iret.
+ * We can't use sysexit directly, because we're not running in ring0.
+ * But we can easily fake it up using iret.  Assuming xen_sysexit is
+ * jumped to with a standard stack frame, we can just strip it back to
+ * a standard iret frame and use iret.
  */
 ENTRY(xen_sysexit)
 	movl PT_EAX(%esp), %eax			/* Shouldn't be necessary? */
@@ -49,33 +48,31 @@ ENTRY(xen_sysexit)
 ENDPROC(xen_sysexit)
 
 /*
-	This is run where a normal iret would be run, with the same stack setup:
-	      8: eflags
-	      4: cs
-	esp-> 0: eip
-
-	This attempts to make sure that any pending events are dealt
-	with on return to usermode, but there is a small window in
-	which an event can happen just before entering usermode.  If
-	the nested interrupt ends up setting one of the TIF_WORK_MASK
-	pending work flags, they will not be tested again before
-	returning to usermode. This means that a process can end up
-	with pending work, which will be unprocessed until the process
-	enters and leaves the kernel again, which could be an
-	unbounded amount of time.  This means that a pending signal or
-	reschedule event could be indefinitely delayed.
-
-	The fix is to notice a nested interrupt in the critical
-	window, and if one occurs, then fold the nested interrupt into
-	the current interrupt stack frame, and re-process it
-	iteratively rather than recursively.  This means that it will
-	exit via the normal path, and all pending work will be dealt
-	with appropriately.
-
-	Because the nested interrupt handler needs to deal with the
-	current stack state in whatever form its in, we keep things
-	simple by only using a single register which is pushed/popped
-	on the stack.
+ * This is run where a normal iret would be run, with the same stack setup:
+ *	8: eflags
+ *	4: cs
+ *	esp-> 0: eip
+ *
+ * This attempts to make sure that any pending events are dealt with
+ * on return to usermode, but there is a small window in which an
+ * event can happen just before entering usermode.  If the nested
+ * interrupt ends up setting one of the TIF_WORK_MASK pending work
+ * flags, they will not be tested again before returning to
+ * usermode. This means that a process can end up with pending work,
+ * which will be unprocessed until the process enters and leaves the
+ * kernel again, which could be an unbounded amount of time.  This
+ * means that a pending signal or reschedule event could be
+ * indefinitely delayed.
+ *
+ * The fix is to notice a nested interrupt in the critical window, and
+ * if one occurs, then fold the nested interrupt into the current
+ * interrupt stack frame, and re-process it iteratively rather than
+ * recursively.  This means that it will exit via the normal path, and
+ * all pending work will be dealt with appropriately.
+ *
+ * Because the nested interrupt handler needs to deal with the current
+ * stack state in whatever form its in, we keep things simple by only
+ * using a single register which is pushed/popped on the stack.
  */
 ENTRY(xen_iret)
 	/* test eflags for special cases */
@@ -85,13 +82,15 @@ ENTRY(xen_iret)
 	push %eax
 	ESP_OFFSET=4	# bytes pushed onto stack
 
-	/* Store vcpu_info pointer for easy access.  Do it this
-	   way to avoid having to reload %fs */
+	/*
+	 * Store vcpu_info pointer for easy access.  Do it this way to
+	 * avoid having to reload %fs
+	 */
 #ifdef CONFIG_SMP
 	GET_THREAD_INFO(%eax)
-	movl TI_cpu(%eax),%eax
-	movl __per_cpu_offset(,%eax,4),%eax
-	mov per_cpu__xen_vcpu(%eax),%eax
+	movl TI_cpu(%eax), %eax
+	movl __per_cpu_offset(,%eax,4), %eax
+	mov per_cpu__xen_vcpu(%eax), %eax
 #else
 	movl per_cpu__xen_vcpu, %eax
 #endif
@@ -99,37 +98,46 @@ ENTRY(xen_iret)
 	/* check IF state we're restoring */
 	testb $X86_EFLAGS_IF>>8, 8+1+ESP_OFFSET(%esp)
 
-	/* Maybe enable events.  Once this happens we could get a
-	   recursive event, so the critical region starts immediately
-	   afterwards.  However, if that happens we don't end up
-	   resuming the code, so we don't have to be worried about
-	   being preempted to another CPU. */
+	/*
+	 * Maybe enable events.  Once this happens we could get a
+	 * recursive event, so the critical region starts immediately
+	 * afterwards.  However, if that happens we don't end up
+	 * resuming the code, so we don't have to be worried about
+	 * being preempted to another CPU.
+	 */
 	setz XEN_vcpu_info_mask(%eax)
 xen_iret_start_crit:
 
 	/* check for unmasked and pending */
 	cmpw $0x0001, XEN_vcpu_info_pending(%eax)
 
-	/* If there's something pending, mask events again so we
-	   can jump back into xen_hypervisor_callback */
+	/*
+	 * If there's something pending, mask events again so we can
+	 * jump back into xen_hypervisor_callback
+	 */
 	sete XEN_vcpu_info_mask(%eax)
 
 	popl %eax
 
-	/* From this point on the registers are restored and the stack
-	   updated, so we don't need to worry about it if we're preempted */
+	/*
+	 * From this point on the registers are restored and the stack
+	 * updated, so we don't need to worry about it if we're
+	 * preempted
+	 */
 iret_restore_end:
 
-	/* Jump to hypervisor_callback after fixing up the stack.
-	   Events are masked, so jumping out of the critical
-	   region is OK. */
+	/*
+	 * Jump to hypervisor_callback after fixing up the stack.
+	 * Events are masked, so jumping out of the critical region is
+	 * OK.
+	 */
 	je xen_hypervisor_callback
 
 1:	iret
 xen_iret_end_crit:
-.section __ex_table,"a"
+.section __ex_table, "a"
 	.align 4
-	.long 1b,iret_exc
+	.long 1b, iret_exc
 .previous
 
 hyper_iret:
@@ -139,55 +147,55 @@ hyper_iret:
 	.globl xen_iret_start_crit, xen_iret_end_crit
 
 /*
-   This is called by xen_hypervisor_callback in entry.S when it sees
-   that the EIP at the time of interrupt was between xen_iret_start_crit
-   and xen_iret_end_crit.  We're passed the EIP in %eax so we can do
-   a more refined determination of what to do.
-
-   The stack format at this point is:
-	----------------
-	 ss		: (ss/esp may be present if we came from usermode)
-	 esp		:
-	 eflags		}  outer exception info
-	 cs		}
-	 eip		}
-	---------------- <- edi (copy dest)
-	 eax		:  outer eax if it hasn't been restored
-	----------------
-	 eflags		}  nested exception info
-	 cs		}   (no ss/esp because we're nested
-	 eip		}    from the same ring)
-	 orig_eax	}<- esi (copy src)
-	 - - - - - - - -
-	 fs		}
-	 es		}
-	 ds		}  SAVE_ALL state
-	 eax		}
-	  :		:
-	 ebx		}<- esp
-	----------------
-
-   In order to deliver the nested exception properly, we need to shift
-   everything from the return addr up to the error code so it
-   sits just under the outer exception info.  This means that when we
-   handle the exception, we do it in the context of the outer exception
-   rather than starting a new one.
-
-   The only caveat is that if the outer eax hasn't been
-   restored yet (ie, it's still on stack), we need to insert
-   its value into the SAVE_ALL state before going on, since
-   it's usermode state which we eventually need to restore.
+ * This is called by xen_hypervisor_callback in entry.S when it sees
+ * that the EIP at the time of interrupt was between
+ * xen_iret_start_crit and xen_iret_end_crit.  We're passed the EIP in
+ * %eax so we can do a more refined determination of what to do.
+ *
+ * The stack format at this point is:
+ *	----------------
+ *	 ss		: (ss/esp may be present if we came from usermode)
+ *	 esp		:
+ *	 eflags		}  outer exception info
+ *	 cs		}
+ *	 eip		}
+ *	---------------- <- edi (copy dest)
+ *	 eax		:  outer eax if it hasn't been restored
+ *	----------------
+ *	 eflags		}  nested exception info
+ *	 cs		}   (no ss/esp because we're nested
+ *	 eip		}    from the same ring)
+ *	 orig_eax	}<- esi (copy src)
+ *	 - - - - - - - -
+ *	 fs		}
+ *	 es		}
+ *	 ds		}  SAVE_ALL state
+ *	 eax		}
+ *	  :		:
+ *	 ebx		}<- esp
+ *	----------------
+ *
+ * In order to deliver the nested exception properly, we need to shift
+ * everything from the return addr up to the error code so it sits
+ * just under the outer exception info.  This means that when we
+ * handle the exception, we do it in the context of the outer
+ * exception rather than starting a new one.
+ *
+ * The only caveat is that if the outer eax hasn't been restored yet
+ * (ie, it's still on stack), we need to insert its value into the
+ * SAVE_ALL state before going on, since it's usermode state which we
+ * eventually need to restore.
  */
 ENTRY(xen_iret_crit_fixup)
 	/*
-	   Paranoia: Make sure we're really coming from kernel space.
-	   One could imagine a case where userspace jumps into the
-	   critical range address, but just before the CPU delivers a GP,
-	   it decides to deliver an interrupt instead.  Unlikely?
-	   Definitely.  Easy to avoid?  Yes.  The Intel documents
-	   explicitly say that the reported EIP for a bad jump is the
-	   jump instruction itself, not the destination, but some virtual
-	   environments get this wrong.
+	 * Paranoia: Make sure we're really coming from kernel space.
+	 * One could imagine a case where userspace jumps into the
+	 * critical range address, but just before the CPU delivers a
+	 * GP, it decides to deliver an interrupt instead.  Unlikely?
+	 * Definitely.  Easy to avoid?  Yes.  The Intel documents
+	 * explicitly say that the reported EIP for a bad jump is the
+	 * jump instruction itself, not the destination, but some
+	 * virtual environments get this wrong.
 	 */
 	movl PT_CS(%esp), %ecx
 	andl $SEGMENT_RPL_MASK, %ecx
@@ -197,15 +205,17 @@ ENTRY(xen_iret_crit_fixup)
 	lea PT_ORIG_EAX(%esp), %esi
 	lea PT_EFLAGS(%esp), %edi
 
-	/* If eip is before iret_restore_end then stack
-	   hasn't been restored yet. */
+	/*
+	 * If eip is before iret_restore_end then stack
+	 * hasn't been restored yet.
+	 */
 	cmp $iret_restore_end, %eax
 	jae 1f
 
-	movl 0+4(%edi),%eax		/* copy EAX (just above top of frame) */
+	movl 0+4(%edi), %eax		/* copy EAX (just above top of frame) */
 	movl %eax, PT_EAX(%esp)
 
-	lea ESP_OFFSET(%edi),%edi	/* move dest up over saved regs */
+	lea ESP_OFFSET(%edi), %edi	/* move dest up over saved regs */
 
 	/* set up the copy */
 1:	std
@@ -213,6 +223,6 @@ ENTRY(xen_iret_crit_fixup)
 	rep movsl
 	cld
 
-	lea 4(%edi),%esp		/* point esp to new frame */
+	lea 4(%edi), %esp		/* point esp to new frame */
 2:	jmp xen_do_upcall
 
diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
index d205a28..02f496a 100644
--- a/arch/x86/xen/xen-asm_64.S
+++ b/arch/x86/xen/xen-asm_64.S
@@ -1,14 +1,14 @@
 /*
-	Asm versions of Xen pv-ops, suitable for either direct use or inlining.
-	The inline versions are the same as the direct-use versions, with the
-	pre- and post-amble chopped off.
-
-	This code is encoded for size rather than absolute efficiency,
-	with a view to being able to inline as much as possible.
-
-	We only bother with direct forms (ie, vcpu in pda) of the operations
-	here; the indirect forms are better handled in C, since they're
-	generally too large to inline anyway.
+ * Asm versions of Xen pv-ops, suitable for either direct use or
+ * inlining.  The inline versions are the same as the direct-use
+ * versions, with the pre- and post-amble chopped off.
+ *
+ * This code is encoded for size rather than absolute efficiency, with
+ * a view to being able to inline as much as possible.
+ *
+ * We only bother with direct forms (ie, vcpu in pda) of the
+ * operations here; the indirect forms are better handled in C, since
+ * they're generally too large to inline anyway.
  */
 
 #include <asm/errno.h>
@@ -21,25 +21,25 @@
 #include "xen-asm.h"
 
 ENTRY(xen_adjust_exception_frame)
-	mov 8+0(%rsp),%rcx
-	mov 8+8(%rsp),%r11
+	mov 8+0(%rsp), %rcx
+	mov 8+8(%rsp), %r11
 	ret $16
 
 hypercall_iret = hypercall_page + __HYPERVISOR_iret * 32
 /*
-	Xen64 iret frame:
-
-	ss
-	rsp
-	rflags
-	cs
-	rip		<-- standard iret frame
-
-	flags
-
-	rcx		}
-	r11		}<-- pushed by hypercall page
-rsp ->	rax		}
+ * Xen64 iret frame:
+ *
+ *	ss
+ *	rsp
+ *	rflags
+ *	cs
+ *	rip		<-- standard iret frame
+ *
+ *	flags
+ *
+ *	rcx		}
+ *	r11		}<-- pushed by hypercall page
+ * rsp->rax		}
  */
 ENTRY(xen_iret)
 	pushq $0
@@ -48,8 +48,8 @@ ENDPATCH(xen_iret)
 RELOC(xen_iret, 1b+1)
 
 /*
-	sysexit is not used for 64-bit processes, so it's
-	only ever used to return to 32-bit compat userspace.
+ * sysexit is not used for 64-bit processes, so it's only ever used to
+ * return to 32-bit compat userspace.
  */
 ENTRY(xen_sysexit)
 	pushq $__USER32_DS
@@ -64,10 +64,12 @@ ENDPATCH(xen_sysexit)
 RELOC(xen_sysexit, 1b+1)
 
 ENTRY(xen_sysret64)
-	/* We're already on the usermode stack at this point, but still
-	   with the kernel gs, so we can easily switch back */
+	/*
+	 * We're already on the usermode stack at this point, but
+	 * still with the kernel gs, so we can easily switch back
+	 */
 	movq %rsp, PER_CPU_VAR(old_rsp)
-	movq PER_CPU_VAR(kernel_stack),%rsp
+	movq PER_CPU_VAR(kernel_stack), %rsp
 
 	pushq $__USER_DS
 	pushq PER_CPU_VAR(old_rsp)
@@ -81,8 +83,10 @@ ENDPATCH(xen_sysret64)
 RELOC(xen_sysret64, 1b+1)
 
 ENTRY(xen_sysret32)
-	/* We're already on the usermode stack at this point, but still
-	   with the kernel gs, so we can easily switch back */
+	/*
+	 * We're already on the usermode stack at this point, but
+	 * still with the kernel gs, so we can easily switch back
+	 */
 	movq %rsp, PER_CPU_VAR(old_rsp)
 	movq PER_CPU_VAR(kernel_stack), %rsp
 
@@ -98,28 +102,27 @@ ENDPATCH(xen_sysret32)
 RELOC(xen_sysret32, 1b+1)
 
 /*
-	Xen handles syscall callbacks much like ordinary exceptions,
-	which means we have:
-	 - kernel gs
-	 - kernel rsp
-	 - an iret-like stack frame on the stack (including rcx and r11):
-		ss
-		rsp
-		rflags
-		cs
-		rip
-		r11
-	rsp->	rcx
-
-	In all the entrypoints, we undo all that to make it look
-	like a CPU-generated syscall/sysenter and jump to the normal
-	entrypoint.
+ * Xen handles syscall callbacks much like ordinary exceptions, which
+ * means we have:
+ * - kernel gs
+ * - kernel rsp
+ * - an iret-like stack frame on the stack (including rcx and r11):
+ *	ss
+ *	rsp
+ *	rflags
+ *	cs
+ *	rip
+ *	r11
+ * rsp->rcx
+ *
+ * In all the entrypoints, we undo all that to make it look like a
+ * CPU-generated syscall/sysenter and jump to the normal entrypoint.
  */
 
 .macro undo_xen_syscall
-	mov 0*8(%rsp),%rcx
-	mov 1*8(%rsp),%r11
-	mov 5*8(%rsp),%rsp
+	mov 0*8(%rsp), %rcx
+	mov 1*8(%rsp), %r11
+	mov 5*8(%rsp), %rsp
 .endm
 
 /* Normal 64-bit system call target */
@@ -146,7 +149,7 @@ ENDPROC(xen_sysenter_target)
 
 ENTRY(xen_syscall32_target)
 ENTRY(xen_sysenter_target)
-	lea 16(%rsp), %rsp	/* strip %rcx,%r11 */
+	lea 16(%rsp), %rsp	/* strip %rcx, %r11 */
 	mov $-ENOSYS, %rax
 	pushq $VGCF_in_syscall
 	jmp hypercall_iret
-- 
cgit v0.10.2


From 56fc82c5360cdf0b250b5eb74f38657b0402faa5 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 6 Feb 2009 00:48:02 +0900
Subject: modpost: NOBITS sections may point beyond the end of the file

Impact: fix link failure on certain toolchains with specific configs

Recent percpu change made x86_64 split .data.init section into three
separate segments - data.init, percpu and data.init2.  data.init2 gets
.data.nosave and .bss.* and is followed by .notes segment.  Depending
on configuration both segments might contain no data, in which case
the tool chain makes the section header to contain offset beyond the
end of the file.

modpost isn't too happy about it and fails build - as reported by
Pawel Dziekonski:

    Building modules, stage 2.
    MODPOST 416 modules
    FATAL: vmlinux is truncated. sechdrs[i].sh_offset=10354688 >
    sizeof(*hrd)=64
    make[1]: *** [__modpost] Error 1

Teach modpost that NOBITS section may point beyond the end of the file
and that .modinfo can't be NOBITS.

Reported-by: Pawel Dziekonski <dzieko@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
index 8892161..7e62303 100644
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c
@@ -415,8 +415,9 @@ static int parse_elf(struct elf_info *info, const char *filename)
 		const char *secstrings
 			= (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
 		const char *secname;
+		int nobits = sechdrs[i].sh_type == SHT_NOBITS;
 
-		if (sechdrs[i].sh_offset > info->size) {
+		if (!nobits && sechdrs[i].sh_offset > info->size) {
 			fatal("%s is truncated. sechdrs[i].sh_offset=%lu > "
 			      "sizeof(*hrd)=%zu\n", filename,
 			      (unsigned long)sechdrs[i].sh_offset,
@@ -425,6 +426,8 @@ static int parse_elf(struct elf_info *info, const char *filename)
 		}
 		secname = secstrings + sechdrs[i].sh_name;
 		if (strcmp(secname, ".modinfo") == 0) {
+			if (nobits)
+				fatal("%s has NOBITS .modinfo\n", filename);
 			info->modinfo = (void *)hdr + sechdrs[i].sh_offset;
 			info->modinfo_len = sechdrs[i].sh_size;
 		} else if (strcmp(secname, "__ksymtab") == 0)
-- 
cgit v0.10.2


From d3770449d3cb058b94ca1d050d5ced4a66c75ce4 Mon Sep 17 00:00:00 2001
From: Brian Gerst <brgerst@gmail.com>
Date: Sun, 8 Feb 2009 09:58:38 -0500
Subject: percpu: make PER_CPU_BASE_SECTION overridable by arches

Impact: bug fix

IA-64 needs to put percpu data in the seperate section even on UP.
Fixes regression caused by "percpu: refactor percpu.h"

Signed-off-by: Brian Gerst <brgerst@gmail.com>
Acked-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/ia64/include/asm/percpu.h b/arch/ia64/include/asm/percpu.h
index 77f30b6..30cf465 100644
--- a/arch/ia64/include/asm/percpu.h
+++ b/arch/ia64/include/asm/percpu.h
@@ -27,12 +27,12 @@ extern void *per_cpu_init(void);
 
 #else /* ! SMP */
 
-#define PER_CPU_ATTRIBUTES	__attribute__((__section__(".data.percpu")))
-
 #define per_cpu_init()				(__phys_per_cpu_start)
 
 #endif	/* SMP */
 
+#define PER_CPU_BASE_SECTION ".data.percpu"
+
 /*
  * Be extremely careful when taking the address of this variable!  Due to virtual
  * remapping, it is different from the canonical address returned by __get_cpu_var(var)!
diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index 0e24202..3577ffd 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -8,8 +8,15 @@
 
 #include <asm/percpu.h>
 
+#ifndef PER_CPU_BASE_SECTION
 #ifdef CONFIG_SMP
 #define PER_CPU_BASE_SECTION ".data.percpu"
+#else
+#define PER_CPU_BASE_SECTION ".data"
+#endif
+#endif
+
+#ifdef CONFIG_SMP
 
 #ifdef MODULE
 #define PER_CPU_SHARED_ALIGNED_SECTION ""
@@ -20,7 +27,6 @@
 
 #else
 
-#define PER_CPU_BASE_SECTION ".data"
 #define PER_CPU_SHARED_ALIGNED_SECTION ""
 #define PER_CPU_FIRST_SECTION ""
 
-- 
cgit v0.10.2


From 2add8e235cbe0dcd672c33fc322754e15500238c Mon Sep 17 00:00:00 2001
From: Brian Gerst <brgerst@gmail.com>
Date: Sun, 8 Feb 2009 09:58:39 -0500
Subject: x86: use linker to offset symbols by __per_cpu_load

Impact: cleanup and bug fix

Use the linker to create symbols for certain per-cpu variables
that are offset by __per_cpu_load.  This allows the removal of
the runtime fixup of the GDT pointer, which fixes a bug with
resume reported by Jiri Slaby.

Reported-by: Jiri Slaby <jirislaby@gmail.com>
Signed-off-by: Brian Gerst <brgerst@gmail.com>
Acked-by: Jiri Slaby <jirislaby@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 0b64af4..aee103b 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -34,6 +34,12 @@
 #define PER_CPU_VAR(var)	per_cpu__##var
 #endif	/* SMP */
 
+#ifdef CONFIG_X86_64_SMP
+#define INIT_PER_CPU_VAR(var)  init_per_cpu__##var
+#else
+#define INIT_PER_CPU_VAR(var)  per_cpu__##var
+#endif
+
 #else /* ...!ASSEMBLY */
 
 #include <linux/stringify.h>
@@ -45,6 +51,22 @@
 #define __percpu_arg(x)		"%" #x
 #endif
 
+/*
+ * Initialized pointers to per-cpu variables needed for the boot
+ * processor need to use these macros to get the proper address
+ * offset from __per_cpu_load on SMP.
+ *
+ * There also must be an entry in vmlinux_64.lds.S
+ */
+#define DECLARE_INIT_PER_CPU(var) \
+       extern typeof(per_cpu_var(var)) init_per_cpu_var(var)
+
+#ifdef CONFIG_X86_64_SMP
+#define init_per_cpu_var(var)  init_per_cpu__##var
+#else
+#define init_per_cpu_var(var)  per_cpu_var(var)
+#endif
+
 /* For arch-specific code, we can use direct single-insn ops (they
  * don't give an lvalue though). */
 extern void __bad_percpu_size(void);
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 656d02e..373d3f6 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -393,6 +393,8 @@ union irq_stack_union {
 };
 
 DECLARE_PER_CPU(union irq_stack_union, irq_stack_union);
+DECLARE_INIT_PER_CPU(irq_stack_union);
+
 DECLARE_PER_CPU(char *, irq_stack_ptr);
 #endif
 
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 0f73ea4..41b0de6 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -902,12 +902,8 @@ struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
 
 DEFINE_PER_CPU_FIRST(union irq_stack_union,
 		     irq_stack_union) __aligned(PAGE_SIZE);
-#ifdef CONFIG_SMP
-DEFINE_PER_CPU(char *, irq_stack_ptr);	/* will be set during per cpu init */
-#else
 DEFINE_PER_CPU(char *, irq_stack_ptr) =
-	per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64;
-#endif
+	init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64;
 
 DEFINE_PER_CPU(unsigned long, kernel_stack) =
 	(unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE;
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index a0a2b5c..2e648e3 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -205,19 +205,6 @@ ENTRY(secondary_startup_64)
 	pushq $0
 	popfq
 
-#ifdef CONFIG_SMP
-	/*
-	 * Fix up static pointers that need __per_cpu_load added.  The assembler
-	 * is unable to do this directly.  This is only needed for the boot cpu.
-	 * These values are set up with the correct base addresses by C code for
-	 * secondary cpus.
-	 */
-	movq	initial_gs(%rip), %rax
-	cmpl	$0, per_cpu__cpu_number(%rax)
-	jne	1f
-	addq	%rax, early_gdt_descr_base(%rip)
-1:
-#endif
 	/*
 	 * We must switch to a new descriptor in kernel space for the GDT
 	 * because soon the kernel won't have access anymore to the userspace
@@ -275,11 +262,7 @@ ENTRY(secondary_startup_64)
 	ENTRY(initial_code)
 	.quad	x86_64_start_kernel
 	ENTRY(initial_gs)
-#ifdef CONFIG_SMP
-	.quad	__per_cpu_load
-#else
-	.quad	PER_CPU_VAR(irq_stack_union)
-#endif
+	.quad	INIT_PER_CPU_VAR(irq_stack_union)
 	__FINITDATA
 
 	ENTRY(stack_start)
@@ -425,7 +408,7 @@ NEXT_PAGE(level2_spare_pgt)
 early_gdt_descr:
 	.word	GDT_ENTRIES*8-1
 early_gdt_descr_base:
-	.quad	per_cpu__gdt_page
+	.quad	INIT_PER_CPU_VAR(gdt_page)
 
 ENTRY(phys_base)
 	/* This must match the first entry in level2_kernel_pgt */
diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S
index 07f62d2..087a7f2 100644
--- a/arch/x86/kernel/vmlinux_64.lds.S
+++ b/arch/x86/kernel/vmlinux_64.lds.S
@@ -257,6 +257,14 @@ SECTIONS
   DWARF_DEBUG
 }
 
+ /*
+  * Per-cpu symbols which need to be offset from __per_cpu_load
+  * for the boot processor.
+  */
+#define INIT_PER_CPU(x) init_per_cpu__##x = per_cpu__##x + __per_cpu_load
+INIT_PER_CPU(gdt_page);
+INIT_PER_CPU(irq_stack_union);
+
 /*
  * Build-time check on the image size:
  */
-- 
cgit v0.10.2


From 44581a28e805a31661469c4b466b9cd14b36e7b6 Mon Sep 17 00:00:00 2001
From: Brian Gerst <brgerst@gmail.com>
Date: Sun, 8 Feb 2009 09:58:40 -0500
Subject: x86: fix abuse of per_cpu_offset

Impact: bug fix

Don't use per_cpu_offset() to determine if it valid to access a
per-cpu variable for a given cpu number.  It is not a valid assumption
on x86-64 anymore. Use cpu_possible() instead.

Signed-off-by: Brian Gerst <brgerst@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 08d140f..deb1c1a 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -702,7 +702,7 @@ void __cpuinit numa_set_node(int cpu, int node)
 	}
 
 #ifdef CONFIG_DEBUG_PER_CPU_MAPS
-	if (cpu >= nr_cpu_ids || !per_cpu_offset(cpu)) {
+	if (cpu >= nr_cpu_ids || !cpu_possible(cpu)) {
 		printk(KERN_ERR "numa_set_node: invalid cpu# (%d)\n", cpu);
 		dump_stack();
 		return;
@@ -790,7 +790,7 @@ int early_cpu_to_node(int cpu)
 	if (early_per_cpu_ptr(x86_cpu_to_node_map))
 		return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];
 
-	if (!per_cpu_offset(cpu)) {
+	if (!cpu_possible(cpu)) {
 		printk(KERN_WARNING
 			"early_cpu_to_node(%d): no per_cpu area!\n", cpu);
 		dump_stack();
-- 
cgit v0.10.2


From 6cd61c0baa8bce32271226198b46c67a7a05d108 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 9 Feb 2009 22:17:39 +0900
Subject: elf: add ELF_CORE_COPY_KERNEL_REGS()

ELF core dump is used for both user land core dump and kernel crash
dump.  Depending on architecture, register might need to be accessed
differently for userland and kernel.  Allow architectures to define
ELF_CORE_COPY_KERNEL_REGS() and use different operation for kernel
register dump.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/elfcore.h b/include/linux/elfcore.h
index 5ca54d7..7605c5e 100644
--- a/include/linux/elfcore.h
+++ b/include/linux/elfcore.h
@@ -111,6 +111,15 @@ static inline void elf_core_copy_regs(elf_gregset_t *elfregs, struct pt_regs *re
 #endif
 }
 
+static inline void elf_core_copy_kernel_regs(elf_gregset_t *elfregs, struct pt_regs *regs)
+{
+#ifdef ELF_CORE_COPY_KERNEL_REGS
+	ELF_CORE_COPY_KERNEL_REGS((*elfregs), regs);
+#else
+	elf_core_copy_regs(elfregs, regs);
+#endif
+}
+
 static inline int elf_core_copy_task_regs(struct task_struct *t, elf_gregset_t* elfregs)
 {
 #ifdef ELF_CORE_COPY_TASK_REGS
diff --git a/kernel/kexec.c b/kernel/kexec.c
index 8a6d7b0..795e7b6 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -1130,7 +1130,7 @@ void crash_save_cpu(struct pt_regs *regs, int cpu)
 		return;
 	memset(&prstatus, 0, sizeof(prstatus));
 	prstatus.pr_pid = current->pid;
-	elf_core_copy_regs(&prstatus.pr_reg, regs);
+	elf_core_copy_kernel_regs(&prstatus.pr_reg, regs);
 	buf = append_elf_note(buf, KEXEC_CORE_NOTE_NAME, NT_PRSTATUS,
 		      	      &prstatus, sizeof(prstatus));
 	final_note(buf);
-- 
cgit v0.10.2


From 76397f72fb9f4c9a96dfe05462887811c81b0e17 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 9 Feb 2009 22:17:39 +0900
Subject: x86: stackprotector.h misc update

Impact: misc udpate

* wrap content with CONFIG_CC_STACK_PROTECTOR so that other arch files
  can include it directly

* add missing includes

This will help future changes.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h
index 36a700a..ee275e9 100644
--- a/arch/x86/include/asm/stackprotector.h
+++ b/arch/x86/include/asm/stackprotector.h
@@ -1,8 +1,12 @@
 #ifndef _ASM_STACKPROTECTOR_H
 #define _ASM_STACKPROTECTOR_H 1
 
+#ifdef CONFIG_CC_STACKPROTECTOR
+
 #include <asm/tsc.h>
 #include <asm/processor.h>
+#include <asm/percpu.h>
+#include <linux/random.h>
 
 /*
  * Initialize the stackprotector canary value.
@@ -35,4 +39,5 @@ static __always_inline void boot_init_stack_canary(void)
 	percpu_write(irq_stack_union.stack_canary, canary);
 }
 
-#endif
+#endif	/* CC_STACKPROTECTOR */
+#endif	/* _ASM_STACKPROTECTOR_H */
-- 
cgit v0.10.2


From 5d707e9c8ef2a3596ed5c975c6ff05cec890c2b4 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 9 Feb 2009 22:17:39 +0900
Subject: stackprotector: update make rules

Impact: no default -fno-stack-protector if stackp is enabled, cleanup

Stackprotector make rules had the following problems.

* cc support test and warning are scattered across makefile and
  kernel/panic.c.

* -fno-stack-protector was always added regardless of configuration.

Update such that cc support test and warning are contained in makefile
and -fno-stack-protector is added iff stackp is turned off.  While at
it, prepare for 32bit support.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Makefile b/Makefile
index 681c1d2..77a006d 100644
--- a/Makefile
+++ b/Makefile
@@ -532,8 +532,9 @@ KBUILD_CFLAGS += $(call cc-option,-Wframe-larger-than=${CONFIG_FRAME_WARN})
 endif
 
 # Force gcc to behave correct even for buggy distributions
-# Arch Makefiles may override this setting
+ifndef CONFIG_CC_STACKPROTECTOR
 KBUILD_CFLAGS += $(call cc-option, -fno-stack-protector)
+endif
 
 ifdef CONFIG_FRAME_POINTER
 KBUILD_CFLAGS	+= -fno-omit-frame-pointer -fno-optimize-sibling-calls
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index cacee98..ab48ab4 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -70,14 +70,17 @@ else
         # this works around some issues with generating unwind tables in older gccs
         # newer gccs do it by default
         KBUILD_CFLAGS += -maccumulate-outgoing-args
+endif
 
-        stackp := $(CONFIG_SHELL) $(srctree)/scripts/gcc-x86_64-has-stack-protector.sh
-        stackp-$(CONFIG_CC_STACKPROTECTOR) := $(shell $(stackp) \
-                "$(CC)" "-fstack-protector -DGCC_HAS_SP" )
-        stackp-$(CONFIG_CC_STACKPROTECTOR_ALL) += $(shell $(stackp) \
-                "$(CC)" -fstack-protector-all )
-
-        KBUILD_CFLAGS += $(stackp-y)
+ifdef CONFIG_CC_STACKPROTECTOR
+	cc_has_sp := $(srctree)/scripts/gcc-x86_$(BITS)-has-stack-protector.sh
+        ifeq ($(shell $(CONFIG_SHELL) $(cc_has_sp) $(CC)),y)
+                stackp-y := -fstack-protector
+                stackp-$(CONFIG_CC_STACKPROTECTOR_ALL) += -fstack-protector-all
+                KBUILD_CFLAGS += $(stackp-y)
+        else
+                $(warning stack protector enabled but no compiler support)
+        endif
 endif
 
 # Stackpointer is addressed different for 32 bit and 64 bit x86
diff --git a/kernel/panic.c b/kernel/panic.c
index 33cab3d..32fe4ef 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -359,10 +359,6 @@ EXPORT_SYMBOL(warn_slowpath);
 
 #ifdef CONFIG_CC_STACKPROTECTOR
 
-#ifndef GCC_HAS_SP
-#warning You have selected the CONFIG_CC_STACKPROTECTOR option, but the gcc used does not support this.
-#endif
-
 /*
  * Called when gcc's -fstack-protector feature is used, and
  * gcc detects corruption of the on-stack canary value
diff --git a/scripts/gcc-x86_64-has-stack-protector.sh b/scripts/gcc-x86_64-has-stack-protector.sh
index 325c0a1..2d69fcd 100644
--- a/scripts/gcc-x86_64-has-stack-protector.sh
+++ b/scripts/gcc-x86_64-has-stack-protector.sh
@@ -2,5 +2,7 @@
 
 echo "int foo(void) { char X[200]; return 3; }" | $1 -S -xc -c -O0 -mcmodel=kernel -fstack-protector - -o - 2> /dev/null | grep -q "%gs"
 if [ "$?" -eq "0" ] ; then
-	echo $2
+	echo y
+else
+	echo n
 fi
-- 
cgit v0.10.2


From d627ded5ab2f7818154d57369e3a8cdb40db2569 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 9 Feb 2009 22:17:40 +0900
Subject: x86: no stack protector for vdso

Impact: avoid crash on vsyscall

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile
index 4d6ef0a..16a9020 100644
--- a/arch/x86/vdso/Makefile
+++ b/arch/x86/vdso/Makefile
@@ -38,7 +38,7 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE
 	$(call if_changed,objcopy)
 
 CFL := $(PROFILING) -mcmodel=small -fPIC -O2 -fasynchronous-unwind-tables -m64 \
-       $(filter -g%,$(KBUILD_CFLAGS))
+       $(filter -g%,$(KBUILD_CFLAGS)) $(call cc-option, -fno-stack-protector)
 
 $(vobjs): KBUILD_CFLAGS += $(CFL)
 
-- 
cgit v0.10.2


From f0d96110f9fd98a1a22e03b8adba69508843d910 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 9 Feb 2009 22:17:40 +0900
Subject: x86: use asm .macro instead of cpp #define in entry_32.S

Impact: cleanup

Use .macro instead of cpp #define where approriate.  This cleans up
code and will ease future changes.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index a0b91aa..c461925 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -101,121 +101,127 @@
 #define resume_userspace_sig	resume_userspace
 #endif
 
-#define SAVE_ALL \
-	cld; \
-	pushl %fs; \
-	CFI_ADJUST_CFA_OFFSET 4;\
-	/*CFI_REL_OFFSET fs, 0;*/\
-	pushl %es; \
-	CFI_ADJUST_CFA_OFFSET 4;\
-	/*CFI_REL_OFFSET es, 0;*/\
-	pushl %ds; \
-	CFI_ADJUST_CFA_OFFSET 4;\
-	/*CFI_REL_OFFSET ds, 0;*/\
-	pushl %eax; \
-	CFI_ADJUST_CFA_OFFSET 4;\
-	CFI_REL_OFFSET eax, 0;\
-	pushl %ebp; \
-	CFI_ADJUST_CFA_OFFSET 4;\
-	CFI_REL_OFFSET ebp, 0;\
-	pushl %edi; \
-	CFI_ADJUST_CFA_OFFSET 4;\
-	CFI_REL_OFFSET edi, 0;\
-	pushl %esi; \
-	CFI_ADJUST_CFA_OFFSET 4;\
-	CFI_REL_OFFSET esi, 0;\
-	pushl %edx; \
-	CFI_ADJUST_CFA_OFFSET 4;\
-	CFI_REL_OFFSET edx, 0;\
-	pushl %ecx; \
-	CFI_ADJUST_CFA_OFFSET 4;\
-	CFI_REL_OFFSET ecx, 0;\
-	pushl %ebx; \
-	CFI_ADJUST_CFA_OFFSET 4;\
-	CFI_REL_OFFSET ebx, 0;\
-	movl $(__USER_DS), %edx; \
-	movl %edx, %ds; \
-	movl %edx, %es; \
-	movl $(__KERNEL_PERCPU), %edx; \
+.macro SAVE_ALL
+	cld
+	pushl %fs
+	CFI_ADJUST_CFA_OFFSET 4
+	/*CFI_REL_OFFSET fs, 0;*/
+	pushl %es
+	CFI_ADJUST_CFA_OFFSET 4
+	/*CFI_REL_OFFSET es, 0;*/
+	pushl %ds
+	CFI_ADJUST_CFA_OFFSET 4
+	/*CFI_REL_OFFSET ds, 0;*/
+	pushl %eax
+	CFI_ADJUST_CFA_OFFSET 4
+	CFI_REL_OFFSET eax, 0
+	pushl %ebp
+	CFI_ADJUST_CFA_OFFSET 4
+	CFI_REL_OFFSET ebp, 0
+	pushl %edi
+	CFI_ADJUST_CFA_OFFSET 4
+	CFI_REL_OFFSET edi, 0
+	pushl %esi
+	CFI_ADJUST_CFA_OFFSET 4
+	CFI_REL_OFFSET esi, 0
+	pushl %edx
+	CFI_ADJUST_CFA_OFFSET 4
+	CFI_REL_OFFSET edx, 0
+	pushl %ecx
+	CFI_ADJUST_CFA_OFFSET 4
+	CFI_REL_OFFSET ecx, 0
+	pushl %ebx
+	CFI_ADJUST_CFA_OFFSET 4
+	CFI_REL_OFFSET ebx, 0
+	movl $(__USER_DS), %edx
+	movl %edx, %ds
+	movl %edx, %es
+	movl $(__KERNEL_PERCPU), %edx
 	movl %edx, %fs
+.endm
 
-#define RESTORE_INT_REGS \
-	popl %ebx;	\
-	CFI_ADJUST_CFA_OFFSET -4;\
-	CFI_RESTORE ebx;\
-	popl %ecx;	\
-	CFI_ADJUST_CFA_OFFSET -4;\
-	CFI_RESTORE ecx;\
-	popl %edx;	\
-	CFI_ADJUST_CFA_OFFSET -4;\
-	CFI_RESTORE edx;\
-	popl %esi;	\
-	CFI_ADJUST_CFA_OFFSET -4;\
-	CFI_RESTORE esi;\
-	popl %edi;	\
-	CFI_ADJUST_CFA_OFFSET -4;\
-	CFI_RESTORE edi;\
-	popl %ebp;	\
-	CFI_ADJUST_CFA_OFFSET -4;\
-	CFI_RESTORE ebp;\
-	popl %eax;	\
-	CFI_ADJUST_CFA_OFFSET -4;\
+.macro RESTORE_INT_REGS
+	popl %ebx
+	CFI_ADJUST_CFA_OFFSET -4
+	CFI_RESTORE ebx
+	popl %ecx
+	CFI_ADJUST_CFA_OFFSET -4
+	CFI_RESTORE ecx
+	popl %edx
+	CFI_ADJUST_CFA_OFFSET -4
+	CFI_RESTORE edx
+	popl %esi
+	CFI_ADJUST_CFA_OFFSET -4
+	CFI_RESTORE esi
+	popl %edi
+	CFI_ADJUST_CFA_OFFSET -4
+	CFI_RESTORE edi
+	popl %ebp
+	CFI_ADJUST_CFA_OFFSET -4
+	CFI_RESTORE ebp
+	popl %eax
+	CFI_ADJUST_CFA_OFFSET -4
 	CFI_RESTORE eax
+.endm
 
-#define RESTORE_REGS	\
-	RESTORE_INT_REGS; \
-1:	popl %ds;	\
-	CFI_ADJUST_CFA_OFFSET -4;\
-	/*CFI_RESTORE ds;*/\
-2:	popl %es;	\
-	CFI_ADJUST_CFA_OFFSET -4;\
-	/*CFI_RESTORE es;*/\
-3:	popl %fs;	\
-	CFI_ADJUST_CFA_OFFSET -4;\
-	/*CFI_RESTORE fs;*/\
-.pushsection .fixup,"ax";	\
-4:	movl $0,(%esp);	\
-	jmp 1b;		\
-5:	movl $0,(%esp);	\
-	jmp 2b;		\
-6:	movl $0,(%esp);	\
-	jmp 3b;		\
-.section __ex_table,"a";\
-	.align 4;	\
-	.long 1b,4b;	\
-	.long 2b,5b;	\
-	.long 3b,6b;	\
+.macro RESTORE_REGS
+	RESTORE_INT_REGS
+1:	popl %ds
+	CFI_ADJUST_CFA_OFFSET -4
+	/*CFI_RESTORE ds;*/
+2:	popl %es
+	CFI_ADJUST_CFA_OFFSET -4
+	/*CFI_RESTORE es;*/
+3:	popl %fs
+	CFI_ADJUST_CFA_OFFSET -4
+	/*CFI_RESTORE fs;*/
+.pushsection .fixup, "ax"
+4:	movl $0, (%esp)
+	jmp 1b
+5:	movl $0, (%esp)
+	jmp 2b
+6:	movl $0, (%esp)
+	jmp 3b
+.section __ex_table, "a"
+	.align 4
+	.long 1b, 4b
+	.long 2b, 5b
+	.long 3b, 6b
 .popsection
+.endm
 
-#define RING0_INT_FRAME \
-	CFI_STARTPROC simple;\
-	CFI_SIGNAL_FRAME;\
-	CFI_DEF_CFA esp, 3*4;\
-	/*CFI_OFFSET cs, -2*4;*/\
+.macro RING0_INT_FRAME
+	CFI_STARTPROC simple
+	CFI_SIGNAL_FRAME
+	CFI_DEF_CFA esp, 3*4
+	/*CFI_OFFSET cs, -2*4;*/
 	CFI_OFFSET eip, -3*4
+.endm
 
-#define RING0_EC_FRAME \
-	CFI_STARTPROC simple;\
-	CFI_SIGNAL_FRAME;\
-	CFI_DEF_CFA esp, 4*4;\
-	/*CFI_OFFSET cs, -2*4;*/\
+.macro RING0_EC_FRAME
+	CFI_STARTPROC simple
+	CFI_SIGNAL_FRAME
+	CFI_DEF_CFA esp, 4*4
+	/*CFI_OFFSET cs, -2*4;*/
 	CFI_OFFSET eip, -3*4
+.endm
 
-#define RING0_PTREGS_FRAME \
-	CFI_STARTPROC simple;\
-	CFI_SIGNAL_FRAME;\
-	CFI_DEF_CFA esp, PT_OLDESP-PT_EBX;\
-	/*CFI_OFFSET cs, PT_CS-PT_OLDESP;*/\
-	CFI_OFFSET eip, PT_EIP-PT_OLDESP;\
-	/*CFI_OFFSET es, PT_ES-PT_OLDESP;*/\
-	/*CFI_OFFSET ds, PT_DS-PT_OLDESP;*/\
-	CFI_OFFSET eax, PT_EAX-PT_OLDESP;\
-	CFI_OFFSET ebp, PT_EBP-PT_OLDESP;\
-	CFI_OFFSET edi, PT_EDI-PT_OLDESP;\
-	CFI_OFFSET esi, PT_ESI-PT_OLDESP;\
-	CFI_OFFSET edx, PT_EDX-PT_OLDESP;\
-	CFI_OFFSET ecx, PT_ECX-PT_OLDESP;\
+.macro RING0_PTREGS_FRAME
+	CFI_STARTPROC simple
+	CFI_SIGNAL_FRAME
+	CFI_DEF_CFA esp, PT_OLDESP-PT_EBX
+	/*CFI_OFFSET cs, PT_CS-PT_OLDESP;*/
+	CFI_OFFSET eip, PT_EIP-PT_OLDESP
+	/*CFI_OFFSET es, PT_ES-PT_OLDESP;*/
+	/*CFI_OFFSET ds, PT_DS-PT_OLDESP;*/
+	CFI_OFFSET eax, PT_EAX-PT_OLDESP
+	CFI_OFFSET ebp, PT_EBP-PT_OLDESP
+	CFI_OFFSET edi, PT_EDI-PT_OLDESP
+	CFI_OFFSET esi, PT_ESI-PT_OLDESP
+	CFI_OFFSET edx, PT_EDX-PT_OLDESP
+	CFI_OFFSET ecx, PT_ECX-PT_OLDESP
 	CFI_OFFSET ebx, PT_EBX-PT_OLDESP
+.endm
 
 ENTRY(ret_from_fork)
 	CFI_STARTPROC
@@ -595,28 +601,30 @@ syscall_badsys:
 END(syscall_badsys)
 	CFI_ENDPROC
 
-#define FIXUP_ESPFIX_STACK \
-	/* since we are on a wrong stack, we cant make it a C code :( */ \
-	PER_CPU(gdt_page, %ebx); \
-	GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah); \
-	addl %esp, %eax; \
-	pushl $__KERNEL_DS; \
-	CFI_ADJUST_CFA_OFFSET 4; \
-	pushl %eax; \
-	CFI_ADJUST_CFA_OFFSET 4; \
-	lss (%esp), %esp; \
-	CFI_ADJUST_CFA_OFFSET -8;
-#define UNWIND_ESPFIX_STACK \
-	movl %ss, %eax; \
-	/* see if on espfix stack */ \
-	cmpw $__ESPFIX_SS, %ax; \
-	jne 27f; \
-	movl $__KERNEL_DS, %eax; \
-	movl %eax, %ds; \
-	movl %eax, %es; \
-	/* switch to normal stack */ \
-	FIXUP_ESPFIX_STACK; \
-27:;
+.macro FIXUP_ESPFIX_STACK
+	/* since we are on a wrong stack, we cant make it a C code :( */
+	PER_CPU(gdt_page, %ebx)
+	GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah)
+	addl %esp, %eax
+	pushl $__KERNEL_DS
+	CFI_ADJUST_CFA_OFFSET 4
+	pushl %eax
+	CFI_ADJUST_CFA_OFFSET 4
+	lss (%esp), %esp
+	CFI_ADJUST_CFA_OFFSET -8
+.endm
+.macro UNWIND_ESPFIX_STACK
+	movl %ss, %eax
+	/* see if on espfix stack */
+	cmpw $__ESPFIX_SS, %ax
+	jne 27f
+	movl $__KERNEL_DS, %eax
+	movl %eax, %ds
+	movl %eax, %es
+	/* switch to normal stack */
+	FIXUP_ESPFIX_STACK
+27:
+.endm
 
 /*
  * Build the entry stubs and pointer table with some assembler magic.
@@ -1136,26 +1144,27 @@ END(page_fault)
  * by hand onto the new stack - while updating the return eip past
  * the instruction that would have done it for sysenter.
  */
-#define FIX_STACK(offset, ok, label)		\
-	cmpw $__KERNEL_CS,4(%esp);		\
-	jne ok;					\
-label:						\
-	movl TSS_sysenter_sp0+offset(%esp),%esp;	\
-	CFI_DEF_CFA esp, 0;			\
-	CFI_UNDEFINED eip;			\
-	pushfl;					\
-	CFI_ADJUST_CFA_OFFSET 4;		\
-	pushl $__KERNEL_CS;			\
-	CFI_ADJUST_CFA_OFFSET 4;		\
-	pushl $sysenter_past_esp;		\
-	CFI_ADJUST_CFA_OFFSET 4;		\
+.macro FIX_STACK offset ok label
+	cmpw $__KERNEL_CS, 4(%esp)
+	jne \ok
+\label:
+	movl TSS_sysenter_sp0 + \offset(%esp), %esp
+	CFI_DEF_CFA esp, 0
+	CFI_UNDEFINED eip
+	pushfl
+	CFI_ADJUST_CFA_OFFSET 4
+	pushl $__KERNEL_CS
+	CFI_ADJUST_CFA_OFFSET 4
+	pushl $sysenter_past_esp
+	CFI_ADJUST_CFA_OFFSET 4
 	CFI_REL_OFFSET eip, 0
+.endm
 
 ENTRY(debug)
 	RING0_INT_FRAME
 	cmpl $ia32_sysenter_target,(%esp)
 	jne debug_stack_correct
-	FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn)
+	FIX_STACK 12, debug_stack_correct, debug_esp_fix_insn
 debug_stack_correct:
 	pushl $-1			# mark this as an int
 	CFI_ADJUST_CFA_OFFSET 4
@@ -1213,7 +1222,7 @@ nmi_stack_correct:
 
 nmi_stack_fixup:
 	RING0_INT_FRAME
-	FIX_STACK(12,nmi_stack_correct, 1)
+	FIX_STACK 12, nmi_stack_correct, 1
 	jmp nmi_stack_correct
 
 nmi_debug_stack_check:
@@ -1224,7 +1233,7 @@ nmi_debug_stack_check:
 	jb nmi_stack_correct
 	cmpl $debug_esp_fix_insn,(%esp)
 	ja nmi_stack_correct
-	FIX_STACK(24,nmi_stack_correct, 1)
+	FIX_STACK 24, nmi_stack_correct, 1
 	jmp nmi_stack_correct
 
 nmi_espfix_stack:
-- 
cgit v0.10.2


From d9a89a26e02ef9ed03f74a755a8b4d8f3a066622 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 9 Feb 2009 22:17:40 +0900
Subject: x86: add %gs accessors for x86_32

Impact: cleanup

On x86_32, %gs is handled lazily.  It's not saved and restored on
kernel entry/exit but only when necessary which usually is during task
switch but there are few other places.  Currently, it's done by
calling savesegment() and loadsegment() explicitly.  Define
get_user_gs(), set_user_gs() and task_user_gs() and use them instead.

While at it, clean up register access macros in signal.c.

This cleans up code a bit and will help future changes.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/include/asm/a.out-core.h b/arch/x86/include/asm/a.out-core.h
index 3c601f8..bb70e39 100644
--- a/arch/x86/include/asm/a.out-core.h
+++ b/arch/x86/include/asm/a.out-core.h
@@ -55,7 +55,7 @@ static inline void aout_dump_thread(struct pt_regs *regs, struct user *dump)
 	dump->regs.ds = (u16)regs->ds;
 	dump->regs.es = (u16)regs->es;
 	dump->regs.fs = (u16)regs->fs;
-	savesegment(gs, dump->regs.gs);
+	dump->regs.gs = get_user_gs(regs);
 	dump->regs.orig_ax = regs->orig_ax;
 	dump->regs.ip = regs->ip;
 	dump->regs.cs = (u16)regs->cs;
diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h
index f51a3dd..39b0aac 100644
--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h
@@ -124,7 +124,7 @@ do {						\
 	pr_reg[7] = regs->ds & 0xffff;		\
 	pr_reg[8] = regs->es & 0xffff;		\
 	pr_reg[9] = regs->fs & 0xffff;		\
-	savesegment(gs, pr_reg[10]);		\
+	pr_reg[10] = get_user_gs(regs);		\
 	pr_reg[11] = regs->orig_ax;		\
 	pr_reg[12] = regs->ip;			\
 	pr_reg[13] = regs->cs & 0xffff;		\
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 52948df..4955165 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -79,7 +79,7 @@ do {						\
 #ifdef CONFIG_X86_32
 #define deactivate_mm(tsk, mm)			\
 do {						\
-	loadsegment(gs, 0);			\
+	set_user_gs(task_pt_regs(tsk), 0);	\
 } while (0)
 #else
 #define deactivate_mm(tsk, mm)			\
diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h
index 2fcc70b..70c74b8 100644
--- a/arch/x86/include/asm/system.h
+++ b/arch/x86/include/asm/system.h
@@ -182,6 +182,15 @@ extern void native_load_gs_index(unsigned);
 #define savesegment(seg, value)				\
 	asm("mov %%" #seg ",%0":"=r" (value) : : "memory")
 
+/*
+ * x86_32 user gs accessors.
+ */
+#ifdef CONFIG_X86_32
+#define get_user_gs(regs)	(u16)({unsigned long v; savesegment(gs, v); v;})
+#define set_user_gs(regs, v)	loadsegment(gs, (unsigned long)(v))
+#define task_user_gs(tsk)	((tsk)->thread.gs)
+#endif
+
 static inline unsigned long get_limit(unsigned long segment)
 {
 	unsigned long __limit;
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 1a1ae8e..d58a340 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -131,7 +131,7 @@ void __show_regs(struct pt_regs *regs, int all)
 	if (user_mode_vm(regs)) {
 		sp = regs->sp;
 		ss = regs->ss & 0xffff;
-		savesegment(gs, gs);
+		gs = get_user_gs(regs);
 	} else {
 		sp = (unsigned long) (&regs->sp);
 		savesegment(ss, ss);
@@ -304,7 +304,7 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
 
 	p->thread.ip = (unsigned long) ret_from_fork;
 
-	savesegment(gs, p->thread.gs);
+	task_user_gs(p) = get_user_gs(regs);
 
 	tsk = current;
 	if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) {
@@ -342,7 +342,7 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
 void
 start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
 {
-	__asm__("movl %0, %%gs" : : "r"(0));
+	set_user_gs(regs, 0);
 	regs->fs		= 0;
 	set_fs(USER_DS);
 	regs->ds		= __USER_DS;
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 0a5df5f..508b6b5 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -90,9 +90,10 @@ static u16 get_segment_reg(struct task_struct *task, unsigned long offset)
 	if (offset != offsetof(struct user_regs_struct, gs))
 		retval = *pt_regs_access(task_pt_regs(task), offset);
 	else {
-		retval = task->thread.gs;
 		if (task == current)
-			savesegment(gs, retval);
+			retval = get_user_gs(task_pt_regs(task));
+		else
+			retval = task_user_gs(task);
 	}
 	return retval;
 }
@@ -126,13 +127,10 @@ static int set_segment_reg(struct task_struct *task,
 		break;
 
 	case offsetof(struct user_regs_struct, gs):
-		task->thread.gs = value;
 		if (task == current)
-			/*
-			 * The user-mode %gs is not affected by
-			 * kernel entry, so we must update the CPU.
-			 */
-			loadsegment(gs, value);
+			set_user_gs(task_pt_regs(task), value);
+		else
+			task_user_gs(task) = value;
 	}
 
 	return 0;
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 7fc78b0..8562387 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -50,27 +50,23 @@
 # define FIX_EFLAGS	__FIX_EFLAGS
 #endif
 
-#define COPY(x)			{		\
-	get_user_ex(regs->x, &sc->x);		\
-}
+#define COPY(x)			do {			\
+	get_user_ex(regs->x, &sc->x);			\
+} while (0)
 
-#define COPY_SEG(seg)		{			\
-		unsigned short tmp;			\
-		get_user_ex(tmp, &sc->seg);		\
-		regs->seg = tmp;			\
-}
+#define GET_SEG(seg)		({			\
+	unsigned short tmp;				\
+	get_user_ex(tmp, &sc->seg);			\
+	tmp;						\
+})
 
-#define COPY_SEG_CPL3(seg)	{			\
-		unsigned short tmp;			\
-		get_user_ex(tmp, &sc->seg);		\
-		regs->seg = tmp | 3;			\
-}
+#define COPY_SEG(seg)		do {			\
+	regs->seg = GET_SEG(seg);			\
+} while (0)
 
-#define GET_SEG(seg)		{			\
-		unsigned short tmp;			\
-		get_user_ex(tmp, &sc->seg);		\
-		loadsegment(seg, tmp);			\
-}
+#define COPY_SEG_CPL3(seg)	do {			\
+	regs->seg = GET_SEG(seg) | 3;			\
+} while (0)
 
 static int
 restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
@@ -86,7 +82,7 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
 	get_user_try {
 
 #ifdef CONFIG_X86_32
-		GET_SEG(gs);
+		set_user_gs(regs, GET_SEG(gs));
 		COPY_SEG(fs);
 		COPY_SEG(es);
 		COPY_SEG(ds);
@@ -138,12 +134,7 @@ setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate,
 	put_user_try {
 
 #ifdef CONFIG_X86_32
-		{
-			unsigned int tmp;
-
-			savesegment(gs, tmp);
-			put_user_ex(tmp, (unsigned int __user *)&sc->gs);
-		}
+		put_user_ex(get_user_gs(regs), (unsigned int __user *)&sc->gs);
 		put_user_ex(regs->fs, (unsigned int __user *)&sc->fs);
 		put_user_ex(regs->es, (unsigned int __user *)&sc->es);
 		put_user_ex(regs->ds, (unsigned int __user *)&sc->ds);
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index 4eeb5cf..55ea30d 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -158,7 +158,7 @@ struct pt_regs *save_v86_state(struct kernel_vm86_regs *regs)
 	ret = KVM86->regs32;
 
 	ret->fs = current->thread.saved_fs;
-	loadsegment(gs, current->thread.saved_gs);
+	set_user_gs(ret, current->thread.saved_gs);
 
 	return ret;
 }
@@ -323,7 +323,7 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk
 	info->regs32->ax = 0;
 	tsk->thread.saved_sp0 = tsk->thread.sp0;
 	tsk->thread.saved_fs = info->regs32->fs;
-	savesegment(gs, tsk->thread.saved_gs);
+	tsk->thread.saved_gs = get_user_gs(info->regs32);
 
 	tss = &per_cpu(init_tss, get_cpu());
 	tsk->thread.sp0 = (unsigned long) &info->VM86_TSS_ESP0;
diff --git a/arch/x86/math-emu/get_address.c b/arch/x86/math-emu/get_address.c
index 420b3b6..6ef5e99 100644
--- a/arch/x86/math-emu/get_address.c
+++ b/arch/x86/math-emu/get_address.c
@@ -150,11 +150,9 @@ static long pm_address(u_char FPU_modrm, u_char segment,
 #endif /* PARANOID */
 
 	switch (segment) {
-		/* gs isn't used by the kernel, so it still has its
-		   user-space value. */
 	case PREFIX_GS_ - 1:
-		/* N.B. - movl %seg, mem is a 2 byte write regardless of prefix */
-		savesegment(gs, addr->selector);
+		/* user gs handling can be lazy, use special accessors */
+		addr->selector = get_user_gs(FPU_info->regs);
 		break;
 	default:
 		addr->selector = PM_REG_(segment);
-- 
cgit v0.10.2


From ccbeed3a05908d201b47b6c3dd1a373138bba566 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 9 Feb 2009 22:17:40 +0900
Subject: x86: make lazy %gs optional on x86_32

Impact: pt_regs changed, lazy gs handling made optional, add slight
        overhead to SAVE_ALL, simplifies error_code path a bit

On x86_32, %gs hasn't been used by kernel and handled lazily.  pt_regs
doesn't have place for it and gs is saved/loaded only when necessary.
In preparation for stack protector support, this patch makes lazy %gs
handling optional by doing the followings.

* Add CONFIG_X86_32_LAZY_GS and place for gs in pt_regs.

* Save and restore %gs along with other registers in entry_32.S unless
  LAZY_GS.  Note that this unfortunately adds "pushl $0" on SAVE_ALL
  even when LAZY_GS.  However, it adds no overhead to common exit path
  and simplifies entry path with error code.

* Define different user_gs accessors depending on LAZY_GS and add
  lazy_save_gs() and lazy_load_gs() which are noop if !LAZY_GS.  The
  lazy_*_gs() ops are used to save, load and clear %gs lazily.

* Define ELF_CORE_COPY_KERNEL_REGS() which always read %gs directly.

xen and lguest changes need to be verified.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Jeremy Fitzhardinge <jeremy@xensource.com>
Cc: Rusty Russell <rusty@rustcorp.com.au>

Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 5c8e353..5bcdede 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -207,6 +207,10 @@ config X86_TRAMPOLINE
 	depends on X86_SMP || (X86_VOYAGER && SMP) || (64BIT && ACPI_SLEEP)
 	default y
 
+config X86_32_LAZY_GS
+	def_bool y
+	depends on X86_32
+
 config KTIME_SCALAR
 	def_bool X86_32
 source "init/Kconfig"
diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h
index 39b0aac..83c1bc8 100644
--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h
@@ -112,7 +112,7 @@ extern unsigned int vdso_enabled;
  * now struct_user_regs, they are different)
  */
 
-#define ELF_CORE_COPY_REGS(pr_reg, regs)	\
+#define ELF_CORE_COPY_REGS_COMMON(pr_reg, regs)	\
 do {						\
 	pr_reg[0] = regs->bx;			\
 	pr_reg[1] = regs->cx;			\
@@ -124,7 +124,6 @@ do {						\
 	pr_reg[7] = regs->ds & 0xffff;		\
 	pr_reg[8] = regs->es & 0xffff;		\
 	pr_reg[9] = regs->fs & 0xffff;		\
-	pr_reg[10] = get_user_gs(regs);		\
 	pr_reg[11] = regs->orig_ax;		\
 	pr_reg[12] = regs->ip;			\
 	pr_reg[13] = regs->cs & 0xffff;		\
@@ -133,6 +132,18 @@ do {						\
 	pr_reg[16] = regs->ss & 0xffff;		\
 } while (0);
 
+#define ELF_CORE_COPY_REGS(pr_reg, regs)	\
+do {						\
+	ELF_CORE_COPY_REGS_COMMON(pr_reg, regs);\
+	pr_reg[10] = get_user_gs(regs);		\
+} while (0);
+
+#define ELF_CORE_COPY_KERNEL_REGS(pr_reg, regs)	\
+do {						\
+	ELF_CORE_COPY_REGS_COMMON(pr_reg, regs);\
+	savesegment(gs, pr_reg[10]);		\
+} while (0);
+
 #define ELF_PLATFORM	(utsname()->machine)
 #define set_personality_64bit()	do { } while (0)
 
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 4955165..f923203 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -79,7 +79,7 @@ do {						\
 #ifdef CONFIG_X86_32
 #define deactivate_mm(tsk, mm)			\
 do {						\
-	set_user_gs(task_pt_regs(tsk), 0);	\
+	lazy_load_gs(0);			\
 } while (0)
 #else
 #define deactivate_mm(tsk, mm)			\
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index 6d34d95..e304b66 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -28,7 +28,7 @@ struct pt_regs {
 	int  xds;
 	int  xes;
 	int  xfs;
-	/* int  gs; */
+	int  xgs;
 	long orig_eax;
 	long eip;
 	int  xcs;
@@ -50,7 +50,7 @@ struct pt_regs {
 	unsigned long ds;
 	unsigned long es;
 	unsigned long fs;
-	/* int  gs; */
+	unsigned long gs;
 	unsigned long orig_ax;
 	unsigned long ip;
 	unsigned long cs;
diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h
index 70c74b8..79b98e5 100644
--- a/arch/x86/include/asm/system.h
+++ b/arch/x86/include/asm/system.h
@@ -186,10 +186,20 @@ extern void native_load_gs_index(unsigned);
  * x86_32 user gs accessors.
  */
 #ifdef CONFIG_X86_32
+#ifdef CONFIG_X86_32_LAZY_GS
 #define get_user_gs(regs)	(u16)({unsigned long v; savesegment(gs, v); v;})
 #define set_user_gs(regs, v)	loadsegment(gs, (unsigned long)(v))
 #define task_user_gs(tsk)	((tsk)->thread.gs)
-#endif
+#define lazy_save_gs(v)		savesegment(gs, (v))
+#define lazy_load_gs(v)		loadsegment(gs, (v))
+#else	/* X86_32_LAZY_GS */
+#define get_user_gs(regs)	(u16)((regs)->gs)
+#define set_user_gs(regs, v)	do { (regs)->gs = (v); } while (0)
+#define task_user_gs(tsk)	(task_pt_regs(tsk)->gs)
+#define lazy_save_gs(v)		do { } while (0)
+#define lazy_load_gs(v)		do { } while (0)
+#endif	/* X86_32_LAZY_GS */
+#endif	/* X86_32 */
 
 static inline unsigned long get_limit(unsigned long segment)
 {
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index ee4df08..fbf2f33 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -75,6 +75,7 @@ void foo(void)
 	OFFSET(PT_DS,  pt_regs, ds);
 	OFFSET(PT_ES,  pt_regs, es);
 	OFFSET(PT_FS,  pt_regs, fs);
+	OFFSET(PT_GS,  pt_regs, gs);
 	OFFSET(PT_ORIG_EAX, pt_regs, orig_ax);
 	OFFSET(PT_EIP, pt_regs, ip);
 	OFFSET(PT_CS,  pt_regs, cs);
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index c461925..82e6868 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -30,12 +30,13 @@
  *	1C(%esp) - %ds
  *	20(%esp) - %es
  *	24(%esp) - %fs
- *	28(%esp) - orig_eax
- *	2C(%esp) - %eip
- *	30(%esp) - %cs
- *	34(%esp) - %eflags
- *	38(%esp) - %oldesp
- *	3C(%esp) - %oldss
+ *	28(%esp) - %gs		saved iff !CONFIG_X86_32_LAZY_GS
+ *	2C(%esp) - orig_eax
+ *	30(%esp) - %eip
+ *	34(%esp) - %cs
+ *	38(%esp) - %eflags
+ *	3C(%esp) - %oldesp
+ *	40(%esp) - %oldss
  *
  * "current" is in register %ebx during any slow entries.
  */
@@ -101,8 +102,99 @@
 #define resume_userspace_sig	resume_userspace
 #endif
 
+/*
+ * User gs save/restore
+ *
+ * %gs is used for userland TLS and kernel only uses it for stack
+ * canary which is required to be at %gs:20 by gcc.  Read the comment
+ * at the top of stackprotector.h for more info.
+ *
+ * Local labels 98 and 99 are used.
+ */
+#ifdef CONFIG_X86_32_LAZY_GS
+
+ /* unfortunately push/pop can't be no-op */
+.macro PUSH_GS
+	pushl $0
+	CFI_ADJUST_CFA_OFFSET 4
+.endm
+.macro POP_GS pop=0
+	addl $(4 + \pop), %esp
+	CFI_ADJUST_CFA_OFFSET -(4 + \pop)
+.endm
+.macro POP_GS_EX
+.endm
+
+ /* all the rest are no-op */
+.macro PTGS_TO_GS
+.endm
+.macro PTGS_TO_GS_EX
+.endm
+.macro GS_TO_REG reg
+.endm
+.macro REG_TO_PTGS reg
+.endm
+.macro SET_KERNEL_GS reg
+.endm
+
+#else	/* CONFIG_X86_32_LAZY_GS */
+
+.macro PUSH_GS
+	pushl %gs
+	CFI_ADJUST_CFA_OFFSET 4
+	/*CFI_REL_OFFSET gs, 0*/
+.endm
+
+.macro POP_GS pop=0
+98:	popl %gs
+	CFI_ADJUST_CFA_OFFSET -4
+	/*CFI_RESTORE gs*/
+  .if \pop <> 0
+	add $\pop, %esp
+	CFI_ADJUST_CFA_OFFSET -\pop
+  .endif
+.endm
+.macro POP_GS_EX
+.pushsection .fixup, "ax"
+99:	movl $0, (%esp)
+	jmp 98b
+.section __ex_table, "a"
+	.align 4
+	.long 98b, 99b
+.popsection
+.endm
+
+.macro PTGS_TO_GS
+98:	mov PT_GS(%esp), %gs
+.endm
+.macro PTGS_TO_GS_EX
+.pushsection .fixup, "ax"
+99:	movl $0, PT_GS(%esp)
+	jmp 98b
+.section __ex_table, "a"
+	.align 4
+	.long 98b, 99b
+.popsection
+.endm
+
+.macro GS_TO_REG reg
+	movl %gs, \reg
+	/*CFI_REGISTER gs, \reg*/
+.endm
+.macro REG_TO_PTGS reg
+	movl \reg, PT_GS(%esp)
+	/*CFI_REL_OFFSET gs, PT_GS*/
+.endm
+.macro SET_KERNEL_GS reg
+	xorl \reg, \reg
+	movl \reg, %gs
+.endm
+
+#endif	/* CONFIG_X86_32_LAZY_GS */
+
 .macro SAVE_ALL
 	cld
+	PUSH_GS
 	pushl %fs
 	CFI_ADJUST_CFA_OFFSET 4
 	/*CFI_REL_OFFSET fs, 0;*/
@@ -138,6 +230,7 @@
 	movl %edx, %es
 	movl $(__KERNEL_PERCPU), %edx
 	movl %edx, %fs
+	SET_KERNEL_GS %edx
 .endm
 
 .macro RESTORE_INT_REGS
@@ -164,7 +257,7 @@
 	CFI_RESTORE eax
 .endm
 
-.macro RESTORE_REGS
+.macro RESTORE_REGS pop=0
 	RESTORE_INT_REGS
 1:	popl %ds
 	CFI_ADJUST_CFA_OFFSET -4
@@ -175,6 +268,7 @@
 3:	popl %fs
 	CFI_ADJUST_CFA_OFFSET -4
 	/*CFI_RESTORE fs;*/
+	POP_GS \pop
 .pushsection .fixup, "ax"
 4:	movl $0, (%esp)
 	jmp 1b
@@ -188,6 +282,7 @@
 	.long 2b, 5b
 	.long 3b, 6b
 .popsection
+	POP_GS_EX
 .endm
 
 .macro RING0_INT_FRAME
@@ -368,6 +463,7 @@ sysenter_exit:
 	xorl %ebp,%ebp
 	TRACE_IRQS_ON
 1:	mov  PT_FS(%esp), %fs
+	PTGS_TO_GS
 	ENABLE_INTERRUPTS_SYSEXIT
 
 #ifdef CONFIG_AUDITSYSCALL
@@ -416,6 +512,7 @@ sysexit_audit:
 	.align 4
 	.long 1b,2b
 .popsection
+	PTGS_TO_GS_EX
 ENDPROC(ia32_sysenter_target)
 
 	# system call handler stub
@@ -458,8 +555,7 @@ restore_all:
 restore_nocheck:
 	TRACE_IRQS_IRET
 restore_nocheck_notrace:
-	RESTORE_REGS
-	addl $4, %esp			# skip orig_eax/error_code
+	RESTORE_REGS 4			# skip orig_eax/error_code
 	CFI_ADJUST_CFA_OFFSET -4
 irq_return:
 	INTERRUPT_RETURN
@@ -1078,7 +1174,10 @@ ENTRY(page_fault)
 	CFI_ADJUST_CFA_OFFSET 4
 	ALIGN
 error_code:
-	/* the function address is in %fs's slot on the stack */
+	/* the function address is in %gs's slot on the stack */
+	pushl %fs
+	CFI_ADJUST_CFA_OFFSET 4
+	/*CFI_REL_OFFSET fs, 0*/
 	pushl %es
 	CFI_ADJUST_CFA_OFFSET 4
 	/*CFI_REL_OFFSET es, 0*/
@@ -1107,20 +1206,15 @@ error_code:
 	CFI_ADJUST_CFA_OFFSET 4
 	CFI_REL_OFFSET ebx, 0
 	cld
-	pushl %fs
-	CFI_ADJUST_CFA_OFFSET 4
-	/*CFI_REL_OFFSET fs, 0*/
 	movl $(__KERNEL_PERCPU), %ecx
 	movl %ecx, %fs
 	UNWIND_ESPFIX_STACK
-	popl %ecx
-	CFI_ADJUST_CFA_OFFSET -4
-	/*CFI_REGISTER es, ecx*/
-	movl PT_FS(%esp), %edi		# get the function address
+	GS_TO_REG %ecx
+	movl PT_GS(%esp), %edi		# get the function address
 	movl PT_ORIG_EAX(%esp), %edx	# get the error code
 	movl $-1, PT_ORIG_EAX(%esp)	# no syscall to restart
-	mov  %ecx, PT_FS(%esp)
-	/*CFI_REL_OFFSET fs, ES*/
+	REG_TO_PTGS %ecx
+	SET_KERNEL_GS %ecx
 	movl $(__USER_DS), %ecx
 	movl %ecx, %ds
 	movl %ecx, %es
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index d58a340..86122fa 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -539,7 +539,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	 * used %fs or %gs (it does not today), or if the kernel is
 	 * running inside of a hypervisor layer.
 	 */
-	savesegment(gs, prev->gs);
+	lazy_save_gs(prev->gs);
 
 	/*
 	 * Load the per-thread Thread-Local Storage descriptor.
@@ -585,7 +585,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	 * Restore %gs if needed (which is common)
 	 */
 	if (prev->gs | next->gs)
-		loadsegment(gs, next->gs);
+		lazy_load_gs(next->gs);
 
 	percpu_write(current_task, next_p);
 
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 508b6b5..7ec39ab 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -75,10 +75,7 @@ static inline bool invalid_selector(u16 value)
 static unsigned long *pt_regs_access(struct pt_regs *regs, unsigned long regno)
 {
 	BUILD_BUG_ON(offsetof(struct pt_regs, bx) != 0);
-	regno >>= 2;
-	if (regno > FS)
-		--regno;
-	return &regs->bx + regno;
+	return &regs->bx + (regno >> 2);
 }
 
 static u16 get_segment_reg(struct task_struct *task, unsigned long offset)
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index 19e33b6..da2e314 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -283,7 +283,7 @@ static void lguest_load_tls(struct thread_struct *t, unsigned int cpu)
 	/* There's one problem which normal hardware doesn't have: the Host
 	 * can't handle us removing entries we're currently using.  So we clear
 	 * the GS register here: if it's needed it'll be reloaded anyway. */
-	loadsegment(gs, 0);
+	lazy_load_gs(0);
 	lazy_hcall(LHCALL_LOAD_TLS, __pa(&t->tls_array), cpu, 0);
 }
 
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 3723034..95ff6a0 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -323,13 +323,14 @@ static void load_TLS_descriptor(struct thread_struct *t,
 static void xen_load_tls(struct thread_struct *t, unsigned int cpu)
 {
 	/*
-	 * XXX sleazy hack: If we're being called in a lazy-cpu zone,
-	 * it means we're in a context switch, and %gs has just been
-	 * saved.  This means we can zero it out to prevent faults on
-	 * exit from the hypervisor if the next process has no %gs.
-	 * Either way, it has been saved, and the new value will get
-	 * loaded properly.  This will go away as soon as Xen has been
-	 * modified to not save/restore %gs for normal hypercalls.
+	 * XXX sleazy hack: If we're being called in a lazy-cpu zone
+	 * and lazy gs handling is enabled, it means we're in a
+	 * context switch, and %gs has just been saved.  This means we
+	 * can zero it out to prevent faults on exit from the
+	 * hypervisor if the next process has no %gs.  Either way, it
+	 * has been saved, and the new value will get loaded properly.
+	 * This will go away as soon as Xen has been modified to not
+	 * save/restore %gs for normal hypercalls.
 	 *
 	 * On x86_64, this hack is not used for %gs, because gs points
 	 * to KERNEL_GS_BASE (and uses it for PDA references), so we
@@ -341,7 +342,7 @@ static void xen_load_tls(struct thread_struct *t, unsigned int cpu)
 	 */
 	if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU) {
 #ifdef CONFIG_X86_32
-		loadsegment(gs, 0);
+		lazy_load_gs(0);
 #else
 		loadsegment(fs, 0);
 #endif
-- 
cgit v0.10.2


From 60a5317ff0f42dd313094b88f809f63041568b08 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 9 Feb 2009 22:17:40 +0900
Subject: x86: implement x86_32 stack protector

Impact: stack protector for x86_32

Implement stack protector for x86_32.  GDT entry 28 is used for it.
It's set to point to stack_canary-20 and have the length of 24 bytes.
CONFIG_CC_STACKPROTECTOR turns off CONFIG_X86_32_LAZY_GS and sets %gs
to the stack canary segment on entry.  As %gs is otherwise unused by
the kernel, the canary can be anywhere.  It's defined as a percpu
variable.

x86_32 exception handlers take register frame on stack directly as
struct pt_regs.  With -fstack-protector turned on, gcc copies the
whole structure after the stack canary and (of course) doesn't copy
back on return thus losing all changed.  For now, -fno-stack-protector
is added to all files which contain those functions.  We definitely
need something better.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 5bcdede..f760a22 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -209,7 +209,7 @@ config X86_TRAMPOLINE
 
 config X86_32_LAZY_GS
 	def_bool y
-	depends on X86_32
+	depends on X86_32 && !CC_STACKPROTECTOR
 
 config KTIME_SCALAR
 	def_bool X86_32
@@ -1356,7 +1356,6 @@ config CC_STACKPROTECTOR_ALL
 
 config CC_STACKPROTECTOR
 	bool "Enable -fstack-protector buffer overflow detection (EXPERIMENTAL)"
-	depends on X86_64
 	select CC_STACKPROTECTOR_ALL
 	help
           This option turns on the -fstack-protector GCC feature. This
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 9763eb7..5a94721 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -396,7 +396,11 @@ DECLARE_PER_CPU(union irq_stack_union, irq_stack_union);
 DECLARE_INIT_PER_CPU(irq_stack_union);
 
 DECLARE_PER_CPU(char *, irq_stack_ptr);
+#else	/* X86_64 */
+#ifdef CONFIG_CC_STACKPROTECTOR
+DECLARE_PER_CPU(unsigned long, stack_canary);
 #endif
+#endif	/* X86_64 */
 
 extern void print_cpu_info(struct cpuinfo_x86 *);
 extern unsigned int xstate_size;
diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h
index 1dc1b51..14e0ed8 100644
--- a/arch/x86/include/asm/segment.h
+++ b/arch/x86/include/asm/segment.h
@@ -61,7 +61,7 @@
  *
  *  26 - ESPFIX small SS
  *  27 - per-cpu			[ offset to per-cpu data area ]
- *  28 - unused
+ *  28 - stack_canary-20		[ for stack protector ]
  *  29 - unused
  *  30 - unused
  *  31 - TSS for double fault handler
@@ -95,6 +95,13 @@
 #define __KERNEL_PERCPU 0
 #endif
 
+#define GDT_ENTRY_STACK_CANARY		(GDT_ENTRY_KERNEL_BASE + 16)
+#ifdef CONFIG_CC_STACKPROTECTOR
+#define __KERNEL_STACK_CANARY		(GDT_ENTRY_STACK_CANARY * 8)
+#else
+#define __KERNEL_STACK_CANARY		0
+#endif
+
 #define GDT_ENTRY_DOUBLEFAULT_TSS	31
 
 /*
diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h
index ee275e9..fa7e5bd 100644
--- a/arch/x86/include/asm/stackprotector.h
+++ b/arch/x86/include/asm/stackprotector.h
@@ -1,3 +1,35 @@
+/*
+ * GCC stack protector support.
+ *
+ * Stack protector works by putting predefined pattern at the start of
+ * the stack frame and verifying that it hasn't been overwritten when
+ * returning from the function.  The pattern is called stack canary
+ * and unfortunately gcc requires it to be at a fixed offset from %gs.
+ * On x86_64, the offset is 40 bytes and on x86_32 20 bytes.  x86_64
+ * and x86_32 use segment registers differently and thus handles this
+ * requirement differently.
+ *
+ * On x86_64, %gs is shared by percpu area and stack canary.  All
+ * percpu symbols are zero based and %gs points to the base of percpu
+ * area.  The first occupant of the percpu area is always
+ * irq_stack_union which contains stack_canary at offset 40.  Userland
+ * %gs is always saved and restored on kernel entry and exit using
+ * swapgs, so stack protector doesn't add any complexity there.
+ *
+ * On x86_32, it's slightly more complicated.  As in x86_64, %gs is
+ * used for userland TLS.  Unfortunately, some processors are much
+ * slower at loading segment registers with different value when
+ * entering and leaving the kernel, so the kernel uses %fs for percpu
+ * area and manages %gs lazily so that %gs is switched only when
+ * necessary, usually during task switch.
+ *
+ * As gcc requires the stack canary at %gs:20, %gs can't be managed
+ * lazily if stack protector is enabled, so the kernel saves and
+ * restores userland %gs on kernel entry and exit.  This behavior is
+ * controlled by CONFIG_X86_32_LAZY_GS and accessors are defined in
+ * system.h to hide the details.
+ */
+
 #ifndef _ASM_STACKPROTECTOR_H
 #define _ASM_STACKPROTECTOR_H 1
 
@@ -6,9 +38,19 @@
 #include <asm/tsc.h>
 #include <asm/processor.h>
 #include <asm/percpu.h>
+#include <asm/system.h>
+#include <asm/desc.h>
 #include <linux/random.h>
 
 /*
+ * 24 byte read-only segment initializer for stack canary.  Linker
+ * can't handle the address bit shifting.  Address will be set in
+ * head_32 for boot CPU and setup_per_cpu_areas() for others.
+ */
+#define GDT_STACK_CANARY_INIT						\
+	[GDT_ENTRY_STACK_CANARY] = { { { 0x00000018, 0x00409000 } } },
+
+/*
  * Initialize the stackprotector canary value.
  *
  * NOTE: this must only be called from functions that never return,
@@ -19,12 +61,9 @@ static __always_inline void boot_init_stack_canary(void)
 	u64 canary;
 	u64 tsc;
 
-	/*
-	 * Build time only check to make sure the stack_canary is at
-	 * offset 40 in the pda; this is a gcc ABI requirement
-	 */
+#ifdef CONFIG_X86_64
 	BUILD_BUG_ON(offsetof(union irq_stack_union, stack_canary) != 40);
-
+#endif
 	/*
 	 * We both use the random pool and the current TSC as a source
 	 * of randomness. The TSC only matters for very early init,
@@ -36,7 +75,49 @@ static __always_inline void boot_init_stack_canary(void)
 	canary += tsc + (tsc << 32UL);
 
 	current->stack_canary = canary;
+#ifdef CONFIG_X86_64
 	percpu_write(irq_stack_union.stack_canary, canary);
+#else
+	percpu_write(stack_canary, canary);
+#endif
+}
+
+static inline void setup_stack_canary_segment(int cpu)
+{
+#ifdef CONFIG_X86_32
+	unsigned long canary = (unsigned long)&per_cpu(stack_canary, cpu);
+	struct desc_struct *gdt_table = get_cpu_gdt_table(cpu);
+	struct desc_struct desc;
+
+	desc = gdt_table[GDT_ENTRY_STACK_CANARY];
+	desc.base0 = canary & 0xffff;
+	desc.base1 = (canary >> 16) & 0xff;
+	desc.base2 = (canary >> 24) & 0xff;
+	write_gdt_entry(gdt_table, GDT_ENTRY_STACK_CANARY, &desc, DESCTYPE_S);
+#endif
+}
+
+static inline void load_stack_canary_segment(void)
+{
+#ifdef CONFIG_X86_32
+	asm("mov %0, %%gs" : : "r" (__KERNEL_STACK_CANARY) : "memory");
+#endif
+}
+
+#else	/* CC_STACKPROTECTOR */
+
+#define GDT_STACK_CANARY_INIT
+
+/* dummy boot_init_stack_canary() is defined in linux/stackprotector.h */
+
+static inline void setup_stack_canary_segment(int cpu)
+{ }
+
+static inline void load_stack_canary_segment(void)
+{
+#ifdef CONFIG_X86_32
+	asm volatile ("mov %0, %%gs" : : "r" (0));
+#endif
 }
 
 #endif	/* CC_STACKPROTECTOR */
diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h
index 79b98e5..2692ee8 100644
--- a/arch/x86/include/asm/system.h
+++ b/arch/x86/include/asm/system.h
@@ -23,6 +23,22 @@ struct task_struct *__switch_to(struct task_struct *prev,
 
 #ifdef CONFIG_X86_32
 
+#ifdef CONFIG_CC_STACKPROTECTOR
+#define __switch_canary							\
+	"movl "__percpu_arg([current_task])",%%ebx\n\t"			\
+	"movl %P[task_canary](%%ebx),%%ebx\n\t"				\
+	"movl %%ebx,"__percpu_arg([stack_canary])"\n\t"
+#define __switch_canary_oparam						\
+	, [stack_canary] "=m" (per_cpu_var(stack_canary))
+#define __switch_canary_iparam						\
+	, [current_task] "m" (per_cpu_var(current_task))		\
+	, [task_canary] "i" (offsetof(struct task_struct, stack_canary))
+#else	/* CC_STACKPROTECTOR */
+#define __switch_canary
+#define __switch_canary_oparam
+#define __switch_canary_iparam
+#endif	/* CC_STACKPROTECTOR */
+
 /*
  * Saving eflags is important. It switches not only IOPL between tasks,
  * it also protects other tasks from NT leaking through sysenter etc.
@@ -46,6 +62,7 @@ do {									\
 		     "pushl %[next_ip]\n\t"	/* restore EIP   */	\
 		     "jmp __switch_to\n"	/* regparm call  */	\
 		     "1:\t"						\
+		     __switch_canary					\
 		     "popl %%ebp\n\t"		/* restore EBP   */	\
 		     "popfl\n"			/* restore flags */	\
 									\
@@ -58,6 +75,8 @@ do {									\
 		       "=b" (ebx), "=c" (ecx), "=d" (edx),		\
 		       "=S" (esi), "=D" (edi)				\
 		       							\
+		       __switch_canary_oparam				\
+									\
 		       /* input parameters: */				\
 		     : [next_sp]  "m" (next->thread.sp),		\
 		       [next_ip]  "m" (next->thread.ip),		\
@@ -66,6 +85,8 @@ do {									\
 		       [prev]     "a" (prev),				\
 		       [next]     "d" (next)				\
 									\
+		       __switch_canary_iparam				\
+									\
 		     : /* reloaded segment registers */			\
 			"memory");					\
 } while (0)
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 37fa30b..b1f8be3 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -24,6 +24,24 @@ CFLAGS_vsyscall_64.o	:= $(PROFILING) -g0 $(nostackp)
 CFLAGS_hpet.o		:= $(nostackp)
 CFLAGS_tsc.o		:= $(nostackp)
 CFLAGS_paravirt.o	:= $(nostackp)
+#
+# On x86_32, register frame is passed verbatim on stack as struct
+# pt_regs.  gcc considers the parameter to belong to the callee and
+# with -fstack-protector it copies pt_regs to the callee's stack frame
+# to put the structure after the stack canary causing changes made by
+# the exception handlers to be lost.  Turn off stack protector for all
+# files containing functions which take struct pt_regs from register
+# frame.
+#
+# The proper way to fix this is to teach gcc that the argument belongs
+# to the caller for these functions, oh well...
+#
+ifdef CONFIG_X86_32
+CFLAGS_process_32.o	:= $(nostackp)
+CFLAGS_vm86_32.o	:= $(nostackp)
+CFLAGS_signal.o		:= $(nostackp)
+CFLAGS_traps.o		:= $(nostackp)
+endif
 
 obj-y			:= process_$(BITS).o signal.o entry_$(BITS).o
 obj-y			+= traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 41b0de6..260fe4c 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -39,6 +39,7 @@
 #include <asm/sections.h>
 #include <asm/setup.h>
 #include <asm/hypervisor.h>
+#include <asm/stackprotector.h>
 
 #include "cpu.h"
 
@@ -122,6 +123,7 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
 
 	[GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } },
 	[GDT_ENTRY_PERCPU] = { { { 0x0000ffff, 0x00cf9200 } } },
+	GDT_STACK_CANARY_INIT
 #endif
 } };
 EXPORT_PER_CPU_SYMBOL_GPL(gdt_page);
@@ -261,6 +263,7 @@ void load_percpu_segment(int cpu)
 	loadsegment(gs, 0);
 	wrmsrl(MSR_GS_BASE, (unsigned long)per_cpu(irq_stack_union.gs_base, cpu));
 #endif
+	load_stack_canary_segment();
 }
 
 /* Current gdt points %fs at the "master" per-cpu area: after this,
@@ -946,16 +949,21 @@ unsigned long kernel_eflags;
  */
 DEFINE_PER_CPU(struct orig_ist, orig_ist);
 
-#else
+#else	/* x86_64 */
+
+#ifdef CONFIG_CC_STACKPROTECTOR
+DEFINE_PER_CPU(unsigned long, stack_canary);
+#endif
 
-/* Make sure %fs is initialized properly in idle threads */
+/* Make sure %fs and %gs are initialized properly in idle threads */
 struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs)
 {
 	memset(regs, 0, sizeof(struct pt_regs));
 	regs->fs = __KERNEL_PERCPU;
+	regs->gs = __KERNEL_STACK_CANARY;
 	return regs;
 }
-#endif
+#endif	/* x86_64 */
 
 /*
  * cpu_init() initializes state that is per-CPU. Some data is already
@@ -1120,9 +1128,6 @@ void __cpuinit cpu_init(void)
 	__set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss);
 #endif
 
-	/* Clear %gs. */
-	asm volatile ("mov %0, %%gs" : : "r" (0));
-
 	/* Clear all 6 debug registers: */
 	set_debugreg(0, 0);
 	set_debugreg(0, 1);
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 82e6868..5f5bd22 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -186,7 +186,7 @@
 	/*CFI_REL_OFFSET gs, PT_GS*/
 .endm
 .macro SET_KERNEL_GS reg
-	xorl \reg, \reg
+	movl $(__KERNEL_STACK_CANARY), \reg
 	movl \reg, %gs
 .endm
 
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 24c0e5c..924e316 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -19,6 +19,7 @@
 #include <asm/asm-offsets.h>
 #include <asm/setup.h>
 #include <asm/processor-flags.h>
+#include <asm/percpu.h>
 
 /* Physical address */
 #define pa(X) ((X) - __PAGE_OFFSET)
@@ -437,8 +438,25 @@ is386:	movl $2,%ecx		# set MP
 	movl $(__KERNEL_PERCPU), %eax
 	movl %eax,%fs			# set this cpu's percpu
 
-	xorl %eax,%eax			# Clear GS and LDT
+#ifdef CONFIG_CC_STACKPROTECTOR
+	/*
+	 * The linker can't handle this by relocation.  Manually set
+	 * base address in stack canary segment descriptor.
+	 */
+	cmpb $0,ready
+	jne 1f
+	movl $per_cpu__gdt_page,%eax
+	movl $per_cpu__stack_canary,%ecx
+	movw %cx, 8 * GDT_ENTRY_STACK_CANARY + 2(%eax)
+	shrl $16, %ecx
+	movb %cl, 8 * GDT_ENTRY_STACK_CANARY + 4(%eax)
+	movb %ch, 8 * GDT_ENTRY_STACK_CANARY + 7(%eax)
+1:
+#endif
+	movl $(__KERNEL_STACK_CANARY),%eax
 	movl %eax,%gs
+
+	xorl %eax,%eax			# Clear LDT
 	lldt %ax
 
 	cld			# gcc2 wants the direction flag cleared at all times
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 86122fa..9a62383e 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -212,6 +212,7 @@ int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
 	regs.ds = __USER_DS;
 	regs.es = __USER_DS;
 	regs.fs = __KERNEL_PERCPU;
+	regs.gs = __KERNEL_STACK_CANARY;
 	regs.orig_ax = -1;
 	regs.ip = (unsigned long) kernel_thread_helper;
 	regs.cs = __KERNEL_CS | get_kernel_rpl();
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index ef91747..d992e6c 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -16,6 +16,7 @@
 #include <asm/proto.h>
 #include <asm/cpumask.h>
 #include <asm/cpu.h>
+#include <asm/stackprotector.h>
 
 #ifdef CONFIG_DEBUG_PER_CPU_MAPS
 # define DBG(x...) printk(KERN_DEBUG x)
@@ -95,6 +96,7 @@ void __init setup_per_cpu_areas(void)
 		per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu);
 		per_cpu(cpu_number, cpu) = cpu;
 		setup_percpu_segment(cpu);
+		setup_stack_canary_segment(cpu);
 		/*
 		 * Copy data used in early init routines from the
 		 * initial arrays to the per cpu data areas.  These
diff --git a/scripts/gcc-x86_32-has-stack-protector.sh b/scripts/gcc-x86_32-has-stack-protector.sh
new file mode 100644
index 0000000..4fdf6ce
--- /dev/null
+++ b/scripts/gcc-x86_32-has-stack-protector.sh
@@ -0,0 +1,8 @@
+#!/bin/sh
+
+echo "int foo(void) { char X[200]; return 3; }" | $1 -S -xc -c -O0 -fstack-protector - -o - 2> /dev/null | grep -q "%gs"
+if [ "$?" -eq "0" ] ; then
+	echo y
+else
+	echo n
+fi
-- 
cgit v0.10.2


From 5c79d2a517a9905599d192db8ce77ab5f1a2faca Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 11 Feb 2009 16:31:00 +0900
Subject: x86: fix x86_32 stack protector bugs

Impact: fix x86_32 stack protector

Brian Gerst found out that %gs was being initialized to stack_canary
instead of stack_canary - 20, which basically gave the same canary
value for all threads.  Fixing this also exposed the following bugs.

* cpu_idle() didn't call boot_init_stack_canary()

* stack canary switching in switch_to() was being done too late making
  the initial run of a new thread use the old stack canary value.

Fix all of them and while at it update comment in cpu_idle() about
calling boot_init_stack_canary().

Reported-by: Brian Gerst <brgerst@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h
index fa7e5bd..c2d742c 100644
--- a/arch/x86/include/asm/stackprotector.h
+++ b/arch/x86/include/asm/stackprotector.h
@@ -85,7 +85,7 @@ static __always_inline void boot_init_stack_canary(void)
 static inline void setup_stack_canary_segment(int cpu)
 {
 #ifdef CONFIG_X86_32
-	unsigned long canary = (unsigned long)&per_cpu(stack_canary, cpu);
+	unsigned long canary = (unsigned long)&per_cpu(stack_canary, cpu) - 20;
 	struct desc_struct *gdt_table = get_cpu_gdt_table(cpu);
 	struct desc_struct desc;
 
diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h
index 2692ee8..7a80f72 100644
--- a/arch/x86/include/asm/system.h
+++ b/arch/x86/include/asm/system.h
@@ -25,13 +25,11 @@ struct task_struct *__switch_to(struct task_struct *prev,
 
 #ifdef CONFIG_CC_STACKPROTECTOR
 #define __switch_canary							\
-	"movl "__percpu_arg([current_task])",%%ebx\n\t"			\
-	"movl %P[task_canary](%%ebx),%%ebx\n\t"				\
-	"movl %%ebx,"__percpu_arg([stack_canary])"\n\t"
+	"movl %P[task_canary](%[next]), %%ebx\n\t"			\
+	"movl %%ebx, "__percpu_arg([stack_canary])"\n\t"
 #define __switch_canary_oparam						\
 	, [stack_canary] "=m" (per_cpu_var(stack_canary))
 #define __switch_canary_iparam						\
-	, [current_task] "m" (per_cpu_var(current_task))		\
 	, [task_canary] "i" (offsetof(struct task_struct, stack_canary))
 #else	/* CC_STACKPROTECTOR */
 #define __switch_canary
@@ -60,9 +58,9 @@ do {									\
 		     "movl %[next_sp],%%esp\n\t"	/* restore ESP   */ \
 		     "movl $1f,%[prev_ip]\n\t"	/* save    EIP   */	\
 		     "pushl %[next_ip]\n\t"	/* restore EIP   */	\
+		     __switch_canary					\
 		     "jmp __switch_to\n"	/* regparm call  */	\
 		     "1:\t"						\
-		     __switch_canary					\
 		     "popl %%ebp\n\t"		/* restore EBP   */	\
 		     "popfl\n"			/* restore flags */	\
 									\
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 924e316..cf21fd0 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -447,6 +447,7 @@ is386:	movl $2,%ecx		# set MP
 	jne 1f
 	movl $per_cpu__gdt_page,%eax
 	movl $per_cpu__stack_canary,%ecx
+	subl $20, %ecx
 	movw %cx, 8 * GDT_ENTRY_STACK_CANARY + 2(%eax)
 	shrl $16, %ecx
 	movb %cl, 8 * GDT_ENTRY_STACK_CANARY + 4(%eax)
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 9a62383e..b50604b 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -11,6 +11,7 @@
 
 #include <stdarg.h>
 
+#include <linux/stackprotector.h>
 #include <linux/cpu.h>
 #include <linux/errno.h>
 #include <linux/sched.h>
@@ -91,6 +92,15 @@ void cpu_idle(void)
 {
 	int cpu = smp_processor_id();
 
+	/*
+	 * If we're the non-boot CPU, nothing set the stack canary up
+	 * for us.  CPU0 already has it initialized but no harm in
+	 * doing it again.  This is a good place for updating it, as
+	 * we wont ever return from this function (so the invalid
+	 * canaries already on the stack wont ever trigger).
+	 */
+	boot_init_stack_canary();
+
 	current_thread_info()->status |= TS_POLLING;
 
 	/* endless idle loop with no priority at all */
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 8eb169e..836ef65 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -120,12 +120,11 @@ void cpu_idle(void)
 	current_thread_info()->status |= TS_POLLING;
 
 	/*
-	 * If we're the non-boot CPU, nothing set the PDA stack
-	 * canary up for us - and if we are the boot CPU we have
-	 * a 0 stack canary. This is a good place for updating
-	 * it, as we wont ever return from this function (so the
-	 * invalid canaries already on the stack wont ever
-	 * trigger):
+	 * If we're the non-boot CPU, nothing set the stack canary up
+	 * for us.  CPU0 already has it initialized but no harm in
+	 * doing it again.  This is a good place for updating it, as
+	 * we wont ever return from this function (so the invalid
+	 * canaries already on the stack wont ever trigger).
 	 */
 	boot_init_stack_canary();
 
-- 
cgit v0.10.2


From ebd9026d9f8499abc60d82d949bd37f88fe34a41 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 11 Feb 2009 12:17:29 +0100
Subject: stackprotector: fix multi-word cross-builds

Stackprotector builds were failing if CROSS_COMPILER was more than
a single world (such as when distcc was used) - because the check
scripts used $1 instead of $*.

Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/scripts/gcc-x86_32-has-stack-protector.sh b/scripts/gcc-x86_32-has-stack-protector.sh
index 4fdf6ce..29493dc 100644
--- a/scripts/gcc-x86_32-has-stack-protector.sh
+++ b/scripts/gcc-x86_32-has-stack-protector.sh
@@ -1,6 +1,6 @@
 #!/bin/sh
 
-echo "int foo(void) { char X[200]; return 3; }" | $1 -S -xc -c -O0 -fstack-protector - -o - 2> /dev/null | grep -q "%gs"
+echo "int foo(void) { char X[200]; return 3; }" | $* -S -xc -c -O0 -fstack-protector - -o - 2> /dev/null | grep -q "%gs"
 if [ "$?" -eq "0" ] ; then
 	echo y
 else
diff --git a/scripts/gcc-x86_64-has-stack-protector.sh b/scripts/gcc-x86_64-has-stack-protector.sh
index 2d69fcd..afaec61 100644
--- a/scripts/gcc-x86_64-has-stack-protector.sh
+++ b/scripts/gcc-x86_64-has-stack-protector.sh
@@ -1,6 +1,6 @@
 #!/bin/sh
 
-echo "int foo(void) { char X[200]; return 3; }" | $1 -S -xc -c -O0 -mcmodel=kernel -fstack-protector - -o - 2> /dev/null | grep -q "%gs"
+echo "int foo(void) { char X[200]; return 3; }" | $* -S -xc -c -O0 -mcmodel=kernel -fstack-protector - -o - 2> /dev/null | grep -q "%gs"
 if [ "$?" -eq "0" ] ; then
 	echo y
 else
-- 
cgit v0.10.2


From aa78bcfa01dec3cdbde3cda098ce32abbd9c3bf6 Mon Sep 17 00:00:00 2001
From: Brian Gerst <brgerst@gmail.com>
Date: Tue, 10 Feb 2009 09:51:45 -0500
Subject: x86: use pt_regs pointer in do_device_not_available()

The generic exception handler (error_code) passes in the pt_regs
pointer and the error code (unused in this case).  The commit
"x86: fix math_emu register frame access" changed this to pass by
value, which doesn't work correctly with stack protector enabled.
Change it back to use the pt_regs pointer.

Signed-off-by: Brian Gerst <brgerst@gmail.com>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index cf3bb05..0d53425 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -41,7 +41,7 @@ dotraplinkage void do_int3(struct pt_regs *, long);
 dotraplinkage void do_overflow(struct pt_regs *, long);
 dotraplinkage void do_bounds(struct pt_regs *, long);
 dotraplinkage void do_invalid_op(struct pt_regs *, long);
-dotraplinkage void do_device_not_available(struct pt_regs);
+dotraplinkage void do_device_not_available(struct pt_regs *, long);
 dotraplinkage void do_coprocessor_segment_overrun(struct pt_regs *, long);
 dotraplinkage void do_invalid_TSS(struct pt_regs *, long);
 dotraplinkage void do_segment_not_present(struct pt_regs *, long);
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 3b7b2e1..71a8f87 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -905,19 +905,20 @@ void math_emulate(struct math_emu_info *info)
 }
 #endif /* CONFIG_MATH_EMULATION */
 
-dotraplinkage void __kprobes do_device_not_available(struct pt_regs regs)
+dotraplinkage void __kprobes
+do_device_not_available(struct pt_regs *regs, long error_code)
 {
 #ifdef CONFIG_X86_32
 	if (read_cr0() & X86_CR0_EM) {
 		struct math_emu_info info = { };
 
-		conditional_sti(&regs);
+		conditional_sti(regs);
 
-		info.regs = &regs;
+		info.regs = regs;
 		math_emulate(&info);
 	} else {
 		math_state_restore(); /* interrupts still off */
-		conditional_sti(&regs);
+		conditional_sti(regs);
 	}
 #else
 	math_state_restore();
-- 
cgit v0.10.2


From 253f29a4ae9cc6cdc7b94f96517f27a93885a6ce Mon Sep 17 00:00:00 2001
From: Brian Gerst <brgerst@gmail.com>
Date: Tue, 10 Feb 2009 09:51:46 -0500
Subject: x86: pass in pt_regs pointer for syscalls that need it

Some syscalls need to access the pt_regs structure, either to copy
user register state or to modifiy it.  This patch adds stubs to load
the address of the pt_regs struct into the %eax register, and changes
the syscalls to regparm(1) to receive the pt_regs pointer as the
first argument.

Signed-off-by: Brian Gerst <brgerst@gmail.com>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h
index 5d98d0b6..2fd5926 100644
--- a/arch/x86/include/asm/linkage.h
+++ b/arch/x86/include/asm/linkage.h
@@ -18,6 +18,13 @@
 #define asmregparm __attribute__((regparm(3)))
 
 /*
+ * For syscalls that need a pointer to the pt_regs struct (ie. fork).
+ * The regs pointer is passed in %eax as the first argument.  The
+ * remaining function arguments remain on the stack.
+ */
+#define ptregscall __attribute__((regparm(1)))
+
+/*
  * Make sure the compiler doesn't do anything stupid with the
  * arguments on the stack - they are owned by the *caller*, not
  * the callee. This just fools gcc into not spilling into them,
diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h
index c0b0bda..61729525 100644
--- a/arch/x86/include/asm/syscalls.h
+++ b/arch/x86/include/asm/syscalls.h
@@ -29,21 +29,26 @@ asmlinkage int sys_get_thread_area(struct user_desc __user *);
 /* X86_32 only */
 #ifdef CONFIG_X86_32
 /* kernel/process_32.c */
-asmlinkage int sys_fork(struct pt_regs);
-asmlinkage int sys_clone(struct pt_regs);
-asmlinkage int sys_vfork(struct pt_regs);
-asmlinkage int sys_execve(struct pt_regs);
+ptregscall int sys_fork(struct pt_regs *);
+ptregscall int sys_clone(struct pt_regs *, unsigned long,
+			 unsigned long, int __user *,
+			 unsigned long, int __user *);
+ptregscall int sys_vfork(struct pt_regs *);
+ptregscall int sys_execve(struct pt_regs *, char __user *,
+			  char __user * __user *,
+			  char __user * __user *);
 
 /* kernel/signal_32.c */
 asmlinkage int sys_sigsuspend(int, int, old_sigset_t);
 asmlinkage int sys_sigaction(int, const struct old_sigaction __user *,
 			     struct old_sigaction __user *);
-asmlinkage int sys_sigaltstack(unsigned long);
-asmlinkage unsigned long sys_sigreturn(unsigned long);
-asmlinkage int sys_rt_sigreturn(unsigned long);
+ptregscall int sys_sigaltstack(struct pt_regs *, const stack_t __user *,
+			       stack_t __user *);
+ptregscall unsigned long sys_sigreturn(struct pt_regs *);
+ptregscall int sys_rt_sigreturn(struct pt_regs *);
 
 /* kernel/ioport.c */
-asmlinkage long sys_iopl(unsigned long);
+ptregscall long sys_iopl(struct pt_regs *, unsigned int);
 
 /* kernel/sys_i386_32.c */
 asmlinkage long sys_mmap2(unsigned long, unsigned long, unsigned long,
@@ -59,8 +64,8 @@ struct oldold_utsname;
 asmlinkage int sys_olduname(struct oldold_utsname __user *);
 
 /* kernel/vm86_32.c */
-asmlinkage int sys_vm86old(struct pt_regs);
-asmlinkage int sys_vm86(struct pt_regs);
+ptregscall int sys_vm86old(struct pt_regs *, struct vm86_struct __user *);
+ptregscall int sys_vm86(struct pt_regs *, unsigned long, unsigned long);
 
 #else /* CONFIG_X86_32 */
 
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 5f5bd22..3de7b57 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -697,6 +697,26 @@ syscall_badsys:
 END(syscall_badsys)
 	CFI_ENDPROC
 
+/*
+ * System calls that need a pt_regs pointer.
+ */
+#define PTREGSCALL(name) \
+	ALIGN; \
+ptregs_##name: \
+	leal 4(%esp),%eax; \
+	jmp sys_##name;
+
+PTREGSCALL(iopl)
+PTREGSCALL(fork)
+PTREGSCALL(clone)
+PTREGSCALL(vfork)
+PTREGSCALL(execve)
+PTREGSCALL(sigaltstack)
+PTREGSCALL(sigreturn)
+PTREGSCALL(rt_sigreturn)
+PTREGSCALL(vm86)
+PTREGSCALL(vm86old)
+
 .macro FIXUP_ESPFIX_STACK
 	/* since we are on a wrong stack, we cant make it a C code :( */
 	PER_CPU(gdt_page, %ebx)
diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c
index b12208f..7ec1486 100644
--- a/arch/x86/kernel/ioport.c
+++ b/arch/x86/kernel/ioport.c
@@ -131,10 +131,8 @@ static int do_iopl(unsigned int level, struct pt_regs *regs)
 }
 
 #ifdef CONFIG_X86_32
-asmlinkage long sys_iopl(unsigned long regsp)
+ptregscall long sys_iopl(struct pt_regs *regs, unsigned int level)
 {
-	struct pt_regs *regs = (struct pt_regs *)&regsp;
-	unsigned int level = regs->bx;
 	struct thread_struct *t = &current->thread;
 	int rc;
 
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index b50604b..5a9dcfb 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -603,24 +603,18 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	return prev_p;
 }
 
-asmlinkage int sys_fork(struct pt_regs regs)
+ptregscall int sys_fork(struct pt_regs *regs)
 {
-	return do_fork(SIGCHLD, regs.sp, &regs, 0, NULL, NULL);
+	return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL);
 }
 
-asmlinkage int sys_clone(struct pt_regs regs)
+ptregscall int sys_clone(struct pt_regs *regs, unsigned long clone_flags,
+			 unsigned long newsp, int __user *parent_tidptr,
+			 unsigned long unused, int __user *child_tidptr)
 {
-	unsigned long clone_flags;
-	unsigned long newsp;
-	int __user *parent_tidptr, *child_tidptr;
-
-	clone_flags = regs.bx;
-	newsp = regs.cx;
-	parent_tidptr = (int __user *)regs.dx;
-	child_tidptr = (int __user *)regs.di;
 	if (!newsp)
-		newsp = regs.sp;
-	return do_fork(clone_flags, newsp, &regs, 0, parent_tidptr, child_tidptr);
+		newsp = regs->sp;
+	return do_fork(clone_flags, newsp, regs, 0, parent_tidptr, child_tidptr);
 }
 
 /*
@@ -633,27 +627,26 @@ asmlinkage int sys_clone(struct pt_regs regs)
  * do not have enough call-clobbered registers to hold all
  * the information you need.
  */
-asmlinkage int sys_vfork(struct pt_regs regs)
+ptregscall int sys_vfork(struct pt_regs *regs)
 {
-	return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs.sp, &regs, 0, NULL, NULL);
+	return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, regs, 0, NULL, NULL);
 }
 
 /*
  * sys_execve() executes a new program.
  */
-asmlinkage int sys_execve(struct pt_regs regs)
+ptregscall int sys_execve(struct pt_regs *regs, char __user *u_filename,
+			  char __user * __user *argv,
+			  char __user * __user *envp)
 {
 	int error;
 	char *filename;
 
-	filename = getname((char __user *) regs.bx);
+	filename = getname(u_filename);
 	error = PTR_ERR(filename);
 	if (IS_ERR(filename))
 		goto out;
-	error = do_execve(filename,
-			(char __user * __user *) regs.cx,
-			(char __user * __user *) regs.dx,
-			&regs);
+	error = do_execve(filename, argv, envp, regs);
 	if (error == 0) {
 		/* Make sure we don't return using sysenter.. */
 		set_thread_flag(TIF_IRET);
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 8562387..d7a1583 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -549,39 +549,28 @@ sys_sigaction(int sig, const struct old_sigaction __user *act,
 #endif /* CONFIG_X86_32 */
 
 #ifdef CONFIG_X86_32
-asmlinkage int sys_sigaltstack(unsigned long bx)
-{
-	/*
-	 * This is needed to make gcc realize it doesn't own the
-	 * "struct pt_regs"
-	 */
-	struct pt_regs *regs = (struct pt_regs *)&bx;
-	const stack_t __user *uss = (const stack_t __user *)bx;
-	stack_t __user *uoss = (stack_t __user *)regs->cx;
-
-	return do_sigaltstack(uss, uoss, regs->sp);
-}
+ptregscall int
+sys_sigaltstack(struct pt_regs *regs, const stack_t __user *uss,
+		stack_t __user *uoss)
 #else /* !CONFIG_X86_32 */
 asmlinkage long
 sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss,
 		struct pt_regs *regs)
+#endif /* CONFIG_X86_32 */
 {
 	return do_sigaltstack(uss, uoss, regs->sp);
 }
-#endif /* CONFIG_X86_32 */
 
 /*
  * Do a signal return; undo the signal stack.
  */
 #ifdef CONFIG_X86_32
-asmlinkage unsigned long sys_sigreturn(unsigned long __unused)
+ptregscall unsigned long sys_sigreturn(struct pt_regs *regs)
 {
 	struct sigframe __user *frame;
-	struct pt_regs *regs;
 	unsigned long ax;
 	sigset_t set;
 
-	regs = (struct pt_regs *) &__unused;
 	frame = (struct sigframe __user *)(regs->sp - 8);
 
 	if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
@@ -640,23 +629,13 @@ badframe:
 }
 
 #ifdef CONFIG_X86_32
-/*
- * Note: do not pass in pt_regs directly as with tail-call optimization
- * GCC will incorrectly stomp on the caller's frame and corrupt user-space
- * register state:
- */
-asmlinkage int sys_rt_sigreturn(unsigned long __unused)
-{
-	struct pt_regs *regs = (struct pt_regs *)&__unused;
-
-	return do_rt_sigreturn(regs);
-}
+ptregscall int sys_rt_sigreturn(struct pt_regs *regs)
 #else /* !CONFIG_X86_32 */
 asmlinkage long sys_rt_sigreturn(struct pt_regs *regs)
+#endif /* CONFIG_X86_32 */
 {
 	return do_rt_sigreturn(regs);
 }
-#endif /* CONFIG_X86_32 */
 
 /*
  * OK, we're invoking a handler:
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
index e2e86a0..3bdb648 100644
--- a/arch/x86/kernel/syscall_table_32.S
+++ b/arch/x86/kernel/syscall_table_32.S
@@ -1,7 +1,7 @@
 ENTRY(sys_call_table)
 	.long sys_restart_syscall	/* 0 - old "setup()" system call, used for restarting */
 	.long sys_exit
-	.long sys_fork
+	.long ptregs_fork
 	.long sys_read
 	.long sys_write
 	.long sys_open		/* 5 */
@@ -10,7 +10,7 @@ ENTRY(sys_call_table)
 	.long sys_creat
 	.long sys_link
 	.long sys_unlink	/* 10 */
-	.long sys_execve
+	.long ptregs_execve
 	.long sys_chdir
 	.long sys_time
 	.long sys_mknod
@@ -109,17 +109,17 @@ ENTRY(sys_call_table)
 	.long sys_newlstat
 	.long sys_newfstat
 	.long sys_uname
-	.long sys_iopl		/* 110 */
+	.long ptregs_iopl	/* 110 */
 	.long sys_vhangup
 	.long sys_ni_syscall	/* old "idle" system call */
-	.long sys_vm86old
+	.long ptregs_vm86old
 	.long sys_wait4
 	.long sys_swapoff	/* 115 */
 	.long sys_sysinfo
 	.long sys_ipc
 	.long sys_fsync
-	.long sys_sigreturn
-	.long sys_clone		/* 120 */
+	.long ptregs_sigreturn
+	.long ptregs_clone	/* 120 */
 	.long sys_setdomainname
 	.long sys_newuname
 	.long sys_modify_ldt
@@ -165,14 +165,14 @@ ENTRY(sys_call_table)
 	.long sys_mremap
 	.long sys_setresuid16
 	.long sys_getresuid16	/* 165 */
-	.long sys_vm86
+	.long ptregs_vm86
 	.long sys_ni_syscall	/* Old sys_query_module */
 	.long sys_poll
 	.long sys_nfsservctl
 	.long sys_setresgid16	/* 170 */
 	.long sys_getresgid16
 	.long sys_prctl
-	.long sys_rt_sigreturn
+	.long ptregs_rt_sigreturn
 	.long sys_rt_sigaction
 	.long sys_rt_sigprocmask	/* 175 */
 	.long sys_rt_sigpending
@@ -185,11 +185,11 @@ ENTRY(sys_call_table)
 	.long sys_getcwd
 	.long sys_capget
 	.long sys_capset	/* 185 */
-	.long sys_sigaltstack
+	.long ptregs_sigaltstack
 	.long sys_sendfile
 	.long sys_ni_syscall	/* reserved for streams1 */
 	.long sys_ni_syscall	/* reserved for streams2 */
-	.long sys_vfork		/* 190 */
+	.long ptregs_vfork	/* 190 */
 	.long sys_getrlimit
 	.long sys_mmap2
 	.long sys_truncate64
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index 55ea30d..8fa6ba7 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -197,9 +197,8 @@ out:
 static int do_vm86_irq_handling(int subfunction, int irqnumber);
 static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk);
 
-asmlinkage int sys_vm86old(struct pt_regs regs)
+ptregscall int sys_vm86old(struct pt_regs *regs, struct vm86_struct __user *v86)
 {
-	struct vm86_struct __user *v86 = (struct vm86_struct __user *)regs.bx;
 	struct kernel_vm86_struct info; /* declare this _on top_,
 					 * this avoids wasting of stack space.
 					 * This remains on the stack until we
@@ -218,7 +217,7 @@ asmlinkage int sys_vm86old(struct pt_regs regs)
 	if (tmp)
 		goto out;
 	memset(&info.vm86plus, 0, (int)&info.regs32 - (int)&info.vm86plus);
-	info.regs32 = &regs;
+	info.regs32 = regs;
 	tsk->thread.vm86_info = v86;
 	do_sys_vm86(&info, tsk);
 	ret = 0;	/* we never return here */
@@ -227,7 +226,7 @@ out:
 }
 
 
-asmlinkage int sys_vm86(struct pt_regs regs)
+ptregscall int sys_vm86(struct pt_regs *regs, unsigned long cmd, unsigned long arg)
 {
 	struct kernel_vm86_struct info; /* declare this _on top_,
 					 * this avoids wasting of stack space.
@@ -239,12 +238,12 @@ asmlinkage int sys_vm86(struct pt_regs regs)
 	struct vm86plus_struct __user *v86;
 
 	tsk = current;
-	switch (regs.bx) {
+	switch (cmd) {
 	case VM86_REQUEST_IRQ:
 	case VM86_FREE_IRQ:
 	case VM86_GET_IRQ_BITS:
 	case VM86_GET_AND_RESET_IRQ:
-		ret = do_vm86_irq_handling(regs.bx, (int)regs.cx);
+		ret = do_vm86_irq_handling(cmd, (int)arg);
 		goto out;
 	case VM86_PLUS_INSTALL_CHECK:
 		/*
@@ -261,14 +260,14 @@ asmlinkage int sys_vm86(struct pt_regs regs)
 	ret = -EPERM;
 	if (tsk->thread.saved_sp0)
 		goto out;
-	v86 = (struct vm86plus_struct __user *)regs.cx;
+	v86 = (struct vm86plus_struct __user *)arg;
 	tmp = copy_vm86_regs_from_user(&info.regs, &v86->regs,
 				       offsetof(struct kernel_vm86_struct, regs32) -
 				       sizeof(info.regs));
 	ret = -EFAULT;
 	if (tmp)
 		goto out;
-	info.regs32 = &regs;
+	info.regs32 = regs;
 	info.vm86plus.is_vm86pus = 1;
 	tsk->thread.vm86_info = (struct vm86_struct __user *)v86;
 	do_sys_vm86(&info, tsk);
-- 
cgit v0.10.2


From 9c8bb6b534d1c89a20bf9bb45e1471cf8f4747c0 Mon Sep 17 00:00:00 2001
From: Brian Gerst <brgerst@gmail.com>
Date: Tue, 10 Feb 2009 09:51:47 -0500
Subject: x86: drop -fno-stack-protector annotations after pt_regs fixes

Now that no functions rely on struct pt_regs being passed by value,
various "no stack protector" annotations can be dropped.

Signed-off-by: Brian Gerst <brgerst@gmail.com>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index b1f8be3..37fa30b 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -24,24 +24,6 @@ CFLAGS_vsyscall_64.o	:= $(PROFILING) -g0 $(nostackp)
 CFLAGS_hpet.o		:= $(nostackp)
 CFLAGS_tsc.o		:= $(nostackp)
 CFLAGS_paravirt.o	:= $(nostackp)
-#
-# On x86_32, register frame is passed verbatim on stack as struct
-# pt_regs.  gcc considers the parameter to belong to the callee and
-# with -fstack-protector it copies pt_regs to the callee's stack frame
-# to put the structure after the stack canary causing changes made by
-# the exception handlers to be lost.  Turn off stack protector for all
-# files containing functions which take struct pt_regs from register
-# frame.
-#
-# The proper way to fix this is to teach gcc that the argument belongs
-# to the caller for these functions, oh well...
-#
-ifdef CONFIG_X86_32
-CFLAGS_process_32.o	:= $(nostackp)
-CFLAGS_vm86_32.o	:= $(nostackp)
-CFLAGS_signal.o		:= $(nostackp)
-CFLAGS_traps.o		:= $(nostackp)
-endif
 
 obj-y			:= process_$(BITS).o signal.o entry_$(BITS).o
 obj-y			+= traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
-- 
cgit v0.10.2


From 1c0040047d5499599cc231ca3f105be3ceff8562 Mon Sep 17 00:00:00 2001
From: Dean Nelson <dcn@sgi.com>
Date: Mon, 9 Feb 2009 10:25:20 -0600
Subject: SGI IA64 UV: fix ia64 build error in the linux-next tree

Fix the ia64 build error that occurs in the linux-next tree by introducing
an ia64 version of uv.h.

Additionally, clean up the usage of is_uv_system().

Signed-off-by: Dean Nelson <dcn@sgi.com>
Signed-off-by: Jack Steiner <steiner@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/ia64/include/asm/uv/uv.h b/arch/ia64/include/asm/uv/uv.h
new file mode 100644
index 0000000..61b5bdf
--- /dev/null
+++ b/arch/ia64/include/asm/uv/uv.h
@@ -0,0 +1,13 @@
+#ifndef _ASM_IA64_UV_UV_H
+#define _ASM_IA64_UV_UV_H
+
+#include <asm/system.h>
+#include <asm/sn/simulator.h>
+
+static inline int is_uv_system(void)
+{
+	/* temporary support for running on hardware simulator */
+	return IS_MEDUSA() || ia64_platform_is("uv");
+}
+
+#endif	/* _ASM_IA64_UV_UV_H */
diff --git a/drivers/misc/sgi-gru/gru.h b/drivers/misc/sgi-gru/gru.h
index 1b5f579..f93f03a 100644
--- a/drivers/misc/sgi-gru/gru.h
+++ b/drivers/misc/sgi-gru/gru.h
@@ -19,8 +19,6 @@
 #ifndef __GRU_H__
 #define __GRU_H__
 
-#include <asm/uv/uv.h>
-
 /*
  * GRU architectural definitions
  */
diff --git a/drivers/misc/sgi-gru/grufile.c b/drivers/misc/sgi-gru/grufile.c
index 6509838..c67e4e8 100644
--- a/drivers/misc/sgi-gru/grufile.c
+++ b/drivers/misc/sgi-gru/grufile.c
@@ -36,23 +36,11 @@
 #include <linux/interrupt.h>
 #include <linux/proc_fs.h>
 #include <linux/uaccess.h>
+#include <asm/uv/uv.h>
 #include "gru.h"
 #include "grulib.h"
 #include "grutables.h"
 
-#if defined CONFIG_X86_64
-#include <asm/genapic.h>
-#include <asm/irq.h>
-#define IS_UV()		is_uv_system()
-#elif defined CONFIG_IA64
-#include <asm/system.h>
-#include <asm/sn/simulator.h>
-/* temp support for running on hardware simulator */
-#define IS_UV()		IS_MEDUSA() || ia64_platform_is("uv")
-#else
-#define IS_UV()		0
-#endif
-
 #include <asm/uv/uv_hub.h>
 #include <asm/uv/uv_mmrs.h>
 
@@ -381,7 +369,7 @@ static int __init gru_init(void)
 	char id[10];
 	void *gru_start_vaddr;
 
-	if (!IS_UV())
+	if (!is_uv_system())
 		return 0;
 
 #if defined CONFIG_IA64
@@ -451,7 +439,7 @@ static void __exit gru_exit(void)
 	int order = get_order(sizeof(struct gru_state) *
 			      GRU_CHIPLETS_PER_BLADE);
 
-	if (!IS_UV())
+	if (!is_uv_system())
 		return;
 
 	for (i = 0; i < GRU_CHIPLETS_PER_BLADE; i++)
diff --git a/drivers/misc/sgi-xp/xp.h b/drivers/misc/sgi-xp/xp.h
index 069ad3a..2275126 100644
--- a/drivers/misc/sgi-xp/xp.h
+++ b/drivers/misc/sgi-xp/xp.h
@@ -15,21 +15,19 @@
 
 #include <linux/mutex.h>
 
+#if defined CONFIG_X86_UV || defined CONFIG_IA64_SGI_UV
 #include <asm/uv/uv.h>
+#define is_uv()		is_uv_system()
+#endif
+
+#ifndef is_uv
+#define is_uv()		0
+#endif
 
-#ifdef CONFIG_IA64
+#if defined CONFIG_IA64
 #include <asm/system.h>
 #include <asm/sn/arch.h>	/* defines is_shub1() and is_shub2() */
 #define is_shub()	ia64_platform_is("sn2")
-#ifdef CONFIG_IA64_SGI_UV
-#define is_uv()		ia64_platform_is("uv")
-#else
-#define is_uv()		0
-#endif
-#endif
-#ifdef CONFIG_X86_64
-#include <asm/genapic.h>
-#define is_uv()		is_uv_system()
 #endif
 
 #ifndef is_shub1
@@ -44,10 +42,6 @@
 #define is_shub()	0
 #endif
 
-#ifndef is_uv
-#define is_uv()		0
-#endif
-
 #ifdef USE_DBUG_ON
 #define DBUG_ON(condition)	BUG_ON(condition)
 #else
-- 
cgit v0.10.2


From b12bdaf11f935d7be030207e3c77faeaeab8ded3 Mon Sep 17 00:00:00 2001
From: Brian Gerst <brgerst@gmail.com>
Date: Wed, 11 Feb 2009 16:43:58 -0500
Subject: x86: use regparm(3) for passed-in pt_regs pointer

Some syscalls need to access the pt_regs structure, either to copy
user register state or to modifiy it.  This patch adds stubs to load
the address of the pt_regs struct into the %eax register, and changes
the syscalls to take the pointer as an argument instead of relying on
the assumption that the pt_regs structure overlaps the function
arguments.

Drop the use of regparm(1) due to concern about gcc bugs, and to move
in the direction of the eventual removal of regparm(0) for asmlinkage.

Signed-off-by: Brian Gerst <brgerst@gmail.com>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>

diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h
index 2fd5926..5d98d0b6 100644
--- a/arch/x86/include/asm/linkage.h
+++ b/arch/x86/include/asm/linkage.h
@@ -18,13 +18,6 @@
 #define asmregparm __attribute__((regparm(3)))
 
 /*
- * For syscalls that need a pointer to the pt_regs struct (ie. fork).
- * The regs pointer is passed in %eax as the first argument.  The
- * remaining function arguments remain on the stack.
- */
-#define ptregscall __attribute__((regparm(1)))
-
-/*
  * Make sure the compiler doesn't do anything stupid with the
  * arguments on the stack - they are owned by the *caller*, not
  * the callee. This just fools gcc into not spilling into them,
diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h
index 61729525..77bb31a 100644
--- a/arch/x86/include/asm/syscalls.h
+++ b/arch/x86/include/asm/syscalls.h
@@ -29,26 +29,21 @@ asmlinkage int sys_get_thread_area(struct user_desc __user *);
 /* X86_32 only */
 #ifdef CONFIG_X86_32
 /* kernel/process_32.c */
-ptregscall int sys_fork(struct pt_regs *);
-ptregscall int sys_clone(struct pt_regs *, unsigned long,
-			 unsigned long, int __user *,
-			 unsigned long, int __user *);
-ptregscall int sys_vfork(struct pt_regs *);
-ptregscall int sys_execve(struct pt_regs *, char __user *,
-			  char __user * __user *,
-			  char __user * __user *);
+int sys_fork(struct pt_regs *);
+int sys_clone(struct pt_regs *);
+int sys_vfork(struct pt_regs *);
+int sys_execve(struct pt_regs *);
 
 /* kernel/signal_32.c */
 asmlinkage int sys_sigsuspend(int, int, old_sigset_t);
 asmlinkage int sys_sigaction(int, const struct old_sigaction __user *,
 			     struct old_sigaction __user *);
-ptregscall int sys_sigaltstack(struct pt_regs *, const stack_t __user *,
-			       stack_t __user *);
-ptregscall unsigned long sys_sigreturn(struct pt_regs *);
-ptregscall int sys_rt_sigreturn(struct pt_regs *);
+int sys_sigaltstack(struct pt_regs *);
+unsigned long sys_sigreturn(struct pt_regs *);
+int sys_rt_sigreturn(struct pt_regs *);
 
 /* kernel/ioport.c */
-ptregscall long sys_iopl(struct pt_regs *, unsigned int);
+long sys_iopl(struct pt_regs *);
 
 /* kernel/sys_i386_32.c */
 asmlinkage long sys_mmap2(unsigned long, unsigned long, unsigned long,
@@ -64,8 +59,8 @@ struct oldold_utsname;
 asmlinkage int sys_olduname(struct oldold_utsname __user *);
 
 /* kernel/vm86_32.c */
-ptregscall int sys_vm86old(struct pt_regs *, struct vm86_struct __user *);
-ptregscall int sys_vm86(struct pt_regs *, unsigned long, unsigned long);
+int sys_vm86old(struct pt_regs *);
+int sys_vm86(struct pt_regs *);
 
 #else /* CONFIG_X86_32 */
 
diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c
index 7ec1486..e41980a 100644
--- a/arch/x86/kernel/ioport.c
+++ b/arch/x86/kernel/ioport.c
@@ -131,8 +131,9 @@ static int do_iopl(unsigned int level, struct pt_regs *regs)
 }
 
 #ifdef CONFIG_X86_32
-ptregscall long sys_iopl(struct pt_regs *regs, unsigned int level)
+long sys_iopl(struct pt_regs *regs)
 {
+	unsigned int level = regs->bx;
 	struct thread_struct *t = &current->thread;
 	int rc;
 
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 5a9dcfb..fec79ad 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -603,15 +603,21 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	return prev_p;
 }
 
-ptregscall int sys_fork(struct pt_regs *regs)
+int sys_fork(struct pt_regs *regs)
 {
 	return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL);
 }
 
-ptregscall int sys_clone(struct pt_regs *regs, unsigned long clone_flags,
-			 unsigned long newsp, int __user *parent_tidptr,
-			 unsigned long unused, int __user *child_tidptr)
+int sys_clone(struct pt_regs *regs)
 {
+	unsigned long clone_flags;
+	unsigned long newsp;
+	int __user *parent_tidptr, *child_tidptr;
+
+	clone_flags = regs->bx;
+	newsp = regs->cx;
+	parent_tidptr = (int __user *)regs->dx;
+	child_tidptr = (int __user *)regs->di;
 	if (!newsp)
 		newsp = regs->sp;
 	return do_fork(clone_flags, newsp, regs, 0, parent_tidptr, child_tidptr);
@@ -627,7 +633,7 @@ ptregscall int sys_clone(struct pt_regs *regs, unsigned long clone_flags,
  * do not have enough call-clobbered registers to hold all
  * the information you need.
  */
-ptregscall int sys_vfork(struct pt_regs *regs)
+int sys_vfork(struct pt_regs *regs)
 {
 	return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, regs, 0, NULL, NULL);
 }
@@ -635,18 +641,19 @@ ptregscall int sys_vfork(struct pt_regs *regs)
 /*
  * sys_execve() executes a new program.
  */
-ptregscall int sys_execve(struct pt_regs *regs, char __user *u_filename,
-			  char __user * __user *argv,
-			  char __user * __user *envp)
+int sys_execve(struct pt_regs *regs)
 {
 	int error;
 	char *filename;
 
-	filename = getname(u_filename);
+	filename = getname((char __user *) regs->bx);
 	error = PTR_ERR(filename);
 	if (IS_ERR(filename))
 		goto out;
-	error = do_execve(filename, argv, envp, regs);
+	error = do_execve(filename,
+			(char __user * __user *) regs->cx,
+			(char __user * __user *) regs->dx,
+			regs);
 	if (error == 0) {
 		/* Make sure we don't return using sysenter.. */
 		set_thread_flag(TIF_IRET);
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index d7a1583..ccfb274 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -549,23 +549,27 @@ sys_sigaction(int sig, const struct old_sigaction __user *act,
 #endif /* CONFIG_X86_32 */
 
 #ifdef CONFIG_X86_32
-ptregscall int
-sys_sigaltstack(struct pt_regs *regs, const stack_t __user *uss,
-		stack_t __user *uoss)
+int sys_sigaltstack(struct pt_regs *regs)
+{
+	const stack_t __user *uss = (const stack_t __user *)regs->bx;
+	stack_t __user *uoss = (stack_t __user *)regs->cx;
+
+	return do_sigaltstack(uss, uoss, regs->sp);
+}
 #else /* !CONFIG_X86_32 */
 asmlinkage long
 sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss,
 		struct pt_regs *regs)
-#endif /* CONFIG_X86_32 */
 {
 	return do_sigaltstack(uss, uoss, regs->sp);
 }
+#endif /* CONFIG_X86_32 */
 
 /*
  * Do a signal return; undo the signal stack.
  */
 #ifdef CONFIG_X86_32
-ptregscall unsigned long sys_sigreturn(struct pt_regs *regs)
+unsigned long sys_sigreturn(struct pt_regs *regs)
 {
 	struct sigframe __user *frame;
 	unsigned long ax;
@@ -629,13 +633,16 @@ badframe:
 }
 
 #ifdef CONFIG_X86_32
-ptregscall int sys_rt_sigreturn(struct pt_regs *regs)
+int sys_rt_sigreturn(struct pt_regs *regs)
+{
+	return do_rt_sigreturn(regs);
+}
 #else /* !CONFIG_X86_32 */
 asmlinkage long sys_rt_sigreturn(struct pt_regs *regs)
-#endif /* CONFIG_X86_32 */
 {
 	return do_rt_sigreturn(regs);
 }
+#endif /* CONFIG_X86_32 */
 
 /*
  * OK, we're invoking a handler:
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index 8fa6ba7..d7ac84e 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -197,8 +197,9 @@ out:
 static int do_vm86_irq_handling(int subfunction, int irqnumber);
 static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk);
 
-ptregscall int sys_vm86old(struct pt_regs *regs, struct vm86_struct __user *v86)
+int sys_vm86old(struct pt_regs *regs)
 {
+	struct vm86_struct __user *v86 = (struct vm86_struct __user *)regs->bx;
 	struct kernel_vm86_struct info; /* declare this _on top_,
 					 * this avoids wasting of stack space.
 					 * This remains on the stack until we
@@ -226,7 +227,7 @@ out:
 }
 
 
-ptregscall int sys_vm86(struct pt_regs *regs, unsigned long cmd, unsigned long arg)
+int sys_vm86(struct pt_regs *regs)
 {
 	struct kernel_vm86_struct info; /* declare this _on top_,
 					 * this avoids wasting of stack space.
@@ -238,12 +239,12 @@ ptregscall int sys_vm86(struct pt_regs *regs, unsigned long cmd, unsigned long a
 	struct vm86plus_struct __user *v86;
 
 	tsk = current;
-	switch (cmd) {
+	switch (regs->bx) {
 	case VM86_REQUEST_IRQ:
 	case VM86_FREE_IRQ:
 	case VM86_GET_IRQ_BITS:
 	case VM86_GET_AND_RESET_IRQ:
-		ret = do_vm86_irq_handling(cmd, (int)arg);
+		ret = do_vm86_irq_handling(regs->bx, (int)regs->cx);
 		goto out;
 	case VM86_PLUS_INSTALL_CHECK:
 		/*
@@ -260,7 +261,7 @@ ptregscall int sys_vm86(struct pt_regs *regs, unsigned long cmd, unsigned long a
 	ret = -EPERM;
 	if (tsk->thread.saved_sp0)
 		goto out;
-	v86 = (struct vm86plus_struct __user *)arg;
+	v86 = (struct vm86plus_struct __user *)regs->cx;
 	tmp = copy_vm86_regs_from_user(&info.regs, &v86->regs,
 				       offsetof(struct kernel_vm86_struct, regs32) -
 				       sizeof(info.regs));
-- 
cgit v0.10.2


From 744525092727827a9cf0044074db3e22dcf354fd Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@linux.intel.com>
Date: Wed, 11 Feb 2009 16:31:40 -0800
Subject: x86: merge sys_rt_sigreturn between 32 and 64 bits

Impact: cleanup

With the recent changes in the 32-bit code to make system calls which
use struct pt_regs take a pointer, sys_rt_sigreturn() have become
identical between 32 and 64 bits, and both are empty wrappers around
do_rt_sigreturn().  Remove both wrappers and rename both to
sys_rt_sigreturn().

Cc: Brian Gerst <brgerst@gmail.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>

diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h
index 77bb31a..68b1be1 100644
--- a/arch/x86/include/asm/syscalls.h
+++ b/arch/x86/include/asm/syscalls.h
@@ -40,7 +40,7 @@ asmlinkage int sys_sigaction(int, const struct old_sigaction __user *,
 			     struct old_sigaction __user *);
 int sys_sigaltstack(struct pt_regs *);
 unsigned long sys_sigreturn(struct pt_regs *);
-int sys_rt_sigreturn(struct pt_regs *);
+long sys_rt_sigreturn(struct pt_regs *);
 
 /* kernel/ioport.c */
 long sys_iopl(struct pt_regs *);
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index ccfb274..7cdcd16 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -601,7 +601,7 @@ badframe:
 }
 #endif /* CONFIG_X86_32 */
 
-static long do_rt_sigreturn(struct pt_regs *regs)
+long sys_rt_sigreturn(struct pt_regs *regs)
 {
 	struct rt_sigframe __user *frame;
 	unsigned long ax;
@@ -632,18 +632,6 @@ badframe:
 	return 0;
 }
 
-#ifdef CONFIG_X86_32
-int sys_rt_sigreturn(struct pt_regs *regs)
-{
-	return do_rt_sigreturn(regs);
-}
-#else /* !CONFIG_X86_32 */
-asmlinkage long sys_rt_sigreturn(struct pt_regs *regs)
-{
-	return do_rt_sigreturn(regs);
-}
-#endif /* CONFIG_X86_32 */
-
 /*
  * OK, we're invoking a handler:
  */
-- 
cgit v0.10.2


From 58105ef1857112a186696c9b8957020090226a28 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <randy.dunlap@oracle.com>
Date: Sat, 31 Jan 2009 12:32:26 -0800
Subject: x86: UV: fix header struct usage

Impact: Fixes warning

Fix uv.h struct usage:

arch/x86/include/asm/uv/uv.h:16: warning: 'struct mm_struct' declared inside parameter list
arch/x86/include/asm/uv/uv.h:16: warning: its scope is only this definition or declaration, which is probably not what you want

Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>

diff --git a/arch/x86/include/asm/uv/uv.h b/arch/x86/include/asm/uv/uv.h
index 8ac1d7e..8242bf9 100644
--- a/arch/x86/include/asm/uv/uv.h
+++ b/arch/x86/include/asm/uv/uv.h
@@ -3,6 +3,9 @@
 
 enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC};
 
+struct cpumask;
+struct mm_struct;
+
 #ifdef CONFIG_X86_UV
 
 extern enum uv_system_type get_uv_system_type(void);
-- 
cgit v0.10.2