From 38cb47ba0187c481aa949d3bbf149e014e8cacda Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 4 Feb 2008 16:47:54 +0100
Subject: x86: relax RAM check in ioremap()

Kevin Winchester reported the loss of direct rendering, due to:

[    0.588184] agpgart: Detected AGP bridge 0
[    0.588184] agpgart: unable to get memory for graphics translation table.
[    0.588184] agpgart: agp_backend_initialize() failed.
[    0.588207] agpgart-amd64: probe of 0000:00:00.0 failed with error -12

and bisected it down to:

  commit 266b9f8727976769e2ed2dad77ac9295f37e321e
  Author: Thomas Gleixner <tglx@linutronix.de>
  Date:   Wed Jan 30 13:34:06 2008 +0100

      x86: fix ioremap RAM check

this check was too strict and caused an ioremap() failure.

the problem is due to the somewhat unclean way of how the GART code
reserves a memory range for its aperture, and how it utilizes it
later on.

Allow RAM pages to be ioremap()-ed too, as long as they are reserved.

Bisected-by: Kevin Winchester <kjwinchester@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Tested-by: Kevin Winchester <kjwinchester@gmail.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index c004d94..1a88d15 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -116,7 +116,7 @@ static void __iomem *__ioremap(unsigned long phys_addr, unsigned long size,
 {
 	void __iomem *addr;
 	struct vm_struct *area;
-	unsigned long offset, last_addr;
+	unsigned long pfn, offset, last_addr;
 	pgprot_t prot;
 
 	/* Don't allow wraparound or zero size */
@@ -133,9 +133,10 @@ static void __iomem *__ioremap(unsigned long phys_addr, unsigned long size,
 	/*
 	 * Don't allow anybody to remap normal RAM that we're using..
 	 */
-	for (offset = phys_addr >> PAGE_SHIFT; offset < max_pfn_mapped &&
-	     (offset << PAGE_SHIFT) < last_addr; offset++) {
-		if (page_is_ram(offset))
+	for (pfn = phys_addr >> PAGE_SHIFT; pfn < max_pfn_mapped &&
+	     (pfn << PAGE_SHIFT) < last_addr; pfn++) {
+		if (page_is_ram(pfn) && pfn_valid(pfn) &&
+		    !PageReserved(pfn_to_page(pfn)))
 			return NULL;
 	}
 
-- 
cgit v0.10.2


From 262d5ee27271703a0396d63649430f43f3b5deb3 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 4 Feb 2008 16:47:54 +0100
Subject: x86: VMI fix

Jeff Chua bisected down a vmware guest boot breakage (hang) to
this paravirt change:

  commit 8d947344c47a40626730bb80d136d8daac9f2060
  Author: Glauber de Oliveira Costa <gcosta@redhat.com>
  Date:   Wed Jan 30 13:31:12 2008 +0100

    x86: change write_idt_entry signature

fix the off-by-one indexing bug ...

Bisected-by: Jeff Chua <jeff.chua.linux@gmail.com>
Tested-by: Jeff Chua <jeff.chua.linux@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index 4525bc2..12affe1 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -220,21 +220,21 @@ static void vmi_set_tr(void)
 static void vmi_write_idt_entry(gate_desc *dt, int entry, const gate_desc *g)
 {
 	u32 *idt_entry = (u32 *)g;
-	vmi_ops.write_idt_entry(dt, entry, idt_entry[0], idt_entry[2]);
+	vmi_ops.write_idt_entry(dt, entry, idt_entry[0], idt_entry[1]);
 }
 
 static void vmi_write_gdt_entry(struct desc_struct *dt, int entry,
 				const void *desc, int type)
 {
 	u32 *gdt_entry = (u32 *)desc;
-	vmi_ops.write_gdt_entry(dt, entry, gdt_entry[0], gdt_entry[2]);
+	vmi_ops.write_gdt_entry(dt, entry, gdt_entry[0], gdt_entry[1]);
 }
 
 static void vmi_write_ldt_entry(struct desc_struct *dt, int entry,
 				const void *desc)
 {
 	u32 *ldt_entry = (u32 *)desc;
-	vmi_ops.write_idt_entry(dt, entry, ldt_entry[0], ldt_entry[2]);
+	vmi_ops.write_idt_entry(dt, entry, ldt_entry[0], ldt_entry[1]);
 }
 
 static void vmi_load_sp0(struct tss_struct *tss,
-- 
cgit v0.10.2


From 3a900d89db35c133bc0874e71d9156b22db362b4 Mon Sep 17 00:00:00 2001
From: Sam Ravnborg <sam@ravnborg.org>
Date: Mon, 4 Feb 2008 16:47:55 +0100
Subject: x86: restore correct module name for apm

The apm module was renamed to apm_32 during the merge of 32 and 64 bit
x86, which is unfortunate. As apm is 32 bit specific we would like to keep
the _32 in the filename, but the module should be named apm.

Fix this in the Makefile.

Reported-by: "A.E.Lawrence" <lawrence_a_e@ntlworld.com>
Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: "A.E.Lawrence" <lawrence_a_e@ntlworld.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 6f81300..f080635 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -37,7 +37,8 @@ obj-$(CONFIG_X86_MSR)		+= msr.o
 obj-$(CONFIG_X86_CPUID)		+= cpuid.o
 obj-$(CONFIG_MICROCODE)		+= microcode.o
 obj-$(CONFIG_PCI)		+= early-quirks.o
-obj-$(CONFIG_APM)		+= apm_32.o
+apm-y				:= apm_32.o
+obj-$(CONFIG_APM)		+= apm.o
 obj-$(CONFIG_X86_SMP)		+= smp_$(BITS).o smpboot_$(BITS).o tsc_sync.o
 obj-$(CONFIG_X86_32_SMP)	+= smpcommon_32.o
 obj-$(CONFIG_X86_64_SMP)	+= smp_64.o smpboot_64.o tsc_sync.o
-- 
cgit v0.10.2


From 3bc9a77e84096148d5ada29c986d6e71a20eaeda Mon Sep 17 00:00:00 2001
From: Sam Ravnborg <sam@ravnborg.org>
Date: Mon, 4 Feb 2008 16:47:55 +0100
Subject: x86: rename module scx200_32 to scx200

The module scx200 was renamed to scx200_32 by the
merge of the 32 and 64 bit x86 arch trees.

Keep the _32 suffix on the .c file as it is 32 bit
specific and fix the module name in the Makefile.

Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index f080635..21dc1a0 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -75,7 +75,8 @@ ifdef CONFIG_INPUT_PCSPKR
 obj-y				+= pcspeaker.o
 endif
 
-obj-$(CONFIG_SCx200)		+= scx200_32.o
+obj-$(CONFIG_SCx200)		+= scx200.o
+scx200-y			+= scx200_32.o
 
 ###
 # 64 bit specific files
-- 
cgit v0.10.2


From 4cf31841762954ad2868156ccba94d798a16630f Mon Sep 17 00:00:00 2001
From: Florian Fainelli <florian.fainelli@telecomint.eu>
Date: Mon, 4 Feb 2008 16:47:55 +0100
Subject: x86: mach-rdc321x Kconfig fix

The mach-rdc321x uses the leds-gpio driver and explicitly
selects it. This driver also depends on the leds class module,
so select it as well.

Signed-off-by: Florian Fainelli <florian.fainelli@telecomint.eu>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 7109037..77198f4 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -309,6 +309,7 @@ config X86_RDC321X
 	select M486
 	select X86_REBOOTFIXUPS
 	select GENERIC_GPIO
+	select LEDS_CLASS
 	select LEDS_GPIO
 	help
 	  This option is needed for RDC R-321x system-on-chip, also known
-- 
cgit v0.10.2


From b50516fc20f756cf4d18a89f6f9977d60151ccba Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 4 Feb 2008 16:47:55 +0100
Subject: x86: CPA remove bogus NX clear

In split_large_page we clear the NX bit for the new split ptes, but we
need to preserve the original setting of it for the split ptes.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index e297bd6..877b5cc 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -225,7 +225,6 @@ static int split_large_page(pte_t *kpte, unsigned long address)
 	paravirt_alloc_pt(&init_mm, page_to_pfn(base));
 #endif
 
-	pgprot_val(ref_prot) &= ~_PAGE_NX;
 	for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE)
 		set_pte(&pbase[i], pfn_pte(addr >> PAGE_SHIFT, ref_prot));
 
-- 
cgit v0.10.2


From 6118f76fb7408bad7631345cc41a5f0efc49ce3e Mon Sep 17 00:00:00 2001
From: Yinghai Lu <Yinghai.Lu@Sun.COM>
Date: Mon, 4 Feb 2008 16:47:56 +0100
Subject: x86: print out node_data addr and bootmap_start addr

print out node_data addr and bootmap_start addr.

helpful for debugging early crashes on high-end NUMA systems.

Signed-off-by: Yinghai Lu <yinghai.lu@sun.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index a920d09..5a02bf4 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -202,6 +202,8 @@ void __init setup_node_bootmem(int nodeid, unsigned long start,
 	if (node_data[nodeid] == NULL)
 		return;
 	nodedata_phys = __pa(node_data[nodeid]);
+	printk(KERN_INFO "  NODE_DATA [%016lx - %016lx]\n", nodedata_phys,
+		nodedata_phys + pgdat_size - 1);
 
 	memset(NODE_DATA(nodeid), 0, sizeof(pg_data_t));
 	NODE_DATA(nodeid)->bdata = &plat_node_bdata[nodeid];
@@ -225,12 +227,15 @@ void __init setup_node_bootmem(int nodeid, unsigned long start,
 		return;
 	}
 	bootmap_start = __pa(bootmap);
-	Dprintk("bootmap start %lu pages %lu\n", bootmap_start, bootmap_pages);
 
 	bootmap_size = init_bootmem_node(NODE_DATA(nodeid),
 					 bootmap_start >> PAGE_SHIFT,
 					 start_pfn, end_pfn);
 
+	printk(KERN_INFO "  bootmap [%016lx -  %016lx] pages %lx\n",
+		 bootmap_start, bootmap_start + bootmap_size - 1,
+		 bootmap_pages);
+
 	free_bootmem_with_active_regions(nodeid, end);
 
 	reserve_bootmem_node(NODE_DATA(nodeid), nodedata_phys, pgdat_size);
-- 
cgit v0.10.2


From cf89ec924da5b76cbff293a1b378f312c7161411 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Mon, 4 Feb 2008 16:47:56 +0100
Subject: x86: reduce ifdef sections in fault.c

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index e4440d0..3fff490 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -508,6 +508,10 @@ static int vmalloc_fault(unsigned long address)
 	pmd_t *pmd, *pmd_ref;
 	pte_t *pte, *pte_ref;
 
+	/* Make sure we are in vmalloc area */
+	if (!(address >= VMALLOC_START && address < VMALLOC_END))
+		return -1;
+
 	/* Copy kernel mappings over when needed. This can also
 	   happen within a race in page table update. In the later
 	   case just flush. */
@@ -603,6 +607,9 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
 	 */
 #ifdef CONFIG_X86_32
 	if (unlikely(address >= TASK_SIZE)) {
+#else
+	if (unlikely(address >= TASK_SIZE64)) {
+#endif
 		if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) &&
 		    vmalloc_fault(address) >= 0)
 			return;
@@ -618,6 +625,8 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
 		goto bad_area_nosemaphore;
 	}
 
+
+#ifdef CONFIG_X86_32
 	/* It's safe to allow irq's after cr2 has been saved and the vmalloc
 	   fault has been handled. */
 	if (regs->flags & (X86_EFLAGS_IF|VM_MASK))
@@ -630,28 +639,6 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
 	if (in_atomic() || !mm)
 		goto bad_area_nosemaphore;
 #else /* CONFIG_X86_64 */
-	if (unlikely(address >= TASK_SIZE64)) {
-		/*
-		 * Don't check for the module range here: its PML4
-		 * is always initialized because it's shared with the main
-		 * kernel text. Only vmalloc may need PML4 syncups.
-		 */
-		if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) &&
-		      ((address >= VMALLOC_START && address < VMALLOC_END))) {
-			if (vmalloc_fault(address) >= 0)
-				return;
-		}
-
-		/* Can handle a stale RO->RW TLB */
-		if (spurious_fault(address, error_code))
-			return;
-
-		/*
-		 * Don't take the mm semaphore here. If we fixup a prefetch
-		 * fault we could otherwise deadlock.
-		 */
-		goto bad_area_nosemaphore;
-	}
 	if (likely(regs->flags & X86_EFLAGS_IF))
 		local_irq_enable();
 
-- 
cgit v0.10.2


From 1622ac23bd3568c3ae8bb391dd3adb51887d7141 Mon Sep 17 00:00:00 2001
From: Ian Campbell <ijc@hellion.org.uk>
Date: Mon, 4 Feb 2008 16:47:56 +0100
Subject: x86: define OBJCOPYFLAGS explicitly for each target.

Do this rather than defining a global version and overriding it in
almost all cases in order to make subsequent patches simpler.

Signed-off-by: Ian Campbell <ijc@hellion.org.uk>
Acked-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 8978e98..364865b 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -92,7 +92,6 @@ KBUILD_AFLAGS += $(cfi) $(cfi-sigframe)
 KBUILD_CFLAGS += $(cfi) $(cfi-sigframe)
 
 LDFLAGS := -m elf_$(UTS_MACHINE)
-OBJCOPYFLAGS := -O binary -R .note -R .comment -S
 
 # Speed up the build
 KBUILD_CFLAGS += -pipe
diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
index 349b81a..254a583 100644
--- a/arch/x86/boot/Makefile
+++ b/arch/x86/boot/Makefile
@@ -80,6 +80,7 @@ $(obj)/zImage $(obj)/bzImage: $(obj)/setup.bin \
 	$(call if_changed,image)
 	@echo 'Kernel: $@ is ready' ' (#'`cat .version`')'
 
+OBJCOPYFLAGS_vmlinux.bin := -O binary -R .note -R .comment -S
 $(obj)/vmlinux.bin: $(obj)/compressed/vmlinux FORCE
 	$(call if_changed,objcopy)
 
@@ -90,7 +91,6 @@ $(obj)/setup.elf: $(src)/setup.ld $(SETUP_OBJS) FORCE
 	$(call if_changed,ld)
 
 OBJCOPYFLAGS_setup.bin	:= -O binary
-
 $(obj)/setup.bin: $(obj)/setup.elf FORCE
 	$(call if_changed,objcopy)
 
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index fe24cea..d2b9f3b 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -22,6 +22,7 @@ $(obj)/vmlinux: $(src)/vmlinux_$(BITS).lds $(obj)/head_$(BITS).o $(obj)/misc.o $
 	$(call if_changed,ld)
 	@:
 
+OBJCOPYFLAGS_vmlinux.bin := -O binary -R .note -R .comment -S
 $(obj)/vmlinux.bin: vmlinux FORCE
 	$(call if_changed,objcopy)
 
-- 
cgit v0.10.2


From a34746bc43eb63e545abf5eb002d96483a54ee32 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Mon, 4 Feb 2008 16:47:56 +0100
Subject: x86: add _ASM_EXTABLE macro to <asm/asm.h>

Instead of open-coding the __ex_table information at each callsite,
construct a common macro that can work regardless of CPU size.

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/include/asm-x86/asm.h b/include/asm-x86/asm.h
index 1a6980a..90dec0c 100644
--- a/include/asm-x86/asm.h
+++ b/include/asm-x86/asm.h
@@ -29,4 +29,11 @@
 
 #endif /* CONFIG_X86_32 */
 
+/* Exception table entry */
+# define _ASM_EXTABLE(from,to) \
+	" .section __ex_table,\"a\"\n" \
+	_ASM_ALIGN "\n" \
+	_ASM_PTR #from "," #to "\n" \
+	" .previous\n"
+
 #endif /* _ASM_X86_ASM_H */
-- 
cgit v0.10.2


From 92909098a3b27147c4b80f9c387ccd63676aa807 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Mon, 4 Feb 2008 16:47:56 +0100
Subject: x86: use _ASM_EXTABLE macro in arch/x86/kernel/test_nx.c

Use the _ASM_EXTABLE macro from <asm/asm.h>, instead of open-coding
__ex_table entries in arch/x86/kernel/test_nx.c.

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/arch/x86/kernel/test_nx.c b/arch/x86/kernel/test_nx.c
index ae0ef2e..36c100c 100644
--- a/arch/x86/kernel/test_nx.c
+++ b/arch/x86/kernel/test_nx.c
@@ -12,6 +12,7 @@
 #include <linux/module.h>
 #include <linux/sort.h>
 #include <asm/uaccess.h>
+#include <asm/asm.h>
 
 extern int rodata_test_data;
 
@@ -89,16 +90,7 @@ static noinline int test_address(void *address)
 		"2:	mov %[zero], %[rslt]\n"
 		"	ret\n"
 		".previous\n"
-		".section __ex_table,\"a\"\n"
-		"       .align 8\n"
-#ifdef CONFIG_X86_32
-		"	.long 0b\n"
-		"	.long 2b\n"
-#else
-		"	.quad 0b\n"
-		"	.quad 2b\n"
-#endif
-		".previous\n"
+		_ASM_EXTABLE(0b,2b)
 		: [rslt] "=r" (result)
 		: [fake_code] "r" (address), [zero] "r" (0UL), "0" (result)
 	);
-- 
cgit v0.10.2


From e7a40d268ec2afab7e0596667cabd2ae53fec8d8 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Mon, 4 Feb 2008 16:47:57 +0100
Subject: x86: use _ASM_EXTABLE macro in arch/x86/lib/mmx_32.c

Use the _ASM_EXTABLE macro from <asm/asm.h>, instead of open-coding
__ex_table entries in arch/x86/lib/mmx_32.c.

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/arch/x86/lib/mmx_32.c b/arch/x86/lib/mmx_32.c
index 28084d2..cc9b4a4 100644
--- a/arch/x86/lib/mmx_32.c
+++ b/arch/x86/lib/mmx_32.c
@@ -4,6 +4,7 @@
 #include <linux/hardirq.h>
 #include <linux/module.h>
 
+#include <asm/asm.h>
 #include <asm/i387.h>
 
 
@@ -50,10 +51,7 @@ void *_mmx_memcpy(void *to, const void *from, size_t len)
 		"3: movw $0x1AEB, 1b\n"	/* jmp on 26 bytes */
 		"   jmp 2b\n"
 		".previous\n"
-		".section __ex_table,\"a\"\n"
-		"	.align 4\n"
-		"	.long 1b, 3b\n"
-		".previous"
+		_ASM_EXTABLE(1b,3b)
 		: : "r" (from) );
 		
 	
@@ -81,10 +79,7 @@ void *_mmx_memcpy(void *to, const void *from, size_t len)
 		"3: movw $0x05EB, 1b\n"	/* jmp on 5 bytes */
 		"   jmp 2b\n"
 		".previous\n"
-		".section __ex_table,\"a\"\n"
-		"	.align 4\n"
-		"	.long 1b, 3b\n"
-		".previous"
+		_ASM_EXTABLE(1b,3b)
 		: : "r" (from), "r" (to) : "memory");
 		from+=64;
 		to+=64;
@@ -181,10 +176,7 @@ static void fast_copy_page(void *to, void *from)
 		"3: movw $0x1AEB, 1b\n"	/* jmp on 26 bytes */
 		"   jmp 2b\n"
 		".previous\n"
-		".section __ex_table,\"a\"\n"
-		"	.align 4\n"
-		"	.long 1b, 3b\n"
-		".previous"
+		_ASM_EXTABLE(1b,3b)
 		: : "r" (from) );
 
 	for(i=0; i<(4096-320)/64; i++)
@@ -211,10 +203,7 @@ static void fast_copy_page(void *to, void *from)
 		"3: movw $0x05EB, 1b\n"	/* jmp on 5 bytes */
 		"   jmp 2b\n"
 		".previous\n"
-		".section __ex_table,\"a\"\n"
-		"	.align 4\n"
-		"	.long 1b, 3b\n"
-		".previous"
+		_ASM_EXTABLE(1b,3b)
 		: : "r" (from), "r" (to) : "memory");
 		from+=64;
 		to+=64;
@@ -311,10 +300,7 @@ static void fast_copy_page(void *to, void *from)
 		"3: movw $0x1AEB, 1b\n"	/* jmp on 26 bytes */
 		"   jmp 2b\n"
 		".previous\n"
-		".section __ex_table,\"a\"\n"
-		"	.align 4\n"
-		"	.long 1b, 3b\n"
-		".previous"
+		_ASM_EXTABLE(1b,3b)
 		: : "r" (from) );
 
 	for(i=0; i<4096/64; i++)
@@ -341,10 +327,7 @@ static void fast_copy_page(void *to, void *from)
 		"3: movw $0x05EB, 1b\n"	/* jmp on 5 bytes */
 		"   jmp 2b\n"
 		".previous\n"
-		".section __ex_table,\"a\"\n"
-		"	.align 4\n"
-		"	.long 1b, 3b\n"
-		".previous"
+		_ASM_EXTABLE(1b,3b)
 		: : "r" (from), "r" (to) : "memory");
 		from+=64;
 		to+=64;
-- 
cgit v0.10.2


From 287774414568010855642518513f085491644061 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Mon, 4 Feb 2008 16:47:57 +0100
Subject: x86: use _ASM_EXTABLE macro in arch/x86/lib/usercopy_32.c

Use the _ASM_EXTABLE macro from <asm/asm.h>, instead of open-coding
__ex_table entries in arch/x86/lib/usercopy_32.c.

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c
index 9c4ffd5..e849b99 100644
--- a/arch/x86/lib/usercopy_32.c
+++ b/arch/x86/lib/usercopy_32.c
@@ -48,10 +48,7 @@ do {									   \
 		"3:	movl %5,%0\n"					   \
 		"	jmp 2b\n"					   \
 		".previous\n"						   \
-		".section __ex_table,\"a\"\n"				   \
-		"	.align 4\n"					   \
-		"	.long 0b,3b\n"					   \
-		".previous"						   \
+		_ASM_EXTABLE(0b,3b)					   \
 		: "=d"(res), "=c"(count), "=&a" (__d0), "=&S" (__d1),	   \
 		  "=&D" (__d2)						   \
 		: "i"(-EFAULT), "0"(count), "1"(count), "3"(src), "4"(dst) \
@@ -132,11 +129,8 @@ do {									\
 		"3:	lea 0(%2,%0,4),%0\n"				\
 		"	jmp 2b\n"					\
 		".previous\n"						\
-		".section __ex_table,\"a\"\n"				\
-		"	.align 4\n"					\
-		"	.long 0b,3b\n"					\
-		"	.long 1b,2b\n"					\
-		".previous"						\
+		_ASM_EXTABLE(0b,3b)					\
+		_ASM_EXTABLE(1b,2b)					\
 		: "=&c"(size), "=&D" (__d0)				\
 		: "r"(size & 3), "0"(size / 4), "1"(addr), "a"(0));	\
 } while (0)
-- 
cgit v0.10.2


From 8da804f2b23913ef362c6a578bf482e5ccc93d1a Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Mon, 4 Feb 2008 16:47:57 +0100
Subject: x86: use _ASM_EXTABLE macro in arch/x86/lib/usercopy_64.c

Use the _ASM_EXTABLE macro from <asm/asm.h>, instead of open-coding
__ex_table entries in arch/x86/lib/usercopy_64.c.

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c
index 893d43f..0c89d1b 100644
--- a/arch/x86/lib/usercopy_64.c
+++ b/arch/x86/lib/usercopy_64.c
@@ -31,10 +31,7 @@ do {									   \
 		"3:	movq %5,%0\n"					   \
 		"	jmp 2b\n"					   \
 		".previous\n"						   \
-		".section __ex_table,\"a\"\n"				   \
-		"	.align 8\n"					   \
-		"	.quad 0b,3b\n"					   \
-		".previous"						   \
+		_ASM_EXTABLE(0b,3b)					   \
 		: "=r"(res), "=c"(count), "=&a" (__d0), "=&S" (__d1),	   \
 		  "=&D" (__d2)						   \
 		: "i"(-EFAULT), "0"(count), "1"(count), "3"(src), "4"(dst) \
@@ -87,11 +84,8 @@ unsigned long __clear_user(void __user *addr, unsigned long size)
 		"3:	lea 0(%[size1],%[size8],8),%[size8]\n"
 		"	jmp 2b\n"
 		".previous\n"
-		".section __ex_table,\"a\"\n"
-		"       .align 8\n"
-		"	.quad 0b,3b\n"
-		"	.quad 1b,2b\n"
-		".previous"
+		_ASM_EXTABLE(0b,3b)
+		_ASM_EXTABLE(1b,2b)
 		: [size8] "=c"(size), [dst] "=&D" (__d0)
 		: [size1] "r"(size & 7), "[size8]" (size / 8), "[dst]"(addr),
 		  [zero] "r" (0UL), [eight] "r" (8UL));
-- 
cgit v0.10.2


From f832ff18e886ada0ff30a1edeab082ce218d107e Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Mon, 4 Feb 2008 16:47:58 +0100
Subject: x86: use _ASM_EXTABLE macro in arch/x86/mm/init_32.c

Use the _ASM_EXTABLE macro from <asm/asm.h>, instead of open-coding
__ex_table entries in arch/x86/mm/init_32.c.

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index f2f36f8..d1bc040 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -31,6 +31,7 @@
 #include <linux/initrd.h>
 #include <linux/cpumask.h>
 
+#include <asm/asm.h>
 #include <asm/processor.h>
 #include <asm/system.h>
 #include <asm/uaccess.h>
@@ -718,10 +719,7 @@ static noinline int do_test_wp_bit(void)
 		"1:	movb %1, %0	\n"
 		"	xorl %2, %2	\n"
 		"2:			\n"
-		".section __ex_table, \"a\"\n"
-		"	.align 4	\n"
-		"	.long 1b, 2b	\n"
-		".previous		\n"
+		_ASM_EXTABLE(1b,2b)
 		:"=m" (*(char *)fix_to_virt(FIX_WP_TEST)),
 		 "=q" (tmp_reg),
 		 "=r" (flag)
-- 
cgit v0.10.2


From 2532ec6d178abc55681d049097d3dc577eaa266c Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Mon, 4 Feb 2008 16:47:58 +0100
Subject: x86: use _ASM_EXTABLE macro in include/asm-x86/futex.h

Use the _ASM_EXTABLE macro from <asm/asm.h>, instead of open-coding
__ex_table entries in include/asm-x86/futex.h.

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/include/asm-x86/futex.h b/include/asm-x86/futex.h
index 9d91926..cd9f894 100644
--- a/include/asm-x86/futex.h
+++ b/include/asm-x86/futex.h
@@ -17,11 +17,8 @@
 "2:	.section .fixup,\"ax\"\n				\
 3:	mov	%3, %1\n					\
 	jmp	2b\n						\
-	.previous\n						\
-	.section __ex_table,\"a\"\n				\
-	.align	8\n"						\
-	_ASM_PTR "1b,3b\n					\
-	.previous"						\
+	.previous\n"						\
+	_ASM_EXTABLE(1b,3b)					\
 	: "=r" (oldval), "=r" (ret), "+m" (*uaddr)		\
 	: "i" (-EFAULT), "0" (oparg), "1" (0))
 
@@ -35,11 +32,9 @@
 3:	.section .fixup,\"ax\"\n				\
 4:	mov	%5, %1\n					\
 	jmp	3b\n						\
-	.previous\n						\
-	.section __ex_table,\"a\"\n				\
-	.align	8\n"						\
-	_ASM_PTR "1b,4b,2b,4b\n					\
-	.previous"						\
+	.previous\n"						\
+	_ASM_EXTABLE(1b,4b)					\
+	_ASM_EXTABLE(2b,4b)					\
 	: "=&a" (oldval), "=&r" (ret), "+m" (*uaddr),		\
 	  "=&r" (tem)						\
 	: "r" (oparg), "i" (-EFAULT), "1" (0))
@@ -111,18 +106,12 @@ futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval)
 		return -EFAULT;
 
 	__asm__ __volatile__(
-
 		"1:	lock; cmpxchgl %3, %1			\n"
 		"2:	.section .fixup, \"ax\"			\n"
 		"3:	mov     %2, %0				\n"
 		"	jmp     2b				\n"
 		"	.previous				\n"
-
-		"	.section __ex_table, \"a\"		\n"
-		"	.align  8				\n"
-			_ASM_PTR " 1b,3b			\n"
-		"	.previous				\n"
-
+		_ASM_EXTABLE(1b,3b)
 		: "=a" (oldval), "+m" (*uaddr)
 		: "i" (-EFAULT), "r" (newval), "0" (oldval)
 		: "memory"
-- 
cgit v0.10.2


From 92c37fa3256dd8ace1cc37674146abd286e3b8b0 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Mon, 4 Feb 2008 16:47:58 +0100
Subject: x86: use _ASM_EXTABLE macro in include/asm-x86/i387.h

Use the _ASM_EXTABLE macro from <asm/asm.h>, instead of open-coding
__ex_table entries in include/asm-x86/i387.h.

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/include/asm-x86/i387.h b/include/asm-x86/i387.h
index ba8105c..6b1895c 100644
--- a/include/asm-x86/i387.h
+++ b/include/asm-x86/i387.h
@@ -13,6 +13,7 @@
 #include <linux/sched.h>
 #include <linux/kernel_stat.h>
 #include <linux/regset.h>
+#include <asm/asm.h>
 #include <asm/processor.h>
 #include <asm/sigcontext.h>
 #include <asm/user.h>
@@ -41,10 +42,7 @@ static inline void tolerant_fwait(void)
 {
 	asm volatile("1: fwait\n"
 		     "2:\n"
-		     "   .section __ex_table,\"a\"\n"
-		     "	.align 8\n"
-		     "	.quad 1b,2b\n"
-		     "	.previous\n");
+		     _ASM_EXTABLE(1b,2b));
 }
 
 static inline int restore_fpu_checking(struct i387_fxsave_struct *fx)
@@ -57,10 +55,7 @@ static inline int restore_fpu_checking(struct i387_fxsave_struct *fx)
 		     "3:  movl $-1,%[err]\n"
 		     "    jmp  2b\n"
 		     ".previous\n"
-		     ".section __ex_table,\"a\"\n"
-		     "   .align 8\n"
-		     "   .quad  1b,3b\n"
-		     ".previous"
+		     _ASM_EXTABLE(1b,3b)
 		     : [err] "=r" (err)
 #if 0 /* See comment in __save_init_fpu() below. */
 		     : [fx] "r" (fx), "m" (*fx), "0" (0));
@@ -99,10 +94,7 @@ static inline int save_i387_checking(struct i387_fxsave_struct __user *fx)
 		     "3:  movl $-1,%[err]\n"
 		     "    jmp  2b\n"
 		     ".previous\n"
-		     ".section __ex_table,\"a\"\n"
-		     "   .align 8\n"
-		     "   .quad  1b,3b\n"
-		     ".previous"
+		     _ASM_EXTABLE(1b,3b)
 		     : [err] "=r" (err), "=m" (*fx)
 #if 0 /* See comment in __fxsave_clear() below. */
 		     : [fx] "r" (fx), "0" (0));
-- 
cgit v0.10.2


From 7d24a827087e0cf6834a3d8f20c4b5fc4cebd7fc Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Mon, 4 Feb 2008 16:47:58 +0100
Subject: x86: use _ASM_EXTABLE macro in include/asm-x86/msr.h

Use the _ASM_EXTABLE macro from <asm/asm.h>, instead of open-coding
__ex_table entries in include/asm-x86/msr.h.

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/include/asm-x86/msr.h b/include/asm-x86/msr.h
index 204a8a3..3ca29eb 100644
--- a/include/asm-x86/msr.h
+++ b/include/asm-x86/msr.h
@@ -57,10 +57,7 @@ static inline unsigned long long native_read_msr_safe(unsigned int msr,
 		     ".section .fixup,\"ax\"\n\t"
 		     "3:  mov %3,%0 ; jmp 1b\n\t"
 		     ".previous\n\t"
-		     ".section __ex_table,\"a\"\n"
-		     _ASM_ALIGN "\n\t"
-		     _ASM_PTR " 2b,3b\n\t"
-		     ".previous"
+		     _ASM_EXTABLE(2b,3b)
 		     : "=r" (*err), EAX_EDX_RET(val, low, high)
 		     : "c" (msr), "i" (-EFAULT));
 	return EAX_EDX_VAL(val, low, high);
@@ -81,10 +78,7 @@ static inline int native_write_msr_safe(unsigned int msr,
 		     ".section .fixup,\"ax\"\n\t"
 		     "3:  mov %4,%0 ; jmp 1b\n\t"
 		     ".previous\n\t"
-		     ".section __ex_table,\"a\"\n"
-		     _ASM_ALIGN "\n\t"
-		     _ASM_PTR " 2b,3b\n\t"
-		     ".previous"
+		     _ASM_EXTABLE(2b,3b)
 		     : "=a" (err)
 		     : "c" (msr), "0" (low), "d" (high),
 		       "i" (-EFAULT));
-- 
cgit v0.10.2


From 88976ee187dce4c8de56e25955631de9765d96d1 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Mon, 4 Feb 2008 16:47:58 +0100
Subject: x86: use _ASM_EXTABLE macro in include/asm-x86/system.h

Use the _ASM_EXTABLE macro from <asm/asm.h>, instead of open-coding
__ex_table entries in include/asm-x86/system.h.

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/include/asm-x86/system.h b/include/asm-x86/system.h
index ee32ef9..428348e 100644
--- a/include/asm-x86/system.h
+++ b/include/asm-x86/system.h
@@ -130,10 +130,7 @@ extern void load_gs_index(unsigned);
 		"movl %k1, %%" #seg "\n\t"	\
 		"jmp 2b\n"			\
 		".previous\n"			\
-		".section __ex_table,\"a\"\n\t"	\
-		_ASM_ALIGN "\n\t"		\
-		_ASM_PTR " 1b,3b\n"		\
-		".previous"			\
+		_ASM_EXTABLE(1b,3b)		\
 		: :"r" (value), "r" (0))
 
 
@@ -214,12 +211,10 @@ static inline unsigned long native_read_cr4_safe(void)
 	/* This could fault if %cr4 does not exist. In x86_64, a cr4 always
 	 * exists, so it will never fail. */
 #ifdef CONFIG_X86_32
-	asm volatile("1: mov %%cr4, %0		\n"
-		"2:				\n"
-		".section __ex_table,\"a\"	\n"
-		".long 1b,2b			\n"
-		".previous			\n"
-		: "=r" (val), "=m" (__force_order) : "0" (0));
+	asm volatile("1: mov %%cr4, %0\n"
+		     "2:\n"
+		     _ASM_EXTABLE(1b,2b)
+		     : "=r" (val), "=m" (__force_order) : "0" (0));
 #else
 	val = native_read_cr4();
 #endif
-- 
cgit v0.10.2


From 14e6d17d683c02c114fccdde3a867033e8781416 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Mon, 4 Feb 2008 16:47:59 +0100
Subject: x86: use _ASM_EXTABLE macro in include/asm-x86/uaccess_32.h

Use the _ASM_EXTABLE macro from <asm/asm.h>, instead of open-coding
__ex_table entries in include/asm-x86/uaccess_32.h.

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/include/asm-x86/uaccess_32.h b/include/asm-x86/uaccess_32.h
index d2a4f7b..fcc570e 100644
--- a/include/asm-x86/uaccess_32.h
+++ b/include/asm-x86/uaccess_32.h
@@ -8,6 +8,7 @@
 #include <linux/thread_info.h>
 #include <linux/prefetch.h>
 #include <linux/string.h>
+#include <asm/asm.h>
 #include <asm/page.h>
 
 #define VERIFY_READ 0
@@ -287,11 +288,8 @@ extern void __put_user_8(void);
 		"4:	movl %3,%0\n"				\
 		"	jmp 3b\n"				\
 		".previous\n"					\
-		".section __ex_table,\"a\"\n"			\
-		"	.align 4\n"				\
-		"	.long 1b,4b\n"				\
-		"	.long 2b,4b\n"				\
-		".previous"					\
+		_ASM_EXTABLE(1b,4b)				\
+		_ASM_EXTABLE(2b,4b)				\
 		: "=r"(err)					\
 		: "A" (x), "r" (addr), "i"(-EFAULT), "0"(err))
 
@@ -338,10 +336,7 @@ struct __large_struct { unsigned long buf[100]; };
 		"3:	movl %3,%0\n"					\
 		"	jmp 2b\n"					\
 		".previous\n"						\
-		".section __ex_table,\"a\"\n"				\
-		"	.align 4\n"					\
-		"	.long 1b,3b\n"					\
-		".previous"						\
+		_ASM_EXTABLE(1b,3b)					\
 		: "=r"(err)						\
 		: ltype (x), "m"(__m(addr)), "i"(errret), "0"(err))
 
@@ -378,10 +373,7 @@ do {									\
 		"	xor"itype" %"rtype"1,%"rtype"1\n"		\
 		"	jmp 2b\n"					\
 		".previous\n"						\
-		".section __ex_table,\"a\"\n"				\
-		"	.align 4\n"					\
-		"	.long 1b,3b\n"					\
-		".previous"						\
+		_ASM_EXTABLE(1b,3b)					\
 		: "=r"(err), ltype (x)					\
 		: "m"(__m(addr)), "i"(errret), "0"(err))
 
-- 
cgit v0.10.2


From 71713eeed0c90bb05c509388609223555575f558 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Mon, 4 Feb 2008 16:47:59 +0100
Subject: x86: use _ASM_EXTABLE macro in include/asm-x86/uaccess_64.h

Use the _ASM_EXTABLE macro from <asm/asm.h>, instead of open-coding
__ex_table entries in include/asm-x86/uaccess_64.h.

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/include/asm-x86/uaccess_64.h b/include/asm-x86/uaccess_64.h
index 31d7947..b87eb4b 100644
--- a/include/asm-x86/uaccess_64.h
+++ b/include/asm-x86/uaccess_64.h
@@ -181,10 +181,7 @@ struct __large_struct { unsigned long buf[100]; };
 		"3:	mov %3,%0\n"				\
 		"	jmp 2b\n"				\
 		".previous\n"					\
-		".section __ex_table,\"a\"\n"			\
-		"	.align 8\n"				\
-		"	.quad 1b,3b\n"				\
-		".previous"					\
+		_ASM_EXTABLE(1b,3b)				\
 		: "=r"(err)					\
 		: ltype (x), "m"(__m(addr)), "i"(errno), "0"(err))
 
@@ -226,10 +223,7 @@ do {									\
 		"	xor"itype" %"rtype"1,%"rtype"1\n"	\
 		"	jmp 2b\n"				\
 		".previous\n"					\
-		".section __ex_table,\"a\"\n"			\
-		"	.align 8\n"				\
-		"	.quad 1b,3b\n"				\
-		".previous"					\
+		_ASM_EXTABLE(1b,3b)				\
 		: "=r"(err), ltype (x)				\
 		: "m"(__m(addr)), "i"(errno), "0"(err))
 
-- 
cgit v0.10.2


From 2347d933b158932cf2b8aeebae3e5cc16b200bd1 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Mon, 4 Feb 2008 16:47:59 +0100
Subject: x86: cpuid: allow querying %ecx-sensitive CPUID levels

After /dev/*/cpuid was introduced, Intel changed the semantics of the
CPUID instruction to be sensitive to %ecx as well as %eax.  This patch
allows querying of %ecx-sensitive levels by placing the %ecx value in
the upper 32 bits of the file position (lower 32 bits always were used
for the %eax value.)

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c
index a63432d..c10ebc4 100644
--- a/arch/x86/kernel/cpuid.c
+++ b/arch/x86/kernel/cpuid.c
@@ -1,6 +1,6 @@
 /* ----------------------------------------------------------------------- *
- *   
- *   Copyright 2000 H. Peter Anvin - All Rights Reserved
+ *
+ *   Copyright 2000-2008 H. Peter Anvin - All Rights Reserved
  *
  *   This program is free software; you can redistribute it and/or modify
  *   it under the terms of the GNU General Public License as published by
@@ -17,6 +17,10 @@
  * and then read in chunks of 16 bytes.  A larger size means multiple
  * reads of consecutive levels.
  *
+ * The lower 32 bits of the file position is used as the incoming %eax,
+ * and the upper 32 bits of the file position as the incoming %ecx,
+ * the latter intended for "counting" eax levels like eax=4.
+ *
  * This driver uses /dev/cpu/%d/cpuid where %d is the minor number, and on
  * an SMP box will direct the access to CPU %d.
  */
@@ -43,27 +47,16 @@
 
 static struct class *cpuid_class;
 
-struct cpuid_command {
-	u32 reg;
-	u32 *data;
+struct cpuid_regs {
+	u32 eax, ebx, ecx, edx;
 };
 
 static void cpuid_smp_cpuid(void *cmd_block)
 {
-	struct cpuid_command *cmd = cmd_block;
-
-	cpuid(cmd->reg, &cmd->data[0], &cmd->data[1], &cmd->data[2],
-		      &cmd->data[3]);
-}
-
-static inline void do_cpuid(int cpu, u32 reg, u32 * data)
-{
-	struct cpuid_command cmd;
+	struct cpuid_regs *cmd = (struct cpuid_regs *)cmd_block;
 
-	cmd.reg = reg;
-	cmd.data = data;
-
-	smp_call_function_single(cpu, cpuid_smp_cpuid, &cmd, 1, 1);
+	cpuid_count(cmd->eax, cmd->ecx,
+		    &cmd->eax, &cmd->ebx, &cmd->ecx, &cmd->edx);
 }
 
 static loff_t cpuid_seek(struct file *file, loff_t offset, int orig)
@@ -93,19 +86,21 @@ static ssize_t cpuid_read(struct file *file, char __user *buf,
 			  size_t count, loff_t * ppos)
 {
 	char __user *tmp = buf;
-	u32 data[4];
-	u32 reg = *ppos;
+	struct cpuid_regs cmd;
 	int cpu = iminor(file->f_path.dentry->d_inode);
+	u64 pos = *ppos;
 
 	if (count % 16)
 		return -EINVAL;	/* Invalid chunk size */
 
 	for (; count; count -= 16) {
-		do_cpuid(cpu, reg, data);
-		if (copy_to_user(tmp, &data, 16))
+		cmd.eax = pos;
+		cmd.ecx = pos >> 32;
+		smp_call_function_single(cpu, cpuid_smp_cpuid, &cmd, 1, 1);
+		if (copy_to_user(tmp, &cmd, 16))
 			return -EFAULT;
 		tmp += 16;
-		*ppos = reg++;
+		*ppos = ++pos;
 	}
 
 	return tmp - buf;
@@ -193,7 +188,7 @@ static int __init cpuid_init(void)
 	}
 	for_each_online_cpu(i) {
 		err = cpuid_device_create(i);
-		if (err != 0) 
+		if (err != 0)
 			goto out_class;
 	}
 	register_hotcpu_notifier(&cpuid_class_cpu_notifier);
@@ -208,7 +203,7 @@ out_class:
 	}
 	class_destroy(cpuid_class);
 out_chrdev:
-	unregister_chrdev(CPUID_MAJOR, "cpu/cpuid");	
+	unregister_chrdev(CPUID_MAJOR, "cpu/cpuid");
 out:
 	return err;
 }
-- 
cgit v0.10.2


From 2b06ac867176d5d24757bda7e13f6255d6b96d7b Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Mon, 4 Feb 2008 16:47:59 +0100
Subject: x86: cpuid, msr: use inode mutex instead of big kernel lock

Instead of grabbing the BKL on seek, use the inode mutex in the style
of generic_file_llseek().

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c
index c10ebc4..288e7a6 100644
--- a/arch/x86/kernel/cpuid.c
+++ b/arch/x86/kernel/cpuid.c
@@ -62,9 +62,9 @@ static void cpuid_smp_cpuid(void *cmd_block)
 static loff_t cpuid_seek(struct file *file, loff_t offset, int orig)
 {
 	loff_t ret;
+	struct inode *inode = file->f_mapping->host;
 
-	lock_kernel();
-
+	mutex_lock(&inode->i_mutex);
 	switch (orig) {
 	case 0:
 		file->f_pos = offset;
@@ -77,8 +77,7 @@ static loff_t cpuid_seek(struct file *file, loff_t offset, int orig)
 	default:
 		ret = -EINVAL;
 	}
-
-	unlock_kernel();
+	mutex_unlock(&inode->i_mutex);
 	return ret;
 }
 
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c
index bd82850..af51ea8 100644
--- a/arch/x86/kernel/msr.c
+++ b/arch/x86/kernel/msr.c
@@ -1,6 +1,6 @@
 /* ----------------------------------------------------------------------- *
- *   
- *   Copyright 2000 H. Peter Anvin - All Rights Reserved
+ *
+ *   Copyright 2000-2008 H. Peter Anvin - All Rights Reserved
  *
  *   This program is free software; you can redistribute it and/or modify
  *   it under the terms of the GNU General Public License as published by
@@ -45,9 +45,10 @@ static struct class *msr_class;
 
 static loff_t msr_seek(struct file *file, loff_t offset, int orig)
 {
-	loff_t ret = -EINVAL;
+	loff_t ret;
+	struct inode *inode = file->f_mapping->host;
 
-	lock_kernel();
+	mutex_lock(&inode->i_mutex);
 	switch (orig) {
 	case 0:
 		file->f_pos = offset;
@@ -56,8 +57,11 @@ static loff_t msr_seek(struct file *file, loff_t offset, int orig)
 	case 1:
 		file->f_pos += offset;
 		ret = file->f_pos;
+		break;
+	default:
+		ret = -EINVAL;
 	}
-	unlock_kernel();
+	mutex_unlock(&inode->i_mutex);
 	return ret;
 }
 
-- 
cgit v0.10.2


From 84fb144b928744cea8eb39bb4fbc794fcb749175 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Mon, 4 Feb 2008 16:48:00 +0100
Subject: x86: reintroduce volatile keyword in prototype to clflush()

The volatile keyword was removed from the clflush() prototype
in commit e34907ae180f4fe6c28bb4516c679c2f81b0c9ed; the comment there
states:

    x86: remove volatile keyword from clflush.

    the p parameter is an explicit memory reference, and is
    enough to prevent gcc to being nasty here. The volatile
    seems completely not needed.

This reflects incorrect understanding of the function of the volatile
keyword there.  The purpose of the volatile keyword is informing gcc
that it is safe to pass a volatile pointer to this function.

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/include/asm-x86/system.h b/include/asm-x86/system.h
index 428348e..e9c15c9 100644
--- a/include/asm-x86/system.h
+++ b/include/asm-x86/system.h
@@ -271,9 +271,9 @@ static inline void native_wbinvd(void)
 
 #endif /* __KERNEL__ */
 
-static inline void clflush(void *__p)
+static inline void clflush(volatile void *__p)
 {
-	asm volatile("clflush %0" : "+m" (*(char __force *)__p));
+	asm volatile("clflush %0" : "+m" (*(volatile char __force *)__p));
 }
 
 #define nop() __asm__ __volatile__ ("nop")
-- 
cgit v0.10.2


From fa1408e4df53ec1e61f59c030b3488a1ef0c635d Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Mon, 4 Feb 2008 16:48:00 +0100
Subject: x86: unify CPU feature string names

Move the CPU feature string names to a separate file (common to 32
and 64 bits); additionally, make <asm/cpufeature.h> includable by host
code in preparation for including the CPU feature strings in the boot
code.

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index cfdb2f3..a0c4d7c 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -3,6 +3,7 @@
 #
 
 obj-y			:= intel_cacheinfo.o addon_cpuid_features.o
+obj-y			+= feature_names.o
 
 obj-$(CONFIG_X86_32)	+= common.o proc.o bugs.o
 obj-$(CONFIG_X86_32)	+= amd.o
diff --git a/arch/x86/kernel/cpu/feature_names.c b/arch/x86/kernel/cpu/feature_names.c
new file mode 100644
index 0000000..ee975ac
--- /dev/null
+++ b/arch/x86/kernel/cpu/feature_names.c
@@ -0,0 +1,83 @@
+/*
+ * Strings for the various x86 capability flags.
+ *
+ * This file must not contain any executable code.
+ */
+
+#include "asm/cpufeature.h"
+
+/*
+ * These flag bits must match the definitions in <asm/cpufeature.h>.
+ * NULL means this bit is undefined or reserved; either way it doesn't
+ * have meaning as far as Linux is concerned.  Note that it's important
+ * to realize there is a difference between this table and CPUID -- if
+ * applications want to get the raw CPUID data, they should access
+ * /dev/cpu/<cpu_nr>/cpuid instead.
+ */
+const char * const x86_cap_flags[NCAPINTS*32] = {
+	/* Intel-defined */
+	"fpu", "vme", "de", "pse", "tsc", "msr", "pae", "mce",
+	"cx8", "apic", NULL, "sep", "mtrr", "pge", "mca", "cmov",
+	"pat", "pse36", "pn", "clflush", NULL, "dts", "acpi", "mmx",
+	"fxsr", "sse", "sse2", "ss", "ht", "tm", "ia64", "pbe",
+
+	/* AMD-defined */
+	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+	NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL,
+	NULL, NULL, NULL, "mp", "nx", NULL, "mmxext", NULL,
+	NULL, "fxsr_opt", "pdpe1gb", "rdtscp", NULL, "lm",
+	"3dnowext", "3dnow",
+
+	/* Transmeta-defined */
+	"recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL,
+	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+
+	/* Other (Linux-defined) */
+	"cxmmx", "k6_mtrr", "cyrix_arr", "centaur_mcr",
+	NULL, NULL, NULL, NULL,
+	"constant_tsc", "up", NULL, "arch_perfmon",
+	"pebs", "bts", NULL, NULL,
+	"rep_good", NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+
+	/* Intel-defined (#2) */
+	"pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est",
+	"tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL,
+	NULL, NULL, "dca", "sse4_1", "sse4_2", NULL, NULL, "popcnt",
+	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+
+	/* VIA/Cyrix/Centaur-defined */
+	NULL, NULL, "rng", "rng_en", NULL, NULL, "ace", "ace_en",
+	"ace2", "ace2_en", "phe", "phe_en", "pmm", "pmm_en", NULL, NULL,
+	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+
+	/* AMD-defined (#2) */
+	"lahf_lm", "cmp_legacy", "svm", "extapic",
+	"cr8_legacy", "abm", "sse4a", "misalignsse",
+	"3dnowprefetch", "osvw", "ibs", "sse5",
+	"skinit", "wdt", NULL, NULL,
+	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+
+	/* Auxiliary (Linux-defined) */
+	"ida", NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+};
+
+const char *const x86_power_flags[32] = {
+	"ts",	/* temperature sensor */
+	"fid",  /* frequency id control */
+	"vid",  /* voltage id control */
+	"ttp",  /* thermal trip */
+	"tm",
+	"stc",
+	"100mhzsteps",
+	"hwpstate",
+	"",	/* tsc invariant mapped to constant_tsc */
+		/* nothing */
+};
diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c
index 0282132..af11d31 100644
--- a/arch/x86/kernel/cpu/proc.c
+++ b/arch/x86/kernel/cpu/proc.c
@@ -10,80 +10,6 @@
  */
 static int show_cpuinfo(struct seq_file *m, void *v)
 {
-	/* 
-	 * These flag bits must match the definitions in <asm/cpufeature.h>.
-	 * NULL means this bit is undefined or reserved; either way it doesn't
-	 * have meaning as far as Linux is concerned.  Note that it's important
-	 * to realize there is a difference between this table and CPUID -- if
-	 * applications want to get the raw CPUID data, they should access
-	 * /dev/cpu/<cpu_nr>/cpuid instead.
-	 */
-	static const char * const x86_cap_flags[] = {
-		/* Intel-defined */
-	        "fpu", "vme", "de", "pse", "tsc", "msr", "pae", "mce",
-	        "cx8", "apic", NULL, "sep", "mtrr", "pge", "mca", "cmov",
-	        "pat", "pse36", "pn", "clflush", NULL, "dts", "acpi", "mmx",
-	        "fxsr", "sse", "sse2", "ss", "ht", "tm", "ia64", "pbe",
-
-		/* AMD-defined */
-		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-		NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL,
-		NULL, NULL, NULL, "mp", "nx", NULL, "mmxext", NULL,
-		NULL, "fxsr_opt", "pdpe1gb", "rdtscp", NULL, "lm",
-		"3dnowext", "3dnow",
-
-		/* Transmeta-defined */
-		"recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL,
-		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-
-		/* Other (Linux-defined) */
-		"cxmmx", "k6_mtrr", "cyrix_arr", "centaur_mcr",
-		NULL, NULL, NULL, NULL,
-		"constant_tsc", "up", NULL, "arch_perfmon",
-		"pebs", "bts", NULL, "sync_rdtsc",
-		"rep_good", NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-
-		/* Intel-defined (#2) */
-		"pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est",
-		"tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL,
-		NULL, NULL, "dca", "sse4_1", "sse4_2", NULL, NULL, "popcnt",
-		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-
-		/* VIA/Cyrix/Centaur-defined */
-		NULL, NULL, "rng", "rng_en", NULL, NULL, "ace", "ace_en",
-		"ace2", "ace2_en", "phe", "phe_en", "pmm", "pmm_en", NULL, NULL,
-		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-
-		/* AMD-defined (#2) */
-		"lahf_lm", "cmp_legacy", "svm", "extapic",
-		"cr8_legacy", "abm", "sse4a", "misalignsse",
-		"3dnowprefetch", "osvw", "ibs", "sse5",
-		"skinit", "wdt", NULL, NULL,
-		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-
-		/* Auxiliary (Linux-defined) */
-		"ida", NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-	};
-	static const char * const x86_power_flags[] = {
-		"ts",	/* temperature sensor */
-		"fid",  /* frequency id control */
-		"vid",  /* voltage id control */
-		"ttp",  /* thermal trip */
-		"tm",
-		"stc",
-		"100mhzsteps",
-		"hwpstate",
-		"",	/* constant_tsc - moved to flags */
-		/* nothing */
-	};
 	struct cpuinfo_x86 *c = v;
 	int i, n = 0;
 	int fpu_exception;
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index 18df70c..c8939df 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -1068,82 +1068,6 @@ static int show_cpuinfo(struct seq_file *m, void *v)
 	struct cpuinfo_x86 *c = v;
 	int cpu = 0, i;
 
-	/*
-	 * These flag bits must match the definitions in <asm/cpufeature.h>.
-	 * NULL means this bit is undefined or reserved; either way it doesn't
-	 * have meaning as far as Linux is concerned.  Note that it's important
-	 * to realize there is a difference between this table and CPUID -- if
-	 * applications want to get the raw CPUID data, they should access
-	 * /dev/cpu/<cpu_nr>/cpuid instead.
-	 */
-	static const char *const x86_cap_flags[] = {
-		/* Intel-defined */
-		"fpu", "vme", "de", "pse", "tsc", "msr", "pae", "mce",
-		"cx8", "apic", NULL, "sep", "mtrr", "pge", "mca", "cmov",
-		"pat", "pse36", "pn", "clflush", NULL, "dts", "acpi", "mmx",
-		"fxsr", "sse", "sse2", "ss", "ht", "tm", "ia64", "pbe",
-
-		/* AMD-defined */
-		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-		NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL,
-		NULL, NULL, NULL, NULL, "nx", NULL, "mmxext", NULL,
-		NULL, "fxsr_opt", "pdpe1gb", "rdtscp", NULL, "lm",
-		"3dnowext", "3dnow",
-
-		/* Transmeta-defined */
-		"recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL,
-		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-
-		/* Other (Linux-defined) */
-		"cxmmx", "k6_mtrr", "cyrix_arr", "centaur_mcr",
-		NULL, NULL, NULL, NULL,
-		"constant_tsc", "up", NULL, "arch_perfmon",
-		"pebs", "bts", NULL, "sync_rdtsc",
-		"rep_good", NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-
-		/* Intel-defined (#2) */
-		"pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est",
-		"tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL,
-		NULL, NULL, "dca", "sse4_1", "sse4_2", NULL, NULL, "popcnt",
-		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-
-		/* VIA/Cyrix/Centaur-defined */
-		NULL, NULL, "rng", "rng_en", NULL, NULL, "ace", "ace_en",
-		"ace2", "ace2_en", "phe", "phe_en", "pmm", "pmm_en", NULL, NULL,
-		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-
-		/* AMD-defined (#2) */
-		"lahf_lm", "cmp_legacy", "svm", "extapic",
-		"cr8_legacy", "abm", "sse4a", "misalignsse",
-		"3dnowprefetch", "osvw", "ibs", "sse5",
-		"skinit", "wdt", NULL, NULL,
-		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-
-		/* Auxiliary (Linux-defined) */
-		"ida", NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-	};
-	static const char *const x86_power_flags[] = {
-		"ts",	/* temperature sensor */
-		"fid",  /* frequency id control */
-		"vid",  /* voltage id control */
-		"ttp",  /* thermal trip */
-		"tm",
-		"stc",
-		"100mhzsteps",
-		"hwpstate",
-		"",	/* tsc invariant mapped to constant_tsc */
-		/* nothing */
-	};
-
-
 #ifdef CONFIG_SMP
 	cpu = c->cpu_index;
 #endif
diff --git a/include/asm-x86/cpufeature.h b/include/asm-x86/cpufeature.h
index 3fb7dfa..3adc9cf 100644
--- a/include/asm-x86/cpufeature.h
+++ b/include/asm-x86/cpufeature.h
@@ -4,9 +4,6 @@
 #ifndef _ASM_X86_CPUFEATURE_H
 #define _ASM_X86_CPUFEATURE_H
 
-#ifndef __ASSEMBLY__
-#include <linux/bitops.h>
-#endif
 #include <asm/required-features.h>
 
 #define NCAPINTS	8	/* N 32-bit words worth of info */
@@ -115,6 +112,13 @@
  */
 #define X86_FEATURE_IDA		(7*32+ 0) /* Intel Dynamic Acceleration */
 
+#if defined(__KERNEL__) && !defined(__ASSEMBLY__)
+
+#include <linux/bitops.h>
+
+extern const char * const x86_cap_flags[NCAPINTS*32];
+extern const char * const x86_power_flags[32];
+
 #define cpu_has(c, bit)							\
 	(__builtin_constant_p(bit) &&					\
 	 ( (((bit)>>5)==0 && (1UL<<((bit)&31) & REQUIRED_MASK0)) ||	\
@@ -204,4 +208,6 @@
 
 #endif /* CONFIG_X86_64 */
 
+#endif /* defined(__KERNEL__) && !defined(__ASSEMBLY__) */
+
 #endif /* _ASM_X86_CPUFEATURE_H */
-- 
cgit v0.10.2


From f0be6c6a697c2fe8e2efbe98cd157bdbcff969ae Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Mon, 4 Feb 2008 16:48:00 +0100
Subject: x86 setup: print missing CPU features in cleartext

Instead of obscure numbers, print the list of missing CPU features in
cleartext.  To conserve space, use a host program (mkcpustr.c) to
produce a compact list of mandatory features only.

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
index 254a583..f88458e 100644
--- a/arch/x86/boot/Makefile
+++ b/arch/x86/boot/Makefile
@@ -26,7 +26,7 @@ SVGA_MODE := -DSVGA_MODE=NORMAL_VGA
 #RAMDISK := -DRAMDISK=512
 
 targets		:= vmlinux.bin setup.bin setup.elf zImage bzImage
-subdir- 	:= compressed
+subdir-		:= compressed
 
 setup-y		+= a20.o cmdline.o copy.o cpu.o cpucheck.o edd.o
 setup-y		+= header.o main.o mca.o memory.o pm.o pmjump.o
@@ -43,9 +43,17 @@ setup-y		+= video-vesa.o
 setup-y		+= video-bios.o
 
 targets		+= $(setup-y)
-hostprogs-y	:= tools/build
+hostprogs-y	:= mkcpustr tools/build
 
-HOSTCFLAGS_build.o := $(LINUXINCLUDE)
+HOST_EXTRACFLAGS += $(LINUXINCLUDE)
+
+$(obj)/cpu.o: $(obj)/cpustr.h
+
+quiet_cmd_cpustr = CPUSTR  $@
+      cmd_cpustr = $(obj)/mkcpustr > $@
+targets		+= cpustr.h
+$(obj)/cpustr.h: $(obj)/mkcpustr FORCE
+	$(call if_changed,cpustr)
 
 # ---------------------------------------------------------------------------
 
@@ -98,7 +106,7 @@ $(obj)/compressed/vmlinux: FORCE
 	$(Q)$(MAKE) $(build)=$(obj)/compressed IMAGE_OFFSET=$(IMAGE_OFFSET) $@
 
 # Set this if you want to pass append arguments to the zdisk/fdimage/isoimage kernel
-FDARGS = 
+FDARGS =
 # Set this if you want an initrd included with the zdisk/fdimage/isoimage kernel
 FDINITRD =
 
diff --git a/arch/x86/boot/cpu.c b/arch/x86/boot/cpu.c
index 2a5c32d..00e19ed 100644
--- a/arch/x86/boot/cpu.c
+++ b/arch/x86/boot/cpu.c
@@ -1,7 +1,7 @@
 /* -*- linux-c -*- ------------------------------------------------------- *
  *
  *   Copyright (C) 1991, 1992 Linus Torvalds
- *   Copyright 2007 rPath, Inc. - All Rights Reserved
+ *   Copyright 2007-2008 rPath, Inc. - All Rights Reserved
  *
  *   This file is part of the Linux kernel, and is made available under
  *   the terms of the GNU General Public License version 2.
@@ -9,7 +9,7 @@
  * ----------------------------------------------------------------------- */
 
 /*
- * arch/i386/boot/cpu.c
+ * arch/x86/boot/cpu.c
  *
  * Check for obligatory CPU features and abort if the features are not
  * present.
@@ -19,6 +19,8 @@
 #include "bitops.h"
 #include <asm/cpufeature.h>
 
+#include "cpustr.h"
+
 static char *cpu_name(int level)
 {
 	static char buf[6];
@@ -35,6 +37,7 @@ int validate_cpu(void)
 {
 	u32 *err_flags;
 	int cpu_level, req_level;
+	const unsigned char *msg_strs;
 
 	check_cpu(&cpu_level, &req_level, &err_flags);
 
@@ -51,13 +54,26 @@ int validate_cpu(void)
 		puts("This kernel requires the following features "
 		     "not present on the CPU:\n");
 
+		msg_strs = (const unsigned char *)x86_cap_strs;
+
 		for (i = 0; i < NCAPINTS; i++) {
 			u32 e = err_flags[i];
 
 			for (j = 0; j < 32; j++) {
-				if (e & 1)
-					printf("%d:%d ", i, j);
-
+				int n = (i << 5)+j;
+				if (*msg_strs < n) {
+					/* Skip to the next string */
+					do {
+						msg_strs++;
+					} while (*msg_strs);
+					msg_strs++;
+				}
+				if (e & 1) {
+					if (*msg_strs == n && msg_strs[1])
+						printf("%s ", msg_strs+1);
+					else
+						printf("%d:%d ", i, j);
+				}
 				e >>= 1;
 			}
 		}
diff --git a/arch/x86/boot/mkcpustr.c b/arch/x86/boot/mkcpustr.c
new file mode 100644
index 0000000..bbe7695
--- /dev/null
+++ b/arch/x86/boot/mkcpustr.c
@@ -0,0 +1,49 @@
+/* ----------------------------------------------------------------------- *
+ *
+ *   Copyright 2008 rPath, Inc. - All Rights Reserved
+ *
+ *   This file is part of the Linux kernel, and is made available under
+ *   the terms of the GNU General Public License version 2 or (at your
+ *   option) any later version; incorporated herein by reference.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * This is a host program to preprocess the CPU strings into a
+ * compact format suitable for the setup code.
+ */
+
+#include <stdio.h>
+
+#include "../kernel/cpu/feature_names.c"
+
+#if NCAPINTS > 8	/* each flag index must fit the one-byte \x%02x prefix */
+# error "Need to adjust the boot code handling of CPUID strings"
+#endif
+
+int main(void)
+{
+	int i;
+	const char *str;
+
+	printf("static const char x86_cap_strs[] = \n");
+
+	for (i = 0; i < NCAPINTS*32; i++) {
+		str = x86_cap_flags[i];
+
+		if (i == NCAPINTS*32-1) {
+			/* The last entry must be unconditional; this
+			   also consumes the compiler-added null character */
+			if (!str)
+				str = "";
+			printf("\t\"\\x%02x\"\"%s\"\n", i, str);
+		} else if (str) {
+			printf("#if REQUIRED_MASK%d & (1 << %d)\n"
+			       "\t\"\\x%02x\"\"%s\\0\"\n"
+			       "#endif\n",
+			       i >> 5, i & 31, i, str);
+		}
+	}
+	printf("\t;\n");
+	return 0;
+}
-- 
cgit v0.10.2


From e1adbcf10608c83de6a81a02ebce859611433b52 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Mon, 4 Feb 2008 16:48:00 +0100
Subject: asm-generic/tlb.h: remove <linux/quicklist.h>

Remove unused <linux/quicklist.h> from <asm-generic/tlb.h>; per
Christoph Lameter this should have been part of a previous patch
reversal but apparently didn't get removed.

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h
index 75f2bfa..f490e43 100644
--- a/include/asm-generic/tlb.h
+++ b/include/asm-generic/tlb.h
@@ -14,7 +14,6 @@
 #define _ASM_GENERIC__TLB_H
 
 #include <linux/swap.h>
-#include <linux/quicklist.h>
 #include <asm/pgalloc.h>
 #include <asm/tlbflush.h>
 
-- 
cgit v0.10.2


From 9a6b344ea967efa0bb5ca4cb5405f840652b66c4 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Mon, 4 Feb 2008 16:48:01 +0100
Subject: x86: remove long dead cyrix mtrr code

cyrix_arr_init was #if 0 all the way back to at least v2.6.12.

This was the only place where arr3_protected was set to anything
but zero.  Eliminate this variable.

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/arch/x86/kernel/cpu/mtrr/cyrix.c b/arch/x86/kernel/cpu/mtrr/cyrix.c
index 8e139c7..ff14c32 100644
--- a/arch/x86/kernel/cpu/mtrr/cyrix.c
+++ b/arch/x86/kernel/cpu/mtrr/cyrix.c
@@ -7,8 +7,6 @@
 #include <asm/processor-flags.h>
 #include "mtrr.h"
 
-int arr3_protected;
-
 static void
 cyrix_get_arr(unsigned int reg, unsigned long *base,
 	      unsigned long *size, mtrr_type * type)
@@ -99,8 +97,6 @@ cyrix_get_free_region(unsigned long base, unsigned long size, int replace_reg)
 	case 4:
 		return replace_reg;
 	case 3:
-		if (arr3_protected)
-			break;
 	case 2:
 	case 1:
 	case 0:
@@ -115,8 +111,6 @@ cyrix_get_free_region(unsigned long base, unsigned long size, int replace_reg)
 	} else {
 		for (i = 0; i < 7; i++) {
 			cyrix_get_arr(i, &lbase, &lsize, &ltype);
-			if ((i == 3) && arr3_protected)
-				continue;
 			if (lsize == 0)
 				return i;
 		}
@@ -260,107 +254,6 @@ static void cyrix_set_all(void)
 	post_set();
 }
 
-#if 0
-/*
- * On Cyrix 6x86(MX) and M II the ARR3 is special: it has connection
- * with the SMM (System Management Mode) mode. So we need the following:
- * Check whether SMI_LOCK (CCR3 bit 0) is set
- *   if it is set, write a warning message: ARR3 cannot be changed!
- *     (it cannot be changed until the next processor reset)
- *   if it is reset, then we can change it, set all the needed bits:
- *   - disable access to SMM memory through ARR3 range (CCR1 bit 7 reset)
- *   - disable access to SMM memory (CCR1 bit 2 reset)
- *   - disable SMM mode (CCR1 bit 1 reset)
- *   - disable write protection of ARR3 (CCR6 bit 1 reset)
- *   - (maybe) disable ARR3
- * Just to be sure, we enable ARR usage by the processor (CCR5 bit 5 set)
- */
-static void __init
-cyrix_arr_init(void)
-{
-	struct set_mtrr_context ctxt;
-	unsigned char ccr[7];
-	int ccrc[7] = { 0, 0, 0, 0, 0, 0, 0 };
-#ifdef CONFIG_SMP
-	int i;
-#endif
-
-	/* flush cache and enable MAPEN */
-	set_mtrr_prepare_save(&ctxt);
-	set_mtrr_cache_disable(&ctxt);
-
-	/* Save all CCRs locally */
-	ccr[0] = getCx86(CX86_CCR0);
-	ccr[1] = getCx86(CX86_CCR1);
-	ccr[2] = getCx86(CX86_CCR2);
-	ccr[3] = ctxt.ccr3;
-	ccr[4] = getCx86(CX86_CCR4);
-	ccr[5] = getCx86(CX86_CCR5);
-	ccr[6] = getCx86(CX86_CCR6);
-
-	if (ccr[3] & 1) {
-		ccrc[3] = 1;
-		arr3_protected = 1;
-	} else {
-		/* Disable SMM mode (bit 1), access to SMM memory (bit 2) and
-		 * access to SMM memory through ARR3 (bit 7).
-		 */
-		if (ccr[1] & 0x80) {
-			ccr[1] &= 0x7f;
-			ccrc[1] |= 0x80;
-		}
-		if (ccr[1] & 0x04) {
-			ccr[1] &= 0xfb;
-			ccrc[1] |= 0x04;
-		}
-		if (ccr[1] & 0x02) {
-			ccr[1] &= 0xfd;
-			ccrc[1] |= 0x02;
-		}
-		arr3_protected = 0;
-		if (ccr[6] & 0x02) {
-			ccr[6] &= 0xfd;
-			ccrc[6] = 1;	/* Disable write protection of ARR3 */
-			setCx86(CX86_CCR6, ccr[6]);
-		}
-		/* Disable ARR3. This is safe now that we disabled SMM. */
-		/* cyrix_set_arr_up (3, 0, 0, 0, FALSE); */
-	}
-	/* If we changed CCR1 in memory, change it in the processor, too. */
-	if (ccrc[1])
-		setCx86(CX86_CCR1, ccr[1]);
-
-	/* Enable ARR usage by the processor */
-	if (!(ccr[5] & 0x20)) {
-		ccr[5] |= 0x20;
-		ccrc[5] = 1;
-		setCx86(CX86_CCR5, ccr[5]);
-	}
-#ifdef CONFIG_SMP
-	for (i = 0; i < 7; i++)
-		ccr_state[i] = ccr[i];
-	for (i = 0; i < 8; i++)
-		cyrix_get_arr(i,
-			      &arr_state[i].base, &arr_state[i].size,
-			      &arr_state[i].type);
-#endif
-
-	set_mtrr_done(&ctxt);	/* flush cache and disable MAPEN */
-
-	if (ccrc[5])
-		printk(KERN_INFO "mtrr: ARR usage was not enabled, enabled manually\n");
-	if (ccrc[3])
-		printk(KERN_INFO "mtrr: ARR3 cannot be changed\n");
-/*
-    if ( ccrc[1] & 0x80) printk ("mtrr: SMM memory access through ARR3 disabled\n");
-    if ( ccrc[1] & 0x04) printk ("mtrr: SMM memory access disabled\n");
-    if ( ccrc[1] & 0x02) printk ("mtrr: SMM mode disabled\n");
-*/
-	if (ccrc[6])
-		printk(KERN_INFO "mtrr: ARR3 was write protected, unprotected\n");
-}
-#endif
-
 static struct mtrr_ops cyrix_mtrr_ops = {
 	.vendor            = X86_VENDOR_CYRIX,
 //	.init              = cyrix_arr_init,
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 7159195..822d8f90c 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -59,12 +59,6 @@ struct mtrr_ops * mtrr_if = NULL;
 static void set_mtrr(unsigned int reg, unsigned long base,
 		     unsigned long size, mtrr_type type);
 
-#ifndef CONFIG_X86_64
-extern int arr3_protected;
-#else
-#define arr3_protected 0
-#endif
-
 void set_mtrr_ops(struct mtrr_ops * ops)
 {
 	if (ops->vendor && ops->vendor < X86_VENDOR_NUM)
@@ -513,12 +507,6 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size)
 		printk(KERN_WARNING "mtrr: register: %d too big\n", reg);
 		goto out;
 	}
-	if (is_cpu(CYRIX) && !use_intel()) {
-		if ((reg == 3) && arr3_protected) {
-			printk(KERN_WARNING "mtrr: ARR3 cannot be changed\n");
-			goto out;
-		}
-	}
 	mtrr_if->get(reg, &lbase, &lsize, &ltype);
 	if (lsize < 1) {
 		printk(KERN_WARNING "mtrr: MTRR %d not used\n", reg);
-- 
cgit v0.10.2


From 9b4239346136f1432e52d14ea88f4b2662876f4a Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Mon, 4 Feb 2008 16:48:01 +0100
Subject: x86: sparse errors from string_32.h

include/asm/string_32.h:216:26: warning: cast truncates bits from constant value (cccccccc becomes cc)
include/asm/string_32.h:219:27: warning: cast truncates bits from constant value (cccccccc becomes cccc)
include/asm/string_32.h:222:27: warning: cast truncates bits from constant value (cccccccc becomes cccc)
include/asm/string_32.h:223:30: warning: cast truncates bits from constant value (cccccccc becomes cc)

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/include/asm-x86/string_32.h b/include/asm-x86/string_32.h
index 55bfa30..c5d13a8 100644
--- a/include/asm-x86/string_32.h
+++ b/include/asm-x86/string_32.h
@@ -213,14 +213,14 @@ static __always_inline void * __constant_c_and_count_memset(void * s, unsigned l
 		case 0:
 			return s;
 		case 1:
-			*(unsigned char *)s = pattern;
+			*(unsigned char *)s = pattern & 0xff;
 			return s;
 		case 2:
-			*(unsigned short *)s = pattern;
+			*(unsigned short *)s = pattern & 0xffff;
 			return s;
 		case 3:
-			*(unsigned short *)s = pattern;
-			*(2+(unsigned char *)s) = pattern;
+			*(unsigned short *)s = pattern & 0xffff;
+			*(2+(unsigned char *)s) = pattern & 0xff;
 			return s;
 		case 4:
 			*(unsigned long *)s = pattern;
-- 
cgit v0.10.2


From 94a8a7acbe4d9aa83d53597516cc71101ebd2f6d Mon Sep 17 00:00:00 2001
From: Pavel Machek <pavel@ucw.cz>
Date: Mon, 4 Feb 2008 16:48:01 +0100
Subject: x86: remove misleading comments in trampoline_*.S

Both trampolines actually *do* set up a stack. (Is the "we jump into
compressed/head.S" comment still true?)

Signed-off-by: Pavel Machek <pavel@suse.cz>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/arch/x86/kernel/trampoline_32.S b/arch/x86/kernel/trampoline_32.S
index 9bcc1c6..6458067 100644
--- a/arch/x86/kernel/trampoline_32.S
+++ b/arch/x86/kernel/trampoline_32.S
@@ -11,12 +11,7 @@
  *	trampoline page to make our stack and everything else
  *	is a mystery.
  *
- *	In fact we don't actually need a stack so we don't
- *	set one up.
- *
- *	We jump into the boot/compressed/head.S code. So you'd
- *	better be running a compressed kernel image or you
- *	won't get very far.
+ *	We jump into arch/x86/kernel/head_32.S.
  *
  *	On entry to trampoline_data, the processor is in real mode
  *	with 16-bit addressing and 16-bit data.  CS has some value
diff --git a/arch/x86/kernel/trampoline_64.S b/arch/x86/kernel/trampoline_64.S
index e30b67c..4aedd0b 100644
--- a/arch/x86/kernel/trampoline_64.S
+++ b/arch/x86/kernel/trampoline_64.S
@@ -10,9 +10,6 @@
  *	trampoline page to make our stack and everything else
  *	is a mystery.
  *
- *	In fact we don't actually need a stack so we don't
- *	set one up.
- *
  *	On entry to trampoline_data, the processor is in real mode
  *	with 16-bit addressing and 16-bit data.  CS has some value
  *	and IP is zero.  Thus, data addresses need to be absolute
-- 
cgit v0.10.2


From c66315e0a785e95884b23887c1aa479dc0b32beb Mon Sep 17 00:00:00 2001
From: Rob Landley <rob@landley.net>
Date: Mon, 4 Feb 2008 16:48:02 +0100
Subject: documentation: add Documentation/x86_64/00-INDEX

Signed-off-by: Rob Landley <rob@landley.net>
Cc: Vojtech Pavlik <vojtech@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/Documentation/x86_64/00-INDEX b/Documentation/x86_64/00-INDEX
new file mode 100644
index 0000000..92fc20a
--- /dev/null
+++ b/Documentation/x86_64/00-INDEX
@@ -0,0 +1,16 @@
+00-INDEX
+	- This file
+boot-options.txt
+	- AMD64-specific boot options.
+cpu-hotplug-spec
+	- Firmware support for CPU hotplug under Linux/x86-64
+fake-numa-for-cpusets
+	- Using numa=fake and CPUSets for Resource Management
+kernel-stacks
+	- Context-specific per-processor interrupt stacks.
+machinecheck
+	- Configurable sysfs parameters for the x86-64 machine check code.
+mm.txt
+	- Memory layout of x86-64 (4 level page tables, 46 bits physical).
+uefi.txt
+	- Booting Linux via Unified Extensible Firmware Interface.
-- 
cgit v0.10.2


From e618c9579c745742c422b7c3de1f802aa67e6110 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Mon, 4 Feb 2008 16:48:02 +0100
Subject: x86: unify PAE/non-PAE pgd_ctor

The constructors for PAE and non-PAE pgd_ctors are more or less
identical, and can be made into the same function.

Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Cc: William Irwin <wli@holomorphy.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c
index cb3aa47..f34e33d 100644
--- a/arch/x86/mm/pgtable_32.c
+++ b/arch/x86/mm/pgtable_32.c
@@ -219,50 +219,39 @@ static inline void pgd_list_del(pgd_t *pgd)
 	list_del(&page->lru);
 }
 
+#define UNSHARED_PTRS_PER_PGD				\
+	(SHARED_KERNEL_PMD ? USER_PTRS_PER_PGD : PTRS_PER_PGD)
 
-
-#if (PTRS_PER_PMD == 1)
-/* Non-PAE pgd constructor */
-static void pgd_ctor(void *pgd)
+static void pgd_ctor(void *p)
 {
+	pgd_t *pgd = p;
 	unsigned long flags;
 
-	/* !PAE, no pagetable sharing */
+	/* Clear usermode parts of PGD */
 	memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));
 
 	spin_lock_irqsave(&pgd_lock, flags);
 
-	/* must happen under lock */
-	clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD,
-			swapper_pg_dir + USER_PTRS_PER_PGD,
-			KERNEL_PGD_PTRS);
-	paravirt_alloc_pd_clone(__pa(pgd) >> PAGE_SHIFT,
-				__pa(swapper_pg_dir) >> PAGE_SHIFT,
-				USER_PTRS_PER_PGD,
-				KERNEL_PGD_PTRS);
-	pgd_list_add(pgd);
-	spin_unlock_irqrestore(&pgd_lock, flags);
-}
-#else  /* PTRS_PER_PMD > 1 */
-/* PAE pgd constructor */
-static void pgd_ctor(void *pgd)
-{
-	/* PAE, kernel PMD may be shared */
-
-	if (SHARED_KERNEL_PMD) {
-		clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD,
+	/* If the pgd points to a shared pagetable level (either the
+	   ptes in non-PAE, or shared PMD in PAE), then just copy the
+	   references from swapper_pg_dir. */
+	if (PAGETABLE_LEVELS == 2 ||
+	    (PAGETABLE_LEVELS == 3 && SHARED_KERNEL_PMD)) {
+		clone_pgd_range(pgd + USER_PTRS_PER_PGD,
 				swapper_pg_dir + USER_PTRS_PER_PGD,
 				KERNEL_PGD_PTRS);
-	} else {
-		unsigned long flags;
+		paravirt_alloc_pd_clone(__pa(pgd) >> PAGE_SHIFT,
+					__pa(swapper_pg_dir) >> PAGE_SHIFT,
+					USER_PTRS_PER_PGD,
+					KERNEL_PGD_PTRS);
+	}
 
-		memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));
-		spin_lock_irqsave(&pgd_lock, flags);
+	/* list required to sync kernel mapping updates */
+	if (!SHARED_KERNEL_PMD)
 		pgd_list_add(pgd);
-		spin_unlock_irqrestore(&pgd_lock, flags);
-	}
+
+	spin_unlock_irqrestore(&pgd_lock, flags);
 }
-#endif	/* PTRS_PER_PMD */
 
 static void pgd_dtor(void *pgd)
 {
@@ -276,9 +265,6 @@ static void pgd_dtor(void *pgd)
 	spin_unlock_irqrestore(&pgd_lock, flags);
 }
 
-#define UNSHARED_PTRS_PER_PGD				\
-	(SHARED_KERNEL_PMD ? USER_PTRS_PER_PGD : PTRS_PER_PGD)
-
 #ifdef CONFIG_X86_PAE
 /*
  * Mop up any pmd pages which may still be attached to the pgd.
-- 
cgit v0.10.2


From a67ad9c9f82342a9b320fdad204a490727ef4a18 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Mon, 4 Feb 2008 16:48:02 +0100
Subject: x86: revert "defer cr3 reload when doing pud_clear()"

Revert "defer cr3 reload when doing pud_clear()" since I'm going to
replace it.

Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c
index f34e33d..c7db504 100644
--- a/arch/x86/mm/pgtable_32.c
+++ b/arch/x86/mm/pgtable_32.c
@@ -373,13 +373,6 @@ void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte)
 
 void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd)
 {
-	/* This is called just after the pmd has been detached from
-	   the pgd, which requires a full tlb flush to be recognized
-	   by the CPU.  Rather than incurring multiple tlb flushes
-	   while the address space is being pulled down, make the tlb
-	   gathering machinery do a full flush when we're done. */
-	tlb->fullmm = 1;
-
 	paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT);
 	tlb_remove_page(tlb, virt_to_page(pmd));
 }
diff --git a/include/asm-x86/pgtable-3level.h b/include/asm-x86/pgtable-3level.h
index a195c3e..ed4c6f0 100644
--- a/include/asm-x86/pgtable-3level.h
+++ b/include/asm-x86/pgtable-3level.h
@@ -96,23 +96,14 @@ static inline void pud_clear(pud_t *pudp)
 	set_pud(pudp, __pud(0));
 
 	/*
-	 * In principle we need to do a cr3 reload here to make sure
-	 * the processor recognizes the changed pgd.  In practice, all
-	 * the places where pud_clear() gets called are followed by
-	 * full tlb flushes anyway, so we can defer the cost here.
+	 * Pentium-II erratum A13: in PAE mode we explicitly have to flush
+	 * the TLB via cr3 if the top-level pgd is changed...
 	 *
-	 * Specifically:
-	 *
-	 * mm/memory.c:free_pmd_range() - immediately after the
-	 * pud_clear() it does a pmd_free_tlb().  We change the
-	 * mmu_gather structure to do a full tlb flush (which has the
-	 * effect of reloading cr3) when the pagetable free is
-	 * complete.
-	 *
-	 * arch/x86/mm/hugetlbpage.c:huge_pmd_unshare() - the call to
-	 * this is followed by a flush_tlb_range, which on x86 does a
-	 * full tlb flush.
+	 * XXX I don't think we need to worry about this here, since
+	 * when clearing the pud, the calling code needs to flush the
+	 * tlb anyway.  But do it now for safety's sake. - jsgf
 	 */
+	write_cr3(read_cr3());
 }
 
 #define pud_page(pud) \
-- 
cgit v0.10.2


From edd6bcd8209c31b91e1fbc112a756475091c483d Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Mon, 4 Feb 2008 16:48:02 +0100
Subject: x86: pud_clear: only reload cr3 if necessary

Rather than unconditionally reloading cr3, only do so if the pud we're
updating is within the active pgd.

This eliminates TLB flushes most of the time.  The
performance-critical uses of pud_clear are during execve and exit, but
in those cases cr3 is referring to some other pagetable.  The only
other use of pud_clear is during a large (1Gbyte+) munmap, and those
are sufficiently rare that a couple of cr3 reloads won't hurt.

Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/include/asm-x86/pgtable-3level.h b/include/asm-x86/pgtable-3level.h
index ed4c6f0..ad71960 100644
--- a/include/asm-x86/pgtable-3level.h
+++ b/include/asm-x86/pgtable-3level.h
@@ -93,17 +93,20 @@ static inline void native_pmd_clear(pmd_t *pmd)
 
 static inline void pud_clear(pud_t *pudp)
 {
+	unsigned long pgd;
+
 	set_pud(pudp, __pud(0));
 
 	/*
 	 * Pentium-II erratum A13: in PAE mode we explicitly have to flush
 	 * the TLB via cr3 if the top-level pgd is changed...
 	 *
-	 * XXX I don't think we need to worry about this here, since
-	 * when clearing the pud, the calling code needs to flush the
-	 * tlb anyway.  But do it now for safety's sake. - jsgf
+	 * Make sure the pud entry we're updating is within the
+	 * current pgd to avoid unnecessary TLB flushes.
 	 */
-	write_cr3(read_cr3());
+	pgd = read_cr3();
+	if (__pa(pudp) >= pgd && __pa(pudp) < (pgd + sizeof(pgd_t)*PTRS_PER_PGD))
+		write_cr3(pgd);
 }
 
 #define pud_page(pud) \
-- 
cgit v0.10.2


From f5430f93257d336346a9018c915e879ce43f5f89 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Mon, 4 Feb 2008 16:48:02 +0100
Subject: x86: update reference for PAE tlb flushing

Remove bogus reference to "Pentium-II erratum A13" and point to the
actual canonical source of information about what requirements x86
processors have for PAE pagetable updates.

Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/include/asm-x86/pgalloc_32.h b/include/asm-x86/pgalloc_32.h
index 7641e7b..6c21ef9 100644
--- a/include/asm-x86/pgalloc_32.h
+++ b/include/asm-x86/pgalloc_32.h
@@ -80,8 +80,10 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd)
 	set_pud(pudp, __pud(__pa(pmd) | _PAGE_PRESENT));
 
 	/*
-	 * Pentium-II erratum A13: in PAE mode we explicitly have to flush
-	 * the TLB via cr3 if the top-level pgd is changed...
+	 * According to Intel App note "TLBs, Paging-Structure Caches,
+	 * and Their Invalidation", April 2007, document 317080-001,
+	 * section 8.1: in PAE mode we explicitly have to flush the
+	 * TLB via cr3 if the top-level pgd is changed...
 	 */
 	if (mm == current->active_mm)
 		write_cr3(read_cr3());
diff --git a/include/asm-x86/pgtable-3level.h b/include/asm-x86/pgtable-3level.h
index ad71960..1d763ee 100644
--- a/include/asm-x86/pgtable-3level.h
+++ b/include/asm-x86/pgtable-3level.h
@@ -98,8 +98,10 @@ static inline void pud_clear(pud_t *pudp)
 	set_pud(pudp, __pud(0));
 
 	/*
-	 * Pentium-II erratum A13: in PAE mode we explicitly have to flush
-	 * the TLB via cr3 if the top-level pgd is changed...
+	 * According to Intel App note "TLBs, Paging-Structure Caches,
+	 * and Their Invalidation", April 2007, document 317080-001,
+	 * section 8.1: in PAE mode we explicitly have to flush the
+	 * TLB via cr3 if the top-level pgd is changed...
 	 *
 	 * Make sure the pud entry we're updating is within the
 	 * current pgd to avoid unnecessary TLB flushes.
-- 
cgit v0.10.2


From fa0c864d998c9c97d11db097d5736028d5c80985 Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@gmail.com>
Date: Mon, 4 Feb 2008 16:48:03 +0100
Subject: x86: cleanup - eliminate numbers in LDT allocation code

This patch replaces the magic numbers in the LDT allocation code
with expressions built from named constants, to make it clear
where these numbers come from.

No code changed:

   text    data     bss     dec     hex filename
   1896       0       0    1896     768 ldt.o.before
   1896       0       0    1896     768 ldt.o.after
md5:
 6cbec8705008ddb4b704aade60bceda3  ldt.o.before.asm
 6cbec8705008ddb4b704aade60bceda3  ldt.o.after.asm

Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index 8a7660c..0224c36 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -35,7 +35,8 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload)
 	if (mincount <= pc->size)
 		return 0;
 	oldsize = pc->size;
-	mincount = (mincount + 511) & (~511);
+	mincount = (mincount + (PAGE_SIZE / LDT_ENTRY_SIZE - 1)) &
+			(~(PAGE_SIZE / LDT_ENTRY_SIZE - 1));
 	if (mincount * LDT_ENTRY_SIZE > PAGE_SIZE)
 		newldt = vmalloc(mincount * LDT_ENTRY_SIZE);
 	else
-- 
cgit v0.10.2


From c7e844f0415252c7e1a2153a97e7a0c511d61ada Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Mon, 4 Feb 2008 16:48:03 +0100
Subject: x86: move NUMAQ io handling into arch/x86/pci/numa.c

numa.c is the only user of the {in,out}*_quad functions. And it has only a few call
sites. Change them to open code the magic NUMAQ port access.

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/arch/x86/pci/numa.c b/arch/x86/pci/numa.c
index f5f165f..55270c2 100644
--- a/arch/x86/pci/numa.c
+++ b/arch/x86/pci/numa.c
@@ -5,36 +5,62 @@
 #include <linux/pci.h>
 #include <linux/init.h>
 #include <linux/nodemask.h>
+#include <mach_apic.h>
 #include "pci.h"
 
+#define XQUAD_PORTIO_BASE 0xfe400000
+#define XQUAD_PORTIO_QUAD 0x40000  /* 256k per quad. */
+
 #define BUS2QUAD(global) (mp_bus_id_to_node[global])
 #define BUS2LOCAL(global) (mp_bus_id_to_local[global])
 #define QUADLOCAL2BUS(quad,local) (quad_local_to_mp_bus_id[quad][local])
 
+extern void *xquad_portio;    /* Where the IO area was mapped */
+#define XQUAD_PORT_ADDR(port, quad) (xquad_portio + (XQUAD_PORTIO_QUAD*quad) + port)
+
 #define PCI_CONF1_MQ_ADDRESS(bus, devfn, reg) \
 	(0x80000000 | (BUS2LOCAL(bus) << 16) | (devfn << 8) | (reg & ~3))
 
+static void write_cf8(unsigned bus, unsigned devfn, unsigned reg)
+{
+	unsigned val = PCI_CONF1_MQ_ADDRESS(bus, devfn, reg);
+	if (xquad_portio)
+		writel(val, XQUAD_PORT_ADDR(0xcf8, BUS2QUAD(bus)));
+	else
+		outl(val, 0xCF8);
+}
+
 static int pci_conf1_mq_read(unsigned int seg, unsigned int bus,
 			     unsigned int devfn, int reg, int len, u32 *value)
 {
 	unsigned long flags;
+	void *adr __iomem = XQUAD_PORT_ADDR(0xcfc, BUS2QUAD(bus));
 
 	if (!value || (bus >= MAX_MP_BUSSES) || (devfn > 255) || (reg > 255))
 		return -EINVAL;
 
 	spin_lock_irqsave(&pci_config_lock, flags);
 
-	outl_quad(PCI_CONF1_MQ_ADDRESS(bus, devfn, reg), 0xCF8, BUS2QUAD(bus));
+	write_cf8(bus, devfn, reg);
 
 	switch (len) {
 	case 1:
-		*value = inb_quad(0xCFC + (reg & 3), BUS2QUAD(bus));
+		if (xquad_portio)
+			*value = readb(adr + (reg & 3));
+		else
+			*value = inb(0xCFC + (reg & 3));
 		break;
 	case 2:
-		*value = inw_quad(0xCFC + (reg & 2), BUS2QUAD(bus));
+		if (xquad_portio)
+			*value = readw(adr + (reg & 2));
+		else
+			*value = inw(0xCFC + (reg & 2));
 		break;
 	case 4:
-		*value = inl_quad(0xCFC, BUS2QUAD(bus));
+		if (xquad_portio)
+			*value = readl(adr);
+		else
+			*value = inl(0xCFC);
 		break;
 	}
 
@@ -47,23 +73,33 @@ static int pci_conf1_mq_write(unsigned int seg, unsigned int bus,
 			      unsigned int devfn, int reg, int len, u32 value)
 {
 	unsigned long flags;
+	void *adr __iomem = XQUAD_PORT_ADDR(0xcfc, BUS2QUAD(bus));
 
 	if ((bus >= MAX_MP_BUSSES) || (devfn > 255) || (reg > 255)) 
 		return -EINVAL;
 
 	spin_lock_irqsave(&pci_config_lock, flags);
 
-	outl_quad(PCI_CONF1_MQ_ADDRESS(bus, devfn, reg), 0xCF8, BUS2QUAD(bus));
+	write_cf8(bus, devfn, reg);
 
 	switch (len) {
 	case 1:
-		outb_quad((u8)value, 0xCFC + (reg & 3), BUS2QUAD(bus));
+		if (xquad_portio)
+			writeb(value, adr + (reg & 3));
+		else
+			outb((u8)value, 0xCFC + (reg & 3));
 		break;
 	case 2:
-		outw_quad((u16)value, 0xCFC + (reg & 2), BUS2QUAD(bus));
+		if (xquad_portio)
+			writew(value, adr + (reg & 2));
+		else
+			outw((u16)value, 0xCFC + (reg & 2));
 		break;
 	case 4:
-		outl_quad((u32)value, 0xCFC, BUS2QUAD(bus));
+		if (xquad_portio)
+			writel(value, adr + reg);
+		else
+			outl((u32)value, 0xCFC);
 		break;
 	}
 
diff --git a/include/asm-x86/mach-numaq/mach_apic.h b/include/asm-x86/mach-numaq/mach_apic.h
index 17e183b..3b637fa 100644
--- a/include/asm-x86/mach-numaq/mach_apic.h
+++ b/include/asm-x86/mach-numaq/mach_apic.h
@@ -109,6 +109,8 @@ static inline int mpc_apic_id(struct mpc_config_processor *m,
 	return logical_apicid;
 }
 
+extern void *xquad_portio;
+
 static inline void setup_portio_remap(void)
 {
 	int num_quads = num_online_nodes();
-- 
cgit v0.10.2


From 1fba38703d0ce8a5ff0fad9df3eccc6b55cf2cfb Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Mon, 4 Feb 2008 16:48:03 +0100
Subject: x86: remove special NUMAQ support in io_32.h

Now that the only user does it on its own, remove the NUMAQ support macros
in io_32.h.

The next step would be to convert the preprocessor mess to actually readable
standard inlines.

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/include/asm-x86/io_32.h b/include/asm-x86/io_32.h
index 586d7aa..58d2c45 100644
--- a/include/asm-x86/io_32.h
+++ b/include/asm-x86/io_32.h
@@ -275,29 +275,6 @@ static inline void slow_down_io(void) {
 
 #endif
 
-#ifdef CONFIG_X86_NUMAQ
-extern void *xquad_portio;    /* Where the IO area was mapped */
-#define XQUAD_PORT_ADDR(port, quad) (xquad_portio + (XQUAD_PORTIO_QUAD*quad) + port)
-#define __BUILDIO(bwl,bw,type) \
-static inline void out##bwl##_quad(unsigned type value, int port, int quad) { \
-	if (xquad_portio) \
-		write##bwl(value, XQUAD_PORT_ADDR(port, quad)); \
-	else \
-		out##bwl##_local(value, port); \
-} \
-static inline void out##bwl(unsigned type value, int port) { \
-	out##bwl##_quad(value, port, 0); \
-} \
-static inline unsigned type in##bwl##_quad(int port, int quad) { \
-	if (xquad_portio) \
-		return read##bwl(XQUAD_PORT_ADDR(port, quad)); \
-	else \
-		return in##bwl##_local(port); \
-} \
-static inline unsigned type in##bwl(int port) { \
-	return in##bwl##_quad(port, 0); \
-}
-#else
 #define __BUILDIO(bwl,bw,type) \
 static inline void out##bwl(unsigned type value, int port) { \
 	out##bwl##_local(value, port); \
@@ -305,8 +282,6 @@ static inline void out##bwl(unsigned type value, int port) { \
 static inline unsigned type in##bwl(int port) { \
 	return in##bwl##_local(port); \
 }
-#endif
-
 
 #define BUILDIO(bwl,bw,type) \
 static inline void out##bwl##_local(unsigned type value, int port) { \
-- 
cgit v0.10.2


From 599db4fe23d3869af98e2addef5628faef550f60 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Mon, 4 Feb 2008 16:48:03 +0100
Subject: x86: remove final FASTCALL() uses

A few snuck back in to x86.

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/include/asm-x86/highmem.h b/include/asm-x86/highmem.h
index 13cdcd6..c25cfca 100644
--- a/include/asm-x86/highmem.h
+++ b/include/asm-x86/highmem.h
@@ -63,8 +63,8 @@ extern pte_t *pkmap_page_table;
 #define PKMAP_NR(virt)  ((virt-PKMAP_BASE) >> PAGE_SHIFT)
 #define PKMAP_ADDR(nr)  (PKMAP_BASE + ((nr) << PAGE_SHIFT))
 
-extern void * FASTCALL(kmap_high(struct page *page));
-extern void FASTCALL(kunmap_high(struct page *page));
+extern void *kmap_high(struct page *page);
+extern void kunmap_high(struct page *page);
 
 void *kmap(struct page *page);
 void kunmap(struct page *page);
diff --git a/include/asm-x86/hw_irq_32.h b/include/asm-x86/hw_irq_32.h
index 6d65fbb..ea88054 100644
--- a/include/asm-x86/hw_irq_32.h
+++ b/include/asm-x86/hw_irq_32.h
@@ -47,7 +47,7 @@ void enable_8259A_irq(unsigned int irq);
 int i8259A_irq_pending(unsigned int irq);
 void make_8259A_irq(unsigned int irq);
 void init_8259A(int aeoi);
-void FASTCALL(send_IPI_self(int vector));
+void send_IPI_self(int vector);
 void init_VISWS_APIC_irqs(void);
 void setup_IO_APIC(void);
 void disable_IO_APIC(void);
diff --git a/include/asm-x86/system.h b/include/asm-x86/system.h
index e9c15c9..9cff02f 100644
--- a/include/asm-x86/system.h
+++ b/include/asm-x86/system.h
@@ -20,8 +20,8 @@
 #ifdef CONFIG_X86_32
 
 struct task_struct; /* one of the stranger aspects of C forward declarations */
-extern struct task_struct *FASTCALL(__switch_to(struct task_struct *prev,
-						struct task_struct *next));
+struct task_struct *__switch_to(struct task_struct *prev,
+				struct task_struct *next);
 
 /*
  * Saving eflags is important. It switches not only IOPL between tasks,
-- 
cgit v0.10.2


From 73bdb73f6666228289af4be55a77e2ed978061a7 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Mon, 4 Feb 2008 16:48:04 +0100
Subject: x86: add include to cpu/intel.c

Fixes sparse warning:

arch/x86/kernel/cpu/intel.c:48:15: warning: symbol 'ppro_with_ram_bug' was not declared. Should it be static?

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index d1c372b..fae31ce 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -13,6 +13,7 @@
 #include <asm/uaccess.h>
 #include <asm/ptrace.h>
 #include <asm/ds.h>
+#include <asm/bugs.h>
 
 #include "cpu.h"
 
diff --git a/include/asm-x86/bugs.h b/include/asm-x86/bugs.h
index 3fcc30d..021cbdd 100644
--- a/include/asm-x86/bugs.h
+++ b/include/asm-x86/bugs.h
@@ -2,6 +2,6 @@
 #define _ASM_X86_BUGS_H
 
 extern void check_bugs(void);
-extern int ppro_with_ram_bug(void);
+int ppro_with_ram_bug(void);
 
 #endif /* _ASM_X86_BUGS_H */
-- 
cgit v0.10.2


From e04f99c987a82f075fcc2bceda351d7610802a88 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Mon, 4 Feb 2008 16:48:04 +0100
Subject: x86: add function prototype to vm86.h

Global functions should include their prototypes.

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/include/asm-x86/vm86.h b/include/asm-x86/vm86.h
index a5edf51..c92fe4a 100644
--- a/include/asm-x86/vm86.h
+++ b/include/asm-x86/vm86.h
@@ -195,6 +195,7 @@ struct kernel_vm86_struct {
 
 void handle_vm86_fault(struct kernel_vm86_regs *, long);
 int handle_vm86_trap(struct kernel_vm86_regs *, long, int);
+struct pt_regs *save_v86_state(struct kernel_vm86_regs *);
 
 struct task_struct;
 void release_vm86_irqs(struct task_struct *);
-- 
cgit v0.10.2


From 7bb308a1eae2a3b869c498017aed15a699d80799 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Mon, 4 Feb 2008 16:48:04 +0100
Subject: x86: small sparse fix in process_32.c

arch/x86/kernel/process_32.c:254:43: warning: Using plain integer as NULL pointer

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 968371a..dabdbef 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -251,7 +251,7 @@ void cpu_idle_wait(void)
 		 * because it has nothing to do.
 		 * Give all the remaining CPUS a kick.
 		 */
-		smp_call_function_mask(map, do_nothing, 0, 0);
+		smp_call_function_mask(map, do_nothing, NULL, 0);
 	} while (!cpus_empty(map));
 
 	set_cpus_allowed(current, tmp);
-- 
cgit v0.10.2


From b6d549a2967881af4f02d02062acbfeb807d44b4 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Mon, 4 Feb 2008 16:48:04 +0100
Subject: x86: add cpu init function definitions to cpu.h

cpu.h was already included everywhere needed.

Fixes following sparse warnings:

arch/x86/kernel/cpu/amd.c:343:12: warning: symbol 'amd_init_cpu' was not declared. Should it be static?
arch/x86/kernel/cpu/cyrix.c:444:12: warning: symbol 'cyrix_init_cpu' was not declared. Should it be static?
arch/x86/kernel/cpu/cyrix.c:456:12: warning: symbol 'nsc_init_cpu' was not declared. Should it be static?
arch/x86/kernel/cpu/centaur.c:467:12: warning: symbol 'centaur_init_cpu' was not declared. Should it be static?
arch/x86/kernel/cpu/transmeta.c:112:12: warning: symbol 'transmeta_init_cpu' was not declared. Should it be static?
arch/x86/kernel/cpu/intel.c:296:12: warning: symbol 'intel_cpu_init' was not declared. Should it be static?
arch/x86/kernel/cpu/nexgen.c:56:12: warning: symbol 'nexgen_init_cpu' was not declared. Should it be static?
arch/x86/kernel/cpu/umc.c:22:12: warning: symbol 'umc_init_cpu' was not declared. Should it be static?

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index b7b2142..d9313d9 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -623,16 +623,6 @@ cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE;
  * They will insert themselves into the cpu_devs structure.
  * Then, when cpu_init() is called, we can just iterate over that array.
  */
-
-extern int intel_cpu_init(void);
-extern int cyrix_init_cpu(void);
-extern int nsc_init_cpu(void);
-extern int amd_init_cpu(void);
-extern int centaur_init_cpu(void);
-extern int transmeta_init_cpu(void);
-extern int nexgen_init_cpu(void);
-extern int umc_init_cpu(void);
-
 void __init early_cpu_init(void)
 {
 	intel_cpu_init();
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h
index ad6527a..e0b38c3 100644
--- a/arch/x86/kernel/cpu/cpu.h
+++ b/arch/x86/kernel/cpu/cpu.h
@@ -27,3 +27,12 @@ extern void display_cacheinfo(struct cpuinfo_x86 *c);
 extern void early_init_intel(struct cpuinfo_x86 *c);
 extern void early_init_amd(struct cpuinfo_x86 *c);
 
+/* Specific CPU type init functions */
+int intel_cpu_init(void);
+int amd_init_cpu(void);
+int cyrix_init_cpu(void);
+int nsc_init_cpu(void);
+int centaur_init_cpu(void);
+int transmeta_init_cpu(void);
+int nexgen_init_cpu(void);
+int umc_init_cpu(void);
-- 
cgit v0.10.2


From 16c02ed74361433a4fc5d8bd5f67abbac6e1c5ca Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Mon, 4 Feb 2008 16:48:05 +0100
Subject: x86: add cpu mtrr init function definitions to mtrr.h

mtrr.h was included everywhere needed.  Fixes the following sparse
warnings.  Also, the return types in the extern definitions were
incorrect.

arch/x86/kernel/cpu/mtrr/amd.c:113:12: warning: symbol 'amd_init_mtrr' was not declared. Should it be static?
arch/x86/kernel/cpu/mtrr/cyrix.c:268:12: warning: symbol 'cyrix_init_mtrr' was not declared. Should it be static?
arch/x86/kernel/cpu/mtrr/centaur.c:218:12: warning: symbol 'centaur_init_mtrr' was not declared. Should it be static?

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 822d8f90c..1e27b69 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -554,10 +554,6 @@ EXPORT_SYMBOL(mtrr_del);
  * These should be called implicitly, but we can't yet until all the initcall
  * stuff is done...
  */
-extern void amd_init_mtrr(void);
-extern void cyrix_init_mtrr(void);
-extern void centaur_init_mtrr(void);
-
 static void __init init_ifs(void)
 {
 #ifndef CONFIG_X86_64
diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h
index fb74a2c..2cc77eb 100644
--- a/arch/x86/kernel/cpu/mtrr/mtrr.h
+++ b/arch/x86/kernel/cpu/mtrr/mtrr.h
@@ -97,3 +97,7 @@ void mtrr_state_warn(void);
 const char *mtrr_attrib_to_str(int x);
 void mtrr_wrmsr(unsigned, unsigned, unsigned);
 
+/* CPU specific mtrr init functions */
+int amd_init_mtrr(void);
+int cyrix_init_mtrr(void);
+int centaur_init_mtrr(void);
-- 
cgit v0.10.2


From cc0f21bbc12dc9f05b2e7f2469128f8717b2f4d3 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Mon, 4 Feb 2008 16:48:05 +0100
Subject: x86: teach the static_protection function about high mappings

Right now, enforcing that the high mapping of the kernel text doesn't
get the NX bit is done deep in the guts of CPA, rather than in the
static_protection() function that enforces all other per-arch sanity
checks.

This patch moves this sanity check into the central static_protection()
function instead, and makes it apply ONLY to the kernel text, not to all
other areas in the high mapping.

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 877b5cc..bf5e33f 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -106,6 +106,22 @@ static void cpa_flush_range(unsigned long start, int numpages)
 	}
 }
 
+#define HIGH_MAP_START	__START_KERNEL_map
+#define HIGH_MAP_END	(__START_KERNEL_map + KERNEL_TEXT_SIZE)
+
+
+/*
+ * Converts a virtual address to a X86-64 highmap address
+ */
+static unsigned long virt_to_highmap(void *address)
+{
+#ifdef CONFIG_X86_64
+	return __pa((unsigned long)address) + HIGH_MAP_START - phys_base;
+#else
+	return (unsigned long)address;
+#endif
+}
+
 /*
  * Certain areas of memory on x86 require very specific protection flags,
  * for example the BIOS area or kernel text. Callers don't always get this
@@ -129,12 +145,24 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address)
 	 */
 	if (within(address, (unsigned long)_text, (unsigned long)_etext))
 		pgprot_val(forbidden) |= _PAGE_NX;
+	/*
+	 * Do the same for the x86-64 high kernel mapping
+	 */
+	if (within(address, virt_to_highmap(_text), virt_to_highmap(_etext)))
+		pgprot_val(forbidden) |= _PAGE_NX;
+
 
 #ifdef CONFIG_DEBUG_RODATA
 	/* The .rodata section needs to be read-only */
 	if (within(address, (unsigned long)__start_rodata,
 				(unsigned long)__end_rodata))
 		pgprot_val(forbidden) |= _PAGE_RW;
+	/*
+	 * Do the same for the x86-64 high kernel mapping
+	 */
+	if (within(address, virt_to_highmap(__start_rodata),
+				virt_to_highmap(__end_rodata)))
+		pgprot_val(forbidden) |= _PAGE_RW;
 #endif
 
 	prot = __pgprot(pgprot_val(prot) & ~pgprot_val(forbidden));
@@ -304,8 +332,6 @@ repeat:
  * Modules and drivers should use the set_memory_* APIs instead.
  */
 
-#define HIGH_MAP_START	__START_KERNEL_map
-#define HIGH_MAP_END	(__START_KERNEL_map + KERNEL_TEXT_SIZE)
 
 static int
 change_page_attr_addr(unsigned long address, pgprot_t mask_set,
@@ -338,10 +364,11 @@ change_page_attr_addr(unsigned long address, pgprot_t mask_set,
 		/*
 		 * Calc the high mapping address. See __phys_addr()
 		 * for the non obvious details.
+		 *
+		 * Note that NX and other required permissions are
+		 * checked in static_protections().
 		 */
 		address = phys_addr + HIGH_MAP_START - phys_base;
-		/* Make sure the kernel mappings stay executable */
-		pgprot_val(mask_clr) |= _PAGE_NX;
 
 		/*
 		 * Our high aliases are imprecise, because we check
-- 
cgit v0.10.2


From 626c2c9d065da0cbd9997e112501487958fde690 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Mon, 4 Feb 2008 16:48:05 +0100
Subject: x86: use the pfn from the page when change its attributes

When changing the attributes of a pte, we should use the PFN from the
existing PTE rather than jumping through hoops calculating what we think
it might have been; this is both fragile and totally unneeded. It also
makes it more hairy to call any of these functions on non-direct maps
for no good reason whatsoever.

With this change, __change_page_attr() no longer takes a pfn as argument,
which simplifies all the callers.

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@tglx.de>

diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index bf5e33f..6c55fbd 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -277,17 +277,12 @@ out_unlock:
 }
 
 static int
-__change_page_attr(unsigned long address, unsigned long pfn,
-		   pgprot_t mask_set, pgprot_t mask_clr)
+__change_page_attr(unsigned long address, pgprot_t mask_set, pgprot_t mask_clr)
 {
 	struct page *kpte_page;
 	int level, err = 0;
 	pte_t *kpte;
 
-#ifdef CONFIG_X86_32
-	BUG_ON(pfn > max_low_pfn);
-#endif
-
 repeat:
 	kpte = lookup_address(address, &level);
 	if (!kpte)
@@ -298,17 +293,25 @@ repeat:
 	BUG_ON(PageCompound(kpte_page));
 
 	if (level == PG_LEVEL_4K) {
-		pgprot_t new_prot = pte_pgprot(*kpte);
 		pte_t new_pte, old_pte = *kpte;
+		pgprot_t new_prot = pte_pgprot(old_pte);
+
+		if(!pte_val(old_pte)) {
+			WARN_ON_ONCE(1);
+			return -EINVAL;
+		}
 
 		pgprot_val(new_prot) &= ~pgprot_val(mask_clr);
 		pgprot_val(new_prot) |= pgprot_val(mask_set);
 
 		new_prot = static_protections(new_prot, address);
 
-		new_pte = pfn_pte(pfn, canon_pgprot(new_prot));
-		BUG_ON(pte_pfn(new_pte) != pte_pfn(old_pte));
-
+		/*
+		 * We need to keep the pfn from the existing PTE,
+		 * after all we're only going to change it's attributes
+		 * not the memory it points to
+		 */
+		new_pte = pfn_pte(pte_pfn(old_pte), canon_pgprot(new_prot));
 		set_pte_atomic(kpte, new_pte);
 	} else {
 		err = split_large_page(kpte, address);
@@ -337,11 +340,11 @@ static int
 change_page_attr_addr(unsigned long address, pgprot_t mask_set,
 		      pgprot_t mask_clr)
 {
-	unsigned long phys_addr = __pa(address);
-	unsigned long pfn = phys_addr >> PAGE_SHIFT;
 	int err;
 
 #ifdef CONFIG_X86_64
+	unsigned long phys_addr = __pa(address);
+
 	/*
 	 * If we are inside the high mapped kernel range, then we
 	 * fixup the low mapping first. __va() returns the virtual
@@ -351,7 +354,7 @@ change_page_attr_addr(unsigned long address, pgprot_t mask_set,
 		address = (unsigned long) __va(phys_addr);
 #endif
 
-	err = __change_page_attr(address, pfn, mask_set, mask_clr);
+	err = __change_page_attr(address, mask_set, mask_clr);
 	if (err)
 		return err;
 
@@ -375,7 +378,7 @@ change_page_attr_addr(unsigned long address, pgprot_t mask_set,
 		 * everything between 0 and KERNEL_TEXT_SIZE, so do
 		 * not propagate lookup failures back to users:
 		 */
-		__change_page_attr(address, pfn, mask_set, mask_clr);
+		__change_page_attr(address, mask_set, mask_clr);
 	}
 #endif
 	return err;
-- 
cgit v0.10.2


From 63c1dcf4bc9a26b1d8baa9a8c7cc1b2e1e694011 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 4 Feb 2008 16:48:05 +0100
Subject: x86: CPA use the existing pfn in split as well

When splitting large pages, we get the pfn from the existing entry
instead of calculating it ourselves.

This removes the last remaining range restriction of the cpa code.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 6c55fbd..a629cea5e 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -221,8 +221,7 @@ static int split_large_page(pte_t *kpte, unsigned long address)
 {
 	pgprot_t ref_prot = pte_pgprot(pte_clrhuge(*kpte));
 	gfp_t gfp_flags = GFP_KERNEL;
-	unsigned long flags;
-	unsigned long addr;
+	unsigned long flags, addr, pfn;
 	pte_t *pbase, *tmp;
 	struct page *base;
 	unsigned int i, level;
@@ -253,8 +252,12 @@ static int split_large_page(pte_t *kpte, unsigned long address)
 	paravirt_alloc_pt(&init_mm, page_to_pfn(base));
 #endif
 
-	for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE)
-		set_pte(&pbase[i], pfn_pte(addr >> PAGE_SHIFT, ref_prot));
+	/*
+	 * Get the target pfn from the original entry:
+	 */
+	pfn = pte_pfn(*kpte);
+	for (i = 0; i < PTRS_PER_PTE; i++, pfn++)
+		set_pte(&pbase[i], pfn_pte(pfn, ref_prot));
 
 	/*
 	 * Install the new, split up pagetable. Important detail here:
-- 
cgit v0.10.2


From e66aadbe6cb90813b3bbf07e3bc2a6aedcef7cd1 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 4 Feb 2008 16:48:05 +0100
Subject: x86: simplify __ioremap

Remove tons of castings which make the code hard to read.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 1a88d15..2c3fa71 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -114,9 +114,8 @@ static int ioremap_change_attr(unsigned long paddr, unsigned long size,
 static void __iomem *__ioremap(unsigned long phys_addr, unsigned long size,
 			       enum ioremap_mode mode)
 {
-	void __iomem *addr;
+	unsigned long pfn, offset, last_addr, vaddr;
 	struct vm_struct *area;
-	unsigned long pfn, offset, last_addr;
 	pgprot_t prot;
 
 	/* Don't allow wraparound or zero size */
@@ -164,19 +163,18 @@ static void __iomem *__ioremap(unsigned long phys_addr, unsigned long size,
 	if (!area)
 		return NULL;
 	area->phys_addr = phys_addr;
-	addr = (void __iomem *) area->addr;
-	if (ioremap_page_range((unsigned long)addr, (unsigned long)addr + size,
-			       phys_addr, prot)) {
-		remove_vm_area((void *)(PAGE_MASK & (unsigned long) addr));
+	vaddr = (unsigned long) area->addr;
+	if (ioremap_page_range(vaddr, vaddr + size, phys_addr, prot)) {
+		remove_vm_area((void *)(vaddr & PAGE_MASK));
 		return NULL;
 	}
 
 	if (ioremap_change_attr(phys_addr, size, mode) < 0) {
-		vunmap(addr);
+		vunmap(area->addr);
 		return NULL;
 	}
 
-	return (void __iomem *) (offset + (char __iomem *)addr);
+	return (void __iomem *) (vaddr + offset);
 }
 
 /**
-- 
cgit v0.10.2


From 75ab43bfce51085ffd627c470f48ae49ba6e6da3 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 4 Feb 2008 16:48:05 +0100
Subject: x86: ioremap remove the range check of cpa

Now that cpa works on non-direct mappings as well, we can safely
remove the range check in ioremap_change_attr().

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 2c3fa71..4e21231 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -70,25 +70,12 @@ int page_is_ram(unsigned long pagenr)
  * Fix up the linear direct mapping of the kernel to avoid cache attribute
  * conflicts.
  */
-static int ioremap_change_attr(unsigned long paddr, unsigned long size,
+static int ioremap_change_attr(unsigned long vaddr, unsigned long size,
 			       enum ioremap_mode mode)
 {
-	unsigned long vaddr = (unsigned long)__va(paddr);
 	unsigned long nrpages = size >> PAGE_SHIFT;
-	unsigned int level;
 	int err;
 
-	/* No change for pages after the last mapping */
-	if ((paddr + size - 1) >= (max_pfn_mapped << PAGE_SHIFT))
-		return 0;
-
-	/*
-	 * If there is no identity map for this address,
-	 * change_page_attr_addr is unnecessary
-	 */
-	if (!lookup_address(vaddr, &level))
-		return 0;
-
 	switch (mode) {
 	case IOR_MODE_UNCACHED:
 	default:
@@ -169,7 +156,7 @@ static void __iomem *__ioremap(unsigned long phys_addr, unsigned long size,
 		return NULL;
 	}
 
-	if (ioremap_change_attr(phys_addr, size, mode) < 0) {
+	if (ioremap_change_attr(vaddr, size, mode) < 0) {
 		vunmap(area->addr);
 		return NULL;
 	}
-- 
cgit v0.10.2


From f56d005d30342a45d8af2b75ecccc82200f09600 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 4 Feb 2008 16:48:05 +0100
Subject: x86: no CPA on iounmap

When an ioremap is unmapped, do not change the page attributes. There might
be another mapping of the same physical address. PAT might detect a conflicting
mapping attribute for no good reason. The mapping is removed anyway.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 4e21231..ee6648f 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -240,9 +240,6 @@ void iounmap(volatile void __iomem *addr)
 		return;
 	}
 
-	/* Reset the direct mapping. Can block */
-	ioremap_change_attr(p->phys_addr, p->size, IOR_MODE_CACHED);
-
 	/* Finally remove it */
 	o = remove_vm_area((void *)addr);
 	BUG_ON(p != o || o == NULL);
-- 
cgit v0.10.2


From 1c083eb2cbdd917149f6acaa55efca129d05c2a9 Mon Sep 17 00:00:00 2001
From: "Huang, Ying" <ying.huang@intel.com>
Date: Mon, 4 Feb 2008 16:48:06 +0100
Subject: x86: fix EFI mapping

The patch updates EFI runtime memory mapping code, by making EFI
areas explicitly executable.

Signed-off-by: Huang Ying <ying.huang@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c
index 1411324..32dd62b 100644
--- a/arch/x86/kernel/efi.c
+++ b/arch/x86/kernel/efi.c
@@ -379,11 +379,9 @@ void __init efi_init(void)
 #endif
 }
 
-#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
 static void __init runtime_code_page_mkexec(void)
 {
 	efi_memory_desc_t *md;
-	unsigned long end;
 	void *p;
 
 	if (!(__supported_pte_mask & _PAGE_NX))
@@ -392,18 +390,13 @@ static void __init runtime_code_page_mkexec(void)
 	/* Make EFI runtime service code area executable */
 	for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
 		md = p;
-		end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT);
-		if (md->type == EFI_RUNTIME_SERVICES_CODE &&
-		    (end >> PAGE_SHIFT) <= max_pfn_mapped) {
-			set_memory_x(md->virt_addr, md->num_pages);
-			set_memory_uc(md->virt_addr, md->num_pages);
-		}
+
+		if (md->type != EFI_RUNTIME_SERVICES_CODE)
+			continue;
+
+		set_memory_x(md->virt_addr, md->num_pages << EFI_PAGE_SHIFT);
 	}
-	__flush_tlb_all();
 }
-#else
-static inline void __init runtime_code_page_mkexec(void) { }
-#endif
 
 /*
  * This function will switch the EFI runtime services to virtual mode.
@@ -417,30 +410,40 @@ void __init efi_enter_virtual_mode(void)
 {
 	efi_memory_desc_t *md;
 	efi_status_t status;
-	unsigned long end;
-	void *p;
+	unsigned long size;
+	u64 end, systab;
+	void *p, *va;
 
 	efi.systab = NULL;
 	for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
 		md = p;
 		if (!(md->attribute & EFI_MEMORY_RUNTIME))
 			continue;
-		end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT);
-		if ((md->attribute & EFI_MEMORY_WB) &&
-		    ((end >> PAGE_SHIFT) <= max_pfn_mapped))
-			md->virt_addr = (unsigned long)__va(md->phys_addr);
+
+		size = md->num_pages << EFI_PAGE_SHIFT;
+		end = md->phys_addr + size;
+
+		if ((end >> PAGE_SHIFT) <= max_pfn_mapped)
+			va = __va(md->phys_addr);
 		else
-			md->virt_addr = (unsigned long)
-				efi_ioremap(md->phys_addr,
-					    md->num_pages << EFI_PAGE_SHIFT);
-		if (!md->virt_addr)
+			va = efi_ioremap(md->phys_addr, size);
+
+		if (md->attribute & EFI_MEMORY_WB)
+			set_memory_uc(md->virt_addr, size);
+
+		md->virt_addr = (u64) (unsigned long) va;
+
+		if (!va) {
 			printk(KERN_ERR PFX "ioremap of 0x%llX failed!\n",
 			       (unsigned long long)md->phys_addr);
-		if ((md->phys_addr <= (unsigned long)efi_phys.systab) &&
-		    ((unsigned long)efi_phys.systab < end))
-			efi.systab = (efi_system_table_t *)(unsigned long)
-				(md->virt_addr - md->phys_addr +
-				 (unsigned long)efi_phys.systab);
+			continue;
+		}
+
+		systab = (u64) (unsigned long) efi_phys.systab;
+		if (md->phys_addr <= systab && systab < end) {
+			systab += md->virt_addr - md->phys_addr;
+			efi.systab = (efi_system_table_t *) (unsigned long) systab;
+		}
 	}
 
 	BUG_ON(!efi.systab);
diff --git a/arch/x86/kernel/efi_64.c b/arch/x86/kernel/efi_64.c
index 674f237..09d5c23 100644
--- a/arch/x86/kernel/efi_64.c
+++ b/arch/x86/kernel/efi_64.c
@@ -54,10 +54,10 @@ static void __init early_mapping_set_exec(unsigned long start,
 		else
 			set_pte(kpte, __pte((pte_val(*kpte) | _PAGE_NX) & \
 					    __supported_pte_mask));
-		if (level == 4)
-			start = (start + PMD_SIZE) & PMD_MASK;
-		else
+		if (level == PG_LEVEL_4K)
 			start = (start + PAGE_SIZE) & PAGE_MASK;
+		else
+			start = (start + PMD_SIZE) & PMD_MASK;
 	}
 }
 
@@ -109,23 +109,23 @@ void __init efi_reserve_bootmem(void)
 				memmap.nr_map * memmap.desc_size);
 }
 
-void __iomem * __init efi_ioremap(unsigned long offset,
-				  unsigned long size)
+void __iomem * __init efi_ioremap(unsigned long phys_addr, unsigned long size)
 {
 	static unsigned pages_mapped;
-	unsigned long last_addr;
 	unsigned i, pages;
 
-	last_addr = offset + size - 1;
-	offset &= PAGE_MASK;
-	pages = (PAGE_ALIGN(last_addr) - offset) >> PAGE_SHIFT;
+	/* phys_addr and size must be page aligned */
+	if ((phys_addr & ~PAGE_MASK) || (size & ~PAGE_MASK))
+		return NULL;
+
+	pages = size >> PAGE_SHIFT;
 	if (pages_mapped + pages > MAX_EFI_IO_PAGES)
 		return NULL;
 
 	for (i = 0; i < pages; i++) {
 		__set_fixmap(FIX_EFI_IO_MAP_FIRST_PAGE - pages_mapped,
-			     offset, PAGE_KERNEL_EXEC_NOCACHE);
-		offset += PAGE_SIZE;
+			     phys_addr, PAGE_KERNEL);
+		phys_addr += PAGE_SIZE;
 		pages_mapped++;
 	}
 
diff --git a/include/asm-x86/efi.h b/include/asm-x86/efi.h
index 9c68a1f..ea9734b 100644
--- a/include/asm-x86/efi.h
+++ b/include/asm-x86/efi.h
@@ -33,7 +33,7 @@ extern unsigned long asmlinkage efi_call_phys(void *, ...);
 #define efi_call_virt6(f, a1, a2, a3, a4, a5, a6)	\
 	efi_call_virt(f, a1, a2, a3, a4, a5, a6)
 
-#define efi_ioremap(addr, size)			ioremap(addr, size)
+#define efi_ioremap(addr, size)			ioremap_cache(addr, size)
 
 #else /* !CONFIG_X86_32 */
 
@@ -86,7 +86,7 @@ extern u64 efi_call6(void *fp, u64 arg1, u64 arg2, u64 arg3,
 	efi_call6((void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
 		  (u64)(a3), (u64)(a4), (u64)(a5), (u64)(a6))
 
-extern void *efi_ioremap(unsigned long offset, unsigned long size);
+extern void *efi_ioremap(unsigned long addr, unsigned long size);
 
 #endif /* CONFIG_X86_32 */
 
-- 
cgit v0.10.2


From 331e406588dc90331753e6562e5e3757bb907eb8 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 4 Feb 2008 16:48:06 +0100
Subject: x86: CPA return early when requested feature is not available

Mask out the unsupported bits (e.g. NX). If the clr/set masks
are empty after masking, return without changing anything.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index a629cea5e..f60b93d 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -405,8 +405,18 @@ static int __change_page_attr_set_clr(unsigned long addr, int numpages,
 static int change_page_attr_set_clr(unsigned long addr, int numpages,
 				    pgprot_t mask_set, pgprot_t mask_clr)
 {
-	int ret = __change_page_attr_set_clr(addr, numpages, mask_set,
-					     mask_clr);
+	int ret;
+
+	/*
+	 * Check, if we are requested to change a not supported
+	 * feature:
+	 */
+	mask_set = canon_pgprot(mask_set);
+	mask_clr = canon_pgprot(mask_clr);
+	if (!pgprot_val(mask_set) && !pgprot_val(mask_clr))
+		return 0;
+
+	ret = __change_page_attr_set_clr(addr, numpages, mask_set, mask_clr);
 
 	/*
 	 * On success we use clflush, when the CPU supports it to
-- 
cgit v0.10.2


From 9bf5a47572fe4ea4e5ed2691e4313ea0bb68a74e Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Mon, 4 Feb 2008 16:48:06 +0100
Subject: x86: cpa, add the PAT bit defines

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/include/asm-x86/pgtable.h b/include/asm-x86/pgtable.h
index cd2524f..44c0a4f 100644
--- a/include/asm-x86/pgtable.h
+++ b/include/asm-x86/pgtable.h
@@ -13,10 +13,12 @@
 #define _PAGE_BIT_DIRTY		6
 #define _PAGE_BIT_FILE		6
 #define _PAGE_BIT_PSE		7	/* 4 MB (or 2MB) page */
+#define _PAGE_BIT_PAT		7	/* on 4KB pages */
 #define _PAGE_BIT_GLOBAL	8	/* Global TLB entry PPro+ */
 #define _PAGE_BIT_UNUSED1	9	/* available for programmer */
 #define _PAGE_BIT_UNUSED2	10
 #define _PAGE_BIT_UNUSED3	11
+#define _PAGE_BIT_PAT_LARGE	12	/* On 2MB or 1GB pages */
 #define _PAGE_BIT_NX           63       /* No execute: only valid after cpuid check */
 
 /*
@@ -36,6 +38,8 @@
 #define _PAGE_UNUSED1	(_AC(1, L)<<_PAGE_BIT_UNUSED1)
 #define _PAGE_UNUSED2	(_AC(1, L)<<_PAGE_BIT_UNUSED2)
 #define _PAGE_UNUSED3	(_AC(1, L)<<_PAGE_BIT_UNUSED3)
+#define _PAGE_PAT	(_AC(1, L)<<_PAGE_BIT_PAT)
+#define _PAGE_PAT_LARGE (_AC(1, L)<<_PAGE_BIT_PAT_LARGE)
 
 #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
 #define _PAGE_NX	(_AC(1, ULL) << _PAGE_BIT_NX)
-- 
cgit v0.10.2


From 6bb8383bebc02dae08a17f561401f58005f75c03 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Mon, 4 Feb 2008 16:48:06 +0100
Subject: x86: cpa, only flush the cache if the caching attributes have changed

We only need to flush the caches in cpa() if the caching attributes
have changed. Otherwise only flush the TLBs.

This checks the PAT bits too although they are currently not used by
the kernel.

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index f60b93d..456ad0a 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -52,21 +52,23 @@ void clflush_cache_range(void *vaddr, unsigned int size)
 
 static void __cpa_flush_all(void *arg)
 {
+	unsigned long cache = (unsigned long)arg;
+
 	/*
 	 * Flush all to work around Errata in early athlons regarding
 	 * large page flushing.
 	 */
 	__flush_tlb_all();
 
-	if (boot_cpu_data.x86_model >= 4)
+	if (cache && boot_cpu_data.x86_model >= 4)
 		wbinvd();
 }
 
-static void cpa_flush_all(void)
+static void cpa_flush_all(unsigned long cache)
 {
 	BUG_ON(irqs_disabled());
 
-	on_each_cpu(__cpa_flush_all, NULL, 1, 1);
+	on_each_cpu(__cpa_flush_all, (void *) cache, 1, 1);
 }
 
 static void __cpa_flush_range(void *arg)
@@ -79,7 +81,7 @@ static void __cpa_flush_range(void *arg)
 	__flush_tlb_all();
 }
 
-static void cpa_flush_range(unsigned long start, int numpages)
+static void cpa_flush_range(unsigned long start, int numpages, int cache)
 {
 	unsigned int i, level;
 	unsigned long addr;
@@ -89,6 +91,9 @@ static void cpa_flush_range(unsigned long start, int numpages)
 
 	on_each_cpu(__cpa_flush_range, NULL, 1, 1);
 
+	if (!cache)
+		return;
+
 	/*
 	 * We only need to flush on one CPU,
 	 * clflush is a MESI-coherent instruction that
@@ -402,10 +407,16 @@ static int __change_page_attr_set_clr(unsigned long addr, int numpages,
 	return 0;
 }
 
+static inline int cache_attr(pgprot_t attr)
+{
+	return pgprot_val(attr) &
+		(_PAGE_PAT | _PAGE_PAT_LARGE | _PAGE_PWT | _PAGE_PCD);
+}
+
 static int change_page_attr_set_clr(unsigned long addr, int numpages,
 				    pgprot_t mask_set, pgprot_t mask_clr)
 {
-	int ret;
+	int ret, cache;
 
 	/*
 	 * Check, if we are requested to change a not supported
@@ -419,15 +430,21 @@ static int change_page_attr_set_clr(unsigned long addr, int numpages,
 	ret = __change_page_attr_set_clr(addr, numpages, mask_set, mask_clr);
 
 	/*
+	 * No need to flush, when we did not set any of the caching
+	 * attributes:
+	 */
+	cache = cache_attr(mask_set);
+
+	/*
 	 * On success we use clflush, when the CPU supports it to
 	 * avoid the wbindv. If the CPU does not support it and in the
 	 * error case we fall back to cpa_flush_all (which uses
 	 * wbindv):
 	 */
 	if (!ret && cpu_has_clflush)
-		cpa_flush_range(addr, numpages);
+		cpa_flush_range(addr, numpages, cache);
 	else
-		cpa_flush_all();
+		cpa_flush_all(cache);
 
 	return ret;
 }
-- 
cgit v0.10.2


From 72e458dfa63b3db7a46f66b0eb19e9ff4e17fc0e Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 4 Feb 2008 16:48:07 +0100
Subject: x86: introduce struct cpa_data

The number of arguments which need to be transported is increasing
and we want to add flush optimizations and large page preserving.

Create struct cpa_data and pass a pointer instead of increasing the
number of arguments further.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 456ad0a..d1c0830 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -16,6 +16,13 @@
 #include <asm/uaccess.h>
 #include <asm/pgalloc.h>
 
+struct cpa_data {
+	unsigned long	vaddr;
+	int		numpages;
+	pgprot_t	mask_set;
+	pgprot_t	mask_clr;
+};
+
 static inline int
 within(unsigned long addr, unsigned long start, unsigned long end)
 {
@@ -284,8 +291,7 @@ out_unlock:
 	return 0;
 }
 
-static int
-__change_page_attr(unsigned long address, pgprot_t mask_set, pgprot_t mask_clr)
+static int __change_page_attr(unsigned long address, struct cpa_data *cpa)
 {
 	struct page *kpte_page;
 	int level, err = 0;
@@ -305,12 +311,15 @@ repeat:
 		pgprot_t new_prot = pte_pgprot(old_pte);
 
 		if(!pte_val(old_pte)) {
-			WARN_ON_ONCE(1);
+			printk(KERN_WARNING "CPA: called for zero pte. "
+			       "vaddr = %lx cpa->vaddr = %lx\n", address,
+				cpa->vaddr);
+			WARN_ON(1);
 			return -EINVAL;
 		}
 
-		pgprot_val(new_prot) &= ~pgprot_val(mask_clr);
-		pgprot_val(new_prot) |= pgprot_val(mask_set);
+		pgprot_val(new_prot) &= ~pgprot_val(cpa->mask_clr);
+		pgprot_val(new_prot) |= pgprot_val(cpa->mask_set);
 
 		new_prot = static_protections(new_prot, address);
 
@@ -343,12 +352,10 @@ repeat:
  * Modules and drivers should use the set_memory_* APIs instead.
  */
 
-
-static int
-change_page_attr_addr(unsigned long address, pgprot_t mask_set,
-		      pgprot_t mask_clr)
+static int change_page_attr_addr(struct cpa_data *cpa)
 {
 	int err;
+	unsigned long address = cpa->vaddr;
 
 #ifdef CONFIG_X86_64
 	unsigned long phys_addr = __pa(address);
@@ -362,7 +369,7 @@ change_page_attr_addr(unsigned long address, pgprot_t mask_set,
 		address = (unsigned long) __va(phys_addr);
 #endif
 
-	err = __change_page_attr(address, mask_set, mask_clr);
+	err = __change_page_attr(address, cpa);
 	if (err)
 		return err;
 
@@ -386,20 +393,19 @@ change_page_attr_addr(unsigned long address, pgprot_t mask_set,
 		 * everything between 0 and KERNEL_TEXT_SIZE, so do
 		 * not propagate lookup failures back to users:
 		 */
-		__change_page_attr(address, mask_set, mask_clr);
+		__change_page_attr(address, cpa);
 	}
 #endif
 	return err;
 }
 
-static int __change_page_attr_set_clr(unsigned long addr, int numpages,
-				      pgprot_t mask_set, pgprot_t mask_clr)
+static int __change_page_attr_set_clr(struct cpa_data *cpa)
 {
 	unsigned int i;
 	int ret;
 
-	for (i = 0; i < numpages ; i++, addr += PAGE_SIZE) {
-		ret = change_page_attr_addr(addr, mask_set, mask_clr);
+	for (i = 0; i < cpa->numpages ; i++, cpa->vaddr += PAGE_SIZE) {
+		ret = change_page_attr_addr(cpa);
 		if (ret)
 			return ret;
 	}
@@ -416,6 +422,7 @@ static inline int cache_attr(pgprot_t attr)
 static int change_page_attr_set_clr(unsigned long addr, int numpages,
 				    pgprot_t mask_set, pgprot_t mask_clr)
 {
+	struct cpa_data cpa;
 	int ret, cache;
 
 	/*
@@ -427,7 +434,12 @@ static int change_page_attr_set_clr(unsigned long addr, int numpages,
 	if (!pgprot_val(mask_set) && !pgprot_val(mask_clr))
 		return 0;
 
-	ret = __change_page_attr_set_clr(addr, numpages, mask_set, mask_clr);
+	cpa.vaddr = addr;
+	cpa.numpages = numpages;
+	cpa.mask_set = mask_set;
+	cpa.mask_clr = mask_clr;
+
+	ret = __change_page_attr_set_clr(&cpa);
 
 	/*
 	 * No need to flush, when we did not set any of the caching
@@ -548,37 +560,26 @@ int set_pages_rw(struct page *page, int numpages)
 	return set_memory_rw(addr, numpages);
 }
 
-
-#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_CPA_DEBUG)
-static inline int __change_page_attr_set(unsigned long addr, int numpages,
-					 pgprot_t mask)
-{
-	return __change_page_attr_set_clr(addr, numpages, mask, __pgprot(0));
-}
-
-static inline int __change_page_attr_clear(unsigned long addr, int numpages,
-					   pgprot_t mask)
-{
-	return __change_page_attr_set_clr(addr, numpages, __pgprot(0), mask);
-}
-#endif
-
 #ifdef CONFIG_DEBUG_PAGEALLOC
 
 static int __set_pages_p(struct page *page, int numpages)
 {
-	unsigned long addr = (unsigned long)page_address(page);
+	struct cpa_data cpa = { .vaddr = (unsigned long) page_address(page),
+				.numpages = numpages,
+				.mask_set = __pgprot(_PAGE_PRESENT | _PAGE_RW),
+				.mask_clr = __pgprot(0)};
 
-	return __change_page_attr_set(addr, numpages,
-				      __pgprot(_PAGE_PRESENT | _PAGE_RW));
+	return __change_page_attr_set_clr(&cpa);
 }
 
 static int __set_pages_np(struct page *page, int numpages)
 {
-	unsigned long addr = (unsigned long)page_address(page);
+	struct cpa_data cpa = { .vaddr = (unsigned long) page_address(page),
+				.numpages = numpages,
+				.mask_set = __pgprot(0),
+				.mask_clr = __pgprot(_PAGE_PRESENT | _PAGE_RW)};
 
-	return __change_page_attr_clear(addr, numpages,
-					__pgprot(_PAGE_PRESENT));
+	return __change_page_attr_set_clr(&cpa);
 }
 
 void kernel_map_pages(struct page *page, int numpages, int enable)
-- 
cgit v0.10.2


From f4ae5da0e8e92caa168e7c2a7c4a6c4064b082c2 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 4 Feb 2008 16:48:07 +0100
Subject: x86: cpa, check if we changed anything and tlb flushing is necessary

Flush tlbs only when there was a real change.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index d1c0830..79a9f1b 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -21,6 +21,7 @@ struct cpa_data {
 	int		numpages;
 	pgprot_t	mask_set;
 	pgprot_t	mask_clr;
+	int		flushtlb;
 };
 
 static inline int
@@ -329,11 +330,19 @@ repeat:
 		 * not the memory it points to
 		 */
 		new_pte = pfn_pte(pte_pfn(old_pte), canon_pgprot(new_prot));
-		set_pte_atomic(kpte, new_pte);
+
+		/*
+		 * Do we really change anything ?
+		 */
+		if (pte_val(old_pte) != pte_val(new_pte)) {
+			set_pte_atomic(kpte, new_pte);
+			cpa->flushtlb = 1;
+		}
 	} else {
 		err = split_large_page(kpte, address);
 		if (!err)
 			goto repeat;
+		cpa->flushtlb = 1;
 	}
 	return err;
 }
@@ -438,10 +447,17 @@ static int change_page_attr_set_clr(unsigned long addr, int numpages,
 	cpa.numpages = numpages;
 	cpa.mask_set = mask_set;
 	cpa.mask_clr = mask_clr;
+	cpa.flushtlb = 0;
 
 	ret = __change_page_attr_set_clr(&cpa);
 
 	/*
+	 * Check whether we really changed something:
+	 */
+	if (!cpa.flushtlb)
+		return ret;
+
+	/*
 	 * No need to flush, when we did not set any of the caching
 	 * attributes:
 	 */
-- 
cgit v0.10.2


From 65e074dffa198978ab0c9976a19b954fbe1183e2 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 4 Feb 2008 16:48:07 +0100
Subject: x86: cpa, preserve large pages if possible

When CPA is called on a range which fits into a large page mapping,
avoid splitting the page when:

1) There is no change of attributes
2) The range to change is a complete large mapping

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 79a9f1b..40b7ac5 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -18,12 +18,17 @@
 
 struct cpa_data {
 	unsigned long	vaddr;
-	int		numpages;
 	pgprot_t	mask_set;
 	pgprot_t	mask_clr;
+	int		numpages;
 	int		flushtlb;
 };
 
+enum {
+	CPA_NO_SPLIT = 0,
+	CPA_SPLIT,
+};
+
 static inline int
 within(unsigned long addr, unsigned long start, unsigned long end)
 {
@@ -230,6 +235,86 @@ static void __set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
 #endif
 }
 
+static int try_preserve_large_page(pte_t *kpte, unsigned long address,
+				   struct cpa_data *cpa)
+{
+	unsigned long nextpage_addr, numpages, pmask, psize, flags;
+	pte_t new_pte, old_pte, *tmp;
+	pgprot_t old_prot, new_prot;
+	int level, res = CPA_SPLIT;
+
+	spin_lock_irqsave(&pgd_lock, flags);
+	/*
+	 * Check for races, another CPU might have split this page
+	 * up already:
+	 */
+	tmp = lookup_address(address, &level);
+	if (tmp != kpte)
+		goto out_unlock;
+
+	switch (level) {
+	case PG_LEVEL_2M:
+		psize = LARGE_PAGE_SIZE;
+		pmask = LARGE_PAGE_MASK;
+		break;
+	case PG_LEVEL_1G:
+	default:
+		res = -EINVAL;
+		goto out_unlock;
+	}
+
+	/*
+	 * Calculate the number of pages, which fit into this large
+	 * page starting at address:
+	 */
+	nextpage_addr = (address + psize) & pmask;
+	numpages = (nextpage_addr - address) >> PAGE_SHIFT;
+	if (numpages < cpa->numpages)
+		cpa->numpages = numpages;
+
+	/*
+	 * We are safe now. Check whether the new pgprot is the same:
+	 */
+	old_pte = *kpte;
+	old_prot = new_prot = pte_pgprot(old_pte);
+
+	pgprot_val(new_prot) &= ~pgprot_val(cpa->mask_clr);
+	pgprot_val(new_prot) |= pgprot_val(cpa->mask_set);
+	new_prot = static_protections(new_prot, address);
+
+	/*
+	 * If there are no changes, return. maxpages has been updated
+	 * above:
+	 */
+	if (pgprot_val(new_prot) == pgprot_val(old_prot)) {
+		res = CPA_NO_SPLIT;
+		goto out_unlock;
+	}
+
+	/*
+	 * We need to change the attributes. Check, whether we can
+	 * change the large page in one go. We request a split, when
+	 * the address is not aligned and the number of pages is
+	 * smaller than the number of pages in the large page. Note
+	 * that we limited the number of possible pages already to
+	 * the number of pages in the large page.
+	 */
+	if (address == (nextpage_addr - psize) && cpa->numpages == numpages) {
+		/*
+		 * The address is aligned and the number of pages
+		 * covers the full page.
+		 */
+		new_pte = pfn_pte(pte_pfn(old_pte), canon_pgprot(new_prot));
+		__set_pmd_pte(kpte, address, new_pte);
+		cpa->flushtlb = 1;
+		res = CPA_NO_SPLIT;
+	}
+
+out_unlock:
+	spin_unlock_irqrestore(&pgd_lock, flags);
+	return res;
+}
+
 static int split_large_page(pte_t *kpte, unsigned long address)
 {
 	pgprot_t ref_prot = pte_pgprot(pte_clrhuge(*kpte));
@@ -295,7 +380,7 @@ out_unlock:
 static int __change_page_attr(unsigned long address, struct cpa_data *cpa)
 {
 	struct page *kpte_page;
-	int level, err = 0;
+	int level, res;
 	pte_t *kpte;
 
 repeat:
@@ -338,13 +423,34 @@ repeat:
 			set_pte_atomic(kpte, new_pte);
 			cpa->flushtlb = 1;
 		}
-	} else {
-		err = split_large_page(kpte, address);
-		if (!err)
-			goto repeat;
-		cpa->flushtlb = 1;
+		cpa->numpages = 1;
+		return 0;
 	}
-	return err;
+
+	/*
+	 * Check, whether we can keep the large page intact
+	 * and just change the pte:
+	 */
+	res = try_preserve_large_page(kpte, address, cpa);
+	if (res < 0)
+		return res;
+
+	/*
+	 * When the range fits into the existing large page,
+	 * return. cp->numpages and cpa->tlbflush have been updated in
+	 * try_large_page:
+	 */
+	if (res == CPA_NO_SPLIT)
+		return 0;
+
+	/*
+	 * We have to split the large page:
+	 */
+	res = split_large_page(kpte, address);
+	if (res)
+		return res;
+	cpa->flushtlb = 1;
+	goto repeat;
 }
 
 /**
@@ -410,15 +516,27 @@ static int change_page_attr_addr(struct cpa_data *cpa)
 
 static int __change_page_attr_set_clr(struct cpa_data *cpa)
 {
-	unsigned int i;
-	int ret;
+	int ret, numpages = cpa->numpages;
 
-	for (i = 0; i < cpa->numpages ; i++, cpa->vaddr += PAGE_SIZE) {
+	while (numpages) {
+		/*
+		 * Store the remaining nr of pages for the large page
+		 * preservation check.
+		 */
+		cpa->numpages = numpages;
 		ret = change_page_attr_addr(cpa);
 		if (ret)
 			return ret;
-	}
 
+		/*
+		 * Adjust the number of pages with the result of the
+		 * CPA operation. Either a large page has been
+		 * preserved or a single page update happened.
+		 */
+		BUG_ON(cpa->numpages > numpages);
+		numpages -= cpa->numpages;
+		cpa->vaddr += cpa->numpages * PAGE_SIZE;
+	}
 	return 0;
 }
 
-- 
cgit v0.10.2


From 34508f66b69ff1708192654f631eb8f1d4c52005 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 4 Feb 2008 16:48:07 +0100
Subject: x86: AMD Athlon X2 hard hang fix

An Athlon 64 X2 test system showed hard hangs shortly after marking
the kernel text read-only, if we tried to preserve largepages and
changed the PSE entry from RW to RO. The pagetable code itself is
correct, it's the CPU that locked up hard (and not even the NMI
watchdog could punch through that hard hang).

So be conservative and always do splitups - like we did in the past.

Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 40b7ac5..3810f7a 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -243,6 +243,17 @@ static int try_preserve_large_page(pte_t *kpte, unsigned long address,
 	pgprot_t old_prot, new_prot;
 	int level, res = CPA_SPLIT;
 
+	/*
+	 * An Athlon 64 X2 showed hard hangs if we tried to preserve
+	 * largepages and changed the PSE entry from RW to RO.
+	 *
+	 * As AMD CPUs have a long series of erratas in this area,
+	 * (and none of the known ones seem to explain this hang),
+	 * disable this code until the hang can be debugged:
+	 */
+	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
+		return res;
+
 	spin_lock_irqsave(&pgd_lock, flags);
 	/*
 	 * Check for races, another CPU might have split this page
-- 
cgit v0.10.2


From 9a14aefc1d28c6037122965ee8c10d92a970ade0 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 4 Feb 2008 16:48:07 +0100
Subject: x86: cpa, fix lookup_address

lookup_address() returns a wrong level and a pointer to a nonexistent
pte when pmd or pud entries are marked !present. This happens, for
example, due to the boot-time mapping of GART into the low memory
space.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 3810f7a..7d21cd6 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -188,6 +188,14 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address)
 	return prot;
 }
 
+/*
+ * Lookup the page table entry for a virtual address. Return a pointer
+ * to the entry and the level of the mapping.
+ *
+ * Note: We return pud and pmd either when the entry is marked large
+ * or when the present bit is not set. Otherwise we would return a
+ * pointer to a nonexisting mapping.
+ */
 pte_t *lookup_address(unsigned long address, int *level)
 {
 	pgd_t *pgd = pgd_offset_k(address);
@@ -206,7 +214,7 @@ pte_t *lookup_address(unsigned long address, int *level)
 		return NULL;
 
 	*level = PG_LEVEL_2M;
-	if (pmd_large(*pmd))
+	if (pmd_large(*pmd) || !pmd_present(*pmd))
 		return (pte_t *)pmd;
 
 	*level = PG_LEVEL_4K;
-- 
cgit v0.10.2


From 31422c51e0dc72532d82e80895932d430c3ed307 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Mon, 4 Feb 2008 16:48:08 +0100
Subject: x86: rename LARGE_PAGE_SIZE to PMD_PAGE_SIZE

Fix up all users.

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index 1ccb38a..e8657b9 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -80,8 +80,8 @@ startup_32:
 
 #ifdef CONFIG_RELOCATABLE
 	movl	%ebp, %ebx
-	addl	$(LARGE_PAGE_SIZE -1), %ebx
-	andl	$LARGE_PAGE_MASK, %ebx
+	addl	$(PMD_PAGE_SIZE -1), %ebx
+	andl	$PMD_PAGE_MASK, %ebx
 #else
 	movl	$CONFIG_PHYSICAL_START, %ebx
 #endif
@@ -220,8 +220,8 @@ ENTRY(startup_64)
 	/* Start with the delta to where the kernel will run at. */
 #ifdef CONFIG_RELOCATABLE
 	leaq	startup_32(%rip) /* - $startup_32 */, %rbp
-	addq	$(LARGE_PAGE_SIZE - 1), %rbp
-	andq	$LARGE_PAGE_MASK, %rbp
+	addq	$(PMD_PAGE_SIZE - 1), %rbp
+	andq	$PMD_PAGE_MASK, %rbp
 	movq	%rbp, %rbx
 #else
 	movq	$CONFIG_PHYSICAL_START, %rbp
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 1d5a7a3..4f283ad 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -63,7 +63,7 @@ startup_64:
 
 	/* Is the address not 2M aligned? */
 	movq	%rbp, %rax
-	andl	$~LARGE_PAGE_MASK, %eax
+	andl	$~PMD_PAGE_MASK, %eax
 	testl	%eax, %eax
 	jnz	bad_address
 
@@ -88,7 +88,7 @@ startup_64:
 
 	/* Add an Identity mapping if I am above 1G */
 	leaq	_text(%rip), %rdi
-	andq	$LARGE_PAGE_MASK, %rdi
+	andq	$PMD_PAGE_MASK, %rdi
 
 	movq	%rdi, %rax
 	shrq	$PUD_SHIFT, %rax
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index 4d5cc71..ae1d3d8 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -501,7 +501,7 @@ static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size)
 	}
 
 	a = aper + iommu_size;
-	iommu_size -= round_up(a, LARGE_PAGE_SIZE) - a;
+	iommu_size -= round_up(a, PMD_PAGE_SIZE) - a;
 
 	if (iommu_size < 64*1024*1024) {
 		printk(KERN_WARNING
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index eabcaed..b7a7992 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -444,10 +444,10 @@ void __init clear_kernel_mapping(unsigned long address, unsigned long size)
 {
 	unsigned long end = address + size;
 
-	BUG_ON(address & ~LARGE_PAGE_MASK);
-	BUG_ON(size & ~LARGE_PAGE_MASK);
+	BUG_ON(address & ~PMD_PAGE_MASK);
+	BUG_ON(size & ~PMD_PAGE_MASK);
 
-	for (; address < end; address += LARGE_PAGE_SIZE) {
+	for (; address < end; address += PMD_PAGE_SIZE) {
 		pgd_t *pgd = pgd_offset_k(address);
 		pud_t *pud;
 		pmd_t *pmd;
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 7d21cd6..74446ea 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -273,8 +273,8 @@ static int try_preserve_large_page(pte_t *kpte, unsigned long address,
 
 	switch (level) {
 	case PG_LEVEL_2M:
-		psize = LARGE_PAGE_SIZE;
-		pmask = LARGE_PAGE_MASK;
+		psize = PMD_PAGE_SIZE;
+		pmask = PMD_PAGE_MASK;
 		break;
 	case PG_LEVEL_1G:
 	default:
@@ -363,7 +363,7 @@ static int split_large_page(pte_t *kpte, unsigned long address)
 	}
 
 	address = __pa(address);
-	addr = address & LARGE_PAGE_MASK;
+	addr = address & PMD_PAGE_MASK;
 	pbase = (pte_t *)page_address(base);
 #ifdef CONFIG_X86_32
 	paravirt_alloc_pt(&init_mm, page_to_pfn(base));
diff --git a/include/asm-x86/page.h b/include/asm-x86/page.h
index c8b30ef..1cb7c51 100644
--- a/include/asm-x86/page.h
+++ b/include/asm-x86/page.h
@@ -13,8 +13,8 @@
 #define PHYSICAL_PAGE_MASK	(PAGE_MASK & __PHYSICAL_MASK)
 #define PTE_MASK		(_AT(long, PHYSICAL_PAGE_MASK))
 
-#define LARGE_PAGE_SIZE		(_AC(1,UL) << PMD_SHIFT)
-#define LARGE_PAGE_MASK		(~(LARGE_PAGE_SIZE-1))
+#define PMD_PAGE_SIZE		(_AC(1, UL) << PMD_SHIFT)
+#define PMD_PAGE_MASK		(~(PMD_PAGE_SIZE-1))
 
 #define HPAGE_SHIFT		PMD_SHIFT
 #define HPAGE_SIZE		(_AC(1,UL) << HPAGE_SHIFT)
-- 
cgit v0.10.2


From 07cf89c05f2bbafa002401ac4e09ac31678513e4 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 4 Feb 2008 16:48:08 +0100
Subject: x86: CPA fix pagetable split

Move the readout of the large entry into the spinlock section to
prevent an unlikely but possible race.

Mark the pmd/pud entry present after the split. The original mapping
might be marked not present, and that state is preserved in the new
split-up ptes.

Remove the stale gfp_flags double initialization.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 74446ea..7288099 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -336,7 +336,7 @@ out_unlock:
 
 static int split_large_page(pte_t *kpte, unsigned long address)
 {
-	pgprot_t ref_prot = pte_pgprot(pte_clrhuge(*kpte));
+	pgprot_t ref_prot;
 	gfp_t gfp_flags = GFP_KERNEL;
 	unsigned long flags, addr, pfn;
 	pte_t *pbase, *tmp;
@@ -344,7 +344,6 @@ static int split_large_page(pte_t *kpte, unsigned long address)
 	unsigned int i, level;
 
 #ifdef CONFIG_DEBUG_PAGEALLOC
-	gfp_flags = __GFP_HIGH | __GFP_NOFAIL | __GFP_NOWARN;
 	gfp_flags = GFP_ATOMIC | __GFP_NOWARN;
 #endif
 	base = alloc_pages(gfp_flags, 0);
@@ -368,6 +367,7 @@ static int split_large_page(pte_t *kpte, unsigned long address)
 #ifdef CONFIG_X86_32
 	paravirt_alloc_pt(&init_mm, page_to_pfn(base));
 #endif
+	ref_prot = pte_pgprot(pte_clrhuge(*kpte));
 
 	/*
 	 * Get the target pfn from the original entry:
@@ -377,13 +377,17 @@ static int split_large_page(pte_t *kpte, unsigned long address)
 		set_pte(&pbase[i], pfn_pte(pfn, ref_prot));
 
 	/*
-	 * Install the new, split up pagetable. Important detail here:
+	 * Install the new, split up pagetable. Important details here:
 	 *
 	 * On Intel the NX bit of all levels must be cleared to make a
 	 * page executable. See section 4.13.2 of Intel 64 and IA-32
 	 * Architectures Software Developer's Manual).
+	 *
+	 * Mark the entry present. The current mapping might be
+	 * set to not present, which we preserved above.
 	 */
 	ref_prot = pte_pgprot(pte_mkexec(pte_clrhuge(*kpte)));
+	pgprot_val(ref_prot) |= _PAGE_PRESENT;
 	__set_pmd_pte(kpte, address, mk_pte(base, ref_prot));
 	base = NULL;
 
-- 
cgit v0.10.2


From 64f351d197d9ae8ad9624998afa8ee18e696ca44 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 4 Feb 2008 16:48:08 +0100
Subject: x86: cpa selftest, skip non present entries

pud and pmd entries in the RAM area might be marked as non present.
Do not try to modify them in the selftest.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/mm/pageattr-test.c b/arch/x86/mm/pageattr-test.c
index 7573e78..398f3a5 100644
--- a/arch/x86/mm/pageattr-test.c
+++ b/arch/x86/mm/pageattr-test.c
@@ -137,7 +137,8 @@ static __init int exercise_pageattr(void)
 
 		for (k = 0; k < len[i]; k++) {
 			pte = lookup_address(addr[i] + k*PAGE_SIZE, &level);
-			if (!pte || pgprot_val(pte_pgprot(*pte)) == 0) {
+			if (!pte || pgprot_val(pte_pgprot(*pte)) == 0 ||
+			    !(pte_val(*pte) & _PAGE_PRESENT)) {
 				addr[i] = 0;
 				break;
 			}
-- 
cgit v0.10.2


From 28d6ee41a6ff8139e442af2dc55928bfbb475586 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Mon, 4 Feb 2008 16:48:08 +0100
Subject: x86: switch pci-gart over to using set_memory_np() instead of
 clear_kernel_mapping()

pci-gart needs to unmap the IOMMU aperture to prevent cache corruptions.

Switch this over to using set_memory_np() instead of clear_kernel_mapping().

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index ae1d3d8..845cbec 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -731,7 +731,8 @@ void __init gart_iommu_init(void)
 	 * the backing memory. The GART address is only used by PCI
 	 * devices.
 	 */
-	clear_kernel_mapping((unsigned long)__va(iommu_bus_base), iommu_size);
+	set_memory_np((unsigned long)__va(iommu_bus_base),
+				iommu_size >> PAGE_SHIFT);
 
 	/*
 	 * Try to workaround a bug (thanks to BenH)
-- 
cgit v0.10.2


From bde1965ce8c63e17cc284e1af616c85aba483f11 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Mon, 4 Feb 2008 16:48:08 +0100
Subject: x86: remove now unused clear_kernel_mapping

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index b7a7992..5855449 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -435,49 +435,6 @@ void __init paging_init(void)
 #endif
 
 /*
- * Unmap a kernel mapping if it exists. This is useful to avoid
- * prefetches from the CPU leading to inconsistent cache lines.
- * address and size must be aligned to 2MB boundaries.
- * Does nothing when the mapping doesn't exist.
- */
-void __init clear_kernel_mapping(unsigned long address, unsigned long size)
-{
-	unsigned long end = address + size;
-
-	BUG_ON(address & ~PMD_PAGE_MASK);
-	BUG_ON(size & ~PMD_PAGE_MASK);
-
-	for (; address < end; address += PMD_PAGE_SIZE) {
-		pgd_t *pgd = pgd_offset_k(address);
-		pud_t *pud;
-		pmd_t *pmd;
-
-		if (pgd_none(*pgd))
-			continue;
-
-		pud = pud_offset(pgd, address);
-		if (pud_none(*pud))
-			continue;
-
-		pmd = pmd_offset(pud, address);
-		if (!pmd || pmd_none(*pmd))
-			continue;
-
-		if (!(pmd_val(*pmd) & _PAGE_PSE)) {
-			/*
-			 * Could handle this, but it should not happen
-			 * currently:
-			 */
-			printk(KERN_ERR "clear_kernel_mapping: "
-				"mapping has been split. will leak memory\n");
-			pmd_ERROR(*pmd);
-		}
-		set_pmd(pmd, __pmd(0));
-	}
-	__flush_tlb_all();
-}
-
-/*
  * Memory hotplug specific functions
  */
 void online_page(struct page *page)
diff --git a/include/asm-x86/pgtable_64.h b/include/asm-x86/pgtable_64.h
index 6e615a1..5c86cff 100644
--- a/include/asm-x86/pgtable_64.h
+++ b/include/asm-x86/pgtable_64.h
@@ -21,7 +21,6 @@ extern pgd_t init_level4_pgt[];
 #define swapper_pg_dir init_level4_pgt
 
 extern void paging_init(void);
-extern void clear_kernel_mapping(unsigned long addr, unsigned long size);
 
 #endif /* !__ASSEMBLY__ */
 
-- 
cgit v0.10.2


From 6ce9fc17d913ae51f8434d2826f306347820b07d Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 4 Feb 2008 16:48:08 +0100
Subject: x86: remove cpa warning

this race is legit and can happen on SMP systems.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 7288099..0b029c9 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -356,10 +356,8 @@ static int split_large_page(pte_t *kpte, unsigned long address)
 	 * up for us already:
 	 */
 	tmp = lookup_address(address, &level);
-	if (tmp != kpte) {
-		WARN_ON_ONCE(1);
+	if (tmp != kpte)
 		goto out_unlock;
-	}
 
 	address = __pa(address);
 	addr = address & PMD_PAGE_MASK;
-- 
cgit v0.10.2


From 7bfb72e847c201fe32271fb13f75d060671d8890 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 4 Feb 2008 16:48:08 +0100
Subject: x86: fix page-present check in cpa_flush_range

pte_present() might return true for PROT_NONE mappings.
Explicitly check the present bit.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 0b029c9..9be684e 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -119,7 +119,7 @@ static void cpa_flush_range(unsigned long start, int numpages, int cache)
 		/*
 		 * Only flush present addresses:
 		 */
-		if (pte && pte_present(*pte))
+		if (pte && (pte_val(*pte) & _PAGE_PRESENT))
 			clflush_cache_range((void *) addr, PAGE_SIZE);
 	}
 }
-- 
cgit v0.10.2


From d4f71f7969ee2c16e2969185280c13d4f51a9172 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Mon, 4 Feb 2008 16:48:09 +0100
Subject: x86: switch direct mapping setup over to set_pte

Use set_pte() for setting up the 2MB pages in the direct mapping.

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 5855449..3a98d6f 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -273,7 +273,6 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end)
 	int i = pmd_index(address);
 
 	for (; i < PTRS_PER_PMD; i++, address += PMD_SIZE) {
-		unsigned long entry;
 		pmd_t *pmd = pmd_page + pmd_index(address);
 
 		if (address >= end) {
@@ -287,9 +286,8 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end)
 		if (pmd_val(*pmd))
 			continue;
 
-		entry = __PAGE_KERNEL_LARGE|_PAGE_GLOBAL|address;
-		entry &= __supported_pte_mask;
-		set_pmd(pmd, __pmd(entry));
+		set_pte((pte_t *)pmd,
+			pfn_pte(address >> PAGE_SHIFT, PAGE_KERNEL_LARGE));
 	}
 }
 
-- 
cgit v0.10.2


From 019c3e7c5e93475002edfc0da6c59508247553b1 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Mon, 4 Feb 2008 16:48:09 +0100
Subject: x86: add feature macros for the gbpages cpuid bit

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/include/asm-x86/cpufeature.h b/include/asm-x86/cpufeature.h
index 3adc9cf..065e929 100644
--- a/include/asm-x86/cpufeature.h
+++ b/include/asm-x86/cpufeature.h
@@ -46,6 +46,7 @@
 #define X86_FEATURE_MP		(1*32+19) /* MP Capable. */
 #define X86_FEATURE_NX		(1*32+20) /* Execute Disable */
 #define X86_FEATURE_MMXEXT	(1*32+22) /* AMD MMX extensions */
+#define X86_FEATURE_GBPAGES	(1*32+26) /* GB pages */
 #define X86_FEATURE_RDTSCP	(1*32+27) /* RDTSCP */
 #define X86_FEATURE_LM		(1*32+29) /* Long Mode (x86-64) */
 #define X86_FEATURE_3DNOWEXT	(1*32+30) /* AMD 3DNow! extensions */
@@ -179,6 +180,7 @@ extern const char * const x86_power_flags[32];
 #define cpu_has_pebs		boot_cpu_has(X86_FEATURE_PEBS)
 #define cpu_has_clflush		boot_cpu_has(X86_FEATURE_CLFLSH)
 #define cpu_has_bts		boot_cpu_has(X86_FEATURE_BTS)
+#define cpu_has_gbpages		boot_cpu_has(X86_FEATURE_GBPAGES)
 
 #if defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_64)
 # define cpu_has_invlpg		1
-- 
cgit v0.10.2


From fbff3c21aff29ffdfa46b50946696689d3e70a48 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Mon, 4 Feb 2008 16:48:09 +0100
Subject: x86: add PUD_PAGE_SIZE

a PUD entry covers 1GB of virtual memory.

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/include/asm-x86/page_64.h b/include/asm-x86/page_64.h
index c1ac42d..dcf0c07 100644
--- a/include/asm-x86/page_64.h
+++ b/include/asm-x86/page_64.h
@@ -23,6 +23,9 @@
 #define MCE_STACK 5
 #define N_EXCEPTION_STACKS 5  /* hw limit: 7 */
 
+#define PUD_PAGE_SIZE		(_AC(1, UL) << PUD_SHIFT)
+#define PUD_PAGE_MASK		(~(PUD_PAGE_SIZE-1))
+
 #define __PAGE_OFFSET           _AC(0xffff810000000000, UL)
 
 #define __PHYSICAL_START	CONFIG_PHYSICAL_START
-- 
cgit v0.10.2


From 61e19a347ad4bcdda615ef77ef9c3e656e254f3d Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Mon, 4 Feb 2008 16:48:09 +0100
Subject: x86: add pgtable accessor functions for gbpages

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/include/asm-x86/pgtable_32.h b/include/asm-x86/pgtable_32.h
index 21e70fb..935630d 100644
--- a/include/asm-x86/pgtable_32.h
+++ b/include/asm-x86/pgtable_32.h
@@ -148,6 +148,8 @@ static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count)
  */
 #define pgd_offset_k(address) pgd_offset(&init_mm, address)
 
+static inline int pud_large(pud_t pud) { return 0; }
+
 /*
  * the pmd page can be thought of an array like this: pmd_t[PTRS_PER_PMD]
  *
diff --git a/include/asm-x86/pgtable_64.h b/include/asm-x86/pgtable_64.h
index 5c86cff..bd4740a 100644
--- a/include/asm-x86/pgtable_64.h
+++ b/include/asm-x86/pgtable_64.h
@@ -198,6 +198,12 @@ static inline unsigned long pmd_bad(pmd_t pmd)
 #define pud_offset(pgd, address) ((pud_t *) pgd_page_vaddr(*(pgd)) + pud_index(address))
 #define pud_present(pud) (pud_val(pud) & _PAGE_PRESENT)
 
+static inline int pud_large(pud_t pte)
+{
+	return (pud_val(pte) & (_PAGE_PSE|_PAGE_PRESENT)) ==
+		(_PAGE_PSE|_PAGE_PRESENT);
+}
+
 /* PMD  - Level 2 access */
 #define pmd_page_vaddr(pmd) ((unsigned long) __va(pmd_val(pmd) & PTE_MASK))
 #define pmd_page(pmd)		(pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT))
-- 
cgit v0.10.2


From c2f71ee2140b2a506735ff9fcb7e3b1dfaab8f2b Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Mon, 4 Feb 2008 16:48:09 +0100
Subject: x86: add gbpages support to lookup_address

[ tglx@linutronix.de: fix bootup crash on sparse mappings. ]

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 9be684e..143fbaf 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -209,6 +209,11 @@ pte_t *lookup_address(unsigned long address, int *level)
 	pud = pud_offset(pgd, address);
 	if (pud_none(*pud))
 		return NULL;
+
+	*level = PG_LEVEL_1G;
+	if (pud_large(*pud) || !pud_present(*pud))
+		return (pte_t *)pud;
+
 	pmd = pmd_offset(pud, address);
 	if (pmd_none(*pmd))
 		return NULL;
-- 
cgit v0.10.2


From b5360222273cb3e57a119c18eef42f59da4da87b Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Mon, 4 Feb 2008 16:48:09 +0100
Subject: x86: support gbpages in pagetable dump

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 3fff490..ad8b973 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -240,7 +240,8 @@ void dump_pagetable(unsigned long address)
 	pud = pud_offset(pgd, address);
 	if (bad_address(pud)) goto bad;
 	printk("PUD %lx ", pud_val(*pud));
-	if (!pud_present(*pud))	goto ret;
+	if (!pud_present(*pud) || pud_large(*pud))
+		goto ret;
 
 	pmd = pmd_offset(pud, address);
 	if (bad_address(pmd)) goto bad;
-- 
cgit v0.10.2


From f07333fd149eb6826da26a89c3aff90324f270b0 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Mon, 4 Feb 2008 16:48:09 +0100
Subject: x86: implement gbpages support in change_page_attr()

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 143fbaf..42ca3d8 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -281,7 +281,12 @@ static int try_preserve_large_page(pte_t *kpte, unsigned long address,
 		psize = PMD_PAGE_SIZE;
 		pmask = PMD_PAGE_MASK;
 		break;
+#ifdef CONFIG_X86_64
 	case PG_LEVEL_1G:
+		psize = PMD_PAGE_SIZE;
+		pmask = PMD_PAGE_MASK;
+		break;
+#endif
 	default:
 		res = -EINVAL;
 		goto out_unlock;
@@ -343,7 +348,7 @@ static int split_large_page(pte_t *kpte, unsigned long address)
 {
 	pgprot_t ref_prot;
 	gfp_t gfp_flags = GFP_KERNEL;
-	unsigned long flags, addr, pfn;
+	unsigned long flags, addr, pfn, pfninc = 1;
 	pte_t *pbase, *tmp;
 	struct page *base;
 	unsigned int i, level;
@@ -372,11 +377,19 @@ static int split_large_page(pte_t *kpte, unsigned long address)
 #endif
 	ref_prot = pte_pgprot(pte_clrhuge(*kpte));
 
+#ifdef CONFIG_X86_64
+	if (level == PG_LEVEL_1G) {
+		pfninc = PMD_PAGE_SIZE >> PAGE_SHIFT;
+		pgprot_val(ref_prot) |= _PAGE_PSE;
+		addr &= PUD_PAGE_MASK;
+	}
+#endif
+
 	/*
 	 * Get the target pfn from the original entry:
 	 */
 	pfn = pte_pfn(*kpte);
-	for (i = 0; i < PTRS_PER_PTE; i++, pfn++)
+	for (i = 0; i < PTRS_PER_PTE; i++, pfn += pfninc)
 		set_pte(&pbase[i], pfn_pte(pfn, ref_prot));
 
 	/*
-- 
cgit v0.10.2


From 9df84993cb3d71669894654ab257f01f6e4ed48e Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 4 Feb 2008 16:48:09 +0100
Subject: x86: cpa, cleanups

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 42ca3d8..029fb07 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -16,6 +16,9 @@
 #include <asm/uaccess.h>
 #include <asm/pgalloc.h>
 
+/*
+ * The current flushing context - we pass it instead of 5 arguments:
+ */
 struct cpa_data {
 	unsigned long	vaddr;
 	pgprot_t	mask_set;
@@ -206,6 +209,7 @@ pte_t *lookup_address(unsigned long address, int *level)
 
 	if (pgd_none(*pgd))
 		return NULL;
+
 	pud = pud_offset(pgd, address);
 	if (pud_none(*pud))
 		return NULL;
@@ -223,9 +227,13 @@ pte_t *lookup_address(unsigned long address, int *level)
 		return (pte_t *)pmd;
 
 	*level = PG_LEVEL_4K;
+
 	return pte_offset_kernel(pmd, address);
 }
 
+/*
+ * Set the new pmd in all the pgds we know about:
+ */
 static void __set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
 {
 	/* change init_mm */
@@ -248,8 +256,9 @@ static void __set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
 #endif
 }
 
-static int try_preserve_large_page(pte_t *kpte, unsigned long address,
-				   struct cpa_data *cpa)
+static int
+try_preserve_large_page(pte_t *kpte, unsigned long address,
+			struct cpa_data *cpa)
 {
 	unsigned long nextpage_addr, numpages, pmask, psize, flags;
 	pte_t new_pte, old_pte, *tmp;
@@ -341,17 +350,18 @@ static int try_preserve_large_page(pte_t *kpte, unsigned long address,
 
 out_unlock:
 	spin_unlock_irqrestore(&pgd_lock, flags);
+
 	return res;
 }
 
 static int split_large_page(pte_t *kpte, unsigned long address)
 {
-	pgprot_t ref_prot;
-	gfp_t gfp_flags = GFP_KERNEL;
 	unsigned long flags, addr, pfn, pfninc = 1;
+	gfp_t gfp_flags = GFP_KERNEL;
+	unsigned int i, level;
 	pte_t *pbase, *tmp;
+	pgprot_t ref_prot;
 	struct page *base;
-	unsigned int i, level;
 
 #ifdef CONFIG_DEBUG_PAGEALLOC
 	gfp_flags = GFP_ATOMIC | __GFP_NOWARN;
@@ -505,7 +515,6 @@ repeat:
  *
  * Modules and drivers should use the set_memory_* APIs instead.
  */
-
 static int change_page_attr_addr(struct cpa_data *cpa)
 {
 	int err;
-- 
cgit v0.10.2


From beaff6333b4a21e8f3b7f9a7c3c8f8716b2334bc Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 4 Feb 2008 16:48:09 +0100
Subject: x86: cpa, eliminate CPA_ enum

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 029fb07..fb2eedb 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -27,11 +27,6 @@ struct cpa_data {
 	int		flushtlb;
 };
 
-enum {
-	CPA_NO_SPLIT = 0,
-	CPA_SPLIT,
-};
-
 static inline int
 within(unsigned long addr, unsigned long start, unsigned long end)
 {
@@ -263,7 +258,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
 	unsigned long nextpage_addr, numpages, pmask, psize, flags;
 	pte_t new_pte, old_pte, *tmp;
 	pgprot_t old_prot, new_prot;
-	int level, res = CPA_SPLIT;
+	int level, do_split = 1;
 
 	/*
 	 * An Athlon 64 X2 showed hard hangs if we tried to preserve
@@ -274,7 +269,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
 	 * disable this code until the hang can be debugged:
 	 */
 	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
-		return res;
+		return 1;
 
 	spin_lock_irqsave(&pgd_lock, flags);
 	/*
@@ -297,7 +292,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
 		break;
 #endif
 	default:
-		res = -EINVAL;
+		do_split = -EINVAL;
 		goto out_unlock;
 	}
 
@@ -325,7 +320,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
 	 * above:
 	 */
 	if (pgprot_val(new_prot) == pgprot_val(old_prot)) {
-		res = CPA_NO_SPLIT;
+		do_split = 0;
 		goto out_unlock;
 	}
 
@@ -345,13 +340,13 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
 		new_pte = pfn_pte(pte_pfn(old_pte), canon_pgprot(new_prot));
 		__set_pmd_pte(kpte, address, new_pte);
 		cpa->flushtlb = 1;
-		res = CPA_NO_SPLIT;
+		do_split = 0;
 	}
 
 out_unlock:
 	spin_unlock_irqrestore(&pgd_lock, flags);
 
-	return res;
+	return do_split;
 }
 
 static int split_large_page(pte_t *kpte, unsigned long address)
@@ -429,7 +424,7 @@ out_unlock:
 static int __change_page_attr(unsigned long address, struct cpa_data *cpa)
 {
 	struct page *kpte_page;
-	int level, res;
+	int level, do_split;
 	pte_t *kpte;
 
 repeat:
@@ -480,25 +475,26 @@ repeat:
 	 * Check, whether we can keep the large page intact
 	 * and just change the pte:
 	 */
-	res = try_preserve_large_page(kpte, address, cpa);
-	if (res < 0)
-		return res;
+	do_split = try_preserve_large_page(kpte, address, cpa);
+	if (do_split < 0)
+		return do_split;
 
 	/*
 	 * When the range fits into the existing large page,
 	 * return. cp->numpages and cpa->tlbflush have been updated in
 	 * try_large_page:
 	 */
-	if (res == CPA_NO_SPLIT)
+	if (do_split == 0)
 		return 0;
 
 	/*
 	 * We have to split the large page:
 	 */
-	res = split_large_page(kpte, address);
-	if (res)
-		return res;
+	do_split = split_large_page(kpte, address);
+	if (do_split)
+		return do_split;
 	cpa->flushtlb = 1;
+
 	goto repeat;
 }
 
-- 
cgit v0.10.2


From 87f7f8fe328388a1430a4c27cbe684f3925fd8a5 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 4 Feb 2008 16:48:10 +0100
Subject: x86: cpa, clean up code flow

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index fb2eedb..4f03350 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -423,8 +423,8 @@ out_unlock:
 
 static int __change_page_attr(unsigned long address, struct cpa_data *cpa)
 {
+	int level, do_split, err;
 	struct page *kpte_page;
-	int level, do_split;
 	pte_t *kpte;
 
 repeat:
@@ -476,26 +476,24 @@ repeat:
 	 * and just change the pte:
 	 */
 	do_split = try_preserve_large_page(kpte, address, cpa);
-	if (do_split < 0)
-		return do_split;
-
 	/*
 	 * When the range fits into the existing large page,
 	 * return. cp->numpages and cpa->tlbflush have been updated in
 	 * try_large_page:
 	 */
-	if (do_split == 0)
-		return 0;
+	if (do_split <= 0)
+		return do_split;
 
 	/*
 	 * We have to split the large page:
 	 */
-	do_split = split_large_page(kpte, address);
-	if (do_split)
-		return do_split;
-	cpa->flushtlb = 1;
+	err = split_large_page(kpte, address);
+	if (!err) {
+		cpa->flushtlb = 1;
+		goto repeat;
+	}
 
-	goto repeat;
+	return err;
 }
 
 /**
-- 
cgit v0.10.2


From 7b610eec7a06ede64f71459e7f412dfd96f4cc5e Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 4 Feb 2008 16:48:10 +0100
Subject: x86: cpa, micro-optimization

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 4f03350..bb55a78 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -237,6 +237,7 @@ static void __set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
 	if (!SHARED_KERNEL_PMD) {
 		struct page *page;
 
+		address = __pa(address);
 		list_for_each_entry(page, &pgd_list, lru) {
 			pgd_t *pgd;
 			pud_t *pud;
@@ -351,7 +352,7 @@ out_unlock:
 
 static int split_large_page(pte_t *kpte, unsigned long address)
 {
-	unsigned long flags, addr, pfn, pfninc = 1;
+	unsigned long flags, pfn, pfninc = 1;
 	gfp_t gfp_flags = GFP_KERNEL;
 	unsigned int i, level;
 	pte_t *pbase, *tmp;
@@ -374,8 +375,6 @@ static int split_large_page(pte_t *kpte, unsigned long address)
 	if (tmp != kpte)
 		goto out_unlock;
 
-	address = __pa(address);
-	addr = address & PMD_PAGE_MASK;
 	pbase = (pte_t *)page_address(base);
 #ifdef CONFIG_X86_32
 	paravirt_alloc_pt(&init_mm, page_to_pfn(base));
@@ -386,7 +385,6 @@ static int split_large_page(pte_t *kpte, unsigned long address)
 	if (level == PG_LEVEL_1G) {
 		pfninc = PMD_PAGE_SIZE >> PAGE_SHIFT;
 		pgprot_val(ref_prot) |= _PAGE_PSE;
-		addr &= PUD_PAGE_MASK;
 	}
 #endif
 
-- 
cgit v0.10.2


From 795d45b22c079946332bf3825afefe5a981a97b6 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Mon, 4 Feb 2008 16:48:10 +0100
Subject: x86: fix RTC lockdep warning: potential hardirq recursion

After disabling both CONFIG_DEBUG_LOCKING_API_SELFTESTS and netconsole
(using current mainline) I get a login prompt, and also...

[    5.181668] SELinux: policy loaded with handle_unknown=deny
[    5.183315] type=1403 audit(1202100038.157:3): policy loaded auid=4294967295 ses=4294967295
[    5.822073] SELinux: initialized (dev usbfs, type usbfs), uses genfs_contexts
[    7.819146] ------------[ cut here ]------------
[    7.819146] WARNING: at kernel/lockdep.c:2033 trace_hardirqs_on+0x9b/0x10d()
[    7.819146] Modules linked in: generic ext3 jbd ide_disk ide_core
[    7.819146] Pid: 399, comm: hwclock Not tainted 2.6.24 #4
[    7.819146]  [<c011d140>] warn_on_slowpath+0x41/0x51
[    7.819146]  [<c01364a9>] ? lock_release_holdtime+0x50/0x56
[    7.819146]  [<c013770c>] ? check_usage_forwards+0x19/0x3b
[    7.819146]  [<c01390c4>] ? __lock_acquire+0xac3/0xb0b
[    7.819146]  [<c0108c98>] ? native_sched_clock+0x8b/0x9f
[    7.819146]  [<c01364a9>] ? lock_release_holdtime+0x50/0x56
[    7.819146]  [<c030ca6c>] ? _spin_unlock_irq+0x22/0x42
[    7.819146]  [<c013848b>] trace_hardirqs_on+0x9b/0x10d
[    7.819146]  [<c030ca6c>] _spin_unlock_irq+0x22/0x42
[    7.819146]  [<c011481e>] hpet_rtc_interrupt+0xdf/0x290
[    7.819146]  [<c014ea90>] handle_IRQ_event+0x1a/0x46
[    7.819146]  [<c014f8ea>] handle_edge_irq+0xbe/0xff
[    7.819146]  [<c0106e08>] do_IRQ+0x6d/0x84
[    7.819146]  [<c0105596>] common_interrupt+0x2e/0x34
[    7.819146]  [<c013007b>] ? ktime_get_ts+0x8/0x3f
[    7.819146]  [<c0139420>] ? lock_release+0x167/0x16f
[    7.819146]  [<c017974a>] ? core_sys_select+0x2c/0x327
[    7.819146]  [<c0179792>] core_sys_select+0x74/0x327
[    7.819146]  [<c0108c98>] ? native_sched_clock+0x8b/0x9f
[    7.819146]  [<c01364a9>] ? lock_release_holdtime+0x50/0x56
[    7.819146]  [<c030ca6c>] ? _spin_unlock_irq+0x22/0x42
[    7.819146]  [<c01384d6>] ? trace_hardirqs_on+0xe6/0x10d
[    7.819146]  [<c030ca77>] ? _spin_unlock_irq+0x2d/0x42
[    7.819146]  [<c023b437>] ? rtc_do_ioctl+0x11b/0x677
[    7.819146]  [<c01c487e>] ? inode_has_perm+0x5e/0x68
[    7.819146]  [<c01364a9>] ? lock_release_holdtime+0x50/0x56
[    7.819146]  [<c0108c98>] ? native_sched_clock+0x8b/0x9f
[    7.819146]  [<c01c490b>] ? file_has_perm+0x83/0x8c
[    7.819146]  [<c023ba08>] ? rtc_ioctl+0xf/0x11
[    7.819146]  [<c017898d>] ? do_ioctl+0x55/0x67
[    7.819146]  [<c0179d15>] sys_select+0x93/0x163
[    7.819146]  [<c0104b39>] ? sysenter_past_esp+0x9a/0xa5
[    7.819146]  [<c0104afe>] sysenter_past_esp+0x5f/0xa5
[    7.819146]  =======================
[    7.819146] ---[ end trace 96540ca301ffb84c ]---
[    7.819210] rtc: lost 6 interrupts
[    7.870668] type=1400 audit(1202128840.794:4): avc:  denied  { audit_write } for  pid=399 comm="hwclock" capability=29 scontext=system_u:system_r:hwclock_t:s0 tcontext=system_u:system_r:hwclock_t:s0 tclass=capability
[    9.538866] input: PC Speaker as /class/input/input5

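This happens because hpet_rtc_interrupt()'s call to get_rtc_time() ends up
resolving to include/asm-generic/rtc.h's (hilariously inlined)
get_rtc_time(), which does spin_unlock_irq() from hard IRQ context.

The obvious patch fixes it: use spin_lock_irqsave()/spin_unlock_irqrestore()
instead, so that the interrupt state of the caller is preserved.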

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/asm-generic/rtc.h b/include/asm-generic/rtc.h
index d3238f1f..dd1bed8 100644
--- a/include/asm-generic/rtc.h
+++ b/include/asm-generic/rtc.h
@@ -35,10 +35,11 @@
 static inline unsigned char rtc_is_updating(void)
 {
 	unsigned char uip;
+	unsigned long flags;
 
-	spin_lock_irq(&rtc_lock);
+	spin_lock_irqsave(&rtc_lock, flags);
 	uip = (CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP);
-	spin_unlock_irq(&rtc_lock);
+	spin_unlock_irqrestore(&rtc_lock, flags);
 	return uip;
 }
 
@@ -46,6 +47,8 @@ static inline unsigned int get_rtc_time(struct rtc_time *time)
 {
 	unsigned long uip_watchdog = jiffies;
 	unsigned char ctrl;
+	unsigned long flags;
+
 #ifdef CONFIG_MACH_DECSTATION
 	unsigned int real_year;
 #endif
@@ -72,7 +75,7 @@ static inline unsigned int get_rtc_time(struct rtc_time *time)
 	 * RTC has RTC_DAY_OF_WEEK, we ignore it, as it is only updated
 	 * by the RTC when initially set to a non-zero value.
 	 */
-	spin_lock_irq(&rtc_lock);
+	spin_lock_irqsave(&rtc_lock, flags);
 	time->tm_sec = CMOS_READ(RTC_SECONDS);
 	time->tm_min = CMOS_READ(RTC_MINUTES);
 	time->tm_hour = CMOS_READ(RTC_HOURS);
@@ -83,7 +86,7 @@ static inline unsigned int get_rtc_time(struct rtc_time *time)
 	real_year = CMOS_READ(RTC_DEC_YEAR);
 #endif
 	ctrl = CMOS_READ(RTC_CONTROL);
-	spin_unlock_irq(&rtc_lock);
+	spin_unlock_irqrestore(&rtc_lock, flags);
 
 	if (!(ctrl & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
 	{
-- 
cgit v0.10.2