From 76d08bb3f09054edc45326ce5c698a3f6c45f5d0 Mon Sep 17 00:00:00 2001
From: Tony Luck <tony.luck@intel.com>
Date: Mon, 5 Jun 2006 13:54:14 -0700
Subject: [IA64] Add "model name" to /proc/cpuinfo

Linux ia64 port tried to decode the processor family number
to something human-readable, but Intel brandnames don't change
synchronously with updates to the family number.  Adopt a more
i386-like approach and just print the family number in decimal.
Add a new field "model name" that uses PAL_BRAND_INFO to find
the official name for the cpu, or on older systems, falls back
to using the well-known codenames (Merced, McKinley, Madison).

Signed-off-by: Tony Luck <tony.luck@intel.com>

diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
index e4dfda1..3f7067c 100644
--- a/arch/ia64/kernel/setup.c
+++ b/arch/ia64/kernel/setup.c
@@ -509,7 +509,7 @@ show_cpuinfo (struct seq_file *m, void *v)
 		{ 1UL << 1, "spontaneous deferral"},
 		{ 1UL << 2, "16-byte atomic ops" }
 	};
-	char family[32], features[128], *cp, sep;
+	char features[128], *cp, sep;
 	struct cpuinfo_ia64 *c = v;
 	unsigned long mask;
 	unsigned long proc_freq;
@@ -517,12 +517,6 @@ show_cpuinfo (struct seq_file *m, void *v)
 
 	mask = c->features;
 
-	switch (c->family) {
-	      case 0x07:	memcpy(family, "Itanium", 8); break;
-	      case 0x1f:	memcpy(family, "Itanium 2", 10); break;
-	      default:		sprintf(family, "%u", c->family); break;
-	}
-
 	/* build the feature string: */
 	memcpy(features, " standard", 10);
 	cp = features;
@@ -553,8 +547,9 @@ show_cpuinfo (struct seq_file *m, void *v)
 		   "processor  : %d\n"
 		   "vendor     : %s\n"
 		   "arch       : IA-64\n"
-		   "family     : %s\n"
+		   "family     : %u\n"
 		   "model      : %u\n"
+		   "model name : %s\n"
 		   "revision   : %u\n"
 		   "archrev    : %u\n"
 		   "features   :%s\n"	/* don't change this---it _is_ right! */
@@ -563,7 +558,8 @@ show_cpuinfo (struct seq_file *m, void *v)
 		   "cpu MHz    : %lu.%06lu\n"
 		   "itc MHz    : %lu.%06lu\n"
 		   "BogoMIPS   : %lu.%02lu\n",
-		   cpunum, c->vendor, family, c->model, c->revision, c->archrev,
+		   cpunum, c->vendor, c->family, c->model,
+		   c->model_name, c->revision, c->archrev,
 		   features, c->ppn, c->number,
 		   proc_freq / 1000, proc_freq % 1000,
 		   c->itc_freq / 1000000, c->itc_freq % 1000000,
@@ -611,6 +607,31 @@ struct seq_operations cpuinfo_op = {
 	.show =		show_cpuinfo
 };
 
+static char brandname[128];
+
+static char * __cpuinit
+get_model_name(__u8 family, __u8 model)
+{
+	char brand[128];
+
+	if (ia64_pal_get_brand_info(brand)) {
+		if (family == 0x7)
+			memcpy(brand, "Merced", 7);
+		else if (family == 0x1f) switch (model) {
+			case 0: memcpy(brand, "McKinley", 9); break;
+			case 1: memcpy(brand, "Madison", 8); break;
+			case 2: memcpy(brand, "Madison up to 9M cache", 23); break;
+		} else
+			memcpy(brand, "Unknown", 8);
+	}
+	if (brandname[0] == '\0')
+		return strcpy(brandname, brand);
+	else if (strcmp(brandname, brand) == 0)
+		return brandname;
+	else
+		return kstrdup(brand, GFP_KERNEL);
+}
+
 static void __cpuinit
 identify_cpu (struct cpuinfo_ia64 *c)
 {
@@ -640,7 +661,6 @@ identify_cpu (struct cpuinfo_ia64 *c)
 	pal_status_t status;
 	unsigned long impl_va_msb = 50, phys_addr_size = 44;	/* Itanium defaults */
 	int i;
-
 	for (i = 0; i < 5; ++i)
 		cpuid.bits[i] = ia64_get_cpuid(i);
 
@@ -663,6 +683,7 @@ identify_cpu (struct cpuinfo_ia64 *c)
 	c->family = cpuid.field.family;
 	c->archrev = cpuid.field.archrev;
 	c->features = cpuid.field.features;
+	c->model_name = get_model_name(c->family, c->model);
 
 	status = ia64_pal_vm_summary(&vm1, &vm2);
 	if (status == PAL_STATUS_SUCCESS) {
diff --git a/include/asm-ia64/pal.h b/include/asm-ia64/pal.h
index 37e52a2..312109d 100644
--- a/include/asm-ia64/pal.h
+++ b/include/asm-ia64/pal.h
@@ -78,6 +78,7 @@
 #define PAL_VM_TR_READ		261	/* read contents of translation register */
 #define PAL_GET_PSTATE		262	/* get the current P-state */
 #define PAL_SET_PSTATE		263	/* set the P-state */
+#define PAL_BRAND_INFO		274	/* Processor branding information */
 
 #ifndef __ASSEMBLY__
 
@@ -1133,6 +1134,15 @@ ia64_pal_set_pstate (u64 pstate_index)
 	return iprv.status;
 }
 
+/* Processor branding information*/
+static inline s64
+ia64_pal_get_brand_info (char *brand_info)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL_STK(iprv, PAL_BRAND_INFO, 0, (u64)brand_info, 0);
+	return iprv.status;
+}
+
 /* Cause the processor to enter LIGHT HALT state, where prefetching and execution are
  * suspended, but cache and TLB coherency is maintained.
  */
diff --git a/include/asm-ia64/processor.h b/include/asm-ia64/processor.h
index b3bd58e..03100c4 100644
--- a/include/asm-ia64/processor.h
+++ b/include/asm-ia64/processor.h
@@ -164,6 +164,7 @@ struct cpuinfo_ia64 {
 	__u8 family;
 	__u8 archrev;
 	char vendor[16];
+	char *model_name;
 
 #ifdef CONFIG_NUMA
 	struct ia64_node_data *node_data;
-- 
cgit v0.10.2


From 2ab561a116e16cdee3ae0e13d51910634c15aee9 Mon Sep 17 00:00:00 2001
From: David Mosberger-Tang <David.Mosberger@acm.org>
Date: Wed, 21 Jun 2006 11:19:22 -0700
Subject: [IA64] esi-support

Add support for making ESI calls [1].  ESI stands for "Extensible SAL
specification" and is basically a way for invoking firmware
subroutines which are identified by a GUID.  I don't know whether ESI
is used by vendors other than HP (if you do, please let me know) but
as firmware "backdoors" go, this seems one of the cleaner methods, so
it seems reasonable to support it, even though I'm not aware of any
publicly documented ESI calls.  I'd have liked to make the ESI module
completely stand-alone, but unfortunately that is not easily (or not
at all) possible because in order to make ESI calls in physical mode,
a small stub similar to the EFI stub is needed in the kernel proper.
I did try to create a stub that would work in user-level, but it
quickly got ugly beyond recognition (e.g., the stub had to make
assumptions about how the module-loader generated call-stubs work) and
I didn't even get it to work (that's probably fixable, but I didn't
bother because I concluded it was too ugly anyhow).  While it's not
terribly elegant to have kernel code which isn't actively used in the
kernel proper, I think it might be worth making an exception here for
two reasons: the code is trivially small (all that's really needed is
esi_stub.S) and by including it in the normal kernel distro, it might
encourage other OEMs to also use ESI, which I think would be far
better than each inventing their own firmware "backdoor".

The code was originally written by Alex.  I just massaged and packaged
it a bit (and perhaps messed up some things along the way...).

Changes since first version of patch that was posted to mailing list:
* Export ia64_esi_call and ia64_esi_call_phys() as GPL symbols.
* Disallow building esi.c as a module for now.  Building as a module
  would currently lead to an unresolved reference to "sal_lock" on SMP kernels
  because that symbol doesn't get exported.
* Export esi_call_phys() only if ESI is enabled.
* Remove internal stuff from esi.h and add a "proc_type" argument to
  ia64_esi_call() such that serialization-requirements can be expressed (ESI
  follows SAL here, where procedure calls may have to be serialized, are
  MP-safe, or MP-safe andr reentrant).

[1] h21007.www2.hp.com/dspp/tech/tech_TechDocumentDetailPage_IDX/1,1701,919,00.html

Signed-off-by: David Mosberger <David.Mosberger@acm.org>
Signed-off-by: Alex Williamson <alex.williamson@hp.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>

diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 0f3076a..bc10651 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -413,6 +413,14 @@ config IA64_PALINFO
 config SGI_SN
 	def_bool y if (IA64_SGI_SN2 || IA64_GENERIC)
 
+config IA64_ESI
+	bool "ESI (Extensible SAL Interface) support"
+	help
+	  If you say Y here, support is built into the kernel to
+	  make ESI calls.  ESI calls are used to support vendor-specific
+	  firmware extensions, such as the ability to inject memory-errors
+	  for test-purposes.  If you're unsure, say N.
+
 source "drivers/sn/Kconfig"
 
 source "drivers/firmware/Kconfig"
diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile
index 09a0dbc..66ac8e3 100644
--- a/arch/ia64/kernel/Makefile
+++ b/arch/ia64/kernel/Makefile
@@ -31,6 +31,11 @@ obj-$(CONFIG_KPROBES)		+= kprobes.o jprobes.o
 obj-$(CONFIG_IA64_UNCACHED_ALLOCATOR)	+= uncached.o
 mca_recovery-y			+= mca_drv.o mca_drv_asm.o
 
+obj-$(CONFIG_IA64_ESI)		+= esi.o
+ifneq ($(CONFIG_IA64_ESI),)
+obj-y				+= esi_stub.o	# must be in kernel proper
+endif
+
 # The gate DSO image is built using a special linker script.
 targets += gate.so gate-syms.o
 
diff --git a/arch/ia64/kernel/esi.c b/arch/ia64/kernel/esi.c
new file mode 100644
index 0000000..ebf4e98
--- /dev/null
+++ b/arch/ia64/kernel/esi.c
@@ -0,0 +1,205 @@
+/*
+ * Extensible SAL Interface (ESI) support routines.
+ *
+ * Copyright (C) 2006 Hewlett-Packard Co
+ * 	Alex Williamson <alex.williamson@hp.com>
+ */
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/string.h>
+
+#include <asm/esi.h>
+#include <asm/sal.h>
+
+MODULE_AUTHOR("Alex Williamson <alex.williamson@hp.com>");
+MODULE_DESCRIPTION("Extensible SAL Interface (ESI) support");
+MODULE_LICENSE("GPL");
+
+#define MODULE_NAME	"esi"
+
+#define ESI_TABLE_GUID					\
+    EFI_GUID(0x43EA58DC, 0xCF28, 0x4b06, 0xB3,		\
+	     0x91, 0xB7, 0x50, 0x59, 0x34, 0x2B, 0xD4)
+
+enum esi_systab_entry_type {
+	ESI_DESC_ENTRY_POINT = 0
+};
+
+/*
+ * Entry type:	Size:
+ *	0	48
+ */
+#define ESI_DESC_SIZE(type)	"\060"[(unsigned) (type)]
+
+typedef struct ia64_esi_desc_entry_point {
+	u8 type;
+	u8 reserved1[15];
+	u64 esi_proc;
+	u64 gp;
+	efi_guid_t guid;
+} ia64_esi_desc_entry_point_t;
+
+struct pdesc {
+	void *addr;
+	void *gp;
+};
+
+static struct ia64_sal_systab *esi_systab;
+
+static int __init esi_init (void)
+{
+	efi_config_table_t *config_tables;
+	struct ia64_sal_systab *systab;
+	unsigned long esi = 0;
+	char *p;
+	int i;
+
+	config_tables = __va(efi.systab->tables);
+
+	for (i = 0; i < (int) efi.systab->nr_tables; ++i) {
+		if (efi_guidcmp(config_tables[i].guid, ESI_TABLE_GUID) == 0) {
+			esi = config_tables[i].table;
+			break;
+		}
+	}
+
+	if (!esi)
+		return -ENODEV;;
+
+	systab = __va(esi);
+
+	if (strncmp(systab->signature, "ESIT", 4) != 0) {
+		printk(KERN_ERR "bad signature in ESI system table!");
+		return -ENODEV;
+	}
+
+	p = (char *) (systab + 1);
+	for (i = 0; i < systab->entry_count; i++) {
+		/*
+		 * The first byte of each entry type contains the type
+		 * descriptor.
+		 */
+		switch (*p) {
+		      case ESI_DESC_ENTRY_POINT:
+			break;
+		      default:
+			printk(KERN_WARNING "Unkown table type %d found in "
+			       "ESI table, ignoring rest of table\n", *p);
+			return -ENODEV;
+		}
+
+		p += ESI_DESC_SIZE(*p);
+	}
+
+	esi_systab = systab;
+	return 0;
+}
+
+
+int ia64_esi_call (efi_guid_t guid, struct ia64_sal_retval *isrvp,
+		   enum esi_proc_type proc_type, u64 func,
+		   u64 arg1, u64 arg2, u64 arg3, u64 arg4, u64 arg5, u64 arg6,
+		   u64 arg7)
+{
+	struct ia64_fpreg fr[6];
+	unsigned long flags = 0;
+	int i;
+	char *p;
+
+	if (!esi_systab)
+		return -1;
+
+	p = (char *) (esi_systab + 1);
+	for (i = 0; i < esi_systab->entry_count; i++) {
+		if (*p == ESI_DESC_ENTRY_POINT) {
+			ia64_esi_desc_entry_point_t *esi = (void *)p;
+			if (!efi_guidcmp(guid, esi->guid)) {
+				ia64_sal_handler esi_proc;
+				struct pdesc pdesc;
+
+				pdesc.addr = __va(esi->esi_proc);
+				pdesc.gp = __va(esi->gp);
+
+				esi_proc = (ia64_sal_handler) &pdesc;
+
+				ia64_save_scratch_fpregs(fr);
+				if (proc_type == ESI_PROC_SERIALIZED)
+					spin_lock_irqsave(&sal_lock, flags);
+				else if (proc_type == ESI_PROC_MP_SAFE)
+					local_irq_save(flags);
+				else
+					preempt_disable();
+				*isrvp = (*esi_proc)(func, arg1, arg2, arg3,
+						     arg4, arg5, arg6, arg7);
+				if (proc_type == ESI_PROC_SERIALIZED)
+					spin_unlock_irqrestore(&sal_lock,
+							       flags);
+				else if (proc_type == ESI_PROC_MP_SAFE)
+					local_irq_restore(flags);
+				else
+					preempt_enable();
+				ia64_load_scratch_fpregs(fr);
+				return 0;
+			}
+		}
+		p += ESI_DESC_SIZE(*p);
+	}
+	return -1;
+}
+EXPORT_SYMBOL_GPL(ia64_esi_call);
+
+int ia64_esi_call_phys (efi_guid_t guid, struct ia64_sal_retval *isrvp,
+			u64 func, u64 arg1, u64 arg2, u64 arg3, u64 arg4,
+			u64 arg5, u64 arg6, u64 arg7)
+{
+	struct ia64_fpreg fr[6];
+	unsigned long flags;
+	u64 esi_params[8];
+	char *p;
+	int i;
+
+	if (!esi_systab)
+		return -1;
+
+	p = (char *) (esi_systab + 1);
+	for (i = 0; i < esi_systab->entry_count; i++) {
+		if (*p == ESI_DESC_ENTRY_POINT) {
+			ia64_esi_desc_entry_point_t *esi = (void *)p;
+			if (!efi_guidcmp(guid, esi->guid)) {
+				ia64_sal_handler esi_proc;
+				struct pdesc pdesc;
+
+				pdesc.addr = (void *)esi->esi_proc;
+				pdesc.gp = (void *)esi->gp;
+
+				esi_proc = (ia64_sal_handler) &pdesc;
+
+				esi_params[0] = func;
+				esi_params[1] = arg1;
+				esi_params[2] = arg2;
+				esi_params[3] = arg3;
+				esi_params[4] = arg4;
+				esi_params[5] = arg5;
+				esi_params[6] = arg6;
+				esi_params[7] = arg7;
+				ia64_save_scratch_fpregs(fr);
+				spin_lock_irqsave(&sal_lock, flags);
+				*isrvp = esi_call_phys(esi_proc, esi_params);
+				spin_unlock_irqrestore(&sal_lock, flags);
+				ia64_load_scratch_fpregs(fr);
+				return 0;
+			}
+		}
+		p += ESI_DESC_SIZE(*p);
+	}
+	return -1;
+}
+EXPORT_SYMBOL_GPL(ia64_esi_call_phys);
+
+static void __exit esi_exit (void)
+{
+}
+
+module_init(esi_init);
+module_exit(esi_exit);	/* makes module removable... */
diff --git a/arch/ia64/kernel/esi_stub.S b/arch/ia64/kernel/esi_stub.S
new file mode 100644
index 0000000..6b3d6c1
--- /dev/null
+++ b/arch/ia64/kernel/esi_stub.S
@@ -0,0 +1,96 @@
+/*
+ * ESI call stub.
+ *
+ * Copyright (C) 2005 Hewlett-Packard Co
+ *	Alex Williamson <alex.williamson@hp.com>
+ *
+ * Based on EFI call stub by David Mosberger.  The stub is virtually
+ * identical to the one for EFI phys-mode calls, except that ESI
+ * calls may have up to 8 arguments, so they get passed to this routine
+ * through memory.
+ *
+ * This stub allows us to make ESI calls in physical mode with interrupts
+ * turned off.  ESI calls may not support calling from virtual mode.
+ *
+ * Google for "Extensible SAL specification" for a document describing the
+ * ESI standard.
+ */
+
+/*
+ * PSR settings as per SAL spec (Chapter 8 in the "IA-64 System
+ * Abstraction Layer Specification", revision 2.6e).  Note that
+ * psr.dfl and psr.dfh MUST be cleared, despite what this manual says.
+ * Otherwise, SAL dies whenever it's trying to do an IA-32 BIOS call
+ * (the br.ia instruction fails unless psr.dfl and psr.dfh are
+ * cleared).  Fortunately, SAL promises not to touch the floating
+ * point regs, so at least we don't have to save f2-f127.
+ */
+#define PSR_BITS_TO_CLEAR						\
+	(IA64_PSR_I | IA64_PSR_IT | IA64_PSR_DT | IA64_PSR_RT |		\
+	 IA64_PSR_DD | IA64_PSR_SS | IA64_PSR_RI | IA64_PSR_ED |	\
+	 IA64_PSR_DFL | IA64_PSR_DFH)
+
+#define PSR_BITS_TO_SET							\
+	(IA64_PSR_BN)
+
+#include <asm/processor.h>
+#include <asm/asmmacro.h>
+
+/*
+ * Inputs:
+ *	in0 = address of function descriptor of ESI routine to call
+ *	in1 = address of array of ESI parameters
+ *
+ * Outputs:
+ *	r8 = result returned by called function
+ */
+GLOBAL_ENTRY(esi_call_phys)
+	.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(2)
+	alloc loc1=ar.pfs,2,7,8,0
+	ld8 r2=[in0],8			// load ESI function's entry point
+	mov loc0=rp
+	.body
+	;;
+	ld8 out0=[in1],8		// ESI params loaded from array
+	;;				// passing all as inputs doesn't work
+	ld8 out1=[in1],8
+	;;
+	ld8 out2=[in1],8
+	;;
+	ld8 out3=[in1],8
+	;;
+	ld8 out4=[in1],8
+	;;
+	ld8 out5=[in1],8
+	;;
+	ld8 out6=[in1],8
+	;;
+	ld8 out7=[in1]
+	mov loc2=gp			// save global pointer
+	mov loc4=ar.rsc			// save RSE configuration
+	mov ar.rsc=0			// put RSE in enforced lazy, LE mode
+	;;
+	ld8 gp=[in0]			// load ESI function's global pointer
+	movl r16=PSR_BITS_TO_CLEAR
+	mov loc3=psr			// save processor status word
+	movl r17=PSR_BITS_TO_SET
+	;;
+	or loc3=loc3,r17
+	mov b6=r2
+	;;
+	andcm r16=loc3,r16	// get psr with IT, DT, and RT bits cleared
+	br.call.sptk.many rp=ia64_switch_mode_phys
+.ret0:	mov loc5=r19			// old ar.bsp
+	mov loc6=r20			// old sp
+	br.call.sptk.many rp=b6		// call the ESI function
+.ret1:	mov ar.rsc=0			// put RSE in enforced lazy, LE mode
+	mov r16=loc3			// save virtual mode psr
+	mov r19=loc5			// save virtual mode bspstore
+	mov r20=loc6			// save virtual mode sp
+	br.call.sptk.many rp=ia64_switch_mode_virt // return to virtual mode
+.ret2:	mov ar.rsc=loc4			// restore RSE configuration
+	mov ar.pfs=loc1
+	mov rp=loc0
+	mov gp=loc2
+	br.ret.sptk.many rp
+END(esi_call_phys)
diff --git a/arch/ia64/kernel/ia64_ksyms.c b/arch/ia64/kernel/ia64_ksyms.c
index bbcfd08..9583f31 100644
--- a/arch/ia64/kernel/ia64_ksyms.c
+++ b/arch/ia64/kernel/ia64_ksyms.c
@@ -106,5 +106,9 @@ EXPORT_SYMBOL(ia64_spinlock_contention);
 # endif
 #endif
 
+#if defined(CONFIG_IA64_ESI) || defined(CONFIG_IA64_ESI_MODULE)
+extern void esi_call_phys (void);
+EXPORT_SYMBOL_GPL(esi_call_phys);
+#endif
 extern char ia64_ivt[];
 EXPORT_SYMBOL(ia64_ivt);
diff --git a/include/asm-ia64/esi.h b/include/asm-ia64/esi.h
new file mode 100644
index 0000000..84aac0e
--- /dev/null
+++ b/include/asm-ia64/esi.h
@@ -0,0 +1,30 @@
+/*
+ * ESI service calls.
+ *
+ * Copyright (c) Copyright 2005-2006 Hewlett-Packard Development Company, L.P.
+ * 	Alex Williamson <alex.williamson@hp.com>
+ */
+#ifndef esi_h
+#define esi_h
+
+#include <linux/efi.h>
+
+#define ESI_QUERY			0x00000001
+#define ESI_OPEN_HANDLE			0x02000000
+#define ESI_CLOSE_HANDLE		0x02000001
+
+enum esi_proc_type {
+	ESI_PROC_SERIALIZED,	/* calls need to be serialized */
+	ESI_PROC_MP_SAFE,	/* MP-safe, but not reentrant */
+	ESI_PROC_REENTRANT	/* MP-safe and reentrant */
+};
+
+extern int ia64_esi_init (void);
+extern struct ia64_sal_retval esi_call_phys (void *, u64 *);
+extern int ia64_esi_call(efi_guid_t, struct ia64_sal_retval *,
+			 enum esi_proc_type,
+			 u64, u64, u64, u64, u64, u64, u64, u64);
+extern int ia64_esi_call_phys(efi_guid_t, struct ia64_sal_retval *, u64, u64,
+                              u64, u64, u64, u64, u64, u64);
+
+#endif /* esi_h */
-- 
cgit v0.10.2


From 214ddde2f95037e129eff7e895869771719c7c1b Mon Sep 17 00:00:00 2001
From: bibo mao <bibo.mao@intel.com>
Date: Tue, 26 Sep 2006 11:20:37 -0700
Subject: [IA64] kprobe opcode 16 bytes alignment on IA64

On IA64 instruction opcode must be 16 bytes alignment, in kprobe structure
there is one element to save original instruction, currently saved opcode
is not statically allocated in kprobe structure, that can not assure
16 bytes alignment. This patch dynamically allocated kprobe instruction
opcode to assure 16 bytes alignment.

Signed-off-by: bibo mao <bibo.mao@intel.com>
Acked-by: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>

diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c
index 781960f..320cb7a 100644
--- a/arch/ia64/kernel/kprobes.c
+++ b/arch/ia64/kernel/kprobes.c
@@ -136,10 +136,8 @@ static void __kprobes update_kprobe_inst_flag(uint template, uint  slot,
 static int __kprobes unsupported_inst(uint template, uint  slot,
 				      uint major_opcode,
 				      unsigned long kprobe_inst,
-				      struct kprobe *p)
+				      unsigned long addr)
 {
-	unsigned long addr = (unsigned long)p->addr;
-
 	if (bundle_encoding[template][slot] == I) {
 		switch (major_opcode) {
 			case 0x0: //I_UNIT_MISC_OPCODE:
@@ -217,7 +215,7 @@ static void __kprobes prepare_break_inst(uint template, uint  slot,
 					 struct kprobe *p)
 {
 	unsigned long break_inst = BREAK_INST;
-	bundle_t *bundle = &p->ainsn.insn.bundle;
+	bundle_t *bundle = &p->opcode.bundle;
 
 	/*
 	 * Copy the original kprobe_inst qualifying predicate(qp)
@@ -423,11 +421,9 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
 	unsigned long *kprobe_addr = (unsigned long *)(addr & ~0xFULL);
 	unsigned long kprobe_inst=0;
 	unsigned int slot = addr & 0xf, template, major_opcode = 0;
-	bundle_t *bundle = &p->ainsn.insn.bundle;
-
-	memcpy(&p->opcode.bundle, kprobe_addr, sizeof(bundle_t));
-	memcpy(&p->ainsn.insn.bundle, kprobe_addr, sizeof(bundle_t));
+	bundle_t *bundle;
 
+	bundle = &((kprobe_opcode_t *)kprobe_addr)->bundle;
  	template = bundle->quad0.template;
 
 	if(valid_kprobe_addr(template, slot, addr))
@@ -440,20 +436,19 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
 	/* Get kprobe_inst and major_opcode from the bundle */
 	get_kprobe_inst(bundle, slot, &kprobe_inst, &major_opcode);
 
-	if (unsupported_inst(template, slot, major_opcode, kprobe_inst, p))
+	if (unsupported_inst(template, slot, major_opcode, kprobe_inst, addr))
 			return -EINVAL;
 
-	prepare_break_inst(template, slot, major_opcode, kprobe_inst, p);
 
-	return 0;
-}
+	p->ainsn.insn = get_insn_slot();
+	if (!p->ainsn.insn)
+		return -ENOMEM;
+	memcpy(&p->opcode, kprobe_addr, sizeof(kprobe_opcode_t));
+	memcpy(p->ainsn.insn, kprobe_addr, sizeof(kprobe_opcode_t));
 
-void __kprobes flush_insn_slot(struct kprobe *p)
-{
-	unsigned long arm_addr;
+	prepare_break_inst(template, slot, major_opcode, kprobe_inst, p);
 
-	arm_addr = ((unsigned long)&p->opcode.bundle) & ~0xFULL;
-	flush_icache_range(arm_addr, arm_addr + sizeof(bundle_t));
+	return 0;
 }
 
 void __kprobes arch_arm_kprobe(struct kprobe *p)
@@ -461,9 +456,10 @@ void __kprobes arch_arm_kprobe(struct kprobe *p)
 	unsigned long addr = (unsigned long)p->addr;
 	unsigned long arm_addr = addr & ~0xFULL;
 
-	flush_insn_slot(p);
-	memcpy((char *)arm_addr, &p->ainsn.insn.bundle, sizeof(bundle_t));
-	flush_icache_range(arm_addr, arm_addr + sizeof(bundle_t));
+	flush_icache_range((unsigned long)p->ainsn.insn,
+			(unsigned long)p->ainsn.insn + sizeof(kprobe_opcode_t));
+	memcpy((char *)arm_addr, &p->opcode, sizeof(kprobe_opcode_t));
+	flush_icache_range(arm_addr, arm_addr + sizeof(kprobe_opcode_t));
 }
 
 void __kprobes arch_disarm_kprobe(struct kprobe *p)
@@ -471,11 +467,18 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p)
 	unsigned long addr = (unsigned long)p->addr;
 	unsigned long arm_addr = addr & ~0xFULL;
 
-	/* p->opcode contains the original unaltered bundle */
-	memcpy((char *) arm_addr, (char *) &p->opcode.bundle, sizeof(bundle_t));
-	flush_icache_range(arm_addr, arm_addr + sizeof(bundle_t));
+	/* p->ainsn.insn contains the original unaltered kprobe_opcode_t */
+	memcpy((char *) arm_addr, (char *) p->ainsn.insn,
+					 sizeof(kprobe_opcode_t));
+	flush_icache_range(arm_addr, arm_addr + sizeof(kprobe_opcode_t));
 }
 
+void __kprobes arch_remove_kprobe(struct kprobe *p)
+{
+	mutex_lock(&kprobe_mutex);
+	free_insn_slot(p->ainsn.insn);
+	mutex_unlock(&kprobe_mutex);
+}
 /*
  * We are resuming execution after a single step fault, so the pt_regs
  * structure reflects the register state after we executed the instruction
@@ -486,12 +489,12 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p)
  */
 static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs)
 {
-  	unsigned long bundle_addr = ((unsigned long) (&p->opcode.bundle)) & ~0xFULL;
+  	unsigned long bundle_addr = (unsigned long) (&p->ainsn.insn->bundle);
   	unsigned long resume_addr = (unsigned long)p->addr & ~0xFULL;
  	unsigned long template;
  	int slot = ((unsigned long)p->addr & 0xf);
 
-	template = p->opcode.bundle.quad0.template;
+	template = p->ainsn.insn->bundle.quad0.template;
 
  	if (slot == 1 && bundle_encoding[template][1] == L)
  		slot = 2;
@@ -553,7 +556,7 @@ turn_ss_off:
 
 static void __kprobes prepare_ss(struct kprobe *p, struct pt_regs *regs)
 {
-	unsigned long bundle_addr = (unsigned long) &p->opcode.bundle;
+	unsigned long bundle_addr = (unsigned long) &p->ainsn.insn->bundle;
 	unsigned long slot = (unsigned long)p->addr & 0xf;
 
 	/* single step inline if break instruction */
diff --git a/include/asm-ia64/kprobes.h b/include/asm-ia64/kprobes.h
index 9389049..1b45b71 100644
--- a/include/asm-ia64/kprobes.h
+++ b/include/asm-ia64/kprobes.h
@@ -29,7 +29,8 @@
 #include <linux/percpu.h>
 #include <asm/break.h>
 
-#define MAX_INSN_SIZE   16
+#define __ARCH_WANT_KPROBES_INSN_SLOT
+#define MAX_INSN_SIZE   1
 #define BREAK_INST	(long)(__IA64_BREAK_KPROBE << 6)
 
 typedef union cmp_inst {
@@ -94,7 +95,7 @@ struct kprobe_ctlblk {
 #define IP_RELATIVE_PREDICT_OPCODE	(7)
 #define LONG_BRANCH_OPCODE		(0xC)
 #define LONG_CALL_OPCODE		(0xD)
-#define arch_remove_kprobe(p)		do {} while (0)
+#define flush_insn_slot(p)		do { } while (0)
 
 typedef struct kprobe_opcode {
 	bundle_t bundle;
@@ -108,7 +109,7 @@ struct fnptr {
 /* Architecture specific copy of original instruction*/
 struct arch_specific_insn {
 	/* copy of the instruction to be emulated */
-	kprobe_opcode_t insn;
+	kprobe_opcode_t *insn;
  #define INST_FLAG_FIX_RELATIVE_IP_ADDR		1
  #define INST_FLAG_FIX_BRANCH_REG		2
  #define INST_FLAG_BREAK_INST			4
@@ -125,6 +126,6 @@ static inline void jprobe_return(void)
 }
 extern void invalidate_stacked_regs(void);
 extern void flush_register_stack(void);
-extern void flush_insn_slot(struct kprobe *p);
+extern void arch_remove_kprobe(struct kprobe *p);
 
 #endif				/* _ASM_KPROBES_H */
-- 
cgit v0.10.2


From fd32cb3a9c9f9399421408e8734cd8a6d9d1a09f Mon Sep 17 00:00:00 2001
From: Keshavamurthy Anil S <anil.s.keshavamurthy@intel.com>
Date: Mon, 25 Sep 2006 16:32:20 -0700
Subject: [IA64] kprobes: fixup the pagefault exception caused by probehandlers

If the user-specified kprobe handler causes the page fault when accessing
user space address, fixup this fault since do_page_fault() should not
continue as the kprobe handler are run with preemption disabled.

Signed-off-by: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Tony Luck <tony.luck@intel.com>

diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c
index 320cb7a..169ec3a 100644
--- a/arch/ia64/kernel/kprobes.c
+++ b/arch/ia64/kernel/kprobes.c
@@ -771,6 +771,12 @@ static int __kprobes kprobes_fault_handler(struct pt_regs *regs, int trapnr)
 		 */
 		if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr))
 			return 1;
+		/*
+		 * In case the user-specified fault handler returned
+		 * zero, try to fix up.
+		 */
+		if (ia64_done_with_exception(regs))
+			return 1;
 
 		/*
 		 * Let ia64_do_page_fault() fix it.
-- 
cgit v0.10.2


From dd562c05410e13e878a3ee0deb8ac06db2e132c7 Mon Sep 17 00:00:00 2001
From: Stephane Eranian <eranian@hpl.hp.com>
Date: Thu, 21 Sep 2006 10:35:44 -0700
Subject: [IA64] Add interface so modules can discover whether multithreading
 is on.

Add is_multithreading_enabled() to check whether multi-threading
is enabled independently of which cpu is currently online

Signed-off-by: stephane eranian <eranian@hpl.hp.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>

diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c
index 6203ed4..f7d7f56 100644
--- a/arch/ia64/kernel/smpboot.c
+++ b/arch/ia64/kernel/smpboot.c
@@ -879,3 +879,27 @@ identify_siblings(struct cpuinfo_ia64 *c)
 	c->core_id = info.log1_cid;
 	c->thread_id = info.log1_tid;
 }
+
+/*
+ * returns non zero, if multi-threading is enabled
+ * on at least one physical package. Due to hotplug cpu
+ * and (maxcpus=), all threads may not necessarily be enabled
+ * even though the processor supports multi-threading.
+ */
+int is_multithreading_enabled(void)
+{
+	int i, j;
+
+	for_each_present_cpu(i) {
+		for_each_present_cpu(j) {
+			if (j == i)
+				continue;
+			if ((cpu_data(j)->socket_id == cpu_data(i)->socket_id)) {
+				if (cpu_data(j)->core_id == cpu_data(i)->core_id)
+					return 1;
+			}
+		}
+	}
+	return 0;
+}
+EXPORT_SYMBOL_GPL(is_multithreading_enabled);
diff --git a/include/asm-ia64/smp.h b/include/asm-ia64/smp.h
index 719ff30..6533eb4 100644
--- a/include/asm-ia64/smp.h
+++ b/include/asm-ia64/smp.h
@@ -128,6 +128,7 @@ extern void smp_send_reschedule (int cpu);
 extern void lock_ipi_calllock(void);
 extern void unlock_ipi_calllock(void);
 extern void identify_siblings (struct cpuinfo_ia64 *);
+extern int is_multithreading_enabled(void);
 
 #else
 
-- 
cgit v0.10.2


From 35589a8fa8138244e7f2ef9317c440aa580c9335 Mon Sep 17 00:00:00 2001
From: Keshavamurthy Anil S <anil.s.keshavamurthy@intel.com>
Date: Tue, 26 Sep 2006 12:03:13 -0700
Subject: [IA64] Move perfmon tables from thread_struct to pfm_context

This patch renders thread_struct->pmcs[] and thread_struct->pmds[]
OBSOLETE. The actual table is moved to pfm_context structure which
saves space in thread_struct (in turn saving space in task_struct
which frees up more space for kernel stacks).

Signed-off-by: Stephane Eranian <eranian@hpl.hp.com>
Signed-off-by: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>

diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
index 84a7e52..45000d5 100644
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -62,6 +62,9 @@
 
 #define PFM_INVALID_ACTIVATION	(~0UL)
 
+#define PFM_NUM_PMC_REGS	64	/* PMC save area for ctxsw */
+#define PFM_NUM_PMD_REGS	64	/* PMD save area for ctxsw */
+
 /*
  * depth of message queue
  */
@@ -296,14 +299,17 @@ typedef struct pfm_context {
 	unsigned long		ctx_reload_pmcs[4];	/* bitmask of force reload PMC on ctxsw in */
 	unsigned long		ctx_used_monitors[4];	/* bitmask of monitor PMC being used */
 
-	unsigned long		ctx_pmcs[IA64_NUM_PMC_REGS];	/*  saved copies of PMC values */
+	unsigned long		ctx_pmcs[PFM_NUM_PMC_REGS];	/*  saved copies of PMC values */
 
 	unsigned int		ctx_used_ibrs[1];		/* bitmask of used IBR (speedup ctxsw in) */
 	unsigned int		ctx_used_dbrs[1];		/* bitmask of used DBR (speedup ctxsw in) */
 	unsigned long		ctx_dbrs[IA64_NUM_DBG_REGS];	/* DBR values (cache) when not loaded */
 	unsigned long		ctx_ibrs[IA64_NUM_DBG_REGS];	/* IBR values (cache) when not loaded */
 
-	pfm_counter_t		ctx_pmds[IA64_NUM_PMD_REGS]; /* software state for PMDS */
+	pfm_counter_t		ctx_pmds[PFM_NUM_PMD_REGS]; /* software state for PMDS */
+
+	unsigned long		th_pmcs[PFM_NUM_PMC_REGS];	/* PMC thread save state */
+	unsigned long		th_pmds[PFM_NUM_PMD_REGS];	/* PMD thread save state */
 
 	u64			ctx_saved_psr_up;	/* only contains psr.up value */
 
@@ -867,7 +873,6 @@ static void
 pfm_mask_monitoring(struct task_struct *task)
 {
 	pfm_context_t *ctx = PFM_GET_CTX(task);
-	struct thread_struct *th = &task->thread;
 	unsigned long mask, val, ovfl_mask;
 	int i;
 
@@ -888,7 +893,7 @@ pfm_mask_monitoring(struct task_struct *task)
 	 * So in both cases, the live register contains the owner's
 	 * state. We can ONLY touch the PMU registers and NOT the PSR.
 	 *
-	 * As a consequence to this call, the thread->pmds[] array
+	 * As a consequence to this call, the ctx->th_pmds[] array
 	 * contains stale information which must be ignored
 	 * when context is reloaded AND monitoring is active (see
 	 * pfm_restart).
@@ -923,9 +928,9 @@ pfm_mask_monitoring(struct task_struct *task)
 	mask = ctx->ctx_used_monitors[0] >> PMU_FIRST_COUNTER;
 	for(i= PMU_FIRST_COUNTER; mask; i++, mask>>=1) {
 		if ((mask & 0x1) == 0UL) continue;
-		ia64_set_pmc(i, th->pmcs[i] & ~0xfUL);
-		th->pmcs[i] &= ~0xfUL;
-		DPRINT_ovfl(("pmc[%d]=0x%lx\n", i, th->pmcs[i]));
+		ia64_set_pmc(i, ctx->th_pmcs[i] & ~0xfUL);
+		ctx->th_pmcs[i] &= ~0xfUL;
+		DPRINT_ovfl(("pmc[%d]=0x%lx\n", i, ctx->th_pmcs[i]));
 	}
 	/*
 	 * make all of this visible
@@ -942,7 +947,6 @@ static void
 pfm_restore_monitoring(struct task_struct *task)
 {
 	pfm_context_t *ctx = PFM_GET_CTX(task);
-	struct thread_struct *th = &task->thread;
 	unsigned long mask, ovfl_mask;
 	unsigned long psr, val;
 	int i, is_system;
@@ -1008,9 +1012,9 @@ pfm_restore_monitoring(struct task_struct *task)
 	mask = ctx->ctx_used_monitors[0] >> PMU_FIRST_COUNTER;
 	for(i= PMU_FIRST_COUNTER; mask; i++, mask>>=1) {
 		if ((mask & 0x1) == 0UL) continue;
-		th->pmcs[i] = ctx->ctx_pmcs[i];
-		ia64_set_pmc(i, th->pmcs[i]);
-		DPRINT(("[%d] pmc[%d]=0x%lx\n", task->pid, i, th->pmcs[i]));
+		ctx->th_pmcs[i] = ctx->ctx_pmcs[i];
+		ia64_set_pmc(i, ctx->th_pmcs[i]);
+		DPRINT(("[%d] pmc[%d]=0x%lx\n", task->pid, i, ctx->th_pmcs[i]));
 	}
 	ia64_srlz_d();
 
@@ -1069,7 +1073,6 @@ pfm_restore_pmds(unsigned long *pmds, unsigned long mask)
 static inline void
 pfm_copy_pmds(struct task_struct *task, pfm_context_t *ctx)
 {
-	struct thread_struct *thread = &task->thread;
 	unsigned long ovfl_val = pmu_conf->ovfl_val;
 	unsigned long mask = ctx->ctx_all_pmds[0];
 	unsigned long val;
@@ -1091,11 +1094,11 @@ pfm_copy_pmds(struct task_struct *task, pfm_context_t *ctx)
 			ctx->ctx_pmds[i].val = val & ~ovfl_val;
 			 val &= ovfl_val;
 		}
-		thread->pmds[i] = val;
+		ctx->th_pmds[i] = val;
 
 		DPRINT(("pmd[%d]=0x%lx soft_val=0x%lx\n",
 			i,
-			thread->pmds[i],
+			ctx->th_pmds[i],
 			ctx->ctx_pmds[i].val));
 	}
 }
@@ -1106,7 +1109,6 @@ pfm_copy_pmds(struct task_struct *task, pfm_context_t *ctx)
 static inline void
 pfm_copy_pmcs(struct task_struct *task, pfm_context_t *ctx)
 {
-	struct thread_struct *thread = &task->thread;
 	unsigned long mask = ctx->ctx_all_pmcs[0];
 	int i;
 
@@ -1114,8 +1116,8 @@ pfm_copy_pmcs(struct task_struct *task, pfm_context_t *ctx)
 
 	for (i=0; mask; i++, mask>>=1) {
 		/* masking 0 with ovfl_val yields 0 */
-		thread->pmcs[i] = ctx->ctx_pmcs[i];
-		DPRINT(("pmc[%d]=0x%lx\n", i, thread->pmcs[i]));
+		ctx->th_pmcs[i] = ctx->ctx_pmcs[i];
+		DPRINT(("pmc[%d]=0x%lx\n", i, ctx->th_pmcs[i]));
 	}
 }
 
@@ -2859,7 +2861,6 @@ pfm_reset_regs(pfm_context_t *ctx, unsigned long *ovfl_regs, int is_long_reset)
 static int
 pfm_write_pmcs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
 {
-	struct thread_struct *thread = NULL;
 	struct task_struct *task;
 	pfarg_reg_t *req = (pfarg_reg_t *)arg;
 	unsigned long value, pmc_pm;
@@ -2880,7 +2881,6 @@ pfm_write_pmcs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
 	if (state == PFM_CTX_ZOMBIE) return -EINVAL;
 
 	if (is_loaded) {
-		thread = &task->thread;
 		/*
 		 * In system wide and when the context is loaded, access can only happen
 		 * when the caller is running on the CPU being monitored by the session.
@@ -3035,7 +3035,7 @@ pfm_write_pmcs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
 		 *
 		 * The value in ctx_pmcs[] can only be changed in pfm_write_pmcs().
 		 *
-		 * The value in thread->pmcs[] may be modified on overflow, i.e.,  when
+		 * The value in th_pmcs[] may be modified on overflow, i.e.,  when
 		 * monitoring needs to be stopped.
 		 */
 		if (is_monitor) CTX_USED_MONITOR(ctx, 1UL << cnum);
@@ -3049,7 +3049,7 @@ pfm_write_pmcs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
 			/*
 			 * write thread state
 			 */
-			if (is_system == 0) thread->pmcs[cnum] = value;
+			if (is_system == 0) ctx->th_pmcs[cnum] = value;
 
 			/*
 			 * write hardware register if we can
@@ -3101,7 +3101,6 @@ error:
 static int
 pfm_write_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
 {
-	struct thread_struct *thread = NULL;
 	struct task_struct *task;
 	pfarg_reg_t *req = (pfarg_reg_t *)arg;
 	unsigned long value, hw_value, ovfl_mask;
@@ -3125,7 +3124,6 @@ pfm_write_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
 	 * the owner of the local PMU.
 	 */
 	if (likely(is_loaded)) {
-		thread = &task->thread;
 		/*
 		 * In system wide and when the context is loaded, access can only happen
 		 * when the caller is running on the CPU being monitored by the session.
@@ -3233,7 +3231,7 @@ pfm_write_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
 			/*
 		 	 * write thread state
 		 	 */
-			if (is_system == 0) thread->pmds[cnum] = hw_value;
+			if (is_system == 0) ctx->th_pmds[cnum] = hw_value;
 
 			/*
 			 * write hardware register if we can
@@ -3299,7 +3297,6 @@ abort_mission:
 static int
 pfm_read_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
 {
-	struct thread_struct *thread = NULL;
 	struct task_struct *task;
 	unsigned long val = 0UL, lval, ovfl_mask, sval;
 	pfarg_reg_t *req = (pfarg_reg_t *)arg;
@@ -3323,7 +3320,6 @@ pfm_read_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
 	if (state == PFM_CTX_ZOMBIE) return -EINVAL;
 
 	if (likely(is_loaded)) {
-		thread = &task->thread;
 		/*
 		 * In system wide and when the context is loaded, access can only happen
 		 * when the caller is running on the CPU being monitored by the session.
@@ -3385,7 +3381,7 @@ pfm_read_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
 			 * if context is zombie, then task does not exist anymore.
 			 * In this case, we use the full value saved in the context (pfm_flush_regs()).
 			 */
-			val = is_loaded ? thread->pmds[cnum] : 0UL;
+			val = is_loaded ? ctx->th_pmds[cnum] : 0UL;
 		}
 		rd_func = pmu_conf->pmd_desc[cnum].read_check;
 
@@ -4354,8 +4350,8 @@ pfm_context_load(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
 	pfm_copy_pmds(task, ctx);
 	pfm_copy_pmcs(task, ctx);
 
-	pmcs_source = thread->pmcs;
-	pmds_source = thread->pmds;
+	pmcs_source = ctx->th_pmcs;
+	pmds_source = ctx->th_pmds;
 
 	/*
 	 * always the case for system-wide
@@ -5864,14 +5860,12 @@ void
 pfm_save_regs(struct task_struct *task)
 {
 	pfm_context_t *ctx;
-	struct thread_struct *t;
 	unsigned long flags;
 	u64 psr;
 
 
 	ctx = PFM_GET_CTX(task);
 	if (ctx == NULL) return;
-	t = &task->thread;
 
 	/*
  	 * we always come here with interrupts ALREADY disabled by
@@ -5929,19 +5923,19 @@ pfm_save_regs(struct task_struct *task)
 	 * guarantee we will be schedule at that same
 	 * CPU again.
 	 */
-	pfm_save_pmds(t->pmds, ctx->ctx_used_pmds[0]);
+	pfm_save_pmds(ctx->th_pmds, ctx->ctx_used_pmds[0]);
 
 	/*
 	 * save pmc0 ia64_srlz_d() done in pfm_save_pmds()
 	 * we will need it on the restore path to check
 	 * for pending overflow.
 	 */
-	t->pmcs[0] = ia64_get_pmc(0);
+	ctx->th_pmcs[0] = ia64_get_pmc(0);
 
 	/*
 	 * unfreeze PMU if had pending overflows
 	 */
-	if (t->pmcs[0] & ~0x1UL) pfm_unfreeze_pmu();
+	if (ctx->th_pmcs[0] & ~0x1UL) pfm_unfreeze_pmu();
 
 	/*
 	 * finally, allow context access.
@@ -5986,7 +5980,6 @@ static void
 pfm_lazy_save_regs (struct task_struct *task)
 {
 	pfm_context_t *ctx;
-	struct thread_struct *t;
 	unsigned long flags;
 
 	{ u64 psr  = pfm_get_psr();
@@ -5994,7 +5987,6 @@ pfm_lazy_save_regs (struct task_struct *task)
 	}
 
 	ctx = PFM_GET_CTX(task);
-	t   = &task->thread;
 
 	/*
 	 * we need to mask PMU overflow here to
@@ -6019,19 +6011,19 @@ pfm_lazy_save_regs (struct task_struct *task)
 	/*
 	 * save all the pmds we use
 	 */
-	pfm_save_pmds(t->pmds, ctx->ctx_used_pmds[0]);
+	pfm_save_pmds(ctx->th_pmds, ctx->ctx_used_pmds[0]);
 
 	/*
 	 * save pmc0 ia64_srlz_d() done in pfm_save_pmds()
 	 * it is needed to check for pended overflow
 	 * on the restore path
 	 */
-	t->pmcs[0] = ia64_get_pmc(0);
+	ctx->th_pmcs[0] = ia64_get_pmc(0);
 
 	/*
 	 * unfreeze PMU if had pending overflows
 	 */
-	if (t->pmcs[0] & ~0x1UL) pfm_unfreeze_pmu();
+	if (ctx->th_pmcs[0] & ~0x1UL) pfm_unfreeze_pmu();
 
 	/*
 	 * now get can unmask PMU interrupts, they will
@@ -6050,7 +6042,6 @@ void
 pfm_load_regs (struct task_struct *task)
 {
 	pfm_context_t *ctx;
-	struct thread_struct *t;
 	unsigned long pmc_mask = 0UL, pmd_mask = 0UL;
 	unsigned long flags;
 	u64 psr, psr_up;
@@ -6061,11 +6052,10 @@ pfm_load_regs (struct task_struct *task)
 
 	BUG_ON(GET_PMU_OWNER());
 
-	t     = &task->thread;
 	/*
 	 * possible on unload
 	 */
-	if (unlikely((t->flags & IA64_THREAD_PM_VALID) == 0)) return;
+	if (unlikely((task->thread.flags & IA64_THREAD_PM_VALID) == 0)) return;
 
 	/*
  	 * we always come here with interrupts ALREADY disabled by
@@ -6147,21 +6137,21 @@ pfm_load_regs (struct task_struct *task)
 	 *
 	 * XXX: optimize here
 	 */
-	if (pmd_mask) pfm_restore_pmds(t->pmds, pmd_mask);
-	if (pmc_mask) pfm_restore_pmcs(t->pmcs, pmc_mask);
+	if (pmd_mask) pfm_restore_pmds(ctx->th_pmds, pmd_mask);
+	if (pmc_mask) pfm_restore_pmcs(ctx->th_pmcs, pmc_mask);
 
 	/*
 	 * check for pending overflow at the time the state
 	 * was saved.
 	 */
-	if (unlikely(PMC0_HAS_OVFL(t->pmcs[0]))) {
+	if (unlikely(PMC0_HAS_OVFL(ctx->th_pmcs[0]))) {
 		/*
 		 * reload pmc0 with the overflow information
 		 * On McKinley PMU, this will trigger a PMU interrupt
 		 */
-		ia64_set_pmc(0, t->pmcs[0]);
+		ia64_set_pmc(0, ctx->th_pmcs[0]);
 		ia64_srlz_d();
-		t->pmcs[0] = 0UL;
+		ctx->th_pmcs[0] = 0UL;
 
 		/*
 		 * will replay the PMU interrupt
@@ -6214,7 +6204,6 @@ pfm_load_regs (struct task_struct *task)
 void
 pfm_load_regs (struct task_struct *task)
 {
-	struct thread_struct *t;
 	pfm_context_t *ctx;
 	struct task_struct *owner;
 	unsigned long pmd_mask, pmc_mask;
@@ -6223,7 +6212,6 @@ pfm_load_regs (struct task_struct *task)
 
 	owner = GET_PMU_OWNER();
 	ctx   = PFM_GET_CTX(task);
-	t     = &task->thread;
 	psr   = pfm_get_psr();
 
 	BUG_ON(psr & (IA64_PSR_UP|IA64_PSR_PP));
@@ -6286,22 +6274,22 @@ pfm_load_regs (struct task_struct *task)
 	 */
 	pmc_mask = ctx->ctx_all_pmcs[0];
 
-	pfm_restore_pmds(t->pmds, pmd_mask);
-	pfm_restore_pmcs(t->pmcs, pmc_mask);
+	pfm_restore_pmds(ctx->th_pmds, pmd_mask);
+	pfm_restore_pmcs(ctx->th_pmcs, pmc_mask);
 
 	/*
 	 * check for pending overflow at the time the state
 	 * was saved.
 	 */
-	if (unlikely(PMC0_HAS_OVFL(t->pmcs[0]))) {
+	if (unlikely(PMC0_HAS_OVFL(ctx->th_pmcs[0]))) {
 		/*
 		 * reload pmc0 with the overflow information
 		 * On McKinley PMU, this will trigger a PMU interrupt
 		 */
-		ia64_set_pmc(0, t->pmcs[0]);
+		ia64_set_pmc(0, ctx->th_pmcs[0]);
 		ia64_srlz_d();
 
-		t->pmcs[0] = 0UL;
+		ctx->th_pmcs[0] = 0UL;
 
 		/*
 		 * will replay the PMU interrupt
@@ -6376,11 +6364,11 @@ pfm_flush_pmds(struct task_struct *task, pfm_context_t *ctx)
 		 */
 		pfm_unfreeze_pmu();
 	} else {
-		pmc0 = task->thread.pmcs[0];
+		pmc0 = ctx->th_pmcs[0];
 		/*
 		 * clear whatever overflow status bits there were
 		 */
-		task->thread.pmcs[0] = 0;
+		ctx->th_pmcs[0] = 0;
 	}
 	ovfl_val = pmu_conf->ovfl_val;
 	/*
@@ -6401,7 +6389,7 @@ pfm_flush_pmds(struct task_struct *task, pfm_context_t *ctx)
 		/*
 		 * can access PMU always true in system wide mode
 		 */
-		val = pmd_val = can_access_pmu ? ia64_get_pmd(i) : task->thread.pmds[i];
+		val = pmd_val = can_access_pmu ? ia64_get_pmd(i) : ctx->th_pmds[i];
 
 		if (PMD_IS_COUNTING(i)) {
 			DPRINT(("[%d] pmd[%d] ctx_pmd=0x%lx hw_pmd=0x%lx\n",
@@ -6433,7 +6421,7 @@ pfm_flush_pmds(struct task_struct *task, pfm_context_t *ctx)
 
 		DPRINT(("[%d] ctx_pmd[%d]=0x%lx  pmd_val=0x%lx\n", task->pid, i, val, pmd_val));
 
-		if (is_self) task->thread.pmds[i] = pmd_val;
+		if (is_self) ctx->th_pmds[i] = pmd_val;
 
 		ctx->ctx_pmds[i].val = val;
 	}
@@ -6677,7 +6665,7 @@ pfm_init(void)
 	       ffz(pmu_conf->ovfl_val));
 
 	/* sanity check */
-	if (pmu_conf->num_pmds >= IA64_NUM_PMD_REGS || pmu_conf->num_pmcs >= IA64_NUM_PMC_REGS) {
+	if (pmu_conf->num_pmds >= PFM_NUM_PMD_REGS || pmu_conf->num_pmcs >= PFM_NUM_PMC_REGS) {
 		printk(KERN_ERR "perfmon: not enough pmc/pmd, perfmon disabled\n");
 		pmu_conf = NULL;
 		return -1;
@@ -6752,7 +6740,6 @@ void
 dump_pmu_state(const char *from)
 {
 	struct task_struct *task;
-	struct thread_struct *t;
 	struct pt_regs *regs;
 	pfm_context_t *ctx;
 	unsigned long psr, dcr, info, flags;
@@ -6797,16 +6784,14 @@ dump_pmu_state(const char *from)
 	ia64_psr(regs)->up = 0;
 	ia64_psr(regs)->pp = 0;
 
-	t = &current->thread;
-
 	for (i=1; PMC_IS_LAST(i) == 0; i++) {
 		if (PMC_IS_IMPL(i) == 0) continue;
-		printk("->CPU%d pmc[%d]=0x%lx thread_pmc[%d]=0x%lx\n", this_cpu, i, ia64_get_pmc(i), i, t->pmcs[i]);
+		printk("->CPU%d pmc[%d]=0x%lx thread_pmc[%d]=0x%lx\n", this_cpu, i, ia64_get_pmc(i), i, ctx->th_pmcs[i]);
 	}
 
 	for (i=1; PMD_IS_LAST(i) == 0; i++) {
 		if (PMD_IS_IMPL(i) == 0) continue;
-		printk("->CPU%d pmd[%d]=0x%lx thread_pmd[%d]=0x%lx\n", this_cpu, i, ia64_get_pmd(i), i, t->pmds[i]);
+		printk("->CPU%d pmd[%d]=0x%lx thread_pmd[%d]=0x%lx\n", this_cpu, i, ia64_get_pmd(i), i, ctx->th_pmds[i]);
 	}
 
 	if (ctx) {
diff --git a/include/asm-ia64/processor.h b/include/asm-ia64/processor.h
index ead68f8..5830d36 100644
--- a/include/asm-ia64/processor.h
+++ b/include/asm-ia64/processor.h
@@ -20,12 +20,6 @@
 #include <asm/ustack.h>
 
 #define IA64_NUM_DBG_REGS	8
-/*
- * Limits for PMC and PMD are set to less than maximum architected values
- * but should be sufficient for a while
- */
-#define IA64_NUM_PMC_REGS	64
-#define IA64_NUM_PMD_REGS	64
 
 #define DEFAULT_MAP_BASE	__IA64_UL_CONST(0x2000000000000000)
 #define DEFAULT_TASK_SIZE	__IA64_UL_CONST(0xa000000000000000)
@@ -263,13 +257,9 @@ struct thread_struct {
 # define INIT_THREAD_IA32
 #endif /* CONFIG_IA32_SUPPORT */
 #ifdef CONFIG_PERFMON
-	__u64 pmcs[IA64_NUM_PMC_REGS];
-	__u64 pmds[IA64_NUM_PMD_REGS];
 	void *pfm_context;		     /* pointer to detailed PMU context */
 	unsigned long pfm_needs_checking;    /* when >0, pending perfmon work on kernel exit */
-# define INIT_THREAD_PM		.pmcs =			{0UL, },  \
-				.pmds =			{0UL, },  \
-				.pfm_context =		NULL,     \
+# define INIT_THREAD_PM		.pfm_context =		NULL,     \
 				.pfm_needs_checking =	0UL,
 #else
 # define INIT_THREAD_PM
-- 
cgit v0.10.2


From fd1dfc6f0165e2ad426665e517103672d6832f90 Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@sgi.com>
Date: Tue, 5 Sep 2006 11:56:51 -0700
Subject: [IA64-SGI] Do not request DMA memory for BTE

The GFP_DMA option usually does nothing on SN2 since all of memory is in thei
DMA zone and the BTE has always been capable of addressing all of memory.
So there is no need to get memory from a restricted range of memory (which
is what GFP_DMA is for).

Remove useless __GFP_DMA option.

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>

diff --git a/arch/ia64/sn/kernel/bte.c b/arch/ia64/sn/kernel/bte.c
index 27dee45..7f73ad4 100644
--- a/arch/ia64/sn/kernel/bte.c
+++ b/arch/ia64/sn/kernel/bte.c
@@ -277,8 +277,7 @@ bte_result_t bte_unaligned_copy(u64 src, u64 dest, u64 len, u64 mode)
 	}
 
 	/* temporary buffer used during unaligned transfers */
-	bteBlock_unaligned = kmalloc(len + 3 * L1_CACHE_BYTES,
-				     GFP_KERNEL | GFP_DMA);
+	bteBlock_unaligned = kmalloc(len + 3 * L1_CACHE_BYTES, GFP_KERNEL);
 	if (bteBlock_unaligned == NULL) {
 		return BTEFAIL_NOTAVAIL;
 	}
-- 
cgit v0.10.2


From a192dc16000241dc02990a36b6830839b73c44de Mon Sep 17 00:00:00 2001
From: Jakub Jelinek <jakub@redhat.com>
Date: Tue, 26 Sep 2006 14:00:56 -0700
Subject: [IA64] Implement futex primitives

Implement futex_atomic_op_inuser() and futex_atomic_cmpxchg_inatomic()
on IA64 in order to fully support all futex functionality.

Signed-off-by: Jakub Jelinek <jakub@redhat.com>
Signed-off-by: David Woodhouse <dwmw2@infradead.org>
Signed-off-by: Tony Luck <tony.luck@intel.com>

diff --git a/include/asm-ia64/futex.h b/include/asm-ia64/futex.h
index 6a332a9..07d77f3 100644
--- a/include/asm-ia64/futex.h
+++ b/include/asm-ia64/futex.h
@@ -1,6 +1,124 @@
 #ifndef _ASM_FUTEX_H
 #define _ASM_FUTEX_H
 
-#include <asm-generic/futex.h>
+#include <linux/futex.h>
+#include <asm/errno.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
 
-#endif
+#define __futex_atomic_op1(insn, ret, oldval, uaddr, oparg) \
+do {									\
+	register unsigned long r8 __asm ("r8") = 0;			\
+	__asm__ __volatile__(						\
+		"	mf;;					\n"	\
+		"[1:] "	insn ";;				\n"	\
+		"	.xdata4 \"__ex_table\", 1b-., 2f-.	\n"	\
+		"[2:]"							\
+		: "+r" (r8), "=r" (oldval)				\
+		: "r" (uaddr), "r" (oparg)				\
+		: "memory");						\
+	ret = r8;							\
+} while (0)
+
+#define __futex_atomic_op2(insn, ret, oldval, uaddr, oparg) \
+do {									\
+	register unsigned long r8 __asm ("r8") = 0;			\
+	int val, newval;						\
+	do {								\
+		__asm__ __volatile__(					\
+			"	mf;;				  \n"	\
+			"[1:]	ld4 %3=[%4];;			  \n"	\
+			"	mov %2=%3			  \n"	\
+				insn	";;			  \n"	\
+			"	mov ar.ccv=%2;;			  \n"	\
+			"[2:]	cmpxchg4.acq %1=[%4],%3,ar.ccv;;  \n"	\
+			"	.xdata4 \"__ex_table\", 1b-., 3f-.\n"	\
+			"	.xdata4 \"__ex_table\", 2b-., 3f-.\n"	\
+			"[3:]"						\
+			: "+r" (r8), "=r" (val), "=&r" (oldval),	\
+			   "=&r" (newval)				\
+			: "r" (uaddr), "r" (oparg)			\
+			: "memory");					\
+		if (unlikely (r8))					\
+			break;						\
+	} while (unlikely (val != oldval));				\
+	ret = r8;							\
+} while (0)
+
+static inline int
+futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
+{
+	int op = (encoded_op >> 28) & 7;
+	int cmp = (encoded_op >> 24) & 15;
+	int oparg = (encoded_op << 8) >> 20;
+	int cmparg = (encoded_op << 20) >> 20;
+	int oldval = 0, ret;
+	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
+		oparg = 1 << oparg;
+
+	if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int)))
+		return -EFAULT;
+
+	inc_preempt_count();
+
+	switch (op) {
+	case FUTEX_OP_SET:
+		__futex_atomic_op1("xchg4 %1=[%2],%3", ret, oldval, uaddr,
+				   oparg);
+		break;
+	case FUTEX_OP_ADD:
+		__futex_atomic_op2("add %3=%3,%5", ret, oldval, uaddr, oparg);
+		break;
+	case FUTEX_OP_OR:
+		__futex_atomic_op2("or %3=%3,%5", ret, oldval, uaddr, oparg);
+		break;
+	case FUTEX_OP_ANDN:
+		__futex_atomic_op2("and %3=%3,%5", ret, oldval, uaddr,
+				   ~oparg);
+		break;
+	case FUTEX_OP_XOR:
+		__futex_atomic_op2("xor %3=%3,%5", ret, oldval, uaddr, oparg);
+		break;
+	default:
+		ret = -ENOSYS;
+	}
+
+	dec_preempt_count();
+
+	if (!ret) {
+		switch (cmp) {
+		case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break;
+		case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
+		case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break;
+		case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break;
+		case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break;
+		case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break;
+		default: ret = -ENOSYS;
+		}
+	}
+	return ret;
+}
+
+static inline int
+futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval)
+{
+	if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))
+		return -EFAULT;
+
+	{
+		register unsigned long r8 __asm ("r8");
+		__asm__ __volatile__(
+			"	mf;;					\n"
+			"	mov ar.ccv=%3;;				\n"
+			"[1:]	cmpxchg4.acq %0=[%1],%2,ar.ccv		\n"
+			"	.xdata4 \"__ex_table\", 1b-., 2f-.	\n"
+			"[2:]"
+			: "=r" (r8)
+			: "r" (uaddr), "r" (newval),
+			  "rO" ((long) (unsigned) oldval)
+			: "memory");
+		return r8;
+	}
+}
+
+#endif /* _ASM_FUTEX_H */
-- 
cgit v0.10.2


From 5c55cd63a77a85f603c98c2171a8054ca34b6a9f Mon Sep 17 00:00:00 2001
From: Tony Luck <tony.luck@intel.com>
Date: Tue, 26 Sep 2006 14:04:42 -0700
Subject: Revert "[IA64] Unwire set/get_robust_list"

This reverts commit 2636255488484e04d6d54303d2b0ec30f7ef7e02.

Jakub Jelinek provided the missing futex_atomic_cmpxchg_inatomic()
function, so now it should be safe to re-enable these syscalls.

Signed-off-by: Tony Luck <tony.luck@intel.com>

diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S
index fef0657..12701cf 100644
--- a/arch/ia64/kernel/entry.S
+++ b/arch/ia64/kernel/entry.S
@@ -1605,8 +1605,8 @@ sys_call_table:
 	data8 sys_ni_syscall			// 1295 reserved for ppoll
 	data8 sys_unshare
 	data8 sys_splice
-	data8 sys_ni_syscall			// reserved for set_robust_list
-	data8 sys_ni_syscall			// reserved for get_robust_list
+	data8 sys_set_robust_list
+	data8 sys_get_robust_list
 	data8 sys_sync_file_range		// 1300
 	data8 sys_tee
 	data8 sys_vmsplice
diff --git a/include/asm-ia64/unistd.h b/include/asm-ia64/unistd.h
index f581662..bb0eb72 100644
--- a/include/asm-ia64/unistd.h
+++ b/include/asm-ia64/unistd.h
@@ -286,7 +286,8 @@
 /* 1294, 1295 reserved for pselect/ppoll */
 #define __NR_unshare			1296
 #define __NR_splice			1297
-/* 1298, 1299 reserved for set_robust_list/get_robust_list */
+#define __NR_set_robust_list		1298
+#define __NR_get_robust_list		1299
 #define __NR_sync_file_range		1300
 #define __NR_tee			1301
 #define __NR_vmsplice			1302
-- 
cgit v0.10.2


From f5a3f3dc189485d607fbd42678cc23958acc0a6e Mon Sep 17 00:00:00 2001
From: Zou Nan hai <nanhai.zou@intel.com>
Date: Thu, 14 Sep 2006 08:25:15 +0800
Subject: [IA64] Make gp value point to Region 5 in mca handler

MCA dispatch code take physical address of GP passed from SAL, then call
DATA_PA_TO_VA twice on GP before call into C code.  The first time is
in ia64_set_kernel_register, the second time is in VIRTUAL_MODE_ENTER.
The gp is changed to a virtual address in region 7 because DATA_PA_TO_VA
is implemented by dep instruction.

However when notify blocks were called from MCA handler code, because
notify blocks are supported by callback function pointers, gp value
value was switched to region 5 again.

The patch set gp register to kernel gp of region 5 at entry of MCA
dispatch.

Signed-off-by: Zou Nan hai <nanhai.zou@intel.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>

diff --git a/arch/ia64/kernel/mca_asm.S b/arch/ia64/kernel/mca_asm.S
index 9604749..ee3ff76 100644
--- a/arch/ia64/kernel/mca_asm.S
+++ b/arch/ia64/kernel/mca_asm.S
@@ -1025,18 +1025,13 @@ ia64_old_stack:
 
 ia64_set_kernel_registers:
 	add temp3=MCA_SP_OFFSET, r3
-	add temp4=MCA_SOS_OFFSET+SOS(OS_GP), r3
 	mov b0=r2		// save return address
 	GET_IA64_MCA_DATA(temp1)
 	;;
-	add temp4=temp4, temp1	// &struct ia64_sal_os_state.os_gp
 	add r12=temp1, temp3	// kernel stack pointer on MCA/INIT stack
 	add r13=temp1, r3	// set current to start of MCA/INIT stack
 	add r20=temp1, r3	// physical start of MCA/INIT stack
 	;;
-	ld8 r1=[temp4]		// OS GP from SAL OS state
-	;;
-	DATA_PA_TO_VA(r1,temp1)
 	DATA_PA_TO_VA(r12,temp2)
 	DATA_PA_TO_VA(r13,temp3)
 	;;
diff --git a/include/asm-ia64/mca_asm.h b/include/asm-ia64/mca_asm.h
index 27c9203..76203f9 100644
--- a/include/asm-ia64/mca_asm.h
+++ b/include/asm-ia64/mca_asm.h
@@ -197,9 +197,9 @@
 	movl	temp2 = start_addr;				\
 	;;							\
 	mov	cr.iip = temp2;					\
+	movl	gp = __gp					\
 	;;							\
 	DATA_PA_TO_VA(sp, temp1);				\
-	DATA_PA_TO_VA(gp, temp2);				\
 	srlz.i;							\
 	;;							\
 	nop	1;						\
-- 
cgit v0.10.2


From 709a6c1c07673a9f7879b5f7306dc8d723db93a2 Mon Sep 17 00:00:00 2001
From: Jes Sorensen <jes@sgi.com>
Date: Wed, 13 Sep 2006 08:43:42 -0400
Subject: [IA64] show_mem() printk levels

Use the default sysrq printk level for printing show_mem() output both
for disconfig and contig versions. This is consistent with the printk
level used on other architectures (well ia32 at least).

Signed-off-by: Jes Sorensen <jes@sgi.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>

diff --git a/arch/ia64/mm/contig.c b/arch/ia64/mm/contig.c
index e004143..537c2f3 100644
--- a/arch/ia64/mm/contig.c
+++ b/arch/ia64/mm/contig.c
@@ -41,10 +41,11 @@ show_mem (void)
 	int i, total = 0, reserved = 0;
 	int shared = 0, cached = 0;
 
-	printk("Mem-info:\n");
+	printk(KERN_INFO "Mem-info:\n");
 	show_free_areas();
 
-	printk("Free swap:       %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
+	printk(KERN_INFO "Free swap:       %6ldkB\n",
+	       nr_swap_pages<<(PAGE_SHIFT-10));
 	i = max_mapnr;
 	for (i = 0; i < max_mapnr; i++) {
 		if (!pfn_valid(i)) {
@@ -63,12 +64,12 @@ show_mem (void)
 		else if (page_count(mem_map + i))
 			shared += page_count(mem_map + i) - 1;
 	}
-	printk("%d pages of RAM\n", total);
-	printk("%d reserved pages\n", reserved);
-	printk("%d pages shared\n", shared);
-	printk("%d pages swap cached\n", cached);
-	printk("%ld pages in page table cache\n",
-		pgtable_quicklist_total_size());
+	printk(KERN_INFO "%d pages of RAM\n", total);
+	printk(KERN_INFO "%d reserved pages\n", reserved);
+	printk(KERN_INFO "%d pages shared\n", shared);
+	printk(KERN_INFO "%d pages swap cached\n", cached);
+	printk(KERN_INFO "%ld pages in page table cache\n",
+	       pgtable_quicklist_total_size());
 }
 
 /* physical address where the bootmem map is located */
diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c
index d260bff..0ce2474 100644
--- a/arch/ia64/mm/discontig.c
+++ b/arch/ia64/mm/discontig.c
@@ -547,15 +547,16 @@ void show_mem(void)
 	unsigned long total_present = 0;
 	pg_data_t *pgdat;
 
-	printk("Mem-info:\n");
+	printk(KERN_INFO "Mem-info:\n");
 	show_free_areas();
-	printk("Free swap:       %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
+	printk(KERN_INFO "Free swap:       %6ldkB\n",
+	       nr_swap_pages<<(PAGE_SHIFT-10));
 	for_each_online_pgdat(pgdat) {
 		unsigned long present;
 		unsigned long flags;
 		int shared = 0, cached = 0, reserved = 0;
 
-		printk("Node ID: %d\n", pgdat->node_id);
+		printk(KERN_INFO "Node ID: %d\n", pgdat->node_id);
 		pgdat_resize_lock(pgdat, &flags);
 		present = pgdat->node_present_pages;
 		for(i = 0; i < pgdat->node_spanned_pages; i++) {
@@ -579,18 +580,18 @@ void show_mem(void)
 		total_reserved += reserved;
 		total_cached += cached;
 		total_shared += shared;
-		printk("\t%ld pages of RAM\n", present);
-		printk("\t%d reserved pages\n", reserved);
-		printk("\t%d pages shared\n", shared);
-		printk("\t%d pages swap cached\n", cached);
+		printk(KERN_INFO "\t%ld pages of RAM\n", present);
+		printk(KERN_INFO "\t%d reserved pages\n", reserved);
+		printk(KERN_INFO "\t%d pages shared\n", shared);
+		printk(KERN_INFO "\t%d pages swap cached\n", cached);
 	}
-	printk("%ld pages of RAM\n", total_present);
-	printk("%d reserved pages\n", total_reserved);
-	printk("%d pages shared\n", total_shared);
-	printk("%d pages swap cached\n", total_cached);
-	printk("Total of %ld pages in page table cache\n",
-		pgtable_quicklist_total_size());
-	printk("%d free buffer pages\n", nr_free_buffer_pages());
+	printk(KERN_INFO "%ld pages of RAM\n", total_present);
+	printk(KERN_INFO "%d reserved pages\n", total_reserved);
+	printk(KERN_INFO "%d pages shared\n", total_shared);
+	printk(KERN_INFO "%d pages swap cached\n", total_cached);
+	printk(KERN_INFO "Total of %ld pages in page table cache\n",
+	       pgtable_quicklist_total_size());
+	printk(KERN_INFO "%d free buffer pages\n", nr_free_buffer_pages());
 }
 
 /**
-- 
cgit v0.10.2


From 816add4e986499145135c4014a7c8a8857f9f3c3 Mon Sep 17 00:00:00 2001
From: Jes Sorensen <jes@sgi.com>
Date: Thu, 14 Sep 2006 07:10:30 -0400
Subject: [IA64] trim output of show_mem()

Cut the number of lines of memory info output per node from five
to one line.

Signed-off-by: Jes Sorensen <jes@sgi.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>

diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c
index 0ce2474..6ea2797 100644
--- a/arch/ia64/mm/discontig.c
+++ b/arch/ia64/mm/discontig.c
@@ -551,12 +551,12 @@ void show_mem(void)
 	show_free_areas();
 	printk(KERN_INFO "Free swap:       %6ldkB\n",
 	       nr_swap_pages<<(PAGE_SHIFT-10));
+	printk(KERN_INFO "Node memory in pages:\n");
 	for_each_online_pgdat(pgdat) {
 		unsigned long present;
 		unsigned long flags;
 		int shared = 0, cached = 0, reserved = 0;
 
-		printk(KERN_INFO "Node ID: %d\n", pgdat->node_id);
 		pgdat_resize_lock(pgdat, &flags);
 		present = pgdat->node_present_pages;
 		for(i = 0; i < pgdat->node_spanned_pages; i++) {
@@ -580,10 +580,9 @@ void show_mem(void)
 		total_reserved += reserved;
 		total_cached += cached;
 		total_shared += shared;
-		printk(KERN_INFO "\t%ld pages of RAM\n", present);
-		printk(KERN_INFO "\t%d reserved pages\n", reserved);
-		printk(KERN_INFO "\t%d pages shared\n", shared);
-		printk(KERN_INFO "\t%d pages swap cached\n", cached);
+		printk(KERN_INFO "Node %4d:  RAM: %11ld, rsvd: %8d, "
+		       "shrd: %10d, swpd: %10d\n", pgdat->node_id,
+		       present, reserved, shared, cached);
 	}
 	printk(KERN_INFO "%ld pages of RAM\n", total_present);
 	printk(KERN_INFO "%d reserved pages\n", total_reserved);
-- 
cgit v0.10.2


From 43ed3baf623410b3fa6ca14a9d3f6deca3493c56 Mon Sep 17 00:00:00 2001
From: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Date: Tue, 26 Sep 2006 14:44:37 -0700
Subject: [IA64] printing support for MCA/INIT

Printing message to console from MCA/INIT handler is useful,
however doing oops_in_progress = 1 in them exactly makes
something in kernel wrong. Especially it sounds ugly if
system goes wrong after returning from recoverable MCA.

This patch adds ia64_mca_printk() function that collects
messages into temporary-not-so-large message buffer during
in MCA/INIT environment and print them out later, after
returning to normal context or when handlers determine to
down the system.

Also this print function is exported for use in extensional
MCA handler. It would be useful to describe detail about
recovery.

NOTE:
I don't think it is sane thing if temporary message buffer
is enlarged enough to hold whole stack dumps from INIT, so
buffering is disabled during stack dump from INIT-monarch
(= default_monarch_init_process). please fix it in future.

Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Acked-by: Russ Anderson <rja@sgi.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>

diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c
index 2fbe453..98f3b26 100644
--- a/arch/ia64/kernel/mca.c
+++ b/arch/ia64/kernel/mca.c
@@ -54,6 +54,9 @@
  *
  * 2005-10-07 Keith Owens <kaos@sgi.com>
  *	      Add notify_die() hooks.
+ *
+ * 2006-09-15 Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
+ * 	      Add printing support for MCA/INIT.
  */
 #include <linux/types.h>
 #include <linux/init.h>
@@ -136,11 +139,175 @@ extern void salinfo_log_wakeup(int type, u8 *buffer, u64 size, int irqsafe);
 
 static int mca_init __initdata;
 
+/*
+ * limited & delayed printing support for MCA/INIT handler
+ */
+
+#define mprintk(fmt...) ia64_mca_printk(fmt)
+
+#define MLOGBUF_SIZE (512+256*NR_CPUS)
+#define MLOGBUF_MSGMAX 256
+static char mlogbuf[MLOGBUF_SIZE];
+static DEFINE_SPINLOCK(mlogbuf_wlock);	/* mca context only */
+static DEFINE_SPINLOCK(mlogbuf_rlock);	/* normal context only */
+static unsigned long mlogbuf_start;
+static unsigned long mlogbuf_end;
+static unsigned int mlogbuf_finished = 0;
+static unsigned long mlogbuf_timestamp = 0;
+
+static int loglevel_save = -1;
+#define BREAK_LOGLEVEL(__console_loglevel)		\
+	oops_in_progress = 1;				\
+	if (loglevel_save < 0)				\
+		loglevel_save = __console_loglevel;	\
+	__console_loglevel = 15;
+
+#define RESTORE_LOGLEVEL(__console_loglevel)		\
+	if (loglevel_save >= 0) {			\
+		__console_loglevel = loglevel_save;	\
+		loglevel_save = -1;			\
+	}						\
+	mlogbuf_finished = 0;				\
+	oops_in_progress = 0;
+
+/*
+ * Push messages into buffer, print them later if not urgent.
+ */
+void ia64_mca_printk(const char *fmt, ...)
+{
+	va_list args;
+	int printed_len;
+	char temp_buf[MLOGBUF_MSGMAX];
+	char *p;
+
+	va_start(args, fmt);
+	printed_len = vscnprintf(temp_buf, sizeof(temp_buf), fmt, args);
+	va_end(args);
+
+	/* Copy the output into mlogbuf */
+	if (oops_in_progress) {
+		/* mlogbuf was abandoned, use printk directly instead. */
+		printk(temp_buf);
+	} else {
+		spin_lock(&mlogbuf_wlock);
+		for (p = temp_buf; *p; p++) {
+			unsigned long next = (mlogbuf_end + 1) % MLOGBUF_SIZE;
+			if (next != mlogbuf_start) {
+				mlogbuf[mlogbuf_end] = *p;
+				mlogbuf_end = next;
+			} else {
+				/* buffer full */
+				break;
+			}
+		}
+		mlogbuf[mlogbuf_end] = '\0';
+		spin_unlock(&mlogbuf_wlock);
+	}
+}
+EXPORT_SYMBOL(ia64_mca_printk);
+
+/*
+ * Print buffered messages.
+ *  NOTE: call this after returning normal context. (ex. from salinfod)
+ */
+void ia64_mlogbuf_dump(void)
+{
+	char temp_buf[MLOGBUF_MSGMAX];
+	char *p;
+	unsigned long index;
+	unsigned long flags;
+	unsigned int printed_len;
+
+	/* Get output from mlogbuf */
+	while (mlogbuf_start != mlogbuf_end) {
+		temp_buf[0] = '\0';
+		p = temp_buf;
+		printed_len = 0;
+
+		spin_lock_irqsave(&mlogbuf_rlock, flags);
+
+		index = mlogbuf_start;
+		while (index != mlogbuf_end) {
+			*p = mlogbuf[index];
+			index = (index + 1) % MLOGBUF_SIZE;
+			if (!*p)
+				break;
+			p++;
+			if (++printed_len >= MLOGBUF_MSGMAX - 1)
+				break;
+		}
+		*p = '\0';
+		if (temp_buf[0])
+			printk(temp_buf);
+		mlogbuf_start = index;
+
+		mlogbuf_timestamp = 0;
+		spin_unlock_irqrestore(&mlogbuf_rlock, flags);
+	}
+}
+EXPORT_SYMBOL(ia64_mlogbuf_dump);
+
+/*
+ * Call this if system is going to down or if immediate flushing messages to
+ * console is required. (ex. recovery was failed, crash dump is going to be
+ * invoked, long-wait rendezvous etc.)
+ *  NOTE: this should be called from monarch.
+ */
+static void ia64_mlogbuf_finish(int wait)
+{
+	BREAK_LOGLEVEL(console_loglevel);
+
+	spin_lock_init(&mlogbuf_rlock);
+	ia64_mlogbuf_dump();
+	printk(KERN_EMERG "mlogbuf_finish: printing switched to urgent mode, "
+		"MCA/INIT might be dodgy or fail.\n");
+
+	if (!wait)
+		return;
+
+	/* wait for console */
+	printk("Delaying for 5 seconds...\n");
+	udelay(5*1000000);
+
+	mlogbuf_finished = 1;
+}
+EXPORT_SYMBOL(ia64_mlogbuf_finish);
+
+/*
+ * Print buffered messages from INIT context.
+ */
+static void ia64_mlogbuf_dump_from_init(void)
+{
+	if (mlogbuf_finished)
+		return;
+
+	if (mlogbuf_timestamp && (mlogbuf_timestamp + 30*HZ > jiffies)) {
+		printk(KERN_ERR "INIT: mlogbuf_dump is interrupted by INIT "
+			" and the system seems to be messed up.\n");
+		ia64_mlogbuf_finish(0);
+		return;
+	}
+
+	if (!spin_trylock(&mlogbuf_rlock)) {
+		printk(KERN_ERR "INIT: mlogbuf_dump is interrupted by INIT. "
+			"Generated messages other than stack dump will be "
+			"buffered to mlogbuf and will be printed later.\n");
+		printk(KERN_ERR "INIT: If messages would not printed after "
+			"this INIT, wait 30sec and assert INIT again.\n");
+		if (!mlogbuf_timestamp)
+			mlogbuf_timestamp = jiffies;
+		return;
+	}
+	spin_unlock(&mlogbuf_rlock);
+	ia64_mlogbuf_dump();
+}
 
 static void inline
 ia64_mca_spin(const char *func)
 {
-	printk(KERN_EMERG "%s: spinning here, not returning to SAL\n", func);
+	if (monarch_cpu == smp_processor_id())
+		ia64_mlogbuf_finish(0);
+	mprintk(KERN_EMERG "%s: spinning here, not returning to SAL\n", func);
 	while (1)
 		cpu_relax();
 }
@@ -988,18 +1155,22 @@ ia64_wait_for_slaves(int monarch, const char *type)
 	}
 	if (!missing)
 		goto all_in;
-	printk(KERN_INFO "OS %s slave did not rendezvous on cpu", type);
+	/*
+	 * Maybe slave(s) dead. Print buffered messages immediately.
+	 */
+	ia64_mlogbuf_finish(0);
+	mprintk(KERN_INFO "OS %s slave did not rendezvous on cpu", type);
 	for_each_online_cpu(c) {
 		if (c == monarch)
 			continue;
 		if (ia64_mc_info.imi_rendez_checkin[c] == IA64_MCA_RENDEZ_CHECKIN_NOTDONE)
-			printk(" %d", c);
+			mprintk(" %d", c);
 	}
-	printk("\n");
+	mprintk("\n");
 	return;
 
 all_in:
-	printk(KERN_INFO "All OS %s slaves have reached rendezvous\n", type);
+	mprintk(KERN_INFO "All OS %s slaves have reached rendezvous\n", type);
 	return;
 }
 
@@ -1027,10 +1198,8 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw,
 	struct ia64_mca_notify_die nd =
 		{ .sos = sos, .monarch_cpu = &monarch_cpu };
 
-	oops_in_progress = 1;	/* FIXME: make printk NMI/MCA/INIT safe */
-	console_loglevel = 15;	/* make sure printks make it to console */
-	printk(KERN_INFO "Entered OS MCA handler. PSP=%lx cpu=%d monarch=%ld\n",
-		sos->proc_state_param, cpu, sos->monarch);
+	mprintk(KERN_INFO "Entered OS MCA handler. PSP=%lx cpu=%d "
+		"monarch=%ld\n", sos->proc_state_param, cpu, sos->monarch);
 
 	previous_current = ia64_mca_modify_original_stack(regs, sw, sos, "MCA");
 	monarch_cpu = cpu;
@@ -1066,6 +1235,9 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw,
 		rh->severity = sal_log_severity_corrected;
 		ia64_sal_clear_state_info(SAL_INFO_TYPE_MCA);
 		sos->os_status = IA64_MCA_CORRECTED;
+	} else {
+		/* Dump buffered message to console */
+		ia64_mlogbuf_finish(1);
 	}
 	if (notify_die(DIE_MCA_MONARCH_LEAVE, "MCA", regs, (long)&nd, 0, recover)
 			== NOTIFY_STOP)
@@ -1305,6 +1477,15 @@ default_monarch_init_process(struct notifier_block *self, unsigned long val, voi
 	struct task_struct *g, *t;
 	if (val != DIE_INIT_MONARCH_PROCESS)
 		return NOTIFY_DONE;
+
+	/*
+	 * FIXME: mlogbuf will brim over with INIT stack dumps.
+	 * To enable show_stack from INIT, we use oops_in_progress which should
+	 * be used in real oops. This would cause something wrong after INIT.
+	 */
+	BREAK_LOGLEVEL(console_loglevel);
+	ia64_mlogbuf_dump_from_init();
+
 	printk(KERN_ERR "Processes interrupted by INIT -");
 	for_each_online_cpu(c) {
 		struct ia64_sal_os_state *s;
@@ -1326,6 +1507,8 @@ default_monarch_init_process(struct notifier_block *self, unsigned long val, voi
 		} while_each_thread (g, t);
 		read_unlock(&tasklist_lock);
 	}
+	/* FIXME: This will not restore zapped printk locks. */
+	RESTORE_LOGLEVEL(console_loglevel);
 	return NOTIFY_DONE;
 }
 
@@ -1357,12 +1540,9 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw,
 	struct ia64_mca_notify_die nd =
 		{ .sos = sos, .monarch_cpu = &monarch_cpu };
 
-	oops_in_progress = 1;	/* FIXME: make printk NMI/MCA/INIT safe */
-	console_loglevel = 15;	/* make sure printks make it to console */
-
 	(void) notify_die(DIE_INIT_ENTER, "INIT", regs, (long)&nd, 0, 0);
 
-	printk(KERN_INFO "Entered OS INIT handler. PSP=%lx cpu=%d monarch=%ld\n",
+	mprintk(KERN_INFO "Entered OS INIT handler. PSP=%lx cpu=%d monarch=%ld\n",
 		sos->proc_state_param, cpu, sos->monarch);
 	salinfo_log_wakeup(SAL_INFO_TYPE_INIT, NULL, 0, 0);
 
@@ -1375,7 +1555,7 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw,
 	 * fix their proms and get their customers updated.
 	 */
 	if (!sos->monarch && atomic_add_return(1, &slaves) == num_online_cpus()) {
-		printk(KERN_WARNING "%s: Promoting cpu %d to monarch.\n",
+		mprintk(KERN_WARNING "%s: Promoting cpu %d to monarch.\n",
 		       __FUNCTION__, cpu);
 		atomic_dec(&slaves);
 		sos->monarch = 1;
@@ -1387,7 +1567,7 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw,
 	 * fix their proms and get their customers updated.
 	 */
 	if (sos->monarch && atomic_add_return(1, &monarchs) > 1) {
-		printk(KERN_WARNING "%s: Demoting cpu %d to slave.\n",
+		mprintk(KERN_WARNING "%s: Demoting cpu %d to slave.\n",
 			       __FUNCTION__, cpu);
 		atomic_dec(&monarchs);
 		sos->monarch = 0;
@@ -1408,7 +1588,7 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw,
 		if (notify_die(DIE_INIT_SLAVE_LEAVE, "INIT", regs, (long)&nd, 0, 0)
 				== NOTIFY_STOP)
 			ia64_mca_spin(__FUNCTION__);
-		printk("Slave on cpu %d returning to normal service.\n", cpu);
+		mprintk("Slave on cpu %d returning to normal service.\n", cpu);
 		set_curr_task(cpu, previous_current);
 		ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE;
 		atomic_dec(&slaves);
@@ -1426,7 +1606,7 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw,
 	 * same serial line, the user will need some time to switch out of the BMC before
 	 * the dump begins.
 	 */
-	printk("Delaying for 5 seconds...\n");
+	mprintk("Delaying for 5 seconds...\n");
 	udelay(5*1000000);
 	ia64_wait_for_slaves(cpu, "INIT");
 	/* If nobody intercepts DIE_INIT_MONARCH_PROCESS then we drop through
@@ -1439,7 +1619,7 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw,
 	if (notify_die(DIE_INIT_MONARCH_LEAVE, "INIT", regs, (long)&nd, 0, 0)
 			== NOTIFY_STOP)
 		ia64_mca_spin(__FUNCTION__);
-	printk("\nINIT dump complete.  Monarch on cpu %d returning to normal service.\n", cpu);
+	mprintk("\nINIT dump complete.  Monarch on cpu %d returning to normal service.\n", cpu);
 	atomic_dec(&monarchs);
 	set_curr_task(cpu, previous_current);
 	monarch_cpu = -1;
diff --git a/arch/ia64/kernel/mca_drv.c b/arch/ia64/kernel/mca_drv.c
index 8db6e0c..a45009d 100644
--- a/arch/ia64/kernel/mca_drv.c
+++ b/arch/ia64/kernel/mca_drv.c
@@ -79,14 +79,30 @@ static int
 fatal_mca(const char *fmt, ...)
 {
 	va_list args;
+	char buf[256];
 
 	va_start(args, fmt);
-	vprintk(fmt, args);
+	vsnprintf(buf, sizeof(buf), fmt, args);
 	va_end(args);
+	ia64_mca_printk(KERN_ALERT "MCA: %s\n", buf);
 
 	return MCA_NOT_RECOVERED;
 }
 
+static int
+mca_recovered(const char *fmt, ...)
+{
+	va_list args;
+	char buf[256];
+
+	va_start(args, fmt);
+	vsnprintf(buf, sizeof(buf), fmt, args);
+	va_end(args);
+	ia64_mca_printk(KERN_INFO "MCA: %s\n", buf);
+
+	return MCA_RECOVERED;
+}
+
 /**
  * mca_page_isolate - isolate a poisoned page in order not to use it later
  * @paddr:	poisoned memory location
@@ -140,6 +156,7 @@ mca_page_isolate(unsigned long paddr)
 void
 mca_handler_bh(unsigned long paddr, void *iip, unsigned long ipsr)
 {
+	ia64_mlogbuf_dump();
 	printk(KERN_ERR "OS_MCA: process [cpu %d, pid: %d, uid: %d, "
 		"iip: %p, psr: 0x%lx,paddr: 0x%lx](%s) encounters MCA.\n",
 		raw_smp_processor_id(), current->pid, current->uid,
@@ -440,7 +457,7 @@ recover_from_read_error(slidx_table_t *slidx,
 
 	/* Is target address valid? */
 	if (!pbci->tv)
-		return fatal_mca(KERN_ALERT "MCA: target address not valid\n");
+		return fatal_mca("target address not valid");
 
 	/*
 	 * cpu read or memory-mapped io read
@@ -458,7 +475,7 @@ recover_from_read_error(slidx_table_t *slidx,
 
 	/* Is minstate valid? */
 	if (!peidx_bottom(peidx) || !(peidx_bottom(peidx)->valid.minstate))
-		return fatal_mca(KERN_ALERT "MCA: minstate not valid\n");
+		return fatal_mca("minstate not valid");
 	psr1 =(struct ia64_psr *)&(peidx_minstate_area(peidx)->pmsa_ipsr);
 	psr2 =(struct ia64_psr *)&(peidx_minstate_area(peidx)->pmsa_xpsr);
 
@@ -492,13 +509,14 @@ recover_from_read_error(slidx_table_t *slidx,
 			psr2->bn  = 1;
 			psr2->i  = 0;
 
-			return MCA_RECOVERED;
+			return mca_recovered("user memory corruption. "
+				"kill affected process - recovered.");
 		}
 
 	}
 
-	return fatal_mca(KERN_ALERT "MCA: kernel context not recovered,"
-			  " iip 0x%lx\n", pmsa->pmsa_iip);
+	return fatal_mca("kernel context not recovered, iip 0x%lx\n",
+			 pmsa->pmsa_iip);
 }
 
 /**
@@ -584,13 +602,13 @@ recover_from_processor_error(int platform, slidx_table_t *slidx,
 	 * The machine check is corrected.
 	 */
 	if (psp->cm == 1)
-		return MCA_RECOVERED;
+		return mca_recovered("machine check is already corrected.");
 
 	/*
 	 * The error was not contained.  Software must be reset.
 	 */
 	if (psp->us || psp->ci == 0)
-		return fatal_mca(KERN_ALERT "MCA: error not contained\n");
+		return fatal_mca("error not contained");
 
 	/*
 	 * The cache check and bus check bits have four possible states
@@ -601,22 +619,22 @@ recover_from_processor_error(int platform, slidx_table_t *slidx,
 	 *    1  1	Memory error, attempt recovery
 	 */
 	if (psp->bc == 0 || pbci == NULL)
-		return fatal_mca(KERN_ALERT "MCA: No bus check\n");
+		return fatal_mca("No bus check");
 
 	/*
 	 * Sorry, we cannot handle so many.
 	 */
 	if (peidx_bus_check_num(peidx) > 1)
-		return fatal_mca(KERN_ALERT "MCA: Too many bus checks\n");
+		return fatal_mca("Too many bus checks");
 	/*
 	 * Well, here is only one bus error.
 	 */
 	if (pbci->ib)
-		return fatal_mca(KERN_ALERT "MCA: Internal Bus error\n");
+		return fatal_mca("Internal Bus error");
 	if (pbci->cc)
-		return fatal_mca(KERN_ALERT "MCA: Cache-cache error\n");
+		return fatal_mca("Cache-cache error");
 	if (pbci->eb && pbci->bsi > 0)
-		return fatal_mca(KERN_ALERT "MCA: External bus check fatal status\n");
+		return fatal_mca("External bus check fatal status");
 
 	/*
 	 * This is a local MCA and estimated as recoverble external bus error.
@@ -628,7 +646,7 @@ recover_from_processor_error(int platform, slidx_table_t *slidx,
 	/*
 	 * On account of strange SAL error record, we cannot recover.
 	 */
-	return fatal_mca(KERN_ALERT "MCA: Strange SAL record\n");
+	return fatal_mca("Strange SAL record");
 }
 
 /**
@@ -657,10 +675,10 @@ mca_try_to_recover(void *rec, struct ia64_sal_os_state *sos)
 
 	 /* Now, OS can recover when there is one processor error section */
 	if (n_proc_err > 1)
-		return fatal_mca(KERN_ALERT "MCA: Too Many Errors\n");
+		return fatal_mca("Too Many Errors");
 	else if (n_proc_err == 0)
-		/* Weird SAL record ... We need not to recover */
-		return fatal_mca(KERN_ALERT "MCA: Weird SAL record\n");
+		/* Weird SAL record ... We can't do anything */
+		return fatal_mca("Weird SAL record");
 
 	/* Make index of processor error section */
 	mca_make_peidx((sal_log_processor_info_t*)
@@ -671,7 +689,7 @@ mca_try_to_recover(void *rec, struct ia64_sal_os_state *sos)
 
 	/* Check whether MCA is global or not */
 	if (is_mca_global(&peidx, &pbci, sos))
-		return fatal_mca(KERN_ALERT "MCA: global MCA\n");
+		return fatal_mca("global MCA");
 	
 	/* Try to recover a processor error */
 	return recover_from_processor_error(platform_err, &slidx, &peidx,
diff --git a/arch/ia64/kernel/mca_drv.h b/arch/ia64/kernel/mca_drv.h
index 31a2e52..c85e943 100644
--- a/arch/ia64/kernel/mca_drv.h
+++ b/arch/ia64/kernel/mca_drv.h
@@ -118,3 +118,7 @@ struct mca_table_entry {
 
 extern const struct mca_table_entry *search_mca_tables (unsigned long addr);
 extern int mca_recover_range(unsigned long);
+extern void ia64_mca_printk(const char * fmt, ...)
+	 __attribute__ ((format (printf, 1, 2)));
+extern void ia64_mlogbuf_dump(void);
+
diff --git a/arch/ia64/kernel/salinfo.c b/arch/ia64/kernel/salinfo.c
index 9065f0f..e63b8ca 100644
--- a/arch/ia64/kernel/salinfo.c
+++ b/arch/ia64/kernel/salinfo.c
@@ -266,6 +266,7 @@ salinfo_log_wakeup(int type, u8 *buffer, u64 size, int irqsafe)
 /* Check for outstanding MCA/INIT records every minute (arbitrary) */
 #define SALINFO_TIMER_DELAY (60*HZ)
 static struct timer_list salinfo_timer;
+extern void ia64_mlogbuf_dump(void);
 
 static void
 salinfo_timeout_check(struct salinfo_data *data)
@@ -283,6 +284,7 @@ salinfo_timeout_check(struct salinfo_data *data)
 static void
 salinfo_timeout (unsigned long arg)
 {
+	ia64_mlogbuf_dump();
 	salinfo_timeout_check(salinfo_data + SAL_INFO_TYPE_MCA);
 	salinfo_timeout_check(salinfo_data + SAL_INFO_TYPE_INIT);
 	salinfo_timer.expires = jiffies + SALINFO_TIMER_DELAY;
@@ -332,6 +334,8 @@ retry:
 	if (cpu == -1)
 		goto retry;
 
+	ia64_mlogbuf_dump();
+
 	/* for next read, start checking at next CPU */
 	data->cpu_check = cpu;
 	if (++data->cpu_check == NR_CPUS)
-- 
cgit v0.10.2


From 8f9e146732dcba5161dad3747ee73be1f8c13133 Mon Sep 17 00:00:00 2001
From: Russ Anderson <rja@sgi.com>
Date: Mon, 18 Sep 2006 18:37:15 -0500
Subject: [IA64] ar.fpsr not set on MCA/INIT kernel entry

When entering the kernel due to an MCA or INIT, ar.fpsr (ar40)
was not getting set to the kernel default value (remaining
at the user value).  The effect depends on the user setting
of ar.fpsr.  In the test case, the effect was addresses
printing with strange hex values.

Setting ar.fpsr in ia64_set_kernel_registers sets it for both
the MCA and INIT paths.  The user value of ar.fpsr is correctly
saved (in ia64_state_save) and restored (in ia64_state_restore).

Below is an example of output with very strange hex values.
Anyone know the value of hex 'g'?  :-)

Processes interrupted by INIT - 0 (cpu 14 task 0xdfffg55g7a4c6gA)

Signed-off-by: Russ Anderson (rja@sgi.com)
Signed-off-by: Tony Luck <tony.luck@intel.com>

diff --git a/arch/ia64/kernel/mca_asm.S b/arch/ia64/kernel/mca_asm.S
index ee3ff76..c6b607c 100644
--- a/arch/ia64/kernel/mca_asm.S
+++ b/arch/ia64/kernel/mca_asm.S
@@ -1062,6 +1062,10 @@ ia64_set_kernel_registers:
 	mov cr.itir=r18
 	mov cr.ifa=r13
 	mov r20=IA64_TR_CURRENT_STACK
+
+	movl r17=FPSR_DEFAULT
+	;;
+	mov.m ar.fpsr=r17			// set ar.fpsr to kernel default value
 	;;
 	itr.d dtr[r20]=r21
 	;;
-- 
cgit v0.10.2


From b29e7132b5a9f2496beed37beef7ba4d010afb2c Mon Sep 17 00:00:00 2001
From: Russ Anderson <rja@sgi.com>
Date: Tue, 26 Sep 2006 14:47:48 -0500
Subject: [IA64] PAL calls need physical mode, stacked

PAL_CACHE_READ and PAL_CACHE_WRITE need to be called in physical
mode with stacked registers.

Signed-off-by: Russ Anderson (rja@sgi.com)
Signed-off-by: Tony Luck <tony.luck@intel.com>

diff --git a/include/asm-ia64/pal.h b/include/asm-ia64/pal.h
index d1587e4..2c8fd92 100644
--- a/include/asm-ia64/pal.h
+++ b/include/asm-ia64/pal.h
@@ -964,7 +964,8 @@ static inline s64
 ia64_pal_cache_read (pal_cache_line_id_u_t line_id, u64 physical_addr)
 {
 	struct ia64_pal_retval iprv;
-	PAL_CALL(iprv, PAL_CACHE_READ, line_id.pclid_data, physical_addr, 0);
+	PAL_CALL_PHYS_STK(iprv, PAL_CACHE_READ, line_id.pclid_data,
+				physical_addr, 0);
 	return iprv.status;
 }
 
@@ -986,7 +987,8 @@ static inline s64
 ia64_pal_cache_write (pal_cache_line_id_u_t line_id, u64 physical_addr, u64 data)
 {
 	struct ia64_pal_retval iprv;
-	PAL_CALL(iprv, PAL_CACHE_WRITE, line_id.pclid_data, physical_addr, data);
+	PAL_CALL_PHYS_STK(iprv, PAL_CACHE_WRITE, line_id.pclid_data,
+				physical_addr, data);
 	return iprv.status;
 }
 
-- 
cgit v0.10.2


From ddb4f0df0424d174567a011a176782ffa4202071 Mon Sep 17 00:00:00 2001
From: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Date: Tue, 26 Sep 2006 15:27:56 -0700
Subject: [IA64] CMC/CPE: Reverse the order of fetching log and checking poll
 threshold

This patch reverses the order of fetching log from SAL and
checking poll threshold. This will fix following trivial issues:

- If SAL_GET_SATE_INFO is unbelievably slow (due to huge system
   or just its silly implementation) and if it takes more than
   1/5 sec, CMCI/CPEI will never switch to CMCP/CPEP.
- Assuming terrible flood of interrupt (continuous corrected
   errors let all CPUs enter to handler at once and bind them
   in it), CPUs will be serialized by IA64_LOG_LOCK(*).
   Now we check the poll threshold after the lock and log fetch,
   so we need to call SAL_GET_STATE_INFO (num_online_cpus() + 4)
   times in the worst case.
   if we can check the threshold before the lock, we can shut up
   interrupts quickly without waiting preceding log fetches, and
   the number of times will be reduced to (num_online_cpus()) in
   the same situation.

Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>

diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c
index 98f3b26..bfbd898 100644
--- a/arch/ia64/kernel/mca.c
+++ b/arch/ia64/kernel/mca.c
@@ -511,9 +511,6 @@ ia64_mca_cpe_int_handler (int cpe_irq, void *arg, struct pt_regs *ptregs)
 	/* SAL spec states this should run w/ interrupts enabled */
 	local_irq_enable();
 
-	/* Get the CPE error record and log it */
-	ia64_mca_log_sal_error_record(SAL_INFO_TYPE_CPE);
-
 	spin_lock(&cpe_history_lock);
 	if (!cpe_poll_enabled && cpe_vector >= 0) {
 
@@ -542,7 +539,7 @@ ia64_mca_cpe_int_handler (int cpe_irq, void *arg, struct pt_regs *ptregs)
 			mod_timer(&cpe_poll_timer, jiffies + MIN_CPE_POLL_INTERVAL);
 
 			/* lock already released, get out now */
-			return IRQ_HANDLED;
+			goto out;
 		} else {
 			cpe_history[index++] = now;
 			if (index == CPE_HISTORY_LENGTH)
@@ -550,6 +547,10 @@ ia64_mca_cpe_int_handler (int cpe_irq, void *arg, struct pt_regs *ptregs)
 		}
 	}
 	spin_unlock(&cpe_history_lock);
+out:
+	/* Get the CPE error record and log it */
+	ia64_mca_log_sal_error_record(SAL_INFO_TYPE_CPE);
+
 	return IRQ_HANDLED;
 }
 
@@ -1278,9 +1279,6 @@ ia64_mca_cmc_int_handler(int cmc_irq, void *arg, struct pt_regs *ptregs)
 	/* SAL spec states this should run w/ interrupts enabled */
 	local_irq_enable();
 
-	/* Get the CMC error record and log it */
-	ia64_mca_log_sal_error_record(SAL_INFO_TYPE_CMC);
-
 	spin_lock(&cmc_history_lock);
 	if (!cmc_polling_enabled) {
 		int i, count = 1; /* we know 1 happened now */
@@ -1313,7 +1311,7 @@ ia64_mca_cmc_int_handler(int cmc_irq, void *arg, struct pt_regs *ptregs)
 			mod_timer(&cmc_poll_timer, jiffies + CMC_POLL_INTERVAL);
 
 			/* lock already released, get out now */
-			return IRQ_HANDLED;
+			goto out;
 		} else {
 			cmc_history[index++] = now;
 			if (index == CMC_HISTORY_LENGTH)
@@ -1321,6 +1319,10 @@ ia64_mca_cmc_int_handler(int cmc_irq, void *arg, struct pt_regs *ptregs)
 		}
 	}
 	spin_unlock(&cmc_history_lock);
+out:
+	/* Get the CMC error record and log it */
+	ia64_mca_log_sal_error_record(SAL_INFO_TYPE_CMC);
+
 	return IRQ_HANDLED;
 }
 
-- 
cgit v0.10.2


From df8f0ec1a413ae610899f009e25dc9777881e149 Mon Sep 17 00:00:00 2001
From: Al Stone <ahs3@fc.hp.com>
Date: Sat, 12 Aug 2006 11:08:12 -0600
Subject: [IA64] minor reformatting to vmlinux.lds.S

Minor reformatting to vmlinux.lds.S to make it 80-column usable,
in accordance with Linux coding style.

Signed-off-by: Al Stone <ahs3@fc.hp.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>

diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S
index 5b0d5f6..b3b2e38 100644
--- a/arch/ia64/kernel/vmlinux.lds.S
+++ b/arch/ia64/kernel/vmlinux.lds.S
@@ -184,7 +184,9 @@ SECTIONS
 	  *(.data.gate)
 	  __stop_gate_section = .;
 	}
-  . = ALIGN(PAGE_SIZE);		/* make sure the gate page doesn't expose kernel data */
+  . = ALIGN(PAGE_SIZE);		/* make sure the gate page doesn't expose
+  				 * kernel data
+				 */
 
   .data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET)
         { *(.data.read_mostly) }
@@ -202,7 +204,9 @@ SECTIONS
 		*(.data.percpu)
 		__per_cpu_end = .;
 	}
-  . = __phys_per_cpu_start + PERCPU_PAGE_SIZE;	/* ensure percpu data fits into percpu page size */
+  . = __phys_per_cpu_start + PERCPU_PAGE_SIZE;	/* ensure percpu data fits
+  						 * into percpu page size
+						 */
 
   data : { } :data
   .data : AT(ADDR(.data) - LOAD_OFFSET)
-- 
cgit v0.10.2