75 files changed, 14534 insertions, 7059 deletions
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 8fa3faf..0df5f6f 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -19,6 +19,8 @@ config IA64
 	select HAVE_OPROFILE
 	select HAVE_KPROBES
 	select HAVE_KRETPROBES
+	select HAVE_DMA_ATTRS
+	select HAVE_KVM
 	default y
 	help
 	  The Itanium Processor Family is Intel's 64-bit successor to
@@ -46,6 +48,9 @@ config MMU
 config SWIOTLB
        bool
 
+config IOMMU_HELPER
+       bool
+
 config GENERIC_LOCKBREAK
 	bool
 	default y
@@ -266,23 +271,23 @@ config IOSAPIC
 	depends on !IA64_HP_SIM
 	default y
 
-config IA64_SGI_SN_XP
-	tristate "Support communication between SGI SSIs"
-	depends on IA64_GENERIC || IA64_SGI_SN2
-	select IA64_UNCACHED_ALLOCATOR
-	help
-	  An SGI machine can be divided into multiple Single System
-	  Images which act independently of each other and have
-	  hardware based memory protection from the others.  Enabling
-	  this feature will allow for direct communication between SSIs
-	  based on a network adapter and DMA messaging.
-
 config FORCE_MAX_ZONEORDER
 	int "MAX_ORDER (11 - 17)"  if !HUGETLB_PAGE
 	range 11 17  if !HUGETLB_PAGE
 	default "17" if HUGETLB_PAGE
 	default "11"
 
+config VIRT_CPU_ACCOUNTING
+	bool "Deterministic task and CPU time accounting"
+	default n
+	help
+	  Select this option to enable more accurate task and CPU time
+	  accounting.  This is done by reading a CPU counter on each
+	  kernel entry and exit and on transitions within the kernel
+	  between system, softirq and hardirq state, so there is a
+	  small performance impact.
+	  If in doubt, say N here.
+
 config SMP
 	bool "Symmetric multi-processing support"
 	help
@@ -589,6 +594,8 @@ config MSPEC
 
 source "fs/Kconfig"
 
+source "arch/ia64/kvm/Kconfig"
+
 source "lib/Kconfig"
 
 #
@@ -611,6 +618,9 @@ config IRQ_PER_CPU
 	bool
 	default y
 
+config IOMMU_HELPER
+	def_bool (IA64_HP_ZX1 || IA64_HP_ZX1_SWIOTLB || IA64_GENERIC || SWIOTLB)
+
 source "arch/ia64/hp/sim/Kconfig"
 
 source "arch/ia64/Kconfig.debug"
diff --git a/arch/ia64/Makefile b/arch/ia64/Makefile
index f1645c4..ec4cca4 100644
--- a/arch/ia64/Makefile
+++ b/arch/ia64/Makefile
@@ -57,6 +57,7 @@ core-$(CONFIG_IA64_GENERIC) 	+= arch/ia64/dig/
 core-$(CONFIG_IA64_HP_ZX1)	+= arch/ia64/dig/
 core-$(CONFIG_IA64_HP_ZX1_SWIOTLB) += arch/ia64/dig/
 core-$(CONFIG_IA64_SGI_SN2)	+= arch/ia64/sn/
+core-$(CONFIG_KVM) 		+= arch/ia64/kvm/
 
 drivers-$(CONFIG_PCI)		+= arch/ia64/pci/
 drivers-$(CONFIG_IA64_HP_SIM)	+= arch/ia64/hp/sim/
diff --git a/arch/ia64/hp/common/hwsw_iommu.c b/arch/ia64/hp/common/hwsw_iommu.c
index 8f6bcfe..1c44ec2 100644
--- a/arch/ia64/hp/common/hwsw_iommu.c
+++ b/arch/ia64/hp/common/hwsw_iommu.c
@@ -20,10 +20,10 @@
 extern int swiotlb_late_init_with_default_size (size_t size);
 extern ia64_mv_dma_alloc_coherent	swiotlb_alloc_coherent;
 extern ia64_mv_dma_free_coherent	swiotlb_free_coherent;
-extern ia64_mv_dma_map_single		swiotlb_map_single;
-extern ia64_mv_dma_unmap_single		swiotlb_unmap_single;
-extern ia64_mv_dma_map_sg		swiotlb_map_sg;
-extern ia64_mv_dma_unmap_sg		swiotlb_unmap_sg;
+extern ia64_mv_dma_map_single_attrs	swiotlb_map_single_attrs;
+extern ia64_mv_dma_unmap_single_attrs	swiotlb_unmap_single_attrs;
+extern ia64_mv_dma_map_sg_attrs		swiotlb_map_sg_attrs;
+extern ia64_mv_dma_unmap_sg_attrs	swiotlb_unmap_sg_attrs;
 extern ia64_mv_dma_supported		swiotlb_dma_supported;
 extern ia64_mv_dma_mapping_error	swiotlb_dma_mapping_error;
 
@@ -31,19 +31,19 @@ extern ia64_mv_dma_mapping_error	swiotlb_dma_mapping_error;
 
 extern ia64_mv_dma_alloc_coherent	sba_alloc_coherent;
 extern ia64_mv_dma_free_coherent	sba_free_coherent;
-extern ia64_mv_dma_map_single		sba_map_single;
-extern ia64_mv_dma_unmap_single		sba_unmap_single;
-extern ia64_mv_dma_map_sg		sba_map_sg;
-extern ia64_mv_dma_unmap_sg		sba_unmap_sg;
+extern ia64_mv_dma_map_single_attrs	sba_map_single_attrs;
+extern ia64_mv_dma_unmap_single_attrs	sba_unmap_single_attrs;
+extern ia64_mv_dma_map_sg_attrs		sba_map_sg_attrs;
+extern ia64_mv_dma_unmap_sg_attrs	sba_unmap_sg_attrs;
 extern ia64_mv_dma_supported		sba_dma_supported;
 extern ia64_mv_dma_mapping_error	sba_dma_mapping_error;
 
 #define hwiommu_alloc_coherent		sba_alloc_coherent
 #define hwiommu_free_coherent		sba_free_coherent
-#define hwiommu_map_single		sba_map_single
-#define hwiommu_unmap_single		sba_unmap_single
-#define hwiommu_map_sg			sba_map_sg
-#define hwiommu_unmap_sg		sba_unmap_sg
+#define hwiommu_map_single_attrs	sba_map_single_attrs
+#define hwiommu_unmap_single_attrs	sba_unmap_single_attrs
+#define hwiommu_map_sg_attrs		sba_map_sg_attrs
+#define hwiommu_unmap_sg_attrs		sba_unmap_sg_attrs
 #define hwiommu_dma_supported		sba_dma_supported
 #define hwiommu_dma_mapping_error	sba_dma_mapping_error
 #define hwiommu_sync_single_for_cpu	machvec_dma_sync_single
@@ -98,41 +98,48 @@ hwsw_free_coherent (struct device *dev, size_t size, void *vaddr, dma_addr_t dma
 }
 
 dma_addr_t
-hwsw_map_single (struct device *dev, void *addr, size_t size, int dir)
+hwsw_map_single_attrs(struct device *dev, void *addr, size_t size, int dir,
+		       struct dma_attrs *attrs)
 {
 	if (use_swiotlb(dev))
-		return swiotlb_map_single(dev, addr, size, dir);
+		return swiotlb_map_single_attrs(dev, addr, size, dir, attrs);
 	else
-		return hwiommu_map_single(dev, addr, size, dir);
+		return hwiommu_map_single_attrs(dev, addr, size, dir, attrs);
 }
+EXPORT_SYMBOL(hwsw_map_single_attrs);
 
 void
-hwsw_unmap_single (struct device *dev, dma_addr_t iova, size_t size, int dir)
+hwsw_unmap_single_attrs(struct device *dev, dma_addr_t iova, size_t size,
+			 int dir, struct dma_attrs *attrs)
 {
 	if (use_swiotlb(dev))
-		return swiotlb_unmap_single(dev, iova, size, dir);
+		return swiotlb_unmap_single_attrs(dev, iova, size, dir, attrs);
 	else
-		return hwiommu_unmap_single(dev, iova, size, dir);
+		return hwiommu_unmap_single_attrs(dev, iova, size, dir, attrs);
 }
-
+EXPORT_SYMBOL(hwsw_unmap_single_attrs);
 
 int
-hwsw_map_sg (struct device *dev, struct scatterlist *sglist, int nents, int dir)
+hwsw_map_sg_attrs(struct device *dev, struct scatterlist *sglist, int nents,
+		   int dir, struct dma_attrs *attrs)
 {
 	if (use_swiotlb(dev))
-		return swiotlb_map_sg(dev, sglist, nents, dir);
+		return swiotlb_map_sg_attrs(dev, sglist, nents, dir, attrs);
 	else
-		return hwiommu_map_sg(dev, sglist, nents, dir);
+		return hwiommu_map_sg_attrs(dev, sglist, nents, dir, attrs);
 }
+EXPORT_SYMBOL(hwsw_map_sg_attrs);
 
 void
-hwsw_unmap_sg (struct device *dev, struct scatterlist *sglist, int nents, int dir)
+hwsw_unmap_sg_attrs(struct device *dev, struct scatterlist *sglist, int nents,
+		     int dir, struct dma_attrs *attrs)
 {
 	if (use_swiotlb(dev))
-		return swiotlb_unmap_sg(dev, sglist, nents, dir);
+		return swiotlb_unmap_sg_attrs(dev, sglist, nents, dir, attrs);
 	else
-		return hwiommu_unmap_sg(dev, sglist, nents, dir);
+		return hwiommu_unmap_sg_attrs(dev, sglist, nents, dir, attrs);
 }
+EXPORT_SYMBOL(hwsw_unmap_sg_attrs);
 
 void
 hwsw_sync_single_for_cpu (struct device *dev, dma_addr_t addr, size_t size, int dir)
@@ -185,10 +192,6 @@ hwsw_dma_mapping_error (dma_addr_t dma_addr)
 }
 
 EXPORT_SYMBOL(hwsw_dma_mapping_error);
-EXPORT_SYMBOL(hwsw_map_single);
-EXPORT_SYMBOL(hwsw_unmap_single);
-EXPORT_SYMBOL(hwsw_map_sg);
-EXPORT_SYMBOL(hwsw_unmap_sg);
 EXPORT_SYMBOL(hwsw_dma_supported);
 EXPORT_SYMBOL(hwsw_alloc_coherent);
 EXPORT_SYMBOL(hwsw_free_coherent);
diff --git a/arch/ia64/hp/common/sba_iommu.c b/arch/ia64/hp/common/sba_iommu.c
index 523eae6..34421ae 100644
--- a/arch/ia64/hp/common/sba_iommu.c
+++ b/arch/ia64/hp/common/sba_iommu.c
@@ -35,6 +35,7 @@
 #include <linux/nodemask.h>
 #include <linux/bitops.h>         /* hweight64() */
 #include <linux/crash_dump.h>
+#include <linux/iommu-helper.h>
 
 #include <asm/delay.h>		/* ia64_get_itc() */
 #include <asm/io.h>
@@ -460,6 +461,13 @@ get_iovp_order (unsigned long size)
 	return order;
 }
 
+static unsigned long ptr_to_pide(struct ioc *ioc, unsigned long *res_ptr,
+				 unsigned int bitshiftcnt)
+{
+	return (((unsigned long)res_ptr - (unsigned long)ioc->res_map) << 3)
+		+ bitshiftcnt;
+}
+
 /**
  * sba_search_bitmap - find free space in IO PDIR resource bitmap
  * @ioc: IO MMU structure which owns the pdir we are interested in.
@@ -471,15 +479,25 @@ get_iovp_order (unsigned long size)
  * Cool perf optimization: search for log2(size) bits at a time.
  */
 static SBA_INLINE unsigned long
-sba_search_bitmap(struct ioc *ioc, unsigned long bits_wanted, int use_hint)
+sba_search_bitmap(struct ioc *ioc, struct device *dev,
+		  unsigned long bits_wanted, int use_hint)
 {
 	unsigned long *res_ptr;
 	unsigned long *res_end = (unsigned long *) &(ioc->res_map[ioc->res_size]);
-	unsigned long flags, pide = ~0UL;
+	unsigned long flags, pide = ~0UL, tpide;
+	unsigned long boundary_size;
+	unsigned long shift;
+	int ret;
 
 	ASSERT(((unsigned long) ioc->res_hint & (sizeof(unsigned long) - 1UL)) == 0);
 	ASSERT(res_ptr < res_end);
 
+	boundary_size = (unsigned long long)dma_get_seg_boundary(dev) + 1;
+	boundary_size = ALIGN(boundary_size, 1ULL << iovp_shift) >> iovp_shift;
+
+	BUG_ON(ioc->ibase & ~iovp_mask);
+	shift = ioc->ibase >> iovp_shift;
+
 	spin_lock_irqsave(&ioc->res_lock, flags);
 
 	/* Allow caller to force a search through the entire resource space */
@@ -504,9 +522,7 @@ sba_search_bitmap(struct ioc *ioc, unsigned long bits_wanted, int use_hint)
 			if (likely(*res_ptr != ~0UL)) {
 				bitshiftcnt = ffz(*res_ptr);
 				*res_ptr |= (1UL << bitshiftcnt);
-				pide = ((unsigned long)res_ptr - (unsigned long)ioc->res_map);
-				pide <<= 3;	/* convert to bit address */
-				pide += bitshiftcnt;
+				pide = ptr_to_pide(ioc, res_ptr, bitshiftcnt);
 				ioc->res_bitshift = bitshiftcnt + bits_wanted;
 				goto found_it;
 			}
@@ -535,11 +551,13 @@ sba_search_bitmap(struct ioc *ioc, unsigned long bits_wanted, int use_hint)
 			DBG_RES("    %p %lx %lx\n", res_ptr, mask, *res_ptr);
 			ASSERT(0 != mask);
 			for (; mask ; mask <<= o, bitshiftcnt += o) {
-				if(0 == ((*res_ptr) & mask)) {
+				tpide = ptr_to_pide(ioc, res_ptr, bitshiftcnt);
+				ret = iommu_is_span_boundary(tpide, bits_wanted,
+							     shift,
+							     boundary_size);
+				if ((0 == ((*res_ptr) & mask)) && !ret) {
 					*res_ptr |= mask;     /* mark resources busy! */
-					pide = ((unsigned long)res_ptr - (unsigned long)ioc->res_map);
-					pide <<= 3;	/* convert to bit address */
-					pide += bitshiftcnt;
+					pide = tpide;
 					ioc->res_bitshift = bitshiftcnt + bits_wanted;
 					goto found_it;
 				}
@@ -560,6 +578,11 @@ sba_search_bitmap(struct ioc *ioc, unsigned long bits_wanted, int use_hint)
 		end = res_end - qwords;
 
 		for (; res_ptr < end; res_ptr++) {
+			tpide = ptr_to_pide(ioc, res_ptr, 0);
+			ret = iommu_is_span_boundary(tpide, bits_wanted,
+						     shift, boundary_size);
+			if (ret)
+				goto next_ptr;
 			for (i = 0 ; i < qwords ; i++) {
 				if (res_ptr[i] != 0)
 					goto next_ptr;
@@ -572,8 +595,7 @@ sba_search_bitmap(struct ioc *ioc, unsigned long bits_wanted, int use_hint)
 				res_ptr[i] = ~0UL;
 			res_ptr[i] |= RESMAP_MASK(bits);
 
-			pide = ((unsigned long)res_ptr - (unsigned long)ioc->res_map);
-			pide <<= 3;	/* convert to bit address */
+			pide = tpide;
 			res_ptr += qwords;
 			ioc->res_bitshift = bits;
 			goto found_it;
@@ -605,7 +627,7 @@ found_it:
  * resource bit map.
  */
 static int
-sba_alloc_range(struct ioc *ioc, size_t size)
+sba_alloc_range(struct ioc *ioc, struct device *dev, size_t size)
 {
 	unsigned int pages_needed = size >> iovp_shift;
 #ifdef PDIR_SEARCH_TIMING
@@ -622,9 +644,9 @@ sba_alloc_range(struct ioc *ioc, size_t size)
 	/*
 	** "seek and ye shall find"...praying never hurts either...
 	*/
-	pide = sba_search_bitmap(ioc, pages_needed, 1);
+	pide = sba_search_bitmap(ioc, dev, pages_needed, 1);
 	if (unlikely(pide >= (ioc->res_size << 3))) {
-		pide = sba_search_bitmap(ioc, pages_needed, 0);
+		pide = sba_search_bitmap(ioc, dev, pages_needed, 0);
 		if (unlikely(pide >= (ioc->res_size << 3))) {
 #if DELAYED_RESOURCE_CNT > 0
 			unsigned long flags;
@@ -653,7 +675,7 @@ sba_alloc_range(struct ioc *ioc, size_t size)
 			}
 			spin_unlock_irqrestore(&ioc->saved_lock, flags);
 
-			pide = sba_search_bitmap(ioc, pages_needed, 0);
+			pide = sba_search_bitmap(ioc, dev, pages_needed, 0);
 			if (unlikely(pide >= (ioc->res_size << 3)))
 				panic(__FILE__ ": I/O MMU @ %p is out of mapping resources\n",
 				      ioc->ioc_hpa);
@@ -877,16 +899,18 @@ sba_mark_invalid(struct ioc *ioc, dma_addr_t iova, size_t byte_cnt)
 }
 
 /**
- * sba_map_single - map one buffer and return IOVA for DMA
+ * sba_map_single_attrs - map one buffer and return IOVA for DMA
  * @dev: instance of PCI owned by the driver that's asking.
  * @addr:  driver buffer to map.
  * @size:  number of bytes to map in driver buffer.
  * @dir:  R/W or both.
+ * @attrs: optional dma attributes
  *
  * See Documentation/DMA-mapping.txt
  */
 dma_addr_t
-sba_map_single(struct device *dev, void *addr, size_t size, int dir)
+sba_map_single_attrs(struct device *dev, void *addr, size_t size, int dir,
+		     struct dma_attrs *attrs)
 {
 	struct ioc *ioc;
 	dma_addr_t iovp;
@@ -910,7 +934,8 @@ sba_map_single(struct device *dev, void *addr, size_t size, int dir)
  		** Device is bit capable of DMA'ing to the buffer...
 		** just return the PCI address of ptr
  		*/
-		DBG_BYPASS("sba_map_single() bypass mask/addr: 0x%lx/0x%lx\n",
+		DBG_BYPASS("sba_map_single_attrs() bypass mask/addr: "
+			   "0x%lx/0x%lx\n",
 		           to_pci_dev(dev)->dma_mask, pci_addr);
 		return pci_addr;
 	}
@@ -931,12 +956,12 @@ sba_map_single(struct device *dev, void *addr, size_t size, int dir)
 
 #ifdef ASSERT_PDIR_SANITY
 	spin_lock_irqsave(&ioc->res_lock, flags);
-	if (sba_check_pdir(ioc,"Check before sba_map_single()"))
+	if (sba_check_pdir(ioc,"Check before sba_map_single_attrs()"))
 		panic("Sanity check failed");
 	spin_unlock_irqrestore(&ioc->res_lock, flags);
 #endif
 
-	pide = sba_alloc_range(ioc, size);
+	pide = sba_alloc_range(ioc, dev, size);
 
 	iovp = (dma_addr_t) pide << iovp_shift;
 
@@ -960,11 +985,12 @@ sba_map_single(struct device *dev, void *addr, size_t size, int dir)
 	/* form complete address */
 #ifdef ASSERT_PDIR_SANITY
 	spin_lock_irqsave(&ioc->res_lock, flags);
-	sba_check_pdir(ioc,"Check after sba_map_single()");
+	sba_check_pdir(ioc,"Check after sba_map_single_attrs()");
 	spin_unlock_irqrestore(&ioc->res_lock, flags);
 #endif
 	return SBA_IOVA(ioc, iovp, offset);
 }
+EXPORT_SYMBOL(sba_map_single_attrs);
 
 #ifdef ENABLE_MARK_CLEAN
 static SBA_INLINE void
@@ -991,15 +1017,17 @@ sba_mark_clean(struct ioc *ioc, dma_addr_t iova, size_t size)
 #endif
 
 /**
- * sba_unmap_single - unmap one IOVA and free resources
+ * sba_unmap_single_attrs - unmap one IOVA and free resources
  * @dev: instance of PCI owned by the driver that's asking.
  * @iova:  IOVA of driver buffer previously mapped.
  * @size:  number of bytes mapped in driver buffer.
  * @dir:  R/W or both.
+ * @attrs: optional dma attributes
  *
  * See Documentation/DMA-mapping.txt
  */
-void sba_unmap_single(struct device *dev, dma_addr_t iova, size_t size, int dir)
+void sba_unmap_single_attrs(struct device *dev, dma_addr_t iova, size_t size,
+			    int dir, struct dma_attrs *attrs)
 {
 	struct ioc *ioc;
 #if DELAYED_RESOURCE_CNT > 0
@@ -1016,7 +1044,8 @@ void sba_unmap_single(struct device *dev, dma_addr_t iova, size_t size, int dir)
 		/*
 		** Address does not fall w/in IOVA, must be bypassing
 		*/
-		DBG_BYPASS("sba_unmap_single() bypass addr: 0x%lx\n", iova);
+		DBG_BYPASS("sba_unmap_single_atttrs() bypass addr: 0x%lx\n",
+			   iova);
 
 #ifdef ENABLE_MARK_CLEAN
 		if (dir == DMA_FROM_DEVICE) {
@@ -1065,7 +1094,7 @@ void sba_unmap_single(struct device *dev, dma_addr_t iova, size_t size, int dir)
 	spin_unlock_irqrestore(&ioc->res_lock, flags);
 #endif /* DELAYED_RESOURCE_CNT == 0 */
 }
-
+EXPORT_SYMBOL(sba_unmap_single_attrs);
 
 /**
  * sba_alloc_coherent - allocate/map shared mem for DMA
@@ -1122,7 +1151,8 @@ sba_alloc_coherent (struct device *dev, size_t size, dma_addr_t *dma_handle, gfp
 	 * If device can't bypass or bypass is disabled, pass the 32bit fake
 	 * device to map single to get an iova mapping.
 	 */
-	*dma_handle = sba_map_single(&ioc->sac_only_dev->dev, addr, size, 0);
+	*dma_handle = sba_map_single_attrs(&ioc->sac_only_dev->dev, addr,
+					   size, 0, NULL);
 
 	return addr;
 }
@@ -1139,7 +1169,7 @@ sba_alloc_coherent (struct device *dev, size_t size, dma_addr_t *dma_handle, gfp
  */
 void sba_free_coherent (struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle)
 {
-	sba_unmap_single(dev, dma_handle, size, 0);
+	sba_unmap_single_attrs(dev, dma_handle, size, 0, NULL);
 	free_pages((unsigned long) vaddr, get_order(size));
 }
 
@@ -1373,7 +1403,7 @@ sba_coalesce_chunks(struct ioc *ioc, struct device *dev,
 		dma_len = (dma_len + dma_offset + ~iovp_mask) & iovp_mask;
 		ASSERT(dma_len <= DMA_CHUNK_SIZE);
 		dma_sg->dma_address = (dma_addr_t) (PIDE_FLAG
-			| (sba_alloc_range(ioc, dma_len) << iovp_shift)
+			| (sba_alloc_range(ioc, dev, dma_len) << iovp_shift)
 			| dma_offset);
 		n_mappings++;
 	}
@@ -1388,10 +1418,12 @@ sba_coalesce_chunks(struct ioc *ioc, struct device *dev,
  * @sglist:  array of buffer/length pairs
  * @nents:  number of entries in list
  * @dir:  R/W or both.
+ * @attrs: optional dma attributes
  *
  * See Documentation/DMA-mapping.txt
  */
-int sba_map_sg(struct device *dev, struct scatterlist *sglist, int nents, int dir)
+int sba_map_sg_attrs(struct device *dev, struct scatterlist *sglist, int nents,
+		     int dir, struct dma_attrs *attrs)
 {
 	struct ioc *ioc;
 	int coalesced, filled = 0;
@@ -1419,16 +1451,16 @@ int sba_map_sg(struct device *dev, struct scatterlist *sglist, int nents, int di
 	/* Fast path single entry scatterlists. */
 	if (nents == 1) {
 		sglist->dma_length = sglist->length;
-		sglist->dma_address = sba_map_single(dev, sba_sg_address(sglist), sglist->length, dir);
+		sglist->dma_address = sba_map_single_attrs(dev, sba_sg_address(sglist), sglist->length, dir, attrs);
 		return 1;
 	}
 
 #ifdef ASSERT_PDIR_SANITY
 	spin_lock_irqsave(&ioc->res_lock, flags);
-	if (sba_check_pdir(ioc,"Check before sba_map_sg()"))
+	if (sba_check_pdir(ioc,"Check before sba_map_sg_attrs()"))
 	{
 		sba_dump_sg(ioc, sglist, nents);
-		panic("Check before sba_map_sg()");
+		panic("Check before sba_map_sg_attrs()");
 	}
 	spin_unlock_irqrestore(&ioc->res_lock, flags);
 #endif
@@ -1457,10 +1489,10 @@ int sba_map_sg(struct device *dev, struct scatterlist *sglist, int nents, int di
 
 #ifdef ASSERT_PDIR_SANITY
 	spin_lock_irqsave(&ioc->res_lock, flags);
-	if (sba_check_pdir(ioc,"Check after sba_map_sg()"))
+	if (sba_check_pdir(ioc,"Check after sba_map_sg_attrs()"))
 	{
 		sba_dump_sg(ioc, sglist, nents);
-		panic("Check after sba_map_sg()\n");
+		panic("Check after sba_map_sg_attrs()\n");
 	}
 	spin_unlock_irqrestore(&ioc->res_lock, flags);
 #endif
@@ -1470,18 +1502,20 @@ int sba_map_sg(struct device *dev, struct scatterlist *sglist, int nents, int di
 
 	return filled;
 }
-
+EXPORT_SYMBOL(sba_map_sg_attrs);
 
 /**
- * sba_unmap_sg - unmap Scatter/Gather list
+ * sba_unmap_sg_attrs - unmap Scatter/Gather list
  * @dev: instance of PCI owned by the driver that's asking.
  * @sglist:  array of buffer/length pairs
  * @nents:  number of entries in list
  * @dir:  R/W or both.
+ * @attrs: optional dma attributes
  *
  * See Documentation/DMA-mapping.txt
  */
-void sba_unmap_sg (struct device *dev, struct scatterlist *sglist, int nents, int dir)
+void sba_unmap_sg_attrs(struct device *dev, struct scatterlist *sglist,
+			int nents, int dir, struct dma_attrs *attrs)
 {
 #ifdef ASSERT_PDIR_SANITY
 	struct ioc *ioc;
@@ -1496,13 +1530,14 @@ void sba_unmap_sg (struct device *dev, struct scatterlist *sglist, int nents, in
 	ASSERT(ioc);
 
 	spin_lock_irqsave(&ioc->res_lock, flags);
-	sba_check_pdir(ioc,"Check before sba_unmap_sg()");
+	sba_check_pdir(ioc,"Check before sba_unmap_sg_attrs()");
 	spin_unlock_irqrestore(&ioc->res_lock, flags);
 #endif
 
 	while (nents && sglist->dma_length) {
 
-		sba_unmap_single(dev, sglist->dma_address, sglist->dma_length, dir);
+		sba_unmap_single_attrs(dev, sglist->dma_address,
+				       sglist->dma_length, dir, attrs);
 		sglist = sg_next(sglist);
 		nents--;
 	}
@@ -1511,11 +1546,12 @@ void sba_unmap_sg (struct device *dev, struct scatterlist *sglist, int nents, in
 
 #ifdef ASSERT_PDIR_SANITY
 	spin_lock_irqsave(&ioc->res_lock, flags);
-	sba_check_pdir(ioc,"Check after sba_unmap_sg()");
+	sba_check_pdir(ioc,"Check after sba_unmap_sg_attrs()");
 	spin_unlock_irqrestore(&ioc->res_lock, flags);
 #endif
 
 }
+EXPORT_SYMBOL(sba_unmap_sg_attrs);
 
 /**************************************************************
 *
@@ -1896,15 +1932,13 @@ static const struct file_operations ioc_fops = {
 static void __init
 ioc_proc_init(void)
 {
-	struct proc_dir_entry *dir, *entry;
+	struct proc_dir_entry *dir;
 
 	dir = proc_mkdir("bus/mckinley", NULL);
 	if (!dir)
 		return;
 
-	entry = create_proc_entry(ioc_list->name, 0, dir);
-	if (entry)
-		entry->proc_fops = &ioc_fops;
+	proc_create(ioc_list->name, 0, dir, &ioc_fops);
 }
 #endif
 
@@ -2144,10 +2178,6 @@ sba_page_override(char *str)
 __setup("sbapagesize=",sba_page_override);
 
 EXPORT_SYMBOL(sba_dma_mapping_error);
-EXPORT_SYMBOL(sba_map_single);
-EXPORT_SYMBOL(sba_unmap_single);
-EXPORT_SYMBOL(sba_map_sg);
-EXPORT_SYMBOL(sba_unmap_sg);
 EXPORT_SYMBOL(sba_dma_supported);
 EXPORT_SYMBOL(sba_alloc_coherent);
 EXPORT_SYMBOL(sba_free_coherent);
diff --git a/arch/ia64/hp/sim/simeth.c b/arch/ia64/hp/sim/simeth.c
index 969fe9f4..3d47839 100644
--- a/arch/ia64/hp/sim/simeth.c
+++ b/arch/ia64/hp/sim/simeth.c
@@ -294,7 +294,7 @@ simeth_device_event(struct notifier_block *this,unsigned long event, void *ptr)
 		return NOTIFY_DONE;
 	}
 
-	if (dev->nd_net != &init_net)
+	if (dev_net(dev) != &init_net)
 		return NOTIFY_DONE;
 
 	if ( event != NETDEV_UP && event != NETDEV_DOWN ) return NOTIFY_DONE;
diff --git a/arch/ia64/hp/sim/simscsi.c b/arch/ia64/hp/sim/simscsi.c
index 7661bb0..3a078ad 100644
--- a/arch/ia64/hp/sim/simscsi.c
+++ b/arch/ia64/hp/sim/simscsi.c
@@ -201,22 +201,6 @@ simscsi_readwrite10 (struct scsi_cmnd *sc, int mode)
 	simscsi_sg_readwrite(sc, mode, offset);
 }
 
-static void simscsi_fillresult(struct scsi_cmnd *sc, char *buf, unsigned len)
-{
-
-	int i;
-	unsigned thislen;
-	struct scatterlist *slp;
-
-	scsi_for_each_sg(sc, slp, scsi_sg_count(sc), i) {
-		if (!len)
-			break;
-		thislen = min(len, slp->length);
-		memcpy(sg_virt(slp), buf, thislen);
-		len -= thislen;
-	}
-}
-
 static int
 simscsi_queuecommand (struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *))
 {
@@ -258,7 +242,7 @@ simscsi_queuecommand (struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *))
 			buf[6] = 0;	/* reserved */
 			buf[7] = 0;	/* various flags */
 			memcpy(buf + 8, "HP      SIMULATED DISK  0.00",  28);
-			simscsi_fillresult(sc, buf, 36);
+			scsi_sg_copy_from_buffer(sc, buf, 36);
 			sc->result = GOOD;
 			break;
 
@@ -306,14 +290,15 @@ simscsi_queuecommand (struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *))
 			buf[5] = 0;
 			buf[6] = 2;
 			buf[7] = 0;
-			simscsi_fillresult(sc, buf, 8);
+			scsi_sg_copy_from_buffer(sc, buf, 8);
 			sc->result = GOOD;
 			break;
 
 		      case MODE_SENSE:
 		      case MODE_SENSE_10:
 			/* sd.c uses this to determine whether disk does write-caching. */
-			simscsi_fillresult(sc, (char *)empty_zero_page, scsi_bufflen(sc));
+			scsi_sg_copy_from_buffer(sc, (char *)empty_zero_page,
+						 PAGE_SIZE);
 			sc->result = GOOD;
 			break;
 
diff --git a/arch/ia64/hp/sim/simserial.c b/arch/ia64/hp/sim/simserial.c
index eb0c32a..23cafc8 100644
--- a/arch/ia64/hp/sim/simserial.c
+++ b/arch/ia64/hp/sim/simserial.c
@@ -210,21 +210,23 @@ static void do_softint(struct work_struct *private_)
 	printk(KERN_ERR "simserial: do_softint called\n");
 }
 
-static void rs_put_char(struct tty_struct *tty, unsigned char ch)
+static int rs_put_char(struct tty_struct *tty, unsigned char ch)
 {
 	struct async_struct *info = (struct async_struct *)tty->driver_data;
 	unsigned long flags;
 
-	if (!tty || !info->xmit.buf) return;
+	if (!tty || !info->xmit.buf)
+		return 0;
 
 	local_irq_save(flags);
 	if (CIRC_SPACE(info->xmit.head, info->xmit.tail, SERIAL_XMIT_SIZE) == 0) {
 		local_irq_restore(flags);
-		return;
+		return 0;
 	}
 	info->xmit.buf[info->xmit.head] = ch;
 	info->xmit.head = (info->xmit.head + 1) & (SERIAL_XMIT_SIZE-1);
 	local_irq_restore(flags);
+	return 1;
 }
 
 static void transmit_chars(struct async_struct *info, int *intr_done)
@@ -621,7 +623,8 @@ static void rs_close(struct tty_struct *tty, struct file * filp)
 	 * the line discipline to only process XON/XOFF characters.
 	 */
 	shutdown(info);
-	if (tty->driver->flush_buffer) tty->driver->flush_buffer(tty);
+	if (tty->ops->flush_buffer)
+		tty->ops->flush_buffer(tty);
 	if (tty->ldisc.flush_buffer) tty->ldisc.flush_buffer(tty);
 	info->event = 0;
 	info->tty = NULL;
diff --git a/arch/ia64/ia32/elfcore32.h b/arch/ia64/ia32/elfcore32.h
index 446c9aa..9a3abf58 100644
--- a/arch/ia64/ia32/elfcore32.h
+++ b/arch/ia64/ia32/elfcore32.h
@@ -30,7 +30,19 @@ struct elf_siginfo
 	int	si_errno;			/* errno */
 };
 
-#define jiffies_to_timeval(a,b) do { (b)->tv_usec = 0; (b)->tv_sec = (a)/HZ; }while(0)
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+/*
+ * Hacks are here since types between compat_timeval (= pair of s32) and
+ * ia64-native timeval (= pair of s64) are not compatible, at least a file
+ * arch/ia64/ia32/../../../fs/binfmt_elf.c will get warnings from compiler on
+ * use of cputime_to_timeval(), which usually an alias of jiffies_to_timeval().
+ */
+#define cputime_to_timeval(a,b) \
+	do { (b)->tv_usec = 0; (b)->tv_sec = (a)/NSEC_PER_SEC; } while(0)
+#else
+#define jiffies_to_timeval(a,b) \
+	do { (b)->tv_usec = 0; (b)->tv_sec = (a)/HZ; } while(0)
+#endif
 
 struct elf_prstatus
 {
diff --git a/arch/ia64/ia32/sys_ia32.c b/arch/ia64/ia32/sys_ia32.c
index b1bf51fe..7e028ce 100644
--- a/arch/ia64/ia32/sys_ia32.c
+++ b/arch/ia64/ia32/sys_ia32.c
@@ -38,6 +38,7 @@
 #include <linux/eventpoll.h>
 #include <linux/personality.h>
 #include <linux/ptrace.h>
+#include <linux/regset.h>
 #include <linux/stat.h>
 #include <linux/ipc.h>
 #include <linux/capability.h>
@@ -2387,16 +2388,45 @@ get_free_idx (void)
 	return -ESRCH;
 }
 
+static void set_tls_desc(struct task_struct *p, int idx,
+		const struct ia32_user_desc *info, int n)
+{
+	struct thread_struct *t = &p->thread;
+	struct desc_struct *desc = &t->tls_array[idx - GDT_ENTRY_TLS_MIN];
+	int cpu;
+
+	/*
+	 * We must not get preempted while modifying the TLS.
+	 */
+	cpu = get_cpu();
+
+	while (n-- > 0) {
+		if (LDT_empty(info)) {
+			desc->a = 0;
+			desc->b = 0;
+		} else {
+			desc->a = LDT_entry_a(info);
+			desc->b = LDT_entry_b(info);
+		}
+
+		++info;
+		++desc;
+	}
+
+	if (t == &current->thread)
+		load_TLS(t, cpu);
+
+	put_cpu();
+}
+
 /*
  * Set a given TLS descriptor:
  */
 asmlinkage int
 sys32_set_thread_area (struct ia32_user_desc __user *u_info)
 {
-	struct thread_struct *t = &current->thread;
 	struct ia32_user_desc info;
-	struct desc_struct *desc;
-	int cpu, idx;
+	int idx;
 
 	if (copy_from_user(&info, u_info, sizeof(info)))
 		return -EFAULT;
@@ -2416,18 +2446,7 @@ sys32_set_thread_area (struct ia32_user_desc __user *u_info)
 	if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
 		return -EINVAL;
 
-	desc = t->tls_array + idx - GDT_ENTRY_TLS_MIN;
-
-	cpu = smp_processor_id();
-
-	if (LDT_empty(&info)) {
-		desc->a = 0;
-		desc->b = 0;
-	} else {
-		desc->a = LDT_entry_a(&info);
-		desc->b = LDT_entry_b(&info);
-	}
-	load_TLS(t, cpu);
+	set_tls_desc(current, idx, &info, 1);
 	return 0;
 }
 
@@ -2451,6 +2470,20 @@ sys32_set_thread_area (struct ia32_user_desc __user *u_info)
 #define GET_PRESENT(desc)	(((desc)->b >> 15) & 1)
 #define GET_USEABLE(desc)	(((desc)->b >> 20) & 1)
 
+static void fill_user_desc(struct ia32_user_desc *info, int idx,
+		const struct desc_struct *desc)
+{
+	info->entry_number = idx;
+	info->base_addr = GET_BASE(desc);
+	info->limit = GET_LIMIT(desc);
+	info->seg_32bit = GET_32BIT(desc);
+	info->contents = GET_CONTENTS(desc);
+	info->read_exec_only = !GET_WRITABLE(desc);
+	info->limit_in_pages = GET_LIMIT_PAGES(desc);
+	info->seg_not_present = !GET_PRESENT(desc);
+	info->useable = GET_USEABLE(desc);
+}
+
 asmlinkage int
 sys32_get_thread_area (struct ia32_user_desc __user *u_info)
 {
@@ -2464,22 +2497,588 @@ sys32_get_thread_area (struct ia32_user_desc __user *u_info)
 		return -EINVAL;
 
 	desc = current->thread.tls_array + idx - GDT_ENTRY_TLS_MIN;
-
-	info.entry_number = idx;
-	info.base_addr = GET_BASE(desc);
-	info.limit = GET_LIMIT(desc);
-	info.seg_32bit = GET_32BIT(desc);
-	info.contents = GET_CONTENTS(desc);
-	info.read_exec_only = !GET_WRITABLE(desc);
-	info.limit_in_pages = GET_LIMIT_PAGES(desc);
-	info.seg_not_present = !GET_PRESENT(desc);
-	info.useable = GET_USEABLE(desc);
+	fill_user_desc(&info, idx, desc);
 
 	if (copy_to_user(u_info, &info, sizeof(info)))
 		return -EFAULT;
 	return 0;
 }
 
+struct regset_get {
+	void *kbuf;
+	void __user *ubuf;
+};
+
+struct regset_set {
+	const void *kbuf;
+	const void __user *ubuf;
+};
+
+struct regset_getset {
+	struct task_struct *target;
+	const struct user_regset *regset;
+	union {
+		struct regset_get get;
+		struct regset_set set;
+	} u;
+	unsigned int pos;
+	unsigned int count;
+	int ret;
+};
+
+static void getfpreg(struct task_struct *task, int regno, int *val)
+{
+	switch (regno / sizeof(int)) {
+	case 0:
+		*val = task->thread.fcr & 0xffff;
+		break;
+	case 1:
+		*val = task->thread.fsr & 0xffff;
+		break;
+	case 2:
+		*val = (task->thread.fsr>>16) & 0xffff;
+		break;
+	case 3:
+		*val = task->thread.fir;
+		break;
+	case 4:
+		*val = (task->thread.fir>>32) & 0xffff;
+		break;
+	case 5:
+		*val = task->thread.fdr;
+		break;
+	case 6:
+		*val = (task->thread.fdr >> 32) & 0xffff;
+		break;
+	}
+}
+
+static void setfpreg(struct task_struct *task, int regno, int val)
+{
+	switch (regno / sizeof(int)) {
+	case 0:
+		task->thread.fcr = (task->thread.fcr & (~0x1f3f))
+			| (val & 0x1f3f);
+		break;
+	case 1:
+		task->thread.fsr = (task->thread.fsr & (~0xffff)) | val;
+		break;
+	case 2:
+		task->thread.fsr = (task->thread.fsr & (~0xffff0000))
+			| (val << 16);
+		break;
+	case 3:
+		task->thread.fir = (task->thread.fir & (~0xffffffff)) | val;
+		break;
+	case 5:
+		task->thread.fdr = (task->thread.fdr & (~0xffffffff)) | val;
+		break;
+	}
+}
+
+static void access_fpreg_ia32(int regno, void *reg,
+		struct pt_regs *pt, struct switch_stack *sw,
+		int tos, int write)
+{
+	void *f;
+
+	if ((regno += tos) >= 8)
+		regno -= 8;
+	if (regno < 4)
+		f = &pt->f8 + regno;
+	else if (regno <= 7)
+		f = &sw->f12 + (regno - 4);
+	else {
+		printk(KERN_ERR "regno must be less than 7 \n");
+		 return;
+	}
+
+	if (write)
+		memcpy(f, reg, sizeof(struct _fpreg_ia32));
+	else
+		memcpy(reg, f, sizeof(struct _fpreg_ia32));
+}
+
+static void do_fpregs_get(struct unw_frame_info *info, void *arg)
+{
+	struct regset_getset *dst = arg;
+	struct task_struct *task = dst->target;
+	struct pt_regs *pt;
+	int start, end, tos;
+	char buf[80];
+
+	if (dst->count == 0 || unw_unwind_to_user(info) < 0)
+		return;
+	if (dst->pos < 7 * sizeof(int)) {
+		end = min((dst->pos + dst->count),
+			(unsigned int)(7 * sizeof(int)));
+		for (start = dst->pos; start < end; start += sizeof(int))
+			getfpreg(task, start, (int *)(buf + start));
+		dst->ret = user_regset_copyout(&dst->pos, &dst->count,
+				&dst->u.get.kbuf, &dst->u.get.ubuf, buf,
+				0, 7 * sizeof(int));
+		if (dst->ret || dst->count == 0)
+			return;
+	}
+	if (dst->pos < sizeof(struct ia32_user_i387_struct)) {
+		pt = task_pt_regs(task);
+		tos = (task->thread.fsr >> 11) & 7;
+		end = min(dst->pos + dst->count,
+			(unsigned int)(sizeof(struct ia32_user_i387_struct)));
+		start = (dst->pos - 7 * sizeof(int)) /
+			sizeof(struct _fpreg_ia32);
+		end = (end - 7 * sizeof(int)) / sizeof(struct _fpreg_ia32);
+		for (; start < end; start++)
+			access_fpreg_ia32(start,
+				(struct _fpreg_ia32 *)buf + start,
+				pt, info->sw, tos, 0);
+		dst->ret = user_regset_copyout(&dst->pos, &dst->count,
+				&dst->u.get.kbuf, &dst->u.get.ubuf,
+				buf, 7 * sizeof(int),
+				sizeof(struct ia32_user_i387_struct));
+		if (dst->ret || dst->count == 0)
+			return;
+	}
+}
+
+static void do_fpregs_set(struct unw_frame_info *info, void *arg)
+{
+	struct regset_getset *dst = arg;
+	struct task_struct *task = dst->target;
+	struct pt_regs *pt;
+	char buf[80];
+	int end, start, tos;
+
+	if (dst->count == 0 || unw_unwind_to_user(info) < 0)
+		return;
+
+	if (dst->pos < 7 * sizeof(int)) {
+		start = dst->pos;
+		dst->ret = user_regset_copyin(&dst->pos, &dst->count,
+				&dst->u.set.kbuf, &dst->u.set.ubuf, buf,
+				0, 7 * sizeof(int));
+		if (dst->ret)
+			return;
+		for (; start < dst->pos; start += sizeof(int))
+			setfpreg(task, start, *((int *)(buf + start)));
+		if (dst->count == 0)
+			return;
+	}
+	if (dst->pos < sizeof(struct ia32_user_i387_struct)) {
+		start = (dst->pos - 7 * sizeof(int)) /
+			sizeof(struct _fpreg_ia32);
+		dst->ret = user_regset_copyin(&dst->pos, &dst->count,
+				&dst->u.set.kbuf, &dst->u.set.ubuf,
+				buf, 7 * sizeof(int),
+				sizeof(struct ia32_user_i387_struct));
+		if (dst->ret)
+			return;
+		pt = task_pt_regs(task);
+		tos = (task->thread.fsr >> 11) & 7;
+		end = (dst->pos - 7 * sizeof(int)) / sizeof(struct _fpreg_ia32);
+		for (; start < end; start++)
+			access_fpreg_ia32(start,
+				(struct _fpreg_ia32 *)buf + start,
+				pt, info->sw, tos, 1);
+		if (dst->count == 0)
+			return;
+	}
+}
+
+#define OFFSET(member) ((int)(offsetof(struct ia32_user_fxsr_struct, member)))
+static void getfpxreg(struct task_struct *task, int start, int end, char *buf)
+{
+	int min_val;
+
+	min_val = min(end, OFFSET(fop));
+	while (start < min_val) {
+		if (start == OFFSET(cwd))
+			*((short *)buf) = task->thread.fcr & 0xffff;
+		else if (start == OFFSET(swd))
+			*((short *)buf) = task->thread.fsr & 0xffff;
+		else if (start == OFFSET(twd))
+			*((short *)buf) = (task->thread.fsr>>16) & 0xffff;
+		buf += 2;
+		start += 2;
+	}
+	/* skip fop element */
+	if (start == OFFSET(fop)) {
+		start += 2;
+		buf += 2;
+	}
+	while (start < end) {
+		if (start == OFFSET(fip))
+			*((int *)buf) = task->thread.fir;
+		else if (start == OFFSET(fcs))
+			*((int *)buf) = (task->thread.fir>>32) & 0xffff;
+		else if (start == OFFSET(foo))
+			*((int *)buf) = task->thread.fdr;
+		else if (start == OFFSET(fos))
+			*((int *)buf) = (task->thread.fdr>>32) & 0xffff;
+		else if (start == OFFSET(mxcsr))
+			*((int *)buf) = ((task->thread.fcr>>32) & 0xff80)
+					 | ((task->thread.fsr>>32) & 0x3f);
+		buf += 4;
+		start += 4;
+	}
+}
+
+static void setfpxreg(struct task_struct *task, int start, int end, char *buf)
+{
+	int min_val, num32;
+	short num;
+	unsigned long num64;
+
+	min_val = min(end, OFFSET(fop));
+	while (start < min_val) {
+		num = *((short *)buf);
+		if (start == OFFSET(cwd)) {
+			task->thread.fcr = (task->thread.fcr & (~0x1f3f))
+						| (num & 0x1f3f);
+		} else if (start == OFFSET(swd)) {
+			task->thread.fsr = (task->thread.fsr & (~0xffff)) | num;
+		} else if (start == OFFSET(twd)) {
+			task->thread.fsr = (task->thread.fsr & (~0xffff0000))
+				| (((int)num) << 16);
+		}
+		buf += 2;
+		start += 2;
+	}
+	/* skip fop element */
+	if (start == OFFSET(fop)) {
+		start += 2;
+		buf += 2;
+	}
+	while (start < end) {
+		num32 = *((int *)buf);
+		if (start == OFFSET(fip))
+			task->thread.fir = (task->thread.fir & (~0xffffffff))
+						 | num32;
+		else if (start == OFFSET(foo))
+			task->thread.fdr = (task->thread.fdr & (~0xffffffff))
+						 | num32;
+		else if (start == OFFSET(mxcsr)) {
+			num64 = num32 & 0xff10;
+			task->thread.fcr = (task->thread.fcr &
+				(~0xff1000000000UL)) | (num64<<32);
+			num64 = num32 & 0x3f;
+			task->thread.fsr = (task->thread.fsr &
+				(~0x3f00000000UL)) | (num64<<32);
+		}
+		buf += 4;
+		start += 4;
+	}
+}
+
+static void do_fpxregs_get(struct unw_frame_info *info, void *arg)
+{
+	struct regset_getset *dst = arg;
+	struct task_struct *task = dst->target;
+	struct pt_regs *pt;
+	char buf[128];
+	int start, end, tos;
+
+	if (dst->count == 0 || unw_unwind_to_user(info) < 0)
+		return;
+	if (dst->pos < OFFSET(st_space[0])) {
+		end = min(dst->pos + dst->count, (unsigned int)32);
+		getfpxreg(task, dst->pos, end, buf);
+		dst->ret = user_regset_copyout(&dst->pos, &dst->count,
+				&dst->u.get.kbuf, &dst->u.get.ubuf, buf,
+				0, OFFSET(st_space[0]));
+		if (dst->ret || dst->count == 0)
+			return;
+	}
+	if (dst->pos < OFFSET(xmm_space[0])) {
+		pt = task_pt_regs(task);
+		tos = (task->thread.fsr >> 11) & 7;
+		end = min(dst->pos + dst->count,
+				(unsigned int)OFFSET(xmm_space[0]));
+		start = (dst->pos - OFFSET(st_space[0])) / 16;
+		end = (end - OFFSET(st_space[0])) / 16;
+		for (; start < end; start++)
+			access_fpreg_ia32(start, buf + 16 * start, pt,
+						info->sw, tos, 0);
+		dst->ret = user_regset_copyout(&dst->pos, &dst->count,
+				&dst->u.get.kbuf, &dst->u.get.ubuf,
+				buf, OFFSET(st_space[0]), OFFSET(xmm_space[0]));
+		if (dst->ret || dst->count == 0)
+			return;
+	}
+	if (dst->pos < OFFSET(padding[0]))
+		dst->ret = user_regset_copyout(&dst->pos, &dst->count,
+				&dst->u.get.kbuf, &dst->u.get.ubuf,
+				&info->sw->f16, OFFSET(xmm_space[0]),
+				OFFSET(padding[0]));
+}
+
+static void do_fpxregs_set(struct unw_frame_info *info, void *arg)
+{
+	struct regset_getset *dst = arg;
+	struct task_struct *task = dst->target;
+	char buf[128];
+	int start, end;
+
+	if (dst->count == 0 || unw_unwind_to_user(info) < 0)
+		return;
+
+	if (dst->pos < OFFSET(st_space[0])) {
+		start = dst->pos;
+		dst->ret = user_regset_copyin(&dst->pos, &dst->count,
+				&dst->u.set.kbuf, &dst->u.set.ubuf,
+				buf, 0, OFFSET(st_space[0]));
+		if (dst->ret)
+			return;
+		setfpxreg(task, start, dst->pos, buf);
+		if (dst->count == 0)
+			return;
+	}
+	if (dst->pos < OFFSET(xmm_space[0])) {
+		struct pt_regs *pt;
+		int tos;
+		pt = task_pt_regs(task);
+		tos = (task->thread.fsr >> 11) & 7;
+		start = (dst->pos - OFFSET(st_space[0])) / 16;
+		dst->ret = user_regset_copyin(&dst->pos, &dst->count,
+				&dst->u.set.kbuf, &dst->u.set.ubuf,
+				buf, OFFSET(st_space[0]), OFFSET(xmm_space[0]));
+		if (dst->ret)
+			return;
+		end = (dst->pos - OFFSET(st_space[0])) / 16;
+		for (; start < end; start++)
+			access_fpreg_ia32(start, buf + 16 * start, pt, info->sw,
+						 tos, 1);
+		if (dst->count == 0)
+			return;
+	}
+	if (dst->pos < OFFSET(padding[0]))
+		dst->ret = user_regset_copyin(&dst->pos, &dst->count,
+				&dst->u.set.kbuf, &dst->u.set.ubuf,
+				&info->sw->f16, OFFSET(xmm_space[0]),
+				 OFFSET(padding[0]));
+}
+#undef OFFSET
+
+static int do_regset_call(void (*call)(struct unw_frame_info *, void *),
+		struct task_struct *target,
+		const struct user_regset *regset,
+		unsigned int pos, unsigned int count,
+		const void *kbuf, const void __user *ubuf)
+{
+	struct regset_getset info = { .target = target, .regset = regset,
+		.pos = pos, .count = count,
+		.u.set = { .kbuf = kbuf, .ubuf = ubuf },
+		.ret = 0 };
+
+	if (target == current)
+		unw_init_running(call, &info);
+	else {
+		struct unw_frame_info ufi;
+		memset(&ufi, 0, sizeof(ufi));
+		unw_init_from_blocked_task(&ufi, target);
+		(*call)(&ufi, &info);
+	}
+
+	return info.ret;
+}
+
+static int ia32_fpregs_get(struct task_struct *target,
+		const struct user_regset *regset,
+		unsigned int pos, unsigned int count,
+		void *kbuf, void __user *ubuf)
+{
+	return do_regset_call(do_fpregs_get, target, regset, pos, count,
+		kbuf, ubuf);
+}
+
+static int ia32_fpregs_set(struct task_struct *target,
+		const struct user_regset *regset,
+		unsigned int pos, unsigned int count,
+		const void *kbuf, const void __user *ubuf)
+{
+	return do_regset_call(do_fpregs_set, target, regset, pos, count,
+		kbuf, ubuf);
+}
+
+static int ia32_fpxregs_get(struct task_struct *target,
+		const struct user_regset *regset,
+		unsigned int pos, unsigned int count,
+		void *kbuf, void __user *ubuf)
+{
+	return do_regset_call(do_fpxregs_get, target, regset, pos, count,
+		kbuf, ubuf);
+}
+
+static int ia32_fpxregs_set(struct task_struct *target,
+		const struct user_regset *regset,
+		unsigned int pos, unsigned int count,
+		const void *kbuf, const void __user *ubuf)
+{
+	return do_regset_call(do_fpxregs_set, target, regset, pos, count,
+		kbuf, ubuf);
+}
+
+static int ia32_genregs_get(struct task_struct *target,
+		const struct user_regset *regset,
+		unsigned int pos, unsigned int count,
+		void *kbuf, void __user *ubuf)
+{
+	if (kbuf) {
+		u32 *kp = kbuf;
+		while (count > 0) {
+			*kp++ = getreg(target, pos);
+			pos += 4;
+			count -= 4;
+		}
+	} else {
+		u32 __user *up = ubuf;
+		while (count > 0) {
+			if (__put_user(getreg(target, pos), up++))
+				return -EFAULT;
+			pos += 4;
+			count -= 4;
+		}
+	}
+	return 0;
+}
+
+static int ia32_genregs_set(struct task_struct *target,
+		const struct user_regset *regset,
+		unsigned int pos, unsigned int count,
+		const void *kbuf, const void __user *ubuf)
+{
+	int ret = 0;
+
+	if (kbuf) {
+		const u32 *kp = kbuf;
+		while (!ret && count > 0) {
+			putreg(target, pos, *kp++);
+			pos += 4;
+			count -= 4;
+		}
+	} else {
+		const u32 __user *up = ubuf;
+		u32 val;
+		while (!ret && count > 0) {
+			ret = __get_user(val, up++);
+			if (!ret)
+				putreg(target, pos, val);
+			pos += 4;
+			count -= 4;
+		}
+	}
+	return ret;
+}
+
+static int ia32_tls_active(struct task_struct *target,
+		const struct user_regset *regset)
+{
+	struct thread_struct *t = &target->thread;
+	int n = GDT_ENTRY_TLS_ENTRIES;
+	while (n > 0 && desc_empty(&t->tls_array[n -1]))
+		--n;
+	return n;
+}
+
+static int ia32_tls_get(struct task_struct *target,
+		const struct user_regset *regset, unsigned int pos,
+		unsigned int count, void *kbuf, void __user *ubuf)
+{
+	const struct desc_struct *tls;
+
+	if (pos > GDT_ENTRY_TLS_ENTRIES * sizeof(struct ia32_user_desc) ||
+			(pos % sizeof(struct ia32_user_desc)) != 0 ||
+			(count % sizeof(struct ia32_user_desc)) != 0)
+		return -EINVAL;
+
+	pos /= sizeof(struct ia32_user_desc);
+	count /= sizeof(struct ia32_user_desc);
+
+	tls = &target->thread.tls_array[pos];
+
+	if (kbuf) {
+		struct ia32_user_desc *info = kbuf;
+		while (count-- > 0)
+			fill_user_desc(info++, GDT_ENTRY_TLS_MIN + pos++,
+					tls++);
+	} else {
+		struct ia32_user_desc __user *u_info = ubuf;
+		while (count-- > 0) {
+			struct ia32_user_desc info;
+			fill_user_desc(&info, GDT_ENTRY_TLS_MIN + pos++, tls++);
+			if (__copy_to_user(u_info++, &info, sizeof(info)))
+				return -EFAULT;
+		}
+	}
+
+	return 0;
+}
+
+static int ia32_tls_set(struct task_struct *target,
+		const struct user_regset *regset, unsigned int pos,
+		unsigned int count, const void *kbuf, const void __user *ubuf)
+{
+	struct ia32_user_desc infobuf[GDT_ENTRY_TLS_ENTRIES];
+	const struct ia32_user_desc *info;
+
+	if (pos > GDT_ENTRY_TLS_ENTRIES * sizeof(struct ia32_user_desc) ||
+			(pos % sizeof(struct ia32_user_desc)) != 0 ||
+			(count % sizeof(struct ia32_user_desc)) != 0)
+		return -EINVAL;
+
+	if (kbuf)
+		info = kbuf;
+	else if (__copy_from_user(infobuf, ubuf, count))
+		return -EFAULT;
+	else
+		info = infobuf;
+
+	set_tls_desc(target,
+		GDT_ENTRY_TLS_MIN + (pos / sizeof(struct ia32_user_desc)),
+		info, count / sizeof(struct ia32_user_desc));
+
+	return 0;
+}
+
+/*
+ * This should match arch/i386/kernel/ptrace.c:native_regsets.
+ * XXX ioperm? vm86?
+ */
+static const struct user_regset ia32_regsets[] = {
+	{
+		.core_note_type = NT_PRSTATUS,
+		.n = sizeof(struct user_regs_struct32)/4,
+		.size = 4, .align = 4,
+		.get = ia32_genregs_get, .set = ia32_genregs_set
+	},
+	{
+		.core_note_type = NT_PRFPREG,
+		.n = sizeof(struct ia32_user_i387_struct) / 4,
+		.size = 4, .align = 4,
+		.get = ia32_fpregs_get, .set = ia32_fpregs_set
+	},
+	{
+		.core_note_type = NT_PRXFPREG,
+		.n = sizeof(struct ia32_user_fxsr_struct) / 4,
+		.size = 4, .align = 4,
+		.get = ia32_fpxregs_get, .set = ia32_fpxregs_set
+	},
+	{
+		.core_note_type = NT_386_TLS,
+		.n = GDT_ENTRY_TLS_ENTRIES,
+		.bias = GDT_ENTRY_TLS_MIN,
+		.size = sizeof(struct ia32_user_desc),
+		.align = sizeof(struct ia32_user_desc),
+		.active = ia32_tls_active,
+		.get = ia32_tls_get, .set = ia32_tls_set,
+	},
+};
+
+const struct user_regset_view user_ia32_view = {
+	.name = "i386", .e_machine = EM_386,
+	.regsets = ia32_regsets, .n = ARRAY_SIZE(ia32_regsets)
+};
+
 long sys32_fadvise64_64(int fd, __u32 offset_low, __u32 offset_high, 
 			__u32 len_low, __u32 len_high, int advice)
 { 
diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile
index 33e5a59..13fd10e 100644
--- a/arch/ia64/kernel/Makefile
+++ b/arch/ia64/kernel/Makefile
@@ -6,7 +6,7 @@ extra-y	:= head.o init_task.o vmlinux.lds
 
 obj-y := acpi.o entry.o efi.o efi_stub.o gate-data.o fsys.o ia64_ksyms.o irq.o irq_ia64.o	\
 	 irq_lsapic.o ivt.o machvec.o pal.o patch.o process.o perfmon.o ptrace.o sal.o		\
-	 salinfo.o semaphore.o setup.o signal.o sys_ia64.o time.o traps.o unaligned.o \
+	 salinfo.o setup.o signal.o sys_ia64.o time.o traps.o unaligned.o \
 	 unwind.o mca.o mca_asm.o topology.o
 
 obj-$(CONFIG_IA64_BRL_EMU)	+= brl_emu.o
diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c
index 78f28d8..c7467f8 100644
--- a/arch/ia64/kernel/acpi.c
+++ b/arch/ia64/kernel/acpi.c
@@ -423,6 +423,7 @@ static u32 __devinitdata pxm_flag[PXM_FLAG_LEN];
 #define pxm_bit_set(bit)	(set_bit(bit,(void *)pxm_flag))
 #define pxm_bit_test(bit)	(test_bit(bit,(void *)pxm_flag))
 static struct acpi_table_slit __initdata *slit_table;
+cpumask_t early_cpu_possible_map = CPU_MASK_NONE;
 
 static int get_processor_proximity_domain(struct acpi_srat_cpu_affinity *pa)
 {
@@ -482,6 +483,7 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
 	    (pa->apic_id << 8) | (pa->local_sapic_eid);
 	/* nid should be overridden as logical node id later */
 	node_cpuid[srat_num_cpus].nid = pxm;
+	cpu_set(srat_num_cpus, early_cpu_possible_map);
 	srat_num_cpus++;
 }
 
@@ -559,7 +561,7 @@ void __init acpi_numa_arch_fixup(void)
 	}
 
 	/* set logical node id in cpu structure */
-	for (i = 0; i < srat_num_cpus; i++)
+	for_each_possible_early_cpu(i)
 		node_cpuid[i].nid = pxm_to_node(node_cpuid[i].nid);
 
 	printk(KERN_INFO "Number of logical nodes in system = %d\n",
diff --git a/arch/ia64/kernel/asm-offsets.c b/arch/ia64/kernel/asm-offsets.c
index 0aebc6f..c64a55a 100644
--- a/arch/ia64/kernel/asm-offsets.c
+++ b/arch/ia64/kernel/asm-offsets.c
@@ -7,8 +7,9 @@
 #define ASM_OFFSETS_C 1
 
 #include <linux/sched.h>
+#include <linux/pid.h>
 #include <linux/clocksource.h>
-
+#include <linux/kbuild.h>
 #include <asm-ia64/processor.h>
 #include <asm-ia64/ptrace.h>
 #include <asm-ia64/siginfo.h>
@@ -18,11 +19,6 @@
 #include "../kernel/sigframe.h"
 #include "../kernel/fsyscall_gtod_data.h"
 
-#define DEFINE(sym, val) \
-        asm volatile("\n->" #sym " %0 " #val : : "i" (val))
-
-#define BLANK() asm volatile("\n->" : : )
-
 void foo(void)
 {
 	DEFINE(IA64_TASK_SIZE, sizeof (struct task_struct));
@@ -34,17 +30,29 @@ void foo(void)
 	DEFINE(SIGFRAME_SIZE, sizeof (struct sigframe));
 	DEFINE(UNW_FRAME_INFO_SIZE, sizeof (struct unw_frame_info));
 
+	BUILD_BUG_ON(sizeof(struct upid) != 32);
+	DEFINE(IA64_UPID_SHIFT, 5);
+
 	BLANK();
 
 	DEFINE(TI_FLAGS, offsetof(struct thread_info, flags));
 	DEFINE(TI_CPU, offsetof(struct thread_info, cpu));
 	DEFINE(TI_PRE_COUNT, offsetof(struct thread_info, preempt_count));
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+	DEFINE(TI_AC_STAMP, offsetof(struct thread_info, ac_stamp));
+	DEFINE(TI_AC_LEAVE, offsetof(struct thread_info, ac_leave));
+	DEFINE(TI_AC_STIME, offsetof(struct thread_info, ac_stime));
+	DEFINE(TI_AC_UTIME, offsetof(struct thread_info, ac_utime));
+#endif
 
 	BLANK();
 
 	DEFINE(IA64_TASK_BLOCKED_OFFSET,offsetof (struct task_struct, blocked));
 	DEFINE(IA64_TASK_CLEAR_CHILD_TID_OFFSET,offsetof (struct task_struct, clear_child_tid));
 	DEFINE(IA64_TASK_GROUP_LEADER_OFFSET, offsetof (struct task_struct, group_leader));
+	DEFINE(IA64_TASK_TGIDLINK_OFFSET, offsetof (struct task_struct, pids[PIDTYPE_PID].pid));
+	DEFINE(IA64_PID_LEVEL_OFFSET, offsetof (struct pid, level));
+	DEFINE(IA64_PID_UPID_OFFSET, offsetof (struct pid, numbers[0]));
 	DEFINE(IA64_TASK_PENDING_OFFSET,offsetof (struct task_struct, pending));
 	DEFINE(IA64_TASK_PID_OFFSET, offsetof (struct task_struct, pid));
 	DEFINE(IA64_TASK_REAL_PARENT_OFFSET, offsetof (struct task_struct, real_parent));
diff --git a/arch/ia64/kernel/crash.c b/arch/ia64/kernel/crash.c
index fbe742a..f065093 100644
--- a/arch/ia64/kernel/crash.c
+++ b/arch/ia64/kernel/crash.c
@@ -24,6 +24,7 @@ int kdump_status[NR_CPUS];
 static atomic_t kdump_cpu_frozen;
 atomic_t kdump_in_progress;
 static int kdump_on_init = 1;
+static int kdump_on_fatal_mca = 1;
 
 static inline Elf64_Word
 *append_elf_note(Elf64_Word *buf, char *name, unsigned type, void *data,
@@ -118,6 +119,7 @@ machine_crash_shutdown(struct pt_regs *pt)
 static void
 machine_kdump_on_init(void)
 {
+	crash_save_vmcoreinfo();
 	local_irq_disable();
 	kexec_disable_iosapic();
 	machine_kexec(ia64_kimage);
@@ -148,7 +150,7 @@ kdump_init_notifier(struct notifier_block *self, unsigned long val, void *data)
 	struct ia64_mca_notify_die *nd;
 	struct die_args *args = data;
 
-	if (!kdump_on_init)
+	if (!kdump_on_init && !kdump_on_fatal_mca)
 		return NOTIFY_DONE;
 
 	if (!ia64_kimage) {
@@ -173,32 +175,38 @@ kdump_init_notifier(struct notifier_block *self, unsigned long val, void *data)
 		return NOTIFY_DONE;
 
 	switch (val) {
-		case DIE_INIT_MONARCH_PROCESS:
+	case DIE_INIT_MONARCH_PROCESS:
+		if (kdump_on_init) {
 			atomic_set(&kdump_in_progress, 1);
 			*(nd->monarch_cpu) = -1;
-			break;
-		case DIE_INIT_MONARCH_LEAVE:
+		}
+		break;
+	case DIE_INIT_MONARCH_LEAVE:
+		if (kdump_on_init)
 			machine_kdump_on_init();
-			break;
-		case DIE_INIT_SLAVE_LEAVE:
-			if (atomic_read(&kdump_in_progress))
-				unw_init_running(kdump_cpu_freeze, NULL);
-			break;
-		case DIE_MCA_RENDZVOUS_LEAVE:
-			if (atomic_read(&kdump_in_progress))
-				unw_init_running(kdump_cpu_freeze, NULL);
-			break;
-		case DIE_MCA_MONARCH_LEAVE:
-		     /* die_register->signr indicate if MCA is recoverable */
-			if (!args->signr)
-				machine_kdump_on_init();
-			break;
+		break;
+	case DIE_INIT_SLAVE_LEAVE:
+		if (atomic_read(&kdump_in_progress))
+			unw_init_running(kdump_cpu_freeze, NULL);
+		break;
+	case DIE_MCA_RENDZVOUS_LEAVE:
+		if (atomic_read(&kdump_in_progress))
+			unw_init_running(kdump_cpu_freeze, NULL);
+		break;
+	case DIE_MCA_MONARCH_LEAVE:
+		/* *(nd->data) indicate if MCA is recoverable */
+		if (kdump_on_fatal_mca && !(*(nd->data))) {
+			atomic_set(&kdump_in_progress, 1);
+			*(nd->monarch_cpu) = -1;
+			machine_kdump_on_init();
+		}
+		break;
 	}
 	return NOTIFY_DONE;
 }
 
 #ifdef CONFIG_SYSCTL
-static ctl_table kdump_on_init_table[] = {
+static ctl_table kdump_ctl_table[] = {
 	{
 		.ctl_name = CTL_UNNUMBERED,
 		.procname = "kdump_on_init",
@@ -207,6 +215,14 @@ static ctl_table kdump_on_init_table[] = {
 		.mode = 0644,
 		.proc_handler = &proc_dointvec,
 	},
+	{
+		.ctl_name = CTL_UNNUMBERED,
+		.procname = "kdump_on_fatal_mca",
+		.data = &kdump_on_fatal_mca,
+		.maxlen = sizeof(int),
+		.mode = 0644,
+		.proc_handler = &proc_dointvec,
+	},
 	{ .ctl_name = 0 }
 };
 
@@ -215,7 +231,7 @@ static ctl_table sys_table[] = {
 	  .ctl_name = CTL_KERN,
 	  .procname = "kernel",
 	  .mode = 0555,
-	  .child = kdump_on_init_table,
+	  .child = kdump_ctl_table,
 	},
 	{ .ctl_name = 0 }
 };
diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c
index 728d724..d45f215 100644
--- a/arch/ia64/kernel/efi.c
+++ b/arch/ia64/kernel/efi.c
@@ -37,6 +37,7 @@
 #include <asm/pgtable.h>
 #include <asm/processor.h>
 #include <asm/mca.h>
+#include <asm/tlbflush.h>
 
 #define EFI_DEBUG	0
 
@@ -403,6 +404,41 @@ efi_get_pal_addr (void)
 	return NULL;
 }
 
+
+static u8 __init palo_checksum(u8 *buffer, u32 length)
+{
+	u8 sum = 0;
+	u8 *end = buffer + length;
+
+	while (buffer < end)
+		sum = (u8) (sum + *(buffer++));
+
+	return sum;
+}
+
+/*
+ * Parse and handle PALO table which is published at:
+ * http://www.dig64.org/home/DIG64_PALO_R1_0.pdf
+ */
+static void __init handle_palo(unsigned long palo_phys)
+{
+	struct palo_table *palo = __va(palo_phys);
+	u8  checksum;
+
+	if (strncmp(palo->signature, PALO_SIG, sizeof(PALO_SIG) - 1)) {
+		printk(KERN_INFO "PALO signature incorrect.\n");
+		return;
+	}
+
+	checksum = palo_checksum((u8 *)palo, palo->length);
+	if (checksum) {
+		printk(KERN_INFO "PALO checksum incorrect.\n");
+		return;
+	}
+
+	setup_ptcg_sem(palo->max_tlb_purges, NPTCG_FROM_PALO);
+}
+
 void
 efi_map_pal_code (void)
 {
@@ -432,6 +468,7 @@ efi_init (void)
 	u64 efi_desc_size;
 	char *cp, vendor[100] = "unknown";
 	int i;
+	unsigned long palo_phys;
 
 	/*
 	 * It's too early to be able to use the standard kernel command line
@@ -496,6 +533,8 @@ efi_init (void)
 	efi.hcdp       = EFI_INVALID_TABLE_ADDR;
 	efi.uga        = EFI_INVALID_TABLE_ADDR;
 
+	palo_phys      = EFI_INVALID_TABLE_ADDR;
+
 	for (i = 0; i < (int) efi.systab->nr_tables; i++) {
 		if (efi_guidcmp(config_tables[i].guid, MPS_TABLE_GUID) == 0) {
 			efi.mps = config_tables[i].table;
@@ -515,10 +554,17 @@ efi_init (void)
 		} else if (efi_guidcmp(config_tables[i].guid, HCDP_TABLE_GUID) == 0) {
 			efi.hcdp = config_tables[i].table;
 			printk(" HCDP=0x%lx", config_tables[i].table);
+		} else if (efi_guidcmp(config_tables[i].guid,
+			 PROCESSOR_ABSTRACTION_LAYER_OVERWRITE_GUID) == 0) {
+			palo_phys = config_tables[i].table;
+			printk(" PALO=0x%lx", config_tables[i].table);
 		}
 	}
 	printk("\n");
 
+	if (palo_phys != EFI_INVALID_TABLE_ADDR)
+		handle_palo(palo_phys);
+
 	runtime = __va(efi.systab->runtime);
 	efi.get_time = phys_get_time;
 	efi.set_time = phys_set_time;
diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S
index 3c331c4..e49ad8c 100644
--- a/arch/ia64/kernel/entry.S
+++ b/arch/ia64/kernel/entry.S
@@ -570,6 +570,7 @@ GLOBAL_ENTRY(ia64_trace_syscall)
 	br.call.sptk.many rp=syscall_trace_leave // give parent a chance to catch return value
 .ret3:
 (pUStk)	cmp.eq.unc p6,p0=r0,r0			// p6 <- pUStk
+(pUStk)	rsm psr.i				// disable interrupts
 	br.cond.sptk .work_pending_syscall_end
 
 strace_error:
@@ -710,6 +711,16 @@ ENTRY(ia64_leave_syscall)
 (pUStk)	cmp.eq.unc p6,p0=r0,r0		// p6 <- pUStk
 #endif
 .work_processed_syscall:
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+	adds r2=PT(LOADRS)+16,r12
+(pUStk)	mov.m r22=ar.itc			// fetch time at leave
+	adds r18=TI_FLAGS+IA64_TASK_SIZE,r13
+	;;
+(p6)	ld4 r31=[r18]				// load current_thread_info()->flags
+	ld8 r19=[r2],PT(B6)-PT(LOADRS)		// load ar.rsc value for "loadrs"
+	adds r3=PT(AR_BSPSTORE)+16,r12		// deferred
+	;;
+#else
 	adds r2=PT(LOADRS)+16,r12
 	adds r3=PT(AR_BSPSTORE)+16,r12
 	adds r18=TI_FLAGS+IA64_TASK_SIZE,r13
@@ -718,6 +729,7 @@ ENTRY(ia64_leave_syscall)
 	ld8 r19=[r2],PT(B6)-PT(LOADRS)		// load ar.rsc value for "loadrs"
 	nop.i 0
 	;;
+#endif
 	mov r16=ar.bsp				// M2  get existing backing store pointer
 	ld8 r18=[r2],PT(R9)-PT(B6)		// load b6
 (p6)	and r15=TIF_WORK_MASK,r31		// any work other than TIF_SYSCALL_TRACE?
@@ -737,12 +749,21 @@ ENTRY(ia64_leave_syscall)
 
 	ld8 r29=[r2],16		// M0|1 load cr.ipsr
 	ld8 r28=[r3],16		// M0|1 load cr.iip
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+(pUStk) add r14=TI_AC_LEAVE+IA64_TASK_SIZE,r13
+	;;
+	ld8 r30=[r2],16		// M0|1 load cr.ifs
+	ld8 r25=[r3],16		// M0|1 load ar.unat
+(pUStk) add r15=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13
+	;;
+#else
 	mov r22=r0		// A    clear r22
 	;;
 	ld8 r30=[r2],16		// M0|1 load cr.ifs
 	ld8 r25=[r3],16		// M0|1 load ar.unat
 (pUStk) add r14=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13
 	;;
+#endif
 	ld8 r26=[r2],PT(B0)-PT(AR_PFS)	// M0|1 load ar.pfs
 (pKStk)	mov r22=psr			// M2   read PSR now that interrupts are disabled
 	nop 0
@@ -759,7 +780,11 @@ ENTRY(ia64_leave_syscall)
 	ld8.fill r1=[r3],16			// M0|1 load r1
 (pUStk) mov r17=1				// A
 	;;
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+(pUStk) st1 [r15]=r17				// M2|3
+#else
 (pUStk) st1 [r14]=r17				// M2|3
+#endif
 	ld8.fill r13=[r3],16			// M0|1
 	mov f8=f0				// F    clear f8
 	;;
@@ -775,12 +800,22 @@ ENTRY(ia64_leave_syscall)
 	shr.u r18=r19,16		// I0|1 get byte size of existing "dirty" partition
 	cover				// B    add current frame into dirty partition & set cr.ifs
 	;;
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+	mov r19=ar.bsp			// M2   get new backing store pointer
+	st8 [r14]=r22			// M	save time at leave
+	mov f10=f0			// F    clear f10
+
+	mov r22=r0			// A	clear r22
+	movl r14=__kernel_syscall_via_epc // X
+	;;
+#else
 	mov r19=ar.bsp			// M2   get new backing store pointer
 	mov f10=f0			// F    clear f10
 
 	nop.m 0
 	movl r14=__kernel_syscall_via_epc // X
 	;;
+#endif
 	mov.m ar.csd=r0			// M2   clear ar.csd
 	mov.m ar.ccv=r0			// M2   clear ar.ccv
 	mov b7=r14			// I0   clear b7 (hint with __kernel_syscall_via_epc)
@@ -913,10 +948,18 @@ GLOBAL_ENTRY(ia64_leave_kernel)
 	adds r16=PT(CR_IPSR)+16,r12
 	adds r17=PT(CR_IIP)+16,r12
 
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+	.pred.rel.mutex pUStk,pKStk
+(pKStk)	mov r22=psr		// M2 read PSR now that interrupts are disabled
+(pUStk)	mov.m r22=ar.itc	// M  fetch time at leave
+	nop.i 0
+	;;
+#else
 (pKStk)	mov r22=psr		// M2 read PSR now that interrupts are disabled
 	nop.i 0
 	nop.i 0
 	;;
+#endif
 	ld8 r29=[r16],16	// load cr.ipsr
 	ld8 r28=[r17],16	// load cr.iip
 	;;
@@ -938,15 +981,37 @@ GLOBAL_ENTRY(ia64_leave_kernel)
 	;;
 	ld8.fill r12=[r16],16
 	ld8.fill r13=[r17],16
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+(pUStk)	adds r3=TI_AC_LEAVE+IA64_TASK_SIZE,r18
+#else
 (pUStk)	adds r18=IA64_TASK_THREAD_ON_USTACK_OFFSET,r18
+#endif
 	;;
 	ld8 r20=[r16],16	// ar.fpsr
 	ld8.fill r15=[r17],16
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+(pUStk)	adds r18=IA64_TASK_THREAD_ON_USTACK_OFFSET,r18	// deferred
+#endif
 	;;
 	ld8.fill r14=[r16],16
 	ld8.fill r2=[r17]
 (pUStk)	mov r17=1
 	;;
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+	//  mmi_ :  ld8 st1 shr;;         mmi_ : st8 st1 shr;;
+	//  mib  :  mov add br        ->  mib  : ld8 add br
+	//  bbb_ :  br  nop cover;;       mbb_ : mov br  cover;;
+	//
+	//  no one require bsp in r16 if (pKStk) branch is selected.
+(pUStk)	st8 [r3]=r22		// save time at leave
+(pUStk)	st1 [r18]=r17		// restore current->thread.on_ustack
+	shr.u r18=r19,16	// get byte size of existing "dirty" partition
+	;;
+	ld8.fill r3=[r16]	// deferred
+	LOAD_PHYS_STACK_REG_SIZE(r17)
+(pKStk)	br.cond.dpnt skip_rbs_switch
+	mov r16=ar.bsp		// get existing backing store pointer
+#else
 	ld8.fill r3=[r16]
 (pUStk)	st1 [r18]=r17		// restore current->thread.on_ustack
 	shr.u r18=r19,16	// get byte size of existing "dirty" partition
@@ -954,6 +1019,7 @@ GLOBAL_ENTRY(ia64_leave_kernel)
 	mov r16=ar.bsp		// get existing backing store pointer
 	LOAD_PHYS_STACK_REG_SIZE(r17)
 (pKStk)	br.cond.dpnt skip_rbs_switch
+#endif
 
 	/*
 	 * Restore user backing store.
diff --git a/arch/ia64/kernel/fsys.S b/arch/ia64/kernel/fsys.S
index 4484197..c1625c7 100644
--- a/arch/ia64/kernel/fsys.S
+++ b/arch/ia64/kernel/fsys.S
@@ -61,13 +61,29 @@ ENTRY(fsys_getpid)
 	.prologue
 	.altrp b6
 	.body
+	add r17=IA64_TASK_GROUP_LEADER_OFFSET,r16
+	;;
+	ld8 r17=[r17]				// r17 = current->group_leader
 	add r9=TI_FLAGS+IA64_TASK_SIZE,r16
 	;;
 	ld4 r9=[r9]
-	add r8=IA64_TASK_TGID_OFFSET,r16
+	add r17=IA64_TASK_TGIDLINK_OFFSET,r17
 	;;
 	and r9=TIF_ALLWORK_MASK,r9
-	ld4 r8=[r8]				// r8 = current->tgid
+	ld8 r17=[r17]				// r17 = current->group_leader->pids[PIDTYPE_PID].pid
+	;;
+	add r8=IA64_PID_LEVEL_OFFSET,r17
+	;;
+	ld4 r8=[r8]				// r8 = pid->level
+	add r17=IA64_PID_UPID_OFFSET,r17	// r17 = &pid->numbers[0]
+	;;
+	shl r8=r8,IA64_UPID_SHIFT
+	;;
+	add r17=r17,r8				// r17 = &pid->numbers[pid->level]
+	;;
+	ld4 r8=[r17]				// r8 = pid->numbers[pid->level].nr
+	;;
+	mov r17=0
 	;;
 	cmp.ne p8,p0=0,r9
 (p8)	br.spnt.many fsys_fallback_syscall
@@ -126,15 +142,25 @@ ENTRY(fsys_set_tid_address)
 	.altrp b6
 	.body
 	add r9=TI_FLAGS+IA64_TASK_SIZE,r16
+	add r17=IA64_TASK_TGIDLINK_OFFSET,r16
 	;;
 	ld4 r9=[r9]
 	tnat.z p6,p7=r32		// check argument register for being NaT
+	ld8 r17=[r17]				// r17 = current->pids[PIDTYPE_PID].pid
 	;;
 	and r9=TIF_ALLWORK_MASK,r9
-	add r8=IA64_TASK_PID_OFFSET,r16
+	add r8=IA64_PID_LEVEL_OFFSET,r17
 	add r18=IA64_TASK_CLEAR_CHILD_TID_OFFSET,r16
 	;;
-	ld4 r8=[r8]
+	ld4 r8=[r8]				// r8 = pid->level
+	add r17=IA64_PID_UPID_OFFSET,r17	// r17 = &pid->numbers[0]
+	;;
+	shl r8=r8,IA64_UPID_SHIFT
+	;;
+	add r17=r17,r8				// r17 = &pid->numbers[pid->level]
+	;;
+	ld4 r8=[r17]				// r8 = pid->numbers[pid->level].nr
+	;;
 	cmp.ne p8,p0=0,r9
 	mov r17=-1
 	;;
@@ -210,27 +236,25 @@ ENTRY(fsys_gettimeofday)
 	// Note that instructions are optimized for McKinley. McKinley can
 	// process two bundles simultaneously and therefore we continuously
 	// try to feed the CPU two bundles and then a stop.
-	//
-	// Additional note that code has changed a lot. Optimization is TBD.
-	// Comments begin with "?" are maybe outdated.
-	tnat.nz p6,p0 = r31	// ? branch deferred to fit later bundle
-	mov pr = r30,0xc000	// Set predicates according to function
+
 	add r2 = TI_FLAGS+IA64_TASK_SIZE,r16
+	tnat.nz p6,p0 = r31		// guard against Nat argument
+(p6)	br.cond.spnt.few .fail_einval
 	movl r20 = fsyscall_gtod_data // load fsyscall gettimeofday data address
 	;;
+	ld4 r2 = [r2]			// process work pending flags
 	movl r29 = itc_jitter_data	// itc_jitter
 	add r22 = IA64_GTOD_WALL_TIME_OFFSET,r20	// wall_time
-	ld4 r2 = [r2]		// process work pending flags
-	;;
-(p15)	add r22 = IA64_GTOD_MONO_TIME_OFFSET,r20	// monotonic_time
 	add r21 = IA64_CLKSRC_MMIO_OFFSET,r20
-	add r19 = IA64_ITC_LASTCYCLE_OFFSET,r29
+	mov pr = r30,0xc000	// Set predicates according to function
+	;;
 	and r2 = TIF_ALLWORK_MASK,r2
-(p6)    br.cond.spnt.few .fail_einval	// ? deferred branch
+	add r19 = IA64_ITC_LASTCYCLE_OFFSET,r29
+(p15)	add r22 = IA64_GTOD_MONO_TIME_OFFSET,r20	// monotonic_time
 	;;
-	add r26 = IA64_CLKSRC_CYCLE_LAST_OFFSET,r20 // clksrc_cycle_last
+	add r26 = IA64_CLKSRC_CYCLE_LAST_OFFSET,r20	// clksrc_cycle_last
 	cmp.ne p6, p0 = 0, r2	// Fallback if work is scheduled
-(p6)    br.cond.spnt.many fsys_fallback_syscall
+(p6)	br.cond.spnt.many fsys_fallback_syscall
 	;;
 	// Begin critical section
 .time_redo:
@@ -258,7 +282,6 @@ ENTRY(fsys_gettimeofday)
 (p8)	mov r2 = ar.itc		// CPU_TIMER. 36 clocks latency!!!
 (p9)	ld8 r2 = [r30]		// MMIO_TIMER. Could also have latency issues..
 (p13)	ld8 r25 = [r19]		// get itc_lastcycle value
-	;;		// ? could be removed by moving the last add upward
 	ld8 r9 = [r22],IA64_TIMESPEC_TV_NSEC_OFFSET	// tv_sec
 	;;
 	ld8 r8 = [r22],-IA64_TIMESPEC_TV_NSEC_OFFSET	// tv_nsec
@@ -285,13 +308,12 @@ ENTRY(fsys_gettimeofday)
 EX(.fail_efault, probe.w.fault r31, 3)
 	xmpy.l f8 = f8,f7	// nsec_per_cyc*(counter-last_counter)
 	;;
-	// ? simulate tbit.nz.or p7,p0 = r28,0
 	getf.sig r2 = f8
 	mf
 	;;
 	ld4 r10 = [r20]		// gtod_lock.sequence
 	shr.u r2 = r2,r23	// shift by factor
-	;;		// ? overloaded 3 bundles!
+	;;
 	add r8 = r8,r2		// Add xtime.nsecs
 	cmp4.ne p7,p0 = r28,r10
 (p7)	br.cond.dpnt.few .time_redo	// sequence number changed, redo
@@ -319,9 +341,9 @@ EX(.fail_efault, probe.w.fault r31, 3)
 EX(.fail_efault, probe.w.fault r23, 3)	// This also costs 5 cycles
 (p14)	xmpy.hu f8 = f8, f7		// xmpy has 5 cycles latency so use it
 	;;
-	mov r8 = r0
 (p14)	getf.sig r2 = f8
 	;;
+	mov r8 = r0
 (p14)	shr.u r21 = r2, 4
 	;;
 EX(.fail_efault, st8 [r31] = r9)
@@ -660,7 +682,11 @@ GLOBAL_ENTRY(fsys_bubble_down)
 	nop.i 0
 	;;
 	mov ar.rsc=0				// M2   set enforced lazy mode, pl 0, LE, loadrs=0
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+	mov.m r30=ar.itc			// M    get cycle for accounting
+#else
 	nop.m 0
+#endif
 	nop.i 0
 	;;
 	mov r23=ar.bspstore			// M2 (12 cyc) save ar.bspstore
@@ -682,6 +708,28 @@ GLOBAL_ENTRY(fsys_bubble_down)
 	cmp.ne pKStk,pUStk=r0,r0		// A    set pKStk <- 0, pUStk <- 1
 	br.call.sptk.many b7=ia64_syscall_setup	// B
 	;;
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+	// mov.m r30=ar.itc is called in advance
+	add r16=TI_AC_STAMP+IA64_TASK_SIZE,r2
+	add r17=TI_AC_LEAVE+IA64_TASK_SIZE,r2
+	;;
+	ld8 r18=[r16],TI_AC_STIME-TI_AC_STAMP	// time at last check in kernel
+	ld8 r19=[r17],TI_AC_UTIME-TI_AC_LEAVE	// time at leave kernel
+	;;
+	ld8 r20=[r16],TI_AC_STAMP-TI_AC_STIME	// cumulated stime
+	ld8 r21=[r17]				// cumulated utime
+	sub r22=r19,r18				// stime before leave kernel
+	;;
+	st8 [r16]=r30,TI_AC_STIME-TI_AC_STAMP	// update stamp
+	sub r18=r30,r19				// elapsed time in user mode
+	;;
+	add r20=r20,r22				// sum stime
+	add r21=r21,r18				// sum utime
+	;;
+	st8 [r16]=r20				// update stime
+	st8 [r17]=r21				// update utime
+	;;
+#endif
 	mov ar.rsc=0x3				// M2   set eager mode, pl 0, LE, loadrs=0
 	mov rp=r14				// I0   set the real return addr
 	and r3=_TIF_SYSCALL_TRACEAUDIT,r3	// A
diff --git a/arch/ia64/kernel/head.S b/arch/ia64/kernel/head.S
index d3a41d5..ddeab4e 100644
--- a/arch/ia64/kernel/head.S
+++ b/arch/ia64/kernel/head.S
@@ -1002,6 +1002,26 @@ GLOBAL_ENTRY(sched_clock)
 	br.ret.sptk.many rp
 END(sched_clock)
 
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+GLOBAL_ENTRY(cycle_to_cputime)
+	alloc r16=ar.pfs,1,0,0,0
+	addl r8=THIS_CPU(cpu_info) + IA64_CPUINFO_NSEC_PER_CYC_OFFSET,r0
+	;;
+	ldf8 f8=[r8]
+	;;
+	setf.sig f9=r32
+	;;
+	xmpy.lu f10=f9,f8	// calculate low 64 bits of 128-bit product	(4 cyc)
+	xmpy.hu f11=f9,f8	// calculate high 64 bits of 128-bit product
+	;;
+	getf.sig r8=f10		//						(5 cyc)
+	getf.sig r9=f11
+	;;
+	shrp r8=r9,r8,IA64_NSEC_PER_CYC_SHIFT
+	br.ret.sptk.many rp
+END(cycle_to_cputime)
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
+
 GLOBAL_ENTRY(start_kernel_thread)
 	.prologue
 	.save rp, r0				// this is the end of the call-chain
diff --git a/arch/ia64/kernel/ia64_ksyms.c b/arch/ia64/kernel/ia64_ksyms.c
index 8e7193d..6da1f20 100644
--- a/arch/ia64/kernel/ia64_ksyms.c
+++ b/arch/ia64/kernel/ia64_ksyms.c
@@ -19,12 +19,6 @@ EXPORT_SYMBOL_GPL(empty_zero_page);
 EXPORT_SYMBOL(ip_fast_csum);		/* hand-coded assembly */
 EXPORT_SYMBOL(csum_ipv6_magic);
 
-#include <asm/semaphore.h>
-EXPORT_SYMBOL(__down);
-EXPORT_SYMBOL(__down_interruptible);
-EXPORT_SYMBOL(__down_trylock);
-EXPORT_SYMBOL(__up);
-
 #include <asm/page.h>
 EXPORT_SYMBOL(clear_page);
 
diff --git a/arch/ia64/kernel/irq_ia64.c b/arch/ia64/kernel/irq_ia64.c
index d8be23f..5538471 100644
--- a/arch/ia64/kernel/irq_ia64.c
+++ b/arch/ia64/kernel/irq_ia64.c
@@ -472,7 +472,7 @@ ia64_handle_irq (ia64_vector vector, struct pt_regs *regs)
 			static unsigned char count;
 			static long last_time;
 
-			if (jiffies - last_time > 5*HZ)
+			if (time_after(jiffies, last_time + 5 * HZ))
 				count = 0;
 			if (++count < 5) {
 				last_time = jiffies;
diff --git a/arch/ia64/kernel/ivt.S b/arch/ia64/kernel/ivt.S
index 34f44d8..6678c49 100644
--- a/arch/ia64/kernel/ivt.S
+++ b/arch/ia64/kernel/ivt.S
@@ -805,8 +805,13 @@ ENTRY(break_fault)
 
 (p8)	adds r28=16,r28				// A    switch cr.iip to next bundle
 (p9)	adds r8=1,r8				// A    increment ei to next slot
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+	;;
+	mov b6=r30				// I0   setup syscall handler branch reg early
+#else
 	nop.i 0
 	;;
+#endif
 
 	mov.m r25=ar.unat			// M2 (5 cyc)
 	dep r29=r8,r29,41,2			// I0   insert new ei into cr.ipsr
@@ -817,7 +822,11 @@ ENTRY(break_fault)
 	//
 ///////////////////////////////////////////////////////////////////////
 	st1 [r16]=r0				// M2|3 clear current->thread.on_ustack flag
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+	mov.m r30=ar.itc			// M    get cycle for accounting
+#else
 	mov b6=r30				// I0   setup syscall handler branch reg early
+#endif
 	cmp.eq pKStk,pUStk=r0,r17		// A    were we on kernel stacks already?
 
 	and r9=_TIF_SYSCALL_TRACEAUDIT,r9	// A    mask trace or audit
@@ -829,6 +838,30 @@ ENTRY(break_fault)
 	cmp.eq p14,p0=r9,r0			// A    are syscalls being traced/audited?
 	br.call.sptk.many b7=ia64_syscall_setup	// B
 1:
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+	// mov.m r30=ar.itc is called in advance, and r13 is current
+	add r16=TI_AC_STAMP+IA64_TASK_SIZE,r13	// A
+	add r17=TI_AC_LEAVE+IA64_TASK_SIZE,r13	// A
+(pKStk)	br.cond.spnt .skip_accounting		// B	unlikely skip
+	;;
+	ld8 r18=[r16],TI_AC_STIME-TI_AC_STAMP	// M  get last stamp
+	ld8 r19=[r17],TI_AC_UTIME-TI_AC_LEAVE	// M  time at leave
+	;;
+	ld8 r20=[r16],TI_AC_STAMP-TI_AC_STIME	// M  cumulated stime
+	ld8 r21=[r17]				// M  cumulated utime
+	sub r22=r19,r18				// A  stime before leave
+	;;
+	st8 [r16]=r30,TI_AC_STIME-TI_AC_STAMP	// M  update stamp
+	sub r18=r30,r19				// A  elapsed time in user
+	;;
+	add r20=r20,r22				// A  sum stime
+	add r21=r21,r18				// A  sum utime
+	;;
+	st8 [r16]=r20				// M  update stime
+	st8 [r17]=r21				// M  update utime
+	;;
+.skip_accounting:
+#endif
 	mov ar.rsc=0x3				// M2   set eager mode, pl 0, LE, loadrs=0
 	nop 0
 	bsw.1					// B (6 cyc) regs are saved, switch to bank 1
@@ -928,6 +961,7 @@ END(interrupt)
 	 *	- r27: saved ar.rsc
 	 *	- r28: saved cr.iip
 	 *	- r29: saved cr.ipsr
+	 *	- r30: ar.itc for accounting (don't touch)
 	 *	- r31: saved pr
 	 *	-  b0: original contents (to be saved)
 	 * On exit:
@@ -1090,6 +1124,41 @@ END(dispatch_illegal_op_fault)
 	DBG_FAULT(16)
 	FAULT(16)
 
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+	/*
+	 * There is no particular reason for this code to be here, other than
+	 * that there happens to be space here that would go unused otherwise.
+	 * If this fault ever gets "unreserved", simply moved the following
+	 * code to a more suitable spot...
+	 *
+	 * account_sys_enter is called from SAVE_MIN* macros if accounting is
+	 * enabled and if the macro is entered from user mode.
+	 */
+ENTRY(account_sys_enter)
+	// mov.m r20=ar.itc is called in advance, and r13 is current
+	add r16=TI_AC_STAMP+IA64_TASK_SIZE,r13
+	add r17=TI_AC_LEAVE+IA64_TASK_SIZE,r13
+	;;
+	ld8 r18=[r16],TI_AC_STIME-TI_AC_STAMP	// time at last check in kernel
+	ld8 r19=[r17],TI_AC_UTIME-TI_AC_LEAVE	// time at left from kernel
+        ;;
+	ld8 r23=[r16],TI_AC_STAMP-TI_AC_STIME	// cumulated stime
+	ld8 r21=[r17]				// cumulated utime
+	sub r22=r19,r18				// stime before leave kernel
+	;;
+	st8 [r16]=r20,TI_AC_STIME-TI_AC_STAMP	// update stamp
+	sub r18=r20,r19				// elapsed time in user mode
+	;;
+	add r23=r23,r22				// sum stime
+	add r21=r21,r18				// sum utime
+	;;
+	st8 [r16]=r23				// update stime
+	st8 [r17]=r21				// update utime
+	;;
+	br.ret.sptk.many rp
+END(account_sys_enter)
+#endif
+
 	.org ia64_ivt+0x4400
 /////////////////////////////////////////////////////////////////////////////////////////
 // 0x4400 Entry 17 (size 64 bundles) Reserved
diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c
index 8d9a446..233434f 100644
--- a/arch/ia64/kernel/kprobes.c
+++ b/arch/ia64/kernel/kprobes.c
@@ -78,6 +78,20 @@ static enum instruction_type bundle_encoding[32][3] = {
   { u, u, u },  			/* 1F */
 };
 
+/* Insert a long branch code */
+static void __kprobes set_brl_inst(void *from, void *to)
+{
+	s64 rel = ((s64) to - (s64) from) >> 4;
+	bundle_t *brl;
+	brl = (bundle_t *) ((u64) from & ~0xf);
+	brl->quad0.template = 0x05;	/* [MLX](stop) */
+	brl->quad0.slot0 = NOP_M_INST;	/* nop.m 0x0 */
+	brl->quad0.slot1_p0 = ((rel >> 20) & 0x7fffffffff) << 2;
+	brl->quad1.slot1_p1 = (((rel >> 20) & 0x7fffffffff) << 2) >> (64 - 46);
+	/* brl.cond.sptk.many.clr rel<<4 (qp=0) */
+	brl->quad1.slot2 = BRL_INST(rel >> 59, rel & 0xfffff);
+}
+
 /*
  * In this function we check to see if the instruction
  * is IP relative instruction and update the kprobe
@@ -496,6 +510,77 @@ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
 	regs->b0 = ((struct fnptr *)kretprobe_trampoline)->ip;
 }
 
+/* Check the instruction in the slot is break */
+static int __kprobes __is_ia64_break_inst(bundle_t *bundle, uint slot)
+{
+	unsigned int major_opcode;
+	unsigned int template = bundle->quad0.template;
+	unsigned long kprobe_inst;
+
+	/* Move to slot 2, if bundle is MLX type and kprobe slot is 1 */
+	if (slot == 1 && bundle_encoding[template][1] == L)
+		slot++;
+
+	/* Get Kprobe probe instruction at given slot*/
+	get_kprobe_inst(bundle, slot, &kprobe_inst, &major_opcode);
+
+	/* For break instruction,
+	 * Bits 37:40 Major opcode to be zero
+	 * Bits 27:32 X6 to be zero
+	 * Bits 32:35 X3 to be zero
+	 */
+	if (major_opcode || ((kprobe_inst >> 27) & 0x1FF)) {
+		/* Not a break instruction */
+		return 0;
+	}
+
+	/* Is a break instruction */
+	return 1;
+}
+
+/*
+ * In this function, we check whether the target bundle modifies IP or
+ * it triggers an exception. If so, it cannot be boostable.
+ */
+static int __kprobes can_boost(bundle_t *bundle, uint slot,
+			       unsigned long bundle_addr)
+{
+	unsigned int template = bundle->quad0.template;
+
+	do {
+		if (search_exception_tables(bundle_addr + slot) ||
+		    __is_ia64_break_inst(bundle, slot))
+			return 0;	/* exception may occur in this bundle*/
+	} while ((++slot) < 3);
+	template &= 0x1e;
+	if (template >= 0x10 /* including B unit */ ||
+	    template == 0x04 /* including X unit */ ||
+	    template == 0x06) /* undefined */
+		return 0;
+
+	return 1;
+}
+
+/* Prepare long jump bundle and disables other boosters if need */
+static void __kprobes prepare_booster(struct kprobe *p)
+{
+	unsigned long addr = (unsigned long)p->addr & ~0xFULL;
+	unsigned int slot = (unsigned long)p->addr & 0xf;
+	struct kprobe *other_kp;
+
+	if (can_boost(&p->ainsn.insn[0].bundle, slot, addr)) {
+		set_brl_inst(&p->ainsn.insn[1].bundle, (bundle_t *)addr + 1);
+		p->ainsn.inst_flag |= INST_FLAG_BOOSTABLE;
+	}
+
+	/* disables boosters in previous slots */
+	for (; addr < (unsigned long)p->addr; addr++) {
+		other_kp = get_kprobe((void *)addr);
+		if (other_kp)
+			other_kp->ainsn.inst_flag &= ~INST_FLAG_BOOSTABLE;
+	}
+}
+
 int __kprobes arch_prepare_kprobe(struct kprobe *p)
 {
 	unsigned long addr = (unsigned long) p->addr;
@@ -530,6 +615,8 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
 
 	prepare_break_inst(template, slot, major_opcode, kprobe_inst, p, qp);
 
+	prepare_booster(p);
+
 	return 0;
 }
 
@@ -543,7 +630,9 @@ void __kprobes arch_arm_kprobe(struct kprobe *p)
 	src = &p->opcode.bundle;
 
 	flush_icache_range((unsigned long)p->ainsn.insn,
-			(unsigned long)p->ainsn.insn + sizeof(kprobe_opcode_t));
+			   (unsigned long)p->ainsn.insn +
+			   sizeof(kprobe_opcode_t) * MAX_INSN_SIZE);
+
 	switch (p->ainsn.slot) {
 		case 0:
 			dest->quad0.slot0 = src->quad0.slot0;
@@ -584,13 +673,13 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p)
 void __kprobes arch_remove_kprobe(struct kprobe *p)
 {
 	mutex_lock(&kprobe_mutex);
-	free_insn_slot(p->ainsn.insn, 0);
+	free_insn_slot(p->ainsn.insn, p->ainsn.inst_flag & INST_FLAG_BOOSTABLE);
 	mutex_unlock(&kprobe_mutex);
 }
 /*
  * We are resuming execution after a single step fault, so the pt_regs
  * structure reflects the register state after we executed the instruction
- * located in the kprobe (p->ainsn.insn.bundle).  We still need to adjust
+ * located in the kprobe (p->ainsn.insn->bundle).  We still need to adjust
  * the ip to point back to the original stack address. To set the IP address
  * to original stack address, handle the case where we need to fixup the
  * relative IP address and/or fixup branch register.
@@ -607,7 +696,7 @@ static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs)
 	if (slot == 1 && bundle_encoding[template][1] == L)
 		slot = 2;
 
-	if (p->ainsn.inst_flag) {
+	if (p->ainsn.inst_flag & ~INST_FLAG_BOOSTABLE) {
 
 		if (p->ainsn.inst_flag & INST_FLAG_FIX_RELATIVE_IP_ADDR) {
 			/* Fix relative IP address */
@@ -686,33 +775,12 @@ static void __kprobes prepare_ss(struct kprobe *p, struct pt_regs *regs)
 static int __kprobes is_ia64_break_inst(struct pt_regs *regs)
 {
 	unsigned int slot = ia64_psr(regs)->ri;
-	unsigned int template, major_opcode;
-	unsigned long kprobe_inst;
 	unsigned long *kprobe_addr = (unsigned long *)regs->cr_iip;
 	bundle_t bundle;
 
 	memcpy(&bundle, kprobe_addr, sizeof(bundle_t));
-	template = bundle.quad0.template;
-
-	/* Move to slot 2, if bundle is MLX type and kprobe slot is 1 */
-	if (slot == 1 && bundle_encoding[template][1] == L)
-		slot++;
 
-	/* Get Kprobe probe instruction at given slot*/
-	get_kprobe_inst(&bundle, slot, &kprobe_inst, &major_opcode);
-
-	/* For break instruction,
-	 * Bits 37:40 Major opcode to be zero
-	 * Bits 27:32 X6 to be zero
-	 * Bits 32:35 X3 to be zero
-	 */
-	if (major_opcode || ((kprobe_inst >> 27) & 0x1FF) ) {
-		/* Not a break instruction */
-		return 0;
-	}
-
-	/* Is a break instruction */
-	return 1;
+	return __is_ia64_break_inst(&bundle, slot);
 }
 
 static int __kprobes pre_kprobes_handler(struct die_args *args)
@@ -802,6 +870,19 @@ static int __kprobes pre_kprobes_handler(struct die_args *args)
 		return 1;
 
 ss_probe:
+#if !defined(CONFIG_PREEMPT) || defined(CONFIG_PM)
+	if (p->ainsn.inst_flag == INST_FLAG_BOOSTABLE && !p->post_handler) {
+		/* Boost up -- we can execute copied instructions directly */
+		ia64_psr(regs)->ri = p->ainsn.slot;
+		regs->cr_iip = (unsigned long)&p->ainsn.insn->bundle & ~0xFULL;
+		/* turn single stepping off */
+		ia64_psr(regs)->ss = 0;
+
+		reset_current_kprobe();
+		preempt_enable_no_resched();
+		return 1;
+	}
+#endif
 	prepare_ss(p, regs);
 	kcb->kprobe_status = KPROBE_HIT_SS;
 	return 1;
diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c
index 6c18221..705176b 100644
--- a/arch/ia64/kernel/mca.c
+++ b/arch/ia64/kernel/mca.c
@@ -69,6 +69,7 @@
  * 2007-04-27 Russ Anderson <rja@sgi.com>
  *	      Support multiple cpus going through OS_MCA in the same event.
  */
+#include <linux/jiffies.h>
 #include <linux/types.h>
 #include <linux/init.h>
 #include <linux/sched.h>
@@ -97,6 +98,7 @@
 
 #include <asm/irq.h>
 #include <asm/hw_irq.h>
+#include <asm/tlb.h>
 
 #include "mca_drv.h"
 #include "entry.h"
@@ -107,11 +109,26 @@
 # define IA64_MCA_DEBUG(fmt...)
 #endif
 
+#define NOTIFY_INIT(event, regs, arg, spin)				\
+do {									\
+	if ((notify_die((event), "INIT", (regs), (arg), 0, 0)		\
+			== NOTIFY_STOP) && ((spin) == 1))		\
+		ia64_mca_spin(__func__);				\
+} while (0)
+
+#define NOTIFY_MCA(event, regs, arg, spin)				\
+do {									\
+	if ((notify_die((event), "MCA", (regs), (arg), 0, 0)		\
+			== NOTIFY_STOP) && ((spin) == 1))		\
+		ia64_mca_spin(__func__);				\
+} while (0)
+
 /* Used by mca_asm.S */
 DEFINE_PER_CPU(u64, ia64_mca_data); /* == __per_cpu_mca[smp_processor_id()] */
 DEFINE_PER_CPU(u64, ia64_mca_per_cpu_pte); /* PTE to map per-CPU area */
 DEFINE_PER_CPU(u64, ia64_mca_pal_pte);	    /* PTE to map PAL code */
 DEFINE_PER_CPU(u64, ia64_mca_pal_base);    /* vaddr PAL code granule */
+DEFINE_PER_CPU(u64, ia64_mca_tr_reload);   /* Flag for TR reload */
 
 unsigned long __per_cpu_mca[NR_CPUS];
 
@@ -293,7 +310,8 @@ static void ia64_mlogbuf_dump_from_init(void)
 	if (mlogbuf_finished)
 		return;
 
-	if (mlogbuf_timestamp && (mlogbuf_timestamp + 30*HZ > jiffies)) {
+	if (mlogbuf_timestamp &&
+			time_before(jiffies, mlogbuf_timestamp + 30 * HZ)) {
 		printk(KERN_ERR "INIT: mlogbuf_dump is interrupted by INIT "
 			" and the system seems to be messed up.\n");
 		ia64_mlogbuf_finish(0);
@@ -762,9 +780,8 @@ ia64_mca_rendez_int_handler(int rendez_irq, void *arg)
 
 	/* Mask all interrupts */
 	local_irq_save(flags);
-	if (notify_die(DIE_MCA_RENDZVOUS_ENTER, "MCA", get_irq_regs(),
-		       (long)&nd, 0, 0) == NOTIFY_STOP)
-		ia64_mca_spin(__func__);
+
+	NOTIFY_MCA(DIE_MCA_RENDZVOUS_ENTER, get_irq_regs(), (long)&nd, 1);
 
 	ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_DONE;
 	/* Register with the SAL monarch that the slave has
@@ -772,17 +789,13 @@ ia64_mca_rendez_int_handler(int rendez_irq, void *arg)
 	 */
 	ia64_sal_mc_rendez();
 
-	if (notify_die(DIE_MCA_RENDZVOUS_PROCESS, "MCA", get_irq_regs(),
-		       (long)&nd, 0, 0) == NOTIFY_STOP)
-		ia64_mca_spin(__func__);
+	NOTIFY_MCA(DIE_MCA_RENDZVOUS_PROCESS, get_irq_regs(), (long)&nd, 1);
 
 	/* Wait for the monarch cpu to exit. */
 	while (monarch_cpu != -1)
 	       cpu_relax();	/* spin until monarch leaves */
 
-	if (notify_die(DIE_MCA_RENDZVOUS_LEAVE, "MCA", get_irq_regs(),
-		       (long)&nd, 0, 0) == NOTIFY_STOP)
-		ia64_mca_spin(__func__);
+	NOTIFY_MCA(DIE_MCA_RENDZVOUS_LEAVE, get_irq_regs(), (long)&nd, 1);
 
 	ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE;
 	/* Enable all interrupts */
@@ -1182,6 +1195,49 @@ all_in:
 	return;
 }
 
+/*  mca_insert_tr
+ *
+ *  Switch rid when TR reload and needed!
+ *  iord: 1: itr, 2: itr;
+ *
+*/
+static void mca_insert_tr(u64 iord)
+{
+
+	int i;
+	u64 old_rr;
+	struct ia64_tr_entry *p;
+	unsigned long psr;
+	int cpu = smp_processor_id();
+
+	psr = ia64_clear_ic();
+	for (i = IA64_TR_ALLOC_BASE; i < IA64_TR_ALLOC_MAX; i++) {
+		p = &__per_cpu_idtrs[cpu][iord-1][i];
+		if (p->pte & 0x1) {
+			old_rr = ia64_get_rr(p->ifa);
+			if (old_rr != p->rr) {
+				ia64_set_rr(p->ifa, p->rr);
+				ia64_srlz_d();
+			}
+			ia64_ptr(iord, p->ifa, p->itir >> 2);
+			ia64_srlz_i();
+			if (iord & 0x1) {
+				ia64_itr(0x1, i, p->ifa, p->pte, p->itir >> 2);
+				ia64_srlz_i();
+			}
+			if (iord & 0x2) {
+				ia64_itr(0x2, i, p->ifa, p->pte, p->itir >> 2);
+				ia64_srlz_i();
+			}
+			if (old_rr != p->rr) {
+				ia64_set_rr(p->ifa, old_rr);
+				ia64_srlz_d();
+			}
+		}
+	}
+	ia64_set_psr(psr);
+}
+
 /*
  * ia64_mca_handler
  *
@@ -1209,7 +1265,7 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw,
 	int recover, cpu = smp_processor_id();
 	struct task_struct *previous_current;
 	struct ia64_mca_notify_die nd =
-		{ .sos = sos, .monarch_cpu = &monarch_cpu };
+		{ .sos = sos, .monarch_cpu = &monarch_cpu, .data = &recover };
 	static atomic_t mca_count;
 	static cpumask_t mca_cpu;
 
@@ -1225,9 +1281,7 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw,
 
 	previous_current = ia64_mca_modify_original_stack(regs, sw, sos, "MCA");
 
-	if (notify_die(DIE_MCA_MONARCH_ENTER, "MCA", regs, (long)&nd, 0, 0)
-			== NOTIFY_STOP)
-		ia64_mca_spin(__func__);
+	NOTIFY_MCA(DIE_MCA_MONARCH_ENTER, regs, (long)&nd, 1);
 
 	ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_CONCURRENT_MCA;
 	if (sos->monarch) {
@@ -1241,13 +1295,12 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw,
 		 * does not work.
 		 */
 		ia64_mca_wakeup_all();
-		if (notify_die(DIE_MCA_MONARCH_PROCESS, "MCA", regs, (long)&nd, 0, 0)
-				== NOTIFY_STOP)
-			ia64_mca_spin(__func__);
 	} else {
 		while (cpu_isset(cpu, mca_cpu))
 			cpu_relax();	/* spin until monarch wakes us */
-        }
+	}
+
+	NOTIFY_MCA(DIE_MCA_MONARCH_PROCESS, regs, (long)&nd, 1);
 
 	/* Get the MCA error record and log it */
 	ia64_mca_log_sal_error_record(SAL_INFO_TYPE_MCA);
@@ -1266,15 +1319,14 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw,
 	} else {
 		/* Dump buffered message to console */
 		ia64_mlogbuf_finish(1);
-#ifdef CONFIG_KEXEC
-		atomic_set(&kdump_in_progress, 1);
-		monarch_cpu = -1;
-#endif
 	}
-	if (notify_die(DIE_MCA_MONARCH_LEAVE, "MCA", regs, (long)&nd, 0, recover)
-			== NOTIFY_STOP)
-		ia64_mca_spin(__func__);
 
+	if (__get_cpu_var(ia64_mca_tr_reload)) {
+		mca_insert_tr(0x1); /*Reload dynamic itrs*/
+		mca_insert_tr(0x2); /*Reload dynamic itrs*/
+	}
+
+	NOTIFY_MCA(DIE_MCA_MONARCH_LEAVE, regs, (long)&nd, 1);
 
 	if (atomic_dec_return(&mca_count) > 0) {
 		int i;
@@ -1595,7 +1647,7 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw,
 	struct ia64_mca_notify_die nd =
 		{ .sos = sos, .monarch_cpu = &monarch_cpu };
 
-	(void) notify_die(DIE_INIT_ENTER, "INIT", regs, (long)&nd, 0, 0);
+	NOTIFY_INIT(DIE_INIT_ENTER, regs, (long)&nd, 0);
 
 	mprintk(KERN_INFO "Entered OS INIT handler. PSP=%lx cpu=%d monarch=%ld\n",
 		sos->proc_state_param, cpu, sos->monarch);
@@ -1632,17 +1684,15 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw,
 		ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_INIT;
 		while (monarch_cpu == -1)
 		       cpu_relax();	/* spin until monarch enters */
-		if (notify_die(DIE_INIT_SLAVE_ENTER, "INIT", regs, (long)&nd, 0, 0)
-				== NOTIFY_STOP)
-			ia64_mca_spin(__func__);
-		if (notify_die(DIE_INIT_SLAVE_PROCESS, "INIT", regs, (long)&nd, 0, 0)
-				== NOTIFY_STOP)
-			ia64_mca_spin(__func__);
+
+		NOTIFY_INIT(DIE_INIT_SLAVE_ENTER, regs, (long)&nd, 1);
+		NOTIFY_INIT(DIE_INIT_SLAVE_PROCESS, regs, (long)&nd, 1);
+
 		while (monarch_cpu != -1)
 		       cpu_relax();	/* spin until monarch leaves */
-		if (notify_die(DIE_INIT_SLAVE_LEAVE, "INIT", regs, (long)&nd, 0, 0)
-				== NOTIFY_STOP)
-			ia64_mca_spin(__func__);
+
+		NOTIFY_INIT(DIE_INIT_SLAVE_LEAVE, regs, (long)&nd, 1);
+
 		mprintk("Slave on cpu %d returning to normal service.\n", cpu);
 		set_curr_task(cpu, previous_current);
 		ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE;
@@ -1651,9 +1701,7 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw,
 	}
 
 	monarch_cpu = cpu;
-	if (notify_die(DIE_INIT_MONARCH_ENTER, "INIT", regs, (long)&nd, 0, 0)
-			== NOTIFY_STOP)
-		ia64_mca_spin(__func__);
+	NOTIFY_INIT(DIE_INIT_MONARCH_ENTER, regs, (long)&nd, 1);
 
 	/*
 	 * Wait for a bit.  On some machines (e.g., HP's zx2000 and zx6000, INIT can be
@@ -1668,12 +1716,9 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw,
 	 * to default_monarch_init_process() above and just print all the
 	 * tasks.
 	 */
-	if (notify_die(DIE_INIT_MONARCH_PROCESS, "INIT", regs, (long)&nd, 0, 0)
-			== NOTIFY_STOP)
-		ia64_mca_spin(__func__);
-	if (notify_die(DIE_INIT_MONARCH_LEAVE, "INIT", regs, (long)&nd, 0, 0)
-			== NOTIFY_STOP)
-		ia64_mca_spin(__func__);
+	NOTIFY_INIT(DIE_INIT_MONARCH_PROCESS, regs, (long)&nd, 1);
+	NOTIFY_INIT(DIE_INIT_MONARCH_LEAVE, regs, (long)&nd, 1);
+
 	mprintk("\nINIT dump complete.  Monarch on cpu %d returning to normal service.\n", cpu);
 	atomic_dec(&monarchs);
 	set_curr_task(cpu, previous_current);
@@ -1905,7 +1950,7 @@ ia64_mca_init(void)
 			printk(KERN_INFO "Increasing MCA rendezvous timeout from "
 				"%ld to %ld milliseconds\n", timeout, isrv.v0);
 			timeout = isrv.v0;
-			(void) notify_die(DIE_MCA_NEW_TIMEOUT, "MCA", NULL, timeout, 0, 0);
+			NOTIFY_MCA(DIE_MCA_NEW_TIMEOUT, NULL, timeout, 0);
 			continue;
 		}
 		printk(KERN_ERR "Failed to register rendezvous interrupt "
diff --git a/arch/ia64/kernel/mca_asm.S b/arch/ia64/kernel/mca_asm.S
index 8bc7d25..a06d465 100644
--- a/arch/ia64/kernel/mca_asm.S
+++ b/arch/ia64/kernel/mca_asm.S
@@ -219,8 +219,13 @@ ia64_reload_tr:
 	mov r20=IA64_TR_CURRENT_STACK
 	;;
 	itr.d dtr[r20]=r16
+	GET_THIS_PADDR(r2, ia64_mca_tr_reload)
+	mov r18 = 1
 	;;
 	srlz.d
+	;;
+	st8 [r2] =r18
+	;;
 
 done_tlb_purge_and_reload:
 
diff --git a/arch/ia64/kernel/minstate.h b/arch/ia64/kernel/minstate.h
index c9ac8ba..7c548ac 100644
--- a/arch/ia64/kernel/minstate.h
+++ b/arch/ia64/kernel/minstate.h
@@ -3,6 +3,18 @@
 
 #include "entry.h"
 
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+/* read ar.itc in advance, and use it before leaving bank 0 */
+#define ACCOUNT_GET_STAMP				\
+(pUStk) mov.m r20=ar.itc;
+#define ACCOUNT_SYS_ENTER				\
+(pUStk) br.call.spnt rp=account_sys_enter		\
+	;;
+#else
+#define ACCOUNT_GET_STAMP
+#define ACCOUNT_SYS_ENTER
+#endif
+
 /*
  * DO_SAVE_MIN switches to the kernel stacks (if necessary) and saves
  * the minimum state necessary that allows us to turn psr.ic back
@@ -122,11 +134,13 @@
 	;;											\
 .mem.offset 0,0; st8.spill [r16]=r2,16;								\
 .mem.offset 8,0; st8.spill [r17]=r3,16;								\
+	ACCOUNT_GET_STAMP									\
 	adds r2=IA64_PT_REGS_R16_OFFSET,r1;							\
 	;;											\
 	EXTRA;											\
 	movl r1=__gp;		/* establish kernel global pointer */				\
 	;;											\
+	ACCOUNT_SYS_ENTER									\
 	bsw.1;			/* switch back to bank 1 (must be last in insn group) */	\
 	;;
 
diff --git a/arch/ia64/kernel/numa.c b/arch/ia64/kernel/numa.c
index a78b45f..c93420c 100644
--- a/arch/ia64/kernel/numa.c
+++ b/arch/ia64/kernel/numa.c
@@ -73,7 +73,7 @@ void __init build_cpu_to_node_map(void)
 	for(node=0; node < MAX_NUMNODES; node++)
 		cpus_clear(node_to_cpu_mask[node]);
 
-	for(cpu = 0; cpu < NR_CPUS; ++cpu) {
+	for_each_possible_early_cpu(cpu) {
 		node = -1;
 		for (i = 0; i < NR_CPUS; ++i)
 			if (cpu_physical_id(cpu) == node_cpuid[i].phys_id) {
diff --git a/arch/ia64/kernel/patch.c b/arch/ia64/kernel/patch.c
index 2cb9425..e0dca87 100644
--- a/arch/ia64/kernel/patch.c
+++ b/arch/ia64/kernel/patch.c
@@ -135,10 +135,10 @@ ia64_patch_mckinley_e9 (unsigned long start, unsigned long end)
 
 	while (offp < (s32 *) end) {
 		wp = (u64 *) ia64_imva((char *) offp + *offp);
-		wp[0] = 0x0000000100000000UL; /* nop.m 0; nop.i 0; nop.i 0 */
-		wp[1] = 0x0004000000000200UL;
-		wp[2] = 0x0000000100000011UL; /* nop.m 0; nop.i 0; br.ret.sptk.many b6 */
-		wp[3] = 0x0084006880000200UL;
+		wp[0] = 0x0000000100000011UL; /* nop.m 0; nop.i 0; br.ret.sptk.many b6 */
+		wp[1] = 0x0084006880000200UL;
+		wp[2] = 0x0000000100000000UL; /* nop.m 0; nop.i 0; nop.i 0 */
+		wp[3] = 0x0004000000000200UL;
 		ia64_fc(wp); ia64_fc(wp + 2);
 		++offp;
 	}
diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
index a2aabfd..7fbb51e 100644
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -4204,10 +4204,10 @@ pfm_check_task_exist(pfm_context_t *ctx)
 	do_each_thread (g, t) {
 		if (t->thread.pfm_context == ctx) {
 			ret = 0;
-			break;
+			goto out;
 		}
 	} while_each_thread (g, t);
-
+out:
 	read_unlock(&tasklist_lock);
 
 	DPRINT(("pfm_check_task_exist: ret=%d ctx=%p\n", ret, ctx));
@@ -5511,7 +5511,7 @@ stop_monitoring:
 }
 
 static int
-pfm_do_interrupt_handler(int irq, void *arg, struct pt_regs *regs)
+pfm_do_interrupt_handler(void *arg, struct pt_regs *regs)
 {
 	struct task_struct *task;
 	pfm_context_t *ctx;
@@ -5591,7 +5591,7 @@ pfm_interrupt_handler(int irq, void *arg)
 
 		start_cycles = ia64_get_itc();
 
-		ret = pfm_do_interrupt_handler(irq, arg, regs);
+		ret = pfm_do_interrupt_handler(arg, regs);
 
 		total_cycles = ia64_get_itc();
 
@@ -6695,16 +6695,12 @@ pfm_init(void)
 	/*
 	 * create /proc/perfmon (mostly for debugging purposes)
 	 */
- 	perfmon_dir = create_proc_entry("perfmon", S_IRUGO, NULL);
+	perfmon_dir = proc_create("perfmon", S_IRUGO, NULL, &pfm_proc_fops);
 	if (perfmon_dir == NULL) {
 		printk(KERN_ERR "perfmon: cannot create /proc entry, perfmon disabled\n");
 		pmu_conf = NULL;
 		return -1;
 	}
-  	/*
- 	 * install customized file operations for /proc/perfmon entry
- 	 */
- 	perfmon_dir->proc_fops = &pfm_proc_fops;
 
 	/*
 	 * create /proc/sys/kernel/perfmon (for debugging purposes)
diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c
index 49937a3..58dcfac 100644
--- a/arch/ia64/kernel/process.c
+++ b/arch/ia64/kernel/process.c
@@ -183,7 +183,7 @@ do_notify_resume_user (sigset_t *unused, struct sigscratch *scr, long in_syscall
 #endif
 
 	/* deal with pending signal delivery */
-	if (test_thread_flag(TIF_SIGPENDING)||test_thread_flag(TIF_RESTORE_SIGMASK))
+	if (test_thread_flag(TIF_SIGPENDING))
 		ia64_do_signal(scr, in_syscall);
 
 	/* copy user rbs to kernel rbs */
@@ -625,21 +625,6 @@ do_dump_fpu (struct unw_frame_info *info, void *arg)
 	do_dump_task_fpu(current, info, arg);
 }
 
-int
-dump_task_regs(struct task_struct *task, elf_gregset_t *regs)
-{
-	struct unw_frame_info tcore_info;
-
-	if (current == task) {
-		unw_init_running(do_copy_regs, regs);
-	} else {
-		memset(&tcore_info, 0, sizeof(tcore_info));
-		unw_init_from_blocked_task(&tcore_info, task);
-		do_copy_task_regs(task, &tcore_info, regs);
-	}
-	return 1;
-}
-
 void
 ia64_elf_core_copy_regs (struct pt_regs *pt, elf_gregset_t dst)
 {
@@ -647,21 +632,6 @@ ia64_elf_core_copy_regs (struct pt_regs *pt, elf_gregset_t dst)
 }
 
 int
-dump_task_fpu (struct task_struct *task, elf_fpregset_t *dst)
-{
-	struct unw_frame_info tcore_info;
-
-	if (current == task) {
-		unw_init_running(do_dump_fpu, dst);
-	} else {
-		memset(&tcore_info, 0, sizeof(tcore_info));
-		unw_init_from_blocked_task(&tcore_info, task);
-		do_dump_task_fpu(task, &tcore_info, dst);
-	}
-	return 1;
-}
-
-int
 dump_fpu (struct pt_regs *pt, elf_fpregset_t dst)
 {
 	unw_init_running(do_dump_fpu, dst);
diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c
index ab784ec..2a9943b 100644
--- a/arch/ia64/kernel/ptrace.c
+++ b/arch/ia64/kernel/ptrace.c
@@ -3,6 +3,9 @@
  *
  * Copyright (C) 1999-2005 Hewlett-Packard Co
  *	David Mosberger-Tang <davidm@hpl.hp.com>
+ * Copyright (C) 2006 Intel Co
+ *  2006-08-12	- IA64 Native Utrace implementation support added by
+ *	Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
  *
  * Derived from the x86 and Alpha versions.
  */
@@ -17,6 +20,8 @@
 #include <linux/security.h>
 #include <linux/audit.h>
 #include <linux/signal.h>
+#include <linux/regset.h>
+#include <linux/elf.h>
 
 #include <asm/pgtable.h>
 #include <asm/processor.h>
@@ -740,25 +745,6 @@ ia64_sync_fph (struct task_struct *task)
 	psr->dfh = 1;
 }
 
-static int
-access_fr (struct unw_frame_info *info, int regnum, int hi,
-	   unsigned long *data, int write_access)
-{
-	struct ia64_fpreg fpval;
-	int ret;
-
-	ret = unw_get_fr(info, regnum, &fpval);
-	if (ret < 0)
-		return ret;
-
-	if (write_access) {
-		fpval.u.bits[hi] = *data;
-		ret = unw_set_fr(info, regnum, fpval);
-	} else
-		*data = fpval.u.bits[hi];
-	return ret;
-}
-
 /*
  * Change the machine-state of CHILD such that it will return via the normal
  * kernel exit-path, rather than the syscall-exit path.
@@ -860,309 +846,7 @@ access_nat_bits (struct task_struct *child, struct pt_regs *pt,
 
 static int
 access_uarea (struct task_struct *child, unsigned long addr,
-	      unsigned long *data, int write_access)
-{
-	unsigned long *ptr, regnum, urbs_end, cfm;
-	struct switch_stack *sw;
-	struct pt_regs *pt;
-#	define pt_reg_addr(pt, reg)	((void *)			    \
-					 ((unsigned long) (pt)		    \
-					  + offsetof(struct pt_regs, reg)))
-
-
-	pt = task_pt_regs(child);
-	sw = (struct switch_stack *) (child->thread.ksp + 16);
-
-	if ((addr & 0x7) != 0) {
-		dprintk("ptrace: unaligned register address 0x%lx\n", addr);
-		return -1;
-	}
-
-	if (addr < PT_F127 + 16) {
-		/* accessing fph */
-		if (write_access)
-			ia64_sync_fph(child);
-		else
-			ia64_flush_fph(child);
-		ptr = (unsigned long *)
-			((unsigned long) &child->thread.fph + addr);
-	} else if ((addr >= PT_F10) && (addr < PT_F11 + 16)) {
-		/* scratch registers untouched by kernel (saved in pt_regs) */
-		ptr = pt_reg_addr(pt, f10) + (addr - PT_F10);
-	} else if (addr >= PT_F12 && addr < PT_F15 + 16) {
-		/*
-		 * Scratch registers untouched by kernel (saved in
-		 * switch_stack).
-		 */
-		ptr = (unsigned long *) ((long) sw
-					 + (addr - PT_NAT_BITS - 32));
-	} else if (addr < PT_AR_LC + 8) {
-		/* preserved state: */
-		struct unw_frame_info info;
-		char nat = 0;
-		int ret;
-
-		unw_init_from_blocked_task(&info, child);
-		if (unw_unwind_to_user(&info) < 0)
-			return -1;
-
-		switch (addr) {
-		      case PT_NAT_BITS:
-			return access_nat_bits(child, pt, &info,
-					       data, write_access);
-
-		      case PT_R4: case PT_R5: case PT_R6: case PT_R7:
-			if (write_access) {
-				/* read NaT bit first: */
-				unsigned long dummy;
-
-				ret = unw_get_gr(&info, (addr - PT_R4)/8 + 4,
-						 &dummy, &nat);
-				if (ret < 0)
-					return ret;
-			}
-			return unw_access_gr(&info, (addr - PT_R4)/8 + 4, data,
-					     &nat, write_access);
-
-		      case PT_B1: case PT_B2: case PT_B3:
-		      case PT_B4: case PT_B5:
-			return unw_access_br(&info, (addr - PT_B1)/8 + 1, data,
-					     write_access);
-
-		      case PT_AR_EC:
-			return unw_access_ar(&info, UNW_AR_EC, data,
-					     write_access);
-
-		      case PT_AR_LC:
-			return unw_access_ar(&info, UNW_AR_LC, data,
-					     write_access);
-
-		      default:
-			if (addr >= PT_F2 && addr < PT_F5 + 16)
-				return access_fr(&info, (addr - PT_F2)/16 + 2,
-						 (addr & 8) != 0, data,
-						 write_access);
-			else if (addr >= PT_F16 && addr < PT_F31 + 16)
-				return access_fr(&info,
-						 (addr - PT_F16)/16 + 16,
-						 (addr & 8) != 0,
-						 data, write_access);
-			else {
-				dprintk("ptrace: rejecting access to register "
-					"address 0x%lx\n", addr);
-				return -1;
-			}
-		}
-	} else if (addr < PT_F9+16) {
-		/* scratch state */
-		switch (addr) {
-		      case PT_AR_BSP:
-			/*
-			 * By convention, we use PT_AR_BSP to refer to
-			 * the end of the user-level backing store.
-			 * Use ia64_rse_skip_regs(PT_AR_BSP, -CFM.sof)
-			 * to get the real value of ar.bsp at the time
-			 * the kernel was entered.
-			 *
-			 * Furthermore, when changing the contents of
-			 * PT_AR_BSP (or PT_CFM) while the task is
-			 * blocked in a system call, convert the state
-			 * so that the non-system-call exit
-			 * path is used.  This ensures that the proper
-			 * state will be picked up when resuming
-			 * execution.  However, it *also* means that
-			 * once we write PT_AR_BSP/PT_CFM, it won't be
-			 * possible to modify the syscall arguments of
-			 * the pending system call any longer.  This
-			 * shouldn't be an issue because modifying
-			 * PT_AR_BSP/PT_CFM generally implies that
-			 * we're either abandoning the pending system
-			 * call or that we defer it's re-execution
-			 * (e.g., due to GDB doing an inferior
-			 * function call).
-			 */
-			urbs_end = ia64_get_user_rbs_end(child, pt, &cfm);
-			if (write_access) {
-				if (*data != urbs_end) {
-					if (in_syscall(pt))
-						convert_to_non_syscall(child,
-								       pt,
-								       cfm);
-					/*
-					 * Simulate user-level write
-					 * of ar.bsp:
-					 */
-					pt->loadrs = 0;
-					pt->ar_bspstore = *data;
-				}
-			} else
-				*data = urbs_end;
-			return 0;
-
-		      case PT_CFM:
-			urbs_end = ia64_get_user_rbs_end(child, pt, &cfm);
-			if (write_access) {
-				if (((cfm ^ *data) & PFM_MASK) != 0) {
-					if (in_syscall(pt))
-						convert_to_non_syscall(child,
-								       pt,
-								       cfm);
-					pt->cr_ifs = ((pt->cr_ifs & ~PFM_MASK)
-						      | (*data & PFM_MASK));
-				}
-			} else
-				*data = cfm;
-			return 0;
-
-		      case PT_CR_IPSR:
-			if (write_access) {
-				unsigned long tmp = *data;
-				/* psr.ri==3 is a reserved value: SDM 2:25 */
-				if ((tmp & IA64_PSR_RI) == IA64_PSR_RI)
-					tmp &= ~IA64_PSR_RI;
-				pt->cr_ipsr = ((tmp & IPSR_MASK)
-					       | (pt->cr_ipsr & ~IPSR_MASK));
-			} else
-				*data = (pt->cr_ipsr & IPSR_MASK);
-			return 0;
-
-		      case PT_AR_RSC:
-			if (write_access)
-				pt->ar_rsc = *data | (3 << 2); /* force PL3 */
-			else
-				*data = pt->ar_rsc;
-			return 0;
-
-		      case PT_AR_RNAT:
-			ptr = pt_reg_addr(pt, ar_rnat);
-			break;
-		      case PT_R1:
-			ptr = pt_reg_addr(pt, r1);
-			break;
-		      case PT_R2:  case PT_R3:
-			ptr = pt_reg_addr(pt, r2) + (addr - PT_R2);
-			break;
-		      case PT_R8:  case PT_R9:  case PT_R10: case PT_R11:
-			ptr = pt_reg_addr(pt, r8) + (addr - PT_R8);
-			break;
-		      case PT_R12: case PT_R13:
-			ptr = pt_reg_addr(pt, r12) + (addr - PT_R12);
-			break;
-		      case PT_R14:
-			ptr = pt_reg_addr(pt, r14);
-			break;
-		      case PT_R15:
-			ptr = pt_reg_addr(pt, r15);
-			break;
-		      case PT_R16: case PT_R17: case PT_R18: case PT_R19:
-		      case PT_R20: case PT_R21: case PT_R22: case PT_R23:
-		      case PT_R24: case PT_R25: case PT_R26: case PT_R27:
-		      case PT_R28: case PT_R29: case PT_R30: case PT_R31:
-			ptr = pt_reg_addr(pt, r16) + (addr - PT_R16);
-			break;
-		      case PT_B0:
-			ptr = pt_reg_addr(pt, b0);
-			break;
-		      case PT_B6:
-			ptr = pt_reg_addr(pt, b6);
-			break;
-		      case PT_B7:
-			ptr = pt_reg_addr(pt, b7);
-			break;
-		      case PT_F6:  case PT_F6+8: case PT_F7: case PT_F7+8:
-		      case PT_F8:  case PT_F8+8: case PT_F9: case PT_F9+8:
-			ptr = pt_reg_addr(pt, f6) + (addr - PT_F6);
-			break;
-		      case PT_AR_BSPSTORE:
-			ptr = pt_reg_addr(pt, ar_bspstore);
-			break;
-		      case PT_AR_UNAT:
-			ptr = pt_reg_addr(pt, ar_unat);
-			break;
-		      case PT_AR_PFS:
-			ptr = pt_reg_addr(pt, ar_pfs);
-			break;
-		      case PT_AR_CCV:
-			ptr = pt_reg_addr(pt, ar_ccv);
-			break;
-		      case PT_AR_FPSR:
-			ptr = pt_reg_addr(pt, ar_fpsr);
-			break;
-		      case PT_CR_IIP:
-			ptr = pt_reg_addr(pt, cr_iip);
-			break;
-		      case PT_PR:
-			ptr = pt_reg_addr(pt, pr);
-			break;
-			/* scratch register */
-
-		      default:
-			/* disallow accessing anything else... */
-			dprintk("ptrace: rejecting access to register "
-				"address 0x%lx\n", addr);
-			return -1;
-		}
-	} else if (addr <= PT_AR_SSD) {
-		ptr = pt_reg_addr(pt, ar_csd) + (addr - PT_AR_CSD);
-	} else {
-		/* access debug registers */
-
-		if (addr >= PT_IBR) {
-			regnum = (addr - PT_IBR) >> 3;
-			ptr = &child->thread.ibr[0];
-		} else {
-			regnum = (addr - PT_DBR) >> 3;
-			ptr = &child->thread.dbr[0];
-		}
-
-		if (regnum >= 8) {
-			dprintk("ptrace: rejecting access to register "
-				"address 0x%lx\n", addr);
-			return -1;
-		}
-#ifdef CONFIG_PERFMON
-		/*
-		 * Check if debug registers are used by perfmon. This
-		 * test must be done once we know that we can do the
-		 * operation, i.e. the arguments are all valid, but
-		 * before we start modifying the state.
-		 *
-		 * Perfmon needs to keep a count of how many processes
-		 * are trying to modify the debug registers for system
-		 * wide monitoring sessions.
-		 *
-		 * We also include read access here, because they may
-		 * cause the PMU-installed debug register state
-		 * (dbr[], ibr[]) to be reset. The two arrays are also
-		 * used by perfmon, but we do not use
-		 * IA64_THREAD_DBG_VALID. The registers are restored
-		 * by the PMU context switch code.
-		 */
-		if (pfm_use_debug_registers(child)) return -1;
-#endif
-
-		if (!(child->thread.flags & IA64_THREAD_DBG_VALID)) {
-			child->thread.flags |= IA64_THREAD_DBG_VALID;
-			memset(child->thread.dbr, 0,
-			       sizeof(child->thread.dbr));
-			memset(child->thread.ibr, 0,
-			       sizeof(child->thread.ibr));
-		}
-
-		ptr += regnum;
-
-		if ((regnum & 1) && write_access) {
-			/* don't let the user set kernel-level breakpoints: */
-			*ptr = *data & ~(7UL << 56);
-			return 0;
-		}
-	}
-	if (write_access)
-		*ptr = *data;
-	else
-		*data = *ptr;
-	return 0;
-}
+	      unsigned long *data, int write_access);
 
 static long
 ptrace_getregs (struct task_struct *child, struct pt_all_user_regs __user *ppr)
@@ -1626,3 +1310,892 @@ syscall_trace_leave (long arg0, long arg1, long arg2, long arg3,
 	if (test_thread_flag(TIF_RESTORE_RSE))
 		ia64_sync_krbs();
 }
+
+/* Utrace implementation starts here */
+struct regset_get {
+	void *kbuf;
+	void __user *ubuf;
+};
+
+struct regset_set {
+	const void *kbuf;
+	const void __user *ubuf;
+};
+
+struct regset_getset {
+	struct task_struct *target;
+	const struct user_regset *regset;
+	union {
+		struct regset_get get;
+		struct regset_set set;
+	} u;
+	unsigned int pos;
+	unsigned int count;
+	int ret;
+};
+
+static int
+access_elf_gpreg(struct task_struct *target, struct unw_frame_info *info,
+		unsigned long addr, unsigned long *data, int write_access)
+{
+	struct pt_regs *pt;
+	unsigned long *ptr = NULL;
+	int ret;
+	char nat = 0;
+
+	pt = task_pt_regs(target);
+	switch (addr) {
+	case ELF_GR_OFFSET(1):
+		ptr = &pt->r1;
+		break;
+	case ELF_GR_OFFSET(2):
+	case ELF_GR_OFFSET(3):
+		ptr = (void *)&pt->r2 + (addr - ELF_GR_OFFSET(2));
+		break;
+	case ELF_GR_OFFSET(4) ... ELF_GR_OFFSET(7):
+		if (write_access) {
+			/* read NaT bit first: */
+			unsigned long dummy;
+
+			ret = unw_get_gr(info, addr/8, &dummy, &nat);
+			if (ret < 0)
+				return ret;
+		}
+		return unw_access_gr(info, addr/8, data, &nat, write_access);
+	case ELF_GR_OFFSET(8) ... ELF_GR_OFFSET(11):
+		ptr = (void *)&pt->r8 + addr - ELF_GR_OFFSET(8);
+		break;
+	case ELF_GR_OFFSET(12):
+	case ELF_GR_OFFSET(13):
+		ptr = (void *)&pt->r12 + addr - ELF_GR_OFFSET(12);
+		break;
+	case ELF_GR_OFFSET(14):
+		ptr = &pt->r14;
+		break;
+	case ELF_GR_OFFSET(15):
+		ptr = &pt->r15;
+	}
+	if (write_access)
+		*ptr = *data;
+	else
+		*data = *ptr;
+	return 0;
+}
+
+static int
+access_elf_breg(struct task_struct *target, struct unw_frame_info *info,
+		unsigned long addr, unsigned long *data, int write_access)
+{
+	struct pt_regs *pt;
+	unsigned long *ptr = NULL;
+
+	pt = task_pt_regs(target);
+	switch (addr) {
+	case ELF_BR_OFFSET(0):
+		ptr = &pt->b0;
+		break;
+	case ELF_BR_OFFSET(1) ... ELF_BR_OFFSET(5):
+		return unw_access_br(info, (addr - ELF_BR_OFFSET(0))/8,
+				     data, write_access);
+	case ELF_BR_OFFSET(6):
+		ptr = &pt->b6;
+		break;
+	case ELF_BR_OFFSET(7):
+		ptr = &pt->b7;
+	}
+	if (write_access)
+		*ptr = *data;
+	else
+		*data = *ptr;
+	return 0;
+}
+
+static int
+access_elf_areg(struct task_struct *target, struct unw_frame_info *info,
+		unsigned long addr, unsigned long *data, int write_access)
+{
+	struct pt_regs *pt;
+	unsigned long cfm, urbs_end;
+	unsigned long *ptr = NULL;
+
+	pt = task_pt_regs(target);
+	if (addr >= ELF_AR_RSC_OFFSET && addr <= ELF_AR_SSD_OFFSET) {
+		switch (addr) {
+		case ELF_AR_RSC_OFFSET:
+			/* force PL3 */
+			if (write_access)
+				pt->ar_rsc = *data | (3 << 2);
+			else
+				*data = pt->ar_rsc;
+			return 0;
+		case ELF_AR_BSP_OFFSET:
+			/*
+			 * By convention, we use PT_AR_BSP to refer to
+			 * the end of the user-level backing store.
+			 * Use ia64_rse_skip_regs(PT_AR_BSP, -CFM.sof)
+			 * to get the real value of ar.bsp at the time
+			 * the kernel was entered.
+			 *
+			 * Furthermore, when changing the contents of
+			 * PT_AR_BSP (or PT_CFM) while the task is
+			 * blocked in a system call, convert the state
+			 * so that the non-system-call exit
+			 * path is used.  This ensures that the proper
+			 * state will be picked up when resuming
+			 * execution.  However, it *also* means that
+			 * once we write PT_AR_BSP/PT_CFM, it won't be
+			 * possible to modify the syscall arguments of
+			 * the pending system call any longer.  This
+			 * shouldn't be an issue because modifying
+			 * PT_AR_BSP/PT_CFM generally implies that
+			 * we're either abandoning the pending system
+			 * call or that we defer it's re-execution
+			 * (e.g., due to GDB doing an inferior
+			 * function call).
+			 */
+			urbs_end = ia64_get_user_rbs_end(target, pt, &cfm);
+			if (write_access) {
+				if (*data != urbs_end) {
+					if (in_syscall(pt))
+						convert_to_non_syscall(target,
+								       pt,
+								       cfm);
+					/*
+					 * Simulate user-level write
+					 * of ar.bsp:
+					 */
+					pt->loadrs = 0;
+					pt->ar_bspstore = *data;
+				}
+			} else
+				*data = urbs_end;
+			return 0;
+		case ELF_AR_BSPSTORE_OFFSET:
+			ptr = &pt->ar_bspstore;
+			break;
+		case ELF_AR_RNAT_OFFSET:
+			ptr = &pt->ar_rnat;
+			break;
+		case ELF_AR_CCV_OFFSET:
+			ptr = &pt->ar_ccv;
+			break;
+		case ELF_AR_UNAT_OFFSET:
+			ptr = &pt->ar_unat;
+			break;
+		case ELF_AR_FPSR_OFFSET:
+			ptr = &pt->ar_fpsr;
+			break;
+		case ELF_AR_PFS_OFFSET:
+			ptr = &pt->ar_pfs;
+			break;
+		case ELF_AR_LC_OFFSET:
+			return unw_access_ar(info, UNW_AR_LC, data,
+					     write_access);
+		case ELF_AR_EC_OFFSET:
+			return unw_access_ar(info, UNW_AR_EC, data,
+					     write_access);
+		case ELF_AR_CSD_OFFSET:
+			ptr = &pt->ar_csd;
+			break;
+		case ELF_AR_SSD_OFFSET:
+			ptr = &pt->ar_ssd;
+		}
+	} else if (addr >= ELF_CR_IIP_OFFSET && addr <= ELF_CR_IPSR_OFFSET) {
+		switch (addr) {
+		case ELF_CR_IIP_OFFSET:
+			ptr = &pt->cr_iip;
+			break;
+		case ELF_CFM_OFFSET:
+			urbs_end = ia64_get_user_rbs_end(target, pt, &cfm);
+			if (write_access) {
+				if (((cfm ^ *data) & PFM_MASK) != 0) {
+					if (in_syscall(pt))
+						convert_to_non_syscall(target,
+								       pt,
+								       cfm);
+					pt->cr_ifs = ((pt->cr_ifs & ~PFM_MASK)
+						      | (*data & PFM_MASK));
+				}
+			} else
+				*data = cfm;
+			return 0;
+		case ELF_CR_IPSR_OFFSET:
+			if (write_access) {
+				unsigned long tmp = *data;
+				/* psr.ri==3 is a reserved value: SDM 2:25 */
+				if ((tmp & IA64_PSR_RI) == IA64_PSR_RI)
+					tmp &= ~IA64_PSR_RI;
+				pt->cr_ipsr = ((tmp & IPSR_MASK)
+					       | (pt->cr_ipsr & ~IPSR_MASK));
+			} else
+				*data = (pt->cr_ipsr & IPSR_MASK);
+			return 0;
+		}
+	} else if (addr == ELF_NAT_OFFSET)
+		return access_nat_bits(target, pt, info,
+				       data, write_access);
+	else if (addr == ELF_PR_OFFSET)
+		ptr = &pt->pr;
+	else
+		return -1;
+
+	if (write_access)
+		*ptr = *data;
+	else
+		*data = *ptr;
+
+	return 0;
+}
+
+static int
+access_elf_reg(struct task_struct *target, struct unw_frame_info *info,
+		unsigned long addr, unsigned long *data, int write_access)
+{
+	if (addr >= ELF_GR_OFFSET(1) && addr <= ELF_GR_OFFSET(15))
+		return access_elf_gpreg(target, info, addr, data, write_access);
+	else if (addr >= ELF_BR_OFFSET(0) && addr <= ELF_BR_OFFSET(7))
+		return access_elf_breg(target, info, addr, data, write_access);
+	else
+		return access_elf_areg(target, info, addr, data, write_access);
+}
+
+void do_gpregs_get(struct unw_frame_info *info, void *arg)
+{
+	struct pt_regs *pt;
+	struct regset_getset *dst = arg;
+	elf_greg_t tmp[16];
+	unsigned int i, index, min_copy;
+
+	if (unw_unwind_to_user(info) < 0)
+		return;
+
+	/*
+	 * coredump format:
+	 *      r0-r31
+	 *      NaT bits (for r0-r31; bit N == 1 iff rN is a NaT)
+	 *      predicate registers (p0-p63)
+	 *      b0-b7
+	 *      ip cfm user-mask
+	 *      ar.rsc ar.bsp ar.bspstore ar.rnat
+	 *      ar.ccv ar.unat ar.fpsr ar.pfs ar.lc ar.ec
+	 */
+
+
+	/* Skip r0 */
+	if (dst->count > 0 && dst->pos < ELF_GR_OFFSET(1)) {
+		dst->ret = user_regset_copyout_zero(&dst->pos, &dst->count,
+						      &dst->u.get.kbuf,
+						      &dst->u.get.ubuf,
+						      0, ELF_GR_OFFSET(1));
+		if (dst->ret || dst->count == 0)
+			return;
+	}
+
+	/* gr1 - gr15 */
+	if (dst->count > 0 && dst->pos < ELF_GR_OFFSET(16)) {
+		index = (dst->pos - ELF_GR_OFFSET(1)) / sizeof(elf_greg_t);
+		min_copy = ELF_GR_OFFSET(16) > (dst->pos + dst->count) ?
+			 (dst->pos + dst->count) : ELF_GR_OFFSET(16);
+		for (i = dst->pos; i < min_copy; i += sizeof(elf_greg_t),
+				index++)
+			if (access_elf_reg(dst->target, info, i,
+						&tmp[index], 0) < 0) {
+				dst->ret = -EIO;
+				return;
+			}
+		dst->ret = user_regset_copyout(&dst->pos, &dst->count,
+				&dst->u.get.kbuf, &dst->u.get.ubuf, tmp,
+				ELF_GR_OFFSET(1), ELF_GR_OFFSET(16));
+		if (dst->ret || dst->count == 0)
+			return;
+	}
+
+	/* r16-r31 */
+	if (dst->count > 0 && dst->pos < ELF_NAT_OFFSET) {
+		pt = task_pt_regs(dst->target);
+		dst->ret = user_regset_copyout(&dst->pos, &dst->count,
+				&dst->u.get.kbuf, &dst->u.get.ubuf, &pt->r16,
+				ELF_GR_OFFSET(16), ELF_NAT_OFFSET);
+		if (dst->ret || dst->count == 0)
+			return;
+	}
+
+	/* nat, pr, b0 - b7 */
+	if (dst->count > 0 && dst->pos < ELF_CR_IIP_OFFSET) {
+		index = (dst->pos - ELF_NAT_OFFSET) / sizeof(elf_greg_t);
+		min_copy = ELF_CR_IIP_OFFSET > (dst->pos + dst->count) ?
+			 (dst->pos + dst->count) : ELF_CR_IIP_OFFSET;
+		for (i = dst->pos; i < min_copy; i += sizeof(elf_greg_t),
+				index++)
+			if (access_elf_reg(dst->target, info, i,
+						&tmp[index], 0) < 0) {
+				dst->ret = -EIO;
+				return;
+			}
+		dst->ret = user_regset_copyout(&dst->pos, &dst->count,
+				&dst->u.get.kbuf, &dst->u.get.ubuf, tmp,
+				ELF_NAT_OFFSET, ELF_CR_IIP_OFFSET);
+		if (dst->ret || dst->count == 0)
+			return;
+	}
+
+	/* ip cfm psr ar.rsc ar.bsp ar.bspstore ar.rnat
+	 * ar.ccv ar.unat ar.fpsr ar.pfs ar.lc ar.ec ar.csd ar.ssd
+	 */
+	if (dst->count > 0 && dst->pos < (ELF_AR_END_OFFSET)) {
+		index = (dst->pos - ELF_CR_IIP_OFFSET) / sizeof(elf_greg_t);
+		min_copy = ELF_AR_END_OFFSET > (dst->pos + dst->count) ?
+			 (dst->pos + dst->count) : ELF_AR_END_OFFSET;
+		for (i = dst->pos; i < min_copy; i += sizeof(elf_greg_t),
+				index++)
+			if (access_elf_reg(dst->target, info, i,
+						&tmp[index], 0) < 0) {
+				dst->ret = -EIO;
+				return;
+			}
+		dst->ret = user_regset_copyout(&dst->pos, &dst->count,
+				&dst->u.get.kbuf, &dst->u.get.ubuf, tmp,
+				ELF_CR_IIP_OFFSET, ELF_AR_END_OFFSET);
+	}
+}
+
+void do_gpregs_set(struct unw_frame_info *info, void *arg)
+{
+	struct pt_regs *pt;
+	struct regset_getset *dst = arg;
+	elf_greg_t tmp[16];
+	unsigned int i, index;
+
+	if (unw_unwind_to_user(info) < 0)
+		return;
+
+	/* Skip r0 */
+	if (dst->count > 0 && dst->pos < ELF_GR_OFFSET(1)) {
+		dst->ret = user_regset_copyin_ignore(&dst->pos, &dst->count,
+						       &dst->u.set.kbuf,
+						       &dst->u.set.ubuf,
+						       0, ELF_GR_OFFSET(1));
+		if (dst->ret || dst->count == 0)
+			return;
+	}
+
+	/* gr1-gr15 */
+	if (dst->count > 0 && dst->pos < ELF_GR_OFFSET(16)) {
+		i = dst->pos;
+		index = (dst->pos - ELF_GR_OFFSET(1)) / sizeof(elf_greg_t);
+		dst->ret = user_regset_copyin(&dst->pos, &dst->count,
+				&dst->u.set.kbuf, &dst->u.set.ubuf, tmp,
+				ELF_GR_OFFSET(1), ELF_GR_OFFSET(16));
+		if (dst->ret)
+			return;
+		for ( ; i < dst->pos; i += sizeof(elf_greg_t), index++)
+			if (access_elf_reg(dst->target, info, i,
+						&tmp[index], 1) < 0) {
+				dst->ret = -EIO;
+				return;
+			}
+		if (dst->count == 0)
+			return;
+	}
+
+	/* gr16-gr31 */
+	if (dst->count > 0 && dst->pos < ELF_NAT_OFFSET) {
+		pt = task_pt_regs(dst->target);
+		dst->ret = user_regset_copyin(&dst->pos, &dst->count,
+				&dst->u.set.kbuf, &dst->u.set.ubuf, &pt->r16,
+				ELF_GR_OFFSET(16), ELF_NAT_OFFSET);
+		if (dst->ret || dst->count == 0)
+			return;
+	}
+
+	/* nat, pr, b0 - b7 */
+	if (dst->count > 0 && dst->pos < ELF_CR_IIP_OFFSET) {
+		i = dst->pos;
+		index = (dst->pos - ELF_NAT_OFFSET) / sizeof(elf_greg_t);
+		dst->ret = user_regset_copyin(&dst->pos, &dst->count,
+				&dst->u.set.kbuf, &dst->u.set.ubuf, tmp,
+				ELF_NAT_OFFSET, ELF_CR_IIP_OFFSET);
+		if (dst->ret)
+			return;
+		for (; i < dst->pos; i += sizeof(elf_greg_t), index++)
+			if (access_elf_reg(dst->target, info, i,
+						&tmp[index], 1) < 0) {
+				dst->ret = -EIO;
+				return;
+			}
+		if (dst->count == 0)
+			return;
+	}
+
+	/* ip cfm psr ar.rsc ar.bsp ar.bspstore ar.rnat
+	 * ar.ccv ar.unat ar.fpsr ar.pfs ar.lc ar.ec ar.csd ar.ssd
+	 */
+	if (dst->count > 0 && dst->pos < (ELF_AR_END_OFFSET)) {
+		i = dst->pos;
+		index = (dst->pos - ELF_CR_IIP_OFFSET) / sizeof(elf_greg_t);
+		dst->ret = user_regset_copyin(&dst->pos, &dst->count,
+				&dst->u.set.kbuf, &dst->u.set.ubuf, tmp,
+				ELF_CR_IIP_OFFSET, ELF_AR_END_OFFSET);
+		if (dst->ret)
+			return;
+		for ( ; i < dst->pos; i += sizeof(elf_greg_t), index++)
+			if (access_elf_reg(dst->target, info, i,
+						&tmp[index], 1) < 0) {
+				dst->ret = -EIO;
+				return;
+			}
+	}
+}
+
+#define ELF_FP_OFFSET(i)	(i * sizeof(elf_fpreg_t))
+
+void do_fpregs_get(struct unw_frame_info *info, void *arg)
+{
+	struct regset_getset *dst = arg;
+	struct task_struct *task = dst->target;
+	elf_fpreg_t tmp[30];
+	int index, min_copy, i;
+
+	if (unw_unwind_to_user(info) < 0)
+		return;
+
+	/* Skip pos 0 and 1 */
+	if (dst->count > 0 && dst->pos < ELF_FP_OFFSET(2)) {
+		dst->ret = user_regset_copyout_zero(&dst->pos, &dst->count,
+						      &dst->u.get.kbuf,
+						      &dst->u.get.ubuf,
+						      0, ELF_FP_OFFSET(2));
+		if (dst->count == 0 || dst->ret)
+			return;
+	}
+
+	/* fr2-fr31 */
+	if (dst->count > 0 && dst->pos < ELF_FP_OFFSET(32)) {
+		index = (dst->pos - ELF_FP_OFFSET(2)) / sizeof(elf_fpreg_t);
+
+		min_copy = min(((unsigned int)ELF_FP_OFFSET(32)),
+				dst->pos + dst->count);
+		for (i = dst->pos; i < min_copy; i += sizeof(elf_fpreg_t),
+				index++)
+			if (unw_get_fr(info, i / sizeof(elf_fpreg_t),
+					 &tmp[index])) {
+				dst->ret = -EIO;
+				return;
+			}
+		dst->ret = user_regset_copyout(&dst->pos, &dst->count,
+				&dst->u.get.kbuf, &dst->u.get.ubuf, tmp,
+				ELF_FP_OFFSET(2), ELF_FP_OFFSET(32));
+		if (dst->count == 0 || dst->ret)
+			return;
+	}
+
+	/* fph */
+	if (dst->count > 0) {
+		ia64_flush_fph(dst->target);
+		if (task->thread.flags & IA64_THREAD_FPH_VALID)
+			dst->ret = user_regset_copyout(
+				&dst->pos, &dst->count,
+				&dst->u.get.kbuf, &dst->u.get.ubuf,
+				&dst->target->thread.fph,
+				ELF_FP_OFFSET(32), -1);
+		else
+			/* Zero fill instead.  */
+			dst->ret = user_regset_copyout_zero(
+				&dst->pos, &dst->count,
+				&dst->u.get.kbuf, &dst->u.get.ubuf,
+				ELF_FP_OFFSET(32), -1);
+	}
+}
+
+void do_fpregs_set(struct unw_frame_info *info, void *arg)
+{
+	struct regset_getset *dst = arg;
+	elf_fpreg_t fpreg, tmp[30];
+	int index, start, end;
+
+	if (unw_unwind_to_user(info) < 0)
+		return;
+
+	/* Skip pos 0 and 1 */
+	if (dst->count > 0 && dst->pos < ELF_FP_OFFSET(2)) {
+		dst->ret = user_regset_copyin_ignore(&dst->pos, &dst->count,
+						       &dst->u.set.kbuf,
+						       &dst->u.set.ubuf,
+						       0, ELF_FP_OFFSET(2));
+		if (dst->count == 0 || dst->ret)
+			return;
+	}
+
+	/* fr2-fr31 */
+	if (dst->count > 0 && dst->pos < ELF_FP_OFFSET(32)) {
+		start = dst->pos;
+		end = min(((unsigned int)ELF_FP_OFFSET(32)),
+			 dst->pos + dst->count);
+		dst->ret = user_regset_copyin(&dst->pos, &dst->count,
+				&dst->u.set.kbuf, &dst->u.set.ubuf, tmp,
+				ELF_FP_OFFSET(2), ELF_FP_OFFSET(32));
+		if (dst->ret)
+			return;
+
+		if (start & 0xF) { /* only write high part */
+			if (unw_get_fr(info, start / sizeof(elf_fpreg_t),
+					 &fpreg)) {
+				dst->ret = -EIO;
+				return;
+			}
+			tmp[start / sizeof(elf_fpreg_t) - 2].u.bits[0]
+				= fpreg.u.bits[0];
+			start &= ~0xFUL;
+		}
+		if (end & 0xF) { /* only write low part */
+			if (unw_get_fr(info, end / sizeof(elf_fpreg_t),
+					&fpreg)) {
+				dst->ret = -EIO;
+				return;
+			}
+			tmp[end / sizeof(elf_fpreg_t) - 2].u.bits[1]
+				= fpreg.u.bits[1];
+			end = (end + 0xF) & ~0xFUL;
+		}
+
+		for ( ;	start < end ; start += sizeof(elf_fpreg_t)) {
+			index = start / sizeof(elf_fpreg_t);
+			if (unw_set_fr(info, index, tmp[index - 2])) {
+				dst->ret = -EIO;
+				return;
+			}
+		}
+		if (dst->ret || dst->count == 0)
+			return;
+	}
+
+	/* fph */
+	if (dst->count > 0 && dst->pos < ELF_FP_OFFSET(128)) {
+		ia64_sync_fph(dst->target);
+		dst->ret = user_regset_copyin(&dst->pos, &dst->count,
+						&dst->u.set.kbuf,
+						&dst->u.set.ubuf,
+						&dst->target->thread.fph,
+						ELF_FP_OFFSET(32), -1);
+	}
+}
+
+static int
+do_regset_call(void (*call)(struct unw_frame_info *, void *),
+	       struct task_struct *target,
+	       const struct user_regset *regset,
+	       unsigned int pos, unsigned int count,
+	       const void *kbuf, const void __user *ubuf)
+{
+	struct regset_getset info = { .target = target, .regset = regset,
+				 .pos = pos, .count = count,
+				 .u.set = { .kbuf = kbuf, .ubuf = ubuf },
+				 .ret = 0 };
+
+	if (target == current)
+		unw_init_running(call, &info);
+	else {
+		struct unw_frame_info ufi;
+		memset(&ufi, 0, sizeof(ufi));
+		unw_init_from_blocked_task(&ufi, target);
+		(*call)(&ufi, &info);
+	}
+
+	return info.ret;
+}
+
+static int
+gpregs_get(struct task_struct *target,
+	   const struct user_regset *regset,
+	   unsigned int pos, unsigned int count,
+	   void *kbuf, void __user *ubuf)
+{
+	return do_regset_call(do_gpregs_get, target, regset, pos, count,
+		kbuf, ubuf);
+}
+
+static int gpregs_set(struct task_struct *target,
+		const struct user_regset *regset,
+		unsigned int pos, unsigned int count,
+		const void *kbuf, const void __user *ubuf)
+{
+	return do_regset_call(do_gpregs_set, target, regset, pos, count,
+		kbuf, ubuf);
+}
+
+static void do_gpregs_writeback(struct unw_frame_info *info, void *arg)
+{
+	do_sync_rbs(info, ia64_sync_user_rbs);
+}
+
+/*
+ * This is called to write back the register backing store.
+ * ptrace does this before it stops, so that a tracer reading the user
+ * memory after the thread stops will get the current register data.
+ */
+static int
+gpregs_writeback(struct task_struct *target,
+		 const struct user_regset *regset,
+		 int now)
+{
+	if (test_and_set_tsk_thread_flag(target, TIF_RESTORE_RSE))
+		return 0;
+	tsk_set_notify_resume(target);
+	return do_regset_call(do_gpregs_writeback, target, regset, 0, 0,
+		NULL, NULL);
+}
+
+static int
+fpregs_active(struct task_struct *target, const struct user_regset *regset)
+{
+	return (target->thread.flags & IA64_THREAD_FPH_VALID) ? 128 : 32;
+}
+
+static int fpregs_get(struct task_struct *target,
+		const struct user_regset *regset,
+		unsigned int pos, unsigned int count,
+		void *kbuf, void __user *ubuf)
+{
+	return do_regset_call(do_fpregs_get, target, regset, pos, count,
+		kbuf, ubuf);
+}
+
+static int fpregs_set(struct task_struct *target,
+		const struct user_regset *regset,
+		unsigned int pos, unsigned int count,
+		const void *kbuf, const void __user *ubuf)
+{
+	return do_regset_call(do_fpregs_set, target, regset, pos, count,
+		kbuf, ubuf);
+}
+
+static int
+access_uarea(struct task_struct *child, unsigned long addr,
+	      unsigned long *data, int write_access)
+{
+	unsigned int pos = -1; /* an invalid value */
+	int ret;
+	unsigned long *ptr, regnum;
+
+	if ((addr & 0x7) != 0) {
+		dprintk("ptrace: unaligned register address 0x%lx\n", addr);
+		return -1;
+	}
+	if ((addr >= PT_NAT_BITS + 8 && addr < PT_F2) ||
+		(addr >= PT_R7 + 8 && addr < PT_B1) ||
+		(addr >= PT_AR_LC + 8 && addr < PT_CR_IPSR) ||
+		(addr >= PT_AR_SSD + 8 && addr < PT_DBR)) {
+		dprintk("ptrace: rejecting access to register "
+					"address 0x%lx\n", addr);
+		return -1;
+	}
+
+	switch (addr) {
+	case PT_F32 ... (PT_F127 + 15):
+		pos = addr - PT_F32 + ELF_FP_OFFSET(32);
+		break;
+	case PT_F2 ... (PT_F5 + 15):
+		pos = addr - PT_F2 + ELF_FP_OFFSET(2);
+		break;
+	case PT_F10 ... (PT_F31 + 15):
+		pos = addr - PT_F10 + ELF_FP_OFFSET(10);
+		break;
+	case PT_F6 ... (PT_F9 + 15):
+		pos = addr - PT_F6 + ELF_FP_OFFSET(6);
+		break;
+	}
+
+	if (pos != -1) {
+		if (write_access)
+			ret = fpregs_set(child, NULL, pos,
+				sizeof(unsigned long), data, NULL);
+		else
+			ret = fpregs_get(child, NULL, pos,
+				sizeof(unsigned long), data, NULL);
+		if (ret != 0)
+			return -1;
+		return 0;
+	}
+
+	switch (addr) {
+	case PT_NAT_BITS:
+		pos = ELF_NAT_OFFSET;
+		break;
+	case PT_R4 ... PT_R7:
+		pos = addr - PT_R4 + ELF_GR_OFFSET(4);
+		break;
+	case PT_B1 ... PT_B5:
+		pos = addr - PT_B1 + ELF_BR_OFFSET(1);
+		break;
+	case PT_AR_EC:
+		pos = ELF_AR_EC_OFFSET;
+		break;
+	case PT_AR_LC:
+		pos = ELF_AR_LC_OFFSET;
+		break;
+	case PT_CR_IPSR:
+		pos = ELF_CR_IPSR_OFFSET;
+		break;
+	case PT_CR_IIP:
+		pos = ELF_CR_IIP_OFFSET;
+		break;
+	case PT_CFM:
+		pos = ELF_CFM_OFFSET;
+		break;
+	case PT_AR_UNAT:
+		pos = ELF_AR_UNAT_OFFSET;
+		break;
+	case PT_AR_PFS:
+		pos = ELF_AR_PFS_OFFSET;
+		break;
+	case PT_AR_RSC:
+		pos = ELF_AR_RSC_OFFSET;
+		break;
+	case PT_AR_RNAT:
+		pos = ELF_AR_RNAT_OFFSET;
+		break;
+	case PT_AR_BSPSTORE:
+		pos = ELF_AR_BSPSTORE_OFFSET;
+		break;
+	case PT_PR:
+		pos = ELF_PR_OFFSET;
+		break;
+	case PT_B6:
+		pos = ELF_BR_OFFSET(6);
+		break;
+	case PT_AR_BSP:
+		pos = ELF_AR_BSP_OFFSET;
+		break;
+	case PT_R1 ... PT_R3:
+		pos = addr - PT_R1 + ELF_GR_OFFSET(1);
+		break;
+	case PT_R12 ... PT_R15:
+		pos = addr - PT_R12 + ELF_GR_OFFSET(12);
+		break;
+	case PT_R8 ... PT_R11:
+		pos = addr - PT_R8 + ELF_GR_OFFSET(8);
+		break;
+	case PT_R16 ... PT_R31:
+		pos = addr - PT_R16 + ELF_GR_OFFSET(16);
+		break;
+	case PT_AR_CCV:
+		pos = ELF_AR_CCV_OFFSET;
+		break;
+	case PT_AR_FPSR:
+		pos = ELF_AR_FPSR_OFFSET;
+		break;
+	case PT_B0:
+		pos = ELF_BR_OFFSET(0);
+		break;
+	case PT_B7:
+		pos = ELF_BR_OFFSET(7);
+		break;
+	case PT_AR_CSD:
+		pos = ELF_AR_CSD_OFFSET;
+		break;
+	case PT_AR_SSD:
+		pos = ELF_AR_SSD_OFFSET;
+		break;
+	}
+
+	if (pos != -1) {
+		if (write_access)
+			ret = gpregs_set(child, NULL, pos,
+				sizeof(unsigned long), data, NULL);
+		else
+			ret = gpregs_get(child, NULL, pos,
+				sizeof(unsigned long), data, NULL);
+		if (ret != 0)
+			return -1;
+		return 0;
+	}
+
+	/* access debug registers */
+	if (addr >= PT_IBR) {
+		regnum = (addr - PT_IBR) >> 3;
+		ptr = &child->thread.ibr[0];
+	} else {
+		regnum = (addr - PT_DBR) >> 3;
+		ptr = &child->thread.dbr[0];
+	}
+
+	if (regnum >= 8) {
+		dprintk("ptrace: rejecting access to register "
+				"address 0x%lx\n", addr);
+		return -1;
+	}
+#ifdef CONFIG_PERFMON
+	/*
+	 * Check if debug registers are used by perfmon. This
+	 * test must be done once we know that we can do the
+	 * operation, i.e. the arguments are all valid, but
+	 * before we start modifying the state.
+	 *
+	 * Perfmon needs to keep a count of how many processes
+	 * are trying to modify the debug registers for system
+	 * wide monitoring sessions.
+	 *
+	 * We also include read access here, because they may
+	 * cause the PMU-installed debug register state
+	 * (dbr[], ibr[]) to be reset. The two arrays are also
+	 * used by perfmon, but we do not use
+	 * IA64_THREAD_DBG_VALID. The registers are restored
+	 * by the PMU context switch code.
+	 */
+	if (pfm_use_debug_registers(child))
+		return -1;
+#endif
+
+	if (!(child->thread.flags & IA64_THREAD_DBG_VALID)) {
+		child->thread.flags |= IA64_THREAD_DBG_VALID;
+		memset(child->thread.dbr, 0,
+				sizeof(child->thread.dbr));
+		memset(child->thread.ibr, 0,
+				sizeof(child->thread.ibr));
+	}
+
+	ptr += regnum;
+
+	if ((regnum & 1) && write_access) {
+		/* don't let the user set kernel-level breakpoints: */
+		*ptr = *data & ~(7UL << 56);
+		return 0;
+	}
+	if (write_access)
+		*ptr = *data;
+	else
+		*data = *ptr;
+	return 0;
+}
+
+static const struct user_regset native_regsets[] = {
+	{
+		.core_note_type = NT_PRSTATUS,
+		.n = ELF_NGREG,
+		.size = sizeof(elf_greg_t), .align = sizeof(elf_greg_t),
+		.get = gpregs_get, .set = gpregs_set,
+		.writeback = gpregs_writeback
+	},
+	{
+		.core_note_type = NT_PRFPREG,
+		.n = ELF_NFPREG,
+		.size = sizeof(elf_fpreg_t), .align = sizeof(elf_fpreg_t),
+		.get = fpregs_get, .set = fpregs_set, .active = fpregs_active
+	},
+};
+
+static const struct user_regset_view user_ia64_view = {
+	.name = "ia64",
+	.e_machine = EM_IA_64,
+	.regsets = native_regsets, .n = ARRAY_SIZE(native_regsets)
+};
+
+const struct user_regset_view *task_user_regset_view(struct task_struct *tsk)
+{
+#ifdef CONFIG_IA32_SUPPORT
+	extern const struct user_regset_view user_ia32_view;
+	if (IS_IA32_PROCESS(task_pt_regs(tsk)))
+		return &user_ia32_view;
+#endif
+	return &user_ia64_view;
+}
diff --git a/arch/ia64/kernel/salinfo.c b/arch/ia64/kernel/salinfo.c
index 779c3cc..ecb9eb7 100644
--- a/arch/ia64/kernel/salinfo.c
+++ b/arch/ia64/kernel/salinfo.c
@@ -44,8 +44,8 @@
 #include <linux/smp.h>
 #include <linux/timer.h>
 #include <linux/vmalloc.h>
+#include <linux/semaphore.h>
 
-#include <asm/semaphore.h>
 #include <asm/sal.h>
 #include <asm/uaccess.h>
 
@@ -648,18 +648,16 @@ salinfo_init(void)
 		if (!dir)
 			continue;
 
-		entry = create_proc_entry("event", S_IRUSR, dir);
+		entry = proc_create_data("event", S_IRUSR, dir,
+					 &salinfo_event_fops, data);
 		if (!entry)
 			continue;
-		entry->data = data;
-		entry->proc_fops = &salinfo_event_fops;
 		*sdir++ = entry;
 
-		entry = create_proc_entry("data", S_IRUSR | S_IWUSR, dir);
+		entry = proc_create_data("data", S_IRUSR | S_IWUSR, dir,
+					 &salinfo_data_fops, data);
 		if (!entry)
 			continue;
-		entry->data = data;
-		entry->proc_fops = &salinfo_data_fops;
 		*sdir++ = entry;
 
 		/* we missed any events before now */
diff --git a/arch/ia64/kernel/semaphore.c b/arch/ia64/kernel/semaphore.c
deleted file mode 100644
index 2724ef3..0000000
--- a/arch/ia64/kernel/semaphore.c
+++ /dev/null
@@ -1,165 +0,0 @@
-/*
- * IA-64 semaphore implementation (derived from x86 version).
- *
- * Copyright (C) 1999-2000, 2002 Hewlett-Packard Co
- *	David Mosberger-Tang <davidm@hpl.hp.com>
- */
-
-/*
- * Semaphores are implemented using a two-way counter: The "count"
- * variable is decremented for each process that tries to acquire the
- * semaphore, while the "sleepers" variable is a count of such
- * acquires.
- *
- * Notably, the inline "up()" and "down()" functions can efficiently
- * test if they need to do any extra work (up needs to do something
- * only if count was negative before the increment operation.
- *
- * "sleeping" and the contention routine ordering is protected
- * by the spinlock in the semaphore's waitqueue head.
- *
- * Note that these functions are only called when there is contention
- * on the lock, and as such all this is the "non-critical" part of the
- * whole semaphore business. The critical part is the inline stuff in
- * <asm/semaphore.h> where we want to avoid any extra jumps and calls.
- */
-#include <linux/sched.h>
-#include <linux/init.h>
-
-#include <asm/errno.h>
-#include <asm/semaphore.h>
-
-/*
- * Logic:
- *  - Only on a boundary condition do we need to care. When we go
- *    from a negative count to a non-negative, we wake people up.
- *  - When we go from a non-negative count to a negative do we
- *    (a) synchronize with the "sleepers" count and (b) make sure
- *    that we're on the wakeup list before we synchronize so that
- *    we cannot lose wakeup events.
- */
-
-void
-__up (struct semaphore *sem)
-{
-	wake_up(&sem->wait);
-}
-
-void __sched __down (struct semaphore *sem)
-{
-	struct task_struct *tsk = current;
-	DECLARE_WAITQUEUE(wait, tsk);
-	unsigned long flags;
-
-	tsk->state = TASK_UNINTERRUPTIBLE;
-	spin_lock_irqsave(&sem->wait.lock, flags);
-	add_wait_queue_exclusive_locked(&sem->wait, &wait);
-
-	sem->sleepers++;
-	for (;;) {
-		int sleepers = sem->sleepers;
-
-		/*
-		 * Add "everybody else" into it. They aren't
-		 * playing, because we own the spinlock in
-		 * the wait_queue_head.
-		 */
-		if (!atomic_add_negative(sleepers - 1, &sem->count)) {
-			sem->sleepers = 0;
-			break;
-		}
-		sem->sleepers = 1;	/* us - see -1 above */
-		spin_unlock_irqrestore(&sem->wait.lock, flags);
-
-		schedule();
-
-		spin_lock_irqsave(&sem->wait.lock, flags);
-		tsk->state = TASK_UNINTERRUPTIBLE;
-	}
-	remove_wait_queue_locked(&sem->wait, &wait);
-	wake_up_locked(&sem->wait);
-	spin_unlock_irqrestore(&sem->wait.lock, flags);
-	tsk->state = TASK_RUNNING;
-}
-
-int __sched __down_interruptible (struct semaphore * sem)
-{
-	int retval = 0;
-	struct task_struct *tsk = current;
-	DECLARE_WAITQUEUE(wait, tsk);
-	unsigned long flags;
-
-	tsk->state = TASK_INTERRUPTIBLE;
-	spin_lock_irqsave(&sem->wait.lock, flags);
-	add_wait_queue_exclusive_locked(&sem->wait, &wait);
-
-	sem->sleepers ++;
-	for (;;) {
-		int sleepers = sem->sleepers;
-
-		/*
-		 * With signals pending, this turns into
-		 * the trylock failure case - we won't be
-		 * sleeping, and we* can't get the lock as
-		 * it has contention. Just correct the count
-		 * and exit.
-		 */
-		if (signal_pending(current)) {
-			retval = -EINTR;
-			sem->sleepers = 0;
-			atomic_add(sleepers, &sem->count);
-			break;
-		}
-
-		/*
-		 * Add "everybody else" into it. They aren't
-		 * playing, because we own the spinlock in
-		 * wait_queue_head. The "-1" is because we're
-		 * still hoping to get the semaphore.
-		 */
-		if (!atomic_add_negative(sleepers - 1, &sem->count)) {
-			sem->sleepers = 0;
-			break;
-		}
-		sem->sleepers = 1;	/* us - see -1 above */
-		spin_unlock_irqrestore(&sem->wait.lock, flags);
-
-		schedule();
-
-		spin_lock_irqsave(&sem->wait.lock, flags);
-		tsk->state = TASK_INTERRUPTIBLE;
-	}
-	remove_wait_queue_locked(&sem->wait, &wait);
-	wake_up_locked(&sem->wait);
-	spin_unlock_irqrestore(&sem->wait.lock, flags);
-
-	tsk->state = TASK_RUNNING;
-	return retval;
-}
-
-/*
- * Trylock failed - make sure we correct for having decremented the
- * count.
- */
-int
-__down_trylock (struct semaphore *sem)
-{
-	unsigned long flags;
-	int sleepers;
-
-	spin_lock_irqsave(&sem->wait.lock, flags);
-	sleepers = sem->sleepers + 1;
-	sem->sleepers = 0;
-
-	/*
-	 * Add "everybody else" and us into it. They aren't
-	 * playing, because we own the spinlock in the
-	 * wait_queue_head.
-	 */
-	if (!atomic_add_negative(sleepers, &sem->count)) {
-		wake_up_locked(&sem->wait);
-	}
-
-	spin_unlock_irqrestore(&sem->wait.lock, flags);
-	return 1;
-}
diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
index 4aa9eae..5015ca1 100644
--- a/arch/ia64/kernel/setup.c
+++ b/arch/ia64/kernel/setup.c
@@ -59,6 +59,7 @@
 #include <asm/setup.h>
 #include <asm/smp.h>
 #include <asm/system.h>
+#include <asm/tlbflush.h>
 #include <asm/unistd.h>
 #include <asm/hpsim.h>
 
@@ -176,6 +177,29 @@ filter_rsvd_memory (unsigned long start, unsigned long end, void *arg)
 	return 0;
 }
 
+/*
+ * Similar to "filter_rsvd_memory()", but the reserved memory ranges
+ * are not filtered out.
+ */
+int __init
+filter_memory(unsigned long start, unsigned long end, void *arg)
+{
+	void (*func)(unsigned long, unsigned long, int);
+
+#if IGNORE_PFN0
+	if (start == PAGE_OFFSET) {
+		printk(KERN_WARNING "warning: skipping physical page 0\n");
+		start += PAGE_SIZE;
+		if (start >= end)
+			return 0;
+	}
+#endif
+	func = arg;
+	if (start < end)
+		call_pernode_memory(__pa(start), end - start, func);
+	return 0;
+}
+
 static void __init
 sort_regions (struct rsvd_region *rsvd_region, int max)
 {
@@ -493,6 +517,8 @@ setup_arch (char **cmdline_p)
 	acpi_table_init();
 # ifdef CONFIG_ACPI_NUMA
 	acpi_numa_init();
+	per_cpu_scan_finalize((cpus_weight(early_cpu_possible_map) == 0 ?
+		32 : cpus_weight(early_cpu_possible_map)), additional_cpus);
 # endif
 #else
 # ifdef CONFIG_SMP
@@ -946,9 +972,10 @@ cpu_init (void)
 #endif
 
 	/* set ia64_ctx.max_rid to the maximum RID that is supported by all CPUs: */
-	if (ia64_pal_vm_summary(NULL, &vmi) == 0)
+	if (ia64_pal_vm_summary(NULL, &vmi) == 0) {
 		max_ctx = (1U << (vmi.pal_vm_info_2_s.rid_size - 3)) - 1;
-	else {
+		setup_ptcg_sem(vmi.pal_vm_info_2_s.max_purges, NPTCG_FROM_PAL);
+	} else {
 		printk(KERN_WARNING "cpu_init: PAL VM summary failed, assuming 18 RID bits\n");
 		max_ctx = (1U << 15) - 1;	/* use architected minimum */
 	}
diff --git a/arch/ia64/kernel/smp.c b/arch/ia64/kernel/smp.c
index 4e446aa..9a9d4c4 100644
--- a/arch/ia64/kernel/smp.c
+++ b/arch/ia64/kernel/smp.c
@@ -213,6 +213,19 @@ send_IPI_allbutself (int op)
  * Called with preemption disabled.
  */
 static inline void
+send_IPI_mask(cpumask_t mask, int op)
+{
+	unsigned int cpu;
+
+	for_each_cpu_mask(cpu, mask) {
+			send_IPI_single(cpu, op);
+	}
+}
+
+/*
+ * Called with preemption disabled.
+ */
+static inline void
 send_IPI_all (int op)
 {
 	int i;
@@ -401,6 +414,75 @@ smp_call_function_single (int cpuid, void (*func) (void *info), void *info, int
 }
 EXPORT_SYMBOL(smp_call_function_single);
 
+/**
+ * smp_call_function_mask(): Run a function on a set of other CPUs.
+ * <mask>	The set of cpus to run on.  Must not include the current cpu.
+ * <func> 	The function to run. This must be fast and non-blocking.
+ * <info>	An arbitrary pointer to pass to the function.
+ * <wait>	If true, wait (atomically) until function
+ *		has completed on other CPUs.
+ *
+ * Returns 0 on success, else a negative status code.
+ *
+ * If @wait is true, then returns once @func has returned; otherwise
+ * it returns just before the target cpu calls @func.
+ *
+ * You must not call this function with disabled interrupts or from a
+ * hardware interrupt handler or from a bottom half handler.
+ */
+int smp_call_function_mask(cpumask_t mask,
+			   void (*func)(void *), void *info,
+			   int wait)
+{
+	struct call_data_struct data;
+	cpumask_t allbutself;
+	int cpus;
+
+	spin_lock(&call_lock);
+	allbutself = cpu_online_map;
+	cpu_clear(smp_processor_id(), allbutself);
+
+	cpus_and(mask, mask, allbutself);
+	cpus = cpus_weight(mask);
+	if (!cpus) {
+		spin_unlock(&call_lock);
+		return 0;
+	}
+
+	/* Can deadlock when called with interrupts disabled */
+	WARN_ON(irqs_disabled());
+
+	data.func = func;
+	data.info = info;
+	atomic_set(&data.started, 0);
+	data.wait = wait;
+	if (wait)
+		atomic_set(&data.finished, 0);
+
+	call_data = &data;
+	mb(); /* ensure store to call_data precedes setting of IPI_CALL_FUNC*/
+
+	/* Send a message to other CPUs */
+	if (cpus_equal(mask, allbutself))
+		send_IPI_allbutself(IPI_CALL_FUNC);
+	else
+		send_IPI_mask(mask, IPI_CALL_FUNC);
+
+	/* Wait for response */
+	while (atomic_read(&data.started) != cpus)
+		cpu_relax();
+
+	if (wait)
+		while (atomic_read(&data.finished) != cpus)
+			cpu_relax();
+	call_data = NULL;
+
+	spin_unlock(&call_lock);
+	return 0;
+
+}
+EXPORT_SYMBOL(smp_call_function_mask);
+
 /*
  * this function sends a 'generic call function' IPI to all other CPUs
  * in the system.
diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c
index 32ee597..d7ad42b 100644
--- a/arch/ia64/kernel/smpboot.c
+++ b/arch/ia64/kernel/smpboot.c
@@ -400,9 +400,9 @@ smp_callin (void)
 	/* Setup the per cpu irq handling data structures */
 	__setup_vector_irq(cpuid);
 	cpu_set(cpuid, cpu_online_map);
-	unlock_ipi_calllock();
 	per_cpu(cpu_state, cpuid) = CPU_ONLINE;
 	spin_unlock(&vector_lock);
+	unlock_ipi_calllock();
 
 	smp_setup_percpu_timer();
 
@@ -873,7 +873,8 @@ identify_siblings(struct cpuinfo_ia64 *c)
 	u16 pltid;
 	pal_logical_to_physical_t info;
 
-	if ((status = ia64_pal_logical_to_phys(-1, &info)) != PAL_STATUS_SUCCESS) {
+	status = ia64_pal_logical_to_phys(-1, &info);
+	if (status != PAL_STATUS_SUCCESS) {
 		if (status != PAL_STATUS_UNIMPLEMENTED) {
 			printk(KERN_ERR
 				"ia64_pal_logical_to_phys failed with %ld\n",
@@ -885,8 +886,13 @@ identify_siblings(struct cpuinfo_ia64 *c)
 		info.overview_cpp  = 1;
 		info.overview_tpc  = 1;
 	}
-	if ((status = ia64_sal_physical_id_info(&pltid)) != PAL_STATUS_SUCCESS) {
-		printk(KERN_ERR "ia64_sal_pltid failed with %ld\n", status);
+
+	status = ia64_sal_physical_id_info(&pltid);
+	if (status != PAL_STATUS_SUCCESS) {
+		if (status != PAL_STATUS_UNIMPLEMENTED)
+			printk(KERN_ERR
+				"ia64_sal_pltid failed with %ld\n",
+				status);
 		return;
 	}
 
diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c
index 17fda52..48e15a5 100644
--- a/arch/ia64/kernel/time.c
+++ b/arch/ia64/kernel/time.c
@@ -59,6 +59,84 @@ static struct clocksource clocksource_itc = {
 };
 static struct clocksource *itc_clocksource;
 
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+
+#include <linux/kernel_stat.h>
+
+extern cputime_t cycle_to_cputime(u64 cyc);
+
+/*
+ * Called from the context switch with interrupts disabled, to charge all
+ * accumulated times to the current process, and to prepare accounting on
+ * the next process.
+ */
+void ia64_account_on_switch(struct task_struct *prev, struct task_struct *next)
+{
+	struct thread_info *pi = task_thread_info(prev);
+	struct thread_info *ni = task_thread_info(next);
+	cputime_t delta_stime, delta_utime;
+	__u64 now;
+
+	now = ia64_get_itc();
+
+	delta_stime = cycle_to_cputime(pi->ac_stime + (now - pi->ac_stamp));
+	account_system_time(prev, 0, delta_stime);
+	account_system_time_scaled(prev, delta_stime);
+
+	if (pi->ac_utime) {
+		delta_utime = cycle_to_cputime(pi->ac_utime);
+		account_user_time(prev, delta_utime);
+		account_user_time_scaled(prev, delta_utime);
+	}
+
+	pi->ac_stamp = ni->ac_stamp = now;
+	ni->ac_stime = ni->ac_utime = 0;
+}
+
+/*
+ * Account time for a transition between system, hard irq or soft irq state.
+ * Note that this function is called with interrupts enabled.
+ */
+void account_system_vtime(struct task_struct *tsk)
+{
+	struct thread_info *ti = task_thread_info(tsk);
+	unsigned long flags;
+	cputime_t delta_stime;
+	__u64 now;
+
+	local_irq_save(flags);
+
+	now = ia64_get_itc();
+
+	delta_stime = cycle_to_cputime(ti->ac_stime + (now - ti->ac_stamp));
+	account_system_time(tsk, 0, delta_stime);
+	account_system_time_scaled(tsk, delta_stime);
+	ti->ac_stime = 0;
+
+	ti->ac_stamp = now;
+
+	local_irq_restore(flags);
+}
+
+/*
+ * Called from the timer interrupt handler to charge accumulated user time
+ * to the current process.  Must be called with interrupts disabled.
+ */
+void account_process_tick(struct task_struct *p, int user_tick)
+{
+	struct thread_info *ti = task_thread_info(p);
+	cputime_t delta_utime;
+
+	if (ti->ac_utime) {
+		delta_utime = cycle_to_cputime(ti->ac_utime);
+		account_user_time(p, delta_utime);
+		account_user_time_scaled(p, delta_utime);
+		ti->ac_utime = 0;
+	}
+}
+
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
+
 static irqreturn_t
 timer_interrupt (int irq, void *dev_id)
 {
diff --git a/arch/ia64/kernel/topology.c b/arch/ia64/kernel/topology.c
index a2484fc..abb17a6 100644
--- a/arch/ia64/kernel/topology.c
+++ b/arch/ia64/kernel/topology.c
@@ -27,6 +27,15 @@
 
 static struct ia64_cpu *sysfs_cpus;
 
+void arch_fix_phys_package_id(int num, u32 slot)
+{
+#ifdef CONFIG_SMP
+	if (cpu_data(num)->socket_id == -1)
+		cpu_data(num)->socket_id = slot;
+#endif
+}
+EXPORT_SYMBOL_GPL(arch_fix_phys_package_id);
+
 int arch_register_cpu(int num)
 {
 #if defined (CONFIG_ACPI) && defined (CONFIG_HOTPLUG_CPU)
diff --git a/arch/ia64/kernel/unaligned.c b/arch/ia64/kernel/unaligned.c
index 6903361..ff0e7c1 100644
--- a/arch/ia64/kernel/unaligned.c
+++ b/arch/ia64/kernel/unaligned.c
@@ -13,6 +13,7 @@
  * 2001/08/13	Correct size of extended floats (float_fsz) from 16 to 10 bytes.
  * 2001/01/17	Add support emulation of unaligned kernel accesses.
  */
+#include <linux/jiffies.h>
 #include <linux/kernel.h>
 #include <linux/sched.h>
 #include <linux/tty.h>
@@ -1290,7 +1291,7 @@ within_logging_rate_limit (void)
 {
 	static unsigned long count, last_time;
 
-	if (jiffies - last_time > 5*HZ)
+	if (time_after(jiffies, last_time + 5 * HZ))
 		count = 0;
 	if (count < 5) {
 		last_time = jiffies;
diff --git a/arch/ia64/kernel/uncached.c b/arch/ia64/kernel/uncached.c
index 2a90c32..e77995a 100644
--- a/arch/ia64/kernel/uncached.c
+++ b/arch/ia64/kernel/uncached.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2001-2006 Silicon Graphics, Inc.  All rights reserved.
+ * Copyright (C) 2001-2008 Silicon Graphics, Inc.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of version 2 of the GNU General Public License
@@ -177,12 +177,13 @@ failed:
  * uncached_alloc_page
  *
  * @starting_nid: node id of node to start with, or -1
+ * @n_pages: number of contiguous pages to allocate
  *
- * Allocate 1 uncached page. Allocates on the requested node. If no
- * uncached pages are available on the requested node, roundrobin starting
- * with the next higher node.
+ * Allocate the specified number of contiguous uncached pages on the
+ * the requested node. If not enough contiguous uncached pages are available
+ * on the requested node, roundrobin starting with the next higher node.
  */
-unsigned long uncached_alloc_page(int starting_nid)
+unsigned long uncached_alloc_page(int starting_nid, int n_pages)
 {
 	unsigned long uc_addr;
 	struct uncached_pool *uc_pool;
@@ -202,7 +203,8 @@ unsigned long uncached_alloc_page(int starting_nid)
 		if (uc_pool->pool == NULL)
 			continue;
 		do {
-			uc_addr = gen_pool_alloc(uc_pool->pool, PAGE_SIZE);
+			uc_addr = gen_pool_alloc(uc_pool->pool,
+						 n_pages * PAGE_SIZE);
 			if (uc_addr != 0)
 				return uc_addr;
 		} while (uncached_add_chunk(uc_pool, nid) == 0);
@@ -217,11 +219,12 @@ EXPORT_SYMBOL(uncached_alloc_page);
 /*
  * uncached_free_page
  *
- * @uc_addr: uncached address of page to free
+ * @uc_addr: uncached address of first page to free
+ * @n_pages: number of contiguous pages to free
  *
- * Free a single uncached page.
+ * Free the specified number of uncached pages.
  */
-void uncached_free_page(unsigned long uc_addr)
+void uncached_free_page(unsigned long uc_addr, int n_pages)
 {
 	int nid = paddr_to_nid(uc_addr - __IA64_UNCACHED_OFFSET);
 	struct gen_pool *pool = uncached_pools[nid].pool;
@@ -232,7 +235,7 @@ void uncached_free_page(unsigned long uc_addr)
 	if ((uc_addr & (0XFUL << 60)) != __IA64_UNCACHED_OFFSET)
 		panic("uncached_free_page invalid address %lx\n", uc_addr);
 
-	gen_pool_free(pool, uc_addr, PAGE_SIZE);
+	gen_pool_free(pool, uc_addr, n_pages * PAGE_SIZE);
 }
 EXPORT_SYMBOL(uncached_free_page);
 
diff --git a/arch/ia64/kvm/Kconfig b/arch/ia64/kvm/Kconfig
new file mode 100644
index 0000000..7914e48
--- /dev/null
+++ b/arch/ia64/kvm/Kconfig
@@ -0,0 +1,49 @@
+#
+# KVM configuration
+#
+config HAVE_KVM
+	bool
+
+menuconfig VIRTUALIZATION
+	bool "Virtualization"
+	depends on HAVE_KVM || IA64
+	default y
+	---help---
+	  Say Y here to get to see options for using your Linux host to run other
+	  operating systems inside virtual machines (guests).
+	  This option alone does not add any kernel code.
+
+	  If you say N, all options in this submenu will be skipped and disabled.
+
+if VIRTUALIZATION
+
+config KVM
+	tristate "Kernel-based Virtual Machine (KVM) support"
+	depends on HAVE_KVM && EXPERIMENTAL
+	select PREEMPT_NOTIFIERS
+	select ANON_INODES
+	---help---
+	  Support hosting fully virtualized guest machines using hardware
+	  virtualization extensions.  You will need a fairly recent
+	  processor equipped with virtualization extensions. You will also
+	  need to select one or more of the processor modules below.
+
+	  This module provides access to the hardware capabilities through
+	  a character device node named /dev/kvm.
+
+	  To compile this as a module, choose M here: the module
+	  will be called kvm.
+
+	  If unsure, say N.
+
+config KVM_INTEL
+	tristate "KVM for Intel Itanium 2 processors support"
+	depends on KVM && m
+	---help---
+	  Provides support for KVM on Itanium 2 processors equipped with the VT
+	  extensions.
+
+config KVM_TRACE
+       bool
+
+endif # VIRTUALIZATION
diff --git a/arch/ia64/kvm/Makefile b/arch/ia64/kvm/Makefile
new file mode 100644
index 0000000..5235339
--- /dev/null
+++ b/arch/ia64/kvm/Makefile
@@ -0,0 +1,58 @@
+#This Make file is to generate asm-offsets.h and build source.
+#
+
+#Generate asm-offsets.h for vmm module build
+offsets-file := asm-offsets.h
+
+always  := $(offsets-file)
+targets := $(offsets-file)
+targets += arch/ia64/kvm/asm-offsets.s
+clean-files := $(addprefix $(objtree)/,$(targets) $(obj)/memcpy.S $(obj)/memset.S)
+
+# Default sed regexp - multiline due to syntax constraints
+define sed-y
+	"/^->/{s:^->\([^ ]*\) [\$$#]*\([^ ]*\) \(.*\):#define \1 \2 /* \3 */:; s:->::; p;}"
+endef
+
+quiet_cmd_offsets = GEN     $@
+define cmd_offsets
+	(set -e; \
+	 echo "#ifndef __ASM_KVM_OFFSETS_H__"; \
+	 echo "#define __ASM_KVM_OFFSETS_H__"; \
+	 echo "/*"; \
+	 echo " * DO NOT MODIFY."; \
+	 echo " *"; \
+	 echo " * This file was generated by Makefile"; \
+	 echo " *"; \
+	 echo " */"; \
+	 echo ""; \
+	 sed -ne $(sed-y) $<; \
+	 echo ""; \
+	 echo "#endif" ) > $@
+endef
+# We use internal rules to avoid the "is up to date" message from make
+arch/ia64/kvm/asm-offsets.s: arch/ia64/kvm/asm-offsets.c
+	$(call if_changed_dep,cc_s_c)
+
+$(obj)/$(offsets-file): arch/ia64/kvm/asm-offsets.s
+	$(call cmd,offsets)
+
+#
+# Makefile for Kernel-based Virtual Machine module
+#
+
+EXTRA_CFLAGS += -Ivirt/kvm -Iarch/ia64/kvm/
+EXTRA_AFLAGS += -Ivirt/kvm -Iarch/ia64/kvm/
+
+common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o)
+
+kvm-objs := $(common-objs) kvm-ia64.o kvm_fw.o
+obj-$(CONFIG_KVM) += kvm.o
+
+FORCE : $(obj)/$(offsets-file)
+EXTRA_CFLAGS_vcpu.o += -mfixed-range=f2-f5,f12-f127
+kvm-intel-objs = vmm.o vmm_ivt.o trampoline.o vcpu.o optvfault.o mmio.o \
+	vtlb.o process.o
+#Add link memcpy and memset to avoid possible structure assignment error
+kvm-intel-objs += ../lib/memset.o ../lib/memcpy.o
+obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
diff --git a/arch/ia64/kvm/asm-offsets.c b/arch/ia64/kvm/asm-offsets.c
new file mode 100644
index 0000000..4e3dc13
--- /dev/null
+++ b/arch/ia64/kvm/asm-offsets.c
@@ -0,0 +1,251 @@
+/*
+ * asm-offsets.c Generate definitions needed by assembly language modules.
+ * This code generates raw asm output which is post-processed
+ * to extract and format the required data.
+ *
+ * Anthony Xu    <anthony.xu@intel.com>
+ * Xiantao Zhang <xiantao.zhang@intel.com>
+ * Copyright (c) 2007 Intel Corporation  KVM support.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+#include <linux/autoconf.h>
+#include <linux/kvm_host.h>
+
+#include "vcpu.h"
+
+#define task_struct kvm_vcpu
+
+#define DEFINE(sym, val) \
+	asm volatile("\n->" #sym " (%0) " #val : : "i" (val))
+
+#define BLANK() asm volatile("\n->" : :)
+
+#define OFFSET(_sym, _str, _mem) \
+    DEFINE(_sym, offsetof(_str, _mem));
+
+void foo(void)
+{
+	DEFINE(VMM_TASK_SIZE, sizeof(struct kvm_vcpu));
+	DEFINE(VMM_PT_REGS_SIZE, sizeof(struct kvm_pt_regs));
+
+	BLANK();
+
+	DEFINE(VMM_VCPU_META_RR0_OFFSET,
+			offsetof(struct kvm_vcpu, arch.metaphysical_rr0));
+	DEFINE(VMM_VCPU_META_SAVED_RR0_OFFSET,
+			offsetof(struct kvm_vcpu,
+				arch.metaphysical_saved_rr0));
+	DEFINE(VMM_VCPU_VRR0_OFFSET,
+			offsetof(struct kvm_vcpu, arch.vrr[0]));
+	DEFINE(VMM_VPD_IRR0_OFFSET,
+			offsetof(struct vpd, irr[0]));
+	DEFINE(VMM_VCPU_ITC_CHECK_OFFSET,
+			offsetof(struct kvm_vcpu, arch.itc_check));
+	DEFINE(VMM_VCPU_IRQ_CHECK_OFFSET,
+			offsetof(struct kvm_vcpu, arch.irq_check));
+	DEFINE(VMM_VPD_VHPI_OFFSET,
+			offsetof(struct vpd, vhpi));
+	DEFINE(VMM_VCPU_VSA_BASE_OFFSET,
+			offsetof(struct kvm_vcpu, arch.vsa_base));
+	DEFINE(VMM_VCPU_VPD_OFFSET,
+			offsetof(struct kvm_vcpu, arch.vpd));
+	DEFINE(VMM_VCPU_IRQ_CHECK,
+			offsetof(struct kvm_vcpu, arch.irq_check));
+	DEFINE(VMM_VCPU_TIMER_PENDING,
+			offsetof(struct kvm_vcpu, arch.timer_pending));
+	DEFINE(VMM_VCPU_META_SAVED_RR0_OFFSET,
+			offsetof(struct kvm_vcpu, arch.metaphysical_saved_rr0));
+	DEFINE(VMM_VCPU_MODE_FLAGS_OFFSET,
+			offsetof(struct kvm_vcpu, arch.mode_flags));
+	DEFINE(VMM_VCPU_ITC_OFS_OFFSET,
+			offsetof(struct kvm_vcpu, arch.itc_offset));
+	DEFINE(VMM_VCPU_LAST_ITC_OFFSET,
+			offsetof(struct kvm_vcpu, arch.last_itc));
+	DEFINE(VMM_VCPU_SAVED_GP_OFFSET,
+			offsetof(struct kvm_vcpu, arch.saved_gp));
+
+	BLANK();
+
+	DEFINE(VMM_PT_REGS_B6_OFFSET,
+				offsetof(struct kvm_pt_regs, b6));
+	DEFINE(VMM_PT_REGS_B7_OFFSET,
+				offsetof(struct kvm_pt_regs, b7));
+	DEFINE(VMM_PT_REGS_AR_CSD_OFFSET,
+				offsetof(struct kvm_pt_regs, ar_csd));
+	DEFINE(VMM_PT_REGS_AR_SSD_OFFSET,
+				offsetof(struct kvm_pt_regs, ar_ssd));
+	DEFINE(VMM_PT_REGS_R8_OFFSET,
+				offsetof(struct kvm_pt_regs, r8));
+	DEFINE(VMM_PT_REGS_R9_OFFSET,
+				offsetof(struct kvm_pt_regs, r9));
+	DEFINE(VMM_PT_REGS_R10_OFFSET,
+				offsetof(struct kvm_pt_regs, r10));
+	DEFINE(VMM_PT_REGS_R11_OFFSET,
+				offsetof(struct kvm_pt_regs, r11));
+	DEFINE(VMM_PT_REGS_CR_IPSR_OFFSET,
+				offsetof(struct kvm_pt_regs, cr_ipsr));
+	DEFINE(VMM_PT_REGS_CR_IIP_OFFSET,
+				offsetof(struct kvm_pt_regs, cr_iip));
+	DEFINE(VMM_PT_REGS_CR_IFS_OFFSET,
+				offsetof(struct kvm_pt_regs, cr_ifs));
+	DEFINE(VMM_PT_REGS_AR_UNAT_OFFSET,
+				offsetof(struct kvm_pt_regs, ar_unat));
+	DEFINE(VMM_PT_REGS_AR_PFS_OFFSET,
+				offsetof(struct kvm_pt_regs, ar_pfs));
+	DEFINE(VMM_PT_REGS_AR_RSC_OFFSET,
+				offsetof(struct kvm_pt_regs, ar_rsc));
+	DEFINE(VMM_PT_REGS_AR_RNAT_OFFSET,
+				offsetof(struct kvm_pt_regs, ar_rnat));
+
+	DEFINE(VMM_PT_REGS_AR_BSPSTORE_OFFSET,
+				offsetof(struct kvm_pt_regs, ar_bspstore));
+	DEFINE(VMM_PT_REGS_PR_OFFSET,
+				offsetof(struct kvm_pt_regs, pr));
+	DEFINE(VMM_PT_REGS_B0_OFFSET,
+				offsetof(struct kvm_pt_regs, b0));
+	DEFINE(VMM_PT_REGS_LOADRS_OFFSET,
+				offsetof(struct kvm_pt_regs, loadrs));
+	DEFINE(VMM_PT_REGS_R1_OFFSET,
+				offsetof(struct kvm_pt_regs, r1));
+	DEFINE(VMM_PT_REGS_R12_OFFSET,
+				offsetof(struct kvm_pt_regs, r12));
+	DEFINE(VMM_PT_REGS_R13_OFFSET,
+				offsetof(struct kvm_pt_regs, r13));
+	DEFINE(VMM_PT_REGS_AR_FPSR_OFFSET,
+				offsetof(struct kvm_pt_regs, ar_fpsr));
+	DEFINE(VMM_PT_REGS_R15_OFFSET,
+				offsetof(struct kvm_pt_regs, r15));
+	DEFINE(VMM_PT_REGS_R14_OFFSET,
+				offsetof(struct kvm_pt_regs, r14));
+	DEFINE(VMM_PT_REGS_R2_OFFSET,
+				offsetof(struct kvm_pt_regs, r2));
+	DEFINE(VMM_PT_REGS_R3_OFFSET,
+				offsetof(struct kvm_pt_regs, r3));
+	DEFINE(VMM_PT_REGS_R16_OFFSET,
+				offsetof(struct kvm_pt_regs, r16));
+	DEFINE(VMM_PT_REGS_R17_OFFSET,
+				offsetof(struct kvm_pt_regs, r17));
+	DEFINE(VMM_PT_REGS_R18_OFFSET,
+				offsetof(struct kvm_pt_regs, r18));
+	DEFINE(VMM_PT_REGS_R19_OFFSET,
+				offsetof(struct kvm_pt_regs, r19));
+	DEFINE(VMM_PT_REGS_R20_OFFSET,
+				offsetof(struct kvm_pt_regs, r20));
+	DEFINE(VMM_PT_REGS_R21_OFFSET,
+				offsetof(struct kvm_pt_regs, r21));
+	DEFINE(VMM_PT_REGS_R22_OFFSET,
+				offsetof(struct kvm_pt_regs, r22));
+	DEFINE(VMM_PT_REGS_R23_OFFSET,
+				offsetof(struct kvm_pt_regs, r23));
+	DEFINE(VMM_PT_REGS_R24_OFFSET,
+				offsetof(struct kvm_pt_regs, r24));
+	DEFINE(VMM_PT_REGS_R25_OFFSET,
+				offsetof(struct kvm_pt_regs, r25));
+	DEFINE(VMM_PT_REGS_R26_OFFSET,
+				offsetof(struct kvm_pt_regs, r26));
+	DEFINE(VMM_PT_REGS_R27_OFFSET,
+				offsetof(struct kvm_pt_regs, r27));
+	DEFINE(VMM_PT_REGS_R28_OFFSET,
+				offsetof(struct kvm_pt_regs, r28));
+	DEFINE(VMM_PT_REGS_R29_OFFSET,
+				offsetof(struct kvm_pt_regs, r29));
+	DEFINE(VMM_PT_REGS_R30_OFFSET,
+				offsetof(struct kvm_pt_regs, r30));
+	DEFINE(VMM_PT_REGS_R31_OFFSET,
+				offsetof(struct kvm_pt_regs, r31));
+	DEFINE(VMM_PT_REGS_AR_CCV_OFFSET,
+				offsetof(struct kvm_pt_regs, ar_ccv));
+	DEFINE(VMM_PT_REGS_F6_OFFSET,
+				offsetof(struct kvm_pt_regs, f6));
+	DEFINE(VMM_PT_REGS_F7_OFFSET,
+				offsetof(struct kvm_pt_regs, f7));
+	DEFINE(VMM_PT_REGS_F8_OFFSET,
+				offsetof(struct kvm_pt_regs, f8));
+	DEFINE(VMM_PT_REGS_F9_OFFSET,
+				offsetof(struct kvm_pt_regs, f9));
+	DEFINE(VMM_PT_REGS_F10_OFFSET,
+				offsetof(struct kvm_pt_regs, f10));
+	DEFINE(VMM_PT_REGS_F11_OFFSET,
+				offsetof(struct kvm_pt_regs, f11));
+	DEFINE(VMM_PT_REGS_R4_OFFSET,
+				offsetof(struct kvm_pt_regs, r4));
+	DEFINE(VMM_PT_REGS_R5_OFFSET,
+				offsetof(struct kvm_pt_regs, r5));
+	DEFINE(VMM_PT_REGS_R6_OFFSET,
+				offsetof(struct kvm_pt_regs, r6));
+	DEFINE(VMM_PT_REGS_R7_OFFSET,
+				offsetof(struct kvm_pt_regs, r7));
+	DEFINE(VMM_PT_REGS_EML_UNAT_OFFSET,
+				offsetof(struct kvm_pt_regs, eml_unat));
+	DEFINE(VMM_VCPU_IIPA_OFFSET,
+				offsetof(struct kvm_vcpu, arch.cr_iipa));
+	DEFINE(VMM_VCPU_OPCODE_OFFSET,
+				offsetof(struct kvm_vcpu, arch.opcode));
+	DEFINE(VMM_VCPU_CAUSE_OFFSET, offsetof(struct kvm_vcpu, arch.cause));
+	DEFINE(VMM_VCPU_ISR_OFFSET,
+				offsetof(struct kvm_vcpu, arch.cr_isr));
+	DEFINE(VMM_PT_REGS_R16_SLOT,
+				(((offsetof(struct kvm_pt_regs, r16)
+				- sizeof(struct kvm_pt_regs)) >> 3) & 0x3f));
+	DEFINE(VMM_VCPU_MODE_FLAGS_OFFSET,
+				offsetof(struct kvm_vcpu, arch.mode_flags));
+	DEFINE(VMM_VCPU_GP_OFFSET, offsetof(struct kvm_vcpu, arch.__gp));
+	BLANK();
+
+	DEFINE(VMM_VPD_BASE_OFFSET, offsetof(struct kvm_vcpu, arch.vpd));
+	DEFINE(VMM_VPD_VIFS_OFFSET, offsetof(struct vpd, ifs));
+	DEFINE(VMM_VLSAPIC_INSVC_BASE_OFFSET,
+			offsetof(struct kvm_vcpu, arch.insvc[0]));
+	DEFINE(VMM_VPD_VPTA_OFFSET, offsetof(struct vpd, pta));
+	DEFINE(VMM_VPD_VPSR_OFFSET, offsetof(struct vpd, vpsr));
+
+	DEFINE(VMM_CTX_R4_OFFSET, offsetof(union context, gr[4]));
+	DEFINE(VMM_CTX_R5_OFFSET, offsetof(union context, gr[5]));
+	DEFINE(VMM_CTX_R12_OFFSET, offsetof(union context, gr[12]));
+	DEFINE(VMM_CTX_R13_OFFSET, offsetof(union context, gr[13]));
+	DEFINE(VMM_CTX_KR0_OFFSET, offsetof(union context, ar[0]));
+	DEFINE(VMM_CTX_KR1_OFFSET, offsetof(union context, ar[1]));
+	DEFINE(VMM_CTX_B0_OFFSET, offsetof(union context, br[0]));
+	DEFINE(VMM_CTX_B1_OFFSET, offsetof(union context, br[1]));
+	DEFINE(VMM_CTX_B2_OFFSET, offsetof(union context, br[2]));
+	DEFINE(VMM_CTX_RR0_OFFSET, offsetof(union context, rr[0]));
+	DEFINE(VMM_CTX_RSC_OFFSET, offsetof(union context, ar[16]));
+	DEFINE(VMM_CTX_BSPSTORE_OFFSET, offsetof(union context, ar[18]));
+	DEFINE(VMM_CTX_RNAT_OFFSET, offsetof(union context, ar[19]));
+	DEFINE(VMM_CTX_FCR_OFFSET, offsetof(union context, ar[21]));
+	DEFINE(VMM_CTX_EFLAG_OFFSET, offsetof(union context, ar[24]));
+	DEFINE(VMM_CTX_CFLG_OFFSET, offsetof(union context, ar[27]));
+	DEFINE(VMM_CTX_FSR_OFFSET, offsetof(union context, ar[28]));
+	DEFINE(VMM_CTX_FIR_OFFSET, offsetof(union context, ar[29]));
+	DEFINE(VMM_CTX_FDR_OFFSET, offsetof(union context, ar[30]));
+	DEFINE(VMM_CTX_UNAT_OFFSET, offsetof(union context, ar[36]));
+	DEFINE(VMM_CTX_FPSR_OFFSET, offsetof(union context, ar[40]));
+	DEFINE(VMM_CTX_PFS_OFFSET, offsetof(union context, ar[64]));
+	DEFINE(VMM_CTX_LC_OFFSET, offsetof(union context, ar[65]));
+	DEFINE(VMM_CTX_DCR_OFFSET, offsetof(union context, cr[0]));
+	DEFINE(VMM_CTX_IVA_OFFSET, offsetof(union context, cr[2]));
+	DEFINE(VMM_CTX_PTA_OFFSET, offsetof(union context, cr[8]));
+	DEFINE(VMM_CTX_IBR0_OFFSET, offsetof(union context, ibr[0]));
+	DEFINE(VMM_CTX_DBR0_OFFSET, offsetof(union context, dbr[0]));
+	DEFINE(VMM_CTX_F2_OFFSET, offsetof(union context, fr[2]));
+	DEFINE(VMM_CTX_F3_OFFSET, offsetof(union context, fr[3]));
+	DEFINE(VMM_CTX_F32_OFFSET, offsetof(union context, fr[32]));
+	DEFINE(VMM_CTX_F33_OFFSET, offsetof(union context, fr[33]));
+	DEFINE(VMM_CTX_PKR0_OFFSET, offsetof(union context, pkr[0]));
+	DEFINE(VMM_CTX_PSR_OFFSET, offsetof(union context, psr));
+	BLANK();
+}
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
new file mode 100644
index 0000000..318b811
--- /dev/null
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -0,0 +1,1805 @@
+/*
+ * kvm_ia64.c: Basic KVM suppport On Itanium series processors
+ *
+ *
+ * 	Copyright (C) 2007, Intel Corporation.
+ *  	Xiantao Zhang  (xiantao.zhang@intel.com)
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/percpu.h>
+#include <linux/gfp.h>
+#include <linux/fs.h>
+#include <linux/smp.h>
+#include <linux/kvm_host.h>
+#include <linux/kvm.h>
+#include <linux/bitops.h>
+#include <linux/hrtimer.h>
+#include <linux/uaccess.h>
+
+#include <asm/pgtable.h>
+#include <asm/gcc_intrin.h>
+#include <asm/pal.h>
+#include <asm/cacheflush.h>
+#include <asm/div64.h>
+#include <asm/tlb.h>
+
+#include "misc.h"
+#include "vti.h"
+#include "iodev.h"
+#include "ioapic.h"
+#include "lapic.h"
+
+static unsigned long kvm_vmm_base;
+static unsigned long kvm_vsa_base;
+static unsigned long kvm_vm_buffer;
+static unsigned long kvm_vm_buffer_size;
+unsigned long kvm_vmm_gp;
+
+static long vp_env_info;
+
+static struct kvm_vmm_info *kvm_vmm_info;
+
+static DEFINE_PER_CPU(struct kvm_vcpu *, last_vcpu);
+
+struct kvm_stats_debugfs_item debugfs_entries[] = {
+	{ NULL }
+};
+
+
+struct fdesc{
+    unsigned long ip;
+    unsigned long gp;
+};
+
+static void kvm_flush_icache(unsigned long start, unsigned long len)
+{
+	int l;
+
+	for (l = 0; l < (len + 32); l += 32)
+		ia64_fc(start + l);
+
+	ia64_sync_i();
+	ia64_srlz_i();
+}
+
+static void kvm_flush_tlb_all(void)
+{
+	unsigned long i, j, count0, count1, stride0, stride1, addr;
+	long flags;
+
+	addr    = local_cpu_data->ptce_base;
+	count0  = local_cpu_data->ptce_count[0];
+	count1  = local_cpu_data->ptce_count[1];
+	stride0 = local_cpu_data->ptce_stride[0];
+	stride1 = local_cpu_data->ptce_stride[1];
+
+	local_irq_save(flags);
+	for (i = 0; i < count0; ++i) {
+		for (j = 0; j < count1; ++j) {
+			ia64_ptce(addr);
+			addr += stride1;
+		}
+		addr += stride0;
+	}
+	local_irq_restore(flags);
+	ia64_srlz_i();			/* srlz.i implies srlz.d */
+}
+
+long ia64_pal_vp_create(u64 *vpd, u64 *host_iva, u64 *opt_handler)
+{
+	struct ia64_pal_retval iprv;
+
+	PAL_CALL_STK(iprv, PAL_VP_CREATE, (u64)vpd, (u64)host_iva,
+			(u64)opt_handler);
+
+	return iprv.status;
+}
+
+static  DEFINE_SPINLOCK(vp_lock);
+
+void kvm_arch_hardware_enable(void *garbage)
+{
+	long  status;
+	long  tmp_base;
+	unsigned long pte;
+	unsigned long saved_psr;
+	int slot;
+
+	pte = pte_val(mk_pte_phys(__pa(kvm_vmm_base),
+				PAGE_KERNEL));
+	local_irq_save(saved_psr);
+	slot = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT);
+	if (slot < 0)
+		return;
+	local_irq_restore(saved_psr);
+
+	spin_lock(&vp_lock);
+	status = ia64_pal_vp_init_env(kvm_vsa_base ?
+				VP_INIT_ENV : VP_INIT_ENV_INITALIZE,
+			__pa(kvm_vm_buffer), KVM_VM_BUFFER_BASE, &tmp_base);
+	if (status != 0) {
+		printk(KERN_WARNING"kvm: Failed to Enable VT Support!!!!\n");
+		return ;
+	}
+
+	if (!kvm_vsa_base) {
+		kvm_vsa_base = tmp_base;
+		printk(KERN_INFO"kvm: kvm_vsa_base:0x%lx\n", kvm_vsa_base);
+	}
+	spin_unlock(&vp_lock);
+	ia64_ptr_entry(0x3, slot);
+}
+
+void kvm_arch_hardware_disable(void *garbage)
+{
+
+	long status;
+	int slot;
+	unsigned long pte;
+	unsigned long saved_psr;
+	unsigned long host_iva = ia64_getreg(_IA64_REG_CR_IVA);
+
+	pte = pte_val(mk_pte_phys(__pa(kvm_vmm_base),
+				PAGE_KERNEL));
+
+	local_irq_save(saved_psr);
+	slot = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT);
+	if (slot < 0)
+		return;
+	local_irq_restore(saved_psr);
+
+	status = ia64_pal_vp_exit_env(host_iva);
+	if (status)
+		printk(KERN_DEBUG"kvm: Failed to disable VT support! :%ld\n",
+				status);
+	ia64_ptr_entry(0x3, slot);
+}
+
+void kvm_arch_check_processor_compat(void *rtn)
+{
+	*(int *)rtn = 0;
+}
+
+int kvm_dev_ioctl_check_extension(long ext)
+{
+
+	int r;
+
+	switch (ext) {
+	case KVM_CAP_IRQCHIP:
+	case KVM_CAP_USER_MEMORY:
+
+		r = 1;
+		break;
+	default:
+		r = 0;
+	}
+	return r;
+
+}
+
+static struct kvm_io_device *vcpu_find_mmio_dev(struct kvm_vcpu *vcpu,
+					gpa_t addr)
+{
+	struct kvm_io_device *dev;
+
+	dev = kvm_io_bus_find_dev(&vcpu->kvm->mmio_bus, addr);
+
+	return dev;
+}
+
+static int handle_vm_error(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+	kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
+	kvm_run->hw.hardware_exit_reason = 1;
+	return 0;
+}
+
+static int handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+	struct kvm_mmio_req *p;
+	struct kvm_io_device *mmio_dev;
+
+	p = kvm_get_vcpu_ioreq(vcpu);
+
+	if ((p->addr & PAGE_MASK) == IOAPIC_DEFAULT_BASE_ADDRESS)
+		goto mmio;
+	vcpu->mmio_needed = 1;
+	vcpu->mmio_phys_addr = kvm_run->mmio.phys_addr = p->addr;
+	vcpu->mmio_size = kvm_run->mmio.len = p->size;
+	vcpu->mmio_is_write = kvm_run->mmio.is_write = !p->dir;
+
+	if (vcpu->mmio_is_write)
+		memcpy(vcpu->mmio_data, &p->data, p->size);
+	memcpy(kvm_run->mmio.data, &p->data, p->size);
+	kvm_run->exit_reason = KVM_EXIT_MMIO;
+	return 0;
+mmio:
+	mmio_dev = vcpu_find_mmio_dev(vcpu, p->addr);
+	if (mmio_dev) {
+		if (!p->dir)
+			kvm_iodevice_write(mmio_dev, p->addr, p->size,
+						&p->data);
+		else
+			kvm_iodevice_read(mmio_dev, p->addr, p->size,
+						&p->data);
+
+	} else
+		printk(KERN_ERR"kvm: No iodevice found! addr:%lx\n", p->addr);
+	p->state = STATE_IORESP_READY;
+
+	return 1;
+}
+
+static int handle_pal_call(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+	struct exit_ctl_data *p;
+
+	p = kvm_get_exit_data(vcpu);
+
+	if (p->exit_reason == EXIT_REASON_PAL_CALL)
+		return kvm_pal_emul(vcpu, kvm_run);
+	else {
+		kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
+		kvm_run->hw.hardware_exit_reason = 2;
+		return 0;
+	}
+}
+
+static int handle_sal_call(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+	struct exit_ctl_data *p;
+
+	p = kvm_get_exit_data(vcpu);
+
+	if (p->exit_reason == EXIT_REASON_SAL_CALL) {
+		kvm_sal_emul(vcpu);
+		return 1;
+	} else {
+		kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
+		kvm_run->hw.hardware_exit_reason = 3;
+		return 0;
+	}
+
+}
+
+/*
+ *  offset: address offset to IPI space.
+ *  value:  deliver value.
+ */
+static void vcpu_deliver_ipi(struct kvm_vcpu *vcpu, uint64_t dm,
+				uint64_t vector)
+{
+	switch (dm) {
+	case SAPIC_FIXED:
+		kvm_apic_set_irq(vcpu, vector, 0);
+		break;
+	case SAPIC_NMI:
+		kvm_apic_set_irq(vcpu, 2, 0);
+		break;
+	case SAPIC_EXTINT:
+		kvm_apic_set_irq(vcpu, 0, 0);
+		break;
+	case SAPIC_INIT:
+	case SAPIC_PMI:
+	default:
+		printk(KERN_ERR"kvm: Unimplemented Deliver reserved IPI!\n");
+		break;
+	}
+}
+
+static struct kvm_vcpu *lid_to_vcpu(struct kvm *kvm, unsigned long id,
+			unsigned long eid)
+{
+	union ia64_lid lid;
+	int i;
+
+	for (i = 0; i < KVM_MAX_VCPUS; i++) {
+		if (kvm->vcpus[i]) {
+			lid.val = VCPU_LID(kvm->vcpus[i]);
+			if (lid.id == id && lid.eid == eid)
+				return kvm->vcpus[i];
+		}
+	}
+
+	return NULL;
+}
+
+static int handle_ipi(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+	struct exit_ctl_data *p = kvm_get_exit_data(vcpu);
+	struct kvm_vcpu *target_vcpu;
+	struct kvm_pt_regs *regs;
+	union ia64_ipi_a addr = p->u.ipi_data.addr;
+	union ia64_ipi_d data = p->u.ipi_data.data;
+
+	target_vcpu = lid_to_vcpu(vcpu->kvm, addr.id, addr.eid);
+	if (!target_vcpu)
+		return handle_vm_error(vcpu, kvm_run);
+
+	if (!target_vcpu->arch.launched) {
+		regs = vcpu_regs(target_vcpu);
+
+		regs->cr_iip = vcpu->kvm->arch.rdv_sal_data.boot_ip;
+		regs->r1 = vcpu->kvm->arch.rdv_sal_data.boot_gp;
+
+		target_vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
+		if (waitqueue_active(&target_vcpu->wq))
+			wake_up_interruptible(&target_vcpu->wq);
+	} else {
+		vcpu_deliver_ipi(target_vcpu, data.dm, data.vector);
+		if (target_vcpu != vcpu)
+			kvm_vcpu_kick(target_vcpu);
+	}
+
+	return 1;
+}
+
+struct call_data {
+	struct kvm_ptc_g ptc_g_data;
+	struct kvm_vcpu *vcpu;
+};
+
+static void vcpu_global_purge(void *info)
+{
+	struct call_data *p = (struct call_data *)info;
+	struct kvm_vcpu *vcpu = p->vcpu;
+
+	if (test_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))
+		return;
+
+	set_bit(KVM_REQ_PTC_G, &vcpu->requests);
+	if (vcpu->arch.ptc_g_count < MAX_PTC_G_NUM) {
+		vcpu->arch.ptc_g_data[vcpu->arch.ptc_g_count++] =
+							p->ptc_g_data;
+	} else {
+		clear_bit(KVM_REQ_PTC_G, &vcpu->requests);
+		vcpu->arch.ptc_g_count = 0;
+		set_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests);
+	}
+}
+
+static int handle_global_purge(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+	struct exit_ctl_data *p = kvm_get_exit_data(vcpu);
+	struct kvm *kvm = vcpu->kvm;
+	struct call_data call_data;
+	int i;
+	call_data.ptc_g_data = p->u.ptc_g_data;
+
+	for (i = 0; i < KVM_MAX_VCPUS; i++) {
+		if (!kvm->vcpus[i] || kvm->vcpus[i]->arch.mp_state ==
+						KVM_MP_STATE_UNINITIALIZED ||
+					vcpu == kvm->vcpus[i])
+			continue;
+
+		if (waitqueue_active(&kvm->vcpus[i]->wq))
+			wake_up_interruptible(&kvm->vcpus[i]->wq);
+
+		if (kvm->vcpus[i]->cpu != -1) {
+			call_data.vcpu = kvm->vcpus[i];
+			smp_call_function_single(kvm->vcpus[i]->cpu,
+					vcpu_global_purge, &call_data, 0, 1);
+		} else
+			printk(KERN_WARNING"kvm: Uninit vcpu received ipi!\n");
+
+	}
+	return 1;
+}
+
+static int handle_switch_rr6(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+	return 1;
+}
+
+int kvm_emulate_halt(struct kvm_vcpu *vcpu)
+{
+
+	ktime_t kt;
+	long itc_diff;
+	unsigned long vcpu_now_itc;
+
+	unsigned long expires;
+	struct hrtimer *p_ht = &vcpu->arch.hlt_timer;
+	unsigned long cyc_per_usec = local_cpu_data->cyc_per_usec;
+	struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
+
+	vcpu_now_itc = ia64_getreg(_IA64_REG_AR_ITC) + vcpu->arch.itc_offset;
+
+	if (time_after(vcpu_now_itc, vpd->itm)) {
+		vcpu->arch.timer_check = 1;
+		return 1;
+	}
+	itc_diff = vpd->itm - vcpu_now_itc;
+	if (itc_diff < 0)
+		itc_diff = -itc_diff;
+
+	expires = div64_u64(itc_diff, cyc_per_usec);
+	kt = ktime_set(0, 1000 * expires);
+	vcpu->arch.ht_active = 1;
+	hrtimer_start(p_ht, kt, HRTIMER_MODE_ABS);
+
+	if (irqchip_in_kernel(vcpu->kvm)) {
+		vcpu->arch.mp_state = KVM_MP_STATE_HALTED;
+		kvm_vcpu_block(vcpu);
+		hrtimer_cancel(p_ht);
+		vcpu->arch.ht_active = 0;
+
+		if (vcpu->arch.mp_state != KVM_MP_STATE_RUNNABLE)
+			return -EINTR;
+		return 1;
+	} else {
+		printk(KERN_ERR"kvm: Unsupported userspace halt!");
+		return 0;
+	}
+}
+
+static int handle_vm_shutdown(struct kvm_vcpu *vcpu,
+		struct kvm_run *kvm_run)
+{
+	kvm_run->exit_reason = KVM_EXIT_SHUTDOWN;
+	return 0;
+}
+
+static int handle_external_interrupt(struct kvm_vcpu *vcpu,
+		struct kvm_run *kvm_run)
+{
+	return 1;
+}
+
+static int (*kvm_vti_exit_handlers[])(struct kvm_vcpu *vcpu,
+		struct kvm_run *kvm_run) = {
+	[EXIT_REASON_VM_PANIC]              = handle_vm_error,
+	[EXIT_REASON_MMIO_INSTRUCTION]      = handle_mmio,
+	[EXIT_REASON_PAL_CALL]              = handle_pal_call,
+	[EXIT_REASON_SAL_CALL]              = handle_sal_call,
+	[EXIT_REASON_SWITCH_RR6]            = handle_switch_rr6,
+	[EXIT_REASON_VM_DESTROY]            = handle_vm_shutdown,
+	[EXIT_REASON_EXTERNAL_INTERRUPT]    = handle_external_interrupt,
+	[EXIT_REASON_IPI]		    = handle_ipi,
+	[EXIT_REASON_PTC_G]		    = handle_global_purge,
+
+};
+
+static const int kvm_vti_max_exit_handlers =
+		sizeof(kvm_vti_exit_handlers)/sizeof(*kvm_vti_exit_handlers);
+
+static void kvm_prepare_guest_switch(struct kvm_vcpu *vcpu)
+{
+}
+
+static uint32_t kvm_get_exit_reason(struct kvm_vcpu *vcpu)
+{
+	struct exit_ctl_data *p_exit_data;
+
+	p_exit_data = kvm_get_exit_data(vcpu);
+	return p_exit_data->exit_reason;
+}
+
+/*
+ * The guest has exited.  See if we can fix it or if we need userspace
+ * assistance.
+ */
+static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
+{
+	u32 exit_reason = kvm_get_exit_reason(vcpu);
+	vcpu->arch.last_exit = exit_reason;
+
+	if (exit_reason < kvm_vti_max_exit_handlers
+			&& kvm_vti_exit_handlers[exit_reason])
+		return kvm_vti_exit_handlers[exit_reason](vcpu, kvm_run);
+	else {
+		kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
+		kvm_run->hw.hardware_exit_reason = exit_reason;
+	}
+	return 0;
+}
+
+static inline void vti_set_rr6(unsigned long rr6)
+{
+	ia64_set_rr(RR6, rr6);
+	ia64_srlz_i();
+}
+
+static int kvm_insert_vmm_mapping(struct kvm_vcpu *vcpu)
+{
+	unsigned long pte;
+	struct kvm *kvm = vcpu->kvm;
+	int r;
+
+	/*Insert a pair of tr to map vmm*/
+	pte = pte_val(mk_pte_phys(__pa(kvm_vmm_base), PAGE_KERNEL));
+	r = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT);
+	if (r < 0)
+		goto out;
+	vcpu->arch.vmm_tr_slot = r;
+	/*Insert a pairt of tr to map data of vm*/
+	pte = pte_val(mk_pte_phys(__pa(kvm->arch.vm_base), PAGE_KERNEL));
+	r = ia64_itr_entry(0x3, KVM_VM_DATA_BASE,
+					pte, KVM_VM_DATA_SHIFT);
+	if (r < 0)
+		goto out;
+	vcpu->arch.vm_tr_slot = r;
+	r = 0;
+out:
+	return r;
+
+}
+
+static void kvm_purge_vmm_mapping(struct kvm_vcpu *vcpu)
+{
+
+	ia64_ptr_entry(0x3, vcpu->arch.vmm_tr_slot);
+	ia64_ptr_entry(0x3, vcpu->arch.vm_tr_slot);
+
+}
+
+static int kvm_vcpu_pre_transition(struct kvm_vcpu *vcpu)
+{
+	int cpu = smp_processor_id();
+
+	if (vcpu->arch.last_run_cpu != cpu ||
+			per_cpu(last_vcpu, cpu) != vcpu) {
+		per_cpu(last_vcpu, cpu) = vcpu;
+		vcpu->arch.last_run_cpu = cpu;
+		kvm_flush_tlb_all();
+	}
+
+	vcpu->arch.host_rr6 = ia64_get_rr(RR6);
+	vti_set_rr6(vcpu->arch.vmm_rr);
+	return kvm_insert_vmm_mapping(vcpu);
+}
+static void kvm_vcpu_post_transition(struct kvm_vcpu *vcpu)
+{
+	kvm_purge_vmm_mapping(vcpu);
+	vti_set_rr6(vcpu->arch.host_rr6);
+}
+
+static int  vti_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+	union context *host_ctx, *guest_ctx;
+	int r;
+
+	/*Get host and guest context with guest address space.*/
+	host_ctx = kvm_get_host_context(vcpu);
+	guest_ctx = kvm_get_guest_context(vcpu);
+
+	r = kvm_vcpu_pre_transition(vcpu);
+	if (r < 0)
+		goto out;
+	kvm_vmm_info->tramp_entry(host_ctx, guest_ctx);
+	kvm_vcpu_post_transition(vcpu);
+	r = 0;
+out:
+	return r;
+}
+
+static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+	int r;
+
+again:
+	preempt_disable();
+
+	kvm_prepare_guest_switch(vcpu);
+	local_irq_disable();
+
+	if (signal_pending(current)) {
+		local_irq_enable();
+		preempt_enable();
+		r = -EINTR;
+		kvm_run->exit_reason = KVM_EXIT_INTR;
+		goto out;
+	}
+
+	vcpu->guest_mode = 1;
+	kvm_guest_enter();
+
+	r = vti_vcpu_run(vcpu, kvm_run);
+	if (r < 0) {
+		local_irq_enable();
+		preempt_enable();
+		kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
+		goto out;
+	}
+
+	vcpu->arch.launched = 1;
+	vcpu->guest_mode = 0;
+	local_irq_enable();
+
+	/*
+	 * We must have an instruction between local_irq_enable() and
+	 * kvm_guest_exit(), so the timer interrupt isn't delayed by
+	 * the interrupt shadow.  The stat.exits increment will do nicely.
+	 * But we need to prevent reordering, hence this barrier():
+	 */
+	barrier();
+
+	kvm_guest_exit();
+
+	preempt_enable();
+
+	r = kvm_handle_exit(kvm_run, vcpu);
+
+	if (r > 0) {
+		if (!need_resched())
+			goto again;
+	}
+
+out:
+	if (r > 0) {
+		kvm_resched(vcpu);
+		goto again;
+	}
+
+	return r;
+}
+
+static void kvm_set_mmio_data(struct kvm_vcpu *vcpu)
+{
+	struct kvm_mmio_req *p = kvm_get_vcpu_ioreq(vcpu);
+
+	if (!vcpu->mmio_is_write)
+		memcpy(&p->data, vcpu->mmio_data, 8);
+	p->state = STATE_IORESP_READY;
+}
+
+int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+	int r;
+	sigset_t sigsaved;
+
+	vcpu_load(vcpu);
+
+	if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
+		kvm_vcpu_block(vcpu);
+		vcpu_put(vcpu);
+		return -EAGAIN;
+	}
+
+	if (vcpu->sigset_active)
+		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
+
+	if (vcpu->mmio_needed) {
+		memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8);
+		kvm_set_mmio_data(vcpu);
+		vcpu->mmio_read_completed = 1;
+		vcpu->mmio_needed = 0;
+	}
+	r = __vcpu_run(vcpu, kvm_run);
+
+	if (vcpu->sigset_active)
+		sigprocmask(SIG_SETMASK, &sigsaved, NULL);
+
+	vcpu_put(vcpu);
+	return r;
+}
+
+/*
+ * Allocate 16M memory for every vm to hold its specific data.
+ * Its memory map is defined in kvm_host.h.
+ */
+static struct kvm *kvm_alloc_kvm(void)
+{
+
+	struct kvm *kvm;
+	uint64_t  vm_base;
+
+	vm_base = __get_free_pages(GFP_KERNEL, get_order(KVM_VM_DATA_SIZE));
+
+	if (!vm_base)
+		return ERR_PTR(-ENOMEM);
+	printk(KERN_DEBUG"kvm: VM data's base Address:0x%lx\n", vm_base);
+
+	/* Zero all pages before use! */
+	memset((void *)vm_base, 0, KVM_VM_DATA_SIZE);
+
+	kvm = (struct kvm *)(vm_base + KVM_VM_OFS);
+	kvm->arch.vm_base = vm_base;
+
+	return kvm;
+}
+
+struct kvm_io_range {
+	unsigned long start;
+	unsigned long size;
+	unsigned long type;
+};
+
+static const struct kvm_io_range io_ranges[] = {
+	{VGA_IO_START, VGA_IO_SIZE, GPFN_FRAME_BUFFER},
+	{MMIO_START, MMIO_SIZE, GPFN_LOW_MMIO},
+	{LEGACY_IO_START, LEGACY_IO_SIZE, GPFN_LEGACY_IO},
+	{IO_SAPIC_START, IO_SAPIC_SIZE, GPFN_IOSAPIC},
+	{PIB_START, PIB_SIZE, GPFN_PIB},
+};
+
+static void kvm_build_io_pmt(struct kvm *kvm)
+{
+	unsigned long i, j;
+
+	/* Mark I/O ranges */
+	for (i = 0; i < (sizeof(io_ranges) / sizeof(struct kvm_io_range));
+							i++) {
+		for (j = io_ranges[i].start;
+				j < io_ranges[i].start + io_ranges[i].size;
+				j += PAGE_SIZE)
+			kvm_set_pmt_entry(kvm, j >> PAGE_SHIFT,
+					io_ranges[i].type, 0);
+	}
+
+}
+
+/*Use unused rids to virtualize guest rid.*/
+#define GUEST_PHYSICAL_RR0	0x1739
+#define GUEST_PHYSICAL_RR4	0x2739
+#define VMM_INIT_RR		0x1660
+
+static void kvm_init_vm(struct kvm *kvm)
+{
+	long vm_base;
+
+	BUG_ON(!kvm);
+
+	kvm->arch.metaphysical_rr0 = GUEST_PHYSICAL_RR0;
+	kvm->arch.metaphysical_rr4 = GUEST_PHYSICAL_RR4;
+	kvm->arch.vmm_init_rr = VMM_INIT_RR;
+
+	vm_base = kvm->arch.vm_base;
+	if (vm_base) {
+		kvm->arch.vhpt_base = vm_base + KVM_VHPT_OFS;
+		kvm->arch.vtlb_base = vm_base + KVM_VTLB_OFS;
+		kvm->arch.vpd_base  = vm_base + KVM_VPD_OFS;
+	}
+
+	/*
+	 *Fill P2M entries for MMIO/IO ranges
+	 */
+	kvm_build_io_pmt(kvm);
+
+}
+
+struct  kvm *kvm_arch_create_vm(void)
+{
+	struct kvm *kvm = kvm_alloc_kvm();
+
+	if (IS_ERR(kvm))
+		return ERR_PTR(-ENOMEM);
+	kvm_init_vm(kvm);
+
+	return kvm;
+
+}
+
+static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm,
+					struct kvm_irqchip *chip)
+{
+	int r;
+
+	r = 0;
+	switch (chip->chip_id) {
+	case KVM_IRQCHIP_IOAPIC:
+		memcpy(&chip->chip.ioapic, ioapic_irqchip(kvm),
+				sizeof(struct kvm_ioapic_state));
+		break;
+	default:
+		r = -EINVAL;
+		break;
+	}
+	return r;
+}
+
+static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
+{
+	int r;
+
+	r = 0;
+	switch (chip->chip_id) {
+	case KVM_IRQCHIP_IOAPIC:
+		memcpy(ioapic_irqchip(kvm),
+				&chip->chip.ioapic,
+				sizeof(struct kvm_ioapic_state));
+		break;
+	default:
+		r = -EINVAL;
+		break;
+	}
+	return r;
+}
+
+#define RESTORE_REGS(_x) vcpu->arch._x = regs->_x
+
+int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+{
+	int i;
+	struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
+	int r;
+
+	vcpu_load(vcpu);
+
+	for (i = 0; i < 16; i++) {
+		vpd->vgr[i] = regs->vpd.vgr[i];
+		vpd->vbgr[i] = regs->vpd.vbgr[i];
+	}
+	for (i = 0; i < 128; i++)
+		vpd->vcr[i] = regs->vpd.vcr[i];
+	vpd->vhpi = regs->vpd.vhpi;
+	vpd->vnat = regs->vpd.vnat;
+	vpd->vbnat = regs->vpd.vbnat;
+	vpd->vpsr = regs->vpd.vpsr;
+
+	vpd->vpr = regs->vpd.vpr;
+
+	r = -EFAULT;
+	r = copy_from_user(&vcpu->arch.guest, regs->saved_guest,
+						sizeof(union context));
+	if (r)
+		goto out;
+	r = copy_from_user(vcpu + 1, regs->saved_stack +
+			sizeof(struct kvm_vcpu),
+			IA64_STK_OFFSET - sizeof(struct kvm_vcpu));
+	if (r)
+		goto out;
+	vcpu->arch.exit_data =
+		((struct kvm_vcpu *)(regs->saved_stack))->arch.exit_data;
+
+	RESTORE_REGS(mp_state);
+	RESTORE_REGS(vmm_rr);
+	memcpy(vcpu->arch.itrs, regs->itrs, sizeof(struct thash_data) * NITRS);
+	memcpy(vcpu->arch.dtrs, regs->dtrs, sizeof(struct thash_data) * NDTRS);
+	RESTORE_REGS(itr_regions);
+	RESTORE_REGS(dtr_regions);
+	RESTORE_REGS(tc_regions);
+	RESTORE_REGS(irq_check);
+	RESTORE_REGS(itc_check);
+	RESTORE_REGS(timer_check);
+	RESTORE_REGS(timer_pending);
+	RESTORE_REGS(last_itc);
+	for (i = 0; i < 8; i++) {
+		vcpu->arch.vrr[i] = regs->vrr[i];
+		vcpu->arch.ibr[i] = regs->ibr[i];
+		vcpu->arch.dbr[i] = regs->dbr[i];
+	}
+	for (i = 0; i < 4; i++)
+		vcpu->arch.insvc[i] = regs->insvc[i];
+	RESTORE_REGS(xtp);
+	RESTORE_REGS(metaphysical_rr0);
+	RESTORE_REGS(metaphysical_rr4);
+	RESTORE_REGS(metaphysical_saved_rr0);
+	RESTORE_REGS(metaphysical_saved_rr4);
+	RESTORE_REGS(fp_psr);
+	RESTORE_REGS(saved_gp);
+
+	vcpu->arch.irq_new_pending = 1;
+	vcpu->arch.itc_offset = regs->saved_itc - ia64_getreg(_IA64_REG_AR_ITC);
+	set_bit(KVM_REQ_RESUME, &vcpu->requests);
+
+	vcpu_put(vcpu);
+	r = 0;
+out:
+	return r;
+}
+
+long kvm_arch_vm_ioctl(struct file *filp,
+		unsigned int ioctl, unsigned long arg)
+{
+	struct kvm *kvm = filp->private_data;
+	void __user *argp = (void __user *)arg;
+	int r = -EINVAL;
+
+	switch (ioctl) {
+	case KVM_SET_MEMORY_REGION: {
+		struct kvm_memory_region kvm_mem;
+		struct kvm_userspace_memory_region kvm_userspace_mem;
+
+		r = -EFAULT;
+		if (copy_from_user(&kvm_mem, argp, sizeof kvm_mem))
+			goto out;
+		kvm_userspace_mem.slot = kvm_mem.slot;
+		kvm_userspace_mem.flags = kvm_mem.flags;
+		kvm_userspace_mem.guest_phys_addr =
+					kvm_mem.guest_phys_addr;
+		kvm_userspace_mem.memory_size = kvm_mem.memory_size;
+		r = kvm_vm_ioctl_set_memory_region(kvm,
+					&kvm_userspace_mem, 0);
+		if (r)
+			goto out;
+		break;
+		}
+	case KVM_CREATE_IRQCHIP:
+		r = -EFAULT;
+		r = kvm_ioapic_init(kvm);
+		if (r)
+			goto out;
+		break;
+	case KVM_IRQ_LINE: {
+		struct kvm_irq_level irq_event;
+
+		r = -EFAULT;
+		if (copy_from_user(&irq_event, argp, sizeof irq_event))
+			goto out;
+		if (irqchip_in_kernel(kvm)) {
+			mutex_lock(&kvm->lock);
+			kvm_ioapic_set_irq(kvm->arch.vioapic,
+						irq_event.irq,
+						irq_event.level);
+			mutex_unlock(&kvm->lock);
+			r = 0;
+		}
+		break;
+		}
+	case KVM_GET_IRQCHIP: {
+		/* 0: PIC master, 1: PIC slave, 2: IOAPIC */
+		struct kvm_irqchip chip;
+
+		r = -EFAULT;
+		if (copy_from_user(&chip, argp, sizeof chip))
+				goto out;
+		r = -ENXIO;
+		if (!irqchip_in_kernel(kvm))
+			goto out;
+		r = kvm_vm_ioctl_get_irqchip(kvm, &chip);
+		if (r)
+			goto out;
+		r = -EFAULT;
+		if (copy_to_user(argp, &chip, sizeof chip))
+				goto out;
+		r = 0;
+		break;
+		}
+	case KVM_SET_IRQCHIP: {
+		/* 0: PIC master, 1: PIC slave, 2: IOAPIC */
+		struct kvm_irqchip chip;
+
+		r = -EFAULT;
+		if (copy_from_user(&chip, argp, sizeof chip))
+				goto out;
+		r = -ENXIO;
+		if (!irqchip_in_kernel(kvm))
+			goto out;
+		r = kvm_vm_ioctl_set_irqchip(kvm, &chip);
+		if (r)
+			goto out;
+		r = 0;
+		break;
+		}
+	default:
+		;
+	}
+out:
+	return r;
+}
+
+int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
+		struct kvm_sregs *sregs)
+{
+	return -EINVAL;
+}
+
+int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
+		struct kvm_sregs *sregs)
+{
+	return -EINVAL;
+
+}
+int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
+		struct kvm_translation *tr)
+{
+
+	return -EINVAL;
+}
+
+static int kvm_alloc_vmm_area(void)
+{
+	if (!kvm_vmm_base && (kvm_vm_buffer_size < KVM_VM_BUFFER_SIZE)) {
+		kvm_vmm_base = __get_free_pages(GFP_KERNEL,
+				get_order(KVM_VMM_SIZE));
+		if (!kvm_vmm_base)
+			return -ENOMEM;
+
+		memset((void *)kvm_vmm_base, 0, KVM_VMM_SIZE);
+		kvm_vm_buffer = kvm_vmm_base + VMM_SIZE;
+
+		printk(KERN_DEBUG"kvm:VMM's Base Addr:0x%lx, vm_buffer:0x%lx\n",
+				kvm_vmm_base, kvm_vm_buffer);
+	}
+
+	return 0;
+}
+
+static void kvm_free_vmm_area(void)
+{
+	if (kvm_vmm_base) {
+		/*Zero this area before free to avoid bits leak!!*/
+		memset((void *)kvm_vmm_base, 0, KVM_VMM_SIZE);
+		free_pages(kvm_vmm_base, get_order(KVM_VMM_SIZE));
+		kvm_vmm_base  = 0;
+		kvm_vm_buffer = 0;
+		kvm_vsa_base = 0;
+	}
+}
+
+/*
+ * Make sure that a cpu that is being hot-unplugged does not have any vcpus
+ * cached on it. Leave it as blank for IA64.
+ */
+void decache_vcpus_on_cpu(int cpu)
+{
+}
+
+static void vti_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+{
+}
+
+static int vti_init_vpd(struct kvm_vcpu *vcpu)
+{
+	int i;
+	union cpuid3_t cpuid3;
+	struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
+
+	if (IS_ERR(vpd))
+		return PTR_ERR(vpd);
+
+	/* CPUID init */
+	for (i = 0; i < 5; i++)
+		vpd->vcpuid[i] = ia64_get_cpuid(i);
+
+	/* Limit the CPUID number to 5 */
+	cpuid3.value = vpd->vcpuid[3];
+	cpuid3.number = 4;	/* 5 - 1 */
+	vpd->vcpuid[3] = cpuid3.value;
+
+	/*Set vac and vdc fields*/
+	vpd->vac.a_from_int_cr = 1;
+	vpd->vac.a_to_int_cr = 1;
+	vpd->vac.a_from_psr = 1;
+	vpd->vac.a_from_cpuid = 1;
+	vpd->vac.a_cover = 1;
+	vpd->vac.a_bsw = 1;
+	vpd->vac.a_int = 1;
+	vpd->vdc.d_vmsw = 1;
+
+	/*Set virtual buffer*/
+	vpd->virt_env_vaddr = KVM_VM_BUFFER_BASE;
+
+	return 0;
+}
+
+static int vti_create_vp(struct kvm_vcpu *vcpu)
+{
+	long ret;
+	struct vpd *vpd = vcpu->arch.vpd;
+	unsigned long  vmm_ivt;
+
+	vmm_ivt = kvm_vmm_info->vmm_ivt;
+
+	printk(KERN_DEBUG "kvm: vcpu:%p,ivt: 0x%lx\n", vcpu, vmm_ivt);
+
+	ret = ia64_pal_vp_create((u64 *)vpd, (u64 *)vmm_ivt, 0);
+
+	if (ret) {
+		printk(KERN_ERR"kvm: ia64_pal_vp_create failed!\n");
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static void init_ptce_info(struct kvm_vcpu *vcpu)
+{
+	ia64_ptce_info_t ptce = {0};
+
+	ia64_get_ptce(&ptce);
+	vcpu->arch.ptce_base = ptce.base;
+	vcpu->arch.ptce_count[0] = ptce.count[0];
+	vcpu->arch.ptce_count[1] = ptce.count[1];
+	vcpu->arch.ptce_stride[0] = ptce.stride[0];
+	vcpu->arch.ptce_stride[1] = ptce.stride[1];
+}
+
+static void kvm_migrate_hlt_timer(struct kvm_vcpu *vcpu)
+{
+	struct hrtimer *p_ht = &vcpu->arch.hlt_timer;
+
+	if (hrtimer_cancel(p_ht))
+		hrtimer_start(p_ht, p_ht->expires, HRTIMER_MODE_ABS);
+}
+
+static enum hrtimer_restart hlt_timer_fn(struct hrtimer *data)
+{
+	struct kvm_vcpu *vcpu;
+	wait_queue_head_t *q;
+
+	vcpu  = container_of(data, struct kvm_vcpu, arch.hlt_timer);
+	if (vcpu->arch.mp_state != KVM_MP_STATE_HALTED)
+		goto out;
+
+	q = &vcpu->wq;
+	if (waitqueue_active(q)) {
+		vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
+		wake_up_interruptible(q);
+	}
+out:
+	vcpu->arch.timer_check = 1;
+	return HRTIMER_NORESTART;
+}
+
+#define PALE_RESET_ENTRY    0x80000000ffffffb0UL
+
+int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
+{
+	struct kvm_vcpu *v;
+	int r;
+	int i;
+	long itc_offset;
+	struct kvm *kvm = vcpu->kvm;
+	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
+
+	union context *p_ctx = &vcpu->arch.guest;
+	struct kvm_vcpu *vmm_vcpu = to_guest(vcpu->kvm, vcpu);
+
+	/*Init vcpu context for first run.*/
+	if (IS_ERR(vmm_vcpu))
+		return PTR_ERR(vmm_vcpu);
+
+	if (vcpu->vcpu_id == 0) {
+		vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
+
+		/*Set entry address for first run.*/
+		regs->cr_iip = PALE_RESET_ENTRY;
+
+		/*Initilize itc offset for vcpus*/
+		itc_offset = 0UL - ia64_getreg(_IA64_REG_AR_ITC);
+		for (i = 0; i < MAX_VCPU_NUM; i++) {
+			v = (struct kvm_vcpu *)((char *)vcpu + VCPU_SIZE * i);
+			v->arch.itc_offset = itc_offset;
+			v->arch.last_itc = 0;
+		}
+	} else
+		vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED;
+
+	r = -ENOMEM;
+	vcpu->arch.apic = kzalloc(sizeof(struct kvm_lapic), GFP_KERNEL);
+	if (!vcpu->arch.apic)
+		goto out;
+	vcpu->arch.apic->vcpu = vcpu;
+
+	p_ctx->gr[1] = 0;
+	p_ctx->gr[12] = (unsigned long)((char *)vmm_vcpu + IA64_STK_OFFSET);
+	p_ctx->gr[13] = (unsigned long)vmm_vcpu;
+	p_ctx->psr = 0x1008522000UL;
+	p_ctx->ar[40] = FPSR_DEFAULT; /*fpsr*/
+	p_ctx->caller_unat = 0;
+	p_ctx->pr = 0x0;
+	p_ctx->ar[36] = 0x0; /*unat*/
+	p_ctx->ar[19] = 0x0; /*rnat*/
+	p_ctx->ar[18] = (unsigned long)vmm_vcpu +
+				((sizeof(struct kvm_vcpu)+15) & ~15);
+	p_ctx->ar[64] = 0x0; /*pfs*/
+	p_ctx->cr[0] = 0x7e04UL;
+	p_ctx->cr[2] = (unsigned long)kvm_vmm_info->vmm_ivt;
+	p_ctx->cr[8] = 0x3c;
+
+	/*Initilize region register*/
+	p_ctx->rr[0] = 0x30;
+	p_ctx->rr[1] = 0x30;
+	p_ctx->rr[2] = 0x30;
+	p_ctx->rr[3] = 0x30;
+	p_ctx->rr[4] = 0x30;
+	p_ctx->rr[5] = 0x30;
+	p_ctx->rr[7] = 0x30;
+
+	/*Initilize branch register 0*/
+	p_ctx->br[0] = *(unsigned long *)kvm_vmm_info->vmm_entry;
+
+	vcpu->arch.vmm_rr = kvm->arch.vmm_init_rr;
+	vcpu->arch.metaphysical_rr0 = kvm->arch.metaphysical_rr0;
+	vcpu->arch.metaphysical_rr4 = kvm->arch.metaphysical_rr4;
+
+	hrtimer_init(&vcpu->arch.hlt_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+	vcpu->arch.hlt_timer.function = hlt_timer_fn;
+
+	vcpu->arch.last_run_cpu = -1;
+	vcpu->arch.vpd = (struct vpd *)VPD_ADDR(vcpu->vcpu_id);
+	vcpu->arch.vsa_base = kvm_vsa_base;
+	vcpu->arch.__gp = kvm_vmm_gp;
+	vcpu->arch.dirty_log_lock_pa = __pa(&kvm->arch.dirty_log_lock);
+	vcpu->arch.vhpt.hash = (struct thash_data *)VHPT_ADDR(vcpu->vcpu_id);
+	vcpu->arch.vtlb.hash = (struct thash_data *)VTLB_ADDR(vcpu->vcpu_id);
+	init_ptce_info(vcpu);
+
+	r = 0;
+out:
+	return r;
+}
+
+static int vti_vcpu_setup(struct kvm_vcpu *vcpu, int id)
+{
+	unsigned long psr;
+	int r;
+
+	local_irq_save(psr);
+	r = kvm_insert_vmm_mapping(vcpu);
+	if (r)
+		goto fail;
+	r = kvm_vcpu_init(vcpu, vcpu->kvm, id);
+	if (r)
+		goto fail;
+
+	r = vti_init_vpd(vcpu);
+	if (r) {
+		printk(KERN_DEBUG"kvm: vpd init error!!\n");
+		goto uninit;
+	}
+
+	r = vti_create_vp(vcpu);
+	if (r)
+		goto uninit;
+
+	kvm_purge_vmm_mapping(vcpu);
+	local_irq_restore(psr);
+
+	return 0;
+uninit:
+	kvm_vcpu_uninit(vcpu);
+fail:
+	return r;
+}
+
+struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
+		unsigned int id)
+{
+	struct kvm_vcpu *vcpu;
+	unsigned long vm_base = kvm->arch.vm_base;
+	int r;
+	int cpu;
+
+	r = -ENOMEM;
+	if (!vm_base) {
+		printk(KERN_ERR"kvm: Create vcpu[%d] error!\n", id);
+		goto fail;
+	}
+	vcpu = (struct kvm_vcpu *)(vm_base + KVM_VCPU_OFS + VCPU_SIZE * id);
+	vcpu->kvm = kvm;
+
+	cpu = get_cpu();
+	vti_vcpu_load(vcpu, cpu);
+	r = vti_vcpu_setup(vcpu, id);
+	put_cpu();
+
+	if (r) {
+		printk(KERN_DEBUG"kvm: vcpu_setup error!!\n");
+		goto fail;
+	}
+
+	return vcpu;
+fail:
+	return ERR_PTR(r);
+}
+
+int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
+{
+	return 0;
+}
+
+int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
+{
+	return -EINVAL;
+}
+
+int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
+{
+	return -EINVAL;
+}
+
+int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
+		struct kvm_debug_guest *dbg)
+{
+	return -EINVAL;
+}
+
+static void free_kvm(struct kvm *kvm)
+{
+	unsigned long vm_base = kvm->arch.vm_base;
+
+	if (vm_base) {
+		memset((void *)vm_base, 0, KVM_VM_DATA_SIZE);
+		free_pages(vm_base, get_order(KVM_VM_DATA_SIZE));
+	}
+
+}
+
+static void kvm_release_vm_pages(struct kvm *kvm)
+{
+	struct kvm_memory_slot *memslot;
+	int i, j;
+	unsigned long base_gfn;
+
+	for (i = 0; i < kvm->nmemslots; i++) {
+		memslot = &kvm->memslots[i];
+		base_gfn = memslot->base_gfn;
+
+		for (j = 0; j < memslot->npages; j++) {
+			if (memslot->rmap[j])
+				put_page((struct page *)memslot->rmap[j]);
+		}
+	}
+}
+
+void kvm_arch_destroy_vm(struct kvm *kvm)
+{
+	kfree(kvm->arch.vioapic);
+	kvm_release_vm_pages(kvm);
+	kvm_free_physmem(kvm);
+	free_kvm(kvm);
+}
+
+void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
+{
+}
+
+void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+{
+	if (cpu != vcpu->cpu) {
+		vcpu->cpu = cpu;
+		if (vcpu->arch.ht_active)
+			kvm_migrate_hlt_timer(vcpu);
+	}
+}
+
+#define SAVE_REGS(_x) 	regs->_x = vcpu->arch._x
+
+int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+{
+	int i;
+	int r;
+	struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
+	vcpu_load(vcpu);
+
+	for (i = 0; i < 16; i++) {
+		regs->vpd.vgr[i] = vpd->vgr[i];
+		regs->vpd.vbgr[i] = vpd->vbgr[i];
+	}
+	for (i = 0; i < 128; i++)
+		regs->vpd.vcr[i] = vpd->vcr[i];
+	regs->vpd.vhpi = vpd->vhpi;
+	regs->vpd.vnat = vpd->vnat;
+	regs->vpd.vbnat = vpd->vbnat;
+	regs->vpd.vpsr = vpd->vpsr;
+	regs->vpd.vpr = vpd->vpr;
+
+	r = -EFAULT;
+	r = copy_to_user(regs->saved_guest, &vcpu->arch.guest,
+					sizeof(union context));
+	if (r)
+		goto out;
+	r = copy_to_user(regs->saved_stack, (void *)vcpu, IA64_STK_OFFSET);
+	if (r)
+		goto out;
+	SAVE_REGS(mp_state);
+	SAVE_REGS(vmm_rr);
+	memcpy(regs->itrs, vcpu->arch.itrs, sizeof(struct thash_data) * NITRS);
+	memcpy(regs->dtrs, vcpu->arch.dtrs, sizeof(struct thash_data) * NDTRS);
+	SAVE_REGS(itr_regions);
+	SAVE_REGS(dtr_regions);
+	SAVE_REGS(tc_regions);
+	SAVE_REGS(irq_check);
+	SAVE_REGS(itc_check);
+	SAVE_REGS(timer_check);
+	SAVE_REGS(timer_pending);
+	SAVE_REGS(last_itc);
+	for (i = 0; i < 8; i++) {
+		regs->vrr[i] = vcpu->arch.vrr[i];
+		regs->ibr[i] = vcpu->arch.ibr[i];
+		regs->dbr[i] = vcpu->arch.dbr[i];
+	}
+	for (i = 0; i < 4; i++)
+		regs->insvc[i] = vcpu->arch.insvc[i];
+	regs->saved_itc = vcpu->arch.itc_offset + ia64_getreg(_IA64_REG_AR_ITC);
+	SAVE_REGS(xtp);
+	SAVE_REGS(metaphysical_rr0);
+	SAVE_REGS(metaphysical_rr4);
+	SAVE_REGS(metaphysical_saved_rr0);
+	SAVE_REGS(metaphysical_saved_rr4);
+	SAVE_REGS(fp_psr);
+	SAVE_REGS(saved_gp);
+	vcpu_put(vcpu);
+	r = 0;
+out:
+	return r;
+}
+
+void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
+{
+
+	hrtimer_cancel(&vcpu->arch.hlt_timer);
+	kfree(vcpu->arch.apic);
+}
+
+
+long kvm_arch_vcpu_ioctl(struct file *filp,
+		unsigned int ioctl, unsigned long arg)
+{
+	return -EINVAL;
+}
+
+int kvm_arch_set_memory_region(struct kvm *kvm,
+		struct kvm_userspace_memory_region *mem,
+		struct kvm_memory_slot old,
+		int user_alloc)
+{
+	unsigned long i;
+	struct page *page;
+	int npages = mem->memory_size >> PAGE_SHIFT;
+	struct kvm_memory_slot *memslot = &kvm->memslots[mem->slot];
+	unsigned long base_gfn = memslot->base_gfn;
+
+	for (i = 0; i < npages; i++) {
+		page = gfn_to_page(kvm, base_gfn + i);
+		kvm_set_pmt_entry(kvm, base_gfn + i,
+				page_to_pfn(page) << PAGE_SHIFT,
+				_PAGE_AR_RWX|_PAGE_MA_WB);
+		memslot->rmap[i] = (unsigned long)page;
+	}
+
+	return 0;
+}
+
+
+long kvm_arch_dev_ioctl(struct file *filp,
+		unsigned int ioctl, unsigned long arg)
+{
+	return -EINVAL;
+}
+
+void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
+{
+	kvm_vcpu_uninit(vcpu);
+}
+
+static int vti_cpu_has_kvm_support(void)
+{
+	long  avail = 1, status = 1, control = 1;
+	long ret;
+
+	ret = ia64_pal_proc_get_features(&avail, &status, &control, 0);
+	if (ret)
+		goto out;
+
+	if (!(avail & PAL_PROC_VM_BIT))
+		goto out;
+
+	printk(KERN_DEBUG"kvm: Hardware Supports VT\n");
+
+	ret = ia64_pal_vp_env_info(&kvm_vm_buffer_size, &vp_env_info);
+	if (ret)
+		goto out;
+	printk(KERN_DEBUG"kvm: VM Buffer Size:0x%lx\n", kvm_vm_buffer_size);
+
+	if (!(vp_env_info & VP_OPCODE)) {
+		printk(KERN_WARNING"kvm: No opcode ability on hardware, "
+				"vm_env_info:0x%lx\n", vp_env_info);
+	}
+
+	return 1;
+out:
+	return 0;
+}
+
+static int kvm_relocate_vmm(struct kvm_vmm_info *vmm_info,
+						struct module *module)
+{
+	unsigned long module_base;
+	unsigned long vmm_size;
+
+	unsigned long vmm_offset, func_offset, fdesc_offset;
+	struct fdesc *p_fdesc;
+
+	BUG_ON(!module);
+
+	if (!kvm_vmm_base) {
+		printk("kvm: kvm area hasn't been initilized yet!!\n");
+		return -EFAULT;
+	}
+
+	/*Calculate new position of relocated vmm module.*/
+	module_base = (unsigned long)module->module_core;
+	vmm_size = module->core_size;
+	if (unlikely(vmm_size > KVM_VMM_SIZE))
+		return -EFAULT;
+
+	memcpy((void *)kvm_vmm_base, (void *)module_base, vmm_size);
+	kvm_flush_icache(kvm_vmm_base, vmm_size);
+
+	/*Recalculate kvm_vmm_info based on new VMM*/
+	vmm_offset = vmm_info->vmm_ivt - module_base;
+	kvm_vmm_info->vmm_ivt = KVM_VMM_BASE + vmm_offset;
+	printk(KERN_DEBUG"kvm: Relocated VMM's IVT Base Addr:%lx\n",
+			kvm_vmm_info->vmm_ivt);
+
+	fdesc_offset = (unsigned long)vmm_info->vmm_entry - module_base;
+	kvm_vmm_info->vmm_entry = (kvm_vmm_entry *)(KVM_VMM_BASE +
+							fdesc_offset);
+	func_offset = *(unsigned long *)vmm_info->vmm_entry - module_base;
+	p_fdesc = (struct fdesc *)(kvm_vmm_base + fdesc_offset);
+	p_fdesc->ip = KVM_VMM_BASE + func_offset;
+	p_fdesc->gp = KVM_VMM_BASE+(p_fdesc->gp - module_base);
+
+	printk(KERN_DEBUG"kvm: Relocated VMM's Init Entry Addr:%lx\n",
+			KVM_VMM_BASE+func_offset);
+
+	fdesc_offset = (unsigned long)vmm_info->tramp_entry - module_base;
+	kvm_vmm_info->tramp_entry = (kvm_tramp_entry *)(KVM_VMM_BASE +
+			fdesc_offset);
+	func_offset = *(unsigned long *)vmm_info->tramp_entry - module_base;
+	p_fdesc = (struct fdesc *)(kvm_vmm_base + fdesc_offset);
+	p_fdesc->ip = KVM_VMM_BASE + func_offset;
+	p_fdesc->gp = KVM_VMM_BASE + (p_fdesc->gp - module_base);
+
+	kvm_vmm_gp = p_fdesc->gp;
+
+	printk(KERN_DEBUG"kvm: Relocated VMM's Entry IP:%p\n",
+						kvm_vmm_info->vmm_entry);
+	printk(KERN_DEBUG"kvm: Relocated VMM's Trampoline Entry IP:0x%lx\n",
+						KVM_VMM_BASE + func_offset);
+
+	return 0;
+}
+
+int kvm_arch_init(void *opaque)
+{
+	int r;
+	struct kvm_vmm_info *vmm_info = (struct kvm_vmm_info *)opaque;
+
+	if (!vti_cpu_has_kvm_support()) {
+		printk(KERN_ERR "kvm: No Hardware Virtualization Support!\n");
+		r = -EOPNOTSUPP;
+		goto out;
+	}
+
+	if (kvm_vmm_info) {
+		printk(KERN_ERR "kvm: Already loaded VMM module!\n");
+		r = -EEXIST;
+		goto out;
+	}
+
+	r = -ENOMEM;
+	kvm_vmm_info = kzalloc(sizeof(struct kvm_vmm_info), GFP_KERNEL);
+	if (!kvm_vmm_info)
+		goto out;
+
+	if (kvm_alloc_vmm_area())
+		goto out_free0;
+
+	r = kvm_relocate_vmm(vmm_info, vmm_info->module);
+	if (r)
+		goto out_free1;
+
+	return 0;
+
+out_free1:
+	kvm_free_vmm_area();
+out_free0:
+	kfree(kvm_vmm_info);
+out:
+	return r;
+}
+
+void kvm_arch_exit(void)
+{
+	kvm_free_vmm_area();
+	kfree(kvm_vmm_info);
+	kvm_vmm_info = NULL;
+}
+
+static int kvm_ia64_sync_dirty_log(struct kvm *kvm,
+		struct kvm_dirty_log *log)
+{
+	struct kvm_memory_slot *memslot;
+	int r, i;
+	long n, base;
+	unsigned long *dirty_bitmap = (unsigned long *)((void *)kvm - KVM_VM_OFS
+					+ KVM_MEM_DIRTY_LOG_OFS);
+
+	r = -EINVAL;
+	if (log->slot >= KVM_MEMORY_SLOTS)
+		goto out;
+
+	memslot = &kvm->memslots[log->slot];
+	r = -ENOENT;
+	if (!memslot->dirty_bitmap)
+		goto out;
+
+	n = ALIGN(memslot->npages, BITS_PER_LONG) / 8;
+	base = memslot->base_gfn / BITS_PER_LONG;
+
+	for (i = 0; i < n/sizeof(long); ++i) {
+		memslot->dirty_bitmap[i] = dirty_bitmap[base + i];
+		dirty_bitmap[base + i] = 0;
+	}
+	r = 0;
+out:
+	return r;
+}
+
+int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
+		struct kvm_dirty_log *log)
+{
+	int r;
+	int n;
+	struct kvm_memory_slot *memslot;
+	int is_dirty = 0;
+
+	spin_lock(&kvm->arch.dirty_log_lock);
+
+	r = kvm_ia64_sync_dirty_log(kvm, log);
+	if (r)
+		goto out;
+
+	r = kvm_get_dirty_log(kvm, log, &is_dirty);
+	if (r)
+		goto out;
+
+	/* If nothing is dirty, don't bother messing with page tables. */
+	if (is_dirty) {
+		kvm_flush_remote_tlbs(kvm);
+		memslot = &kvm->memslots[log->slot];
+		n = ALIGN(memslot->npages, BITS_PER_LONG) / 8;
+		memset(memslot->dirty_bitmap, 0, n);
+	}
+	r = 0;
+out:
+	spin_unlock(&kvm->arch.dirty_log_lock);
+	return r;
+}
+
+int kvm_arch_hardware_setup(void)
+{
+	return 0;
+}
+
+void kvm_arch_hardware_unsetup(void)
+{
+}
+
+static void vcpu_kick_intr(void *info)
+{
+#ifdef DEBUG
+	struct kvm_vcpu *vcpu = (struct kvm_vcpu *)info;
+	printk(KERN_DEBUG"vcpu_kick_intr %p \n", vcpu);
+#endif
+}
+
+void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
+{
+	int ipi_pcpu = vcpu->cpu;
+
+	if (waitqueue_active(&vcpu->wq))
+		wake_up_interruptible(&vcpu->wq);
+
+	if (vcpu->guest_mode)
+		smp_call_function_single(ipi_pcpu, vcpu_kick_intr, vcpu, 0, 0);
+}
+
+int kvm_apic_set_irq(struct kvm_vcpu *vcpu, u8 vec, u8 trig)
+{
+
+	struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
+
+	if (!test_and_set_bit(vec, &vpd->irr[0])) {
+		vcpu->arch.irq_new_pending = 1;
+		 if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE)
+			kvm_vcpu_kick(vcpu);
+		else if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED) {
+			vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
+			if (waitqueue_active(&vcpu->wq))
+				wake_up_interruptible(&vcpu->wq);
+		}
+		return 1;
+	}
+	return 0;
+}
+
+int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest)
+{
+	return apic->vcpu->vcpu_id == dest;
+}
+
+int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda)
+{
+	return 0;
+}
+
+struct kvm_vcpu *kvm_get_lowest_prio_vcpu(struct kvm *kvm, u8 vector,
+				       unsigned long bitmap)
+{
+	struct kvm_vcpu *lvcpu = kvm->vcpus[0];
+	int i;
+
+	for (i = 1; i < KVM_MAX_VCPUS; i++) {
+		if (!kvm->vcpus[i])
+			continue;
+		if (lvcpu->arch.xtp > kvm->vcpus[i]->arch.xtp)
+			lvcpu = kvm->vcpus[i];
+	}
+
+	return lvcpu;
+}
+
+static int find_highest_bits(int *dat)
+{
+	u32  bits, bitnum;
+	int i;
+
+	/* loop for all 256 bits */
+	for (i = 7; i >= 0 ; i--) {
+		bits = dat[i];
+		if (bits) {
+			bitnum = fls(bits);
+			return i * 32 + bitnum - 1;
+		}
+	}
+
+	return -1;
+}
+
+int kvm_highest_pending_irq(struct kvm_vcpu *vcpu)
+{
+    struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
+
+    if (vpd->irr[0] & (1UL << NMI_VECTOR))
+		return NMI_VECTOR;
+    if (vpd->irr[0] & (1UL << ExtINT_VECTOR))
+		return ExtINT_VECTOR;
+
+    return find_highest_bits((int *)&vpd->irr[0]);
+}
+
+int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu)
+{
+	if (kvm_highest_pending_irq(vcpu) != -1)
+		return 1;
+	return 0;
+}
+
+int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
+{
+	return 0;
+}
+
+gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
+{
+	return gfn;
+}
+
+int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE;
+}
+
+int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
+				    struct kvm_mp_state *mp_state)
+{
+	return -EINVAL;
+}
+
+int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
+				    struct kvm_mp_state *mp_state)
+{
+	return -EINVAL;
+}
diff --git a/arch/ia64/kvm/kvm_fw.c b/arch/ia64/kvm/kvm_fw.c
new file mode 100644
index 0000000..091f936
--- /dev/null
+++ b/arch/ia64/kvm/kvm_fw.c
@@ -0,0 +1,500 @@
+/*
+ * PAL/SAL call delegation
+ *
+ * Copyright (c) 2004 Li Susie <susie.li@intel.com>
+ * Copyright (c) 2005 Yu Ke <ke.yu@intel.com>
+ * Copyright (c) 2007 Xiantao Zhang <xiantao.zhang@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/smp.h>
+
+#include "vti.h"
+#include "misc.h"
+
+#include <asm/pal.h>
+#include <asm/sal.h>
+#include <asm/tlb.h>
+
+/*
+ * Handy macros to make sure that the PAL return values start out
+ * as something meaningful.
+ */
+#define INIT_PAL_STATUS_UNIMPLEMENTED(x)		\
+	{						\
+		x.status = PAL_STATUS_UNIMPLEMENTED;	\
+		x.v0 = 0;				\
+		x.v1 = 0;				\
+		x.v2 = 0;				\
+	}
+
+#define INIT_PAL_STATUS_SUCCESS(x)			\
+	{						\
+		x.status = PAL_STATUS_SUCCESS;		\
+		x.v0 = 0;				\
+		x.v1 = 0;				\
+		x.v2 = 0;				\
+    }
+
+static void kvm_get_pal_call_data(struct kvm_vcpu *vcpu,
+		u64 *gr28, u64 *gr29, u64 *gr30, u64 *gr31) {
+	struct exit_ctl_data *p;
+
+	if (vcpu) {
+		p = &vcpu->arch.exit_data;
+		if (p->exit_reason == EXIT_REASON_PAL_CALL) {
+			*gr28 = p->u.pal_data.gr28;
+			*gr29 = p->u.pal_data.gr29;
+			*gr30 = p->u.pal_data.gr30;
+			*gr31 = p->u.pal_data.gr31;
+			return ;
+		}
+	}
+	printk(KERN_DEBUG"Failed to get vcpu pal data!!!\n");
+}
+
+static void set_pal_result(struct kvm_vcpu *vcpu,
+		struct ia64_pal_retval result) {
+
+	struct exit_ctl_data *p;
+
+	p = kvm_get_exit_data(vcpu);
+	if (p && p->exit_reason == EXIT_REASON_PAL_CALL) {
+		p->u.pal_data.ret = result;
+		return ;
+	}
+	INIT_PAL_STATUS_UNIMPLEMENTED(p->u.pal_data.ret);
+}
+
+static void set_sal_result(struct kvm_vcpu *vcpu,
+		struct sal_ret_values result) {
+	struct exit_ctl_data *p;
+
+	p = kvm_get_exit_data(vcpu);
+	if (p && p->exit_reason == EXIT_REASON_SAL_CALL) {
+		p->u.sal_data.ret = result;
+		return ;
+	}
+	printk(KERN_WARNING"Failed to set sal result!!\n");
+}
+
+struct cache_flush_args {
+	u64 cache_type;
+	u64 operation;
+	u64 progress;
+	long status;
+};
+
+cpumask_t cpu_cache_coherent_map;
+
+static void remote_pal_cache_flush(void *data)
+{
+	struct cache_flush_args *args = data;
+	long status;
+	u64 progress = args->progress;
+
+	status = ia64_pal_cache_flush(args->cache_type, args->operation,
+					&progress, NULL);
+	if (status != 0)
+	args->status = status;
+}
+
+static struct ia64_pal_retval pal_cache_flush(struct kvm_vcpu *vcpu)
+{
+	u64 gr28, gr29, gr30, gr31;
+	struct ia64_pal_retval result = {0, 0, 0, 0};
+	struct cache_flush_args args = {0, 0, 0, 0};
+	long psr;
+
+	gr28 = gr29 = gr30 = gr31 = 0;
+	kvm_get_pal_call_data(vcpu, &gr28, &gr29, &gr30, &gr31);
+
+	if (gr31 != 0)
+		printk(KERN_ERR"vcpu:%p called cache_flush error!\n", vcpu);
+
+	/* Always call Host Pal in int=1 */
+	gr30 &= ~PAL_CACHE_FLUSH_CHK_INTRS;
+	args.cache_type = gr29;
+	args.operation = gr30;
+	smp_call_function(remote_pal_cache_flush,
+				(void *)&args, 1, 1);
+	if (args.status != 0)
+		printk(KERN_ERR"pal_cache_flush error!,"
+				"status:0x%lx\n", args.status);
+	/*
+	 * Call Host PAL cache flush
+	 * Clear psr.ic when call PAL_CACHE_FLUSH
+	 */
+	local_irq_save(psr);
+	result.status = ia64_pal_cache_flush(gr29, gr30, &result.v1,
+						&result.v0);
+	local_irq_restore(psr);
+	if (result.status != 0)
+		printk(KERN_ERR"vcpu:%p crashed due to cache_flush err:%ld"
+				"in1:%lx,in2:%lx\n",
+				vcpu, result.status, gr29, gr30);
+
+#if 0
+	if (gr29 == PAL_CACHE_TYPE_COHERENT) {
+		cpus_setall(vcpu->arch.cache_coherent_map);
+		cpu_clear(vcpu->cpu, vcpu->arch.cache_coherent_map);
+		cpus_setall(cpu_cache_coherent_map);
+		cpu_clear(vcpu->cpu, cpu_cache_coherent_map);
+	}
+#endif
+	return result;
+}
+
+struct ia64_pal_retval pal_cache_summary(struct kvm_vcpu *vcpu)
+{
+
+	struct ia64_pal_retval result;
+
+	PAL_CALL(result, PAL_CACHE_SUMMARY, 0, 0, 0);
+	return result;
+}
+
+static struct ia64_pal_retval pal_freq_base(struct kvm_vcpu *vcpu)
+{
+
+	struct ia64_pal_retval result;
+
+	PAL_CALL(result, PAL_FREQ_BASE, 0, 0, 0);
+
+	/*
+	 * PAL_FREQ_BASE may not be implemented in some platforms,
+	 * call SAL instead.
+	 */
+	if (result.v0 == 0) {
+		result.status = ia64_sal_freq_base(SAL_FREQ_BASE_PLATFORM,
+							&result.v0,
+							&result.v1);
+		result.v2 = 0;
+	}
+
+	return result;
+}
+
+static struct ia64_pal_retval pal_freq_ratios(struct kvm_vcpu *vcpu)
+{
+
+	struct ia64_pal_retval result;
+
+	PAL_CALL(result, PAL_FREQ_RATIOS, 0, 0, 0);
+	return result;
+}
+
+static struct ia64_pal_retval pal_logical_to_physica(struct kvm_vcpu *vcpu)
+{
+	struct ia64_pal_retval result;
+
+	INIT_PAL_STATUS_UNIMPLEMENTED(result);
+	return result;
+}
+
+static struct ia64_pal_retval pal_platform_addr(struct kvm_vcpu *vcpu)
+{
+
+	struct ia64_pal_retval result;
+
+	INIT_PAL_STATUS_SUCCESS(result);
+	return result;
+}
+
+static struct ia64_pal_retval pal_proc_get_features(struct kvm_vcpu *vcpu)
+{
+
+	struct ia64_pal_retval result = {0, 0, 0, 0};
+	long in0, in1, in2, in3;
+
+	kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3);
+	result.status = ia64_pal_proc_get_features(&result.v0, &result.v1,
+			&result.v2, in2);
+
+	return result;
+}
+
+static struct ia64_pal_retval pal_cache_info(struct kvm_vcpu *vcpu)
+{
+
+	pal_cache_config_info_t ci;
+	long status;
+	unsigned long in0, in1, in2, in3, r9, r10;
+
+	kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3);
+	status = ia64_pal_cache_config_info(in1, in2, &ci);
+	r9 = ci.pcci_info_1.pcci1_data;
+	r10 = ci.pcci_info_2.pcci2_data;
+	return ((struct ia64_pal_retval){status, r9, r10, 0});
+}
+
+#define GUEST_IMPL_VA_MSB	59
+#define GUEST_RID_BITS		18
+
+static struct ia64_pal_retval pal_vm_summary(struct kvm_vcpu *vcpu)
+{
+
+	pal_vm_info_1_u_t vminfo1;
+	pal_vm_info_2_u_t vminfo2;
+	struct ia64_pal_retval result;
+
+	PAL_CALL(result, PAL_VM_SUMMARY, 0, 0, 0);
+	if (!result.status) {
+		vminfo1.pvi1_val = result.v0;
+		vminfo1.pal_vm_info_1_s.max_itr_entry = 8;
+		vminfo1.pal_vm_info_1_s.max_dtr_entry = 8;
+		result.v0 = vminfo1.pvi1_val;
+		vminfo2.pal_vm_info_2_s.impl_va_msb = GUEST_IMPL_VA_MSB;
+		vminfo2.pal_vm_info_2_s.rid_size = GUEST_RID_BITS;
+		result.v1 = vminfo2.pvi2_val;
+	}
+
+	return result;
+}
+
+static struct ia64_pal_retval pal_vm_info(struct kvm_vcpu *vcpu)
+{
+	struct ia64_pal_retval result;
+
+	INIT_PAL_STATUS_UNIMPLEMENTED(result);
+
+	return result;
+}
+
+static  u64 kvm_get_pal_call_index(struct kvm_vcpu *vcpu)
+{
+	u64 index = 0;
+	struct exit_ctl_data *p;
+
+	p = kvm_get_exit_data(vcpu);
+	if (p && (p->exit_reason == EXIT_REASON_PAL_CALL))
+		index = p->u.pal_data.gr28;
+
+	return index;
+}
+
+int kvm_pal_emul(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+
+	u64 gr28;
+	struct ia64_pal_retval result;
+	int ret = 1;
+
+	gr28 = kvm_get_pal_call_index(vcpu);
+	/*printk("pal_call index:%lx\n",gr28);*/
+	switch (gr28) {
+	case PAL_CACHE_FLUSH:
+		result = pal_cache_flush(vcpu);
+		break;
+	case PAL_CACHE_SUMMARY:
+		result = pal_cache_summary(vcpu);
+		break;
+	case PAL_HALT_LIGHT:
+	{
+		vcpu->arch.timer_pending = 1;
+		INIT_PAL_STATUS_SUCCESS(result);
+		if (kvm_highest_pending_irq(vcpu) == -1)
+			ret = kvm_emulate_halt(vcpu);
+
+	}
+		break;
+
+	case PAL_FREQ_RATIOS:
+		result = pal_freq_ratios(vcpu);
+		break;
+
+	case PAL_FREQ_BASE:
+		result = pal_freq_base(vcpu);
+		break;
+
+	case PAL_LOGICAL_TO_PHYSICAL :
+		result = pal_logical_to_physica(vcpu);
+		break;
+
+	case PAL_VM_SUMMARY :
+		result = pal_vm_summary(vcpu);
+		break;
+
+	case PAL_VM_INFO :
+		result = pal_vm_info(vcpu);
+		break;
+	case PAL_PLATFORM_ADDR :
+		result = pal_platform_addr(vcpu);
+		break;
+	case PAL_CACHE_INFO:
+		result = pal_cache_info(vcpu);
+		break;
+	case PAL_PTCE_INFO:
+		INIT_PAL_STATUS_SUCCESS(result);
+		result.v1 = (1L << 32) | 1L;
+		break;
+	case PAL_VM_PAGE_SIZE:
+		result.status = ia64_pal_vm_page_size(&result.v0,
+							&result.v1);
+		break;
+	case PAL_RSE_INFO:
+		result.status = ia64_pal_rse_info(&result.v0,
+					(pal_hints_u_t *)&result.v1);
+		break;
+	case PAL_PROC_GET_FEATURES:
+		result = pal_proc_get_features(vcpu);
+		break;
+	case PAL_DEBUG_INFO:
+		result.status = ia64_pal_debug_info(&result.v0,
+							&result.v1);
+		break;
+	case PAL_VERSION:
+		result.status = ia64_pal_version(
+				(pal_version_u_t *)&result.v0,
+				(pal_version_u_t *)&result.v1);
+
+		break;
+	case PAL_FIXED_ADDR:
+		result.status = PAL_STATUS_SUCCESS;
+		result.v0 = vcpu->vcpu_id;
+		break;
+	default:
+		INIT_PAL_STATUS_UNIMPLEMENTED(result);
+		printk(KERN_WARNING"kvm: Unsupported pal call,"
+					" index:0x%lx\n", gr28);
+	}
+	set_pal_result(vcpu, result);
+	return ret;
+}
+
+static struct sal_ret_values sal_emulator(struct kvm *kvm,
+				long index, unsigned long in1,
+				unsigned long in2, unsigned long in3,
+				unsigned long in4, unsigned long in5,
+				unsigned long in6, unsigned long in7)
+{
+	unsigned long r9  = 0;
+	unsigned long r10 = 0;
+	long r11 = 0;
+	long status;
+
+	status = 0;
+	switch (index) {
+	case SAL_FREQ_BASE:
+		status = ia64_sal_freq_base(in1, &r9, &r10);
+		break;
+	case SAL_PCI_CONFIG_READ:
+		printk(KERN_WARNING"kvm: Not allowed to call here!"
+			" SAL_PCI_CONFIG_READ\n");
+		break;
+	case SAL_PCI_CONFIG_WRITE:
+		printk(KERN_WARNING"kvm: Not allowed to call here!"
+			" SAL_PCI_CONFIG_WRITE\n");
+		break;
+	case SAL_SET_VECTORS:
+		if (in1 == SAL_VECTOR_OS_BOOT_RENDEZ) {
+			if (in4 != 0 || in5 != 0 || in6 != 0 || in7 != 0) {
+				status = -2;
+			} else {
+				kvm->arch.rdv_sal_data.boot_ip = in2;
+				kvm->arch.rdv_sal_data.boot_gp = in3;
+			}
+			printk("Rendvous called! iip:%lx\n\n", in2);
+		} else
+			printk(KERN_WARNING"kvm: CALLED SAL_SET_VECTORS %lu."
+							"ignored...\n", in1);
+		break;
+	case SAL_GET_STATE_INFO:
+		/* No more info.  */
+		status = -5;
+		r9 = 0;
+		break;
+	case SAL_GET_STATE_INFO_SIZE:
+		/* Return a dummy size.  */
+		status = 0;
+		r9 = 128;
+		break;
+	case SAL_CLEAR_STATE_INFO:
+		/* Noop.  */
+		break;
+	case SAL_MC_RENDEZ:
+		printk(KERN_WARNING
+			"kvm: called SAL_MC_RENDEZ. ignored...\n");
+		break;
+	case SAL_MC_SET_PARAMS:
+		printk(KERN_WARNING
+			"kvm: called  SAL_MC_SET_PARAMS.ignored!\n");
+		break;
+	case SAL_CACHE_FLUSH:
+		if (1) {
+			/*Flush using SAL.
+			This method is faster but has a side
+			effect on other vcpu running on
+			this cpu.  */
+			status = ia64_sal_cache_flush(in1);
+		} else {
+			/*Maybe need to implement the method
+			without side effect!*/
+			status = 0;
+		}
+		break;
+	case SAL_CACHE_INIT:
+		printk(KERN_WARNING
+			"kvm: called SAL_CACHE_INIT.  ignored...\n");
+		break;
+	case SAL_UPDATE_PAL:
+		printk(KERN_WARNING
+			"kvm: CALLED SAL_UPDATE_PAL.  ignored...\n");
+		break;
+	default:
+		printk(KERN_WARNING"kvm: called SAL_CALL with unknown index."
+						" index:%ld\n", index);
+		status = -1;
+		break;
+	}
+	return ((struct sal_ret_values) {status, r9, r10, r11});
+}
+
+static void kvm_get_sal_call_data(struct kvm_vcpu *vcpu, u64 *in0, u64 *in1,
+		u64 *in2, u64 *in3, u64 *in4, u64 *in5, u64 *in6, u64 *in7){
+
+	struct exit_ctl_data *p;
+
+	p = kvm_get_exit_data(vcpu);
+
+	if (p) {
+		if (p->exit_reason == EXIT_REASON_SAL_CALL) {
+			*in0 = p->u.sal_data.in0;
+			*in1 = p->u.sal_data.in1;
+			*in2 = p->u.sal_data.in2;
+			*in3 = p->u.sal_data.in3;
+			*in4 = p->u.sal_data.in4;
+			*in5 = p->u.sal_data.in5;
+			*in6 = p->u.sal_data.in6;
+			*in7 = p->u.sal_data.in7;
+			return ;
+		}
+	}
+	*in0 = 0;
+}
+
+void kvm_sal_emul(struct kvm_vcpu *vcpu)
+{
+
+	struct sal_ret_values result;
+	u64 index, in1, in2, in3, in4, in5, in6, in7;
+
+	kvm_get_sal_call_data(vcpu, &index, &in1, &in2,
+			&in3, &in4, &in5, &in6, &in7);
+	result = sal_emulator(vcpu->kvm, index, in1, in2, in3,
+					in4, in5, in6, in7);
+	set_sal_result(vcpu, result);
+}
diff --git a/arch/ia64/kvm/kvm_minstate.h b/arch/ia64/kvm/kvm_minstate.h
new file mode 100644
index 0000000..13980d9
--- /dev/null
+++ b/arch/ia64/kvm/kvm_minstate.h
@@ -0,0 +1,273 @@
+/*
+ *  kvm_minstate.h: min save macros
+ *  Copyright (c) 2007, Intel Corporation.
+ *
+ *  Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com)
+ *  Xiantao Zhang (xiantao.zhang@intel.com)
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+
+#include <asm/asmmacro.h>
+#include <asm/types.h>
+#include <asm/kregs.h>
+#include "asm-offsets.h"
+
+#define KVM_MINSTATE_START_SAVE_MIN	     					\
+	mov ar.rsc = 0;/* set enforced lazy mode, pl 0, little-endian, loadrs=0 */\
+	;;									\
+	mov.m r28 = ar.rnat;                                  			\
+	addl r22 = VMM_RBS_OFFSET,r1;            /* compute base of RBS */	\
+	;;									\
+	lfetch.fault.excl.nt1 [r22];						\
+	addl r1 = IA64_STK_OFFSET-VMM_PT_REGS_SIZE,r1;  /* compute base of memory stack */  \
+	mov r23 = ar.bspstore;			/* save ar.bspstore */          \
+	;;									\
+	mov ar.bspstore = r22;				/* switch to kernel RBS */\
+	;;									\
+	mov r18 = ar.bsp;							\
+	mov ar.rsc = 0x3;     /* set eager mode, pl 0, little-endian, loadrs=0 */
+
+
+
+#define KVM_MINSTATE_END_SAVE_MIN						\
+	bsw.1;          /* switch back to bank 1 (must be last in insn group) */\
+	;;
+
+
+#define PAL_VSA_SYNC_READ						\
+	/* begin to call pal vps sync_read */				\
+	add r25 = VMM_VPD_BASE_OFFSET, r21;				\
+	adds r20 = VMM_VCPU_VSA_BASE_OFFSET, r21;  /* entry point */	\
+	;;								\
+	ld8 r25 = [r25];      /* read vpd base */			\
+	ld8 r20 = [r20];						\
+	;;								\
+	add r20 = PAL_VPS_SYNC_READ,r20;				\
+	;;								\
+{ .mii;									\
+	nop 0x0;							\
+	mov r24 = ip;							\
+	mov b0 = r20;							\
+	;;								\
+};									\
+{ .mmb;									\
+	add r24 = 0x20, r24;						\
+	nop 0x0;							\
+	br.cond.sptk b0;        /*  call the service */			\
+	;;								\
+};
+
+
+
+#define KVM_MINSTATE_GET_CURRENT(reg)   mov reg=r21
+
+/*
+ * KVM_DO_SAVE_MIN switches to the kernel stacks (if necessary) and saves
+ * the minimum state necessary that allows us to turn psr.ic back
+ * on.
+ *
+ * Assumed state upon entry:
+ *  psr.ic: off
+ *  r31:	contains saved predicates (pr)
+ *
+ * Upon exit, the state is as follows:
+ *  psr.ic: off
+ *   r2 = points to &pt_regs.r16
+ *   r8 = contents of ar.ccv
+ *   r9 = contents of ar.csd
+ *  r10 = contents of ar.ssd
+ *  r11 = FPSR_DEFAULT
+ *  r12 = kernel sp (kernel virtual address)
+ *  r13 = points to current task_struct (kernel virtual address)
+ *  p15 = TRUE if psr.i is set in cr.ipsr
+ *  predicate registers (other than p2, p3, and p15), b6, r3, r14, r15:
+ *	  preserved
+ *
+ * Note that psr.ic is NOT turned on by this macro.  This is so that
+ * we can pass interruption state as arguments to a handler.
+ */
+
+
+#define PT(f) (VMM_PT_REGS_##f##_OFFSET)
+
+#define KVM_DO_SAVE_MIN(COVER,SAVE_IFS,EXTRA)			\
+	KVM_MINSTATE_GET_CURRENT(r16);  /* M (or M;;I) */	\
+	mov r27 = ar.rsc;         /* M */			\
+	mov r20 = r1;         /* A */				\
+	mov r25 = ar.unat;        /* M */			\
+	mov r29 = cr.ipsr;        /* M */			\
+	mov r26 = ar.pfs;         /* I */			\
+	mov r18 = cr.isr;         				\
+	COVER;              /* B;; (or nothing) */		\
+	;;							\
+	tbit.z p0,p15 = r29,IA64_PSR_I_BIT;			\
+	mov r1 = r16;						\
+/*	mov r21=r16;	*/					\
+	/* switch from user to kernel RBS: */			\
+	;;							\
+	invala;             /* M */				\
+	SAVE_IFS;						\
+	;;							\
+	KVM_MINSTATE_START_SAVE_MIN				\
+	adds r17 = 2*L1_CACHE_BYTES,r1;/* cache-line size */	\
+	adds r16 = PT(CR_IPSR),r1;				\
+	;;							\
+	lfetch.fault.excl.nt1 [r17],L1_CACHE_BYTES;		\
+	st8 [r16] = r29;      /* save cr.ipsr */		\
+	;;							\
+	lfetch.fault.excl.nt1 [r17];				\
+	tbit.nz p15,p0 = r29,IA64_PSR_I_BIT;			\
+	mov r29 = b0						\
+	;;							\
+	adds r16 = PT(R8),r1; /* initialize first base pointer */\
+	adds r17 = PT(R9),r1; /* initialize second base pointer */\
+	;;							\
+.mem.offset 0,0; st8.spill [r16] = r8,16;			\
+.mem.offset 8,0; st8.spill [r17] = r9,16;			\
+	;;							\
+.mem.offset 0,0; st8.spill [r16] = r10,24;			\
+.mem.offset 8,0; st8.spill [r17] = r11,24;			\
+	;;							\
+	mov r9 = cr.iip;         /* M */			\
+	mov r10 = ar.fpsr;        /* M */			\
+	;;							\
+	st8 [r16] = r9,16;    /* save cr.iip */			\
+	st8 [r17] = r30,16;   /* save cr.ifs */			\
+	sub r18 = r18,r22;    /* r18=RSE.ndirty*8 */		\
+	;;							\
+	st8 [r16] = r25,16;   /* save ar.unat */		\
+	st8 [r17] = r26,16;    /* save ar.pfs */		\
+	shl r18 = r18,16;     /* calu ar.rsc used for "loadrs" */\
+	;;							\
+	st8 [r16] = r27,16;   /* save ar.rsc */			\
+	st8 [r17] = r28,16;   /* save ar.rnat */		\
+	;;          /* avoid RAW on r16 & r17 */		\
+	st8 [r16] = r23,16;   /* save ar.bspstore */		\
+	st8 [r17] = r31,16;   /* save predicates */		\
+	;;							\
+	st8 [r16] = r29,16;   /* save b0 */			\
+	st8 [r17] = r18,16;   /* save ar.rsc value for "loadrs" */\
+	;;							\
+.mem.offset 0,0; st8.spill [r16] = r20,16;/* save original r1 */  \
+.mem.offset 8,0; st8.spill [r17] = r12,16;			\
+	adds r12 = -16,r1;    /* switch to kernel memory stack */  \
+	;;							\
+.mem.offset 0,0; st8.spill [r16] = r13,16;			\
+.mem.offset 8,0; st8.spill [r17] = r10,16;	/* save ar.fpsr */\
+	mov r13 = r21;   /* establish `current' */		\
+	;;							\
+.mem.offset 0,0; st8.spill [r16] = r15,16;			\
+.mem.offset 8,0; st8.spill [r17] = r14,16;			\
+	;;							\
+.mem.offset 0,0; st8.spill [r16] = r2,16;			\
+.mem.offset 8,0; st8.spill [r17] = r3,16;			\
+	adds r2 = VMM_PT_REGS_R16_OFFSET,r1;			\
+	 ;;							\
+	adds r16 = VMM_VCPU_IIPA_OFFSET,r13;			\
+	adds r17 = VMM_VCPU_ISR_OFFSET,r13;			\
+	mov r26 = cr.iipa;					\
+	mov r27 = cr.isr;					\
+	;;							\
+	st8 [r16] = r26;					\
+	st8 [r17] = r27;					\
+	;;							\
+	EXTRA;							\
+	mov r8 = ar.ccv;					\
+	mov r9 = ar.csd;					\
+	mov r10 = ar.ssd;					\
+	movl r11 = FPSR_DEFAULT;   /* L-unit */			\
+	adds r17 = VMM_VCPU_GP_OFFSET,r13;			\
+	;;							\
+	ld8 r1 = [r17];/* establish kernel global pointer */	\
+	;;							\
+	PAL_VSA_SYNC_READ					\
+	KVM_MINSTATE_END_SAVE_MIN
+
+/*
+ * SAVE_REST saves the remainder of pt_regs (with psr.ic on).
+ *
+ * Assumed state upon entry:
+ *  psr.ic: on
+ *  r2: points to &pt_regs.f6
+ *  r3: points to &pt_regs.f7
+ *  r8: contents of ar.ccv
+ *  r9: contents of ar.csd
+ *  r10:	contents of ar.ssd
+ *  r11:	FPSR_DEFAULT
+ *
+ * Registers r14 and r15 are guaranteed not to be touched by SAVE_REST.
+ */
+#define KVM_SAVE_REST				\
+.mem.offset 0,0; st8.spill [r2] = r16,16;	\
+.mem.offset 8,0; st8.spill [r3] = r17,16;	\
+	;;				\
+.mem.offset 0,0; st8.spill [r2] = r18,16;	\
+.mem.offset 8,0; st8.spill [r3] = r19,16;	\
+	;;				\
+.mem.offset 0,0; st8.spill [r2] = r20,16;	\
+.mem.offset 8,0; st8.spill [r3] = r21,16;	\
+	mov r18=b6;			\
+	;;				\
+.mem.offset 0,0; st8.spill [r2] = r22,16;	\
+.mem.offset 8,0; st8.spill [r3] = r23,16;	\
+	mov r19 = b7;				\
+	;;					\
+.mem.offset 0,0; st8.spill [r2] = r24,16;	\
+.mem.offset 8,0; st8.spill [r3] = r25,16;	\
+	;;					\
+.mem.offset 0,0; st8.spill [r2] = r26,16;	\
+.mem.offset 8,0; st8.spill [r3] = r27,16;	\
+	;;					\
+.mem.offset 0,0; st8.spill [r2] = r28,16;	\
+.mem.offset 8,0; st8.spill [r3] = r29,16;	\
+	;;					\
+.mem.offset 0,0; st8.spill [r2] = r30,16;	\
+.mem.offset 8,0; st8.spill [r3] = r31,32;	\
+	;;					\
+	mov ar.fpsr = r11;			\
+	st8 [r2] = r8,8;			\
+	adds r24 = PT(B6)-PT(F7),r3;		\
+	adds r25 = PT(B7)-PT(F7),r3;		\
+	;;					\
+	st8 [r24] = r18,16;       /* b6 */	\
+	st8 [r25] = r19,16;       /* b7 */	\
+	adds r2 = PT(R4)-PT(F6),r2;		\
+	adds r3 = PT(R5)-PT(F7),r3;		\
+	;;					\
+	st8 [r24] = r9;	/* ar.csd */		\
+	st8 [r25] = r10;	/* ar.ssd */	\
+	;;					\
+	mov r18 = ar.unat;			\
+	adds r19 = PT(EML_UNAT)-PT(R4),r2;	\
+	;;					\
+	st8 [r19] = r18; /* eml_unat */ 	\
+
+
+#define KVM_SAVE_EXTRA				\
+.mem.offset 0,0; st8.spill [r2] = r4,16;	\
+.mem.offset 8,0; st8.spill [r3] = r5,16;	\
+	;;					\
+.mem.offset 0,0; st8.spill [r2] = r6,16;	\
+.mem.offset 8,0; st8.spill [r3] = r7;		\
+	;;					\
+	mov r26 = ar.unat;			\
+	;;					\
+	st8 [r2] = r26;/* eml_unat */ 		\
+
+#define KVM_SAVE_MIN_WITH_COVER		KVM_DO_SAVE_MIN(cover, mov r30 = cr.ifs,)
+#define KVM_SAVE_MIN_WITH_COVER_R19	KVM_DO_SAVE_MIN(cover, mov r30 = cr.ifs, mov r15 = r19)
+#define KVM_SAVE_MIN			KVM_DO_SAVE_MIN(     , mov r30 = r0, )
diff --git a/arch/ia64/kvm/lapic.h b/arch/ia64/kvm/lapic.h
new file mode 100644
index 0000000..6d6cbcb
--- /dev/null
+++ b/arch/ia64/kvm/lapic.h
@@ -0,0 +1,25 @@
+#ifndef __KVM_IA64_LAPIC_H
+#define __KVM_IA64_LAPIC_H
+
+#include <linux/kvm_host.h>
+
+/*
+ * vlsapic
+ */
+struct kvm_lapic{
+	struct kvm_vcpu *vcpu;
+	uint64_t insvc[4];
+	uint64_t vhpi;
+	uint8_t xtp;
+	uint8_t pal_init_pending;
+	uint8_t pad[2];
+};
+
+int kvm_create_lapic(struct kvm_vcpu *vcpu);
+void kvm_free_lapic(struct kvm_vcpu *vcpu);
+
+int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest);
+int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda);
+int kvm_apic_set_irq(struct kvm_vcpu *vcpu, u8 vec, u8 trig);
+
+#endif
diff --git a/arch/ia64/kvm/misc.h b/arch/ia64/kvm/misc.h
new file mode 100644
index 0000000..e585c46
--- /dev/null
+++ b/arch/ia64/kvm/misc.h
@@ -0,0 +1,93 @@
+#ifndef __KVM_IA64_MISC_H
+#define __KVM_IA64_MISC_H
+
+#include <linux/kvm_host.h>
+/*
+ * misc.h
+ * 	Copyright (C) 2007, Intel Corporation.
+ *  	Xiantao Zhang  (xiantao.zhang@intel.com)
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+/*
+ *Return p2m base address at host side!
+ */
+static inline uint64_t *kvm_host_get_pmt(struct kvm *kvm)
+{
+	return (uint64_t *)(kvm->arch.vm_base + KVM_P2M_OFS);
+}
+
+static inline void kvm_set_pmt_entry(struct kvm *kvm, gfn_t gfn,
+		u64 paddr, u64 mem_flags)
+{
+	uint64_t *pmt_base = kvm_host_get_pmt(kvm);
+	unsigned long pte;
+
+	pte = PAGE_ALIGN(paddr) | mem_flags;
+	pmt_base[gfn] = pte;
+}
+
+/*Function for translating host address to guest address*/
+
+static inline void *to_guest(struct kvm *kvm, void *addr)
+{
+	return (void *)((unsigned long)(addr) - kvm->arch.vm_base +
+			KVM_VM_DATA_BASE);
+}
+
+/*Function for translating guest address to host address*/
+
+static inline void *to_host(struct kvm *kvm, void *addr)
+{
+	return (void *)((unsigned long)addr - KVM_VM_DATA_BASE
+			+ kvm->arch.vm_base);
+}
+
+/* Get host context of the vcpu */
+static inline union context *kvm_get_host_context(struct kvm_vcpu *vcpu)
+{
+	union context *ctx = &vcpu->arch.host;
+	return to_guest(vcpu->kvm, ctx);
+}
+
+/* Get guest context of the vcpu */
+static inline union context *kvm_get_guest_context(struct kvm_vcpu *vcpu)
+{
+	union context *ctx = &vcpu->arch.guest;
+	return  to_guest(vcpu->kvm, ctx);
+}
+
+/* kvm get exit data from gvmm! */
+static inline struct exit_ctl_data *kvm_get_exit_data(struct kvm_vcpu *vcpu)
+{
+	return &vcpu->arch.exit_data;
+}
+
+/*kvm get vcpu ioreq for kvm module!*/
+static inline struct kvm_mmio_req *kvm_get_vcpu_ioreq(struct kvm_vcpu *vcpu)
+{
+	struct exit_ctl_data *p_ctl_data;
+
+	if (vcpu) {
+		p_ctl_data = kvm_get_exit_data(vcpu);
+		if (p_ctl_data->exit_reason == EXIT_REASON_MMIO_INSTRUCTION)
+			return &p_ctl_data->u.ioreq;
+	}
+
+	return NULL;
+}
+
+#endif
diff --git a/arch/ia64/kvm/mmio.c b/arch/ia64/kvm/mmio.c
new file mode 100644
index 0000000..351bf70
--- /dev/null
+++ b/arch/ia64/kvm/mmio.c
@@ -0,0 +1,341 @@
+/*
+ * mmio.c: MMIO emulation components.
+ * Copyright (c) 2004, Intel Corporation.
+ *  Yaozu Dong (Eddie Dong) (Eddie.dong@intel.com)
+ *  Kun Tian (Kevin Tian) (Kevin.tian@intel.com)
+ *
+ * Copyright (c) 2007 Intel Corporation  KVM support.
+ * Xuefei Xu (Anthony Xu) (anthony.xu@intel.com)
+ * Xiantao Zhang  (xiantao.zhang@intel.com)
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+#include <linux/kvm_host.h>
+
+#include "vcpu.h"
+
+static void vlsapic_write_xtp(struct kvm_vcpu *v, uint8_t val)
+{
+	VLSAPIC_XTP(v) = val;
+}
+
+/*
+ * LSAPIC OFFSET
+ */
+#define PIB_LOW_HALF(ofst)     !(ofst & (1 << 20))
+#define PIB_OFST_INTA          0x1E0000
+#define PIB_OFST_XTP           0x1E0008
+
+/*
+ * execute write IPI op.
+ */
+static void vlsapic_write_ipi(struct kvm_vcpu *vcpu,
+					uint64_t addr, uint64_t data)
+{
+	struct exit_ctl_data *p = &current_vcpu->arch.exit_data;
+	unsigned long psr;
+
+	local_irq_save(psr);
+
+	p->exit_reason = EXIT_REASON_IPI;
+	p->u.ipi_data.addr.val = addr;
+	p->u.ipi_data.data.val = data;
+	vmm_transition(current_vcpu);
+
+	local_irq_restore(psr);
+
+}
+
+void lsapic_write(struct kvm_vcpu *v, unsigned long addr,
+			unsigned long length, unsigned long val)
+{
+	addr &= (PIB_SIZE - 1);
+
+	switch (addr) {
+	case PIB_OFST_INTA:
+		/*panic_domain(NULL, "Undefined write on PIB INTA\n");*/
+		panic_vm(v);
+		break;
+	case PIB_OFST_XTP:
+		if (length == 1) {
+			vlsapic_write_xtp(v, val);
+		} else {
+			/*panic_domain(NULL,
+			"Undefined write on PIB XTP\n");*/
+			panic_vm(v);
+		}
+		break;
+	default:
+		if (PIB_LOW_HALF(addr)) {
+			/*lower half */
+			if (length != 8)
+				/*panic_domain(NULL,
+				"Can't LHF write with size %ld!\n",
+				length);*/
+				panic_vm(v);
+			else
+				vlsapic_write_ipi(v, addr, val);
+		} else {   /*	upper half
+				printk("IPI-UHF write %lx\n",addr);*/
+			panic_vm(v);
+		}
+		break;
+	}
+}
+
+unsigned long lsapic_read(struct kvm_vcpu *v, unsigned long addr,
+		unsigned long length)
+{
+	uint64_t result = 0;
+
+	addr &= (PIB_SIZE - 1);
+
+	switch (addr) {
+	case PIB_OFST_INTA:
+		if (length == 1) /* 1 byte load */
+			; /* There is no i8259, there is no INTA access*/
+		else
+			/*panic_domain(NULL,"Undefined read on PIB INTA\n"); */
+			panic_vm(v);
+
+		break;
+	case PIB_OFST_XTP:
+		if (length == 1) {
+			result = VLSAPIC_XTP(v);
+			/* printk("read xtp %lx\n", result); */
+		} else {
+			/*panic_domain(NULL,
+			"Undefined read on PIB XTP\n");*/
+			panic_vm(v);
+		}
+		break;
+	default:
+		panic_vm(v);
+		break;
+	}
+	return result;
+}
+
+static void mmio_access(struct kvm_vcpu *vcpu, u64 src_pa, u64 *dest,
+					u16 s, int ma, int dir)
+{
+	unsigned long iot;
+	struct exit_ctl_data *p = &vcpu->arch.exit_data;
+	unsigned long psr;
+
+	iot = __gpfn_is_io(src_pa >> PAGE_SHIFT);
+
+	local_irq_save(psr);
+
+	/*Intercept the acces for PIB range*/
+	if (iot == GPFN_PIB) {
+		if (!dir)
+			lsapic_write(vcpu, src_pa, s, *dest);
+		else
+			*dest = lsapic_read(vcpu, src_pa, s);
+		goto out;
+	}
+	p->exit_reason = EXIT_REASON_MMIO_INSTRUCTION;
+	p->u.ioreq.addr = src_pa;
+	p->u.ioreq.size = s;
+	p->u.ioreq.dir = dir;
+	if (dir == IOREQ_WRITE)
+		p->u.ioreq.data = *dest;
+	p->u.ioreq.state = STATE_IOREQ_READY;
+	vmm_transition(vcpu);
+
+	if (p->u.ioreq.state == STATE_IORESP_READY) {
+		if (dir == IOREQ_READ)
+			*dest = p->u.ioreq.data;
+	} else
+		panic_vm(vcpu);
+out:
+	local_irq_restore(psr);
+	return ;
+}
+
+/*
+   dir 1: read 0:write
+   inst_type 0:integer 1:floating point
+ */
+#define SL_INTEGER	0	/* store/load interger*/
+#define SL_FLOATING	1     	/* store/load floating*/
+
+void emulate_io_inst(struct kvm_vcpu *vcpu, u64 padr, u64 ma)
+{
+	struct kvm_pt_regs *regs;
+	IA64_BUNDLE bundle;
+	int slot, dir = 0;
+	int inst_type = -1;
+	u16 size = 0;
+	u64 data, slot1a, slot1b, temp, update_reg;
+	s32 imm;
+	INST64 inst;
+
+	regs = vcpu_regs(vcpu);
+
+	if (fetch_code(vcpu, regs->cr_iip, &bundle)) {
+		/* if fetch code fail, return and try again */
+		return;
+	}
+	slot = ((struct ia64_psr *)&(regs->cr_ipsr))->ri;
+	if (!slot)
+		inst.inst = bundle.slot0;
+	else if (slot == 1) {
+		slot1a = bundle.slot1a;
+		slot1b = bundle.slot1b;
+		inst.inst = slot1a + (slot1b << 18);
+	} else if (slot == 2)
+		inst.inst = bundle.slot2;
+
+	/* Integer Load/Store */
+	if (inst.M1.major == 4 && inst.M1.m == 0 && inst.M1.x == 0) {
+		inst_type = SL_INTEGER;
+		size = (inst.M1.x6 & 0x3);
+		if ((inst.M1.x6 >> 2) > 0xb) {
+			/*write*/
+			dir = IOREQ_WRITE;
+			data = vcpu_get_gr(vcpu, inst.M4.r2);
+		} else if ((inst.M1.x6 >> 2) < 0xb) {
+			/*read*/
+			dir = IOREQ_READ;
+		}
+	} else if (inst.M2.major == 4 && inst.M2.m == 1 && inst.M2.x == 0) {
+		/* Integer Load + Reg update */
+		inst_type = SL_INTEGER;
+		dir = IOREQ_READ;
+		size = (inst.M2.x6 & 0x3);
+		temp = vcpu_get_gr(vcpu, inst.M2.r3);
+		update_reg = vcpu_get_gr(vcpu, inst.M2.r2);
+		temp += update_reg;
+		vcpu_set_gr(vcpu, inst.M2.r3, temp, 0);
+	} else if (inst.M3.major == 5) {
+		/*Integer Load/Store + Imm update*/
+		inst_type = SL_INTEGER;
+		size = (inst.M3.x6&0x3);
+		if ((inst.M5.x6 >> 2) > 0xb) {
+			/*write*/
+			dir = IOREQ_WRITE;
+			data = vcpu_get_gr(vcpu, inst.M5.r2);
+			temp = vcpu_get_gr(vcpu, inst.M5.r3);
+			imm = (inst.M5.s << 31) | (inst.M5.i << 30) |
+				(inst.M5.imm7 << 23);
+			temp += imm >> 23;
+			vcpu_set_gr(vcpu, inst.M5.r3, temp, 0);
+
+		} else if ((inst.M3.x6 >> 2) < 0xb) {
+			/*read*/
+			dir = IOREQ_READ;
+			temp = vcpu_get_gr(vcpu, inst.M3.r3);
+			imm = (inst.M3.s << 31) | (inst.M3.i << 30) |
+				(inst.M3.imm7 << 23);
+			temp += imm >> 23;
+			vcpu_set_gr(vcpu, inst.M3.r3, temp, 0);
+
+		}
+	} else if (inst.M9.major == 6 && inst.M9.x6 == 0x3B
+				&& inst.M9.m == 0 && inst.M9.x == 0) {
+		/* Floating-point spill*/
+		struct ia64_fpreg v;
+
+		inst_type = SL_FLOATING;
+		dir = IOREQ_WRITE;
+		vcpu_get_fpreg(vcpu, inst.M9.f2, &v);
+		/* Write high word. FIXME: this is a kludge!  */
+		v.u.bits[1] &= 0x3ffff;
+		mmio_access(vcpu, padr + 8, &v.u.bits[1], 8, ma, IOREQ_WRITE);
+		data = v.u.bits[0];
+		size = 3;
+	} else if (inst.M10.major == 7 && inst.M10.x6 == 0x3B) {
+		/* Floating-point spill + Imm update */
+		struct ia64_fpreg v;
+
+		inst_type = SL_FLOATING;
+		dir = IOREQ_WRITE;
+		vcpu_get_fpreg(vcpu, inst.M10.f2, &v);
+		temp = vcpu_get_gr(vcpu, inst.M10.r3);
+		imm = (inst.M10.s << 31) | (inst.M10.i << 30) |
+			(inst.M10.imm7 << 23);
+		temp += imm >> 23;
+		vcpu_set_gr(vcpu, inst.M10.r3, temp, 0);
+
+		/* Write high word.FIXME: this is a kludge!  */
+		v.u.bits[1] &= 0x3ffff;
+		mmio_access(vcpu, padr + 8, &v.u.bits[1], 8, ma, IOREQ_WRITE);
+		data = v.u.bits[0];
+		size = 3;
+	} else if (inst.M10.major == 7 && inst.M10.x6 == 0x31) {
+		/* Floating-point stf8 + Imm update */
+		struct ia64_fpreg v;
+		inst_type = SL_FLOATING;
+		dir = IOREQ_WRITE;
+		size = 3;
+		vcpu_get_fpreg(vcpu, inst.M10.f2, &v);
+		data = v.u.bits[0]; /* Significand.  */
+		temp = vcpu_get_gr(vcpu, inst.M10.r3);
+		imm = (inst.M10.s << 31) | (inst.M10.i << 30) |
+			(inst.M10.imm7 << 23);
+		temp += imm >> 23;
+		vcpu_set_gr(vcpu, inst.M10.r3, temp, 0);
+	} else if (inst.M15.major == 7 && inst.M15.x6 >= 0x2c
+			&& inst.M15.x6 <= 0x2f) {
+		temp = vcpu_get_gr(vcpu, inst.M15.r3);
+		imm = (inst.M15.s << 31) | (inst.M15.i << 30) |
+			(inst.M15.imm7 << 23);
+		temp += imm >> 23;
+		vcpu_set_gr(vcpu, inst.M15.r3, temp, 0);
+
+		vcpu_increment_iip(vcpu);
+		return;
+	} else if (inst.M12.major == 6 && inst.M12.m == 1
+			&& inst.M12.x == 1 && inst.M12.x6 == 1) {
+		/* Floating-point Load Pair + Imm ldfp8 M12*/
+		struct ia64_fpreg v;
+
+		inst_type = SL_FLOATING;
+		dir = IOREQ_READ;
+		size = 8;     /*ldfd*/
+		mmio_access(vcpu, padr, &data, size, ma, dir);
+		v.u.bits[0] = data;
+		v.u.bits[1] = 0x1003E;
+		vcpu_set_fpreg(vcpu, inst.M12.f1, &v);
+		padr += 8;
+		mmio_access(vcpu, padr, &data, size, ma, dir);
+		v.u.bits[0] = data;
+		v.u.bits[1] = 0x1003E;
+		vcpu_set_fpreg(vcpu, inst.M12.f2, &v);
+		padr += 8;
+		vcpu_set_gr(vcpu, inst.M12.r3, padr, 0);
+		vcpu_increment_iip(vcpu);
+		return;
+	} else {
+		inst_type = -1;
+		panic_vm(vcpu);
+	}
+
+	size = 1 << size;
+	if (dir == IOREQ_WRITE) {
+		mmio_access(vcpu, padr, &data, size, ma, dir);
+	} else {
+		mmio_access(vcpu, padr, &data, size, ma, dir);
+		if (inst_type == SL_INTEGER)
+			vcpu_set_gr(vcpu, inst.M1.r1, data, 0);
+		else
+			panic_vm(vcpu);
+
+	}
+	vcpu_increment_iip(vcpu);
+}
diff --git a/arch/ia64/kvm/optvfault.S b/arch/ia64/kvm/optvfault.S
new file mode 100644
index 0000000..e4f15d6
--- /dev/null
+++ b/arch/ia64/kvm/optvfault.S
@@ -0,0 +1,918 @@
+/*
+ * arch/ia64/vmx/optvfault.S
+ * optimize virtualization fault handler
+ *
+ * Copyright (C) 2006 Intel Co
+ *	Xuefei Xu (Anthony Xu) <anthony.xu@intel.com>
+ */
+
+#include <asm/asmmacro.h>
+#include <asm/processor.h>
+
+#include "vti.h"
+#include "asm-offsets.h"
+
+#define ACCE_MOV_FROM_AR
+#define ACCE_MOV_FROM_RR
+#define ACCE_MOV_TO_RR
+#define ACCE_RSM
+#define ACCE_SSM
+#define ACCE_MOV_TO_PSR
+#define ACCE_THASH
+
+//mov r1=ar3
+GLOBAL_ENTRY(kvm_asm_mov_from_ar)
+#ifndef ACCE_MOV_FROM_AR
+	br.many kvm_virtualization_fault_back
+#endif
+	add r18=VMM_VCPU_ITC_OFS_OFFSET, r21
+	add r16=VMM_VCPU_LAST_ITC_OFFSET,r21
+	extr.u r17=r25,6,7
+	;;
+	ld8 r18=[r18]
+	mov r19=ar.itc
+	mov r24=b0
+	;;
+	add r19=r19,r18
+	addl r20=@gprel(asm_mov_to_reg),gp
+	;;
+	st8 [r16] = r19
+	adds r30=kvm_resume_to_guest-asm_mov_to_reg,r20
+	shladd r17=r17,4,r20
+	;;
+	mov b0=r17
+	br.sptk.few b0
+	;;
+END(kvm_asm_mov_from_ar)
+
+
+// mov r1=rr[r3]
+GLOBAL_ENTRY(kvm_asm_mov_from_rr)
+#ifndef ACCE_MOV_FROM_RR
+	br.many kvm_virtualization_fault_back
+#endif
+	extr.u r16=r25,20,7
+	extr.u r17=r25,6,7
+	addl r20=@gprel(asm_mov_from_reg),gp
+	;;
+	adds r30=kvm_asm_mov_from_rr_back_1-asm_mov_from_reg,r20
+	shladd r16=r16,4,r20
+	mov r24=b0
+	;;
+	add r27=VMM_VCPU_VRR0_OFFSET,r21
+	mov b0=r16
+	br.many b0
+	;;
+kvm_asm_mov_from_rr_back_1:
+	adds r30=kvm_resume_to_guest-asm_mov_from_reg,r20
+	adds r22=asm_mov_to_reg-asm_mov_from_reg,r20
+	shr.u r26=r19,61
+	;;
+	shladd r17=r17,4,r22
+	shladd r27=r26,3,r27
+	;;
+	ld8 r19=[r27]
+	mov b0=r17
+	br.many b0
+END(kvm_asm_mov_from_rr)
+
+
+// mov rr[r3]=r2
+GLOBAL_ENTRY(kvm_asm_mov_to_rr)
+#ifndef ACCE_MOV_TO_RR
+	br.many kvm_virtualization_fault_back
+#endif
+	extr.u r16=r25,20,7
+	extr.u r17=r25,13,7
+	addl r20=@gprel(asm_mov_from_reg),gp
+	;;
+	adds r30=kvm_asm_mov_to_rr_back_1-asm_mov_from_reg,r20
+	shladd r16=r16,4,r20
+	mov r22=b0
+	;;
+	add r27=VMM_VCPU_VRR0_OFFSET,r21
+	mov b0=r16
+	br.many b0
+	;;
+kvm_asm_mov_to_rr_back_1:
+	adds r30=kvm_asm_mov_to_rr_back_2-asm_mov_from_reg,r20
+	shr.u r23=r19,61
+	shladd r17=r17,4,r20
+	;;
+	//if rr6, go back
+	cmp.eq p6,p0=6,r23
+	mov b0=r22
+	(p6) br.cond.dpnt.many kvm_virtualization_fault_back
+	;;
+	mov r28=r19
+	mov b0=r17
+	br.many b0
+kvm_asm_mov_to_rr_back_2:
+	adds r30=kvm_resume_to_guest-asm_mov_from_reg,r20
+	shladd r27=r23,3,r27
+	;; // vrr.rid<<4 |0xe
+	st8 [r27]=r19
+	mov b0=r30
+	;;
+	extr.u r16=r19,8,26
+	extr.u r18 =r19,2,6
+	mov r17 =0xe
+	;;
+	shladd r16 = r16, 4, r17
+	extr.u r19 =r19,0,8
+	;;
+	shl r16 = r16,8
+	;;
+	add r19 = r19, r16
+	;; //set ve 1
+	dep r19=-1,r19,0,1
+	cmp.lt p6,p0=14,r18
+	;;
+	(p6) mov r18=14
+	;;
+	(p6) dep r19=r18,r19,2,6
+	;;
+	cmp.eq p6,p0=0,r23
+	;;
+	cmp.eq.or p6,p0=4,r23
+	;;
+	adds r16=VMM_VCPU_MODE_FLAGS_OFFSET,r21
+	(p6) adds r17=VMM_VCPU_META_SAVED_RR0_OFFSET,r21
+	;;
+	ld4 r16=[r16]
+	cmp.eq p7,p0=r0,r0
+	(p6) shladd r17=r23,1,r17
+	;;
+	(p6) st8 [r17]=r19
+	(p6) tbit.nz p6,p7=r16,0
+	;;
+	(p7) mov rr[r28]=r19
+	mov r24=r22
+	br.many b0
+END(kvm_asm_mov_to_rr)
+
+
+//rsm
+GLOBAL_ENTRY(kvm_asm_rsm)
+#ifndef ACCE_RSM
+	br.many kvm_virtualization_fault_back
+#endif
+	add r16=VMM_VPD_BASE_OFFSET,r21
+	extr.u r26=r25,6,21
+	extr.u r27=r25,31,2
+	;;
+	ld8 r16=[r16]
+	extr.u r28=r25,36,1
+	dep r26=r27,r26,21,2
+	;;
+	add r17=VPD_VPSR_START_OFFSET,r16
+	add r22=VMM_VCPU_MODE_FLAGS_OFFSET,r21
+	//r26 is imm24
+	dep r26=r28,r26,23,1
+	;;
+	ld8 r18=[r17]
+	movl r28=IA64_PSR_IC+IA64_PSR_I+IA64_PSR_DT+IA64_PSR_SI
+	ld4 r23=[r22]
+	sub r27=-1,r26
+	mov r24=b0
+	;;
+	mov r20=cr.ipsr
+	or r28=r27,r28
+	and r19=r18,r27
+	;;
+	st8 [r17]=r19
+	and r20=r20,r28
+	/* Comment it out due to short of fp lazy alorgithm support
+	adds r27=IA64_VCPU_FP_PSR_OFFSET,r21
+	;;
+	ld8 r27=[r27]
+	;;
+	tbit.nz p8,p0= r27,IA64_PSR_DFH_BIT
+	;;
+	(p8) dep r20=-1,r20,IA64_PSR_DFH_BIT,1
+	*/
+	;;
+	mov cr.ipsr=r20
+	tbit.nz p6,p0=r23,0
+	;;
+	tbit.z.or p6,p0=r26,IA64_PSR_DT_BIT
+	(p6) br.dptk kvm_resume_to_guest
+	;;
+	add r26=VMM_VCPU_META_RR0_OFFSET,r21
+	add r27=VMM_VCPU_META_RR0_OFFSET+8,r21
+	dep r23=-1,r23,0,1
+	;;
+	ld8 r26=[r26]
+	ld8 r27=[r27]
+	st4 [r22]=r23
+	dep.z r28=4,61,3
+	;;
+	mov rr[r0]=r26
+	;;
+	mov rr[r28]=r27
+	;;
+	srlz.d
+	br.many kvm_resume_to_guest
+END(kvm_asm_rsm)
+
+
+//ssm
+GLOBAL_ENTRY(kvm_asm_ssm)
+#ifndef ACCE_SSM
+	br.many kvm_virtualization_fault_back
+#endif
+	add r16=VMM_VPD_BASE_OFFSET,r21
+	extr.u r26=r25,6,21
+	extr.u r27=r25,31,2
+	;;
+	ld8 r16=[r16]
+	extr.u r28=r25,36,1
+	dep r26=r27,r26,21,2
+	;;  //r26 is imm24
+	add r27=VPD_VPSR_START_OFFSET,r16
+	dep r26=r28,r26,23,1
+	;;  //r19 vpsr
+	ld8 r29=[r27]
+	mov r24=b0
+	;;
+	add r22=VMM_VCPU_MODE_FLAGS_OFFSET,r21
+	mov r20=cr.ipsr
+	or r19=r29,r26
+	;;
+	ld4 r23=[r22]
+	st8 [r27]=r19
+	or r20=r20,r26
+	;;
+	mov cr.ipsr=r20
+	movl r28=IA64_PSR_DT+IA64_PSR_RT+IA64_PSR_IT
+	;;
+	and r19=r28,r19
+	tbit.z p6,p0=r23,0
+	;;
+	cmp.ne.or p6,p0=r28,r19
+	(p6) br.dptk kvm_asm_ssm_1
+	;;
+	add r26=VMM_VCPU_META_SAVED_RR0_OFFSET,r21
+	add r27=VMM_VCPU_META_SAVED_RR0_OFFSET+8,r21
+	dep r23=0,r23,0,1
+	;;
+	ld8 r26=[r26]
+	ld8 r27=[r27]
+	st4 [r22]=r23
+	dep.z r28=4,61,3
+	;;
+	mov rr[r0]=r26
+	;;
+	mov rr[r28]=r27
+	;;
+	srlz.d
+	;;
+kvm_asm_ssm_1:
+	tbit.nz p6,p0=r29,IA64_PSR_I_BIT
+	;;
+	tbit.z.or p6,p0=r19,IA64_PSR_I_BIT
+	(p6) br.dptk kvm_resume_to_guest
+	;;
+	add r29=VPD_VTPR_START_OFFSET,r16
+	add r30=VPD_VHPI_START_OFFSET,r16
+	;;
+	ld8 r29=[r29]
+	ld8 r30=[r30]
+	;;
+	extr.u r17=r29,4,4
+	extr.u r18=r29,16,1
+	;;
+	dep r17=r18,r17,4,1
+	;;
+	cmp.gt p6,p0=r30,r17
+	(p6) br.dpnt.few kvm_asm_dispatch_vexirq
+	br.many kvm_resume_to_guest
+END(kvm_asm_ssm)
+
+
+//mov psr.l=r2
+GLOBAL_ENTRY(kvm_asm_mov_to_psr)
+#ifndef ACCE_MOV_TO_PSR
+	br.many kvm_virtualization_fault_back
+#endif
+	add r16=VMM_VPD_BASE_OFFSET,r21
+	extr.u r26=r25,13,7 //r2
+	;;
+	ld8 r16=[r16]
+	addl r20=@gprel(asm_mov_from_reg),gp
+	;;
+	adds r30=kvm_asm_mov_to_psr_back-asm_mov_from_reg,r20
+	shladd r26=r26,4,r20
+	mov r24=b0
+	;;
+	add r27=VPD_VPSR_START_OFFSET,r16
+	mov b0=r26
+	br.many b0
+	;;
+kvm_asm_mov_to_psr_back:
+	ld8 r17=[r27]
+	add r22=VMM_VCPU_MODE_FLAGS_OFFSET,r21
+	dep r19=0,r19,32,32
+	;;
+	ld4 r23=[r22]
+	dep r18=0,r17,0,32
+	;;
+	add r30=r18,r19
+	movl r28=IA64_PSR_DT+IA64_PSR_RT+IA64_PSR_IT
+	;;
+	st8 [r27]=r30
+	and r27=r28,r30
+	and r29=r28,r17
+	;;
+	cmp.eq p5,p0=r29,r27
+	cmp.eq p6,p7=r28,r27
+	(p5) br.many kvm_asm_mov_to_psr_1
+	;;
+	//virtual to physical
+	(p7) add r26=VMM_VCPU_META_RR0_OFFSET,r21
+	(p7) add r27=VMM_VCPU_META_RR0_OFFSET+8,r21
+	(p7) dep r23=-1,r23,0,1
+	;;
+	//physical to virtual
+	(p6) add r26=VMM_VCPU_META_SAVED_RR0_OFFSET,r21
+	(p6) add r27=VMM_VCPU_META_SAVED_RR0_OFFSET+8,r21
+	(p6) dep r23=0,r23,0,1
+	;;
+	ld8 r26=[r26]
+	ld8 r27=[r27]
+	st4 [r22]=r23
+	dep.z r28=4,61,3
+	;;
+	mov rr[r0]=r26
+	;;
+	mov rr[r28]=r27
+	;;
+	srlz.d
+	;;
+kvm_asm_mov_to_psr_1:
+	mov r20=cr.ipsr
+	movl r28=IA64_PSR_IC+IA64_PSR_I+IA64_PSR_DT+IA64_PSR_SI+IA64_PSR_RT
+	;;
+	or r19=r19,r28
+	dep r20=0,r20,0,32
+	;;
+	add r20=r19,r20
+	mov b0=r24
+	;;
+	/* Comment it out due to short of fp lazy algorithm support
+	adds r27=IA64_VCPU_FP_PSR_OFFSET,r21
+	;;
+	ld8 r27=[r27]
+	;;
+	tbit.nz p8,p0=r27,IA64_PSR_DFH_BIT
+	;;
+	(p8) dep r20=-1,r20,IA64_PSR_DFH_BIT,1
+	;;
+	*/
+	mov cr.ipsr=r20
+	cmp.ne p6,p0=r0,r0
+	;;
+	tbit.nz.or p6,p0=r17,IA64_PSR_I_BIT
+	tbit.z.or p6,p0=r30,IA64_PSR_I_BIT
+	(p6) br.dpnt.few kvm_resume_to_guest
+	;;
+	add r29=VPD_VTPR_START_OFFSET,r16
+	add r30=VPD_VHPI_START_OFFSET,r16
+	;;
+	ld8 r29=[r29]
+	ld8 r30=[r30]
+	;;
+	extr.u r17=r29,4,4
+	extr.u r18=r29,16,1
+	;;
+	dep r17=r18,r17,4,1
+	;;
+	cmp.gt p6,p0=r30,r17
+	(p6) br.dpnt.few kvm_asm_dispatch_vexirq
+	br.many kvm_resume_to_guest
+END(kvm_asm_mov_to_psr)
+
+
+ENTRY(kvm_asm_dispatch_vexirq)
+//increment iip
+	mov r16=cr.ipsr
+	;;
+	extr.u r17=r16,IA64_PSR_RI_BIT,2
+	tbit.nz p6,p7=r16,IA64_PSR_RI_BIT+1
+	;;
+	(p6) mov r18=cr.iip
+	(p6) mov r17=r0
+	(p7) add r17=1,r17
+	;;
+	(p6) add r18=0x10,r18
+	dep r16=r17,r16,IA64_PSR_RI_BIT,2
+	;;
+	(p6) mov cr.iip=r18
+	mov cr.ipsr=r16
+	mov r30 =1
+	br.many kvm_dispatch_vexirq
+END(kvm_asm_dispatch_vexirq)
+
+// thash
+// TODO: add support when pta.vf = 1
+GLOBAL_ENTRY(kvm_asm_thash)
+#ifndef ACCE_THASH
+	br.many kvm_virtualization_fault_back
+#endif
+	extr.u r17=r25,20,7		// get r3 from opcode in r25
+	extr.u r18=r25,6,7		// get r1 from opcode in r25
+	addl r20=@gprel(asm_mov_from_reg),gp
+	;;
+	adds r30=kvm_asm_thash_back1-asm_mov_from_reg,r20
+	shladd r17=r17,4,r20	// get addr of MOVE_FROM_REG(r17)
+	adds r16=VMM_VPD_BASE_OFFSET,r21	// get vcpu.arch.priveregs
+	;;
+	mov r24=b0
+	;;
+	ld8 r16=[r16]		// get VPD addr
+	mov b0=r17
+	br.many b0			// r19 return value
+	;;
+kvm_asm_thash_back1:
+	shr.u r23=r19,61		// get RR number
+	adds r25=VMM_VCPU_VRR0_OFFSET,r21	// get vcpu->arch.vrr[0]'s addr
+	adds r16=VMM_VPD_VPTA_OFFSET,r16	// get vpta
+	;;
+	shladd r27=r23,3,r25	// get vcpu->arch.vrr[r23]'s addr
+	ld8 r17=[r16]		// get PTA
+	mov r26=1
+	;;
+	extr.u r29=r17,2,6		// get pta.size
+	ld8 r25=[r27]		// get vcpu->arch.vrr[r23]'s value
+	;;
+	extr.u r25=r25,2,6		// get rr.ps
+	shl r22=r26,r29		// 1UL << pta.size
+	;;
+	shr.u r23=r19,r25		// vaddr >> rr.ps
+	adds r26=3,r29		// pta.size + 3
+	shl r27=r17,3		// pta << 3
+	;;
+	shl r23=r23,3		// (vaddr >> rr.ps) << 3
+	shr.u r27=r27,r26		// (pta << 3) >> (pta.size+3)
+	movl r16=7<<61
+	;;
+	adds r22=-1,r22		// (1UL << pta.size) - 1
+	shl r27=r27,r29		// ((pta<<3)>>(pta.size+3))<<pta.size
+	and r19=r19,r16		// vaddr & VRN_MASK
+	;;
+	and r22=r22,r23		// vhpt_offset
+	or r19=r19,r27 // (vadr&VRN_MASK)|(((pta<<3)>>(pta.size + 3))<<pta.size)
+	adds r26=asm_mov_to_reg-asm_mov_from_reg,r20
+	;;
+	or r19=r19,r22		// calc pval
+	shladd r17=r18,4,r26
+	adds r30=kvm_resume_to_guest-asm_mov_from_reg,r20
+	;;
+	mov b0=r17
+	br.many b0
+END(kvm_asm_thash)
+
+#define MOV_TO_REG0	\
+{;			\
+	nop.b 0x0;		\
+	nop.b 0x0;		\
+	nop.b 0x0;		\
+	;;			\
+};
+
+
+#define MOV_TO_REG(n)	\
+{;			\
+	mov r##n##=r19;	\
+	mov b0=r30;	\
+	br.sptk.many b0;	\
+	;;			\
+};
+
+
+#define MOV_FROM_REG(n)	\
+{;				\
+	mov r19=r##n##;		\
+	mov b0=r30;		\
+	br.sptk.many b0;		\
+	;;				\
+};
+
+
+#define MOV_TO_BANK0_REG(n)			\
+ENTRY_MIN_ALIGN(asm_mov_to_bank0_reg##n##);	\
+{;						\
+	mov r26=r2;				\
+	mov r2=r19;				\
+	bsw.1;					\
+	;;						\
+};						\
+{;						\
+	mov r##n##=r2;				\
+	nop.b 0x0;					\
+	bsw.0;					\
+	;;						\
+};						\
+{;						\
+	mov r2=r26;				\
+	mov b0=r30;				\
+	br.sptk.many b0;				\
+	;;						\
+};						\
+END(asm_mov_to_bank0_reg##n##)
+
+
+#define MOV_FROM_BANK0_REG(n)			\
+ENTRY_MIN_ALIGN(asm_mov_from_bank0_reg##n##);	\
+{;						\
+	mov r26=r2;				\
+	nop.b 0x0;					\
+	bsw.1;					\
+	;;						\
+};						\
+{;						\
+	mov r2=r##n##;				\
+	nop.b 0x0;					\
+	bsw.0;					\
+	;;						\
+};						\
+{;						\
+	mov r19=r2;				\
+	mov r2=r26;				\
+	mov b0=r30;				\
+};						\
+{;						\
+	nop.b 0x0;					\
+	nop.b 0x0;					\
+	br.sptk.many b0;				\
+	;;						\
+};						\
+END(asm_mov_from_bank0_reg##n##)
+
+
+#define JMP_TO_MOV_TO_BANK0_REG(n)		\
+{;						\
+	nop.b 0x0;					\
+	nop.b 0x0;					\
+	br.sptk.many asm_mov_to_bank0_reg##n##;	\
+	;;						\
+}
+
+
+#define JMP_TO_MOV_FROM_BANK0_REG(n)		\
+{;						\
+	nop.b 0x0;					\
+	nop.b 0x0;					\
+	br.sptk.many asm_mov_from_bank0_reg##n##;	\
+	;;						\
+}
+
+
+MOV_FROM_BANK0_REG(16)
+MOV_FROM_BANK0_REG(17)
+MOV_FROM_BANK0_REG(18)
+MOV_FROM_BANK0_REG(19)
+MOV_FROM_BANK0_REG(20)
+MOV_FROM_BANK0_REG(21)
+MOV_FROM_BANK0_REG(22)
+MOV_FROM_BANK0_REG(23)
+MOV_FROM_BANK0_REG(24)
+MOV_FROM_BANK0_REG(25)
+MOV_FROM_BANK0_REG(26)
+MOV_FROM_BANK0_REG(27)
+MOV_FROM_BANK0_REG(28)
+MOV_FROM_BANK0_REG(29)
+MOV_FROM_BANK0_REG(30)
+MOV_FROM_BANK0_REG(31)
+
+
+// mov from reg table
+ENTRY(asm_mov_from_reg)
+	MOV_FROM_REG(0)
+	MOV_FROM_REG(1)
+	MOV_FROM_REG(2)
+	MOV_FROM_REG(3)
+	MOV_FROM_REG(4)
+	MOV_FROM_REG(5)
+	MOV_FROM_REG(6)
+	MOV_FROM_REG(7)
+	MOV_FROM_REG(8)
+	MOV_FROM_REG(9)
+	MOV_FROM_REG(10)
+	MOV_FROM_REG(11)
+	MOV_FROM_REG(12)
+	MOV_FROM_REG(13)
+	MOV_FROM_REG(14)
+	MOV_FROM_REG(15)
+	JMP_TO_MOV_FROM_BANK0_REG(16)
+	JMP_TO_MOV_FROM_BANK0_REG(17)
+	JMP_TO_MOV_FROM_BANK0_REG(18)
+	JMP_TO_MOV_FROM_BANK0_REG(19)
+	JMP_TO_MOV_FROM_BANK0_REG(20)
+	JMP_TO_MOV_FROM_BANK0_REG(21)
+	JMP_TO_MOV_FROM_BANK0_REG(22)
+	JMP_TO_MOV_FROM_BANK0_REG(23)
+	JMP_TO_MOV_FROM_BANK0_REG(24)
+	JMP_TO_MOV_FROM_BANK0_REG(25)
+	JMP_TO_MOV_FROM_BANK0_REG(26)
+	JMP_TO_MOV_FROM_BANK0_REG(27)
+	JMP_TO_MOV_FROM_BANK0_REG(28)
+	JMP_TO_MOV_FROM_BANK0_REG(29)
+	JMP_TO_MOV_FROM_BANK0_REG(30)
+	JMP_TO_MOV_FROM_BANK0_REG(31)
+	MOV_FROM_REG(32)
+	MOV_FROM_REG(33)
+	MOV_FROM_REG(34)
+	MOV_FROM_REG(35)
+	MOV_FROM_REG(36)
+	MOV_FROM_REG(37)
+	MOV_FROM_REG(38)
+	MOV_FROM_REG(39)
+	MOV_FROM_REG(40)
+	MOV_FROM_REG(41)
+	MOV_FROM_REG(42)
+	MOV_FROM_REG(43)
+	MOV_FROM_REG(44)
+	MOV_FROM_REG(45)
+	MOV_FROM_REG(46)
+	MOV_FROM_REG(47)
+	MOV_FROM_REG(48)
+	MOV_FROM_REG(49)
+	MOV_FROM_REG(50)
+	MOV_FROM_REG(51)
+	MOV_FROM_REG(52)
+	MOV_FROM_REG(53)
+	MOV_FROM_REG(54)
+	MOV_FROM_REG(55)
+	MOV_FROM_REG(56)
+	MOV_FROM_REG(57)
+	MOV_FROM_REG(58)
+	MOV_FROM_REG(59)
+	MOV_FROM_REG(60)
+	MOV_FROM_REG(61)
+	MOV_FROM_REG(62)
+	MOV_FROM_REG(63)
+	MOV_FROM_REG(64)
+	MOV_FROM_REG(65)
+	MOV_FROM_REG(66)
+	MOV_FROM_REG(67)
+	MOV_FROM_REG(68)
+	MOV_FROM_REG(69)
+	MOV_FROM_REG(70)
+	MOV_FROM_REG(71)
+	MOV_FROM_REG(72)
+	MOV_FROM_REG(73)
+	MOV_FROM_REG(74)
+	MOV_FROM_REG(75)
+	MOV_FROM_REG(76)
+	MOV_FROM_REG(77)
+	MOV_FROM_REG(78)
+	MOV_FROM_REG(79)
+	MOV_FROM_REG(80)
+	MOV_FROM_REG(81)
+	MOV_FROM_REG(82)
+	MOV_FROM_REG(83)
+	MOV_FROM_REG(84)
+	MOV_FROM_REG(85)
+	MOV_FROM_REG(86)
+	MOV_FROM_REG(87)
+	MOV_FROM_REG(88)
+	MOV_FROM_REG(89)
+	MOV_FROM_REG(90)
+	MOV_FROM_REG(91)
+	MOV_FROM_REG(92)
+	MOV_FROM_REG(93)
+	MOV_FROM_REG(94)
+	MOV_FROM_REG(95)
+	MOV_FROM_REG(96)
+	MOV_FROM_REG(97)
+	MOV_FROM_REG(98)
+	MOV_FROM_REG(99)
+	MOV_FROM_REG(100)
+	MOV_FROM_REG(101)
+	MOV_FROM_REG(102)
+	MOV_FROM_REG(103)
+	MOV_FROM_REG(104)
+	MOV_FROM_REG(105)
+	MOV_FROM_REG(106)
+	MOV_FROM_REG(107)
+	MOV_FROM_REG(108)
+	MOV_FROM_REG(109)
+	MOV_FROM_REG(110)
+	MOV_FROM_REG(111)
+	MOV_FROM_REG(112)
+	MOV_FROM_REG(113)
+	MOV_FROM_REG(114)
+	MOV_FROM_REG(115)
+	MOV_FROM_REG(116)
+	MOV_FROM_REG(117)
+	MOV_FROM_REG(118)
+	MOV_FROM_REG(119)
+	MOV_FROM_REG(120)
+	MOV_FROM_REG(121)
+	MOV_FROM_REG(122)
+	MOV_FROM_REG(123)
+	MOV_FROM_REG(124)
+	MOV_FROM_REG(125)
+	MOV_FROM_REG(126)
+	MOV_FROM_REG(127)
+END(asm_mov_from_reg)
+
+
+/* must be in bank 0
+ * parameter:
+ * r31: pr
+ * r24: b0
+ */
+ENTRY(kvm_resume_to_guest)
+	adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21
+	;;
+	ld8 r1 =[r16]
+	adds r20 = VMM_VCPU_VSA_BASE_OFFSET,r21
+	;;
+	mov r16=cr.ipsr
+	;;
+	ld8 r20 = [r20]
+	adds r19=VMM_VPD_BASE_OFFSET,r21
+	;;
+	ld8 r25=[r19]
+	extr.u r17=r16,IA64_PSR_RI_BIT,2
+	tbit.nz p6,p7=r16,IA64_PSR_RI_BIT+1
+	;;
+	(p6) mov r18=cr.iip
+	(p6) mov r17=r0
+	;;
+	(p6) add r18=0x10,r18
+	(p7) add r17=1,r17
+	;;
+	(p6) mov cr.iip=r18
+	dep r16=r17,r16,IA64_PSR_RI_BIT,2
+	;;
+	mov cr.ipsr=r16
+	adds r19= VPD_VPSR_START_OFFSET,r25
+	add r28=PAL_VPS_RESUME_NORMAL,r20
+	add r29=PAL_VPS_RESUME_HANDLER,r20
+	;;
+	ld8 r19=[r19]
+	mov b0=r29
+	cmp.ne p6,p7 = r0,r0
+	;;
+	tbit.z p6,p7 = r19,IA64_PSR_IC_BIT		// p1=vpsr.ic
+	;;
+	(p6) ld8 r26=[r25]
+	(p7) mov b0=r28
+	mov pr=r31,-2
+	br.sptk.many b0             // call pal service
+	;;
+END(kvm_resume_to_guest)
+
+
+MOV_TO_BANK0_REG(16)
+MOV_TO_BANK0_REG(17)
+MOV_TO_BANK0_REG(18)
+MOV_TO_BANK0_REG(19)
+MOV_TO_BANK0_REG(20)
+MOV_TO_BANK0_REG(21)
+MOV_TO_BANK0_REG(22)
+MOV_TO_BANK0_REG(23)
+MOV_TO_BANK0_REG(24)
+MOV_TO_BANK0_REG(25)
+MOV_TO_BANK0_REG(26)
+MOV_TO_BANK0_REG(27)
+MOV_TO_BANK0_REG(28)
+MOV_TO_BANK0_REG(29)
+MOV_TO_BANK0_REG(30)
+MOV_TO_BANK0_REG(31)
+
+
+// mov to reg table
+ENTRY(asm_mov_to_reg)
+	MOV_TO_REG0
+	MOV_TO_REG(1)
+	MOV_TO_REG(2)
+	MOV_TO_REG(3)
+	MOV_TO_REG(4)
+	MOV_TO_REG(5)
+	MOV_TO_REG(6)
+	MOV_TO_REG(7)
+	MOV_TO_REG(8)
+	MOV_TO_REG(9)
+	MOV_TO_REG(10)
+	MOV_TO_REG(11)
+	MOV_TO_REG(12)
+	MOV_TO_REG(13)
+	MOV_TO_REG(14)
+	MOV_TO_REG(15)
+	JMP_TO_MOV_TO_BANK0_REG(16)
+	JMP_TO_MOV_TO_BANK0_REG(17)
+	JMP_TO_MOV_TO_BANK0_REG(18)
+	JMP_TO_MOV_TO_BANK0_REG(19)
+	JMP_TO_MOV_TO_BANK0_REG(20)
+	JMP_TO_MOV_TO_BANK0_REG(21)
+	JMP_TO_MOV_TO_BANK0_REG(22)
+	JMP_TO_MOV_TO_BANK0_REG(23)
+	JMP_TO_MOV_TO_BANK0_REG(24)
+	JMP_TO_MOV_TO_BANK0_REG(25)
+	JMP_TO_MOV_TO_BANK0_REG(26)
+	JMP_TO_MOV_TO_BANK0_REG(27)
+	JMP_TO_MOV_TO_BANK0_REG(28)
+	JMP_TO_MOV_TO_BANK0_REG(29)
+	JMP_TO_MOV_TO_BANK0_REG(30)
+	JMP_TO_MOV_TO_BANK0_REG(31)
+	MOV_TO_REG(32)
+	MOV_TO_REG(33)
+	MOV_TO_REG(34)
+	MOV_TO_REG(35)
+	MOV_TO_REG(36)
+	MOV_TO_REG(37)
+	MOV_TO_REG(38)
+	MOV_TO_REG(39)
+	MOV_TO_REG(40)
+	MOV_TO_REG(41)
+	MOV_TO_REG(42)
+	MOV_TO_REG(43)
+	MOV_TO_REG(44)
+	MOV_TO_REG(45)
+	MOV_TO_REG(46)
+	MOV_TO_REG(47)
+	MOV_TO_REG(48)
+	MOV_TO_REG(49)
+	MOV_TO_REG(50)
+	MOV_TO_REG(51)
+	MOV_TO_REG(52)
+	MOV_TO_REG(53)
+	MOV_TO_REG(54)
+	MOV_TO_REG(55)
+	MOV_TO_REG(56)
+	MOV_TO_REG(57)
+	MOV_TO_REG(58)
+	MOV_TO_REG(59)
+	MOV_TO_REG(60)
+	MOV_TO_REG(61)
+	MOV_TO_REG(62)
+	MOV_TO_REG(63)
+	MOV_TO_REG(64)
+	MOV_TO_REG(65)
+	MOV_TO_REG(66)
+	MOV_TO_REG(67)
+	MOV_TO_REG(68)
+	MOV_TO_REG(69)
+	MOV_TO_REG(70)
+	MOV_TO_REG(71)
+	MOV_TO_REG(72)
+	MOV_TO_REG(73)
+	MOV_TO_REG(74)
+	MOV_TO_REG(75)
+	MOV_TO_REG(76)
+	MOV_TO_REG(77)
+	MOV_TO_REG(78)
+	MOV_TO_REG(79)
+	MOV_TO_REG(80)
+	MOV_TO_REG(81)
+	MOV_TO_REG(82)
+	MOV_TO_REG(83)
+	MOV_TO_REG(84)
+	MOV_TO_REG(85)
+	MOV_TO_REG(86)
+	MOV_TO_REG(87)
+	MOV_TO_REG(88)
+	MOV_TO_REG(89)
+	MOV_TO_REG(90)
+	MOV_TO_REG(91)
+	MOV_TO_REG(92)
+	MOV_TO_REG(93)
+	MOV_TO_REG(94)
+	MOV_TO_REG(95)
+	MOV_TO_REG(96)
+	MOV_TO_REG(97)
+	MOV_TO_REG(98)
+	MOV_TO_REG(99)
+	MOV_TO_REG(100)
+	MOV_TO_REG(101)
+	MOV_TO_REG(102)
+	MOV_TO_REG(103)
+	MOV_TO_REG(104)
+	MOV_TO_REG(105)
+	MOV_TO_REG(106)
+	MOV_TO_REG(107)
+	MOV_TO_REG(108)
+	MOV_TO_REG(109)
+	MOV_TO_REG(110)
+	MOV_TO_REG(111)
+	MOV_TO_REG(112)
+	MOV_TO_REG(113)
+	MOV_TO_REG(114)
+	MOV_TO_REG(115)
+	MOV_TO_REG(116)
+	MOV_TO_REG(117)
+	MOV_TO_REG(118)
+	MOV_TO_REG(119)
+	MOV_TO_REG(120)
+	MOV_TO_REG(121)
+	MOV_TO_REG(122)
+	MOV_TO_REG(123)
+	MOV_TO_REG(124)
+	MOV_TO_REG(125)
+	MOV_TO_REG(126)
+	MOV_TO_REG(127)
+END(asm_mov_to_reg)
diff --git a/arch/ia64/kvm/process.c b/arch/ia64/kvm/process.c
new file mode 100644
index 0000000..5a33f7e
--- /dev/null
+++ b/arch/ia64/kvm/process.c
@@ -0,0 +1,970 @@
+/*
+ * process.c: handle interruption inject for guests.
+ * Copyright (c) 2005, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ *  	Shaofan Li (Susue Li) <susie.li@intel.com>
+ *  	Xiaoyan Feng (Fleming Feng)  <fleming.feng@intel.com>
+ *  	Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com)
+ *  	Xiantao Zhang (xiantao.zhang@intel.com)
+ */
+#include "vcpu.h"
+
+#include <asm/pal.h>
+#include <asm/sal.h>
+#include <asm/fpswa.h>
+#include <asm/kregs.h>
+#include <asm/tlb.h>
+
+fpswa_interface_t *vmm_fpswa_interface;
+
+#define IA64_VHPT_TRANS_VECTOR			0x0000
+#define IA64_INST_TLB_VECTOR			0x0400
+#define IA64_DATA_TLB_VECTOR			0x0800
+#define IA64_ALT_INST_TLB_VECTOR		0x0c00
+#define IA64_ALT_DATA_TLB_VECTOR		0x1000
+#define IA64_DATA_NESTED_TLB_VECTOR		0x1400
+#define IA64_INST_KEY_MISS_VECTOR		0x1800
+#define IA64_DATA_KEY_MISS_VECTOR		0x1c00
+#define IA64_DIRTY_BIT_VECTOR			0x2000
+#define IA64_INST_ACCESS_BIT_VECTOR		0x2400
+#define IA64_DATA_ACCESS_BIT_VECTOR		0x2800
+#define IA64_BREAK_VECTOR			0x2c00
+#define IA64_EXTINT_VECTOR			0x3000
+#define IA64_PAGE_NOT_PRESENT_VECTOR		0x5000
+#define IA64_KEY_PERMISSION_VECTOR		0x5100
+#define IA64_INST_ACCESS_RIGHTS_VECTOR		0x5200
+#define IA64_DATA_ACCESS_RIGHTS_VECTOR		0x5300
+#define IA64_GENEX_VECTOR			0x5400
+#define IA64_DISABLED_FPREG_VECTOR		0x5500
+#define IA64_NAT_CONSUMPTION_VECTOR		0x5600
+#define IA64_SPECULATION_VECTOR		0x5700 /* UNUSED */
+#define IA64_DEBUG_VECTOR			0x5900
+#define IA64_UNALIGNED_REF_VECTOR		0x5a00
+#define IA64_UNSUPPORTED_DATA_REF_VECTOR	0x5b00
+#define IA64_FP_FAULT_VECTOR			0x5c00
+#define IA64_FP_TRAP_VECTOR			0x5d00
+#define IA64_LOWERPRIV_TRANSFER_TRAP_VECTOR 	0x5e00
+#define IA64_TAKEN_BRANCH_TRAP_VECTOR		0x5f00
+#define IA64_SINGLE_STEP_TRAP_VECTOR		0x6000
+
+/* SDM vol2 5.5 - IVA based interruption handling */
+#define INITIAL_PSR_VALUE_AT_INTERRUPTION (IA64_PSR_UP | IA64_PSR_MFL |\
+			IA64_PSR_MFH | IA64_PSR_PK | IA64_PSR_DT |    	\
+			IA64_PSR_RT | IA64_PSR_MC|IA64_PSR_IT)
+
+#define DOMN_PAL_REQUEST    0x110000
+#define DOMN_SAL_REQUEST    0x110001
+
+static u64 vec2off[68] = {0x0, 0x400, 0x800, 0xc00, 0x1000, 0x1400, 0x1800,
+	0x1c00, 0x2000, 0x2400, 0x2800, 0x2c00, 0x3000, 0x3400, 0x3800, 0x3c00,
+	0x4000, 0x4400, 0x4800, 0x4c00, 0x5000, 0x5100, 0x5200, 0x5300, 0x5400,
+	0x5500, 0x5600, 0x5700, 0x5800, 0x5900, 0x5a00, 0x5b00, 0x5c00, 0x5d00,
+	0x5e00, 0x5f00, 0x6000, 0x6100, 0x6200, 0x6300, 0x6400, 0x6500, 0x6600,
+	0x6700, 0x6800, 0x6900, 0x6a00, 0x6b00, 0x6c00, 0x6d00, 0x6e00, 0x6f00,
+	0x7000, 0x7100, 0x7200, 0x7300, 0x7400, 0x7500, 0x7600, 0x7700, 0x7800,
+	0x7900, 0x7a00, 0x7b00, 0x7c00, 0x7d00, 0x7e00, 0x7f00
+};
+
+static void collect_interruption(struct kvm_vcpu *vcpu)
+{
+	u64 ipsr;
+	u64 vdcr;
+	u64 vifs;
+	unsigned long vpsr;
+	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
+
+	vpsr = vcpu_get_psr(vcpu);
+	vcpu_bsw0(vcpu);
+	if (vpsr & IA64_PSR_IC) {
+
+		/* Sync mpsr id/da/dd/ss/ed bits to vipsr
+		 * since after guest do rfi, we still want these bits on in
+		 * mpsr
+		 */
+
+		ipsr = regs->cr_ipsr;
+		vpsr = vpsr | (ipsr & (IA64_PSR_ID | IA64_PSR_DA
+					| IA64_PSR_DD | IA64_PSR_SS
+					| IA64_PSR_ED));
+		vcpu_set_ipsr(vcpu, vpsr);
+
+		/* Currently, for trap, we do not advance IIP to next
+		 * instruction. That's because we assume caller already
+		 * set up IIP correctly
+		 */
+
+		vcpu_set_iip(vcpu , regs->cr_iip);
+
+		/* set vifs.v to zero */
+		vifs = VCPU(vcpu, ifs);
+		vifs &= ~IA64_IFS_V;
+		vcpu_set_ifs(vcpu, vifs);
+
+		vcpu_set_iipa(vcpu, VMX(vcpu, cr_iipa));
+	}
+
+	vdcr = VCPU(vcpu, dcr);
+
+	/* Set guest psr
+	 * up/mfl/mfh/pk/dt/rt/mc/it keeps unchanged
+	 * be: set to the value of dcr.be
+	 * pp: set to the value of dcr.pp
+	 */
+	vpsr &= INITIAL_PSR_VALUE_AT_INTERRUPTION;
+	vpsr |= (vdcr & IA64_DCR_BE);
+
+	/* VDCR pp bit position is different from VPSR pp bit */
+	if (vdcr & IA64_DCR_PP) {
+		vpsr |= IA64_PSR_PP;
+	} else {
+		vpsr &= ~IA64_PSR_PP;;
+	}
+
+	vcpu_set_psr(vcpu, vpsr);
+
+}
+
+void inject_guest_interruption(struct kvm_vcpu *vcpu, u64 vec)
+{
+	u64 viva;
+	struct kvm_pt_regs *regs;
+	union ia64_isr pt_isr;
+
+	regs = vcpu_regs(vcpu);
+
+	/* clear cr.isr.ir (incomplete register frame)*/
+	pt_isr.val = VMX(vcpu, cr_isr);
+	pt_isr.ir = 0;
+	VMX(vcpu, cr_isr) = pt_isr.val;
+
+	collect_interruption(vcpu);
+
+	viva = vcpu_get_iva(vcpu);
+	regs->cr_iip = viva + vec;
+}
+
+static u64 vcpu_get_itir_on_fault(struct kvm_vcpu *vcpu, u64 ifa)
+{
+	union ia64_rr rr, rr1;
+
+	rr.val = vcpu_get_rr(vcpu, ifa);
+	rr1.val = 0;
+	rr1.ps = rr.ps;
+	rr1.rid = rr.rid;
+	return (rr1.val);
+}
+
+
+/*
+ * Set vIFA & vITIR & vIHA, when vPSR.ic =1
+ * Parameter:
+ *  set_ifa: if true, set vIFA
+ *  set_itir: if true, set vITIR
+ *  set_iha: if true, set vIHA
+ */
+void set_ifa_itir_iha(struct kvm_vcpu *vcpu, u64 vadr,
+		int set_ifa, int set_itir, int set_iha)
+{
+	long vpsr;
+	u64 value;
+
+	vpsr = VCPU(vcpu, vpsr);
+	/* Vol2, Table 8-1 */
+	if (vpsr & IA64_PSR_IC) {
+		if (set_ifa)
+			vcpu_set_ifa(vcpu, vadr);
+		if (set_itir) {
+			value = vcpu_get_itir_on_fault(vcpu, vadr);
+			vcpu_set_itir(vcpu, value);
+		}
+
+		if (set_iha) {
+			value = vcpu_thash(vcpu, vadr);
+			vcpu_set_iha(vcpu, value);
+		}
+	}
+}
+
+/*
+ * Data TLB Fault
+ *  @ Data TLB vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void dtlb_fault(struct kvm_vcpu *vcpu, u64 vadr)
+{
+	/* If vPSR.ic, IFA, ITIR, IHA */
+	set_ifa_itir_iha(vcpu, vadr, 1, 1, 1);
+	inject_guest_interruption(vcpu, IA64_DATA_TLB_VECTOR);
+}
+
+/*
+ * Instruction TLB Fault
+ *  @ Instruction TLB vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void itlb_fault(struct kvm_vcpu *vcpu, u64 vadr)
+{
+	/* If vPSR.ic, IFA, ITIR, IHA */
+	set_ifa_itir_iha(vcpu, vadr, 1, 1, 1);
+	inject_guest_interruption(vcpu, IA64_INST_TLB_VECTOR);
+}
+
+
+
+/*
+ * Data Nested TLB Fault
+ *  @ Data Nested TLB Vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void nested_dtlb(struct kvm_vcpu *vcpu)
+{
+	inject_guest_interruption(vcpu, IA64_DATA_NESTED_TLB_VECTOR);
+}
+
+/*
+ * Alternate Data TLB Fault
+ *  @ Alternate Data TLB vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void alt_dtlb(struct kvm_vcpu *vcpu, u64 vadr)
+{
+	set_ifa_itir_iha(vcpu, vadr, 1, 1, 0);
+	inject_guest_interruption(vcpu, IA64_ALT_DATA_TLB_VECTOR);
+}
+
+
+/*
+ * Data TLB Fault
+ *  @ Data TLB vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void alt_itlb(struct kvm_vcpu *vcpu, u64 vadr)
+{
+	set_ifa_itir_iha(vcpu, vadr, 1, 1, 0);
+	inject_guest_interruption(vcpu, IA64_ALT_INST_TLB_VECTOR);
+}
+
+/* Deal with:
+ *  VHPT Translation Vector
+ */
+static void _vhpt_fault(struct kvm_vcpu *vcpu, u64 vadr)
+{
+	/* If vPSR.ic, IFA, ITIR, IHA*/
+	set_ifa_itir_iha(vcpu, vadr, 1, 1, 1);
+	inject_guest_interruption(vcpu, IA64_VHPT_TRANS_VECTOR);
+
+
+}
+
+/*
+ * VHPT Instruction Fault
+ *  @ VHPT Translation vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void ivhpt_fault(struct kvm_vcpu *vcpu, u64 vadr)
+{
+	_vhpt_fault(vcpu, vadr);
+}
+
+
+/*
+ * VHPT Data Fault
+ *  @ VHPT Translation vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void dvhpt_fault(struct kvm_vcpu *vcpu, u64 vadr)
+{
+	_vhpt_fault(vcpu, vadr);
+}
+
+
+
+/*
+ * Deal with:
+ *  General Exception vector
+ */
+void _general_exception(struct kvm_vcpu *vcpu)
+{
+	inject_guest_interruption(vcpu, IA64_GENEX_VECTOR);
+}
+
+
+/*
+ * Illegal Operation Fault
+ *  @ General Exception Vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void illegal_op(struct kvm_vcpu *vcpu)
+{
+	_general_exception(vcpu);
+}
+
+/*
+ * Illegal Dependency Fault
+ *  @ General Exception Vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void illegal_dep(struct kvm_vcpu *vcpu)
+{
+	_general_exception(vcpu);
+}
+
+/*
+ * Reserved Register/Field Fault
+ *  @ General Exception Vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void rsv_reg_field(struct kvm_vcpu *vcpu)
+{
+	_general_exception(vcpu);
+}
+/*
+ * Privileged Operation Fault
+ *  @ General Exception Vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+
+void privilege_op(struct kvm_vcpu *vcpu)
+{
+	_general_exception(vcpu);
+}
+
+/*
+ * Unimplement Data Address Fault
+ *  @ General Exception Vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void unimpl_daddr(struct kvm_vcpu *vcpu)
+{
+	_general_exception(vcpu);
+}
+
+/*
+ * Privileged Register Fault
+ *  @ General Exception Vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void privilege_reg(struct kvm_vcpu *vcpu)
+{
+	_general_exception(vcpu);
+}
+
+/* Deal with
+ *  Nat consumption vector
+ * Parameter:
+ *  vaddr: Optional, if t == REGISTER
+ */
+static void _nat_consumption_fault(struct kvm_vcpu *vcpu, u64 vadr,
+						enum tlb_miss_type t)
+{
+	/* If vPSR.ic && t == DATA/INST, IFA */
+	if (t == DATA || t == INSTRUCTION) {
+		/* IFA */
+		set_ifa_itir_iha(vcpu, vadr, 1, 0, 0);
+	}
+
+	inject_guest_interruption(vcpu, IA64_NAT_CONSUMPTION_VECTOR);
+}
+
+/*
+ * Instruction Nat Page Consumption Fault
+ *  @ Nat Consumption Vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void inat_page_consumption(struct kvm_vcpu *vcpu, u64 vadr)
+{
+	_nat_consumption_fault(vcpu, vadr, INSTRUCTION);
+}
+
+/*
+ * Register Nat Consumption Fault
+ *  @ Nat Consumption Vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void rnat_consumption(struct kvm_vcpu *vcpu)
+{
+	_nat_consumption_fault(vcpu, 0, REGISTER);
+}
+
+/*
+ * Data Nat Page Consumption Fault
+ *  @ Nat Consumption Vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void dnat_page_consumption(struct kvm_vcpu *vcpu, u64 vadr)
+{
+	_nat_consumption_fault(vcpu, vadr, DATA);
+}
+
+/* Deal with
+ *  Page not present vector
+ */
+static void __page_not_present(struct kvm_vcpu *vcpu, u64 vadr)
+{
+	/* If vPSR.ic, IFA, ITIR */
+	set_ifa_itir_iha(vcpu, vadr, 1, 1, 0);
+	inject_guest_interruption(vcpu, IA64_PAGE_NOT_PRESENT_VECTOR);
+}
+
+
+void data_page_not_present(struct kvm_vcpu *vcpu, u64 vadr)
+{
+	__page_not_present(vcpu, vadr);
+}
+
+
+void inst_page_not_present(struct kvm_vcpu *vcpu, u64 vadr)
+{
+	__page_not_present(vcpu, vadr);
+}
+
+
+/* Deal with
+ *  Data access rights vector
+ */
+void data_access_rights(struct kvm_vcpu *vcpu, u64 vadr)
+{
+	/* If vPSR.ic, IFA, ITIR */
+	set_ifa_itir_iha(vcpu, vadr, 1, 1, 0);
+	inject_guest_interruption(vcpu, IA64_DATA_ACCESS_RIGHTS_VECTOR);
+}
+
+fpswa_ret_t vmm_fp_emulate(int fp_fault, void *bundle, unsigned long *ipsr,
+		unsigned long *fpsr, unsigned long *isr, unsigned long *pr,
+		unsigned long *ifs, struct kvm_pt_regs *regs)
+{
+	fp_state_t fp_state;
+	fpswa_ret_t ret;
+	struct kvm_vcpu *vcpu = current_vcpu;
+
+	uint64_t old_rr7 = ia64_get_rr(7UL<<61);
+
+	if (!vmm_fpswa_interface)
+		return (fpswa_ret_t) {-1, 0, 0, 0};
+
+	/*
+	 * Just let fpswa driver to use hardware fp registers.
+	 * No fp register is valid in memory.
+	 */
+	memset(&fp_state, 0, sizeof(fp_state_t));
+
+	/*
+	 * unsigned long (*EFI_FPSWA) (
+	 *      unsigned long    trap_type,
+	 *      void             *Bundle,
+	 *      unsigned long    *pipsr,
+	 *      unsigned long    *pfsr,
+	 *      unsigned long    *pisr,
+	 *      unsigned long    *ppreds,
+	 *      unsigned long    *pifs,
+	 *      void             *fp_state);
+	 */
+	/*Call host fpswa interface directly to virtualize
+	 *guest fpswa request!
+	 */
+	ia64_set_rr(7UL << 61, vcpu->arch.host.rr[7]);
+	ia64_srlz_d();
+
+	ret = (*vmm_fpswa_interface->fpswa) (fp_fault, bundle,
+			ipsr, fpsr, isr, pr, ifs, &fp_state);
+	ia64_set_rr(7UL << 61, old_rr7);
+	ia64_srlz_d();
+	return ret;
+}
+
+/*
+ * Handle floating-point assist faults and traps for domain.
+ */
+unsigned long vmm_handle_fpu_swa(int fp_fault, struct kvm_pt_regs *regs,
+					unsigned long isr)
+{
+	struct kvm_vcpu *v = current_vcpu;
+	IA64_BUNDLE bundle;
+	unsigned long fault_ip;
+	fpswa_ret_t ret;
+
+	fault_ip = regs->cr_iip;
+	/*
+	 * When the FP trap occurs, the trapping instruction is completed.
+	 * If ipsr.ri == 0, there is the trapping instruction in previous
+	 * bundle.
+	 */
+	if (!fp_fault && (ia64_psr(regs)->ri == 0))
+		fault_ip -= 16;
+
+	if (fetch_code(v, fault_ip, &bundle))
+		return -EAGAIN;
+
+	if (!bundle.i64[0] && !bundle.i64[1])
+		return -EACCES;
+
+	ret = vmm_fp_emulate(fp_fault, &bundle, &regs->cr_ipsr, &regs->ar_fpsr,
+			&isr, &regs->pr, &regs->cr_ifs, regs);
+	return ret.status;
+}
+
+void reflect_interruption(u64 ifa, u64 isr, u64 iim,
+		u64 vec, struct kvm_pt_regs *regs)
+{
+	u64 vector;
+	int status ;
+	struct kvm_vcpu *vcpu = current_vcpu;
+	u64 vpsr = VCPU(vcpu, vpsr);
+
+	vector = vec2off[vec];
+
+	if (!(vpsr & IA64_PSR_IC) && (vector != IA64_DATA_NESTED_TLB_VECTOR)) {
+		panic_vm(vcpu);
+		return;
+	}
+
+	switch (vec) {
+	case 32: 	/*IA64_FP_FAULT_VECTOR*/
+		status = vmm_handle_fpu_swa(1, regs, isr);
+		if (!status) {
+			vcpu_increment_iip(vcpu);
+			return;
+		} else if (-EAGAIN == status)
+			return;
+		break;
+	case 33:	/*IA64_FP_TRAP_VECTOR*/
+		status = vmm_handle_fpu_swa(0, regs, isr);
+		if (!status)
+			return ;
+		else if (-EAGAIN == status) {
+			vcpu_decrement_iip(vcpu);
+			return ;
+		}
+		break;
+	}
+
+	VCPU(vcpu, isr) = isr;
+	VCPU(vcpu, iipa) = regs->cr_iip;
+	if (vector == IA64_BREAK_VECTOR || vector == IA64_SPECULATION_VECTOR)
+		VCPU(vcpu, iim) = iim;
+	else
+		set_ifa_itir_iha(vcpu, ifa, 1, 1, 1);
+
+	inject_guest_interruption(vcpu, vector);
+}
+
+static void set_pal_call_data(struct kvm_vcpu *vcpu)
+{
+	struct exit_ctl_data *p = &vcpu->arch.exit_data;
+
+	/*FIXME:For static and stacked convention, firmware
+	 * has put the parameters in gr28-gr31 before
+	 * break to vmm  !!*/
+
+	p->u.pal_data.gr28 = vcpu_get_gr(vcpu, 28);
+	p->u.pal_data.gr29 = vcpu_get_gr(vcpu, 29);
+	p->u.pal_data.gr30 = vcpu_get_gr(vcpu, 30);
+	p->u.pal_data.gr31 = vcpu_get_gr(vcpu, 31);
+	p->exit_reason = EXIT_REASON_PAL_CALL;
+}
+
+static void set_pal_call_result(struct kvm_vcpu *vcpu)
+{
+	struct exit_ctl_data *p = &vcpu->arch.exit_data;
+
+	if (p->exit_reason == EXIT_REASON_PAL_CALL) {
+		vcpu_set_gr(vcpu, 8, p->u.pal_data.ret.status, 0);
+		vcpu_set_gr(vcpu, 9, p->u.pal_data.ret.v0, 0);
+		vcpu_set_gr(vcpu, 10, p->u.pal_data.ret.v1, 0);
+		vcpu_set_gr(vcpu, 11, p->u.pal_data.ret.v2, 0);
+	} else
+		panic_vm(vcpu);
+}
+
+static void set_sal_call_data(struct kvm_vcpu *vcpu)
+{
+	struct exit_ctl_data *p = &vcpu->arch.exit_data;
+
+	p->u.sal_data.in0 = vcpu_get_gr(vcpu, 32);
+	p->u.sal_data.in1 = vcpu_get_gr(vcpu, 33);
+	p->u.sal_data.in2 = vcpu_get_gr(vcpu, 34);
+	p->u.sal_data.in3 = vcpu_get_gr(vcpu, 35);
+	p->u.sal_data.in4 = vcpu_get_gr(vcpu, 36);
+	p->u.sal_data.in5 = vcpu_get_gr(vcpu, 37);
+	p->u.sal_data.in6 = vcpu_get_gr(vcpu, 38);
+	p->u.sal_data.in7 = vcpu_get_gr(vcpu, 39);
+	p->exit_reason = EXIT_REASON_SAL_CALL;
+}
+
+static void set_sal_call_result(struct kvm_vcpu *vcpu)
+{
+	struct exit_ctl_data *p = &vcpu->arch.exit_data;
+
+	if (p->exit_reason == EXIT_REASON_SAL_CALL) {
+		vcpu_set_gr(vcpu, 8, p->u.sal_data.ret.r8, 0);
+		vcpu_set_gr(vcpu, 9, p->u.sal_data.ret.r9, 0);
+		vcpu_set_gr(vcpu, 10, p->u.sal_data.ret.r10, 0);
+		vcpu_set_gr(vcpu, 11, p->u.sal_data.ret.r11, 0);
+	} else
+		panic_vm(vcpu);
+}
+
+void  kvm_ia64_handle_break(unsigned long ifa, struct kvm_pt_regs *regs,
+		unsigned long isr, unsigned long iim)
+{
+	struct kvm_vcpu *v = current_vcpu;
+
+	if (ia64_psr(regs)->cpl == 0) {
+		/* Allow hypercalls only when cpl = 0.  */
+		if (iim == DOMN_PAL_REQUEST) {
+			set_pal_call_data(v);
+			vmm_transition(v);
+			set_pal_call_result(v);
+			vcpu_increment_iip(v);
+			return;
+		} else if (iim == DOMN_SAL_REQUEST) {
+			set_sal_call_data(v);
+			vmm_transition(v);
+			set_sal_call_result(v);
+			vcpu_increment_iip(v);
+			return;
+		}
+	}
+	reflect_interruption(ifa, isr, iim, 11, regs);
+}
+
+void check_pending_irq(struct kvm_vcpu *vcpu)
+{
+	int  mask, h_pending, h_inservice;
+	u64 isr;
+	unsigned long  vpsr;
+	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
+
+	h_pending = highest_pending_irq(vcpu);
+	if (h_pending == NULL_VECTOR) {
+		update_vhpi(vcpu, NULL_VECTOR);
+		return;
+	}
+	h_inservice = highest_inservice_irq(vcpu);
+
+	vpsr = VCPU(vcpu, vpsr);
+	mask = irq_masked(vcpu, h_pending, h_inservice);
+	if ((vpsr & IA64_PSR_I) && IRQ_NO_MASKED == mask) {
+		isr = vpsr & IA64_PSR_RI;
+		update_vhpi(vcpu, h_pending);
+		reflect_interruption(0, isr, 0, 12, regs); /* EXT IRQ */
+	} else if (mask == IRQ_MASKED_BY_INSVC) {
+		if (VCPU(vcpu, vhpi))
+			update_vhpi(vcpu, NULL_VECTOR);
+	} else {
+		/* masked by vpsr.i or vtpr.*/
+		update_vhpi(vcpu, h_pending);
+	}
+}
+
+static void generate_exirq(struct kvm_vcpu *vcpu)
+{
+	unsigned  vpsr;
+	uint64_t isr;
+
+	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
+
+	vpsr = VCPU(vcpu, vpsr);
+	isr = vpsr & IA64_PSR_RI;
+	if (!(vpsr & IA64_PSR_IC))
+		panic_vm(vcpu);
+	reflect_interruption(0, isr, 0, 12, regs); /* EXT IRQ */
+}
+
+void vhpi_detection(struct kvm_vcpu *vcpu)
+{
+	uint64_t    threshold, vhpi;
+	union ia64_tpr       vtpr;
+	struct ia64_psr vpsr;
+
+	vpsr = *(struct ia64_psr *)&VCPU(vcpu, vpsr);
+	vtpr.val = VCPU(vcpu, tpr);
+
+	threshold = ((!vpsr.i) << 5) | (vtpr.mmi << 4) | vtpr.mic;
+	vhpi = VCPU(vcpu, vhpi);
+	if (vhpi > threshold) {
+		/* interrupt actived*/
+		generate_exirq(vcpu);
+	}
+}
+
+
+void leave_hypervisor_tail(void)
+{
+	struct kvm_vcpu *v = current_vcpu;
+
+	if (VMX(v, timer_check)) {
+		VMX(v, timer_check) = 0;
+		if (VMX(v, itc_check)) {
+			if (vcpu_get_itc(v) > VCPU(v, itm)) {
+				if (!(VCPU(v, itv) & (1 << 16))) {
+					vcpu_pend_interrupt(v, VCPU(v, itv)
+							& 0xff);
+				VMX(v, itc_check) = 0;
+				} else {
+					v->arch.timer_pending = 1;
+				}
+				VMX(v, last_itc) = VCPU(v, itm) + 1;
+			}
+		}
+	}
+
+	rmb();
+	if (v->arch.irq_new_pending) {
+		v->arch.irq_new_pending = 0;
+		VMX(v, irq_check) = 0;
+		check_pending_irq(v);
+		return;
+	}
+	if (VMX(v, irq_check)) {
+		VMX(v, irq_check) = 0;
+		vhpi_detection(v);
+	}
+}
+
+
+static inline void handle_lds(struct kvm_pt_regs *regs)
+{
+	regs->cr_ipsr |= IA64_PSR_ED;
+}
+
+void physical_tlb_miss(struct kvm_vcpu *vcpu, unsigned long vadr, int type)
+{
+	unsigned long pte;
+	union ia64_rr rr;
+
+	rr.val = ia64_get_rr(vadr);
+	pte =  vadr & _PAGE_PPN_MASK;
+	pte = pte | PHY_PAGE_WB;
+	thash_vhpt_insert(vcpu, pte, (u64)(rr.ps << 2), vadr, type);
+	return;
+}
+
+void kvm_page_fault(u64 vadr , u64 vec, struct kvm_pt_regs *regs)
+{
+	unsigned long vpsr;
+	int type;
+
+	u64 vhpt_adr, gppa, pteval, rr, itir;
+	union ia64_isr misr;
+	union ia64_pta vpta;
+	struct thash_data *data;
+	struct kvm_vcpu *v = current_vcpu;
+
+	vpsr = VCPU(v, vpsr);
+	misr.val = VMX(v, cr_isr);
+
+	type = vec;
+
+	if (is_physical_mode(v) && (!(vadr << 1 >> 62))) {
+		if (vec == 2) {
+			if (__gpfn_is_io((vadr << 1) >> (PAGE_SHIFT + 1))) {
+				emulate_io_inst(v, ((vadr << 1) >> 1), 4);
+				return;
+			}
+		}
+		physical_tlb_miss(v, vadr, type);
+		return;
+	}
+	data = vtlb_lookup(v, vadr, type);
+	if (data != 0) {
+		if (type == D_TLB) {
+			gppa = (vadr & ((1UL << data->ps) - 1))
+				+ (data->ppn >> (data->ps - 12) << data->ps);
+			if (__gpfn_is_io(gppa >> PAGE_SHIFT)) {
+				if (data->pl >= ((regs->cr_ipsr >>
+						IA64_PSR_CPL0_BIT) & 3))
+					emulate_io_inst(v, gppa, data->ma);
+				else {
+					vcpu_set_isr(v, misr.val);
+					data_access_rights(v, vadr);
+				}
+				return ;
+			}
+		}
+		thash_vhpt_insert(v, data->page_flags, data->itir, vadr, type);
+
+	} else if (type == D_TLB) {
+		if (misr.sp) {
+			handle_lds(regs);
+			return;
+		}
+
+		rr = vcpu_get_rr(v, vadr);
+		itir = rr & (RR_RID_MASK | RR_PS_MASK);
+
+		if (!vhpt_enabled(v, vadr, misr.rs ? RSE_REF : DATA_REF)) {
+			if (vpsr & IA64_PSR_IC) {
+				vcpu_set_isr(v, misr.val);
+				alt_dtlb(v, vadr);
+			} else {
+				nested_dtlb(v);
+			}
+			return ;
+		}
+
+		vpta.val = vcpu_get_pta(v);
+		/* avoid recursively walking (short format) VHPT */
+
+		vhpt_adr = vcpu_thash(v, vadr);
+		if (!guest_vhpt_lookup(vhpt_adr, &pteval)) {
+			/* VHPT successfully read.  */
+			if (!(pteval & _PAGE_P)) {
+				if (vpsr & IA64_PSR_IC) {
+					vcpu_set_isr(v, misr.val);
+					dtlb_fault(v, vadr);
+				} else {
+					nested_dtlb(v);
+				}
+			} else if ((pteval & _PAGE_MA_MASK) != _PAGE_MA_ST) {
+				thash_purge_and_insert(v, pteval, itir,
+								vadr, D_TLB);
+			} else if (vpsr & IA64_PSR_IC) {
+				vcpu_set_isr(v, misr.val);
+				dtlb_fault(v, vadr);
+			} else {
+				nested_dtlb(v);
+			}
+		} else {
+			/* Can't read VHPT.  */
+			if (vpsr & IA64_PSR_IC) {
+				vcpu_set_isr(v, misr.val);
+				dvhpt_fault(v, vadr);
+			} else {
+				nested_dtlb(v);
+			}
+		}
+	} else if (type == I_TLB) {
+		if (!(vpsr & IA64_PSR_IC))
+			misr.ni = 1;
+		if (!vhpt_enabled(v, vadr, INST_REF)) {
+			vcpu_set_isr(v, misr.val);
+			alt_itlb(v, vadr);
+			return;
+		}
+
+		vpta.val = vcpu_get_pta(v);
+
+		vhpt_adr = vcpu_thash(v, vadr);
+		if (!guest_vhpt_lookup(vhpt_adr, &pteval)) {
+			/* VHPT successfully read.  */
+			if (pteval & _PAGE_P) {
+				if ((pteval & _PAGE_MA_MASK) == _PAGE_MA_ST) {
+					vcpu_set_isr(v, misr.val);
+					itlb_fault(v, vadr);
+					return ;
+				}
+				rr = vcpu_get_rr(v, vadr);
+				itir = rr & (RR_RID_MASK | RR_PS_MASK);
+				thash_purge_and_insert(v, pteval, itir,
+							vadr, I_TLB);
+			} else {
+				vcpu_set_isr(v, misr.val);
+				inst_page_not_present(v, vadr);
+			}
+		} else {
+			vcpu_set_isr(v, misr.val);
+			ivhpt_fault(v, vadr);
+		}
+	}
+}
+
+void kvm_vexirq(struct kvm_vcpu *vcpu)
+{
+	u64 vpsr, isr;
+	struct kvm_pt_regs *regs;
+
+	regs = vcpu_regs(vcpu);
+	vpsr = VCPU(vcpu, vpsr);
+	isr = vpsr & IA64_PSR_RI;
+	reflect_interruption(0, isr, 0, 12, regs); /*EXT IRQ*/
+}
+
+void kvm_ia64_handle_irq(struct kvm_vcpu *v)
+{
+	struct exit_ctl_data *p = &v->arch.exit_data;
+	long psr;
+
+	local_irq_save(psr);
+	p->exit_reason = EXIT_REASON_EXTERNAL_INTERRUPT;
+	vmm_transition(v);
+	local_irq_restore(psr);
+
+	VMX(v, timer_check) = 1;
+
+}
+
+static void ptc_ga_remote_func(struct kvm_vcpu *v, int pos)
+{
+	u64 oldrid, moldrid, oldpsbits, vaddr;
+	struct kvm_ptc_g *p = &v->arch.ptc_g_data[pos];
+	vaddr = p->vaddr;
+
+	oldrid = VMX(v, vrr[0]);
+	VMX(v, vrr[0]) = p->rr;
+	oldpsbits = VMX(v, psbits[0]);
+	VMX(v, psbits[0]) = VMX(v, psbits[REGION_NUMBER(vaddr)]);
+	moldrid = ia64_get_rr(0x0);
+	ia64_set_rr(0x0, vrrtomrr(p->rr));
+	ia64_srlz_d();
+
+	vaddr = PAGEALIGN(vaddr, p->ps);
+	thash_purge_entries_remote(v, vaddr, p->ps);
+
+	VMX(v, vrr[0]) = oldrid;
+	VMX(v, psbits[0]) = oldpsbits;
+	ia64_set_rr(0x0, moldrid);
+	ia64_dv_serialize_data();
+}
+
+static void vcpu_do_resume(struct kvm_vcpu *vcpu)
+{
+	/*Re-init VHPT and VTLB once from resume*/
+	vcpu->arch.vhpt.num = VHPT_NUM_ENTRIES;
+	thash_init(&vcpu->arch.vhpt, VHPT_SHIFT);
+	vcpu->arch.vtlb.num = VTLB_NUM_ENTRIES;
+	thash_init(&vcpu->arch.vtlb, VTLB_SHIFT);
+
+	ia64_set_pta(vcpu->arch.vhpt.pta.val);
+}
+
+static void kvm_do_resume_op(struct kvm_vcpu *vcpu)
+{
+	if (test_and_clear_bit(KVM_REQ_RESUME, &vcpu->requests)) {
+		vcpu_do_resume(vcpu);
+		return;
+	}
+
+	if (unlikely(test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))) {
+		thash_purge_all(vcpu);
+		return;
+	}
+
+	if (test_and_clear_bit(KVM_REQ_PTC_G, &vcpu->requests)) {
+		while (vcpu->arch.ptc_g_count > 0)
+			ptc_ga_remote_func(vcpu, --vcpu->arch.ptc_g_count);
+	}
+}
+
+void vmm_transition(struct kvm_vcpu *vcpu)
+{
+	ia64_call_vsa(PAL_VPS_SAVE, (unsigned long)vcpu->arch.vpd,
+			0, 0, 0, 0, 0, 0);
+	vmm_trampoline(&vcpu->arch.guest, &vcpu->arch.host);
+	ia64_call_vsa(PAL_VPS_RESTORE, (unsigned long)vcpu->arch.vpd,
+						0, 0, 0, 0, 0, 0);
+	kvm_do_resume_op(vcpu);
+}
diff --git a/arch/ia64/kvm/trampoline.S b/arch/ia64/kvm/trampoline.S
new file mode 100644
index 0000000..30897d4
--- /dev/null
+++ b/arch/ia64/kvm/trampoline.S
@@ -0,0 +1,1038 @@
+/* Save all processor states
+ *
+ * Copyright (c) 2007 Fleming Feng <fleming.feng@intel.com>
+ * Copyright (c) 2007 Anthony Xu   <anthony.xu@intel.com>
+ */
+
+#include <asm/asmmacro.h>
+#include "asm-offsets.h"
+
+
+#define CTX(name)    VMM_CTX_##name##_OFFSET
+
+	/*
+	 *	r32:		context_t base address
+	 */
+#define	SAVE_BRANCH_REGS			\
+	add	r2 = CTX(B0),r32;		\
+	add	r3 = CTX(B1),r32;		\
+	mov	r16 = b0;			\
+	mov	r17 = b1;			\
+	;;					\
+	st8	[r2]=r16,16;			\
+	st8	[r3]=r17,16;			\
+	;;					\
+	mov	r16 = b2;			\
+	mov	r17 = b3;			\
+	;;					\
+	st8	[r2]=r16,16;			\
+	st8	[r3]=r17,16;			\
+	;;					\
+	mov	r16 = b4;			\
+	mov	r17 = b5;			\
+	;;					\
+	st8	[r2]=r16;   			\
+	st8	[r3]=r17;   			\
+	;;
+
+	/*
+	 *	r33:		context_t base address
+	 */
+#define	RESTORE_BRANCH_REGS			\
+	add	r2 = CTX(B0),r33;		\
+	add	r3 = CTX(B1),r33;		\
+	;;					\
+	ld8	r16=[r2],16;			\
+	ld8	r17=[r3],16;			\
+	;;					\
+	mov	b0 = r16;			\
+	mov	b1 = r17;			\
+	;;					\
+	ld8	r16=[r2],16;			\
+	ld8	r17=[r3],16;			\
+	;;					\
+	mov	b2 = r16;			\
+	mov	b3 = r17;			\
+	;;					\
+	ld8	r16=[r2];   			\
+	ld8	r17=[r3];   			\
+	;;					\
+	mov	b4=r16;				\
+	mov	b5=r17;				\
+	;;
+
+
+	/*
+	 *	r32: context_t base address
+	 *	bsw == 1
+	 *	Save all bank1 general registers, r4 ~ r7
+	 */
+#define	SAVE_GENERAL_REGS			\
+	add	r2=CTX(R4),r32;			\
+	add	r3=CTX(R5),r32;			\
+	;;					\
+.mem.offset 0,0;        			\
+	st8.spill	[r2]=r4,16;		\
+.mem.offset 8,0;        			\
+	st8.spill	[r3]=r5,16;		\
+	;;					\
+.mem.offset 0,0;        			\
+	st8.spill	[r2]=r6,48;		\
+.mem.offset 8,0;        			\
+	st8.spill	[r3]=r7,48;		\
+	;;                          		\
+.mem.offset 0,0;        			\
+    st8.spill    [r2]=r12;			\
+.mem.offset 8,0;				\
+    st8.spill    [r3]=r13;			\
+    ;;
+
+	/*
+	 *	r33: context_t base address
+	 *	bsw == 1
+	 */
+#define	RESTORE_GENERAL_REGS			\
+	add	r2=CTX(R4),r33;			\
+	add	r3=CTX(R5),r33;			\
+	;;					\
+	ld8.fill	r4=[r2],16;		\
+	ld8.fill	r5=[r3],16;		\
+	;;					\
+	ld8.fill	r6=[r2],48;		\
+	ld8.fill	r7=[r3],48;		\
+	;;					\
+	ld8.fill    r12=[r2];			\
+	ld8.fill    r13 =[r3];			\
+	;;
+
+
+
+
+	/*
+	 *	r32:		context_t base address
+	 */
+#define	SAVE_KERNEL_REGS			\
+	add	r2 = CTX(KR0),r32;		\
+	add	r3 = CTX(KR1),r32;		\
+	mov	r16 = ar.k0;			\
+	mov	r17 = ar.k1;			\
+	;;		        		\
+	st8	[r2] = r16,16;			\
+	st8	[r3] = r17,16;			\
+	;;		        		\
+	mov	r16 = ar.k2;			\
+	mov	r17 = ar.k3;			\
+	;;		        		\
+	st8	[r2] = r16,16;			\
+	st8	[r3] = r17,16;			\
+	;;					\
+	mov	r16 = ar.k4;			\
+	mov	r17 = ar.k5;			\
+	;;				    	\
+	st8	[r2] = r16,16;			\
+	st8	[r3] = r17,16;			\
+	;;					\
+	mov	r16 = ar.k6;			\
+	mov	r17 = ar.k7;			\
+	;;		    			\
+	st8	[r2] = r16;     		\
+	st8	[r3] = r17;			\
+	;;
+
+
+
+	/*
+	 *	r33:		context_t base address
+	 */
+#define	RESTORE_KERNEL_REGS			\
+	add	r2 = CTX(KR0),r33;		\
+	add	r3 = CTX(KR1),r33;		\
+	;;		    			\
+	ld8	r16=[r2],16;     		\
+	ld8	r17=[r3],16;			\
+	;;					\
+	mov	ar.k0=r16;  			\
+	mov	ar.k1=r17;	    		\
+	;;		        		\
+	ld8	r16=[r2],16;			\
+	ld8	r17=[r3],16;			\
+	;;		        		\
+	mov	ar.k2=r16;   			\
+	mov	ar.k3=r17;	    		\
+	;;		        		\
+	ld8	r16=[r2],16;			\
+	ld8	r17=[r3],16;			\
+	;;					\
+	mov	ar.k4=r16;			\
+	mov	ar.k5=r17;	    		\
+	;;				    	\
+	ld8	r16=[r2],16;			\
+	ld8	r17=[r3],16;			\
+	;;					\
+	mov	ar.k6=r16;  			\
+	mov	ar.k7=r17;	    		\
+	;;
+
+
+
+	/*
+	 *	r32:		context_t base address
+	 */
+#define	SAVE_APP_REGS				\
+	add  r2 = CTX(BSPSTORE),r32;		\
+	mov  r16 = ar.bspstore;			\
+	;;					\
+	st8  [r2] = r16,CTX(RNAT)-CTX(BSPSTORE);\
+	mov  r16 = ar.rnat;			\
+	;;					\
+	st8  [r2] = r16,CTX(FCR)-CTX(RNAT);	\
+	mov  r16 = ar.fcr;			\
+	;;					\
+	st8  [r2] = r16,CTX(EFLAG)-CTX(FCR);	\
+	mov  r16 = ar.eflag;			\
+	;;					\
+	st8  [r2] = r16,CTX(CFLG)-CTX(EFLAG);	\
+	mov  r16 = ar.cflg;			\
+	;;					\
+	st8  [r2] = r16,CTX(FSR)-CTX(CFLG);	\
+	mov  r16 = ar.fsr;			\
+	;;					\
+	st8  [r2] = r16,CTX(FIR)-CTX(FSR);	\
+	mov  r16 = ar.fir;			\
+	;;					\
+	st8  [r2] = r16,CTX(FDR)-CTX(FIR);	\
+	mov  r16 = ar.fdr;			\
+	;;					\
+	st8  [r2] = r16,CTX(UNAT)-CTX(FDR);	\
+	mov  r16 = ar.unat;			\
+	;;					\
+	st8  [r2] = r16,CTX(FPSR)-CTX(UNAT);	\
+	mov  r16 = ar.fpsr;			\
+	;;					\
+	st8  [r2] = r16,CTX(PFS)-CTX(FPSR);	\
+	mov  r16 = ar.pfs;			\
+	;;					\
+	st8  [r2] = r16,CTX(LC)-CTX(PFS);	\
+	mov  r16 = ar.lc;			\
+	;;					\
+	st8  [r2] = r16;			\
+	;;
+
+	/*
+	 *	r33:		context_t base address
+	 */
+#define	RESTORE_APP_REGS			\
+	add  r2=CTX(BSPSTORE),r33;		\
+	;;					\
+	ld8  r16=[r2],CTX(RNAT)-CTX(BSPSTORE);	\
+	;;					\
+	mov  ar.bspstore=r16;			\
+	ld8  r16=[r2],CTX(FCR)-CTX(RNAT);	\
+	;;					\
+	mov  ar.rnat=r16;			\
+	ld8  r16=[r2],CTX(EFLAG)-CTX(FCR);	\
+	;;					\
+	mov  ar.fcr=r16;			\
+	ld8  r16=[r2],CTX(CFLG)-CTX(EFLAG);	\
+	;;					\
+	mov  ar.eflag=r16;			\
+	ld8  r16=[r2],CTX(FSR)-CTX(CFLG);	\
+	;;					\
+	mov  ar.cflg=r16;			\
+	ld8  r16=[r2],CTX(FIR)-CTX(FSR);	\
+	;;					\
+	mov  ar.fsr=r16;			\
+	ld8  r16=[r2],CTX(FDR)-CTX(FIR);	\
+	;;					\
+	mov  ar.fir=r16;			\
+	ld8  r16=[r2],CTX(UNAT)-CTX(FDR);	\
+	;;					\
+	mov  ar.fdr=r16;			\
+	ld8  r16=[r2],CTX(FPSR)-CTX(UNAT);	\
+	;;					\
+	mov  ar.unat=r16;			\
+	ld8  r16=[r2],CTX(PFS)-CTX(FPSR);	\
+	;;					\
+	mov  ar.fpsr=r16;			\
+	ld8  r16=[r2],CTX(LC)-CTX(PFS);		\
+	;;					\
+	mov  ar.pfs=r16;			\
+	ld8  r16=[r2];				\
+	;;					\
+	mov  ar.lc=r16;				\
+	;;
+
+	/*
+	 *	r32:		context_t base address
+	 */
+#define	SAVE_CTL_REGS				\
+	add	r2 = CTX(DCR),r32;		\
+	mov	r16 = cr.dcr;			\
+	;;					\
+	st8	[r2] = r16,CTX(IVA)-CTX(DCR);	\
+	;;                          		\
+	mov	r16 = cr.iva;			\
+	;;					\
+	st8	[r2] = r16,CTX(PTA)-CTX(IVA);	\
+	;;					\
+	mov r16 = cr.pta;			\
+	;;					\
+	st8 [r2] = r16 ;			\
+	;;
+
+	/*
+	 *	r33:		context_t base address
+	 */
+#define	RESTORE_CTL_REGS				\
+	add	r2 = CTX(DCR),r33;	        	\
+	;;						\
+	ld8	r16 = [r2],CTX(IVA)-CTX(DCR);		\
+	;;                      			\
+	mov	cr.dcr = r16;				\
+	dv_serialize_data;				\
+	;;						\
+	ld8	r16 = [r2],CTX(PTA)-CTX(IVA);		\
+	;;						\
+	mov	cr.iva = r16;				\
+	dv_serialize_data;				\
+	;;						\
+	ld8 r16 = [r2];					\
+	;;						\
+	mov cr.pta = r16;				\
+	dv_serialize_data;				\
+	;;
+
+
+	/*
+	 *	r32:		context_t base address
+	 */
+#define	SAVE_REGION_REGS			\
+	add	r2=CTX(RR0),r32;		\
+	mov	r16=rr[r0];			\
+	dep.z	r18=1,61,3;			\
+	;;					\
+	st8	[r2]=r16,8;			\
+	mov	r17=rr[r18];			\
+	dep.z	r18=2,61,3;			\
+	;;					\
+	st8	[r2]=r17,8;			\
+	mov	r16=rr[r18];			\
+	dep.z	r18=3,61,3;			\
+	;;					\
+	st8	[r2]=r16,8;			\
+	mov	r17=rr[r18];			\
+	dep.z	r18=4,61,3;			\
+	;;					\
+	st8	[r2]=r17,8;			\
+	mov	r16=rr[r18];			\
+	dep.z	r18=5,61,3;			\
+	;;					\
+	st8	[r2]=r16,8;			\
+	mov	r17=rr[r18];			\
+	dep.z	r18=7,61,3;			\
+	;;					\
+	st8	[r2]=r17,16;			\
+	mov	r16=rr[r18];			\
+	;;					\
+	st8	[r2]=r16,8;			\
+	;;
+
+	/*
+	 *	r33:context_t base address
+	 */
+#define	RESTORE_REGION_REGS	\
+	add	r2=CTX(RR0),r33;\
+	mov r18=r0;		\
+	;;			\
+	ld8	r20=[r2],8;	\
+	;;	/* rr0 */	\
+	ld8	r21=[r2],8;	\
+	;;	/* rr1 */	\
+	ld8	r22=[r2],8;	\
+	;;	/* rr2 */	\
+	ld8	r23=[r2],8;	\
+	;;	/* rr3 */	\
+	ld8	r24=[r2],8;	\
+	;;	/* rr4 */	\
+	ld8	r25=[r2],16;	\
+	;;	/* rr5 */	\
+	ld8	r27=[r2];	\
+	;;	/* rr7 */	\
+	mov rr[r18]=r20;	\
+	dep.z	r18=1,61,3;	\
+	;;  /* rr1 */		\
+	mov rr[r18]=r21;	\
+	dep.z	r18=2,61,3;	\
+	;;  /* rr2 */		\
+	mov rr[r18]=r22;	\
+	dep.z	r18=3,61,3;	\
+	;;  /* rr3 */		\
+	mov rr[r18]=r23;	\
+	dep.z	r18=4,61,3;	\
+	;;  /* rr4 */		\
+	mov rr[r18]=r24;	\
+	dep.z	r18=5,61,3;	\
+	;;  /* rr5 */		\
+	mov rr[r18]=r25;	\
+	dep.z	r18=7,61,3;	\
+	;;  /* rr7 */		\
+	mov rr[r18]=r27;	\
+	;;			\
+	srlz.i;			\
+	;;
+
+
+
+	/*
+	 *	r32:	context_t base address
+	 *	r36~r39:scratch registers
+	 */
+#define	SAVE_DEBUG_REGS				\
+	add	r2=CTX(IBR0),r32;		\
+	add	r3=CTX(DBR0),r32;		\
+	mov	r16=ibr[r0];			\
+	mov	r17=dbr[r0];			\
+	;;					\
+	st8	[r2]=r16,8; 			\
+	st8	[r3]=r17,8;	    		\
+	add	r18=1,r0;		    	\
+	;;					\
+	mov	r16=ibr[r18];			\
+	mov	r17=dbr[r18];			\
+	;;					\
+	st8	[r2]=r16,8;		    	\
+	st8	[r3]=r17,8;			\
+	add	r18=2,r0;			\
+	;;					\
+	mov	r16=ibr[r18];			\
+	mov	r17=dbr[r18];			\
+	;;					\
+	st8	[r2]=r16,8;		    	\
+	st8	[r3]=r17,8;			\
+	add	r18=2,r0;			\
+	;;					\
+	mov	r16=ibr[r18];			\
+	mov	r17=dbr[r18];			\
+	;;					\
+	st8	[r2]=r16,8;		    	\
+	st8	[r3]=r17,8;			\
+	add	r18=3,r0;			\
+	;;					\
+	mov	r16=ibr[r18];			\
+	mov	r17=dbr[r18];			\
+	;;					\
+	st8	[r2]=r16,8;		    	\
+	st8	[r3]=r17,8;			\
+	add	r18=4,r0;			\
+	;;					\
+	mov	r16=ibr[r18];			\
+	mov	r17=dbr[r18];			\
+	;;					\
+	st8	[r2]=r16,8;		    	\
+	st8	[r3]=r17,8;			\
+	add	r18=5,r0;			\
+	;;					\
+	mov	r16=ibr[r18];			\
+	mov	r17=dbr[r18];			\
+	;;					\
+	st8	[r2]=r16,8;		    	\
+	st8	[r3]=r17,8;			\
+	add	r18=6,r0;			\
+	;;					\
+	mov	r16=ibr[r18];			\
+	mov	r17=dbr[r18];			\
+	;;					\
+	st8	[r2]=r16,8;		    	\
+	st8	[r3]=r17,8;			\
+	add	r18=7,r0;			\
+	;;					\
+	mov	r16=ibr[r18];			\
+	mov	r17=dbr[r18];			\
+	;;					\
+	st8	[r2]=r16,8;		    	\
+	st8	[r3]=r17,8;			\
+	;;
+
+
+/*
+ *      r33:    point to context_t structure
+ *      ar.lc are corrupted.
+ */
+#define RESTORE_DEBUG_REGS			\
+	add	r2=CTX(IBR0),r33;		\
+	add	r3=CTX(DBR0),r33;		\
+	mov r16=7;    				\
+	mov r17=r0;				\
+	;;                    			\
+	mov ar.lc = r16;			\
+	;; 					\
+1:						\
+	ld8 r18=[r2],8;		    		\
+	ld8 r19=[r3],8;				\
+	;;					\
+	mov ibr[r17]=r18;			\
+	mov dbr[r17]=r19;			\
+	;;   					\
+	srlz.i;					\
+	;; 					\
+	add r17=1,r17;				\
+	br.cloop.sptk 1b;			\
+	;;
+
+
+	/*
+	 *	r32:		context_t base address
+	 */
+#define	SAVE_FPU_LOW				\
+	add	r2=CTX(F2),r32;			\
+	add	r3=CTX(F3),r32;			\
+	;;					\
+	stf.spill.nta	[r2]=f2,32;		\
+	stf.spill.nta	[r3]=f3,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f4,32;		\
+	stf.spill.nta	[r3]=f5,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f6,32;		\
+	stf.spill.nta	[r3]=f7,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f8,32;		\
+	stf.spill.nta	[r3]=f9,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f10,32;		\
+	stf.spill.nta	[r3]=f11,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f12,32;		\
+	stf.spill.nta	[r3]=f13,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f14,32;		\
+	stf.spill.nta	[r3]=f15,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f16,32;		\
+	stf.spill.nta	[r3]=f17,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f18,32;		\
+	stf.spill.nta	[r3]=f19,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f20,32;		\
+	stf.spill.nta	[r3]=f21,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f22,32;		\
+	stf.spill.nta	[r3]=f23,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f24,32;		\
+	stf.spill.nta	[r3]=f25,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f26,32;		\
+	stf.spill.nta	[r3]=f27,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f28,32;		\
+	stf.spill.nta	[r3]=f29,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f30;		\
+	stf.spill.nta	[r3]=f31;		\
+	;;
+
+	/*
+	 *	r32:		context_t base address
+	 */
+#define	SAVE_FPU_HIGH				\
+	add	r2=CTX(F32),r32;		\
+	add	r3=CTX(F33),r32;		\
+	;;					\
+	stf.spill.nta	[r2]=f32,32;		\
+	stf.spill.nta	[r3]=f33,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f34,32;		\
+	stf.spill.nta	[r3]=f35,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f36,32;		\
+	stf.spill.nta	[r3]=f37,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f38,32;		\
+	stf.spill.nta	[r3]=f39,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f40,32;		\
+	stf.spill.nta	[r3]=f41,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f42,32;		\
+	stf.spill.nta	[r3]=f43,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f44,32;		\
+	stf.spill.nta	[r3]=f45,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f46,32;		\
+	stf.spill.nta	[r3]=f47,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f48,32;		\
+	stf.spill.nta	[r3]=f49,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f50,32;		\
+	stf.spill.nta	[r3]=f51,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f52,32;		\
+	stf.spill.nta	[r3]=f53,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f54,32;		\
+	stf.spill.nta	[r3]=f55,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f56,32;		\
+	stf.spill.nta	[r3]=f57,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f58,32;		\
+	stf.spill.nta	[r3]=f59,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f60,32;		\
+	stf.spill.nta	[r3]=f61,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f62,32;		\
+	stf.spill.nta	[r3]=f63,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f64,32;		\
+	stf.spill.nta	[r3]=f65,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f66,32;		\
+	stf.spill.nta	[r3]=f67,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f68,32;		\
+	stf.spill.nta	[r3]=f69,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f70,32;		\
+	stf.spill.nta	[r3]=f71,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f72,32;		\
+	stf.spill.nta	[r3]=f73,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f74,32;		\
+	stf.spill.nta	[r3]=f75,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f76,32;		\
+	stf.spill.nta	[r3]=f77,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f78,32;		\
+	stf.spill.nta	[r3]=f79,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f80,32;		\
+	stf.spill.nta	[r3]=f81,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f82,32;		\
+	stf.spill.nta	[r3]=f83,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f84,32;		\
+	stf.spill.nta	[r3]=f85,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f86,32;		\
+	stf.spill.nta	[r3]=f87,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f88,32;		\
+	stf.spill.nta	[r3]=f89,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f90,32;		\
+	stf.spill.nta	[r3]=f91,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f92,32;		\
+	stf.spill.nta	[r3]=f93,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f94,32;		\
+	stf.spill.nta	[r3]=f95,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f96,32;		\
+	stf.spill.nta	[r3]=f97,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f98,32;		\
+	stf.spill.nta	[r3]=f99,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f100,32;		\
+	stf.spill.nta	[r3]=f101,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f102,32;		\
+	stf.spill.nta	[r3]=f103,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f104,32;		\
+	stf.spill.nta	[r3]=f105,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f106,32;		\
+	stf.spill.nta	[r3]=f107,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f108,32;		\
+	stf.spill.nta	[r3]=f109,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f110,32;		\
+	stf.spill.nta	[r3]=f111,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f112,32;		\
+	stf.spill.nta	[r3]=f113,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f114,32;		\
+	stf.spill.nta	[r3]=f115,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f116,32;		\
+	stf.spill.nta	[r3]=f117,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f118,32;		\
+	stf.spill.nta	[r3]=f119,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f120,32;		\
+	stf.spill.nta	[r3]=f121,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f122,32;		\
+	stf.spill.nta	[r3]=f123,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f124,32;		\
+	stf.spill.nta	[r3]=f125,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f126;		\
+	stf.spill.nta	[r3]=f127;		\
+	;;
+
+     /*
+      *      r33:    point to context_t structure
+      */
+#define	RESTORE_FPU_LOW				\
+    add     r2 = CTX(F2), r33;			\
+    add     r3 = CTX(F3), r33;			\
+    ;;						\
+    ldf.fill.nta f2 = [r2], 32;			\
+    ldf.fill.nta f3 = [r3], 32;			\
+    ;;						\
+    ldf.fill.nta f4 = [r2], 32;			\
+    ldf.fill.nta f5 = [r3], 32;			\
+    ;;						\
+    ldf.fill.nta f6 = [r2], 32;			\
+    ldf.fill.nta f7 = [r3], 32;			\
+    ;;						\
+    ldf.fill.nta f8 = [r2], 32;			\
+    ldf.fill.nta f9 = [r3], 32;			\
+    ;;						\
+    ldf.fill.nta f10 = [r2], 32;		\
+    ldf.fill.nta f11 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f12 = [r2], 32;		\
+    ldf.fill.nta f13 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f14 = [r2], 32;		\
+    ldf.fill.nta f15 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f16 = [r2], 32;		\
+    ldf.fill.nta f17 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f18 = [r2], 32;		\
+    ldf.fill.nta f19 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f20 = [r2], 32;		\
+    ldf.fill.nta f21 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f22 = [r2], 32;		\
+    ldf.fill.nta f23 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f24 = [r2], 32;		\
+    ldf.fill.nta f25 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f26 = [r2], 32;		\
+    ldf.fill.nta f27 = [r3], 32;		\
+	;;					\
+    ldf.fill.nta f28 = [r2], 32;		\
+    ldf.fill.nta f29 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f30 = [r2], 32;		\
+    ldf.fill.nta f31 = [r3], 32;		\
+    ;;
+
+
+
+    /*
+     *      r33:    point to context_t structure
+     */
+#define	RESTORE_FPU_HIGH			\
+    add     r2 = CTX(F32), r33;			\
+    add     r3 = CTX(F33), r33;			\
+    ;;						\
+    ldf.fill.nta f32 = [r2], 32;		\
+    ldf.fill.nta f33 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f34 = [r2], 32;		\
+    ldf.fill.nta f35 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f36 = [r2], 32;		\
+    ldf.fill.nta f37 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f38 = [r2], 32;		\
+    ldf.fill.nta f39 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f40 = [r2], 32;		\
+    ldf.fill.nta f41 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f42 = [r2], 32;		\
+    ldf.fill.nta f43 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f44 = [r2], 32;		\
+    ldf.fill.nta f45 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f46 = [r2], 32;		\
+    ldf.fill.nta f47 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f48 = [r2], 32;		\
+    ldf.fill.nta f49 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f50 = [r2], 32;		\
+    ldf.fill.nta f51 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f52 = [r2], 32;		\
+    ldf.fill.nta f53 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f54 = [r2], 32;		\
+    ldf.fill.nta f55 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f56 = [r2], 32;		\
+    ldf.fill.nta f57 = [r3], 32;   		\
+    ;;						\
+    ldf.fill.nta f58 = [r2], 32;		\
+    ldf.fill.nta f59 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f60 = [r2], 32;		\
+    ldf.fill.nta f61 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f62 = [r2], 32;		\
+    ldf.fill.nta f63 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f64 = [r2], 32;		\
+    ldf.fill.nta f65 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f66 = [r2], 32;		\
+    ldf.fill.nta f67 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f68 = [r2], 32;		\
+    ldf.fill.nta f69 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f70 = [r2], 32;		\
+    ldf.fill.nta f71 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f72 = [r2], 32;		\
+    ldf.fill.nta f73 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f74 = [r2], 32;		\
+    ldf.fill.nta f75 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f76 = [r2], 32;		\
+    ldf.fill.nta f77 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f78 = [r2], 32;		\
+    ldf.fill.nta f79 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f80 = [r2], 32;		\
+    ldf.fill.nta f81 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f82 = [r2], 32;		\
+    ldf.fill.nta f83 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f84 = [r2], 32;		\
+    ldf.fill.nta f85 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f86 = [r2], 32;		\
+    ldf.fill.nta f87 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f88 = [r2], 32;		\
+    ldf.fill.nta f89 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f90 = [r2], 32;		\
+    ldf.fill.nta f91 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f92 = [r2], 32;		\
+    ldf.fill.nta f93 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f94 = [r2], 32;		\
+    ldf.fill.nta f95 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f96 = [r2], 32;		\
+    ldf.fill.nta f97 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f98 = [r2], 32;		\
+    ldf.fill.nta f99 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f100 = [r2], 32;		\
+    ldf.fill.nta f101 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f102 = [r2], 32;		\
+    ldf.fill.nta f103 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f104 = [r2], 32;		\
+    ldf.fill.nta f105 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f106 = [r2], 32;		\
+    ldf.fill.nta f107 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f108 = [r2], 32;		\
+    ldf.fill.nta f109 = [r3], 32;   		\
+    ;;						\
+    ldf.fill.nta f110 = [r2], 32;		\
+    ldf.fill.nta f111 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f112 = [r2], 32;		\
+    ldf.fill.nta f113 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f114 = [r2], 32;		\
+    ldf.fill.nta f115 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f116 = [r2], 32;		\
+    ldf.fill.nta f117 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f118 = [r2], 32;		\
+    ldf.fill.nta f119 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f120 = [r2], 32;		\
+    ldf.fill.nta f121 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f122 = [r2], 32;		\
+    ldf.fill.nta f123 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f124 = [r2], 32;		\
+    ldf.fill.nta f125 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f126 = [r2], 32;		\
+    ldf.fill.nta f127 = [r3], 32;		\
+    ;;
+
+	/*
+	 *	r32:		context_t base address
+	 */
+#define	SAVE_PTK_REGS				\
+    add r2=CTX(PKR0), r32;			\
+    mov r16=7;    				\
+    ;;                         			\
+    mov ar.lc=r16;  				\
+    mov r17=r0;					\
+    ;;						\
+1:						\
+    mov r18=pkr[r17];				\
+    ;;                     			\
+    srlz.i;					\
+    ;; 						\
+    st8 [r2]=r18, 8;				\
+    ;;    					\
+    add r17 =1,r17;				\
+    ;;                     			\
+    br.cloop.sptk 1b;				\
+    ;;
+
+/*
+ *      r33:    point to context_t structure
+ *      ar.lc are corrupted.
+ */
+#define RESTORE_PTK_REGS	    		\
+    add r2=CTX(PKR0), r33;			\
+    mov r16=7;    				\
+    ;;                         			\
+    mov ar.lc=r16;  				\
+    mov r17=r0;					\
+    ;;						\
+1: 						\
+    ld8 r18=[r2], 8;				\
+    ;;						\
+    mov pkr[r17]=r18;				\
+    ;;    					\
+    srlz.i;					\
+    ;; 						\
+    add r17 =1,r17;				\
+    ;;                     			\
+    br.cloop.sptk 1b;				\
+    ;;
+
+
+/*
+ * void vmm_trampoline( context_t * from,
+ *			context_t * to)
+ *
+ * 	from:	r32
+ *	to:	r33
+ *  note: interrupt disabled before call this function.
+ */
+GLOBAL_ENTRY(vmm_trampoline)
+    mov r16 = psr
+    adds r2 = CTX(PSR), r32
+    ;;
+    st8 [r2] = r16, 8       // psr
+    mov r17 = pr
+    ;;
+    st8 [r2] = r17, 8       // pr
+    mov r18 = ar.unat
+    ;;
+    st8 [r2] = r18
+    mov r17 = ar.rsc
+    ;;
+    adds r2 = CTX(RSC),r32
+    ;;
+    st8 [r2]= r17
+    mov ar.rsc =0
+    flushrs
+    ;;
+    SAVE_GENERAL_REGS
+    ;;
+    SAVE_KERNEL_REGS
+    ;;
+    SAVE_APP_REGS
+    ;;
+    SAVE_BRANCH_REGS
+    ;;
+    SAVE_CTL_REGS
+    ;;
+    SAVE_REGION_REGS
+    ;;
+    //SAVE_DEBUG_REGS
+    ;;
+    rsm  psr.dfl
+    ;;
+    srlz.d
+    ;;
+    SAVE_FPU_LOW
+    ;;
+    rsm  psr.dfh
+    ;;
+    srlz.d
+    ;;
+    SAVE_FPU_HIGH
+    ;;
+    SAVE_PTK_REGS
+    ;;
+    RESTORE_PTK_REGS
+    ;;
+    RESTORE_FPU_HIGH
+    ;;
+    RESTORE_FPU_LOW
+    ;;
+    //RESTORE_DEBUG_REGS
+    ;;
+    RESTORE_REGION_REGS
+    ;;
+    RESTORE_CTL_REGS
+    ;;
+    RESTORE_BRANCH_REGS
+    ;;
+    RESTORE_APP_REGS
+    ;;
+    RESTORE_KERNEL_REGS
+    ;;
+    RESTORE_GENERAL_REGS
+    ;;
+    adds r2=CTX(PSR), r33
+    ;;
+    ld8 r16=[r2], 8       // psr
+    ;;
+    mov psr.l=r16
+    ;;
+    srlz.d
+    ;;
+    ld8 r16=[r2], 8       // pr
+    ;;
+    mov pr =r16,-1
+    ld8 r16=[r2]       // unat
+    ;;
+    mov ar.unat=r16
+    ;;
+    adds r2=CTX(RSC),r33
+    ;;
+    ld8 r16 =[r2]
+    ;;
+    mov ar.rsc = r16
+    ;;
+    br.ret.sptk.few b0
+END(vmm_trampoline)
diff --git a/arch/ia64/kvm/vcpu.c b/arch/ia64/kvm/vcpu.c
new file mode 100644
index 0000000..e44027c
--- /dev/null
+++ b/arch/ia64/kvm/vcpu.c
@@ -0,0 +1,2163 @@
+/*
+ * kvm_vcpu.c: handling all virtual cpu related thing.
+ * Copyright (c) 2005, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ *  Shaofan Li (Susue Li) <susie.li@intel.com>
+ *  Yaozu Dong (Eddie Dong) (Eddie.dong@intel.com)
+ *  Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com)
+ *  Xiantao Zhang <xiantao.zhang@intel.com>
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/types.h>
+
+#include <asm/processor.h>
+#include <asm/ia64regs.h>
+#include <asm/gcc_intrin.h>
+#include <asm/kregs.h>
+#include <asm/pgtable.h>
+#include <asm/tlb.h>
+
+#include "asm-offsets.h"
+#include "vcpu.h"
+
+/*
+ * Special notes:
+ * - Index by it/dt/rt sequence
+ * - Only existing mode transitions are allowed in this table
+ * - RSE is placed at lazy mode when emulating guest partial mode
+ * - If gva happens to be rr0 and rr4, only allowed case is identity
+ *   mapping (gva=gpa), or panic! (How?)
+ */
+int mm_switch_table[8][8] = {
+	/*  2004/09/12(Kevin): Allow switch to self */
+	/*
+	 *  (it,dt,rt): (0,0,0) -> (1,1,1)
+	 *  This kind of transition usually occurs in the very early
+	 *  stage of Linux boot up procedure. Another case is in efi
+	 *  and pal calls. (see "arch/ia64/kernel/head.S")
+	 *
+	 *  (it,dt,rt): (0,0,0) -> (0,1,1)
+	 *  This kind of transition is found when OSYa exits efi boot
+	 *  service. Due to gva = gpa in this case (Same region),
+	 *  data access can be satisfied though itlb entry for physical
+	 *  emulation is hit.
+	 */
+	{SW_SELF, 0,  0,  SW_NOP, 0,  0,  0,  SW_P2V},
+	{0,  0,  0,  0,  0,  0,  0,  0},
+	{0,  0,  0,  0,  0,  0,  0,  0},
+	/*
+	 *  (it,dt,rt): (0,1,1) -> (1,1,1)
+	 *  This kind of transition is found in OSYa.
+	 *
+	 *  (it,dt,rt): (0,1,1) -> (0,0,0)
+	 *  This kind of transition is found in OSYa
+	 */
+	{SW_NOP, 0,  0,  SW_SELF, 0,  0,  0,  SW_P2V},
+	/* (1,0,0)->(1,1,1) */
+	{0,  0,  0,  0,  0,  0,  0,  SW_P2V},
+	/*
+	 *  (it,dt,rt): (1,0,1) -> (1,1,1)
+	 *  This kind of transition usually occurs when Linux returns
+	 *  from the low level TLB miss handlers.
+	 *  (see "arch/ia64/kernel/ivt.S")
+	 */
+	{0,  0,  0,  0,  0,  SW_SELF, 0,  SW_P2V},
+	{0,  0,  0,  0,  0,  0,  0,  0},
+	/*
+	 *  (it,dt,rt): (1,1,1) -> (1,0,1)
+	 *  This kind of transition usually occurs in Linux low level
+	 *  TLB miss handler. (see "arch/ia64/kernel/ivt.S")
+	 *
+	 *  (it,dt,rt): (1,1,1) -> (0,0,0)
+	 *  This kind of transition usually occurs in pal and efi calls,
+	 *  which requires running in physical mode.
+	 *  (see "arch/ia64/kernel/head.S")
+	 *  (1,1,1)->(1,0,0)
+	 */
+
+	{SW_V2P, 0,  0,  0,  SW_V2P, SW_V2P, 0,  SW_SELF},
+};
+
+void physical_mode_init(struct kvm_vcpu  *vcpu)
+{
+	vcpu->arch.mode_flags = GUEST_IN_PHY;
+}
+
+void switch_to_physical_rid(struct kvm_vcpu *vcpu)
+{
+	unsigned long psr;
+
+	/* Save original virtual mode rr[0] and rr[4] */
+	psr = ia64_clear_ic();
+	ia64_set_rr(VRN0<<VRN_SHIFT, vcpu->arch.metaphysical_rr0);
+	ia64_srlz_d();
+	ia64_set_rr(VRN4<<VRN_SHIFT, vcpu->arch.metaphysical_rr4);
+	ia64_srlz_d();
+
+	ia64_set_psr(psr);
+	return;
+}
+
+
+void switch_to_virtual_rid(struct kvm_vcpu *vcpu)
+{
+	unsigned long psr;
+
+	psr = ia64_clear_ic();
+	ia64_set_rr(VRN0 << VRN_SHIFT, vcpu->arch.metaphysical_saved_rr0);
+	ia64_srlz_d();
+	ia64_set_rr(VRN4 << VRN_SHIFT, vcpu->arch.metaphysical_saved_rr4);
+	ia64_srlz_d();
+	ia64_set_psr(psr);
+	return;
+}
+
+static int mm_switch_action(struct ia64_psr opsr, struct ia64_psr npsr)
+{
+	return mm_switch_table[MODE_IND(opsr)][MODE_IND(npsr)];
+}
+
+void switch_mm_mode(struct kvm_vcpu *vcpu, struct ia64_psr old_psr,
+					struct ia64_psr new_psr)
+{
+	int act;
+	act = mm_switch_action(old_psr, new_psr);
+	switch (act) {
+	case SW_V2P:
+		/*printk("V -> P mode transition: (0x%lx -> 0x%lx)\n",
+		old_psr.val, new_psr.val);*/
+		switch_to_physical_rid(vcpu);
+		/*
+		 * Set rse to enforced lazy, to prevent active rse
+		 *save/restor when guest physical mode.
+		 */
+		vcpu->arch.mode_flags |= GUEST_IN_PHY;
+		break;
+	case SW_P2V:
+		switch_to_virtual_rid(vcpu);
+		/*
+		 * recover old mode which is saved when entering
+		 * guest physical mode
+		 */
+		vcpu->arch.mode_flags &= ~GUEST_IN_PHY;
+		break;
+	case SW_SELF:
+		break;
+	case SW_NOP:
+		break;
+	default:
+		/* Sanity check */
+		break;
+	}
+	return;
+}
+
+
+
+/*
+ * In physical mode, insert tc/tr for region 0 and 4 uses
+ * RID[0] and RID[4] which is for physical mode emulation.
+ * However what those inserted tc/tr wants is rid for
+ * virtual mode. So original virtual rid needs to be restored
+ * before insert.
+ *
+ * Operations which required such switch include:
+ *  - insertions (itc.*, itr.*)
+ *  - purges (ptc.* and ptr.*)
+ *  - tpa
+ *  - tak
+ *  - thash?, ttag?
+ * All above needs actual virtual rid for destination entry.
+ */
+
+void check_mm_mode_switch(struct kvm_vcpu *vcpu,  struct ia64_psr old_psr,
+					struct ia64_psr new_psr)
+{
+
+	if ((old_psr.dt != new_psr.dt)
+			|| (old_psr.it != new_psr.it)
+			|| (old_psr.rt != new_psr.rt))
+		switch_mm_mode(vcpu, old_psr, new_psr);
+
+	return;
+}
+
+
+/*
+ * In physical mode, insert tc/tr for region 0 and 4 uses
+ * RID[0] and RID[4] which is for physical mode emulation.
+ * However what those inserted tc/tr wants is rid for
+ * virtual mode. So original virtual rid needs to be restored
+ * before insert.
+ *
+ * Operations which required such switch include:
+ *  - insertions (itc.*, itr.*)
+ *  - purges (ptc.* and ptr.*)
+ *  - tpa
+ *  - tak
+ *  - thash?, ttag?
+ * All above needs actual virtual rid for destination entry.
+ */
+
+void prepare_if_physical_mode(struct kvm_vcpu *vcpu)
+{
+	if (is_physical_mode(vcpu)) {
+		vcpu->arch.mode_flags |= GUEST_PHY_EMUL;
+		switch_to_virtual_rid(vcpu);
+	}
+	return;
+}
+
+/* Recover always follows prepare */
+void recover_if_physical_mode(struct kvm_vcpu *vcpu)
+{
+	if (is_physical_mode(vcpu))
+		switch_to_physical_rid(vcpu);
+	vcpu->arch.mode_flags &= ~GUEST_PHY_EMUL;
+	return;
+}
+
+#define RPT(x)	((u16) &((struct kvm_pt_regs *)0)->x)
+
+static u16 gr_info[32] = {
+	0, 	/* r0 is read-only : WE SHOULD NEVER GET THIS */
+	RPT(r1), RPT(r2), RPT(r3),
+	RPT(r4), RPT(r5), RPT(r6), RPT(r7),
+	RPT(r8), RPT(r9), RPT(r10), RPT(r11),
+	RPT(r12), RPT(r13), RPT(r14), RPT(r15),
+	RPT(r16), RPT(r17), RPT(r18), RPT(r19),
+	RPT(r20), RPT(r21), RPT(r22), RPT(r23),
+	RPT(r24), RPT(r25), RPT(r26), RPT(r27),
+	RPT(r28), RPT(r29), RPT(r30), RPT(r31)
+};
+
+#define IA64_FIRST_STACKED_GR   32
+#define IA64_FIRST_ROTATING_FR  32
+
+static inline unsigned long
+rotate_reg(unsigned long sor, unsigned long rrb, unsigned long reg)
+{
+	reg += rrb;
+	if (reg >= sor)
+		reg -= sor;
+	return reg;
+}
+
+/*
+ * Return the (rotated) index for floating point register
+ * be in the REGNUM (REGNUM must range from 32-127,
+ * result is in the range from 0-95.
+ */
+static inline unsigned long fph_index(struct kvm_pt_regs *regs,
+						long regnum)
+{
+	unsigned long rrb_fr = (regs->cr_ifs >> 25) & 0x7f;
+	return rotate_reg(96, rrb_fr, (regnum - IA64_FIRST_ROTATING_FR));
+}
+
+
+/*
+ * The inverse of the above: given bspstore and the number of
+ * registers, calculate ar.bsp.
+ */
+static inline unsigned long *kvm_rse_skip_regs(unsigned long *addr,
+							long num_regs)
+{
+	long delta = ia64_rse_slot_num(addr) + num_regs;
+	int i = 0;
+
+	if (num_regs < 0)
+		delta -= 0x3e;
+	if (delta < 0) {
+		while (delta <= -0x3f) {
+			i--;
+			delta += 0x3f;
+		}
+	} else {
+		while (delta >= 0x3f) {
+			i++;
+			delta -= 0x3f;
+		}
+	}
+
+	return addr + num_regs + i;
+}
+
+static void get_rse_reg(struct kvm_pt_regs *regs, unsigned long r1,
+					unsigned long *val, int *nat)
+{
+	unsigned long *bsp, *addr, *rnat_addr, *bspstore;
+	unsigned long *kbs = (void *) current_vcpu + VMM_RBS_OFFSET;
+	unsigned long nat_mask;
+	unsigned long old_rsc, new_rsc;
+	long sof = (regs->cr_ifs) & 0x7f;
+	long sor = (((regs->cr_ifs >> 14) & 0xf) << 3);
+	long rrb_gr = (regs->cr_ifs >> 18) & 0x7f;
+	long ridx = r1 - 32;
+
+	if (ridx < sor)
+		ridx = rotate_reg(sor, rrb_gr, ridx);
+
+	old_rsc = ia64_getreg(_IA64_REG_AR_RSC);
+	new_rsc = old_rsc&(~(0x3));
+	ia64_setreg(_IA64_REG_AR_RSC, new_rsc);
+
+	bspstore = (unsigned long *)ia64_getreg(_IA64_REG_AR_BSPSTORE);
+	bsp = kbs + (regs->loadrs >> 19);
+
+	addr = kvm_rse_skip_regs(bsp, -sof + ridx);
+	nat_mask = 1UL << ia64_rse_slot_num(addr);
+	rnat_addr = ia64_rse_rnat_addr(addr);
+
+	if (addr >= bspstore) {
+		ia64_flushrs();
+		ia64_mf();
+		bspstore = (unsigned long *)ia64_getreg(_IA64_REG_AR_BSPSTORE);
+	}
+	*val = *addr;
+	if (nat) {
+		if (bspstore < rnat_addr)
+			*nat = (int)!!(ia64_getreg(_IA64_REG_AR_RNAT)
+							& nat_mask);
+		else
+			*nat = (int)!!((*rnat_addr) & nat_mask);
+		ia64_setreg(_IA64_REG_AR_RSC, old_rsc);
+	}
+}
+
+void set_rse_reg(struct kvm_pt_regs *regs, unsigned long r1,
+				unsigned long val, unsigned long nat)
+{
+	unsigned long *bsp, *bspstore, *addr, *rnat_addr;
+	unsigned long *kbs = (void *) current_vcpu + VMM_RBS_OFFSET;
+	unsigned long nat_mask;
+	unsigned long old_rsc, new_rsc, psr;
+	unsigned long rnat;
+	long sof = (regs->cr_ifs) & 0x7f;
+	long sor = (((regs->cr_ifs >> 14) & 0xf) << 3);
+	long rrb_gr = (regs->cr_ifs >> 18) & 0x7f;
+	long ridx = r1 - 32;
+
+	if (ridx < sor)
+		ridx = rotate_reg(sor, rrb_gr, ridx);
+
+	old_rsc = ia64_getreg(_IA64_REG_AR_RSC);
+	/* put RSC to lazy mode, and set loadrs 0 */
+	new_rsc = old_rsc & (~0x3fff0003);
+	ia64_setreg(_IA64_REG_AR_RSC, new_rsc);
+	bsp = kbs + (regs->loadrs >> 19); /* 16 + 3 */
+
+	addr = kvm_rse_skip_regs(bsp, -sof + ridx);
+	nat_mask = 1UL << ia64_rse_slot_num(addr);
+	rnat_addr = ia64_rse_rnat_addr(addr);
+
+	local_irq_save(psr);
+	bspstore = (unsigned long *)ia64_getreg(_IA64_REG_AR_BSPSTORE);
+	if (addr >= bspstore) {
+
+		ia64_flushrs();
+		ia64_mf();
+		*addr = val;
+		bspstore = (unsigned long *)ia64_getreg(_IA64_REG_AR_BSPSTORE);
+		rnat = ia64_getreg(_IA64_REG_AR_RNAT);
+		if (bspstore < rnat_addr)
+			rnat = rnat & (~nat_mask);
+		else
+			*rnat_addr = (*rnat_addr)&(~nat_mask);
+
+		ia64_mf();
+		ia64_loadrs();
+		ia64_setreg(_IA64_REG_AR_RNAT, rnat);
+	} else {
+		rnat = ia64_getreg(_IA64_REG_AR_RNAT);
+		*addr = val;
+		if (bspstore < rnat_addr)
+			rnat = rnat&(~nat_mask);
+		else
+			*rnat_addr = (*rnat_addr) & (~nat_mask);
+
+		ia64_setreg(_IA64_REG_AR_BSPSTORE, bspstore);
+		ia64_setreg(_IA64_REG_AR_RNAT, rnat);
+	}
+	local_irq_restore(psr);
+	ia64_setreg(_IA64_REG_AR_RSC, old_rsc);
+}
+
+void getreg(unsigned long regnum, unsigned long *val,
+				int *nat, struct kvm_pt_regs *regs)
+{
+	unsigned long addr, *unat;
+	if (regnum >= IA64_FIRST_STACKED_GR) {
+		get_rse_reg(regs, regnum, val, nat);
+		return;
+	}
+
+	/*
+	 * Now look at registers in [0-31] range and init correct UNAT
+	 */
+	addr = (unsigned long)regs;
+	unat = &regs->eml_unat;;
+
+	addr += gr_info[regnum];
+
+	*val  = *(unsigned long *)addr;
+	/*
+	 * do it only when requested
+	 */
+	if (nat)
+		*nat  = (*unat >> ((addr >> 3) & 0x3f)) & 0x1UL;
+}
+
+void setreg(unsigned long regnum, unsigned long val,
+			int nat, struct kvm_pt_regs *regs)
+{
+	unsigned long addr;
+	unsigned long bitmask;
+	unsigned long *unat;
+
+	/*
+	 * First takes care of stacked registers
+	 */
+	if (regnum >= IA64_FIRST_STACKED_GR) {
+		set_rse_reg(regs, regnum, val, nat);
+		return;
+	}
+
+	/*
+	 * Now look at registers in [0-31] range and init correct UNAT
+	 */
+	addr = (unsigned long)regs;
+	unat = &regs->eml_unat;
+	/*
+	 * add offset from base of struct
+	 * and do it !
+	 */
+	addr += gr_info[regnum];
+
+	*(unsigned long *)addr = val;
+
+	/*
+	 * We need to clear the corresponding UNAT bit to fully emulate the load
+	 * UNAT bit_pos = GR[r3]{8:3} form EAS-2.4
+	 */
+	bitmask   = 1UL << ((addr >> 3) & 0x3f);
+	if (nat)
+		*unat |= bitmask;
+	 else
+		*unat &= ~bitmask;
+
+}
+
+u64 vcpu_get_gr(struct kvm_vcpu *vcpu, unsigned long reg)
+{
+	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
+	u64 val;
+
+	if (!reg)
+		return 0;
+	getreg(reg, &val, 0, regs);
+	return val;
+}
+
+void vcpu_set_gr(struct kvm_vcpu *vcpu, u64 reg, u64 value, int nat)
+{
+	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
+	long sof = (regs->cr_ifs) & 0x7f;
+
+	if (!reg)
+		return;
+	if (reg >= sof + 32)
+		return;
+	setreg(reg, value, nat, regs);	/* FIXME: handle NATs later*/
+}
+
+void getfpreg(unsigned long regnum, struct ia64_fpreg *fpval,
+				struct kvm_pt_regs *regs)
+{
+	/* Take floating register rotation into consideration*/
+	if (regnum >= IA64_FIRST_ROTATING_FR)
+		regnum = IA64_FIRST_ROTATING_FR + fph_index(regs, regnum);
+#define CASE_FIXED_FP(reg)			\
+	case  (reg) :				\
+		ia64_stf_spill(fpval, reg);	\
+	break
+
+	switch (regnum) {
+		CASE_FIXED_FP(0);
+		CASE_FIXED_FP(1);
+		CASE_FIXED_FP(2);
+		CASE_FIXED_FP(3);
+		CASE_FIXED_FP(4);
+		CASE_FIXED_FP(5);
+
+		CASE_FIXED_FP(6);
+		CASE_FIXED_FP(7);
+		CASE_FIXED_FP(8);
+		CASE_FIXED_FP(9);
+		CASE_FIXED_FP(10);
+		CASE_FIXED_FP(11);
+
+		CASE_FIXED_FP(12);
+		CASE_FIXED_FP(13);
+		CASE_FIXED_FP(14);
+		CASE_FIXED_FP(15);
+		CASE_FIXED_FP(16);
+		CASE_FIXED_FP(17);
+		CASE_FIXED_FP(18);
+		CASE_FIXED_FP(19);
+		CASE_FIXED_FP(20);
+		CASE_FIXED_FP(21);
+		CASE_FIXED_FP(22);
+		CASE_FIXED_FP(23);
+		CASE_FIXED_FP(24);
+		CASE_FIXED_FP(25);
+		CASE_FIXED_FP(26);
+		CASE_FIXED_FP(27);
+		CASE_FIXED_FP(28);
+		CASE_FIXED_FP(29);
+		CASE_FIXED_FP(30);
+		CASE_FIXED_FP(31);
+		CASE_FIXED_FP(32);
+		CASE_FIXED_FP(33);
+		CASE_FIXED_FP(34);
+		CASE_FIXED_FP(35);
+		CASE_FIXED_FP(36);
+		CASE_FIXED_FP(37);
+		CASE_FIXED_FP(38);
+		CASE_FIXED_FP(39);
+		CASE_FIXED_FP(40);
+		CASE_FIXED_FP(41);
+		CASE_FIXED_FP(42);
+		CASE_FIXED_FP(43);
+		CASE_FIXED_FP(44);
+		CASE_FIXED_FP(45);
+		CASE_FIXED_FP(46);
+		CASE_FIXED_FP(47);
+		CASE_FIXED_FP(48);
+		CASE_FIXED_FP(49);
+		CASE_FIXED_FP(50);
+		CASE_FIXED_FP(51);
+		CASE_FIXED_FP(52);
+		CASE_FIXED_FP(53);
+		CASE_FIXED_FP(54);
+		CASE_FIXED_FP(55);
+		CASE_FIXED_FP(56);
+		CASE_FIXED_FP(57);
+		CASE_FIXED_FP(58);
+		CASE_FIXED_FP(59);
+		CASE_FIXED_FP(60);
+		CASE_FIXED_FP(61);
+		CASE_FIXED_FP(62);
+		CASE_FIXED_FP(63);
+		CASE_FIXED_FP(64);
+		CASE_FIXED_FP(65);
+		CASE_FIXED_FP(66);
+		CASE_FIXED_FP(67);
+		CASE_FIXED_FP(68);
+		CASE_FIXED_FP(69);
+		CASE_FIXED_FP(70);
+		CASE_FIXED_FP(71);
+		CASE_FIXED_FP(72);
+		CASE_FIXED_FP(73);
+		CASE_FIXED_FP(74);
+		CASE_FIXED_FP(75);
+		CASE_FIXED_FP(76);
+		CASE_FIXED_FP(77);
+		CASE_FIXED_FP(78);
+		CASE_FIXED_FP(79);
+		CASE_FIXED_FP(80);
+		CASE_FIXED_FP(81);
+		CASE_FIXED_FP(82);
+		CASE_FIXED_FP(83);
+		CASE_FIXED_FP(84);
+		CASE_FIXED_FP(85);
+		CASE_FIXED_FP(86);
+		CASE_FIXED_FP(87);
+		CASE_FIXED_FP(88);
+		CASE_FIXED_FP(89);
+		CASE_FIXED_FP(90);
+		CASE_FIXED_FP(91);
+		CASE_FIXED_FP(92);
+		CASE_FIXED_FP(93);
+		CASE_FIXED_FP(94);
+		CASE_FIXED_FP(95);
+		CASE_FIXED_FP(96);
+		CASE_FIXED_FP(97);
+		CASE_FIXED_FP(98);
+		CASE_FIXED_FP(99);
+		CASE_FIXED_FP(100);
+		CASE_FIXED_FP(101);
+		CASE_FIXED_FP(102);
+		CASE_FIXED_FP(103);
+		CASE_FIXED_FP(104);
+		CASE_FIXED_FP(105);
+		CASE_FIXED_FP(106);
+		CASE_FIXED_FP(107);
+		CASE_FIXED_FP(108);
+		CASE_FIXED_FP(109);
+		CASE_FIXED_FP(110);
+		CASE_FIXED_FP(111);
+		CASE_FIXED_FP(112);
+		CASE_FIXED_FP(113);
+		CASE_FIXED_FP(114);
+		CASE_FIXED_FP(115);
+		CASE_FIXED_FP(116);
+		CASE_FIXED_FP(117);
+		CASE_FIXED_FP(118);
+		CASE_FIXED_FP(119);
+		CASE_FIXED_FP(120);
+		CASE_FIXED_FP(121);
+		CASE_FIXED_FP(122);
+		CASE_FIXED_FP(123);
+		CASE_FIXED_FP(124);
+		CASE_FIXED_FP(125);
+		CASE_FIXED_FP(126);
+		CASE_FIXED_FP(127);
+	}
+#undef CASE_FIXED_FP
+}
+
+void setfpreg(unsigned long regnum, struct ia64_fpreg *fpval,
+					struct kvm_pt_regs *regs)
+{
+	/* Take floating register rotation into consideration*/
+	if (regnum >= IA64_FIRST_ROTATING_FR)
+		regnum = IA64_FIRST_ROTATING_FR + fph_index(regs, regnum);
+
+#define CASE_FIXED_FP(reg)			\
+	case (reg) :				\
+		ia64_ldf_fill(reg, fpval);	\
+	break
+
+	switch (regnum) {
+		CASE_FIXED_FP(2);
+		CASE_FIXED_FP(3);
+		CASE_FIXED_FP(4);
+		CASE_FIXED_FP(5);
+
+		CASE_FIXED_FP(6);
+		CASE_FIXED_FP(7);
+		CASE_FIXED_FP(8);
+		CASE_FIXED_FP(9);
+		CASE_FIXED_FP(10);
+		CASE_FIXED_FP(11);
+
+		CASE_FIXED_FP(12);
+		CASE_FIXED_FP(13);
+		CASE_FIXED_FP(14);
+		CASE_FIXED_FP(15);
+		CASE_FIXED_FP(16);
+		CASE_FIXED_FP(17);
+		CASE_FIXED_FP(18);
+		CASE_FIXED_FP(19);
+		CASE_FIXED_FP(20);
+		CASE_FIXED_FP(21);
+		CASE_FIXED_FP(22);
+		CASE_FIXED_FP(23);
+		CASE_FIXED_FP(24);
+		CASE_FIXED_FP(25);
+		CASE_FIXED_FP(26);
+		CASE_FIXED_FP(27);
+		CASE_FIXED_FP(28);
+		CASE_FIXED_FP(29);
+		CASE_FIXED_FP(30);
+		CASE_FIXED_FP(31);
+		CASE_FIXED_FP(32);
+		CASE_FIXED_FP(33);
+		CASE_FIXED_FP(34);
+		CASE_FIXED_FP(35);
+		CASE_FIXED_FP(36);
+		CASE_FIXED_FP(37);
+		CASE_FIXED_FP(38);
+		CASE_FIXED_FP(39);
+		CASE_FIXED_FP(40);
+		CASE_FIXED_FP(41);
+		CASE_FIXED_FP(42);
+		CASE_FIXED_FP(43);
+		CASE_FIXED_FP(44);
+		CASE_FIXED_FP(45);
+		CASE_FIXED_FP(46);
+		CASE_FIXED_FP(47);
+		CASE_FIXED_FP(48);
+		CASE_FIXED_FP(49);
+		CASE_FIXED_FP(50);
+		CASE_FIXED_FP(51);
+		CASE_FIXED_FP(52);
+		CASE_FIXED_FP(53);
+		CASE_FIXED_FP(54);
+		CASE_FIXED_FP(55);
+		CASE_FIXED_FP(56);
+		CASE_FIXED_FP(57);
+		CASE_FIXED_FP(58);
+		CASE_FIXED_FP(59);
+		CASE_FIXED_FP(60);
+		CASE_FIXED_FP(61);
+		CASE_FIXED_FP(62);
+		CASE_FIXED_FP(63);
+		CASE_FIXED_FP(64);
+		CASE_FIXED_FP(65);
+		CASE_FIXED_FP(66);
+		CASE_FIXED_FP(67);
+		CASE_FIXED_FP(68);
+		CASE_FIXED_FP(69);
+		CASE_FIXED_FP(70);
+		CASE_FIXED_FP(71);
+		CASE_FIXED_FP(72);
+		CASE_FIXED_FP(73);
+		CASE_FIXED_FP(74);
+		CASE_FIXED_FP(75);
+		CASE_FIXED_FP(76);
+		CASE_FIXED_FP(77);
+		CASE_FIXED_FP(78);
+		CASE_FIXED_FP(79);
+		CASE_FIXED_FP(80);
+		CASE_FIXED_FP(81);
+		CASE_FIXED_FP(82);
+		CASE_FIXED_FP(83);
+		CASE_FIXED_FP(84);
+		CASE_FIXED_FP(85);
+		CASE_FIXED_FP(86);
+		CASE_FIXED_FP(87);
+		CASE_FIXED_FP(88);
+		CASE_FIXED_FP(89);
+		CASE_FIXED_FP(90);
+		CASE_FIXED_FP(91);
+		CASE_FIXED_FP(92);
+		CASE_FIXED_FP(93);
+		CASE_FIXED_FP(94);
+		CASE_FIXED_FP(95);
+		CASE_FIXED_FP(96);
+		CASE_FIXED_FP(97);
+		CASE_FIXED_FP(98);
+		CASE_FIXED_FP(99);
+		CASE_FIXED_FP(100);
+		CASE_FIXED_FP(101);
+		CASE_FIXED_FP(102);
+		CASE_FIXED_FP(103);
+		CASE_FIXED_FP(104);
+		CASE_FIXED_FP(105);
+		CASE_FIXED_FP(106);
+		CASE_FIXED_FP(107);
+		CASE_FIXED_FP(108);
+		CASE_FIXED_FP(109);
+		CASE_FIXED_FP(110);
+		CASE_FIXED_FP(111);
+		CASE_FIXED_FP(112);
+		CASE_FIXED_FP(113);
+		CASE_FIXED_FP(114);
+		CASE_FIXED_FP(115);
+		CASE_FIXED_FP(116);
+		CASE_FIXED_FP(117);
+		CASE_FIXED_FP(118);
+		CASE_FIXED_FP(119);
+		CASE_FIXED_FP(120);
+		CASE_FIXED_FP(121);
+		CASE_FIXED_FP(122);
+		CASE_FIXED_FP(123);
+		CASE_FIXED_FP(124);
+		CASE_FIXED_FP(125);
+		CASE_FIXED_FP(126);
+		CASE_FIXED_FP(127);
+	}
+}
+
+void vcpu_get_fpreg(struct kvm_vcpu *vcpu, unsigned long reg,
+						struct ia64_fpreg *val)
+{
+	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
+
+	getfpreg(reg, val, regs);   /* FIXME: handle NATs later*/
+}
+
+void vcpu_set_fpreg(struct kvm_vcpu *vcpu, unsigned long reg,
+						struct ia64_fpreg *val)
+{
+	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
+
+	if (reg > 1)
+		setfpreg(reg, val, regs);   /* FIXME: handle NATs later*/
+}
+
+/************************************************************************
+ * lsapic timer
+ ***********************************************************************/
+u64 vcpu_get_itc(struct kvm_vcpu *vcpu)
+{
+	unsigned long guest_itc;
+	guest_itc = VMX(vcpu, itc_offset) + ia64_getreg(_IA64_REG_AR_ITC);
+
+	if (guest_itc >= VMX(vcpu, last_itc)) {
+		VMX(vcpu, last_itc) = guest_itc;
+		return  guest_itc;
+	} else
+		return VMX(vcpu, last_itc);
+}
+
+static inline void vcpu_set_itm(struct kvm_vcpu *vcpu, u64 val);
+static void vcpu_set_itc(struct kvm_vcpu *vcpu, u64 val)
+{
+	struct kvm_vcpu *v;
+	int i;
+	long itc_offset = val - ia64_getreg(_IA64_REG_AR_ITC);
+	unsigned long vitv = VCPU(vcpu, itv);
+
+	if (vcpu->vcpu_id == 0) {
+		for (i = 0; i < MAX_VCPU_NUM; i++) {
+			v = (struct kvm_vcpu *)((char *)vcpu + VCPU_SIZE * i);
+			VMX(v, itc_offset) = itc_offset;
+			VMX(v, last_itc) = 0;
+		}
+	}
+	VMX(vcpu, last_itc) = 0;
+	if (VCPU(vcpu, itm) <= val) {
+		VMX(vcpu, itc_check) = 0;
+		vcpu_unpend_interrupt(vcpu, vitv);
+	} else {
+		VMX(vcpu, itc_check) = 1;
+		vcpu_set_itm(vcpu, VCPU(vcpu, itm));
+	}
+
+}
+
+static inline u64 vcpu_get_itm(struct kvm_vcpu *vcpu)
+{
+	return ((u64)VCPU(vcpu, itm));
+}
+
+static inline void vcpu_set_itm(struct kvm_vcpu *vcpu, u64 val)
+{
+	unsigned long vitv = VCPU(vcpu, itv);
+	VCPU(vcpu, itm) = val;
+
+	if (val > vcpu_get_itc(vcpu)) {
+		VMX(vcpu, itc_check) = 1;
+		vcpu_unpend_interrupt(vcpu, vitv);
+		VMX(vcpu, timer_pending) = 0;
+	} else
+		VMX(vcpu, itc_check) = 0;
+}
+
+#define  ITV_VECTOR(itv)    (itv&0xff)
+#define  ITV_IRQ_MASK(itv)  (itv&(1<<16))
+
+static inline void vcpu_set_itv(struct kvm_vcpu *vcpu, u64 val)
+{
+	VCPU(vcpu, itv) = val;
+	if (!ITV_IRQ_MASK(val) && vcpu->arch.timer_pending) {
+		vcpu_pend_interrupt(vcpu, ITV_VECTOR(val));
+		vcpu->arch.timer_pending = 0;
+	}
+}
+
+static inline void vcpu_set_eoi(struct kvm_vcpu *vcpu, u64 val)
+{
+	int vec;
+
+	vec = highest_inservice_irq(vcpu);
+	if (vec == NULL_VECTOR)
+		return;
+	VMX(vcpu, insvc[vec >> 6]) &= ~(1UL << (vec & 63));
+	VCPU(vcpu, eoi) = 0;
+	vcpu->arch.irq_new_pending = 1;
+
+}
+
+/* See Table 5-8 in SDM vol2 for the definition */
+int irq_masked(struct kvm_vcpu *vcpu, int h_pending, int h_inservice)
+{
+	union ia64_tpr vtpr;
+
+	vtpr.val = VCPU(vcpu, tpr);
+
+	if (h_inservice == NMI_VECTOR)
+		return IRQ_MASKED_BY_INSVC;
+
+	if (h_pending == NMI_VECTOR) {
+		/* Non Maskable Interrupt */
+		return IRQ_NO_MASKED;
+	}
+
+	if (h_inservice == ExtINT_VECTOR)
+		return IRQ_MASKED_BY_INSVC;
+
+	if (h_pending == ExtINT_VECTOR) {
+		if (vtpr.mmi) {
+			/* mask all external IRQ */
+			return IRQ_MASKED_BY_VTPR;
+		} else
+			return IRQ_NO_MASKED;
+	}
+
+	if (is_higher_irq(h_pending, h_inservice)) {
+		if (is_higher_class(h_pending, vtpr.mic + (vtpr.mmi << 4)))
+			return IRQ_NO_MASKED;
+		else
+			return IRQ_MASKED_BY_VTPR;
+	} else {
+		return IRQ_MASKED_BY_INSVC;
+	}
+}
+
+void vcpu_pend_interrupt(struct kvm_vcpu *vcpu, u8 vec)
+{
+	long spsr;
+	int ret;
+
+	local_irq_save(spsr);
+	ret = test_and_set_bit(vec, &VCPU(vcpu, irr[0]));
+	local_irq_restore(spsr);
+
+	vcpu->arch.irq_new_pending = 1;
+}
+
+void vcpu_unpend_interrupt(struct kvm_vcpu *vcpu, u8 vec)
+{
+	long spsr;
+	int ret;
+
+	local_irq_save(spsr);
+	ret = test_and_clear_bit(vec, &VCPU(vcpu, irr[0]));
+	local_irq_restore(spsr);
+	if (ret) {
+		vcpu->arch.irq_new_pending = 1;
+		wmb();
+	}
+}
+
+void update_vhpi(struct kvm_vcpu *vcpu, int vec)
+{
+	u64 vhpi;
+
+	if (vec == NULL_VECTOR)
+		vhpi = 0;
+	else if (vec == NMI_VECTOR)
+		vhpi = 32;
+	else if (vec == ExtINT_VECTOR)
+		vhpi = 16;
+	else
+		vhpi = vec >> 4;
+
+	VCPU(vcpu, vhpi) = vhpi;
+	if (VCPU(vcpu, vac).a_int)
+		ia64_call_vsa(PAL_VPS_SET_PENDING_INTERRUPT,
+				(u64)vcpu->arch.vpd, 0, 0, 0, 0, 0, 0);
+}
+
+u64 vcpu_get_ivr(struct kvm_vcpu *vcpu)
+{
+	int vec, h_inservice, mask;
+
+	vec = highest_pending_irq(vcpu);
+	h_inservice = highest_inservice_irq(vcpu);
+	mask = irq_masked(vcpu, vec, h_inservice);
+	if (vec == NULL_VECTOR || mask == IRQ_MASKED_BY_INSVC) {
+		if (VCPU(vcpu, vhpi))
+			update_vhpi(vcpu, NULL_VECTOR);
+		return IA64_SPURIOUS_INT_VECTOR;
+	}
+	if (mask == IRQ_MASKED_BY_VTPR) {
+		update_vhpi(vcpu, vec);
+		return IA64_SPURIOUS_INT_VECTOR;
+	}
+	VMX(vcpu, insvc[vec >> 6]) |= (1UL << (vec & 63));
+	vcpu_unpend_interrupt(vcpu, vec);
+	return  (u64)vec;
+}
+
+/**************************************************************************
+  Privileged operation emulation routines
+ **************************************************************************/
+u64 vcpu_thash(struct kvm_vcpu *vcpu, u64 vadr)
+{
+	union ia64_pta vpta;
+	union ia64_rr vrr;
+	u64 pval;
+	u64 vhpt_offset;
+
+	vpta.val = vcpu_get_pta(vcpu);
+	vrr.val = vcpu_get_rr(vcpu, vadr);
+	vhpt_offset = ((vadr >> vrr.ps) << 3) & ((1UL << (vpta.size)) - 1);
+	if (vpta.vf) {
+		pval = ia64_call_vsa(PAL_VPS_THASH, vadr, vrr.val,
+				vpta.val, 0, 0, 0, 0);
+	} else {
+		pval = (vadr & VRN_MASK) | vhpt_offset |
+			(vpta.val << 3 >> (vpta.size + 3) << (vpta.size));
+	}
+	return  pval;
+}
+
+u64 vcpu_ttag(struct kvm_vcpu *vcpu, u64 vadr)
+{
+	union ia64_rr vrr;
+	union ia64_pta vpta;
+	u64 pval;
+
+	vpta.val = vcpu_get_pta(vcpu);
+	vrr.val = vcpu_get_rr(vcpu, vadr);
+	if (vpta.vf) {
+		pval = ia64_call_vsa(PAL_VPS_TTAG, vadr, vrr.val,
+						0, 0, 0, 0, 0);
+	} else
+		pval = 1;
+
+	return  pval;
+}
+
+u64 vcpu_tak(struct kvm_vcpu *vcpu, u64 vadr)
+{
+	struct thash_data *data;
+	union ia64_pta vpta;
+	u64 key;
+
+	vpta.val = vcpu_get_pta(vcpu);
+	if (vpta.vf == 0) {
+		key = 1;
+		return key;
+	}
+	data = vtlb_lookup(vcpu, vadr, D_TLB);
+	if (!data || !data->p)
+		key = 1;
+	else
+		key = data->key;
+
+	return key;
+}
+
+
+
+void kvm_thash(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long thash, vadr;
+
+	vadr = vcpu_get_gr(vcpu, inst.M46.r3);
+	thash = vcpu_thash(vcpu, vadr);
+	vcpu_set_gr(vcpu, inst.M46.r1, thash, 0);
+}
+
+
+void kvm_ttag(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long tag, vadr;
+
+	vadr = vcpu_get_gr(vcpu, inst.M46.r3);
+	tag = vcpu_ttag(vcpu, vadr);
+	vcpu_set_gr(vcpu, inst.M46.r1, tag, 0);
+}
+
+int vcpu_tpa(struct kvm_vcpu *vcpu, u64 vadr, u64 *padr)
+{
+	struct thash_data *data;
+	union ia64_isr visr, pt_isr;
+	struct kvm_pt_regs *regs;
+	struct ia64_psr vpsr;
+
+	regs = vcpu_regs(vcpu);
+	pt_isr.val = VMX(vcpu, cr_isr);
+	visr.val = 0;
+	visr.ei = pt_isr.ei;
+	visr.ir = pt_isr.ir;
+	vpsr = *(struct ia64_psr *)&VCPU(vcpu, vpsr);
+	visr.na = 1;
+
+	data = vhpt_lookup(vadr);
+	if (data) {
+		if (data->p == 0) {
+			vcpu_set_isr(vcpu, visr.val);
+			data_page_not_present(vcpu, vadr);
+			return IA64_FAULT;
+		} else if (data->ma == VA_MATTR_NATPAGE) {
+			vcpu_set_isr(vcpu, visr.val);
+			dnat_page_consumption(vcpu, vadr);
+			return IA64_FAULT;
+		} else {
+			*padr = (data->gpaddr >> data->ps << data->ps) |
+				(vadr & (PSIZE(data->ps) - 1));
+			return IA64_NO_FAULT;
+		}
+	}
+
+	data = vtlb_lookup(vcpu, vadr, D_TLB);
+	if (data) {
+		if (data->p == 0) {
+			vcpu_set_isr(vcpu, visr.val);
+			data_page_not_present(vcpu, vadr);
+			return IA64_FAULT;
+		} else if (data->ma == VA_MATTR_NATPAGE) {
+			vcpu_set_isr(vcpu, visr.val);
+			dnat_page_consumption(vcpu, vadr);
+			return IA64_FAULT;
+		} else{
+			*padr = ((data->ppn >> (data->ps - 12)) << data->ps)
+				| (vadr & (PSIZE(data->ps) - 1));
+			return IA64_NO_FAULT;
+		}
+	}
+	if (!vhpt_enabled(vcpu, vadr, NA_REF)) {
+		if (vpsr.ic) {
+			vcpu_set_isr(vcpu, visr.val);
+			alt_dtlb(vcpu, vadr);
+			return IA64_FAULT;
+		} else {
+			nested_dtlb(vcpu);
+			return IA64_FAULT;
+		}
+	} else {
+		if (vpsr.ic) {
+			vcpu_set_isr(vcpu, visr.val);
+			dvhpt_fault(vcpu, vadr);
+			return IA64_FAULT;
+		} else{
+			nested_dtlb(vcpu);
+			return IA64_FAULT;
+		}
+	}
+
+	return IA64_NO_FAULT;
+}
+
+
+int kvm_tpa(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long r1, r3;
+
+	r3 = vcpu_get_gr(vcpu, inst.M46.r3);
+
+	if (vcpu_tpa(vcpu, r3, &r1))
+		return IA64_FAULT;
+
+	vcpu_set_gr(vcpu, inst.M46.r1, r1, 0);
+	return(IA64_NO_FAULT);
+}
+
+void kvm_tak(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long r1, r3;
+
+	r3 = vcpu_get_gr(vcpu, inst.M46.r3);
+	r1 = vcpu_tak(vcpu, r3);
+	vcpu_set_gr(vcpu, inst.M46.r1, r1, 0);
+}
+
+
+/************************************
+ * Insert/Purge translation register/cache
+ ************************************/
+void vcpu_itc_i(struct kvm_vcpu *vcpu, u64 pte, u64 itir, u64 ifa)
+{
+	thash_purge_and_insert(vcpu, pte, itir, ifa, I_TLB);
+}
+
+void vcpu_itc_d(struct kvm_vcpu *vcpu, u64 pte, u64 itir, u64 ifa)
+{
+	thash_purge_and_insert(vcpu, pte, itir, ifa, D_TLB);
+}
+
+void vcpu_itr_i(struct kvm_vcpu *vcpu, u64 slot, u64 pte, u64 itir, u64 ifa)
+{
+	u64 ps, va, rid;
+	struct thash_data *p_itr;
+
+	ps = itir_ps(itir);
+	va = PAGEALIGN(ifa, ps);
+	pte &= ~PAGE_FLAGS_RV_MASK;
+	rid = vcpu_get_rr(vcpu, ifa);
+	rid = rid & RR_RID_MASK;
+	p_itr = (struct thash_data *)&vcpu->arch.itrs[slot];
+	vcpu_set_tr(p_itr, pte, itir, va, rid);
+	vcpu_quick_region_set(VMX(vcpu, itr_regions), va);
+}
+
+
+void vcpu_itr_d(struct kvm_vcpu *vcpu, u64 slot, u64 pte, u64 itir, u64 ifa)
+{
+	u64 gpfn;
+	u64 ps, va, rid;
+	struct thash_data *p_dtr;
+
+	ps = itir_ps(itir);
+	va = PAGEALIGN(ifa, ps);
+	pte &= ~PAGE_FLAGS_RV_MASK;
+
+	if (ps != _PAGE_SIZE_16M)
+		thash_purge_entries(vcpu, va, ps);
+	gpfn = (pte & _PAGE_PPN_MASK) >> PAGE_SHIFT;
+	if (__gpfn_is_io(gpfn))
+		pte |= VTLB_PTE_IO;
+	rid = vcpu_get_rr(vcpu, va);
+	rid = rid & RR_RID_MASK;
+	p_dtr = (struct thash_data *)&vcpu->arch.dtrs[slot];
+	vcpu_set_tr((struct thash_data *)&vcpu->arch.dtrs[slot],
+							pte, itir, va, rid);
+	vcpu_quick_region_set(VMX(vcpu, dtr_regions), va);
+}
+
+void vcpu_ptr_d(struct kvm_vcpu *vcpu, u64 ifa, u64 ps)
+{
+	int index;
+	u64 va;
+
+	va = PAGEALIGN(ifa, ps);
+	while ((index = vtr_find_overlap(vcpu, va, ps, D_TLB)) >= 0)
+		vcpu->arch.dtrs[index].page_flags = 0;
+
+	thash_purge_entries(vcpu, va, ps);
+}
+
+void vcpu_ptr_i(struct kvm_vcpu *vcpu, u64 ifa, u64 ps)
+{
+	int index;
+	u64 va;
+
+	va = PAGEALIGN(ifa, ps);
+	while ((index = vtr_find_overlap(vcpu, va, ps, I_TLB)) >= 0)
+		vcpu->arch.itrs[index].page_flags = 0;
+
+	thash_purge_entries(vcpu, va, ps);
+}
+
+void vcpu_ptc_l(struct kvm_vcpu *vcpu, u64 va, u64 ps)
+{
+	va = PAGEALIGN(va, ps);
+	thash_purge_entries(vcpu, va, ps);
+}
+
+void vcpu_ptc_e(struct kvm_vcpu *vcpu, u64 va)
+{
+	thash_purge_all(vcpu);
+}
+
+void vcpu_ptc_ga(struct kvm_vcpu *vcpu, u64 va, u64 ps)
+{
+	struct exit_ctl_data *p = &vcpu->arch.exit_data;
+	long psr;
+	local_irq_save(psr);
+	p->exit_reason = EXIT_REASON_PTC_G;
+
+	p->u.ptc_g_data.rr = vcpu_get_rr(vcpu, va);
+	p->u.ptc_g_data.vaddr = va;
+	p->u.ptc_g_data.ps = ps;
+	vmm_transition(vcpu);
+	/* Do Local Purge Here*/
+	vcpu_ptc_l(vcpu, va, ps);
+	local_irq_restore(psr);
+}
+
+
+void vcpu_ptc_g(struct kvm_vcpu *vcpu, u64 va, u64 ps)
+{
+	vcpu_ptc_ga(vcpu, va, ps);
+}
+
+void kvm_ptc_e(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long ifa;
+
+	ifa = vcpu_get_gr(vcpu, inst.M45.r3);
+	vcpu_ptc_e(vcpu, ifa);
+}
+
+void kvm_ptc_g(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long ifa, itir;
+
+	ifa = vcpu_get_gr(vcpu, inst.M45.r3);
+	itir = vcpu_get_gr(vcpu, inst.M45.r2);
+	vcpu_ptc_g(vcpu, ifa, itir_ps(itir));
+}
+
+void kvm_ptc_ga(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long ifa, itir;
+
+	ifa = vcpu_get_gr(vcpu, inst.M45.r3);
+	itir = vcpu_get_gr(vcpu, inst.M45.r2);
+	vcpu_ptc_ga(vcpu, ifa, itir_ps(itir));
+}
+
+void kvm_ptc_l(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long ifa, itir;
+
+	ifa = vcpu_get_gr(vcpu, inst.M45.r3);
+	itir = vcpu_get_gr(vcpu, inst.M45.r2);
+	vcpu_ptc_l(vcpu, ifa, itir_ps(itir));
+}
+
+void kvm_ptr_d(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long ifa, itir;
+
+	ifa = vcpu_get_gr(vcpu, inst.M45.r3);
+	itir = vcpu_get_gr(vcpu, inst.M45.r2);
+	vcpu_ptr_d(vcpu, ifa, itir_ps(itir));
+}
+
+void kvm_ptr_i(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long ifa, itir;
+
+	ifa = vcpu_get_gr(vcpu, inst.M45.r3);
+	itir = vcpu_get_gr(vcpu, inst.M45.r2);
+	vcpu_ptr_i(vcpu, ifa, itir_ps(itir));
+}
+
+void kvm_itr_d(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long itir, ifa, pte, slot;
+
+	slot = vcpu_get_gr(vcpu, inst.M45.r3);
+	pte = vcpu_get_gr(vcpu, inst.M45.r2);
+	itir = vcpu_get_itir(vcpu);
+	ifa = vcpu_get_ifa(vcpu);
+	vcpu_itr_d(vcpu, slot, pte, itir, ifa);
+}
+
+
+
+void kvm_itr_i(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long itir, ifa, pte, slot;
+
+	slot = vcpu_get_gr(vcpu, inst.M45.r3);
+	pte = vcpu_get_gr(vcpu, inst.M45.r2);
+	itir = vcpu_get_itir(vcpu);
+	ifa = vcpu_get_ifa(vcpu);
+	vcpu_itr_i(vcpu, slot, pte, itir, ifa);
+}
+
+void kvm_itc_d(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long itir, ifa, pte;
+
+	itir = vcpu_get_itir(vcpu);
+	ifa = vcpu_get_ifa(vcpu);
+	pte = vcpu_get_gr(vcpu, inst.M45.r2);
+	vcpu_itc_d(vcpu, pte, itir, ifa);
+}
+
+void kvm_itc_i(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long itir, ifa, pte;
+
+	itir = vcpu_get_itir(vcpu);
+	ifa = vcpu_get_ifa(vcpu);
+	pte = vcpu_get_gr(vcpu, inst.M45.r2);
+	vcpu_itc_i(vcpu, pte, itir, ifa);
+}
+
+/*************************************
+ * Moves to semi-privileged registers
+ *************************************/
+
+void kvm_mov_to_ar_imm(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long imm;
+
+	if (inst.M30.s)
+		imm = -inst.M30.imm;
+	else
+		imm = inst.M30.imm;
+
+	vcpu_set_itc(vcpu, imm);
+}
+
+void kvm_mov_to_ar_reg(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long r2;
+
+	r2 = vcpu_get_gr(vcpu, inst.M29.r2);
+	vcpu_set_itc(vcpu, r2);
+}
+
+
+void kvm_mov_from_ar_reg(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long r1;
+
+	r1 = vcpu_get_itc(vcpu);
+	vcpu_set_gr(vcpu, inst.M31.r1, r1, 0);
+}
+/**************************************************************************
+  struct kvm_vcpu*protection key register access routines
+ **************************************************************************/
+
+unsigned long vcpu_get_pkr(struct kvm_vcpu *vcpu, unsigned long reg)
+{
+	return ((unsigned long)ia64_get_pkr(reg));
+}
+
+void vcpu_set_pkr(struct kvm_vcpu *vcpu, unsigned long reg, unsigned long val)
+{
+	ia64_set_pkr(reg, val);
+}
+
+
+unsigned long vcpu_get_itir_on_fault(struct kvm_vcpu *vcpu, unsigned long ifa)
+{
+	union ia64_rr rr, rr1;
+
+	rr.val = vcpu_get_rr(vcpu, ifa);
+	rr1.val = 0;
+	rr1.ps = rr.ps;
+	rr1.rid = rr.rid;
+	return (rr1.val);
+}
+
+
+
+/********************************
+ * Moves to privileged registers
+ ********************************/
+unsigned long vcpu_set_rr(struct kvm_vcpu *vcpu, unsigned long reg,
+					unsigned long val)
+{
+	union ia64_rr oldrr, newrr;
+	unsigned long rrval;
+	struct exit_ctl_data *p = &vcpu->arch.exit_data;
+	unsigned long psr;
+
+	oldrr.val = vcpu_get_rr(vcpu, reg);
+	newrr.val = val;
+	vcpu->arch.vrr[reg >> VRN_SHIFT] = val;
+
+	switch ((unsigned long)(reg >> VRN_SHIFT)) {
+	case VRN6:
+		vcpu->arch.vmm_rr = vrrtomrr(val);
+		local_irq_save(psr);
+		p->exit_reason = EXIT_REASON_SWITCH_RR6;
+		vmm_transition(vcpu);
+		local_irq_restore(psr);
+		break;
+	case VRN4:
+		rrval = vrrtomrr(val);
+		vcpu->arch.metaphysical_saved_rr4 = rrval;
+		if (!is_physical_mode(vcpu))
+			ia64_set_rr(reg, rrval);
+		break;
+	case VRN0:
+		rrval = vrrtomrr(val);
+		vcpu->arch.metaphysical_saved_rr0 = rrval;
+		if (!is_physical_mode(vcpu))
+			ia64_set_rr(reg, rrval);
+		break;
+	default:
+		ia64_set_rr(reg, vrrtomrr(val));
+		break;
+	}
+
+	return (IA64_NO_FAULT);
+}
+
+
+
+void kvm_mov_to_rr(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long r3, r2;
+
+	r3 = vcpu_get_gr(vcpu, inst.M42.r3);
+	r2 = vcpu_get_gr(vcpu, inst.M42.r2);
+	vcpu_set_rr(vcpu, r3, r2);
+}
+
+void kvm_mov_to_dbr(struct kvm_vcpu *vcpu, INST64 inst)
+{
+}
+
+void kvm_mov_to_ibr(struct kvm_vcpu *vcpu, INST64 inst)
+{
+}
+
+void kvm_mov_to_pmc(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long r3, r2;
+
+	r3 = vcpu_get_gr(vcpu, inst.M42.r3);
+	r2 = vcpu_get_gr(vcpu, inst.M42.r2);
+	vcpu_set_pmc(vcpu, r3, r2);
+}
+
+void kvm_mov_to_pmd(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long r3, r2;
+
+	r3 = vcpu_get_gr(vcpu, inst.M42.r3);
+	r2 = vcpu_get_gr(vcpu, inst.M42.r2);
+	vcpu_set_pmd(vcpu, r3, r2);
+}
+
+void kvm_mov_to_pkr(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	u64 r3, r2;
+
+	r3 = vcpu_get_gr(vcpu, inst.M42.r3);
+	r2 = vcpu_get_gr(vcpu, inst.M42.r2);
+	vcpu_set_pkr(vcpu, r3, r2);
+}
+
+
+
+void kvm_mov_from_rr(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long r3, r1;
+
+	r3 = vcpu_get_gr(vcpu, inst.M43.r3);
+	r1 = vcpu_get_rr(vcpu, r3);
+	vcpu_set_gr(vcpu, inst.M43.r1, r1, 0);
+}
+
+void kvm_mov_from_pkr(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long r3, r1;
+
+	r3 = vcpu_get_gr(vcpu, inst.M43.r3);
+	r1 = vcpu_get_pkr(vcpu, r3);
+	vcpu_set_gr(vcpu, inst.M43.r1, r1, 0);
+}
+
+void kvm_mov_from_dbr(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long r3, r1;
+
+	r3 = vcpu_get_gr(vcpu, inst.M43.r3);
+	r1 = vcpu_get_dbr(vcpu, r3);
+	vcpu_set_gr(vcpu, inst.M43.r1, r1, 0);
+}
+
+void kvm_mov_from_ibr(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long r3, r1;
+
+	r3 = vcpu_get_gr(vcpu, inst.M43.r3);
+	r1 = vcpu_get_ibr(vcpu, r3);
+	vcpu_set_gr(vcpu, inst.M43.r1, r1, 0);
+}
+
+void kvm_mov_from_pmc(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long r3, r1;
+
+	r3 = vcpu_get_gr(vcpu, inst.M43.r3);
+	r1 = vcpu_get_pmc(vcpu, r3);
+	vcpu_set_gr(vcpu, inst.M43.r1, r1, 0);
+}
+
+
+unsigned long vcpu_get_cpuid(struct kvm_vcpu *vcpu, unsigned long reg)
+{
+	/* FIXME: This could get called as a result of a rsvd-reg fault */
+	if (reg > (ia64_get_cpuid(3) & 0xff))
+		return 0;
+	else
+		return ia64_get_cpuid(reg);
+}
+
+void kvm_mov_from_cpuid(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long r3, r1;
+
+	r3 = vcpu_get_gr(vcpu, inst.M43.r3);
+	r1 = vcpu_get_cpuid(vcpu, r3);
+	vcpu_set_gr(vcpu, inst.M43.r1, r1, 0);
+}
+
+void vcpu_set_tpr(struct kvm_vcpu *vcpu, unsigned long val)
+{
+	VCPU(vcpu, tpr) = val;
+	vcpu->arch.irq_check = 1;
+}
+
+unsigned long kvm_mov_to_cr(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long r2;
+
+	r2 = vcpu_get_gr(vcpu, inst.M32.r2);
+	VCPU(vcpu, vcr[inst.M32.cr3]) = r2;
+
+	switch (inst.M32.cr3) {
+	case 0:
+		vcpu_set_dcr(vcpu, r2);
+		break;
+	case 1:
+		vcpu_set_itm(vcpu, r2);
+		break;
+	case 66:
+		vcpu_set_tpr(vcpu, r2);
+		break;
+	case 67:
+		vcpu_set_eoi(vcpu, r2);
+		break;
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+
+unsigned long kvm_mov_from_cr(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long tgt = inst.M33.r1;
+	unsigned long val;
+
+	switch (inst.M33.cr3) {
+	case 65:
+		val = vcpu_get_ivr(vcpu);
+		vcpu_set_gr(vcpu, tgt, val, 0);
+		break;
+
+	case 67:
+		vcpu_set_gr(vcpu, tgt, 0L, 0);
+		break;
+	default:
+		val = VCPU(vcpu, vcr[inst.M33.cr3]);
+		vcpu_set_gr(vcpu, tgt, val, 0);
+		break;
+	}
+
+	return 0;
+}
+
+
+
+void vcpu_set_psr(struct kvm_vcpu *vcpu, unsigned long val)
+{
+
+	unsigned long mask;
+	struct kvm_pt_regs *regs;
+	struct ia64_psr old_psr, new_psr;
+
+	old_psr = *(struct ia64_psr *)&VCPU(vcpu, vpsr);
+
+	regs = vcpu_regs(vcpu);
+	/* We only support guest as:
+	 *  vpsr.pk = 0
+	 *  vpsr.is = 0
+	 * Otherwise panic
+	 */
+	if (val & (IA64_PSR_PK | IA64_PSR_IS | IA64_PSR_VM))
+		panic_vm(vcpu);
+
+	/*
+	 * For those IA64_PSR bits: id/da/dd/ss/ed/ia
+	 * Since these bits will become 0, after success execution of each
+	 * instruction, we will change set them to mIA64_PSR
+	 */
+	VCPU(vcpu, vpsr) = val
+		& (~(IA64_PSR_ID | IA64_PSR_DA | IA64_PSR_DD |
+			IA64_PSR_SS | IA64_PSR_ED | IA64_PSR_IA));
+
+	if (!old_psr.i && (val & IA64_PSR_I)) {
+		/* vpsr.i 0->1 */
+		vcpu->arch.irq_check = 1;
+	}
+	new_psr = *(struct ia64_psr *)&VCPU(vcpu, vpsr);
+
+	/*
+	 * All vIA64_PSR bits shall go to mPSR (v->tf->tf_special.psr)
+	 * , except for the following bits:
+	 *  ic/i/dt/si/rt/mc/it/bn/vm
+	 */
+	mask =  IA64_PSR_IC + IA64_PSR_I + IA64_PSR_DT + IA64_PSR_SI +
+		IA64_PSR_RT + IA64_PSR_MC + IA64_PSR_IT + IA64_PSR_BN +
+		IA64_PSR_VM;
+
+	regs->cr_ipsr = (regs->cr_ipsr & mask) | (val & (~mask));
+
+	check_mm_mode_switch(vcpu, old_psr, new_psr);
+
+	return ;
+}
+
+unsigned long vcpu_cover(struct kvm_vcpu *vcpu)
+{
+	struct ia64_psr vpsr;
+
+	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
+	vpsr = *(struct ia64_psr *)&VCPU(vcpu, vpsr);
+
+	if (!vpsr.ic)
+		VCPU(vcpu, ifs) = regs->cr_ifs;
+	regs->cr_ifs = IA64_IFS_V;
+	return (IA64_NO_FAULT);
+}
+
+
+
+/**************************************************************************
+  VCPU banked general register access routines
+ **************************************************************************/
+#define vcpu_bsw0_unat(i, b0unat, b1unat, runat, VMM_PT_REGS_R16_SLOT)	\
+	do {     							\
+		__asm__ __volatile__ (					\
+				";;extr.u %0 = %3,%6,16;;\n"		\
+				"dep %1 = %0, %1, 0, 16;;\n"		\
+				"st8 [%4] = %1\n"			\
+				"extr.u %0 = %2, 16, 16;;\n"		\
+				"dep %3 = %0, %3, %6, 16;;\n"		\
+				"st8 [%5] = %3\n"			\
+				::"r"(i), "r"(*b1unat), "r"(*b0unat),	\
+				"r"(*runat), "r"(b1unat), "r"(runat),	\
+				"i"(VMM_PT_REGS_R16_SLOT) : "memory");	\
+	} while (0)
+
+void vcpu_bsw0(struct kvm_vcpu *vcpu)
+{
+	unsigned long i;
+
+	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
+	unsigned long *r = &regs->r16;
+	unsigned long *b0 = &VCPU(vcpu, vbgr[0]);
+	unsigned long *b1 = &VCPU(vcpu, vgr[0]);
+	unsigned long *runat = &regs->eml_unat;
+	unsigned long *b0unat = &VCPU(vcpu, vbnat);
+	unsigned long *b1unat = &VCPU(vcpu, vnat);
+
+
+	if (VCPU(vcpu, vpsr) & IA64_PSR_BN) {
+		for (i = 0; i < 16; i++) {
+			*b1++ = *r;
+			*r++ = *b0++;
+		}
+		vcpu_bsw0_unat(i, b0unat, b1unat, runat,
+				VMM_PT_REGS_R16_SLOT);
+		VCPU(vcpu, vpsr) &= ~IA64_PSR_BN;
+	}
+}
+
+#define vcpu_bsw1_unat(i, b0unat, b1unat, runat, VMM_PT_REGS_R16_SLOT)	\
+	do {             						\
+		__asm__ __volatile__ (";;extr.u %0 = %3, %6, 16;;\n"	\
+				"dep %1 = %0, %1, 16, 16;;\n"		\
+				"st8 [%4] = %1\n"			\
+				"extr.u %0 = %2, 0, 16;;\n"		\
+				"dep %3 = %0, %3, %6, 16;;\n"		\
+				"st8 [%5] = %3\n"			\
+				::"r"(i), "r"(*b0unat), "r"(*b1unat),	\
+				"r"(*runat), "r"(b0unat), "r"(runat),	\
+				"i"(VMM_PT_REGS_R16_SLOT) : "memory");	\
+	} while (0)
+
+void vcpu_bsw1(struct kvm_vcpu *vcpu)
+{
+	unsigned long i;
+	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
+	unsigned long *r = &regs->r16;
+	unsigned long *b0 = &VCPU(vcpu, vbgr[0]);
+	unsigned long *b1 = &VCPU(vcpu, vgr[0]);
+	unsigned long *runat = &regs->eml_unat;
+	unsigned long *b0unat = &VCPU(vcpu, vbnat);
+	unsigned long *b1unat = &VCPU(vcpu, vnat);
+
+	if (!(VCPU(vcpu, vpsr) & IA64_PSR_BN)) {
+		for (i = 0; i < 16; i++) {
+			*b0++ = *r;
+			*r++ = *b1++;
+		}
+		vcpu_bsw1_unat(i, b0unat, b1unat, runat,
+				VMM_PT_REGS_R16_SLOT);
+		VCPU(vcpu, vpsr) |= IA64_PSR_BN;
+	}
+}
+
+
+
+
+void vcpu_rfi(struct kvm_vcpu *vcpu)
+{
+	unsigned long ifs, psr;
+	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
+
+	psr = VCPU(vcpu, ipsr);
+	if (psr & IA64_PSR_BN)
+		vcpu_bsw1(vcpu);
+	else
+		vcpu_bsw0(vcpu);
+	vcpu_set_psr(vcpu, psr);
+	ifs = VCPU(vcpu, ifs);
+	if (ifs >> 63)
+		regs->cr_ifs = ifs;
+	regs->cr_iip = VCPU(vcpu, iip);
+}
+
+
+/*
+   VPSR can't keep track of below bits of guest PSR
+   This function gets guest PSR
+ */
+
+unsigned long vcpu_get_psr(struct kvm_vcpu *vcpu)
+{
+	unsigned long mask;
+	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
+
+	mask = IA64_PSR_BE | IA64_PSR_UP | IA64_PSR_AC | IA64_PSR_MFL |
+		IA64_PSR_MFH | IA64_PSR_CPL | IA64_PSR_RI;
+	return (VCPU(vcpu, vpsr) & ~mask) | (regs->cr_ipsr & mask);
+}
+
+void kvm_rsm(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long vpsr;
+	unsigned long imm24 = (inst.M44.i<<23) | (inst.M44.i2<<21)
+					| inst.M44.imm;
+
+	vpsr = vcpu_get_psr(vcpu);
+	vpsr &= (~imm24);
+	vcpu_set_psr(vcpu, vpsr);
+}
+
+void kvm_ssm(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long vpsr;
+	unsigned long imm24 = (inst.M44.i << 23) | (inst.M44.i2 << 21)
+				| inst.M44.imm;
+
+	vpsr = vcpu_get_psr(vcpu);
+	vpsr |= imm24;
+	vcpu_set_psr(vcpu, vpsr);
+}
+
+/* Generate Mask
+ * Parameter:
+ *  bit -- starting bit
+ *  len -- how many bits
+ */
+#define MASK(bit,len)				   	\
+({							\
+		__u64	ret;				\
+							\
+		__asm __volatile("dep %0=-1, r0, %1, %2"\
+				: "=r" (ret):		\
+		  "M" (bit),				\
+		  "M" (len));				\
+		ret;					\
+})
+
+void vcpu_set_psr_l(struct kvm_vcpu *vcpu, unsigned long val)
+{
+	val = (val & MASK(0, 32)) | (vcpu_get_psr(vcpu) & MASK(32, 32));
+	vcpu_set_psr(vcpu, val);
+}
+
+void kvm_mov_to_psr(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long val;
+
+	val = vcpu_get_gr(vcpu, inst.M35.r2);
+	vcpu_set_psr_l(vcpu, val);
+}
+
+void kvm_mov_from_psr(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long val;
+
+	val = vcpu_get_psr(vcpu);
+	val = (val & MASK(0, 32)) | (val & MASK(35, 2));
+	vcpu_set_gr(vcpu, inst.M33.r1, val, 0);
+}
+
+void vcpu_increment_iip(struct kvm_vcpu *vcpu)
+{
+	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
+	struct ia64_psr *ipsr = (struct ia64_psr *)&regs->cr_ipsr;
+	if (ipsr->ri == 2) {
+		ipsr->ri = 0;
+		regs->cr_iip += 16;
+	} else
+		ipsr->ri++;
+}
+
+void vcpu_decrement_iip(struct kvm_vcpu *vcpu)
+{
+	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
+	struct ia64_psr *ipsr = (struct ia64_psr *)&regs->cr_ipsr;
+
+	if (ipsr->ri == 0) {
+		ipsr->ri = 2;
+		regs->cr_iip -= 16;
+	} else
+		ipsr->ri--;
+}
+
+/** Emulate a privileged operation.
+ *
+ *
+ * @param vcpu virtual cpu
+ * @cause the reason cause virtualization fault
+ * @opcode the instruction code which cause virtualization fault
+ */
+
+void kvm_emulate(struct kvm_vcpu *vcpu, struct kvm_pt_regs *regs)
+{
+	unsigned long status, cause, opcode ;
+	INST64 inst;
+
+	status = IA64_NO_FAULT;
+	cause = VMX(vcpu, cause);
+	opcode = VMX(vcpu, opcode);
+	inst.inst = opcode;
+	/*
+	 * Switch to actual virtual rid in rr0 and rr4,
+	 * which is required by some tlb related instructions.
+	 */
+	prepare_if_physical_mode(vcpu);
+
+	switch (cause) {
+	case EVENT_RSM:
+		kvm_rsm(vcpu, inst);
+		break;
+	case EVENT_SSM:
+		kvm_ssm(vcpu, inst);
+		break;
+	case EVENT_MOV_TO_PSR:
+		kvm_mov_to_psr(vcpu, inst);
+		break;
+	case EVENT_MOV_FROM_PSR:
+		kvm_mov_from_psr(vcpu, inst);
+		break;
+	case EVENT_MOV_FROM_CR:
+		kvm_mov_from_cr(vcpu, inst);
+		break;
+	case EVENT_MOV_TO_CR:
+		kvm_mov_to_cr(vcpu, inst);
+		break;
+	case EVENT_BSW_0:
+		vcpu_bsw0(vcpu);
+		break;
+	case EVENT_BSW_1:
+		vcpu_bsw1(vcpu);
+		break;
+	case EVENT_COVER:
+		vcpu_cover(vcpu);
+		break;
+	case EVENT_RFI:
+		vcpu_rfi(vcpu);
+		break;
+	case EVENT_ITR_D:
+		kvm_itr_d(vcpu, inst);
+		break;
+	case EVENT_ITR_I:
+		kvm_itr_i(vcpu, inst);
+		break;
+	case EVENT_PTR_D:
+		kvm_ptr_d(vcpu, inst);
+		break;
+	case EVENT_PTR_I:
+		kvm_ptr_i(vcpu, inst);
+		break;
+	case EVENT_ITC_D:
+		kvm_itc_d(vcpu, inst);
+		break;
+	case EVENT_ITC_I:
+		kvm_itc_i(vcpu, inst);
+		break;
+	case EVENT_PTC_L:
+		kvm_ptc_l(vcpu, inst);
+		break;
+	case EVENT_PTC_G:
+		kvm_ptc_g(vcpu, inst);
+		break;
+	case EVENT_PTC_GA:
+		kvm_ptc_ga(vcpu, inst);
+		break;
+	case EVENT_PTC_E:
+		kvm_ptc_e(vcpu, inst);
+		break;
+	case EVENT_MOV_TO_RR:
+		kvm_mov_to_rr(vcpu, inst);
+		break;
+	case EVENT_MOV_FROM_RR:
+		kvm_mov_from_rr(vcpu, inst);
+		break;
+	case EVENT_THASH:
+		kvm_thash(vcpu, inst);
+		break;
+	case EVENT_TTAG:
+		kvm_ttag(vcpu, inst);
+		break;
+	case EVENT_TPA:
+		status = kvm_tpa(vcpu, inst);
+		break;
+	case EVENT_TAK:
+		kvm_tak(vcpu, inst);
+		break;
+	case EVENT_MOV_TO_AR_IMM:
+		kvm_mov_to_ar_imm(vcpu, inst);
+		break;
+	case EVENT_MOV_TO_AR:
+		kvm_mov_to_ar_reg(vcpu, inst);
+		break;
+	case EVENT_MOV_FROM_AR:
+		kvm_mov_from_ar_reg(vcpu, inst);
+		break;
+	case EVENT_MOV_TO_DBR:
+		kvm_mov_to_dbr(vcpu, inst);
+		break;
+	case EVENT_MOV_TO_IBR:
+		kvm_mov_to_ibr(vcpu, inst);
+		break;
+	case EVENT_MOV_TO_PMC:
+		kvm_mov_to_pmc(vcpu, inst);
+		break;
+	case EVENT_MOV_TO_PMD:
+		kvm_mov_to_pmd(vcpu, inst);
+		break;
+	case EVENT_MOV_TO_PKR:
+		kvm_mov_to_pkr(vcpu, inst);
+		break;
+	case EVENT_MOV_FROM_DBR:
+		kvm_mov_from_dbr(vcpu, inst);
+		break;
+	case EVENT_MOV_FROM_IBR:
+		kvm_mov_from_ibr(vcpu, inst);
+		break;
+	case EVENT_MOV_FROM_PMC:
+		kvm_mov_from_pmc(vcpu, inst);
+		break;
+	case EVENT_MOV_FROM_PKR:
+		kvm_mov_from_pkr(vcpu, inst);
+		break;
+	case EVENT_MOV_FROM_CPUID:
+		kvm_mov_from_cpuid(vcpu, inst);
+		break;
+	case EVENT_VMSW:
+		status = IA64_FAULT;
+		break;
+	default:
+		break;
+	};
+	/*Assume all status is NO_FAULT ?*/
+	if (status == IA64_NO_FAULT && cause != EVENT_RFI)
+		vcpu_increment_iip(vcpu);
+
+	recover_if_physical_mode(vcpu);
+}
+
+void init_vcpu(struct kvm_vcpu *vcpu)
+{
+	int i;
+
+	vcpu->arch.mode_flags = GUEST_IN_PHY;
+	VMX(vcpu, vrr[0]) = 0x38;
+	VMX(vcpu, vrr[1]) = 0x38;
+	VMX(vcpu, vrr[2]) = 0x38;
+	VMX(vcpu, vrr[3]) = 0x38;
+	VMX(vcpu, vrr[4]) = 0x38;
+	VMX(vcpu, vrr[5]) = 0x38;
+	VMX(vcpu, vrr[6]) = 0x38;
+	VMX(vcpu, vrr[7]) = 0x38;
+	VCPU(vcpu, vpsr) = IA64_PSR_BN;
+	VCPU(vcpu, dcr) = 0;
+	/* pta.size must not be 0.  The minimum is 15 (32k) */
+	VCPU(vcpu, pta) = 15 << 2;
+	VCPU(vcpu, itv) = 0x10000;
+	VCPU(vcpu, itm) = 0;
+	VMX(vcpu, last_itc) = 0;
+
+	VCPU(vcpu, lid) = VCPU_LID(vcpu);
+	VCPU(vcpu, ivr) = 0;
+	VCPU(vcpu, tpr) = 0x10000;
+	VCPU(vcpu, eoi) = 0;
+	VCPU(vcpu, irr[0]) = 0;
+	VCPU(vcpu, irr[1]) = 0;
+	VCPU(vcpu, irr[2]) = 0;
+	VCPU(vcpu, irr[3]) = 0;
+	VCPU(vcpu, pmv) = 0x10000;
+	VCPU(vcpu, cmcv) = 0x10000;
+	VCPU(vcpu, lrr0) = 0x10000;   /* default reset value? */
+	VCPU(vcpu, lrr1) = 0x10000;   /* default reset value? */
+	update_vhpi(vcpu, NULL_VECTOR);
+	VLSAPIC_XTP(vcpu) = 0x80;	/* disabled */
+
+	for (i = 0; i < 4; i++)
+		VLSAPIC_INSVC(vcpu, i) = 0;
+}
+
+void kvm_init_all_rr(struct kvm_vcpu *vcpu)
+{
+	unsigned long psr;
+
+	local_irq_save(psr);
+
+	/* WARNING: not allow co-exist of both virtual mode and physical
+	 * mode in same region
+	 */
+
+	vcpu->arch.metaphysical_saved_rr0 = vrrtomrr(VMX(vcpu, vrr[VRN0]));
+	vcpu->arch.metaphysical_saved_rr4 = vrrtomrr(VMX(vcpu, vrr[VRN4]));
+
+	if (is_physical_mode(vcpu)) {
+		if (vcpu->arch.mode_flags & GUEST_PHY_EMUL)
+			panic_vm(vcpu);
+
+		ia64_set_rr((VRN0 << VRN_SHIFT), vcpu->arch.metaphysical_rr0);
+		ia64_dv_serialize_data();
+		ia64_set_rr((VRN4 << VRN_SHIFT), vcpu->arch.metaphysical_rr4);
+		ia64_dv_serialize_data();
+	} else {
+		ia64_set_rr((VRN0 << VRN_SHIFT),
+				vcpu->arch.metaphysical_saved_rr0);
+		ia64_dv_serialize_data();
+		ia64_set_rr((VRN4 << VRN_SHIFT),
+				vcpu->arch.metaphysical_saved_rr4);
+		ia64_dv_serialize_data();
+	}
+	ia64_set_rr((VRN1 << VRN_SHIFT),
+			vrrtomrr(VMX(vcpu, vrr[VRN1])));
+	ia64_dv_serialize_data();
+	ia64_set_rr((VRN2 << VRN_SHIFT),
+			vrrtomrr(VMX(vcpu, vrr[VRN2])));
+	ia64_dv_serialize_data();
+	ia64_set_rr((VRN3 << VRN_SHIFT),
+			vrrtomrr(VMX(vcpu, vrr[VRN3])));
+	ia64_dv_serialize_data();
+	ia64_set_rr((VRN5 << VRN_SHIFT),
+			vrrtomrr(VMX(vcpu, vrr[VRN5])));
+	ia64_dv_serialize_data();
+	ia64_set_rr((VRN7 << VRN_SHIFT),
+			vrrtomrr(VMX(vcpu, vrr[VRN7])));
+	ia64_dv_serialize_data();
+	ia64_srlz_d();
+	ia64_set_psr(psr);
+}
+
+int vmm_entry(void)
+{
+	struct kvm_vcpu *v;
+	v = current_vcpu;
+
+	ia64_call_vsa(PAL_VPS_RESTORE, (unsigned long)v->arch.vpd,
+						0, 0, 0, 0, 0, 0);
+	kvm_init_vtlb(v);
+	kvm_init_vhpt(v);
+	init_vcpu(v);
+	kvm_init_all_rr(v);
+	vmm_reset_entry();
+
+	return 0;
+}
+
+void panic_vm(struct kvm_vcpu *v)
+{
+	struct exit_ctl_data *p = &v->arch.exit_data;
+
+	p->exit_reason = EXIT_REASON_VM_PANIC;
+	vmm_transition(v);
+	/*Never to return*/
+	while (1);
+}
diff --git a/arch/ia64/kvm/vcpu.h b/arch/ia64/kvm/vcpu.h
new file mode 100644
index 0000000..b0fcfb6
--- /dev/null
+++ b/arch/ia64/kvm/vcpu.h
@@ -0,0 +1,740 @@
+/*
+ *  vcpu.h: vcpu routines
+ *  	Copyright (c) 2005, Intel Corporation.
+ *  	Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com)
+ *  	Yaozu Dong (Eddie Dong) (Eddie.dong@intel.com)
+ *
+ * 	Copyright (c) 2007, Intel Corporation.
+ *  	Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com)
+ *	Xiantao Zhang (xiantao.zhang@intel.com)
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+
+#ifndef __KVM_VCPU_H__
+#define __KVM_VCPU_H__
+
+#include <asm/types.h>
+#include <asm/fpu.h>
+#include <asm/processor.h>
+
+#ifndef __ASSEMBLY__
+#include "vti.h"
+
+#include <linux/kvm_host.h>
+#include <linux/spinlock.h>
+
+typedef unsigned long IA64_INST;
+
+typedef union U_IA64_BUNDLE {
+	unsigned long i64[2];
+	struct { unsigned long template:5, slot0:41, slot1a:18,
+		slot1b:23, slot2:41; };
+	/* NOTE: following doesn't work because bitfields can't cross natural
+	   size boundaries
+	   struct { unsigned long template:5, slot0:41, slot1:41, slot2:41; }; */
+} IA64_BUNDLE;
+
+typedef union U_INST64_A5 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, r1:7, imm7b:7, r3:2, imm5c:5,
+		imm9d:9, s:1, major:4; };
+} INST64_A5;
+
+typedef union U_INST64_B4 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, btype:3, un3:3, p:1, b2:3, un11:11, x6:6,
+		wh:2, d:1, un1:1, major:4; };
+} INST64_B4;
+
+typedef union U_INST64_B8 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, un21:21, x6:6, un4:4, major:4; };
+} INST64_B8;
+
+typedef union U_INST64_B9 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, imm20:20, :1, x6:6, :3, i:1, major:4; };
+} INST64_B9;
+
+typedef union U_INST64_I19 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, imm20:20, :1, x6:6, x3:3, i:1, major:4; };
+} INST64_I19;
+
+typedef union U_INST64_I26 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, :7, r2:7, ar3:7, x6:6, x3:3, :1, major:4; };
+} INST64_I26;
+
+typedef union U_INST64_I27 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, :7, imm:7, ar3:7, x6:6, x3:3, s:1, major:4; };
+} INST64_I27;
+
+typedef union U_INST64_I28 { /* not privileged (mov from AR) */
+	IA64_INST inst;
+	struct { unsigned long qp:6, r1:7, :7, ar3:7, x6:6, x3:3, :1, major:4; };
+} INST64_I28;
+
+typedef union U_INST64_M28 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, :14, r3:7, x6:6, x3:3, :1, major:4; };
+} INST64_M28;
+
+typedef union U_INST64_M29 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, :7, r2:7, ar3:7, x6:6, x3:3, :1, major:4; };
+} INST64_M29;
+
+typedef union U_INST64_M30 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, :7, imm:7, ar3:7, x4:4, x2:2,
+		x3:3, s:1, major:4; };
+} INST64_M30;
+
+typedef union U_INST64_M31 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, r1:7, :7, ar3:7, x6:6, x3:3, :1, major:4; };
+} INST64_M31;
+
+typedef union U_INST64_M32 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, :7, r2:7, cr3:7, x6:6, x3:3, :1, major:4; };
+} INST64_M32;
+
+typedef union U_INST64_M33 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, r1:7, :7, cr3:7, x6:6, x3:3, :1, major:4; };
+} INST64_M33;
+
+typedef union U_INST64_M35 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, :7, r2:7, :7, x6:6, x3:3, :1, major:4; };
+
+} INST64_M35;
+
+typedef union U_INST64_M36 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, r1:7, :14, x6:6, x3:3, :1, major:4; };
+} INST64_M36;
+
+typedef union U_INST64_M37 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, imm20a:20, :1, x4:4, x2:2, x3:3,
+		i:1, major:4; };
+} INST64_M37;
+
+typedef union U_INST64_M41 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, :7, r2:7, :7, x6:6, x3:3, :1, major:4; };
+} INST64_M41;
+
+typedef union U_INST64_M42 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, :7, r2:7, r3:7, x6:6, x3:3, :1, major:4; };
+} INST64_M42;
+
+typedef union U_INST64_M43 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, r1:7, :7, r3:7, x6:6, x3:3, :1, major:4; };
+} INST64_M43;
+
+typedef union U_INST64_M44 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, imm:21, x4:4, i2:2, x3:3, i:1, major:4; };
+} INST64_M44;
+
+typedef union U_INST64_M45 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, :7, r2:7, r3:7, x6:6, x3:3, :1, major:4; };
+} INST64_M45;
+
+typedef union U_INST64_M46 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, r1:7, un7:7, r3:7, x6:6,
+		x3:3, un1:1, major:4; };
+} INST64_M46;
+
+typedef union U_INST64_M47 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, un14:14, r3:7, x6:6, x3:3, un1:1, major:4; };
+} INST64_M47;
+
+typedef union U_INST64_M1{
+	IA64_INST inst;
+	struct { unsigned long qp:6, r1:7, un7:7, r3:7, x:1, hint:2,
+		x6:6, m:1, major:4; };
+} INST64_M1;
+
+typedef union U_INST64_M2{
+	IA64_INST inst;
+	struct { unsigned long qp:6, r1:7, r2:7, r3:7, x:1, hint:2,
+		x6:6, m:1, major:4; };
+} INST64_M2;
+
+typedef union U_INST64_M3{
+	IA64_INST inst;
+	struct { unsigned long qp:6, r1:7, imm7:7, r3:7, i:1, hint:2,
+		x6:6, s:1, major:4; };
+} INST64_M3;
+
+typedef union U_INST64_M4 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, un7:7, r2:7, r3:7, x:1, hint:2,
+		x6:6, m:1, major:4; };
+} INST64_M4;
+
+typedef union U_INST64_M5 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, imm7:7, r2:7, r3:7, i:1, hint:2,
+		x6:6, s:1, major:4; };
+} INST64_M5;
+
+typedef union U_INST64_M6 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, f1:7, un7:7, r3:7, x:1, hint:2,
+		x6:6, m:1, major:4; };
+} INST64_M6;
+
+typedef union U_INST64_M9 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, :7, f2:7, r3:7, x:1, hint:2,
+		x6:6, m:1, major:4; };
+} INST64_M9;
+
+typedef union U_INST64_M10 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, imm7:7, f2:7, r3:7, i:1, hint:2,
+		x6:6, s:1, major:4; };
+} INST64_M10;
+
+typedef union U_INST64_M12 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, f1:7, f2:7, r3:7, x:1, hint:2,
+		x6:6, m:1, major:4; };
+} INST64_M12;
+
+typedef union U_INST64_M15 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, :7, imm7:7, r3:7, i:1, hint:2,
+		x6:6, s:1, major:4; };
+} INST64_M15;
+
+typedef union U_INST64 {
+	IA64_INST inst;
+	struct { unsigned long :37, major:4; } generic;
+	INST64_A5 A5;	/* used in build_hypercall_bundle only */
+	INST64_B4 B4;	/* used in build_hypercall_bundle only */
+	INST64_B8 B8;	/* rfi, bsw.[01] */
+	INST64_B9 B9;	/* break.b */
+	INST64_I19 I19;	/* used in build_hypercall_bundle only */
+	INST64_I26 I26;	/* mov register to ar (I unit) */
+	INST64_I27 I27;	/* mov immediate to ar (I unit) */
+	INST64_I28 I28;	/* mov from ar (I unit) */
+	INST64_M1  M1;	/* ld integer */
+	INST64_M2  M2;
+	INST64_M3  M3;
+	INST64_M4  M4;	/* st integer */
+	INST64_M5  M5;
+	INST64_M6  M6;	/* ldfd floating pointer 		*/
+	INST64_M9  M9;	/* stfd floating pointer		*/
+	INST64_M10 M10;	/* stfd floating pointer		*/
+	INST64_M12 M12;     /* ldfd pair floating pointer		*/
+	INST64_M15 M15;	/* lfetch + imm update			*/
+	INST64_M28 M28;	/* purge translation cache entry	*/
+	INST64_M29 M29;	/* mov register to ar (M unit)		*/
+	INST64_M30 M30;	/* mov immediate to ar (M unit)		*/
+	INST64_M31 M31;	/* mov from ar (M unit)			*/
+	INST64_M32 M32;	/* mov reg to cr			*/
+	INST64_M33 M33;	/* mov from cr				*/
+	INST64_M35 M35;	/* mov to psr				*/
+	INST64_M36 M36;	/* mov from psr				*/
+	INST64_M37 M37;	/* break.m				*/
+	INST64_M41 M41;	/* translation cache insert		*/
+	INST64_M42 M42;	/* mov to indirect reg/translation reg insert*/
+	INST64_M43 M43;	/* mov from indirect reg		*/
+	INST64_M44 M44;	/* set/reset system mask		*/
+	INST64_M45 M45;	/* translation purge			*/
+	INST64_M46 M46;	/* translation access (tpa,tak)		*/
+	INST64_M47 M47;	/* purge translation entry		*/
+} INST64;
+
+#define MASK_41 ((unsigned long)0x1ffffffffff)
+
+/* Virtual address memory attributes encoding */
+#define VA_MATTR_WB         0x0
+#define VA_MATTR_UC         0x4
+#define VA_MATTR_UCE        0x5
+#define VA_MATTR_WC         0x6
+#define VA_MATTR_NATPAGE    0x7
+
+#define PMASK(size)         (~((size) - 1))
+#define PSIZE(size)         (1UL<<(size))
+#define CLEARLSB(ppn, nbits)    (((ppn) >> (nbits)) << (nbits))
+#define PAGEALIGN(va, ps)	CLEARLSB(va, ps)
+#define PAGE_FLAGS_RV_MASK   (0x2|(0x3UL<<50)|(((1UL<<11)-1)<<53))
+#define _PAGE_MA_ST     (0x1 <<  2) /* is reserved for software use */
+
+#define ARCH_PAGE_SHIFT   12
+
+#define INVALID_TI_TAG (1UL << 63)
+
+#define VTLB_PTE_P_BIT      0
+#define VTLB_PTE_IO_BIT     60
+#define VTLB_PTE_IO         (1UL<<VTLB_PTE_IO_BIT)
+#define VTLB_PTE_P          (1UL<<VTLB_PTE_P_BIT)
+
+#define vcpu_quick_region_check(_tr_regions,_ifa)		\
+	(_tr_regions & (1 << ((unsigned long)_ifa >> 61)))
+
+#define vcpu_quick_region_set(_tr_regions,_ifa)             \
+	do {_tr_regions |= (1 << ((unsigned long)_ifa >> 61)); } while (0)
+
+static inline void vcpu_set_tr(struct thash_data *trp, u64 pte, u64 itir,
+		u64 va, u64 rid)
+{
+	trp->page_flags = pte;
+	trp->itir = itir;
+	trp->vadr = va;
+	trp->rid = rid;
+}
+
+extern u64 kvm_lookup_mpa(u64 gpfn);
+extern u64 kvm_gpa_to_mpa(u64 gpa);
+
+/* Return I/O type if trye */
+#define __gpfn_is_io(gpfn)			\
+	({						\
+	 u64 pte, ret = 0;			\
+	 pte = kvm_lookup_mpa(gpfn);		\
+	 if (!(pte & GPFN_INV_MASK))		\
+	 ret = pte & GPFN_IO_MASK;	\
+	 ret;					\
+	 })
+
+#endif
+
+#define IA64_NO_FAULT	0
+#define IA64_FAULT	1
+
+#define VMM_RBS_OFFSET  ((VMM_TASK_SIZE + 15) & ~15)
+
+#define SW_BAD  0   /* Bad mode transitition */
+#define SW_V2P  1   /* Physical emulatino is activated */
+#define SW_P2V  2   /* Exit physical mode emulation */
+#define SW_SELF 3   /* No mode transition */
+#define SW_NOP  4   /* Mode transition, but without action required */
+
+#define GUEST_IN_PHY    0x1
+#define GUEST_PHY_EMUL  0x2
+
+#define current_vcpu ((struct kvm_vcpu *) ia64_getreg(_IA64_REG_TP))
+
+#define VRN_SHIFT	61
+#define VRN_MASK	0xe000000000000000
+#define VRN0		0x0UL
+#define VRN1		0x1UL
+#define VRN2		0x2UL
+#define VRN3		0x3UL
+#define VRN4		0x4UL
+#define VRN5		0x5UL
+#define VRN6		0x6UL
+#define VRN7		0x7UL
+
+#define IRQ_NO_MASKED         0
+#define IRQ_MASKED_BY_VTPR    1
+#define IRQ_MASKED_BY_INSVC   2   /* masked by inservice IRQ */
+
+#define PTA_BASE_SHIFT      15
+
+#define IA64_PSR_VM_BIT     46
+#define IA64_PSR_VM (__IA64_UL(1) << IA64_PSR_VM_BIT)
+
+/* Interruption Function State */
+#define IA64_IFS_V_BIT      63
+#define IA64_IFS_V  (__IA64_UL(1) << IA64_IFS_V_BIT)
+
+#define PHY_PAGE_UC (_PAGE_A|_PAGE_D|_PAGE_P|_PAGE_MA_UC|_PAGE_AR_RWX)
+#define PHY_PAGE_WB (_PAGE_A|_PAGE_D|_PAGE_P|_PAGE_MA_WB|_PAGE_AR_RWX)
+
+#ifndef __ASSEMBLY__
+
+#include <asm/gcc_intrin.h>
+
+#define is_physical_mode(v)		\
+	((v->arch.mode_flags) & GUEST_IN_PHY)
+
+#define is_virtual_mode(v)	\
+	(!is_physical_mode(v))
+
+#define MODE_IND(psr)	\
+	(((psr).it << 2) + ((psr).dt << 1) + (psr).rt)
+
+#define _vmm_raw_spin_lock(x)						\
+	do {								\
+		__u32 *ia64_spinlock_ptr = (__u32 *) (x);		\
+		__u64 ia64_spinlock_val;				\
+		ia64_spinlock_val = ia64_cmpxchg4_acq(ia64_spinlock_ptr, 1, 0);\
+		if (unlikely(ia64_spinlock_val)) {			\
+			do {						\
+				while (*ia64_spinlock_ptr)		\
+				ia64_barrier();				\
+				ia64_spinlock_val =			\
+				ia64_cmpxchg4_acq(ia64_spinlock_ptr, 1, 0);\
+			} while (ia64_spinlock_val);			\
+		}							\
+	} while (0)
+
+#define _vmm_raw_spin_unlock(x)				\
+	do { barrier();				\
+		((spinlock_t *)x)->raw_lock.lock = 0; } \
+while (0)
+
+void vmm_spin_lock(spinlock_t *lock);
+void vmm_spin_unlock(spinlock_t *lock);
+enum {
+	I_TLB = 1,
+	D_TLB = 2
+};
+
+union kvm_va {
+	struct {
+		unsigned long off : 60;		/* intra-region offset */
+		unsigned long reg :  4;		/* region number */
+	} f;
+	unsigned long l;
+	void *p;
+};
+
+#define __kvm_pa(x)     ({union kvm_va _v; _v.l = (long) (x);		\
+						_v.f.reg = 0; _v.l; })
+#define __kvm_va(x)     ({union kvm_va _v; _v.l = (long) (x);		\
+				_v.f.reg = -1; _v.p; })
+
+#define _REGION_ID(x)           ({union ia64_rr _v; _v.val = (long)(x); \
+						_v.rid; })
+#define _REGION_PAGE_SIZE(x)    ({union ia64_rr _v; _v.val = (long)(x); \
+						_v.ps; })
+#define _REGION_HW_WALKER(x)    ({union ia64_rr _v; _v.val = (long)(x);	\
+						_v.ve; })
+
+enum vhpt_ref{ DATA_REF, NA_REF, INST_REF, RSE_REF };
+enum tlb_miss_type { INSTRUCTION, DATA, REGISTER };
+
+#define VCPU(_v, _x) ((_v)->arch.vpd->_x)
+#define VMX(_v, _x)  ((_v)->arch._x)
+
+#define VLSAPIC_INSVC(vcpu, i) ((vcpu)->arch.insvc[i])
+#define VLSAPIC_XTP(_v)        VMX(_v, xtp)
+
+static inline unsigned long itir_ps(unsigned long itir)
+{
+	return ((itir >> 2) & 0x3f);
+}
+
+
+/**************************************************************************
+  VCPU control register access routines
+ **************************************************************************/
+
+static inline u64 vcpu_get_itir(struct kvm_vcpu *vcpu)
+{
+	return ((u64)VCPU(vcpu, itir));
+}
+
+static inline void vcpu_set_itir(struct kvm_vcpu *vcpu, u64 val)
+{
+	VCPU(vcpu, itir) = val;
+}
+
+static inline u64 vcpu_get_ifa(struct kvm_vcpu *vcpu)
+{
+	return ((u64)VCPU(vcpu, ifa));
+}
+
+static inline void vcpu_set_ifa(struct kvm_vcpu *vcpu, u64 val)
+{
+	VCPU(vcpu, ifa) = val;
+}
+
+static inline u64 vcpu_get_iva(struct kvm_vcpu *vcpu)
+{
+	return ((u64)VCPU(vcpu, iva));
+}
+
+static inline u64 vcpu_get_pta(struct kvm_vcpu *vcpu)
+{
+	return ((u64)VCPU(vcpu, pta));
+}
+
+static inline u64 vcpu_get_lid(struct kvm_vcpu *vcpu)
+{
+	return ((u64)VCPU(vcpu, lid));
+}
+
+static inline u64 vcpu_get_tpr(struct kvm_vcpu *vcpu)
+{
+	return ((u64)VCPU(vcpu, tpr));
+}
+
+static inline u64 vcpu_get_eoi(struct kvm_vcpu *vcpu)
+{
+	return (0UL);		/*reads of eoi always return 0 */
+}
+
+static inline u64 vcpu_get_irr0(struct kvm_vcpu *vcpu)
+{
+	return ((u64)VCPU(vcpu, irr[0]));
+}
+
+static inline u64 vcpu_get_irr1(struct kvm_vcpu *vcpu)
+{
+	return ((u64)VCPU(vcpu, irr[1]));
+}
+
+static inline u64 vcpu_get_irr2(struct kvm_vcpu *vcpu)
+{
+	return ((u64)VCPU(vcpu, irr[2]));
+}
+
+static inline u64 vcpu_get_irr3(struct kvm_vcpu *vcpu)
+{
+	return ((u64)VCPU(vcpu, irr[3]));
+}
+
+static inline void vcpu_set_dcr(struct kvm_vcpu *vcpu, u64 val)
+{
+	ia64_setreg(_IA64_REG_CR_DCR, val);
+}
+
+static inline void vcpu_set_isr(struct kvm_vcpu *vcpu, u64 val)
+{
+	VCPU(vcpu, isr) = val;
+}
+
+static inline void vcpu_set_lid(struct kvm_vcpu *vcpu, u64 val)
+{
+	VCPU(vcpu, lid) = val;
+}
+
+static inline void vcpu_set_ipsr(struct kvm_vcpu *vcpu, u64 val)
+{
+	VCPU(vcpu, ipsr) = val;
+}
+
+static inline void vcpu_set_iip(struct kvm_vcpu *vcpu, u64 val)
+{
+	VCPU(vcpu, iip) = val;
+}
+
+static inline void vcpu_set_ifs(struct kvm_vcpu *vcpu, u64 val)
+{
+	VCPU(vcpu, ifs) = val;
+}
+
+static inline void vcpu_set_iipa(struct kvm_vcpu *vcpu, u64 val)
+{
+	VCPU(vcpu, iipa) = val;
+}
+
+static inline void vcpu_set_iha(struct kvm_vcpu *vcpu, u64 val)
+{
+	VCPU(vcpu, iha) = val;
+}
+
+
+static inline u64 vcpu_get_rr(struct kvm_vcpu *vcpu, u64 reg)
+{
+	return vcpu->arch.vrr[reg>>61];
+}
+
+/**************************************************************************
+  VCPU debug breakpoint register access routines
+ **************************************************************************/
+
+static inline void vcpu_set_dbr(struct kvm_vcpu *vcpu, u64 reg, u64 val)
+{
+	__ia64_set_dbr(reg, val);
+}
+
+static inline void vcpu_set_ibr(struct kvm_vcpu *vcpu, u64 reg, u64 val)
+{
+	ia64_set_ibr(reg, val);
+}
+
+static inline u64 vcpu_get_dbr(struct kvm_vcpu *vcpu, u64 reg)
+{
+	return ((u64)__ia64_get_dbr(reg));
+}
+
+static inline u64 vcpu_get_ibr(struct kvm_vcpu *vcpu, u64 reg)
+{
+	return ((u64)ia64_get_ibr(reg));
+}
+
+/**************************************************************************
+  VCPU performance monitor register access routines
+ **************************************************************************/
+static inline void vcpu_set_pmc(struct kvm_vcpu *vcpu, u64 reg, u64 val)
+{
+	/* NOTE: Writes to unimplemented PMC registers are discarded */
+	ia64_set_pmc(reg, val);
+}
+
+static inline void vcpu_set_pmd(struct kvm_vcpu *vcpu, u64 reg, u64 val)
+{
+	/* NOTE: Writes to unimplemented PMD registers are discarded */
+	ia64_set_pmd(reg, val);
+}
+
+static inline u64 vcpu_get_pmc(struct kvm_vcpu *vcpu, u64 reg)
+{
+	/* NOTE: Reads from unimplemented PMC registers return zero */
+	return ((u64)ia64_get_pmc(reg));
+}
+
+static inline u64 vcpu_get_pmd(struct kvm_vcpu *vcpu, u64 reg)
+{
+	/* NOTE: Reads from unimplemented PMD registers return zero */
+	return ((u64)ia64_get_pmd(reg));
+}
+
+static inline unsigned long vrrtomrr(unsigned long val)
+{
+	union ia64_rr rr;
+	rr.val = val;
+	rr.rid = (rr.rid << 4) | 0xe;
+	if (rr.ps > PAGE_SHIFT)
+		rr.ps = PAGE_SHIFT;
+	rr.ve = 1;
+	return rr.val;
+}
+
+
+static inline int highest_bits(int *dat)
+{
+	u32  bits, bitnum;
+	int i;
+
+	/* loop for all 256 bits */
+	for (i = 7; i >= 0 ; i--) {
+		bits = dat[i];
+		if (bits) {
+			bitnum = fls(bits);
+			return i * 32 + bitnum - 1;
+		}
+	}
+	return NULL_VECTOR;
+}
+
+/*
+ * The pending irq is higher than the inservice one.
+ *
+ */
+static inline int is_higher_irq(int pending, int inservice)
+{
+	return ((pending > inservice)
+			|| ((pending != NULL_VECTOR)
+				&& (inservice == NULL_VECTOR)));
+}
+
+static inline int is_higher_class(int pending, int mic)
+{
+	return ((pending >> 4) > mic);
+}
+
+/*
+ * Return 0-255 for pending irq.
+ *        NULL_VECTOR: when no pending.
+ */
+static inline int highest_pending_irq(struct kvm_vcpu *vcpu)
+{
+	if (VCPU(vcpu, irr[0]) & (1UL<<NMI_VECTOR))
+		return NMI_VECTOR;
+	if (VCPU(vcpu, irr[0]) & (1UL<<ExtINT_VECTOR))
+		return ExtINT_VECTOR;
+
+	return highest_bits((int *)&VCPU(vcpu, irr[0]));
+}
+
+static inline int highest_inservice_irq(struct kvm_vcpu *vcpu)
+{
+	if (VMX(vcpu, insvc[0]) & (1UL<<NMI_VECTOR))
+		return NMI_VECTOR;
+	if (VMX(vcpu, insvc[0]) & (1UL<<ExtINT_VECTOR))
+		return ExtINT_VECTOR;
+
+	return highest_bits((int *)&(VMX(vcpu, insvc[0])));
+}
+
+extern void vcpu_get_fpreg(struct kvm_vcpu *vcpu, u64 reg,
+					struct ia64_fpreg *val);
+extern void vcpu_set_fpreg(struct kvm_vcpu *vcpu, u64 reg,
+					struct ia64_fpreg *val);
+extern u64 vcpu_get_gr(struct kvm_vcpu *vcpu, u64 reg);
+extern void vcpu_set_gr(struct kvm_vcpu *vcpu, u64 reg, u64 val, int nat);
+extern u64 vcpu_get_psr(struct kvm_vcpu *vcpu);
+extern void vcpu_set_psr(struct kvm_vcpu *vcpu, u64 val);
+extern u64 vcpu_thash(struct kvm_vcpu *vcpu, u64 vadr);
+extern void vcpu_bsw0(struct kvm_vcpu *vcpu);
+extern void thash_vhpt_insert(struct kvm_vcpu *v, u64 pte,
+					u64 itir, u64 va, int type);
+extern struct thash_data *vhpt_lookup(u64 va);
+extern u64 guest_vhpt_lookup(u64 iha, u64 *pte);
+extern void thash_purge_entries(struct kvm_vcpu *v, u64 va, u64 ps);
+extern void thash_purge_entries_remote(struct kvm_vcpu *v, u64 va, u64 ps);
+extern u64 translate_phy_pte(u64 *pte, u64 itir, u64 va);
+extern int thash_purge_and_insert(struct kvm_vcpu *v, u64 pte,
+		u64 itir, u64 ifa, int type);
+extern void thash_purge_all(struct kvm_vcpu *v);
+extern struct thash_data *vtlb_lookup(struct kvm_vcpu *v,
+						u64 va, int is_data);
+extern int vtr_find_overlap(struct kvm_vcpu *vcpu, u64 va,
+						u64 ps, int is_data);
+
+extern void vcpu_increment_iip(struct kvm_vcpu *v);
+extern void vcpu_decrement_iip(struct kvm_vcpu *vcpu);
+extern void vcpu_pend_interrupt(struct kvm_vcpu *vcpu, u8 vec);
+extern void vcpu_unpend_interrupt(struct kvm_vcpu *vcpu, u8 vec);
+extern void data_page_not_present(struct kvm_vcpu *vcpu, u64 vadr);
+extern void dnat_page_consumption(struct kvm_vcpu *vcpu, u64 vadr);
+extern void alt_dtlb(struct kvm_vcpu *vcpu, u64 vadr);
+extern void nested_dtlb(struct kvm_vcpu *vcpu);
+extern void dvhpt_fault(struct kvm_vcpu *vcpu, u64 vadr);
+extern int vhpt_enabled(struct kvm_vcpu *vcpu, u64 vadr, enum vhpt_ref ref);
+
+extern void update_vhpi(struct kvm_vcpu *vcpu, int vec);
+extern int irq_masked(struct kvm_vcpu *vcpu, int h_pending, int h_inservice);
+
+extern int fetch_code(struct kvm_vcpu *vcpu, u64 gip, IA64_BUNDLE *pbundle);
+extern void emulate_io_inst(struct kvm_vcpu *vcpu, u64 padr, u64 ma);
+extern void vmm_transition(struct kvm_vcpu *vcpu);
+extern void vmm_trampoline(union context *from, union context *to);
+extern int vmm_entry(void);
+extern  u64 vcpu_get_itc(struct kvm_vcpu *vcpu);
+
+extern void vmm_reset_entry(void);
+void kvm_init_vtlb(struct kvm_vcpu *v);
+void kvm_init_vhpt(struct kvm_vcpu *v);
+void thash_init(struct thash_cb *hcb, u64 sz);
+
+void panic_vm(struct kvm_vcpu *v);
+
+extern u64 ia64_call_vsa(u64 proc, u64 arg1, u64 arg2, u64 arg3,
+		u64 arg4, u64 arg5, u64 arg6, u64 arg7);
+#endif
+#endif	/* __VCPU_H__ */
diff --git a/arch/ia64/kvm/vmm.c b/arch/ia64/kvm/vmm.c
new file mode 100644
index 0000000..2275bf4
--- /dev/null
+++ b/arch/ia64/kvm/vmm.c
@@ -0,0 +1,66 @@
+/*
+ * vmm.c: vmm module interface with kvm module
+ *
+ * Copyright (c) 2007, Intel Corporation.
+ *
+ *  Xiantao Zhang (xiantao.zhang@intel.com)
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ */
+
+
+#include<linux/module.h>
+#include<asm/fpswa.h>
+
+#include "vcpu.h"
+
+MODULE_AUTHOR("Intel");
+MODULE_LICENSE("GPL");
+
+extern char kvm_ia64_ivt;
+extern fpswa_interface_t *vmm_fpswa_interface;
+
+struct kvm_vmm_info vmm_info = {
+	.module	     = THIS_MODULE,
+	.vmm_entry   = vmm_entry,
+	.tramp_entry = vmm_trampoline,
+	.vmm_ivt     = (unsigned long)&kvm_ia64_ivt,
+};
+
+static int __init  kvm_vmm_init(void)
+{
+
+	vmm_fpswa_interface = fpswa_interface;
+
+	/*Register vmm data to kvm side*/
+	return kvm_init(&vmm_info, 1024, THIS_MODULE);
+}
+
+static void __exit kvm_vmm_exit(void)
+{
+	kvm_exit();
+	return ;
+}
+
+void vmm_spin_lock(spinlock_t *lock)
+{
+	_vmm_raw_spin_lock(lock);
+}
+
+void vmm_spin_unlock(spinlock_t *lock)
+{
+	_vmm_raw_spin_unlock(lock);
+}
+module_init(kvm_vmm_init)
+module_exit(kvm_vmm_exit)
diff --git a/arch/ia64/kvm/vmm_ivt.S b/arch/ia64/kvm/vmm_ivt.S
new file mode 100644
index 0000000..3ee5f48
--- /dev/null
+++ b/arch/ia64/kvm/vmm_ivt.S
@@ -0,0 +1,1424 @@
+/*
+ * /ia64/kvm_ivt.S
+ *
+ * Copyright (C) 1998-2001, 2003 Hewlett-Packard Co
+ *      Stephane Eranian <eranian@hpl.hp.com>
+ *      David Mosberger <davidm@hpl.hp.com>
+ * Copyright (C) 2000, 2002-2003 Intel Co
+ *      Asit Mallick <asit.k.mallick@intel.com>
+ *      Suresh Siddha <suresh.b.siddha@intel.com>
+ *      Kenneth Chen <kenneth.w.chen@intel.com>
+ *      Fenghua Yu <fenghua.yu@intel.com>
+ *
+ *
+ * 00/08/23 Asit Mallick <asit.k.mallick@intel.com> TLB handling
+ * for SMP
+ * 00/12/20 David Mosberger-Tang <davidm@hpl.hp.com> DTLB/ITLB
+ * handler now uses virtual PT.
+ *
+ * 07/6/20 Xuefei Xu  (Anthony Xu) (anthony.xu@intel.com)
+ *              Supporting Intel virtualization architecture
+ *
+ */
+
+/*
+ * This file defines the interruption vector table used by the CPU.
+ * It does not include one entry per possible cause of interruption.
+ *
+ * The first 20 entries of the table contain 64 bundles each while the
+ * remaining 48 entries contain only 16 bundles each.
+ *
+ * The 64 bundles are used to allow inlining the whole handler for
+ * critical
+ * interruptions like TLB misses.
+ *
+ *  For each entry, the comment is as follows:
+ *
+ *              // 0x1c00 Entry 7 (size 64 bundles) Data Key Miss
+ *              (12,51)
+ *  entry offset ----/     /         /                  /
+ *  /
+ *  entry number ---------/         /                  /
+ *  /
+ *  size of the entry -------------/                  /
+ *  /
+ *  vector name -------------------------------------/
+ *  /
+ *  interruptions triggering this vector
+ *  ----------------------/
+ *
+ * The table is 32KB in size and must be aligned on 32KB
+ * boundary.
+ * (The CPU ignores the 15 lower bits of the address)
+ *
+ * Table is based upon EAS2.6 (Oct 1999)
+ */
+
+
+#include <asm/asmmacro.h>
+#include <asm/cache.h>
+#include <asm/pgtable.h>
+
+#include "asm-offsets.h"
+#include "vcpu.h"
+#include "kvm_minstate.h"
+#include "vti.h"
+
+#if 1
+# define PSR_DEFAULT_BITS   psr.ac
+#else
+# define PSR_DEFAULT_BITS   0
+#endif
+
+
+#define KVM_FAULT(n)    \
+    kvm_fault_##n:;          \
+    mov r19=n;;          \
+    br.sptk.many kvm_fault_##n;         \
+    ;;                  \
+
+
+#define KVM_REFLECT(n)    \
+    mov r31=pr;           \
+    mov r19=n;       /* prepare to save predicates */ \
+    mov r29=cr.ipsr;      \
+    ;;      \
+    tbit.z p6,p7=r29,IA64_PSR_VM_BIT;       \
+(p7)br.sptk.many kvm_dispatch_reflection;        \
+    br.sptk.many kvm_panic;      \
+
+
+GLOBAL_ENTRY(kvm_panic)
+    br.sptk.many kvm_panic
+    ;;
+END(kvm_panic)
+
+
+
+
+
+    .section .text.ivt,"ax"
+
+    .align 32768    // align on 32KB boundary
+    .global kvm_ia64_ivt
+kvm_ia64_ivt:
+///////////////////////////////////////////////////////////////
+// 0x0000 Entry 0 (size 64 bundles) VHPT Translation (8,20,47)
+ENTRY(kvm_vhpt_miss)
+    KVM_FAULT(0)
+END(kvm_vhpt_miss)
+
+
+    .org kvm_ia64_ivt+0x400
+////////////////////////////////////////////////////////////////
+// 0x0400 Entry 1 (size 64 bundles) ITLB (21)
+ENTRY(kvm_itlb_miss)
+    mov r31 = pr
+    mov r29=cr.ipsr;
+    ;;
+    tbit.z p6,p7=r29,IA64_PSR_VM_BIT;
+    (p6) br.sptk kvm_alt_itlb_miss
+    mov r19 = 1
+    br.sptk kvm_itlb_miss_dispatch
+    KVM_FAULT(1);
+END(kvm_itlb_miss)
+
+    .org kvm_ia64_ivt+0x0800
+//////////////////////////////////////////////////////////////////
+// 0x0800 Entry 2 (size 64 bundles) DTLB (9,48)
+ENTRY(kvm_dtlb_miss)
+    mov r31 = pr
+    mov r29=cr.ipsr;
+    ;;
+    tbit.z p6,p7=r29,IA64_PSR_VM_BIT;
+(p6)br.sptk kvm_alt_dtlb_miss
+    br.sptk kvm_dtlb_miss_dispatch
+END(kvm_dtlb_miss)
+
+     .org kvm_ia64_ivt+0x0c00
+////////////////////////////////////////////////////////////////////
+// 0x0c00 Entry 3 (size 64 bundles) Alt ITLB (19)
+ENTRY(kvm_alt_itlb_miss)
+    mov r16=cr.ifa    // get address that caused the TLB miss
+    ;;
+    movl r17=PAGE_KERNEL
+    mov r24=cr.ipsr
+    movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
+    ;;
+    and r19=r19,r16     // clear ed, reserved bits, and PTE control bits
+    ;;
+    or r19=r17,r19      // insert PTE control bits into r19
+    ;;
+    movl r20=IA64_GRANULE_SHIFT<<2
+    ;;
+    mov cr.itir=r20
+    ;;
+    itc.i r19		// insert the TLB entry
+    mov pr=r31,-1
+    rfi
+END(kvm_alt_itlb_miss)
+
+    .org kvm_ia64_ivt+0x1000
+/////////////////////////////////////////////////////////////////////
+// 0x1000 Entry 4 (size 64 bundles) Alt DTLB (7,46)
+ENTRY(kvm_alt_dtlb_miss)
+    mov r16=cr.ifa		// get address that caused the TLB miss
+    ;;
+    movl r17=PAGE_KERNEL
+    movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
+    mov r24=cr.ipsr
+    ;;
+    and r19=r19,r16     // clear ed, reserved bits, and PTE control bits
+    ;;
+    or r19=r19,r17	// insert PTE control bits into r19
+    ;;
+    movl r20=IA64_GRANULE_SHIFT<<2
+    ;;
+    mov cr.itir=r20
+    ;;
+    itc.d r19		// insert the TLB entry
+    mov pr=r31,-1
+    rfi
+END(kvm_alt_dtlb_miss)
+
+    .org kvm_ia64_ivt+0x1400
+//////////////////////////////////////////////////////////////////////
+// 0x1400 Entry 5 (size 64 bundles) Data nested TLB (6,45)
+ENTRY(kvm_nested_dtlb_miss)
+    KVM_FAULT(5)
+END(kvm_nested_dtlb_miss)
+
+    .org kvm_ia64_ivt+0x1800
+/////////////////////////////////////////////////////////////////////
+// 0x1800 Entry 6 (size 64 bundles) Instruction Key Miss (24)
+ENTRY(kvm_ikey_miss)
+    KVM_REFLECT(6)
+END(kvm_ikey_miss)
+
+    .org kvm_ia64_ivt+0x1c00
+/////////////////////////////////////////////////////////////////////
+// 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51)
+ENTRY(kvm_dkey_miss)
+    KVM_REFLECT(7)
+END(kvm_dkey_miss)
+
+    .org kvm_ia64_ivt+0x2000
+////////////////////////////////////////////////////////////////////
+// 0x2000 Entry 8 (size 64 bundles) Dirty-bit (54)
+ENTRY(kvm_dirty_bit)
+    KVM_REFLECT(8)
+END(kvm_dirty_bit)
+
+    .org kvm_ia64_ivt+0x2400
+////////////////////////////////////////////////////////////////////
+// 0x2400 Entry 9 (size 64 bundles) Instruction Access-bit (27)
+ENTRY(kvm_iaccess_bit)
+    KVM_REFLECT(9)
+END(kvm_iaccess_bit)
+
+    .org kvm_ia64_ivt+0x2800
+///////////////////////////////////////////////////////////////////
+// 0x2800 Entry 10 (size 64 bundles) Data Access-bit (15,55)
+ENTRY(kvm_daccess_bit)
+    KVM_REFLECT(10)
+END(kvm_daccess_bit)
+
+    .org kvm_ia64_ivt+0x2c00
+/////////////////////////////////////////////////////////////////
+// 0x2c00 Entry 11 (size 64 bundles) Break instruction (33)
+ENTRY(kvm_break_fault)
+    mov r31=pr
+    mov r19=11
+    mov r29=cr.ipsr
+    ;;
+    KVM_SAVE_MIN_WITH_COVER_R19
+    ;;
+    alloc r14=ar.pfs,0,0,4,0 // now it's safe (must be first in insn group!)
+    mov out0=cr.ifa
+    mov out2=cr.isr     // FIXME: pity to make this slow access twice
+    mov out3=cr.iim     // FIXME: pity to make this slow access twice
+    adds r3=8,r2                // set up second base pointer
+    ;;
+    ssm psr.ic
+    ;;
+    srlz.i                  // guarantee that interruption collection is on
+    ;;
+    //(p15)ssm psr.i               // restore psr.i
+    addl r14=@gprel(ia64_leave_hypervisor),gp
+    ;;
+    KVM_SAVE_REST
+    mov rp=r14
+    ;;
+    adds out1=16,sp
+    br.call.sptk.many b6=kvm_ia64_handle_break
+    ;;
+END(kvm_break_fault)
+
+    .org kvm_ia64_ivt+0x3000
+/////////////////////////////////////////////////////////////////
+// 0x3000 Entry 12 (size 64 bundles) External Interrupt (4)
+ENTRY(kvm_interrupt)
+    mov r31=pr		// prepare to save predicates
+    mov r19=12
+    mov r29=cr.ipsr
+    ;;
+    tbit.z p6,p7=r29,IA64_PSR_VM_BIT
+    tbit.z p0,p15=r29,IA64_PSR_I_BIT
+    ;;
+(p7) br.sptk kvm_dispatch_interrupt
+    ;;
+    mov r27=ar.rsc		/* M */
+    mov r20=r1			/* A */
+    mov r25=ar.unat		/* M */
+    mov r26=ar.pfs		/* I */
+    mov r28=cr.iip		/* M */
+    cover			/* B (or nothing) */
+    ;;
+    mov r1=sp
+    ;;
+    invala			/* M */
+    mov r30=cr.ifs
+    ;;
+    addl r1=-VMM_PT_REGS_SIZE,r1
+    ;;
+    adds r17=2*L1_CACHE_BYTES,r1	/* really: biggest cache-line size */
+    adds r16=PT(CR_IPSR),r1
+    ;;
+    lfetch.fault.excl.nt1 [r17],L1_CACHE_BYTES
+    st8 [r16]=r29			/* save cr.ipsr */
+    ;;
+    lfetch.fault.excl.nt1 [r17]
+    mov r29=b0
+    ;;
+    adds r16=PT(R8),r1  	/* initialize first base pointer */
+    adds r17=PT(R9),r1  	/* initialize second base pointer */
+    mov r18=r0      		/* make sure r18 isn't NaT */
+    ;;
+.mem.offset 0,0; st8.spill [r16]=r8,16
+.mem.offset 8,0; st8.spill [r17]=r9,16
+        ;;
+.mem.offset 0,0; st8.spill [r16]=r10,24
+.mem.offset 8,0; st8.spill [r17]=r11,24
+        ;;
+    st8 [r16]=r28,16		/* save cr.iip */
+    st8 [r17]=r30,16		/* save cr.ifs */
+    mov r8=ar.fpsr		/* M */
+    mov r9=ar.csd
+    mov r10=ar.ssd
+    movl r11=FPSR_DEFAULT	/* L-unit */
+    ;;
+    st8 [r16]=r25,16		/* save ar.unat */
+    st8 [r17]=r26,16		/* save ar.pfs */
+    shl r18=r18,16		/* compute ar.rsc to be used for "loadrs" */
+    ;;
+    st8 [r16]=r27,16		/* save ar.rsc */
+    adds r17=16,r17		/* skip over ar_rnat field */
+    ;;
+    st8 [r17]=r31,16		/* save predicates */
+    adds r16=16,r16		/* skip over ar_bspstore field */
+    ;;
+    st8 [r16]=r29,16		/* save b0 */
+    st8 [r17]=r18,16		/* save ar.rsc value for "loadrs" */
+    ;;
+.mem.offset 0,0; st8.spill [r16]=r20,16    /* save original r1 */
+.mem.offset 8,0; st8.spill [r17]=r12,16
+    adds r12=-16,r1
+    /* switch to kernel memory stack (with 16 bytes of scratch) */
+    ;;
+.mem.offset 0,0; st8.spill [r16]=r13,16
+.mem.offset 8,0; st8.spill [r17]=r8,16 /* save ar.fpsr */
+    ;;
+.mem.offset 0,0; st8.spill [r16]=r15,16
+.mem.offset 8,0; st8.spill [r17]=r14,16
+    dep r14=-1,r0,60,4
+    ;;
+.mem.offset 0,0; st8.spill [r16]=r2,16
+.mem.offset 8,0; st8.spill [r17]=r3,16
+    adds r2=VMM_PT_REGS_R16_OFFSET,r1
+    adds r14 = VMM_VCPU_GP_OFFSET,r13
+    ;;
+    mov r8=ar.ccv
+    ld8 r14 = [r14]
+    ;;
+    mov r1=r14       /* establish kernel global pointer */
+    ;;                                          \
+    bsw.1
+    ;;
+    alloc r14=ar.pfs,0,0,1,0	// must be first in an insn group
+    mov out0=r13
+    ;;
+    ssm psr.ic
+    ;;
+    srlz.i
+    ;;
+    //(p15) ssm psr.i
+    adds r3=8,r2		// set up second base pointer for SAVE_REST
+    srlz.i			// ensure everybody knows psr.ic is back on
+    ;;
+.mem.offset 0,0; st8.spill [r2]=r16,16
+.mem.offset 8,0; st8.spill [r3]=r17,16
+    ;;
+.mem.offset 0,0; st8.spill [r2]=r18,16
+.mem.offset 8,0; st8.spill [r3]=r19,16
+    ;;
+.mem.offset 0,0; st8.spill [r2]=r20,16
+.mem.offset 8,0; st8.spill [r3]=r21,16
+    mov r18=b6
+    ;;
+.mem.offset 0,0; st8.spill [r2]=r22,16
+.mem.offset 8,0; st8.spill [r3]=r23,16
+    mov r19=b7
+    ;;
+.mem.offset 0,0; st8.spill [r2]=r24,16
+.mem.offset 8,0; st8.spill [r3]=r25,16
+    ;;
+.mem.offset 0,0; st8.spill [r2]=r26,16
+.mem.offset 8,0; st8.spill [r3]=r27,16
+    ;;
+.mem.offset 0,0; st8.spill [r2]=r28,16
+.mem.offset 8,0; st8.spill [r3]=r29,16
+    ;;
+.mem.offset 0,0; st8.spill [r2]=r30,16
+.mem.offset 8,0; st8.spill [r3]=r31,32
+    ;;
+    mov ar.fpsr=r11       /* M-unit */
+    st8 [r2]=r8,8         /* ar.ccv */
+    adds r24=PT(B6)-PT(F7),r3
+    ;;
+    stf.spill [r2]=f6,32
+    stf.spill [r3]=f7,32
+    ;;
+    stf.spill [r2]=f8,32
+    stf.spill [r3]=f9,32
+    ;;
+    stf.spill [r2]=f10
+    stf.spill [r3]=f11
+    adds r25=PT(B7)-PT(F11),r3
+    ;;
+    st8 [r24]=r18,16       /* b6 */
+    st8 [r25]=r19,16       /* b7 */
+    ;;
+    st8 [r24]=r9           /* ar.csd */
+    st8 [r25]=r10          /* ar.ssd */
+    ;;
+    srlz.d		// make sure we see the effect of cr.ivr
+    addl r14=@gprel(ia64_leave_nested),gp
+    ;;
+    mov rp=r14
+    br.call.sptk.many b6=kvm_ia64_handle_irq
+    ;;
+END(kvm_interrupt)
+
+    .global kvm_dispatch_vexirq
+    .org kvm_ia64_ivt+0x3400
+//////////////////////////////////////////////////////////////////////
+// 0x3400 Entry 13 (size 64 bundles) Reserved
+ENTRY(kvm_virtual_exirq)
+    mov r31=pr
+    mov r19=13
+    mov r30 =r0
+    ;;
+kvm_dispatch_vexirq:
+    cmp.eq p6,p0 = 1,r30
+    ;;
+(p6)add r29 = VMM_VCPU_SAVED_GP_OFFSET,r21
+    ;;
+(p6)ld8 r1 = [r29]
+    ;;
+    KVM_SAVE_MIN_WITH_COVER_R19
+    alloc r14=ar.pfs,0,0,1,0
+    mov out0=r13
+
+    ssm psr.ic
+    ;;
+    srlz.i                  // guarantee that interruption collection is on
+    ;;
+    //(p15) ssm psr.i               // restore psr.i
+    adds r3=8,r2                // set up second base pointer
+    ;;
+    KVM_SAVE_REST
+    addl r14=@gprel(ia64_leave_hypervisor),gp
+    ;;
+    mov rp=r14
+    br.call.sptk.many b6=kvm_vexirq
+END(kvm_virtual_exirq)
+
+    .org kvm_ia64_ivt+0x3800
+/////////////////////////////////////////////////////////////////////
+// 0x3800 Entry 14 (size 64 bundles) Reserved
+    KVM_FAULT(14)
+    // this code segment is from 2.6.16.13
+
+
+    .org kvm_ia64_ivt+0x3c00
+///////////////////////////////////////////////////////////////////////
+// 0x3c00 Entry 15 (size 64 bundles) Reserved
+    KVM_FAULT(15)
+
+
+    .org kvm_ia64_ivt+0x4000
+///////////////////////////////////////////////////////////////////////
+// 0x4000 Entry 16 (size 64 bundles) Reserved
+    KVM_FAULT(16)
+
+    .org kvm_ia64_ivt+0x4400
+//////////////////////////////////////////////////////////////////////
+// 0x4400 Entry 17 (size 64 bundles) Reserved
+    KVM_FAULT(17)
+
+    .org kvm_ia64_ivt+0x4800
+//////////////////////////////////////////////////////////////////////
+// 0x4800 Entry 18 (size 64 bundles) Reserved
+    KVM_FAULT(18)
+
+    .org kvm_ia64_ivt+0x4c00
+//////////////////////////////////////////////////////////////////////
+// 0x4c00 Entry 19 (size 64 bundles) Reserved
+    KVM_FAULT(19)
+
+    .org kvm_ia64_ivt+0x5000
+//////////////////////////////////////////////////////////////////////
+// 0x5000 Entry 20 (size 16 bundles) Page Not Present
+ENTRY(kvm_page_not_present)
+    KVM_REFLECT(20)
+END(kvm_page_not_present)
+
+    .org kvm_ia64_ivt+0x5100
+///////////////////////////////////////////////////////////////////////
+// 0x5100 Entry 21 (size 16 bundles) Key Permission vector
+ENTRY(kvm_key_permission)
+    KVM_REFLECT(21)
+END(kvm_key_permission)
+
+    .org kvm_ia64_ivt+0x5200
+//////////////////////////////////////////////////////////////////////
+// 0x5200 Entry 22 (size 16 bundles) Instruction Access Rights (26)
+ENTRY(kvm_iaccess_rights)
+    KVM_REFLECT(22)
+END(kvm_iaccess_rights)
+
+    .org kvm_ia64_ivt+0x5300
+//////////////////////////////////////////////////////////////////////
+// 0x5300 Entry 23 (size 16 bundles) Data Access Rights (14,53)
+ENTRY(kvm_daccess_rights)
+    KVM_REFLECT(23)
+END(kvm_daccess_rights)
+
+    .org kvm_ia64_ivt+0x5400
+/////////////////////////////////////////////////////////////////////
+// 0x5400 Entry 24 (size 16 bundles) General Exception (5,32,34,36,38,39)
+ENTRY(kvm_general_exception)
+   KVM_REFLECT(24)
+   KVM_FAULT(24)
+END(kvm_general_exception)
+
+    .org kvm_ia64_ivt+0x5500
+//////////////////////////////////////////////////////////////////////
+// 0x5500 Entry 25 (size 16 bundles) Disabled FP-Register (35)
+ENTRY(kvm_disabled_fp_reg)
+    KVM_REFLECT(25)
+END(kvm_disabled_fp_reg)
+
+    .org kvm_ia64_ivt+0x5600
+////////////////////////////////////////////////////////////////////
+// 0x5600 Entry 26 (size 16 bundles) Nat Consumption (11,23,37,50)
+ENTRY(kvm_nat_consumption)
+    KVM_REFLECT(26)
+END(kvm_nat_consumption)
+
+    .org kvm_ia64_ivt+0x5700
+/////////////////////////////////////////////////////////////////////
+// 0x5700 Entry 27 (size 16 bundles) Speculation (40)
+ENTRY(kvm_speculation_vector)
+    KVM_REFLECT(27)
+END(kvm_speculation_vector)
+
+    .org kvm_ia64_ivt+0x5800
+/////////////////////////////////////////////////////////////////////
+// 0x5800 Entry 28 (size 16 bundles) Reserved
+    KVM_FAULT(28)
+
+    .org kvm_ia64_ivt+0x5900
+///////////////////////////////////////////////////////////////////
+// 0x5900 Entry 29 (size 16 bundles) Debug (16,28,56)
+ENTRY(kvm_debug_vector)
+    KVM_FAULT(29)
+END(kvm_debug_vector)
+
+    .org kvm_ia64_ivt+0x5a00
+///////////////////////////////////////////////////////////////
+// 0x5a00 Entry 30 (size 16 bundles) Unaligned Reference (57)
+ENTRY(kvm_unaligned_access)
+    KVM_REFLECT(30)
+END(kvm_unaligned_access)
+
+    .org kvm_ia64_ivt+0x5b00
+//////////////////////////////////////////////////////////////////////
+// 0x5b00 Entry 31 (size 16 bundles) Unsupported Data Reference (57)
+ENTRY(kvm_unsupported_data_reference)
+    KVM_REFLECT(31)
+END(kvm_unsupported_data_reference)
+
+    .org kvm_ia64_ivt+0x5c00
+////////////////////////////////////////////////////////////////////
+// 0x5c00 Entry 32 (size 16 bundles) Floating Point FAULT (65)
+ENTRY(kvm_floating_point_fault)
+    KVM_REFLECT(32)
+END(kvm_floating_point_fault)
+
+    .org kvm_ia64_ivt+0x5d00
+/////////////////////////////////////////////////////////////////////
+// 0x5d00 Entry 33 (size 16 bundles) Floating Point Trap (66)
+ENTRY(kvm_floating_point_trap)
+    KVM_REFLECT(33)
+END(kvm_floating_point_trap)
+
+    .org kvm_ia64_ivt+0x5e00
+//////////////////////////////////////////////////////////////////////
+// 0x5e00 Entry 34 (size 16 bundles) Lower Privilege Transfer Trap (66)
+ENTRY(kvm_lower_privilege_trap)
+    KVM_REFLECT(34)
+END(kvm_lower_privilege_trap)
+
+    .org kvm_ia64_ivt+0x5f00
+//////////////////////////////////////////////////////////////////////
+// 0x5f00 Entry 35 (size 16 bundles) Taken Branch Trap (68)
+ENTRY(kvm_taken_branch_trap)
+    KVM_REFLECT(35)
+END(kvm_taken_branch_trap)
+
+    .org kvm_ia64_ivt+0x6000
+////////////////////////////////////////////////////////////////////
+// 0x6000 Entry 36 (size 16 bundles) Single Step Trap (69)
+ENTRY(kvm_single_step_trap)
+    KVM_REFLECT(36)
+END(kvm_single_step_trap)
+    .global kvm_virtualization_fault_back
+    .org kvm_ia64_ivt+0x6100
+/////////////////////////////////////////////////////////////////////
+// 0x6100 Entry 37 (size 16 bundles) Virtualization Fault
+ENTRY(kvm_virtualization_fault)
+    mov r31=pr
+    adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21
+    ;;
+    st8 [r16] = r1
+    adds r17 = VMM_VCPU_GP_OFFSET, r21
+    ;;
+    ld8 r1 = [r17]
+    cmp.eq p6,p0=EVENT_MOV_FROM_AR,r24
+    cmp.eq p7,p0=EVENT_MOV_FROM_RR,r24
+    cmp.eq p8,p0=EVENT_MOV_TO_RR,r24
+    cmp.eq p9,p0=EVENT_RSM,r24
+    cmp.eq p10,p0=EVENT_SSM,r24
+    cmp.eq p11,p0=EVENT_MOV_TO_PSR,r24
+    cmp.eq p12,p0=EVENT_THASH,r24
+    (p6) br.dptk.many kvm_asm_mov_from_ar
+    (p7) br.dptk.many kvm_asm_mov_from_rr
+    (p8) br.dptk.many kvm_asm_mov_to_rr
+    (p9) br.dptk.many kvm_asm_rsm
+    (p10) br.dptk.many kvm_asm_ssm
+    (p11) br.dptk.many kvm_asm_mov_to_psr
+    (p12) br.dptk.many kvm_asm_thash
+    ;;
+kvm_virtualization_fault_back:
+    adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21
+    ;;
+    ld8 r1 = [r16]
+    ;;
+    mov r19=37
+    adds r16 = VMM_VCPU_CAUSE_OFFSET,r21
+    adds r17 = VMM_VCPU_OPCODE_OFFSET,r21
+    ;;
+    st8 [r16] = r24
+    st8 [r17] = r25
+    ;;
+    cmp.ne p6,p0=EVENT_RFI, r24
+    (p6) br.sptk kvm_dispatch_virtualization_fault
+    ;;
+    adds r18=VMM_VPD_BASE_OFFSET,r21
+    ;;
+    ld8 r18=[r18]
+    ;;
+    adds r18=VMM_VPD_VIFS_OFFSET,r18
+    ;;
+    ld8 r18=[r18]
+    ;;
+    tbit.z p6,p0=r18,63
+    (p6) br.sptk kvm_dispatch_virtualization_fault
+    ;;
+    //if vifs.v=1 desert current register frame
+    alloc r18=ar.pfs,0,0,0,0
+    br.sptk kvm_dispatch_virtualization_fault
+END(kvm_virtualization_fault)
+
+    .org kvm_ia64_ivt+0x6200
+//////////////////////////////////////////////////////////////
+// 0x6200 Entry 38 (size 16 bundles) Reserved
+    KVM_FAULT(38)
+
+    .org kvm_ia64_ivt+0x6300
+/////////////////////////////////////////////////////////////////
+// 0x6300 Entry 39 (size 16 bundles) Reserved
+    KVM_FAULT(39)
+
+    .org kvm_ia64_ivt+0x6400
+/////////////////////////////////////////////////////////////////
+// 0x6400 Entry 40 (size 16 bundles) Reserved
+    KVM_FAULT(40)
+
+    .org kvm_ia64_ivt+0x6500
+//////////////////////////////////////////////////////////////////
+// 0x6500 Entry 41 (size 16 bundles) Reserved
+    KVM_FAULT(41)
+
+    .org kvm_ia64_ivt+0x6600
+//////////////////////////////////////////////////////////////////
+// 0x6600 Entry 42 (size 16 bundles) Reserved
+    KVM_FAULT(42)
+
+    .org kvm_ia64_ivt+0x6700
+//////////////////////////////////////////////////////////////////
+// 0x6700 Entry 43 (size 16 bundles) Reserved
+    KVM_FAULT(43)
+
+    .org kvm_ia64_ivt+0x6800
+//////////////////////////////////////////////////////////////////
+// 0x6800 Entry 44 (size 16 bundles) Reserved
+    KVM_FAULT(44)
+
+    .org kvm_ia64_ivt+0x6900
+///////////////////////////////////////////////////////////////////
+// 0x6900 Entry 45 (size 16 bundles) IA-32 Exeception
+//(17,18,29,41,42,43,44,58,60,61,62,72,73,75,76,77)
+ENTRY(kvm_ia32_exception)
+    KVM_FAULT(45)
+END(kvm_ia32_exception)
+
+    .org kvm_ia64_ivt+0x6a00
+////////////////////////////////////////////////////////////////////
+// 0x6a00 Entry 46 (size 16 bundles) IA-32 Intercept  (30,31,59,70,71)
+ENTRY(kvm_ia32_intercept)
+    KVM_FAULT(47)
+END(kvm_ia32_intercept)
+
+    .org kvm_ia64_ivt+0x6c00
+/////////////////////////////////////////////////////////////////////
+// 0x6c00 Entry 48 (size 16 bundles) Reserved
+    KVM_FAULT(48)
+
+    .org kvm_ia64_ivt+0x6d00
+//////////////////////////////////////////////////////////////////////
+// 0x6d00 Entry 49 (size 16 bundles) Reserved
+    KVM_FAULT(49)
+
+    .org kvm_ia64_ivt+0x6e00
+//////////////////////////////////////////////////////////////////////
+// 0x6e00 Entry 50 (size 16 bundles) Reserved
+    KVM_FAULT(50)
+
+    .org kvm_ia64_ivt+0x6f00
+/////////////////////////////////////////////////////////////////////
+// 0x6f00 Entry 51 (size 16 bundles) Reserved
+    KVM_FAULT(52)
+
+    .org kvm_ia64_ivt+0x7100
+////////////////////////////////////////////////////////////////////
+// 0x7100 Entry 53 (size 16 bundles) Reserved
+    KVM_FAULT(53)
+
+    .org kvm_ia64_ivt+0x7200
+/////////////////////////////////////////////////////////////////////
+// 0x7200 Entry 54 (size 16 bundles) Reserved
+    KVM_FAULT(54)
+
+    .org kvm_ia64_ivt+0x7300
+////////////////////////////////////////////////////////////////////
+// 0x7300 Entry 55 (size 16 bundles) Reserved
+    KVM_FAULT(55)
+
+    .org kvm_ia64_ivt+0x7400
+////////////////////////////////////////////////////////////////////
+// 0x7400 Entry 56 (size 16 bundles) Reserved
+    KVM_FAULT(56)
+
+    .org kvm_ia64_ivt+0x7500
+/////////////////////////////////////////////////////////////////////
+// 0x7500 Entry 57 (size 16 bundles) Reserved
+    KVM_FAULT(57)
+
+    .org kvm_ia64_ivt+0x7600
+/////////////////////////////////////////////////////////////////////
+// 0x7600 Entry 58 (size 16 bundles) Reserved
+    KVM_FAULT(58)
+
+    .org kvm_ia64_ivt+0x7700
+////////////////////////////////////////////////////////////////////
+// 0x7700 Entry 59 (size 16 bundles) Reserved
+    KVM_FAULT(59)
+
+    .org kvm_ia64_ivt+0x7800
+////////////////////////////////////////////////////////////////////
+// 0x7800 Entry 60 (size 16 bundles) Reserved
+    KVM_FAULT(60)
+
+    .org kvm_ia64_ivt+0x7900
+/////////////////////////////////////////////////////////////////////
+// 0x7900 Entry 61 (size 16 bundles) Reserved
+    KVM_FAULT(61)
+
+    .org kvm_ia64_ivt+0x7a00
+/////////////////////////////////////////////////////////////////////
+// 0x7a00 Entry 62 (size 16 bundles) Reserved
+    KVM_FAULT(62)
+
+    .org kvm_ia64_ivt+0x7b00
+/////////////////////////////////////////////////////////////////////
+// 0x7b00 Entry 63 (size 16 bundles) Reserved
+    KVM_FAULT(63)
+
+    .org kvm_ia64_ivt+0x7c00
+////////////////////////////////////////////////////////////////////
+// 0x7c00 Entry 64 (size 16 bundles) Reserved
+    KVM_FAULT(64)
+
+    .org kvm_ia64_ivt+0x7d00
+/////////////////////////////////////////////////////////////////////
+// 0x7d00 Entry 65 (size 16 bundles) Reserved
+    KVM_FAULT(65)
+
+    .org kvm_ia64_ivt+0x7e00
+/////////////////////////////////////////////////////////////////////
+// 0x7e00 Entry 66 (size 16 bundles) Reserved
+    KVM_FAULT(66)
+
+    .org kvm_ia64_ivt+0x7f00
+////////////////////////////////////////////////////////////////////
+// 0x7f00 Entry 67 (size 16 bundles) Reserved
+    KVM_FAULT(67)
+
+    .org kvm_ia64_ivt+0x8000
+// There is no particular reason for this code to be here, other than that
+// there happens to be space here that would go unused otherwise.  If this
+// fault ever gets "unreserved", simply moved the following code to a more
+// suitable spot...
+
+
+ENTRY(kvm_dtlb_miss_dispatch)
+    mov r19 = 2
+    KVM_SAVE_MIN_WITH_COVER_R19
+    alloc r14=ar.pfs,0,0,3,0
+    mov out0=cr.ifa
+    mov out1=r15
+    adds r3=8,r2                // set up second base pointer
+    ;;
+    ssm psr.ic
+    ;;
+    srlz.i                  // guarantee that interruption collection is on
+    ;;
+    //(p15) ssm psr.i               // restore psr.i
+    addl r14=@gprel(ia64_leave_hypervisor_prepare),gp
+    ;;
+    KVM_SAVE_REST
+    KVM_SAVE_EXTRA
+    mov rp=r14
+    ;;
+    adds out2=16,r12
+    br.call.sptk.many b6=kvm_page_fault
+END(kvm_dtlb_miss_dispatch)
+
+ENTRY(kvm_itlb_miss_dispatch)
+
+    KVM_SAVE_MIN_WITH_COVER_R19
+    alloc r14=ar.pfs,0,0,3,0
+    mov out0=cr.ifa
+    mov out1=r15
+    adds r3=8,r2                // set up second base pointer
+    ;;
+    ssm psr.ic
+    ;;
+    srlz.i                  // guarantee that interruption collection is on
+    ;;
+    //(p15) ssm psr.i               // restore psr.i
+    addl r14=@gprel(ia64_leave_hypervisor),gp
+    ;;
+    KVM_SAVE_REST
+    mov rp=r14
+    ;;
+    adds out2=16,r12
+    br.call.sptk.many b6=kvm_page_fault
+END(kvm_itlb_miss_dispatch)
+
+ENTRY(kvm_dispatch_reflection)
+    /*
+     * Input:
+     *  psr.ic: off
+     *  r19:    intr type (offset into ivt, see ia64_int.h)
+     *  r31:    contains saved predicates (pr)
+     */
+    KVM_SAVE_MIN_WITH_COVER_R19
+    alloc r14=ar.pfs,0,0,5,0
+    mov out0=cr.ifa
+    mov out1=cr.isr
+    mov out2=cr.iim
+    mov out3=r15
+    adds r3=8,r2                // set up second base pointer
+    ;;
+    ssm psr.ic
+    ;;
+    srlz.i                  // guarantee that interruption collection is on
+    ;;
+    //(p15) ssm psr.i               // restore psr.i
+    addl r14=@gprel(ia64_leave_hypervisor),gp
+    ;;
+    KVM_SAVE_REST
+    mov rp=r14
+    ;;
+    adds out4=16,r12
+    br.call.sptk.many b6=reflect_interruption
+END(kvm_dispatch_reflection)
+
+ENTRY(kvm_dispatch_virtualization_fault)
+    adds r16 = VMM_VCPU_CAUSE_OFFSET,r21
+    adds r17 = VMM_VCPU_OPCODE_OFFSET,r21
+    ;;
+    st8 [r16] = r24
+    st8 [r17] = r25
+    ;;
+    KVM_SAVE_MIN_WITH_COVER_R19
+    ;;
+    alloc r14=ar.pfs,0,0,2,0 // now it's safe (must be first in insn group!)
+    mov out0=r13        //vcpu
+    adds r3=8,r2                // set up second base pointer
+    ;;
+    ssm psr.ic
+    ;;
+    srlz.i                  // guarantee that interruption collection is on
+    ;;
+    //(p15) ssm psr.i               // restore psr.i
+    addl r14=@gprel(ia64_leave_hypervisor_prepare),gp
+    ;;
+    KVM_SAVE_REST
+    KVM_SAVE_EXTRA
+    mov rp=r14
+    ;;
+    adds out1=16,sp         //regs
+    br.call.sptk.many b6=kvm_emulate
+END(kvm_dispatch_virtualization_fault)
+
+
+ENTRY(kvm_dispatch_interrupt)
+    KVM_SAVE_MIN_WITH_COVER_R19	// uses r31; defines r2 and r3
+    ;;
+    alloc r14=ar.pfs,0,0,1,0 // must be first in an insn group
+    //mov out0=cr.ivr		// pass cr.ivr as first arg
+    adds r3=8,r2		// set up second base pointer for SAVE_REST
+    ;;
+    ssm psr.ic
+    ;;
+    srlz.i
+    ;;
+    //(p15) ssm psr.i
+    addl r14=@gprel(ia64_leave_hypervisor),gp
+    ;;
+    KVM_SAVE_REST
+    mov rp=r14
+    ;;
+    mov out0=r13		// pass pointer to pt_regs as second arg
+    br.call.sptk.many b6=kvm_ia64_handle_irq
+END(kvm_dispatch_interrupt)
+
+
+
+
+GLOBAL_ENTRY(ia64_leave_nested)
+	rsm psr.i
+	;;
+	adds r21=PT(PR)+16,r12
+	;;
+	lfetch [r21],PT(CR_IPSR)-PT(PR)
+	adds r2=PT(B6)+16,r12
+	adds r3=PT(R16)+16,r12
+	;;
+	lfetch [r21]
+	ld8 r28=[r2],8		// load b6
+	adds r29=PT(R24)+16,r12
+
+	ld8.fill r16=[r3]
+	adds r3=PT(AR_CSD)-PT(R16),r3
+	adds r30=PT(AR_CCV)+16,r12
+	;;
+	ld8.fill r24=[r29]
+	ld8 r15=[r30]		// load ar.ccv
+	;;
+	ld8 r29=[r2],16		// load b7
+	ld8 r30=[r3],16		// load ar.csd
+	;;
+	ld8 r31=[r2],16		// load ar.ssd
+	ld8.fill r8=[r3],16
+	;;
+	ld8.fill r9=[r2],16
+	ld8.fill r10=[r3],PT(R17)-PT(R10)
+	;;
+	ld8.fill r11=[r2],PT(R18)-PT(R11)
+	ld8.fill r17=[r3],16
+	;;
+	ld8.fill r18=[r2],16
+	ld8.fill r19=[r3],16
+	;;
+	ld8.fill r20=[r2],16
+	ld8.fill r21=[r3],16
+	mov ar.csd=r30
+	mov ar.ssd=r31
+	;;
+	rsm psr.i | psr.ic
+	// initiate turning off of interrupt and interruption collection
+	invala			// invalidate ALAT
+	;;
+	srlz.i
+	;;
+	ld8.fill r22=[r2],24
+	ld8.fill r23=[r3],24
+	mov b6=r28
+	;;
+	ld8.fill r25=[r2],16
+	ld8.fill r26=[r3],16
+	mov b7=r29
+	;;
+	ld8.fill r27=[r2],16
+	ld8.fill r28=[r3],16
+	;;
+	ld8.fill r29=[r2],16
+	ld8.fill r30=[r3],24
+	;;
+	ld8.fill r31=[r2],PT(F9)-PT(R31)
+	adds r3=PT(F10)-PT(F6),r3
+	;;
+	ldf.fill f9=[r2],PT(F6)-PT(F9)
+	ldf.fill f10=[r3],PT(F8)-PT(F10)
+	;;
+	ldf.fill f6=[r2],PT(F7)-PT(F6)
+	;;
+	ldf.fill f7=[r2],PT(F11)-PT(F7)
+	ldf.fill f8=[r3],32
+	;;
+	srlz.i			// ensure interruption collection is off
+	mov ar.ccv=r15
+	;;
+	bsw.0	// switch back to bank 0 (no stop bit required beforehand...)
+	;;
+	ldf.fill f11=[r2]
+//	mov r18=r13
+//    mov r21=r13
+	adds r16=PT(CR_IPSR)+16,r12
+	adds r17=PT(CR_IIP)+16,r12
+	;;
+	ld8 r29=[r16],16	// load cr.ipsr
+	ld8 r28=[r17],16	// load cr.iip
+	;;
+	ld8 r30=[r16],16	// load cr.ifs
+	ld8 r25=[r17],16	// load ar.unat
+	;;
+	ld8 r26=[r16],16	// load ar.pfs
+	ld8 r27=[r17],16	// load ar.rsc
+	cmp.eq p9,p0=r0,r0
+	// set p9 to indicate that we should restore cr.ifs
+	;;
+	ld8 r24=[r16],16	// load ar.rnat (may be garbage)
+	ld8 r23=[r17],16// load ar.bspstore (may be garbage)
+	;;
+	ld8 r31=[r16],16	// load predicates
+	ld8 r22=[r17],16	// load b0
+	;;
+	ld8 r19=[r16],16	// load ar.rsc value for "loadrs"
+	ld8.fill r1=[r17],16	// load r1
+	;;
+	ld8.fill r12=[r16],16
+	ld8.fill r13=[r17],16
+	;;
+	ld8 r20=[r16],16	// ar.fpsr
+	ld8.fill r15=[r17],16
+	;;
+	ld8.fill r14=[r16],16
+	ld8.fill r2=[r17]
+	;;
+	ld8.fill r3=[r16]
+	;;
+	mov r16=ar.bsp		// get existing backing store pointer
+	;;
+	mov b0=r22
+	mov ar.pfs=r26
+	mov cr.ifs=r30
+	mov cr.ipsr=r29
+	mov ar.fpsr=r20
+	mov cr.iip=r28
+	;;
+	mov ar.rsc=r27
+	mov ar.unat=r25
+	mov pr=r31,-1
+	rfi
+END(ia64_leave_nested)
+
+
+
+GLOBAL_ENTRY(ia64_leave_hypervisor_prepare)
+    /*
+     * work.need_resched etc. mustn't get changed
+     *by this CPU before it returns to
+    ;;
+     * user- or fsys-mode, hence we disable interrupts early on:
+     */
+    adds r2 = PT(R4)+16,r12
+    adds r3 = PT(R5)+16,r12
+    adds r8 = PT(EML_UNAT)+16,r12
+    ;;
+    ld8 r8 = [r8]
+    ;;
+    mov ar.unat=r8
+    ;;
+    ld8.fill r4=[r2],16    //load r4
+    ld8.fill r5=[r3],16    //load r5
+    ;;
+    ld8.fill r6=[r2]    //load r6
+    ld8.fill r7=[r3]    //load r7
+    ;;
+END(ia64_leave_hypervisor_prepare)
+//fall through
+GLOBAL_ENTRY(ia64_leave_hypervisor)
+    rsm psr.i
+    ;;
+    br.call.sptk.many b0=leave_hypervisor_tail
+    ;;
+    adds r20=PT(PR)+16,r12
+    adds r8=PT(EML_UNAT)+16,r12
+    ;;
+    ld8 r8=[r8]
+    ;;
+    mov ar.unat=r8
+    ;;
+    lfetch [r20],PT(CR_IPSR)-PT(PR)
+    adds r2 = PT(B6)+16,r12
+    adds r3 = PT(B7)+16,r12
+    ;;
+    lfetch [r20]
+    ;;
+    ld8 r24=[r2],16        /* B6 */
+    ld8 r25=[r3],16        /* B7 */
+    ;;
+    ld8 r26=[r2],16        /* ar_csd */
+    ld8 r27=[r3],16        /* ar_ssd */
+    mov b6 = r24
+    ;;
+    ld8.fill r8=[r2],16
+    ld8.fill r9=[r3],16
+    mov b7 = r25
+    ;;
+    mov ar.csd = r26
+    mov ar.ssd = r27
+    ;;
+    ld8.fill r10=[r2],PT(R15)-PT(R10)
+    ld8.fill r11=[r3],PT(R14)-PT(R11)
+    ;;
+    ld8.fill r15=[r2],PT(R16)-PT(R15)
+    ld8.fill r14=[r3],PT(R17)-PT(R14)
+    ;;
+    ld8.fill r16=[r2],16
+    ld8.fill r17=[r3],16
+    ;;
+    ld8.fill r18=[r2],16
+    ld8.fill r19=[r3],16
+    ;;
+    ld8.fill r20=[r2],16
+    ld8.fill r21=[r3],16
+    ;;
+    ld8.fill r22=[r2],16
+    ld8.fill r23=[r3],16
+    ;;
+    ld8.fill r24=[r2],16
+    ld8.fill r25=[r3],16
+    ;;
+    ld8.fill r26=[r2],16
+    ld8.fill r27=[r3],16
+    ;;
+    ld8.fill r28=[r2],16
+    ld8.fill r29=[r3],16
+    ;;
+    ld8.fill r30=[r2],PT(F6)-PT(R30)
+    ld8.fill r31=[r3],PT(F7)-PT(R31)
+    ;;
+    rsm psr.i | psr.ic
+    // initiate turning off of interrupt and interruption collection
+    invala          // invalidate ALAT
+    ;;
+    srlz.i          // ensure interruption collection is off
+    ;;
+    bsw.0
+    ;;
+    adds r16 = PT(CR_IPSR)+16,r12
+    adds r17 = PT(CR_IIP)+16,r12
+    mov r21=r13		// get current
+    ;;
+    ld8 r31=[r16],16    // load cr.ipsr
+    ld8 r30=[r17],16    // load cr.iip
+    ;;
+    ld8 r29=[r16],16    // load cr.ifs
+    ld8 r28=[r17],16    // load ar.unat
+    ;;
+    ld8 r27=[r16],16    // load ar.pfs
+    ld8 r26=[r17],16    // load ar.rsc
+    ;;
+    ld8 r25=[r16],16    // load ar.rnat
+    ld8 r24=[r17],16    // load ar.bspstore
+    ;;
+    ld8 r23=[r16],16    // load predicates
+    ld8 r22=[r17],16    // load b0
+    ;;
+    ld8 r20=[r16],16    // load ar.rsc value for "loadrs"
+    ld8.fill r1=[r17],16    //load r1
+    ;;
+    ld8.fill r12=[r16],16    //load r12
+    ld8.fill r13=[r17],PT(R2)-PT(R13)    //load r13
+    ;;
+    ld8 r19=[r16],PT(R3)-PT(AR_FPSR)    //load ar_fpsr
+    ld8.fill r2=[r17],PT(AR_CCV)-PT(R2)    //load r2
+    ;;
+    ld8.fill r3=[r16]	//load r3
+    ld8 r18=[r17]	//load ar_ccv
+    ;;
+    mov ar.fpsr=r19
+    mov ar.ccv=r18
+    shr.u r18=r20,16
+    ;;
+kvm_rbs_switch:
+    mov r19=96
+
+kvm_dont_preserve_current_frame:
+/*
+    * To prevent leaking bits between the hypervisor and guest domain,
+    * we must clear the stacked registers in the "invalid" partition here.
+    * 5 registers/cycle on McKinley).
+    */
+#   define pRecurse	p6
+#   define pReturn	p7
+#   define Nregs	14
+
+    alloc loc0=ar.pfs,2,Nregs-2,2,0
+    shr.u loc1=r18,9		// RNaTslots <= floor(dirtySize / (64*8))
+    sub r19=r19,r18		// r19 = (physStackedSize + 8) - dirtySize
+    ;;
+    mov ar.rsc=r20		// load ar.rsc to be used for "loadrs"
+    shladd in0=loc1,3,r19
+    mov in1=0
+    ;;
+    TEXT_ALIGN(32)
+kvm_rse_clear_invalid:
+    alloc loc0=ar.pfs,2,Nregs-2,2,0
+    cmp.lt pRecurse,p0=Nregs*8,in0
+    // if more than Nregs regs left to clear, (re)curse
+    add out0=-Nregs*8,in0
+    add out1=1,in1		// increment recursion count
+    mov loc1=0
+    mov loc2=0
+    ;;
+    mov loc3=0
+    mov loc4=0
+    mov loc5=0
+    mov loc6=0
+    mov loc7=0
+(pRecurse) br.call.dptk.few b0=kvm_rse_clear_invalid
+    ;;
+    mov loc8=0
+    mov loc9=0
+    cmp.ne pReturn,p0=r0,in1
+    // if recursion count != 0, we need to do a br.ret
+    mov loc10=0
+    mov loc11=0
+(pReturn) br.ret.dptk.many b0
+
+#	undef pRecurse
+#	undef pReturn
+
+// loadrs has already been shifted
+    alloc r16=ar.pfs,0,0,0,0    // drop current register frame
+    ;;
+    loadrs
+    ;;
+    mov ar.bspstore=r24
+    ;;
+    mov ar.unat=r28
+    mov ar.rnat=r25
+    mov ar.rsc=r26
+    ;;
+    mov cr.ipsr=r31
+    mov cr.iip=r30
+    mov cr.ifs=r29
+    mov ar.pfs=r27
+    adds r18=VMM_VPD_BASE_OFFSET,r21
+    ;;
+    ld8 r18=[r18]   //vpd
+    adds r17=VMM_VCPU_ISR_OFFSET,r21
+    ;;
+    ld8 r17=[r17]
+    adds r19=VMM_VPD_VPSR_OFFSET,r18
+    ;;
+    ld8 r19=[r19]        //vpsr
+    adds r20=VMM_VCPU_VSA_BASE_OFFSET,r21
+    ;;
+    ld8 r20=[r20]
+    ;;
+//vsa_sync_write_start
+    mov r25=r18
+    adds r16= VMM_VCPU_GP_OFFSET,r21
+    ;;
+    ld8 r16= [r16] // Put gp in r24
+    movl r24=@gprel(ia64_vmm_entry)  // calculate return address
+    ;;
+    add  r24=r24,r16
+    ;;
+    add r16=PAL_VPS_SYNC_WRITE,r20
+    ;;
+    mov b0=r16
+    br.cond.sptk b0         // call the service
+    ;;
+END(ia64_leave_hypervisor)
+// fall through
+GLOBAL_ENTRY(ia64_vmm_entry)
+/*
+ *  must be at bank 0
+ *  parameter:
+ *  r17:cr.isr
+ *  r18:vpd
+ *  r19:vpsr
+ *  r20:__vsa_base
+ *  r22:b0
+ *  r23:predicate
+ */
+    mov r24=r22
+    mov r25=r18
+    tbit.nz p1,p2 = r19,IA64_PSR_IC_BIT        // p1=vpsr.ic
+    ;;
+    (p1) add r29=PAL_VPS_RESUME_NORMAL,r20
+    (p1) br.sptk.many ia64_vmm_entry_out
+    ;;
+    tbit.nz p1,p2 = r17,IA64_ISR_IR_BIT		//p1=cr.isr.ir
+    ;;
+    (p1) add r29=PAL_VPS_RESUME_NORMAL,r20
+    (p2) add r29=PAL_VPS_RESUME_HANDLER,r20
+    (p2) ld8 r26=[r25]
+    ;;
+ia64_vmm_entry_out:
+    mov pr=r23,-2
+    mov b0=r29
+    ;;
+    br.cond.sptk b0             // call pal service
+END(ia64_vmm_entry)
+
+
+
+/*
+ * extern u64 ia64_call_vsa(u64 proc, u64 arg1, u64 arg2,
+ *                  u64 arg3, u64 arg4, u64 arg5,
+ *                  u64 arg6, u64 arg7);
+ *
+ * XXX: The currently defined services use only 4 args at the max. The
+ *  rest are not consumed.
+ */
+GLOBAL_ENTRY(ia64_call_vsa)
+    .regstk 4,4,0,0
+
+rpsave  =   loc0
+pfssave =   loc1
+psrsave =   loc2
+entry   =   loc3
+hostret =   r24
+
+    alloc   pfssave=ar.pfs,4,4,0,0
+    mov rpsave=rp
+    adds entry=VMM_VCPU_VSA_BASE_OFFSET, r13
+    ;;
+    ld8 entry=[entry]
+1:  mov hostret=ip
+    mov r25=in1         // copy arguments
+    mov r26=in2
+    mov r27=in3
+    mov psrsave=psr
+    ;;
+    tbit.nz p6,p0=psrsave,14    // IA64_PSR_I
+    tbit.nz p7,p0=psrsave,13    // IA64_PSR_IC
+    ;;
+    add hostret=2f-1b,hostret   // calculate return address
+    add entry=entry,in0
+    ;;
+    rsm psr.i | psr.ic
+    ;;
+    srlz.i
+    mov b6=entry
+    br.cond.sptk b6         // call the service
+2:
+    // Architectural sequence for enabling interrupts if necessary
+(p7)    ssm psr.ic
+    ;;
+(p7)    srlz.i
+    ;;
+//(p6)    ssm psr.i
+    ;;
+    mov rp=rpsave
+    mov ar.pfs=pfssave
+    mov r8=r31
+    ;;
+    srlz.d
+    br.ret.sptk rp
+
+END(ia64_call_vsa)
+
+#define  INIT_BSPSTORE  ((4<<30)-(12<<20)-0x100)
+
+GLOBAL_ENTRY(vmm_reset_entry)
+    //set up ipsr, iip, vpd.vpsr, dcr
+    // For IPSR: it/dt/rt=1, i/ic=1, si=1, vm/bn=1
+    // For DCR: all bits 0
+    adds r14=-VMM_PT_REGS_SIZE, r12
+    ;;
+    movl r6=0x501008826000      // IPSR dt/rt/it:1;i/ic:1, si:1, vm/bn:1
+    movl r10=0x8000000000000000
+    adds r16=PT(CR_IIP), r14
+    adds r20=PT(R1), r14
+    ;;
+    rsm psr.ic | psr.i
+    ;;
+    srlz.i
+    ;;
+    bsw.0
+    ;;
+    mov r21 =r13
+    ;;
+    bsw.1
+    ;;
+    mov ar.rsc = 0
+    ;;
+    flushrs
+    ;;
+    mov ar.bspstore = 0
+    // clear BSPSTORE
+    ;;
+    mov cr.ipsr=r6
+    mov cr.ifs=r10
+    ld8 r4 = [r16] // Set init iip for first run.
+    ld8 r1 = [r20]
+    ;;
+    mov cr.iip=r4
+    ;;
+    adds r16=VMM_VPD_BASE_OFFSET,r13
+    adds r20=VMM_VCPU_VSA_BASE_OFFSET,r13
+    ;;
+    ld8 r18=[r16]
+    ld8 r20=[r20]
+    ;;
+    adds r19=VMM_VPD_VPSR_OFFSET,r18
+    ;;
+    ld8 r19=[r19]
+    mov r17=r0
+    mov r22=r0
+    mov r23=r0
+    br.cond.sptk ia64_vmm_entry
+    br.ret.sptk  b0
+END(vmm_reset_entry)
diff --git a/arch/ia64/kvm/vti.h b/arch/ia64/kvm/vti.h
new file mode 100644
index 0000000..f6c5617
--- /dev/null
+++ b/arch/ia64/kvm/vti.h
@@ -0,0 +1,290 @@
+/*
+ * vti.h: prototype for generial vt related interface
+ *   	Copyright (c) 2004, Intel Corporation.
+ *
+ *	Xuefei Xu (Anthony Xu) (anthony.xu@intel.com)
+ *	Fred Yang (fred.yang@intel.com)
+ * 	Kun Tian (Kevin Tian) (kevin.tian@intel.com)
+ *
+ *  	Copyright (c) 2007, Intel Corporation.
+ *  	Zhang xiantao <xiantao.zhang@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ */
+#ifndef _KVM_VT_I_H
+#define _KVM_VT_I_H
+
+#ifndef __ASSEMBLY__
+#include <asm/page.h>
+
+#include <linux/kvm_host.h>
+
+/* define itr.i and itr.d  in ia64_itr function */
+#define	ITR	0x01
+#define	DTR	0x02
+#define	IaDTR	0x03
+
+#define IA64_TR_VMM       6 /*itr6, dtr6 : maps vmm code, vmbuffer*/
+#define IA64_TR_VM_DATA   7 /*dtr7       : maps current vm data*/
+
+#define RR6 (6UL<<61)
+#define RR7 (7UL<<61)
+
+
+/* config_options in pal_vp_init_env */
+#define	VP_INITIALIZE	1UL
+#define	VP_FR_PMC	1UL<<1
+#define	VP_OPCODE	1UL<<8
+#define	VP_CAUSE	1UL<<9
+#define VP_FW_ACC   	1UL<<63
+
+/* init vp env with initializing vm_buffer */
+#define	VP_INIT_ENV_INITALIZE  (VP_INITIALIZE | VP_FR_PMC |\
+	VP_OPCODE | VP_CAUSE | VP_FW_ACC)
+/* init vp env without initializing vm_buffer */
+#define	VP_INIT_ENV  VP_FR_PMC | VP_OPCODE | VP_CAUSE | VP_FW_ACC
+
+#define		PAL_VP_CREATE   265
+/* Stacked Virt. Initializes a new VPD for the operation of
+ * a new virtual processor in the virtual environment.
+ */
+#define		PAL_VP_ENV_INFO 266
+/*Stacked Virt. Returns the parameters needed to enter a virtual environment.*/
+#define		PAL_VP_EXIT_ENV 267
+/*Stacked Virt. Allows a logical processor to exit a virtual environment.*/
+#define		PAL_VP_INIT_ENV 268
+/*Stacked Virt. Allows a logical processor to enter a virtual environment.*/
+#define		PAL_VP_REGISTER 269
+/*Stacked Virt. Register a different host IVT for the virtual processor.*/
+#define		PAL_VP_RESUME   270
+/* Renamed from PAL_VP_RESUME */
+#define		PAL_VP_RESTORE  270
+/*Stacked Virt. Resumes virtual processor operation on the logical processor.*/
+#define		PAL_VP_SUSPEND  271
+/* Renamed from PAL_VP_SUSPEND */
+#define		PAL_VP_SAVE	271
+/* Stacked Virt. Suspends operation for the specified virtual processor on
+ * the logical processor.
+ */
+#define		PAL_VP_TERMINATE 272
+/* Stacked Virt. Terminates operation for the specified virtual processor.*/
+
+union vac {
+	unsigned long value;
+	struct {
+		int a_int:1;
+		int a_from_int_cr:1;
+		int a_to_int_cr:1;
+		int a_from_psr:1;
+		int a_from_cpuid:1;
+		int a_cover:1;
+		int a_bsw:1;
+		long reserved:57;
+	};
+};
+
+union vdc {
+	unsigned long value;
+	struct {
+		int d_vmsw:1;
+		int d_extint:1;
+		int d_ibr_dbr:1;
+		int d_pmc:1;
+		int d_to_pmd:1;
+		int d_itm:1;
+		long reserved:58;
+	};
+};
+
+struct vpd {
+	union vac   vac;
+	union vdc   vdc;
+	unsigned long  virt_env_vaddr;
+	unsigned long  reserved1[29];
+	unsigned long  vhpi;
+	unsigned long  reserved2[95];
+	unsigned long  vgr[16];
+	unsigned long  vbgr[16];
+	unsigned long  vnat;
+	unsigned long  vbnat;
+	unsigned long  vcpuid[5];
+	unsigned long  reserved3[11];
+	unsigned long  vpsr;
+	unsigned long  vpr;
+	unsigned long  reserved4[76];
+	union {
+		unsigned long  vcr[128];
+		struct {
+			unsigned long dcr;
+			unsigned long itm;
+			unsigned long iva;
+			unsigned long rsv1[5];
+			unsigned long pta;
+			unsigned long rsv2[7];
+			unsigned long ipsr;
+			unsigned long isr;
+			unsigned long rsv3;
+			unsigned long iip;
+			unsigned long ifa;
+			unsigned long itir;
+			unsigned long iipa;
+			unsigned long ifs;
+			unsigned long iim;
+			unsigned long iha;
+			unsigned long rsv4[38];
+			unsigned long lid;
+			unsigned long ivr;
+			unsigned long tpr;
+			unsigned long eoi;
+			unsigned long irr[4];
+			unsigned long itv;
+			unsigned long pmv;
+			unsigned long cmcv;
+			unsigned long rsv5[5];
+			unsigned long lrr0;
+			unsigned long lrr1;
+			unsigned long rsv6[46];
+		};
+	};
+	unsigned long  reserved5[128];
+	unsigned long  reserved6[3456];
+	unsigned long  vmm_avail[128];
+	unsigned long  reserved7[4096];
+};
+
+#define PAL_PROC_VM_BIT		(1UL << 40)
+#define PAL_PROC_VMSW_BIT	(1UL << 54)
+
+static inline s64 ia64_pal_vp_env_info(u64 *buffer_size,
+		u64 *vp_env_info)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL_STK(iprv, PAL_VP_ENV_INFO, 0, 0, 0);
+	*buffer_size = iprv.v0;
+	*vp_env_info = iprv.v1;
+	return iprv.status;
+}
+
+static inline s64 ia64_pal_vp_exit_env(u64 iva)
+{
+	struct ia64_pal_retval iprv;
+
+	PAL_CALL_STK(iprv, PAL_VP_EXIT_ENV, (u64)iva, 0, 0);
+	return iprv.status;
+}
+
+static inline s64 ia64_pal_vp_init_env(u64 config_options, u64 pbase_addr,
+			u64 vbase_addr, u64 *vsa_base)
+{
+	struct ia64_pal_retval iprv;
+
+	PAL_CALL_STK(iprv, PAL_VP_INIT_ENV, config_options, pbase_addr,
+			vbase_addr);
+	*vsa_base = iprv.v0;
+
+	return iprv.status;
+}
+
+static inline s64 ia64_pal_vp_restore(u64 *vpd, u64 pal_proc_vector)
+{
+	struct ia64_pal_retval iprv;
+
+	PAL_CALL_STK(iprv, PAL_VP_RESTORE, (u64)vpd, pal_proc_vector, 0);
+
+	return iprv.status;
+}
+
+static inline s64 ia64_pal_vp_save(u64 *vpd, u64 pal_proc_vector)
+{
+	struct ia64_pal_retval iprv;
+
+	PAL_CALL_STK(iprv, PAL_VP_SAVE, (u64)vpd, pal_proc_vector, 0);
+
+	return iprv.status;
+}
+
+#endif
+
+/*VPD field offset*/
+#define VPD_VAC_START_OFFSET		0
+#define VPD_VDC_START_OFFSET		8
+#define VPD_VHPI_START_OFFSET		256
+#define VPD_VGR_START_OFFSET		1024
+#define VPD_VBGR_START_OFFSET		1152
+#define VPD_VNAT_START_OFFSET		1280
+#define VPD_VBNAT_START_OFFSET		1288
+#define VPD_VCPUID_START_OFFSET		1296
+#define VPD_VPSR_START_OFFSET		1424
+#define VPD_VPR_START_OFFSET		1432
+#define VPD_VRSE_CFLE_START_OFFSET	1440
+#define VPD_VCR_START_OFFSET		2048
+#define VPD_VTPR_START_OFFSET		2576
+#define VPD_VRR_START_OFFSET		3072
+#define VPD_VMM_VAIL_START_OFFSET	31744
+
+/*Virtualization faults*/
+
+#define EVENT_MOV_TO_AR			 1
+#define EVENT_MOV_TO_AR_IMM		 2
+#define EVENT_MOV_FROM_AR		 3
+#define EVENT_MOV_TO_CR			 4
+#define EVENT_MOV_FROM_CR		 5
+#define EVENT_MOV_TO_PSR		 6
+#define EVENT_MOV_FROM_PSR		 7
+#define EVENT_ITC_D			 8
+#define EVENT_ITC_I			 9
+#define EVENT_MOV_TO_RR			 10
+#define EVENT_MOV_TO_DBR		 11
+#define EVENT_MOV_TO_IBR		 12
+#define EVENT_MOV_TO_PKR		 13
+#define EVENT_MOV_TO_PMC		 14
+#define EVENT_MOV_TO_PMD		 15
+#define EVENT_ITR_D			 16
+#define EVENT_ITR_I			 17
+#define EVENT_MOV_FROM_RR		 18
+#define EVENT_MOV_FROM_DBR		 19
+#define EVENT_MOV_FROM_IBR		 20
+#define EVENT_MOV_FROM_PKR		 21
+#define EVENT_MOV_FROM_PMC		 22
+#define EVENT_MOV_FROM_CPUID		 23
+#define EVENT_SSM			 24
+#define EVENT_RSM			 25
+#define EVENT_PTC_L			 26
+#define EVENT_PTC_G			 27
+#define EVENT_PTC_GA			 28
+#define EVENT_PTR_D			 29
+#define EVENT_PTR_I			 30
+#define EVENT_THASH			 31
+#define EVENT_TTAG			 32
+#define EVENT_TPA			 33
+#define EVENT_TAK			 34
+#define EVENT_PTC_E			 35
+#define EVENT_COVER			 36
+#define EVENT_RFI			 37
+#define EVENT_BSW_0			 38
+#define EVENT_BSW_1			 39
+#define EVENT_VMSW			 40
+
+/**PAL virtual services offsets */
+#define PAL_VPS_RESUME_NORMAL           0x0000
+#define PAL_VPS_RESUME_HANDLER          0x0400
+#define PAL_VPS_SYNC_READ               0x0800
+#define PAL_VPS_SYNC_WRITE              0x0c00
+#define PAL_VPS_SET_PENDING_INTERRUPT   0x1000
+#define PAL_VPS_THASH                   0x1400
+#define PAL_VPS_TTAG                    0x1800
+#define PAL_VPS_RESTORE                 0x1c00
+#define PAL_VPS_SAVE                    0x2000
+
+#endif/* _VT_I_H*/
diff --git a/arch/ia64/kvm/vtlb.c b/arch/ia64/kvm/vtlb.c
new file mode 100644
index 0000000..def4576
--- /dev/null
+++ b/arch/ia64/kvm/vtlb.c
@@ -0,0 +1,636 @@
+/*
+ * vtlb.c: guest virtual tlb handling module.
+ * Copyright (c) 2004, Intel Corporation.
+ *  Yaozu Dong (Eddie Dong) <Eddie.dong@intel.com>
+ *  Xuefei Xu (Anthony Xu) <anthony.xu@intel.com>
+ *
+ * Copyright (c) 2007, Intel Corporation.
+ *  Xuefei Xu (Anthony Xu) <anthony.xu@intel.com>
+ *  Xiantao Zhang <xiantao.zhang@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+#include "vcpu.h"
+
+#include <linux/rwsem.h>
+
+#include <asm/tlb.h>
+
+/*
+ * Check to see if the address rid:va is translated by the TLB
+ */
+
+static int __is_tr_translated(struct thash_data *trp, u64 rid, u64 va)
+{
+	return ((trp->p) && (trp->rid == rid)
+				&& ((va-trp->vadr) < PSIZE(trp->ps)));
+}
+
+/*
+ * Only for GUEST TR format.
+ */
+static int __is_tr_overlap(struct thash_data *trp, u64 rid, u64 sva, u64 eva)
+{
+	u64 sa1, ea1;
+
+	if (!trp->p || trp->rid != rid)
+		return 0;
+
+	sa1 = trp->vadr;
+	ea1 = sa1 + PSIZE(trp->ps) - 1;
+	eva -= 1;
+	if ((sva > ea1) || (sa1 > eva))
+		return 0;
+	else
+		return 1;
+
+}
+
+void machine_tlb_purge(u64 va, u64 ps)
+{
+	ia64_ptcl(va, ps << 2);
+}
+
+void local_flush_tlb_all(void)
+{
+	int i, j;
+	unsigned long flags, count0, count1;
+	unsigned long stride0, stride1, addr;
+
+	addr    = current_vcpu->arch.ptce_base;
+	count0  = current_vcpu->arch.ptce_count[0];
+	count1  = current_vcpu->arch.ptce_count[1];
+	stride0 = current_vcpu->arch.ptce_stride[0];
+	stride1 = current_vcpu->arch.ptce_stride[1];
+
+	local_irq_save(flags);
+	for (i = 0; i < count0; ++i) {
+		for (j = 0; j < count1; ++j) {
+			ia64_ptce(addr);
+			addr += stride1;
+		}
+		addr += stride0;
+	}
+	local_irq_restore(flags);
+	ia64_srlz_i();          /* srlz.i implies srlz.d */
+}
+
+int vhpt_enabled(struct kvm_vcpu *vcpu, u64 vadr, enum vhpt_ref ref)
+{
+	union ia64_rr    vrr;
+	union ia64_pta   vpta;
+	struct  ia64_psr   vpsr;
+
+	vpsr = *(struct ia64_psr *)&VCPU(vcpu, vpsr);
+	vrr.val = vcpu_get_rr(vcpu, vadr);
+	vpta.val = vcpu_get_pta(vcpu);
+
+	if (vrr.ve & vpta.ve) {
+		switch (ref) {
+		case DATA_REF:
+		case NA_REF:
+			return vpsr.dt;
+		case INST_REF:
+			return vpsr.dt && vpsr.it && vpsr.ic;
+		case RSE_REF:
+			return vpsr.dt && vpsr.rt;
+
+		}
+	}
+	return 0;
+}
+
+struct thash_data *vsa_thash(union ia64_pta vpta, u64 va, u64 vrr, u64 *tag)
+{
+	u64 index, pfn, rid, pfn_bits;
+
+	pfn_bits = vpta.size - 5 - 8;
+	pfn = REGION_OFFSET(va) >> _REGION_PAGE_SIZE(vrr);
+	rid = _REGION_ID(vrr);
+	index = ((rid & 0xff) << pfn_bits)|(pfn & ((1UL << pfn_bits) - 1));
+	*tag = ((rid >> 8) & 0xffff) | ((pfn >> pfn_bits) << 16);
+
+	return (struct thash_data *)((vpta.base << PTA_BASE_SHIFT) +
+				(index << 5));
+}
+
+struct thash_data *__vtr_lookup(struct kvm_vcpu *vcpu, u64 va, int type)
+{
+
+	struct thash_data *trp;
+	int  i;
+	u64 rid;
+
+	rid = vcpu_get_rr(vcpu, va);
+	rid = rid & RR_RID_MASK;;
+	if (type == D_TLB) {
+		if (vcpu_quick_region_check(vcpu->arch.dtr_regions, va)) {
+			for (trp = (struct thash_data *)&vcpu->arch.dtrs, i = 0;
+						i < NDTRS; i++, trp++) {
+				if (__is_tr_translated(trp, rid, va))
+					return trp;
+			}
+		}
+	} else {
+		if (vcpu_quick_region_check(vcpu->arch.itr_regions, va)) {
+			for (trp = (struct thash_data *)&vcpu->arch.itrs, i = 0;
+					i < NITRS; i++, trp++) {
+				if (__is_tr_translated(trp, rid, va))
+					return trp;
+			}
+		}
+	}
+
+	return NULL;
+}
+
+static void vhpt_insert(u64 pte, u64 itir, u64 ifa, u64 gpte)
+{
+	union ia64_rr rr;
+	struct thash_data *head;
+	unsigned long ps, gpaddr;
+
+	ps = itir_ps(itir);
+
+	gpaddr = ((gpte & _PAGE_PPN_MASK) >> ps << ps) |
+		(ifa & ((1UL << ps) - 1));
+
+	rr.val = ia64_get_rr(ifa);
+	head = (struct thash_data *)ia64_thash(ifa);
+	head->etag = INVALID_TI_TAG;
+	ia64_mf();
+	head->page_flags = pte & ~PAGE_FLAGS_RV_MASK;
+	head->itir = rr.ps << 2;
+	head->etag = ia64_ttag(ifa);
+	head->gpaddr = gpaddr;
+}
+
+void mark_pages_dirty(struct kvm_vcpu *v, u64 pte, u64 ps)
+{
+	u64 i, dirty_pages = 1;
+	u64 base_gfn = (pte&_PAGE_PPN_MASK) >> PAGE_SHIFT;
+	spinlock_t *lock = __kvm_va(v->arch.dirty_log_lock_pa);
+	void *dirty_bitmap = (void *)v - (KVM_VCPU_OFS + v->vcpu_id * VCPU_SIZE)
+						+ KVM_MEM_DIRTY_LOG_OFS;
+	dirty_pages <<= ps <= PAGE_SHIFT ? 0 : ps - PAGE_SHIFT;
+
+	vmm_spin_lock(lock);
+	for (i = 0; i < dirty_pages; i++) {
+		/* avoid RMW */
+		if (!test_bit(base_gfn + i, dirty_bitmap))
+			set_bit(base_gfn + i , dirty_bitmap);
+	}
+	vmm_spin_unlock(lock);
+}
+
+void thash_vhpt_insert(struct kvm_vcpu *v, u64 pte, u64 itir, u64 va, int type)
+{
+	u64 phy_pte, psr;
+	union ia64_rr mrr;
+
+	mrr.val = ia64_get_rr(va);
+	phy_pte = translate_phy_pte(&pte, itir, va);
+
+	if (itir_ps(itir) >= mrr.ps) {
+		vhpt_insert(phy_pte, itir, va, pte);
+	} else {
+		phy_pte  &= ~PAGE_FLAGS_RV_MASK;
+		psr = ia64_clear_ic();
+		ia64_itc(type, va, phy_pte, itir_ps(itir));
+		ia64_set_psr(psr);
+	}
+
+	if (!(pte&VTLB_PTE_IO))
+		mark_pages_dirty(v, pte, itir_ps(itir));
+}
+
+/*
+ *   vhpt lookup
+ */
+struct thash_data *vhpt_lookup(u64 va)
+{
+	struct thash_data *head;
+	u64 tag;
+
+	head = (struct thash_data *)ia64_thash(va);
+	tag = ia64_ttag(va);
+	if (head->etag == tag)
+		return head;
+	return NULL;
+}
+
+u64 guest_vhpt_lookup(u64 iha, u64 *pte)
+{
+	u64 ret;
+	struct thash_data *data;
+
+	data = __vtr_lookup(current_vcpu, iha, D_TLB);
+	if (data != NULL)
+		thash_vhpt_insert(current_vcpu, data->page_flags,
+			data->itir, iha, D_TLB);
+
+	asm volatile ("rsm psr.ic|psr.i;;"
+			"srlz.d;;"
+			"ld8.s r9=[%1];;"
+			"tnat.nz p6,p7=r9;;"
+			"(p6) mov %0=1;"
+			"(p6) mov r9=r0;"
+			"(p7) extr.u r9=r9,0,53;;"
+			"(p7) mov %0=r0;"
+			"(p7) st8 [%2]=r9;;"
+			"ssm psr.ic;;"
+			"srlz.d;;"
+			/* "ssm psr.i;;" Once interrupts in vmm open, need fix*/
+			: "=r"(ret) : "r"(iha), "r"(pte):"memory");
+
+	return ret;
+}
+
+/*
+ *  purge software guest tlb
+ */
+
+static void vtlb_purge(struct kvm_vcpu *v, u64 va, u64 ps)
+{
+	struct thash_data *cur;
+	u64 start, curadr, size, psbits, tag, rr_ps, num;
+	union ia64_rr vrr;
+	struct thash_cb *hcb = &v->arch.vtlb;
+
+	vrr.val = vcpu_get_rr(v, va);
+	psbits = VMX(v, psbits[(va >> 61)]);
+	start = va & ~((1UL << ps) - 1);
+	while (psbits) {
+		curadr = start;
+		rr_ps = __ffs(psbits);
+		psbits &= ~(1UL << rr_ps);
+		num = 1UL << ((ps < rr_ps) ? 0 : (ps - rr_ps));
+		size = PSIZE(rr_ps);
+		vrr.ps = rr_ps;
+		while (num) {
+			cur = vsa_thash(hcb->pta, curadr, vrr.val, &tag);
+			if (cur->etag == tag && cur->ps == rr_ps)
+				cur->etag = INVALID_TI_TAG;
+			curadr += size;
+			num--;
+		}
+	}
+}
+
+
+/*
+ *  purge VHPT and machine TLB
+ */
+static void vhpt_purge(struct kvm_vcpu *v, u64 va, u64 ps)
+{
+	struct thash_data *cur;
+	u64 start, size, tag, num;
+	union ia64_rr rr;
+
+	start = va & ~((1UL << ps) - 1);
+	rr.val = ia64_get_rr(va);
+	size = PSIZE(rr.ps);
+	num = 1UL << ((ps < rr.ps) ? 0 : (ps - rr.ps));
+	while (num) {
+		cur = (struct thash_data *)ia64_thash(start);
+		tag = ia64_ttag(start);
+		if (cur->etag == tag)
+			cur->etag = INVALID_TI_TAG;
+		start += size;
+		num--;
+	}
+	machine_tlb_purge(va, ps);
+}
+
+/*
+ * Insert an entry into hash TLB or VHPT.
+ * NOTES:
+ *  1: When inserting VHPT to thash, "va" is a must covered
+ *  address by the inserted machine VHPT entry.
+ *  2: The format of entry is always in TLB.
+ *  3: The caller need to make sure the new entry will not overlap
+ *     with any existed entry.
+ */
+void vtlb_insert(struct kvm_vcpu *v, u64 pte, u64 itir, u64 va)
+{
+	struct thash_data *head;
+	union ia64_rr vrr;
+	u64 tag;
+	struct thash_cb *hcb = &v->arch.vtlb;
+
+	vrr.val = vcpu_get_rr(v, va);
+	vrr.ps = itir_ps(itir);
+	VMX(v, psbits[va >> 61]) |= (1UL << vrr.ps);
+	head = vsa_thash(hcb->pta, va, vrr.val, &tag);
+	head->page_flags = pte;
+	head->itir = itir;
+	head->etag = tag;
+}
+
+int vtr_find_overlap(struct kvm_vcpu *vcpu, u64 va, u64 ps, int type)
+{
+	struct thash_data  *trp;
+	int  i;
+	u64 end, rid;
+
+	rid = vcpu_get_rr(vcpu, va);
+	rid = rid & RR_RID_MASK;
+	end = va + PSIZE(ps);
+	if (type == D_TLB) {
+		if (vcpu_quick_region_check(vcpu->arch.dtr_regions, va)) {
+			for (trp = (struct thash_data *)&vcpu->arch.dtrs, i = 0;
+					i < NDTRS; i++, trp++) {
+				if (__is_tr_overlap(trp, rid, va, end))
+					return i;
+			}
+		}
+	} else {
+		if (vcpu_quick_region_check(vcpu->arch.itr_regions, va)) {
+			for (trp = (struct thash_data *)&vcpu->arch.itrs, i = 0;
+					i < NITRS; i++, trp++) {
+				if (__is_tr_overlap(trp, rid, va, end))
+					return i;
+			}
+		}
+	}
+	return -1;
+}
+
+/*
+ * Purge entries in VTLB and VHPT
+ */
+void thash_purge_entries(struct kvm_vcpu *v, u64 va, u64 ps)
+{
+	if (vcpu_quick_region_check(v->arch.tc_regions, va))
+		vtlb_purge(v, va, ps);
+	vhpt_purge(v, va, ps);
+}
+
+void thash_purge_entries_remote(struct kvm_vcpu *v, u64 va, u64 ps)
+{
+	u64 old_va = va;
+	va = REGION_OFFSET(va);
+	if (vcpu_quick_region_check(v->arch.tc_regions, old_va))
+		vtlb_purge(v, va, ps);
+	vhpt_purge(v, va, ps);
+}
+
+u64 translate_phy_pte(u64 *pte, u64 itir, u64 va)
+{
+	u64 ps, ps_mask, paddr, maddr;
+	union pte_flags phy_pte;
+
+	ps = itir_ps(itir);
+	ps_mask = ~((1UL << ps) - 1);
+	phy_pte.val = *pte;
+	paddr = *pte;
+	paddr = ((paddr & _PAGE_PPN_MASK) & ps_mask) | (va & ~ps_mask);
+	maddr = kvm_lookup_mpa(paddr >> PAGE_SHIFT);
+	if (maddr & GPFN_IO_MASK) {
+		*pte |= VTLB_PTE_IO;
+		return -1;
+	}
+	maddr = ((maddr & _PAGE_PPN_MASK) & PAGE_MASK) |
+					(paddr & ~PAGE_MASK);
+	phy_pte.ppn = maddr >> ARCH_PAGE_SHIFT;
+	return phy_pte.val;
+}
+
+/*
+ * Purge overlap TCs and then insert the new entry to emulate itc ops.
+ *    Notes: Only TC entry can purge and insert.
+ *    1 indicates this is MMIO
+ */
+int thash_purge_and_insert(struct kvm_vcpu *v, u64 pte, u64 itir,
+						u64 ifa, int type)
+{
+	u64 ps;
+	u64 phy_pte;
+	union ia64_rr vrr, mrr;
+	int ret = 0;
+
+	ps = itir_ps(itir);
+	vrr.val = vcpu_get_rr(v, ifa);
+	mrr.val = ia64_get_rr(ifa);
+
+	phy_pte = translate_phy_pte(&pte, itir, ifa);
+
+	/* Ensure WB attribute if pte is related to a normal mem page,
+	 * which is required by vga acceleration since qemu maps shared
+	 * vram buffer with WB.
+	 */
+	if (!(pte & VTLB_PTE_IO) && ((pte & _PAGE_MA_MASK) != _PAGE_MA_NAT)) {
+		pte &= ~_PAGE_MA_MASK;
+		phy_pte &= ~_PAGE_MA_MASK;
+	}
+
+	if (pte & VTLB_PTE_IO)
+		ret = 1;
+
+	vtlb_purge(v, ifa, ps);
+	vhpt_purge(v, ifa, ps);
+
+	if (ps == mrr.ps) {
+		if (!(pte&VTLB_PTE_IO)) {
+			vhpt_insert(phy_pte, itir, ifa, pte);
+		} else {
+			vtlb_insert(v, pte, itir, ifa);
+			vcpu_quick_region_set(VMX(v, tc_regions), ifa);
+		}
+	} else if (ps > mrr.ps) {
+		vtlb_insert(v, pte, itir, ifa);
+		vcpu_quick_region_set(VMX(v, tc_regions), ifa);
+		if (!(pte&VTLB_PTE_IO))
+			vhpt_insert(phy_pte, itir, ifa, pte);
+	} else {
+		u64 psr;
+		phy_pte  &= ~PAGE_FLAGS_RV_MASK;
+		psr = ia64_clear_ic();
+		ia64_itc(type, ifa, phy_pte, ps);
+		ia64_set_psr(psr);
+	}
+	if (!(pte&VTLB_PTE_IO))
+		mark_pages_dirty(v, pte, ps);
+
+	return ret;
+}
+
+/*
+ * Purge all TCs or VHPT entries including those in Hash table.
+ *
+ */
+
+void thash_purge_all(struct kvm_vcpu *v)
+{
+	int i;
+	struct thash_data *head;
+	struct thash_cb  *vtlb, *vhpt;
+	vtlb = &v->arch.vtlb;
+	vhpt = &v->arch.vhpt;
+
+	for (i = 0; i < 8; i++)
+		VMX(v, psbits[i]) = 0;
+
+	head = vtlb->hash;
+	for (i = 0; i < vtlb->num; i++) {
+		head->page_flags = 0;
+		head->etag = INVALID_TI_TAG;
+		head->itir = 0;
+		head->next = 0;
+		head++;
+	};
+
+	head = vhpt->hash;
+	for (i = 0; i < vhpt->num; i++) {
+		head->page_flags = 0;
+		head->etag = INVALID_TI_TAG;
+		head->itir = 0;
+		head->next = 0;
+		head++;
+	};
+
+	local_flush_tlb_all();
+}
+
+
+/*
+ * Lookup the hash table and its collision chain to find an entry
+ * covering this address rid:va or the entry.
+ *
+ * INPUT:
+ *  in: TLB format for both VHPT & TLB.
+ */
+
+struct thash_data *vtlb_lookup(struct kvm_vcpu *v, u64 va, int is_data)
+{
+	struct thash_data  *cch;
+	u64    psbits, ps, tag;
+	union ia64_rr vrr;
+
+	struct thash_cb *hcb = &v->arch.vtlb;
+
+	cch = __vtr_lookup(v, va, is_data);;
+	if (cch)
+		return cch;
+
+	if (vcpu_quick_region_check(v->arch.tc_regions, va) == 0)
+		return NULL;
+
+	psbits = VMX(v, psbits[(va >> 61)]);
+	vrr.val = vcpu_get_rr(v, va);
+	while (psbits) {
+		ps = __ffs(psbits);
+		psbits &= ~(1UL << ps);
+		vrr.ps = ps;
+		cch = vsa_thash(hcb->pta, va, vrr.val, &tag);
+		if (cch->etag == tag && cch->ps == ps)
+			return cch;
+	}
+
+	return NULL;
+}
+
+
+/*
+ * Initialize internal control data before service.
+ */
+void thash_init(struct thash_cb *hcb, u64 sz)
+{
+	int i;
+	struct thash_data *head;
+
+	hcb->pta.val = (unsigned long)hcb->hash;
+	hcb->pta.vf = 1;
+	hcb->pta.ve = 1;
+	hcb->pta.size = sz;
+	head = hcb->hash;
+	for (i = 0; i < hcb->num; i++) {
+		head->page_flags = 0;
+		head->itir = 0;
+		head->etag = INVALID_TI_TAG;
+		head->next = 0;
+		head++;
+	}
+}
+
+u64 kvm_lookup_mpa(u64 gpfn)
+{
+	u64 *base = (u64 *) KVM_P2M_BASE;
+	return *(base + gpfn);
+}
+
+u64 kvm_gpa_to_mpa(u64 gpa)
+{
+	u64 pte = kvm_lookup_mpa(gpa >> PAGE_SHIFT);
+	return (pte >> PAGE_SHIFT << PAGE_SHIFT) | (gpa & ~PAGE_MASK);
+}
+
+
+/*
+ * Fetch guest bundle code.
+ * INPUT:
+ *  gip: guest ip
+ *  pbundle: used to return fetched bundle.
+ */
+int fetch_code(struct kvm_vcpu *vcpu, u64 gip, IA64_BUNDLE *pbundle)
+{
+	u64     gpip = 0;   /* guest physical IP*/
+	u64     *vpa;
+	struct thash_data    *tlb;
+	u64     maddr;
+
+	if (!(VCPU(vcpu, vpsr) & IA64_PSR_IT)) {
+		/* I-side physical mode */
+		gpip = gip;
+	} else {
+		tlb = vtlb_lookup(vcpu, gip, I_TLB);
+		if (tlb)
+			gpip = (tlb->ppn >> (tlb->ps - 12) << tlb->ps) |
+				(gip & (PSIZE(tlb->ps) - 1));
+	}
+	if (gpip) {
+		maddr = kvm_gpa_to_mpa(gpip);
+	} else {
+		tlb = vhpt_lookup(gip);
+		if (tlb == NULL) {
+			ia64_ptcl(gip, ARCH_PAGE_SHIFT << 2);
+			return IA64_FAULT;
+		}
+		maddr = (tlb->ppn >> (tlb->ps - 12) << tlb->ps)
+					| (gip & (PSIZE(tlb->ps) - 1));
+	}
+	vpa = (u64 *)__kvm_va(maddr);
+
+	pbundle->i64[0] = *vpa++;
+	pbundle->i64[1] = *vpa;
+
+	return IA64_NO_FAULT;
+}
+
+
+void kvm_init_vhpt(struct kvm_vcpu *v)
+{
+	v->arch.vhpt.num = VHPT_NUM_ENTRIES;
+	thash_init(&v->arch.vhpt, VHPT_SHIFT);
+	ia64_set_pta(v->arch.vhpt.pta.val);
+	/*Enable VHPT here?*/
+}
+
+void kvm_init_vtlb(struct kvm_vcpu *v)
+{
+	v->arch.vtlb.num = VTLB_NUM_ENTRIES;
+	thash_init(&v->arch.vtlb, VTLB_SHIFT);
+}
diff --git a/arch/ia64/mm/contig.c b/arch/ia64/mm/contig.c
index 344f64e..798bf98 100644
--- a/arch/ia64/mm/contig.c
+++ b/arch/ia64/mm/contig.c
@@ -45,8 +45,6 @@ void show_mem(void)
 
 	printk(KERN_INFO "Mem-info:\n");
 	show_free_areas();
-	printk(KERN_INFO "Free swap:       %6ldkB\n",
-	       nr_swap_pages<<(PAGE_SHIFT-10));
 	printk(KERN_INFO "Node memory in pages:\n");
 	for_each_online_pgdat(pgdat) {
 		unsigned long present;
@@ -255,7 +253,7 @@ paging_init (void)
 	max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
 
 #ifdef CONFIG_VIRTUAL_MEM_MAP
-	efi_memmap_walk(register_active_ranges, NULL);
+	efi_memmap_walk(filter_memory, register_active_ranges);
 	efi_memmap_walk(find_largest_hole, (u64 *)&max_gap);
 	if (max_gap < LARGE_GAP) {
 		vmem_map = (struct page *) 0;
diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c
index ee5e68b..544dc42 100644
--- a/arch/ia64/mm/discontig.c
+++ b/arch/ia64/mm/discontig.c
@@ -104,7 +104,7 @@ static int __meminit early_nr_cpus_node(int node)
 {
 	int cpu, n = 0;
 
-	for (cpu = 0; cpu < NR_CPUS; cpu++)
+	for_each_possible_early_cpu(cpu)
 		if (node == node_cpuid[cpu].nid)
 			n++;
 
@@ -124,6 +124,7 @@ static unsigned long __meminit compute_pernodesize(int node)
 	pernodesize += node * L1_CACHE_BYTES;
 	pernodesize += L1_CACHE_ALIGN(sizeof(pg_data_t));
 	pernodesize += L1_CACHE_ALIGN(sizeof(struct ia64_node_data));
+	pernodesize += L1_CACHE_ALIGN(sizeof(pg_data_t));
 	pernodesize = PAGE_ALIGN(pernodesize);
 	return pernodesize;
 }
@@ -142,7 +143,7 @@ static void *per_cpu_node_setup(void *cpu_data, int node)
 #ifdef CONFIG_SMP
 	int cpu;
 
-	for (cpu = 0; cpu < NR_CPUS; cpu++) {
+	for_each_possible_early_cpu(cpu) {
 		if (node == node_cpuid[cpu].nid) {
 			memcpy(__va(cpu_data), __phys_per_cpu_start,
 			       __per_cpu_end - __per_cpu_start);
@@ -345,7 +346,7 @@ static void __init initialize_pernode_data(void)
 
 #ifdef CONFIG_SMP
 	/* Set the node_data pointer for each per-cpu struct */
-	for (cpu = 0; cpu < NR_CPUS; cpu++) {
+	for_each_possible_early_cpu(cpu) {
 		node = node_cpuid[cpu].nid;
 		per_cpu(cpu_info, cpu).node_data = mem_data[node].node_data;
 	}
@@ -444,7 +445,7 @@ void __init find_memory(void)
 			mem_data[node].min_pfn = ~0UL;
 		}
 
-	efi_memmap_walk(register_active_ranges, NULL);
+	efi_memmap_walk(filter_memory, register_active_ranges);
 
 	/*
 	 * Initialize the boot memory maps in reverse order since that's
@@ -493,13 +494,9 @@ void __cpuinit *per_cpu_init(void)
 	int cpu;
 	static int first_time = 1;
 
-
-	if (smp_processor_id() != 0)
-		return __per_cpu_start + __per_cpu_offset[smp_processor_id()];
-
 	if (first_time) {
 		first_time = 0;
-		for (cpu = 0; cpu < NR_CPUS; cpu++)
+		for_each_possible_early_cpu(cpu)
 			per_cpu(local_per_cpu_offset, cpu) = __per_cpu_offset[cpu];
 	}
 
@@ -522,8 +519,6 @@ void show_mem(void)
 
 	printk(KERN_INFO "Mem-info:\n");
 	show_free_areas();
-	printk(KERN_INFO "Free swap:       %6ldkB\n",
-	       nr_swap_pages<<(PAGE_SHIFT-10));
 	printk(KERN_INFO "Node memory in pages:\n");
 	for_each_online_pgdat(pgdat) {
 		unsigned long present;
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index a4ca657..fc6c663 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -58,7 +58,6 @@ __ia64_sync_icache_dcache (pte_t pte)
 {
 	unsigned long addr;
 	struct page *page;
-	unsigned long order;
 
 	page = pte_page(pte);
 	addr = (unsigned long) page_address(page);
@@ -66,12 +65,7 @@ __ia64_sync_icache_dcache (pte_t pte)
 	if (test_bit(PG_arch_1, &page->flags))
 		return;				/* i-cache is already coherent with d-cache */
 
-	if (PageCompound(page)) {
-		order = compound_order(page);
-		flush_icache_range(addr, addr + (1UL << order << PAGE_SHIFT));
-	}
-	else
-		flush_icache_range(addr, addr + PAGE_SIZE);
+	flush_icache_range(addr, addr + (PAGE_SIZE << compound_order(page)));
 	set_bit(PG_arch_1, &page->flags);	/* mark page as clean */
 }
 
@@ -553,12 +547,10 @@ find_largest_hole (u64 start, u64 end, void *arg)
 #endif /* CONFIG_VIRTUAL_MEM_MAP */
 
 int __init
-register_active_ranges(u64 start, u64 end, void *arg)
+register_active_ranges(u64 start, u64 len, int nid)
 {
-	int nid = paddr_to_nid(__pa(start));
+	u64 end = start + len;
 
-	if (nid < 0)
-		nid = 0;
 #ifdef CONFIG_KEXEC
 	if (start > crashk_res.start && start < crashk_res.end)
 		start = crashk_res.end;
@@ -690,15 +682,6 @@ mem_init (void)
 }
 
 #ifdef CONFIG_MEMORY_HOTPLUG
-void online_page(struct page *page)
-{
-	ClearPageReserved(page);
-	init_page_count(page);
-	__free_page(page);
-	totalram_pages++;
-	num_physpages++;
-}
-
 int arch_add_memory(int nid, u64 start, u64 size)
 {
 	pg_data_t *pgdat;
diff --git a/arch/ia64/mm/numa.c b/arch/ia64/mm/numa.c
index 7807fc5..b73bf18 100644
--- a/arch/ia64/mm/numa.c
+++ b/arch/ia64/mm/numa.c
@@ -27,7 +27,9 @@
  */
 int num_node_memblks;
 struct node_memblk_s node_memblk[NR_NODE_MEMBLKS];
-struct node_cpuid_s node_cpuid[NR_CPUS];
+struct node_cpuid_s node_cpuid[NR_CPUS] =
+	{ [0 ... NR_CPUS-1] = { .phys_id = 0, .nid = NUMA_NO_NODE } };
+
 /*
  * This is a matrix with "distances" between nodes, they should be
  * proportional to the memory access latency ratios.
diff --git a/arch/ia64/mm/tlb.c b/arch/ia64/mm/tlb.c
index 655da24..8caf424 100644
--- a/arch/ia64/mm/tlb.c
+++ b/arch/ia64/mm/tlb.c
@@ -11,6 +11,9 @@
  * Rohit Seth <rohit.seth@intel.com>
  * Ken Chen <kenneth.w.chen@intel.com>
  * Christophe de Dinechin <ddd@hp.com>: Avoid ptc.e on memory allocation
+ * Copyright (C) 2007 Intel Corp
+ *	Fenghua Yu <fenghua.yu@intel.com>
+ *	Add multiple ptc.g/ptc.ga instruction support in global tlb purge.
  */
 #include <linux/module.h>
 #include <linux/init.h>
@@ -26,6 +29,9 @@
 #include <asm/pal.h>
 #include <asm/tlbflush.h>
 #include <asm/dma.h>
+#include <asm/processor.h>
+#include <asm/sal.h>
+#include <asm/tlb.h>
 
 static struct {
 	unsigned long mask;	/* mask of supported purge page-sizes */
@@ -39,6 +45,10 @@ struct ia64_ctx ia64_ctx = {
 };
 
 DEFINE_PER_CPU(u8, ia64_need_tlb_flush);
+DEFINE_PER_CPU(u8, ia64_tr_num);  /*Number of TR slots in current processor*/
+DEFINE_PER_CPU(u8, ia64_tr_used); /*Max Slot number used by kernel*/
+
+struct ia64_tr_entry __per_cpu_idtrs[NR_CPUS][2][IA64_TR_ALLOC_MAX];
 
 /*
  * Initializes the ia64_ctx.bitmap array based on max_ctx+1.
@@ -84,14 +94,143 @@ wrap_mmu_context (struct mm_struct *mm)
 	local_flush_tlb_all();
 }
 
+/*
+ * Implement "spinaphores" ... like counting semaphores, but they
+ * spin instead of sleeping.  If there are ever any other users for
+ * this primitive it can be moved up to a spinaphore.h header.
+ */
+struct spinaphore {
+	atomic_t	cur;
+};
+
+static inline void spinaphore_init(struct spinaphore *ss, int val)
+{
+	atomic_set(&ss->cur, val);
+}
+
+static inline void down_spin(struct spinaphore *ss)
+{
+	while (unlikely(!atomic_add_unless(&ss->cur, -1, 0)))
+		while (atomic_read(&ss->cur) == 0)
+			cpu_relax();
+}
+
+static inline void up_spin(struct spinaphore *ss)
+{
+	atomic_add(1, &ss->cur);
+}
+
+static struct spinaphore ptcg_sem;
+static u16 nptcg = 1;
+static int need_ptcg_sem = 1;
+static int toolatetochangeptcgsem = 0;
+
+/*
+ * Kernel parameter "nptcg=" overrides max number of concurrent global TLB
+ * purges which is reported from either PAL or SAL PALO.
+ *
+ * We don't have sanity checking for nptcg value. It's the user's responsibility
+ * for valid nptcg value on the platform. Otherwise, kernel may hang in some
+ * cases.
+ */
+static int __init
+set_nptcg(char *str)
+{
+	int value = 0;
+
+	get_option(&str, &value);
+	setup_ptcg_sem(value, NPTCG_FROM_KERNEL_PARAMETER);
+
+	return 1;
+}
+
+__setup("nptcg=", set_nptcg);
+
+/*
+ * Maximum number of simultaneous ptc.g purges in the system can
+ * be defined by PAL_VM_SUMMARY (in which case we should take
+ * the smallest value for any cpu in the system) or by the PAL
+ * override table (in which case we should ignore the value from
+ * PAL_VM_SUMMARY).
+ *
+ * Kernel parameter "nptcg=" overrides maximum number of simultanesous ptc.g
+ * purges defined in either PAL_VM_SUMMARY or PAL override table. In this case,
+ * we should ignore the value from either PAL_VM_SUMMARY or PAL override table.
+ *
+ * Complicating the logic here is the fact that num_possible_cpus()
+ * isn't fully setup until we start bringing cpus online.
+ */
+void
+setup_ptcg_sem(int max_purges, int nptcg_from)
+{
+	static int kp_override;
+	static int palo_override;
+	static int firstcpu = 1;
+
+	if (toolatetochangeptcgsem) {
+		if (nptcg_from == NPTCG_FROM_PAL && max_purges == 0)
+			BUG_ON(1 < nptcg);
+		else
+			BUG_ON(max_purges < nptcg);
+		return;
+	}
+
+	if (nptcg_from == NPTCG_FROM_KERNEL_PARAMETER) {
+		kp_override = 1;
+		nptcg = max_purges;
+		goto resetsema;
+	}
+	if (kp_override) {
+		need_ptcg_sem = num_possible_cpus() > nptcg;
+		return;
+	}
+
+	if (nptcg_from == NPTCG_FROM_PALO) {
+		palo_override = 1;
+
+		/* In PALO max_purges == 0 really means it! */
+		if (max_purges == 0)
+			panic("Whoa! Platform does not support global TLB purges.\n");
+		nptcg = max_purges;
+		if (nptcg == PALO_MAX_TLB_PURGES) {
+			need_ptcg_sem = 0;
+			return;
+		}
+		goto resetsema;
+	}
+	if (palo_override) {
+		if (nptcg != PALO_MAX_TLB_PURGES)
+			need_ptcg_sem = (num_possible_cpus() > nptcg);
+		return;
+	}
+
+	/* In PAL_VM_SUMMARY max_purges == 0 actually means 1 */
+	if (max_purges == 0) max_purges = 1;
+
+	if (firstcpu) {
+		nptcg = max_purges;
+		firstcpu = 0;
+	}
+	if (max_purges < nptcg)
+		nptcg = max_purges;
+	if (nptcg == PAL_MAX_PURGES) {
+		need_ptcg_sem = 0;
+		return;
+	} else
+		need_ptcg_sem = (num_possible_cpus() > nptcg);
+
+resetsema:
+	spinaphore_init(&ptcg_sem, max_purges);
+}
+
 void
 ia64_global_tlb_purge (struct mm_struct *mm, unsigned long start,
 		       unsigned long end, unsigned long nbits)
 {
-	static DEFINE_SPINLOCK(ptcg_lock);
-
 	struct mm_struct *active_mm = current->active_mm;
 
+	toolatetochangeptcgsem = 1;
+
 	if (mm != active_mm) {
 		/* Restore region IDs for mm */
 		if (mm && active_mm) {
@@ -102,19 +241,20 @@ ia64_global_tlb_purge (struct mm_struct *mm, unsigned long start,
 		}
 	}
 
-	/* HW requires global serialization of ptc.ga.  */
-	spin_lock(&ptcg_lock);
-	{
-		do {
-			/*
-			 * Flush ALAT entries also.
-			 */
-			ia64_ptcga(start, (nbits<<2));
-			ia64_srlz_i();
-			start += (1UL << nbits);
-		} while (start < end);
-	}
-	spin_unlock(&ptcg_lock);
+	if (need_ptcg_sem)
+		down_spin(&ptcg_sem);
+
+	do {
+		/*
+		 * Flush ALAT entries also.
+		 */
+		ia64_ptcga(start, (nbits << 2));
+		ia64_srlz_i();
+		start += (1UL << nbits);
+	} while (start < end);
+
+	if (need_ptcg_sem)
+		up_spin(&ptcg_sem);
 
         if (mm != active_mm) {
                 activate_context(active_mm);
@@ -190,6 +330,9 @@ ia64_tlb_init (void)
 	ia64_ptce_info_t uninitialized_var(ptce_info); /* GCC be quiet */
 	unsigned long tr_pgbits;
 	long status;
+	pal_vm_info_1_u_t vm_info_1;
+	pal_vm_info_2_u_t vm_info_2;
+	int cpu = smp_processor_id();
 
 	if ((status = ia64_pal_vm_page_size(&tr_pgbits, &purge.mask)) != 0) {
 		printk(KERN_ERR "PAL_VM_PAGE_SIZE failed with status=%ld; "
@@ -206,4 +349,191 @@ ia64_tlb_init (void)
 	local_cpu_data->ptce_stride[1] = ptce_info.stride[1];
 
 	local_flush_tlb_all();	/* nuke left overs from bootstrapping... */
+	status = ia64_pal_vm_summary(&vm_info_1, &vm_info_2);
+
+	if (status) {
+		printk(KERN_ERR "ia64_pal_vm_summary=%ld\n", status);
+		per_cpu(ia64_tr_num, cpu) = 8;
+		return;
+	}
+	per_cpu(ia64_tr_num, cpu) = vm_info_1.pal_vm_info_1_s.max_itr_entry+1;
+	if (per_cpu(ia64_tr_num, cpu) >
+				(vm_info_1.pal_vm_info_1_s.max_dtr_entry+1))
+		per_cpu(ia64_tr_num, cpu) =
+				vm_info_1.pal_vm_info_1_s.max_dtr_entry+1;
+	if (per_cpu(ia64_tr_num, cpu) > IA64_TR_ALLOC_MAX) {
+		per_cpu(ia64_tr_num, cpu) = IA64_TR_ALLOC_MAX;
+		printk(KERN_DEBUG "TR register number exceeds IA64_TR_ALLOC_MAX!"
+			"IA64_TR_ALLOC_MAX should be extended\n");
+	}
+}
+
+/*
+ * is_tr_overlap
+ *
+ * Check overlap with inserted TRs.
+ */
+static int is_tr_overlap(struct ia64_tr_entry *p, u64 va, u64 log_size)
+{
+	u64 tr_log_size;
+	u64 tr_end;
+	u64 va_rr = ia64_get_rr(va);
+	u64 va_rid = RR_TO_RID(va_rr);
+	u64 va_end = va + (1<<log_size) - 1;
+
+	if (va_rid != RR_TO_RID(p->rr))
+		return 0;
+	tr_log_size = (p->itir & 0xff) >> 2;
+	tr_end = p->ifa + (1<<tr_log_size) - 1;
+
+	if (va > tr_end || p->ifa > va_end)
+		return 0;
+	return 1;
+
+}
+
+/*
+ * ia64_insert_tr in virtual mode. Allocate a TR slot
+ *
+ * target_mask : 0x1 : itr, 0x2 : dtr, 0x3 : idtr
+ *
+ * va 	: virtual address.
+ * pte 	: pte entries inserted.
+ * log_size: range to be covered.
+ *
+ * Return value:  <0 :  error No.
+ *
+ *		  >=0 : slot number allocated for TR.
+ * Must be called with preemption disabled.
+ */
+int ia64_itr_entry(u64 target_mask, u64 va, u64 pte, u64 log_size)
+{
+	int i, r;
+	unsigned long psr;
+	struct ia64_tr_entry *p;
+	int cpu = smp_processor_id();
+
+	r = -EINVAL;
+	/*Check overlap with existing TR entries*/
+	if (target_mask & 0x1) {
+		p = &__per_cpu_idtrs[cpu][0][0];
+		for (i = IA64_TR_ALLOC_BASE; i <= per_cpu(ia64_tr_used, cpu);
+								i++, p++) {
+			if (p->pte & 0x1)
+				if (is_tr_overlap(p, va, log_size)) {
+					printk(KERN_DEBUG "Overlapped Entry"
+						"Inserted for TR Reigster!!\n");
+					goto out;
+			}
+		}
+	}
+	if (target_mask & 0x2) {
+		p = &__per_cpu_idtrs[cpu][1][0];
+		for (i = IA64_TR_ALLOC_BASE; i <= per_cpu(ia64_tr_used, cpu);
+								i++, p++) {
+			if (p->pte & 0x1)
+				if (is_tr_overlap(p, va, log_size)) {
+					printk(KERN_DEBUG "Overlapped Entry"
+						"Inserted for TR Reigster!!\n");
+					goto out;
+				}
+		}
+	}
+
+	for (i = IA64_TR_ALLOC_BASE; i < per_cpu(ia64_tr_num, cpu); i++) {
+		switch (target_mask & 0x3) {
+		case 1:
+			if (!(__per_cpu_idtrs[cpu][0][i].pte & 0x1))
+				goto found;
+			continue;
+		case 2:
+			if (!(__per_cpu_idtrs[cpu][1][i].pte & 0x1))
+				goto found;
+			continue;
+		case 3:
+			if (!(__per_cpu_idtrs[cpu][0][i].pte & 0x1) &&
+				!(__per_cpu_idtrs[cpu][1][i].pte & 0x1))
+				goto found;
+			continue;
+		default:
+			r = -EINVAL;
+			goto out;
+		}
+	}
+found:
+	if (i >= per_cpu(ia64_tr_num, cpu))
+		return -EBUSY;
+
+	/*Record tr info for mca hander use!*/
+	if (i > per_cpu(ia64_tr_used, cpu))
+		per_cpu(ia64_tr_used, cpu) = i;
+
+	psr = ia64_clear_ic();
+	if (target_mask & 0x1) {
+		ia64_itr(0x1, i, va, pte, log_size);
+		ia64_srlz_i();
+		p = &__per_cpu_idtrs[cpu][0][i];
+		p->ifa = va;
+		p->pte = pte;
+		p->itir = log_size << 2;
+		p->rr = ia64_get_rr(va);
+	}
+	if (target_mask & 0x2) {
+		ia64_itr(0x2, i, va, pte, log_size);
+		ia64_srlz_i();
+		p = &__per_cpu_idtrs[cpu][1][i];
+		p->ifa = va;
+		p->pte = pte;
+		p->itir = log_size << 2;
+		p->rr = ia64_get_rr(va);
+	}
+	ia64_set_psr(psr);
+	r = i;
+out:
+	return r;
+}
+EXPORT_SYMBOL_GPL(ia64_itr_entry);
+
+/*
+ * ia64_purge_tr
+ *
+ * target_mask: 0x1: purge itr, 0x2 : purge dtr, 0x3 purge idtr.
+ * slot: slot number to be freed.
+ *
+ * Must be called with preemption disabled.
+ */
+void ia64_ptr_entry(u64 target_mask, int slot)
+{
+	int cpu = smp_processor_id();
+	int i;
+	struct ia64_tr_entry *p;
+
+	if (slot < IA64_TR_ALLOC_BASE || slot >= per_cpu(ia64_tr_num, cpu))
+		return;
+
+	if (target_mask & 0x1) {
+		p = &__per_cpu_idtrs[cpu][0][slot];
+		if ((p->pte&0x1) && is_tr_overlap(p, p->ifa, p->itir>>2)) {
+			p->pte = 0;
+			ia64_ptr(0x1, p->ifa, p->itir>>2);
+			ia64_srlz_i();
+		}
+	}
+
+	if (target_mask & 0x2) {
+		p = &__per_cpu_idtrs[cpu][1][slot];
+		if ((p->pte & 0x1) && is_tr_overlap(p, p->ifa, p->itir>>2)) {
+			p->pte = 0;
+			ia64_ptr(0x2, p->ifa, p->itir>>2);
+			ia64_srlz_i();
+		}
+	}
+
+	for (i = per_cpu(ia64_tr_used, cpu); i >= IA64_TR_ALLOC_BASE; i--) {
+		if ((__per_cpu_idtrs[cpu][0][i].pte & 0x1) ||
+				(__per_cpu_idtrs[cpu][1][i].pte & 0x1))
+			break;
+	}
+	per_cpu(ia64_tr_used, cpu) = i;
 }
+EXPORT_SYMBOL_GPL(ia64_ptr_entry);
diff --git a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c
index 53d0a8e..77b15f8 100644
--- a/arch/ia64/pci/pci.c
+++ b/arch/ia64/pci/pci.c
@@ -504,54 +504,12 @@ pcibios_update_irq (struct pci_dev *dev, int irq)
 	/* ??? FIXME -- record old value for shutdown.  */
 }
 
-static inline int
-pcibios_enable_resources (struct pci_dev *dev, int mask)
-{
-	u16 cmd, old_cmd;
-	int idx;
-	struct resource *r;
-	unsigned long type_mask = IORESOURCE_IO | IORESOURCE_MEM;
-
-	if (!dev)
-		return -EINVAL;
-
-	pci_read_config_word(dev, PCI_COMMAND, &cmd);
-	old_cmd = cmd;
-	for (idx=0; idx<PCI_NUM_RESOURCES; idx++) {
-		/* Only set up the desired resources.  */
-		if (!(mask & (1 << idx)))
-			continue;
-
-		r = &dev->resource[idx];
-		if (!(r->flags & type_mask))
-			continue;
-		if ((idx == PCI_ROM_RESOURCE) &&
-				(!(r->flags & IORESOURCE_ROM_ENABLE)))
-			continue;
-		if (!r->start && r->end) {
-			printk(KERN_ERR
-			       "PCI: Device %s not available because of resource collisions\n",
-			       pci_name(dev));
-			return -EINVAL;
-		}
-		if (r->flags & IORESOURCE_IO)
-			cmd |= PCI_COMMAND_IO;
-		if (r->flags & IORESOURCE_MEM)
-			cmd |= PCI_COMMAND_MEMORY;
-	}
-	if (cmd != old_cmd) {
-		printk("PCI: Enabling device %s (%04x -> %04x)\n", pci_name(dev), old_cmd, cmd);
-		pci_write_config_word(dev, PCI_COMMAND, cmd);
-	}
-	return 0;
-}
-
 int
 pcibios_enable_device (struct pci_dev *dev, int mask)
 {
 	int ret;
 
-	ret = pcibios_enable_resources(dev, mask);
+	ret = pci_enable_resources(dev, mask);
 	if (ret < 0)
 		return ret;
 
diff --git a/arch/ia64/sn/kernel/Makefile b/arch/ia64/sn/kernel/Makefile
index 688a3c27..0591038 100644
--- a/arch/ia64/sn/kernel/Makefile
+++ b/arch/ia64/sn/kernel/Makefile
@@ -4,7 +4,7 @@
 # License.  See the file "COPYING" in the main directory of this archive
 # for more details.
 #
-# Copyright (C) 1999,2001-2006 Silicon Graphics, Inc.  All Rights Reserved.
+# Copyright (C) 1999,2001-2006,2008 Silicon Graphics, Inc.  All Rights Reserved.
 #
 
 EXTRA_CFLAGS += -Iarch/ia64/sn/include
@@ -15,9 +15,4 @@ obj-y				+= setup.o bte.o bte_error.o irq.o mca.o idle.o \
 				   sn2/
 obj-$(CONFIG_IA64_GENERIC)      += machvec.o
 obj-$(CONFIG_SGI_TIOCX)		+= tiocx.o
-obj-$(CONFIG_IA64_SGI_SN_XP)	+= xp.o
-xp-y				:= xp_main.o xp_nofault.o
-obj-$(CONFIG_IA64_SGI_SN_XP)	+= xpc.o
-xpc-y				:= xpc_main.o xpc_channel.o xpc_partition.o
-obj-$(CONFIG_IA64_SGI_SN_XP)	+= xpnet.o
 obj-$(CONFIG_PCI_MSI)		+= msi_sn.o
diff --git a/arch/ia64/sn/kernel/huberror.c b/arch/ia64/sn/kernel/huberror.c
index 0101c79..08b0d9b 100644
--- a/arch/ia64/sn/kernel/huberror.c
+++ b/arch/ia64/sn/kernel/huberror.c
@@ -187,8 +187,8 @@ void hub_error_init(struct hubdev_info *hubdev_info)
 {
 
 	if (request_irq(SGI_II_ERROR, hub_eint_handler, IRQF_SHARED,
-			"SN_hub_error", (void *)hubdev_info)) {
-		printk("hub_error_init: Failed to request_irq for 0x%p\n",
+			"SN_hub_error", hubdev_info)) {
+		printk(KERN_ERR "hub_error_init: Failed to request_irq for 0x%p\n",
 		    hubdev_info);
 		return;
 	}
diff --git a/arch/ia64/sn/kernel/sn2/sn2_smp.c b/arch/ia64/sn/kernel/sn2/sn2_smp.c
index dfc6bf1..49d3120 100644
--- a/arch/ia64/sn/kernel/sn2/sn2_smp.c
+++ b/arch/ia64/sn/kernel/sn2/sn2_smp.c
@@ -550,11 +550,12 @@ static int __init sn2_ptc_init(void)
 	if (!ia64_platform_is("sn2"))
 		return 0;
 
-	if (!(proc_sn2_ptc = create_proc_entry(PTC_BASENAME, 0444, NULL))) {
+	proc_sn2_ptc = proc_create(PTC_BASENAME, 0444,
+				   NULL, &proc_sn2_ptc_operations);
+	if (!&proc_sn2_ptc_operations) {
 		printk(KERN_ERR "unable to create %s proc entry", PTC_BASENAME);
 		return -EINVAL;
 	}
-	proc_sn2_ptc->proc_fops = &proc_sn2_ptc_operations;
 	spin_lock_init(&sn2_global_ptc_lock);
 	return 0;
 }
diff --git a/arch/ia64/sn/kernel/sn2/sn_hwperf.c b/arch/ia64/sn/kernel/sn2/sn_hwperf.c
index 4b0d153..8cc0c47 100644
--- a/arch/ia64/sn/kernel/sn2/sn_hwperf.c
+++ b/arch/ia64/sn/kernel/sn2/sn_hwperf.c
@@ -37,7 +37,6 @@
 
 #include <asm/processor.h>
 #include <asm/topology.h>
-#include <asm/semaphore.h>
 #include <asm/uaccess.h>
 #include <asm/sal.h>
 #include <asm/sn/io.h>
diff --git a/arch/ia64/sn/kernel/sn2/sn_proc_fs.c b/arch/ia64/sn/kernel/sn2/sn_proc_fs.c
index 62b3e9a..2526e5c 100644
--- a/arch/ia64/sn/kernel/sn2/sn_proc_fs.c
+++ b/arch/ia64/sn/kernel/sn2/sn_proc_fs.c
@@ -139,30 +139,21 @@ static const struct file_operations proc_sn_topo_fops = {
 void register_sn_procfs(void)
 {
 	static struct proc_dir_entry *sgi_proc_dir = NULL;
-	struct proc_dir_entry *pde;
 
 	BUG_ON(sgi_proc_dir != NULL);
 	if (!(sgi_proc_dir = proc_mkdir("sgi_sn", NULL)))
 		return;
 
-	pde = create_proc_entry("partition_id", 0444, sgi_proc_dir);
-	if (pde)
-		pde->proc_fops = &proc_partition_id_fops;
-	pde = create_proc_entry("system_serial_number", 0444, sgi_proc_dir);
-	if (pde)
-		pde->proc_fops = &proc_system_sn_fops;
-	pde = create_proc_entry("licenseID", 0444, sgi_proc_dir);
-	if (pde)
-		pde->proc_fops = &proc_license_id_fops;
-	pde = create_proc_entry("sn_force_interrupt", 0644, sgi_proc_dir);
-	if (pde)
-		pde->proc_fops = &proc_sn_force_intr_fops;
-	pde = create_proc_entry("coherence_id", 0444, sgi_proc_dir);
-	if (pde)
-		pde->proc_fops = &proc_coherence_id_fops;
-	pde = create_proc_entry("sn_topology", 0444, sgi_proc_dir);
-	if (pde)
-		pde->proc_fops = &proc_sn_topo_fops;
+	proc_create("partition_id", 0444, sgi_proc_dir,
+		    &proc_partition_id_fops);
+	proc_create("system_serial_number", 0444, sgi_proc_dir,
+		    &proc_system_sn_fops);
+	proc_create("licenseID", 0444, sgi_proc_dir, &proc_license_id_fops);
+	proc_create("sn_force_interrupt", 0644, sgi_proc_dir,
+		    &proc_sn_force_intr_fops);
+	proc_create("coherence_id", 0444, sgi_proc_dir,
+		    &proc_coherence_id_fops);
+	proc_create("sn_topology", 0444, sgi_proc_dir, &proc_sn_topo_fops);
 }
 
 #endif /* CONFIG_PROC_FS */
diff --git a/arch/ia64/sn/kernel/xp_main.c b/arch/ia64/sn/kernel/xp_main.c
deleted file mode 100644
index b7ea466..0000000
--- a/arch/ia64/sn/kernel/xp_main.c
+++ /dev/null
@@ -1,290 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (c) 2004-2005 Silicon Graphics, Inc.  All Rights Reserved.
- */
-
-
-/*
- * Cross Partition (XP) base.
- *
- *	XP provides a base from which its users can interact
- *	with XPC, yet not be dependent on XPC.
- *
- */
-
-
-#include <linux/kernel.h>
-#include <linux/interrupt.h>
-#include <linux/module.h>
-#include <linux/mutex.h>
-#include <asm/sn/intr.h>
-#include <asm/sn/sn_sal.h>
-#include <asm/sn/xp.h>
-
-
-/*
- * Target of nofault PIO read.
- */
-u64 xp_nofault_PIOR_target;
-
-
-/*
- * xpc_registrations[] keeps track of xpc_connect()'s done by the kernel-level
- * users of XPC.
- */
-struct xpc_registration xpc_registrations[XPC_NCHANNELS];
-
-
-/*
- * Initialize the XPC interface to indicate that XPC isn't loaded.
- */
-static enum xpc_retval xpc_notloaded(void) { return xpcNotLoaded; }
-
-struct xpc_interface xpc_interface = {
-	(void (*)(int)) xpc_notloaded,
-	(void (*)(int)) xpc_notloaded,
-	(enum xpc_retval (*)(partid_t, int, u32, void **)) xpc_notloaded,
-	(enum xpc_retval (*)(partid_t, int, void *)) xpc_notloaded,
-	(enum xpc_retval (*)(partid_t, int, void *, xpc_notify_func, void *))
-							xpc_notloaded,
-	(void (*)(partid_t, int, void *)) xpc_notloaded,
-	(enum xpc_retval (*)(partid_t, void *)) xpc_notloaded
-};
-
-
-/*
- * XPC calls this when it (the XPC module) has been loaded.
- */
-void
-xpc_set_interface(void (*connect)(int),
-		void (*disconnect)(int),
-		enum xpc_retval (*allocate)(partid_t, int, u32, void **),
-		enum xpc_retval (*send)(partid_t, int, void *),
-		enum xpc_retval (*send_notify)(partid_t, int, void *,
-						xpc_notify_func, void *),
-		void (*received)(partid_t, int, void *),
-		enum xpc_retval (*partid_to_nasids)(partid_t, void *))
-{
-	xpc_interface.connect = connect;
-	xpc_interface.disconnect = disconnect;
-	xpc_interface.allocate = allocate;
-	xpc_interface.send = send;
-	xpc_interface.send_notify = send_notify;
-	xpc_interface.received = received;
-	xpc_interface.partid_to_nasids = partid_to_nasids;
-}
-
-
-/*
- * XPC calls this when it (the XPC module) is being unloaded.
- */
-void
-xpc_clear_interface(void)
-{
-	xpc_interface.connect = (void (*)(int)) xpc_notloaded;
-	xpc_interface.disconnect = (void (*)(int)) xpc_notloaded;
-	xpc_interface.allocate = (enum xpc_retval (*)(partid_t, int, u32,
-					void **)) xpc_notloaded;
-	xpc_interface.send = (enum xpc_retval (*)(partid_t, int, void *))
-					xpc_notloaded;
-	xpc_interface.send_notify = (enum xpc_retval (*)(partid_t, int, void *,
-				    xpc_notify_func, void *)) xpc_notloaded;
-	xpc_interface.received = (void (*)(partid_t, int, void *))
-					xpc_notloaded;
-	xpc_interface.partid_to_nasids = (enum xpc_retval (*)(partid_t, void *))
-					xpc_notloaded;
-}
-
-
-/*
- * Register for automatic establishment of a channel connection whenever
- * a partition comes up.
- *
- * Arguments:
- *
- *	ch_number - channel # to register for connection.
- *	func - function to call for asynchronous notification of channel
- *	       state changes (i.e., connection, disconnection, error) and
- *	       the arrival of incoming messages.
- *      key - pointer to optional user-defined value that gets passed back
- *	      to the user on any callouts made to func.
- *	payload_size - size in bytes of the XPC message's payload area which
- *		       contains a user-defined message. The user should make
- *		       this large enough to hold their largest message.
- *	nentries - max #of XPC message entries a message queue can contain.
- *		   The actual number, which is determined when a connection
- * 		   is established and may be less then requested, will be
- *		   passed to the user via the xpcConnected callout.
- *	assigned_limit - max number of kthreads allowed to be processing
- * 			 messages (per connection) at any given instant.
- *	idle_limit - max number of kthreads allowed to be idle at any given
- * 		     instant.
- */
-enum xpc_retval
-xpc_connect(int ch_number, xpc_channel_func func, void *key, u16 payload_size,
-		u16 nentries, u32 assigned_limit, u32 idle_limit)
-{
-	struct xpc_registration *registration;
-
-
-	DBUG_ON(ch_number < 0 || ch_number >= XPC_NCHANNELS);
-	DBUG_ON(payload_size == 0 || nentries == 0);
-	DBUG_ON(func == NULL);
-	DBUG_ON(assigned_limit == 0 || idle_limit > assigned_limit);
-
-	registration = &xpc_registrations[ch_number];
-
-	if (mutex_lock_interruptible(&registration->mutex) != 0) {
-		return xpcInterrupted;
-	}
-
-	/* if XPC_CHANNEL_REGISTERED(ch_number) */
-	if (registration->func != NULL) {
-		mutex_unlock(&registration->mutex);
-		return xpcAlreadyRegistered;
-	}
-
-	/* register the channel for connection */
-	registration->msg_size = XPC_MSG_SIZE(payload_size);
-	registration->nentries = nentries;
-	registration->assigned_limit = assigned_limit;
-	registration->idle_limit = idle_limit;
-	registration->key = key;
-	registration->func = func;
-
-	mutex_unlock(&registration->mutex);
-
-	xpc_interface.connect(ch_number);
-
-	return xpcSuccess;
-}
-
-
-/*
- * Remove the registration for automatic connection of the specified channel
- * when a partition comes up.
- *
- * Before returning this xpc_disconnect() will wait for all connections on the
- * specified channel have been closed/torndown. So the caller can be assured
- * that they will not be receiving any more callouts from XPC to their
- * function registered via xpc_connect().
- *
- * Arguments:
- *
- *	ch_number - channel # to unregister.
- */
-void
-xpc_disconnect(int ch_number)
-{
-	struct xpc_registration *registration;
-
-
-	DBUG_ON(ch_number < 0 || ch_number >= XPC_NCHANNELS);
-
-	registration = &xpc_registrations[ch_number];
-
-	/*
-	 * We've decided not to make this a down_interruptible(), since we
-	 * figured XPC's users will just turn around and call xpc_disconnect()
-	 * again anyways, so we might as well wait, if need be.
-	 */
-	mutex_lock(&registration->mutex);
-
-	/* if !XPC_CHANNEL_REGISTERED(ch_number) */
-	if (registration->func == NULL) {
-		mutex_unlock(&registration->mutex);
-		return;
-	}
-
-	/* remove the connection registration for the specified channel */
-	registration->func = NULL;
-	registration->key = NULL;
-	registration->nentries = 0;
-	registration->msg_size = 0;
-	registration->assigned_limit = 0;
-	registration->idle_limit = 0;
-
-	xpc_interface.disconnect(ch_number);
-
-	mutex_unlock(&registration->mutex);
-
-	return;
-}
-
-
-int __init
-xp_init(void)
-{
-	int ret, ch_number;
-	u64 func_addr = *(u64 *) xp_nofault_PIOR;
-	u64 err_func_addr = *(u64 *) xp_error_PIOR;
-
-
-	if (!ia64_platform_is("sn2")) {
-		return -ENODEV;
-	}
-
-	/*
-	 * Register a nofault code region which performs a cross-partition
-	 * PIO read. If the PIO read times out, the MCA handler will consume
-	 * the error and return to a kernel-provided instruction to indicate
-	 * an error. This PIO read exists because it is guaranteed to timeout
-	 * if the destination is down (AMO operations do not timeout on at
-	 * least some CPUs on Shubs <= v1.2, which unfortunately we have to
-	 * work around).
-	 */
-	if ((ret = sn_register_nofault_code(func_addr, err_func_addr,
-						err_func_addr, 1, 1)) != 0) {
-		printk(KERN_ERR "XP: can't register nofault code, error=%d\n",
-			ret);
-	}
-	/*
-	 * Setup the nofault PIO read target. (There is no special reason why
-	 * SH_IPI_ACCESS was selected.)
-	 */
-	if (is_shub2()) {
-		xp_nofault_PIOR_target = SH2_IPI_ACCESS0;
-	} else {
-		xp_nofault_PIOR_target = SH1_IPI_ACCESS;
-	}
-
-	/* initialize the connection registration mutex */
-	for (ch_number = 0; ch_number < XPC_NCHANNELS; ch_number++) {
-		mutex_init(&xpc_registrations[ch_number].mutex);
-	}
-
-	return 0;
-}
-module_init(xp_init);
-
-
-void __exit
-xp_exit(void)
-{
-	u64 func_addr = *(u64 *) xp_nofault_PIOR;
-	u64 err_func_addr = *(u64 *) xp_error_PIOR;
-
-
-	/* unregister the PIO read nofault code region */
-	(void) sn_register_nofault_code(func_addr, err_func_addr,
-					err_func_addr, 1, 0);
-}
-module_exit(xp_exit);
-
-
-MODULE_AUTHOR("Silicon Graphics, Inc.");
-MODULE_DESCRIPTION("Cross Partition (XP) base");
-MODULE_LICENSE("GPL");
-
-EXPORT_SYMBOL(xp_nofault_PIOR);
-EXPORT_SYMBOL(xp_nofault_PIOR_target);
-EXPORT_SYMBOL(xpc_registrations);
-EXPORT_SYMBOL(xpc_interface);
-EXPORT_SYMBOL(xpc_clear_interface);
-EXPORT_SYMBOL(xpc_set_interface);
-EXPORT_SYMBOL(xpc_connect);
-EXPORT_SYMBOL(xpc_disconnect);
-
diff --git a/arch/ia64/sn/kernel/xp_nofault.S b/arch/ia64/sn/kernel/xp_nofault.S
deleted file mode 100644
index 98e7c7d..0000000
--- a/arch/ia64/sn/kernel/xp_nofault.S
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (c) 2004-2007 Silicon Graphics, Inc.  All Rights Reserved.
- */
-
-
-/*
- * The xp_nofault_PIOR function takes a pointer to a remote PIO register
- * and attempts to load and consume a value from it.  This function
- * will be registered as a nofault code block.  In the event that the
- * PIO read fails, the MCA handler will force the error to look
- * corrected and vector to the xp_error_PIOR which will return an error.
- *
- * The definition of "consumption" and the time it takes for an MCA
- * to surface is processor implementation specific.  This code
- * is sufficient on Itanium through the Montvale processor family.
- * It may need to be adjusted for future processor implementations.
- *
- *	extern int xp_nofault_PIOR(void *remote_register);
- */
-
-	.global xp_nofault_PIOR
-xp_nofault_PIOR:
-	mov	r8=r0			// Stage a success return value
-	ld8.acq	r9=[r32];;		// PIO Read the specified register
-	adds	r9=1,r9;;		// Add to force consumption
-	srlz.i;;			// Allow time for MCA to surface
-	br.ret.sptk.many b0;;		// Return success
-
-	.global xp_error_PIOR
-xp_error_PIOR:
-	mov	r8=1			// Return value of 1
-	br.ret.sptk.many b0;;		// Return failure
diff --git a/arch/ia64/sn/kernel/xpc_channel.c b/arch/ia64/sn/kernel/xpc_channel.c
deleted file mode 100644
index 44ccc0d..0000000
--- a/arch/ia64/sn/kernel/xpc_channel.c
+++ /dev/null
@@ -1,2379 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (c) 2004-2006 Silicon Graphics, Inc.  All Rights Reserved.
- */
-
-
-/*
- * Cross Partition Communication (XPC) channel support.
- *
- *	This is the part of XPC that manages the channels and
- *	sends/receives messages across them to/from other partitions.
- *
- */
-
-
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/sched.h>
-#include <linux/cache.h>
-#include <linux/interrupt.h>
-#include <linux/mutex.h>
-#include <linux/completion.h>
-#include <asm/sn/bte.h>
-#include <asm/sn/sn_sal.h>
-#include <asm/sn/xpc.h>
-
-
-/*
- * Guarantee that the kzalloc'd memory is cacheline aligned.
- */
-static void *
-xpc_kzalloc_cacheline_aligned(size_t size, gfp_t flags, void **base)
-{
-	/* see if kzalloc will give us cachline aligned memory by default */
-	*base = kzalloc(size, flags);
-	if (*base == NULL) {
-		return NULL;
-	}
-	if ((u64) *base == L1_CACHE_ALIGN((u64) *base)) {
-		return *base;
-	}
-	kfree(*base);
-
-	/* nope, we'll have to do it ourselves */
-	*base = kzalloc(size + L1_CACHE_BYTES, flags);
-	if (*base == NULL) {
-		return NULL;
-	}
-	return (void *) L1_CACHE_ALIGN((u64) *base);
-}
-
-
-/*
- * Set up the initial values for the XPartition Communication channels.
- */
-static void
-xpc_initialize_channels(struct xpc_partition *part, partid_t partid)
-{
-	int ch_number;
-	struct xpc_channel *ch;
-
-
-	for (ch_number = 0; ch_number < part->nchannels; ch_number++) {
-		ch = &part->channels[ch_number];
-
-		ch->partid = partid;
-		ch->number = ch_number;
-		ch->flags = XPC_C_DISCONNECTED;
-
-		ch->local_GP = &part->local_GPs[ch_number];
-		ch->local_openclose_args =
-					&part->local_openclose_args[ch_number];
-
-		atomic_set(&ch->kthreads_assigned, 0);
-		atomic_set(&ch->kthreads_idle, 0);
-		atomic_set(&ch->kthreads_active, 0);
-
-		atomic_set(&ch->references, 0);
-		atomic_set(&ch->n_to_notify, 0);
-
-		spin_lock_init(&ch->lock);
-		mutex_init(&ch->msg_to_pull_mutex);
-		init_completion(&ch->wdisconnect_wait);
-
-		atomic_set(&ch->n_on_msg_allocate_wq, 0);
-		init_waitqueue_head(&ch->msg_allocate_wq);
-		init_waitqueue_head(&ch->idle_wq);
-	}
-}
-
-
-/*
- * Setup the infrastructure necessary to support XPartition Communication
- * between the specified remote partition and the local one.
- */
-enum xpc_retval
-xpc_setup_infrastructure(struct xpc_partition *part)
-{
-	int ret, cpuid;
-	struct timer_list *timer;
-	partid_t partid = XPC_PARTID(part);
-
-
-	/*
-	 * Zero out MOST of the entry for this partition. Only the fields
-	 * starting with `nchannels' will be zeroed. The preceding fields must
-	 * remain `viable' across partition ups and downs, since they may be
-	 * referenced during this memset() operation.
-	 */
-	memset(&part->nchannels, 0, sizeof(struct xpc_partition) -
-				offsetof(struct xpc_partition, nchannels));
-
-	/*
-	 * Allocate all of the channel structures as a contiguous chunk of
-	 * memory.
-	 */
-	part->channels = kzalloc(sizeof(struct xpc_channel) * XPC_NCHANNELS,
-								GFP_KERNEL);
-	if (part->channels == NULL) {
-		dev_err(xpc_chan, "can't get memory for channels\n");
-		return xpcNoMemory;
-	}
-
-	part->nchannels = XPC_NCHANNELS;
-
-
-	/* allocate all the required GET/PUT values */
-
-	part->local_GPs = xpc_kzalloc_cacheline_aligned(XPC_GP_SIZE,
-					GFP_KERNEL, &part->local_GPs_base);
-	if (part->local_GPs == NULL) {
-		kfree(part->channels);
-		part->channels = NULL;
-		dev_err(xpc_chan, "can't get memory for local get/put "
-			"values\n");
-		return xpcNoMemory;
-	}
-
-	part->remote_GPs = xpc_kzalloc_cacheline_aligned(XPC_GP_SIZE,
-					GFP_KERNEL, &part->remote_GPs_base);
-	if (part->remote_GPs == NULL) {
-		dev_err(xpc_chan, "can't get memory for remote get/put "
-			"values\n");
-		kfree(part->local_GPs_base);
-		part->local_GPs = NULL;
-		kfree(part->channels);
-		part->channels = NULL;
-		return xpcNoMemory;
-	}
-
-
-	/* allocate all the required open and close args */
-
-	part->local_openclose_args = xpc_kzalloc_cacheline_aligned(
-					XPC_OPENCLOSE_ARGS_SIZE, GFP_KERNEL,
-					&part->local_openclose_args_base);
-	if (part->local_openclose_args == NULL) {
-		dev_err(xpc_chan, "can't get memory for local connect args\n");
-		kfree(part->remote_GPs_base);
-		part->remote_GPs = NULL;
-		kfree(part->local_GPs_base);
-		part->local_GPs = NULL;
-		kfree(part->channels);
-		part->channels = NULL;
-		return xpcNoMemory;
-	}
-
-	part->remote_openclose_args = xpc_kzalloc_cacheline_aligned(
-					XPC_OPENCLOSE_ARGS_SIZE, GFP_KERNEL,
-					&part->remote_openclose_args_base);
-	if (part->remote_openclose_args == NULL) {
-		dev_err(xpc_chan, "can't get memory for remote connect args\n");
-		kfree(part->local_openclose_args_base);
-		part->local_openclose_args = NULL;
-		kfree(part->remote_GPs_base);
-		part->remote_GPs = NULL;
-		kfree(part->local_GPs_base);
-		part->local_GPs = NULL;
-		kfree(part->channels);
-		part->channels = NULL;
-		return xpcNoMemory;
-	}
-
-
-	xpc_initialize_channels(part, partid);
-
-	atomic_set(&part->nchannels_active, 0);
-	atomic_set(&part->nchannels_engaged, 0);
-
-
-	/* local_IPI_amo were set to 0 by an earlier memset() */
-
-	/* Initialize this partitions AMO_t structure */
-	part->local_IPI_amo_va = xpc_IPI_init(partid);
-
-	spin_lock_init(&part->IPI_lock);
-
-	atomic_set(&part->channel_mgr_requests, 1);
-	init_waitqueue_head(&part->channel_mgr_wq);
-
-	sprintf(part->IPI_owner, "xpc%02d", partid);
-	ret = request_irq(SGI_XPC_NOTIFY, xpc_notify_IRQ_handler, IRQF_SHARED,
-				part->IPI_owner, (void *) (u64) partid);
-	if (ret != 0) {
-		dev_err(xpc_chan, "can't register NOTIFY IRQ handler, "
-			"errno=%d\n", -ret);
-		kfree(part->remote_openclose_args_base);
-		part->remote_openclose_args = NULL;
-		kfree(part->local_openclose_args_base);
-		part->local_openclose_args = NULL;
-		kfree(part->remote_GPs_base);
-		part->remote_GPs = NULL;
-		kfree(part->local_GPs_base);
-		part->local_GPs = NULL;
-		kfree(part->channels);
-		part->channels = NULL;
-		return xpcLackOfResources;
-	}
-
-	/* Setup a timer to check for dropped IPIs */
-	timer = &part->dropped_IPI_timer;
-	init_timer(timer);
-	timer->function = (void (*)(unsigned long)) xpc_dropped_IPI_check;
-	timer->data = (unsigned long) part;
-	timer->expires = jiffies + XPC_P_DROPPED_IPI_WAIT;
-	add_timer(timer);
-
-	/*
-	 * With the setting of the partition setup_state to XPC_P_SETUP, we're
-	 * declaring that this partition is ready to go.
-	 */
-	part->setup_state = XPC_P_SETUP;
-
-
-	/*
-	 * Setup the per partition specific variables required by the
-	 * remote partition to establish channel connections with us.
-	 *
-	 * The setting of the magic # indicates that these per partition
-	 * specific variables are ready to be used.
-	 */
-	xpc_vars_part[partid].GPs_pa = __pa(part->local_GPs);
-	xpc_vars_part[partid].openclose_args_pa =
-					__pa(part->local_openclose_args);
-	xpc_vars_part[partid].IPI_amo_pa = __pa(part->local_IPI_amo_va);
-	cpuid = raw_smp_processor_id();	/* any CPU in this partition will do */
-	xpc_vars_part[partid].IPI_nasid = cpuid_to_nasid(cpuid);
-	xpc_vars_part[partid].IPI_phys_cpuid = cpu_physical_id(cpuid);
-	xpc_vars_part[partid].nchannels = part->nchannels;
-	xpc_vars_part[partid].magic = XPC_VP_MAGIC1;
-
-	return xpcSuccess;
-}
-
-
-/*
- * Create a wrapper that hides the underlying mechanism for pulling a cacheline
- * (or multiple cachelines) from a remote partition.
- *
- * src must be a cacheline aligned physical address on the remote partition.
- * dst must be a cacheline aligned virtual address on this partition.
- * cnt must be an cacheline sized
- */
-static enum xpc_retval
-xpc_pull_remote_cachelines(struct xpc_partition *part, void *dst,
-				const void *src, size_t cnt)
-{
-	bte_result_t bte_ret;
-
-
-	DBUG_ON((u64) src != L1_CACHE_ALIGN((u64) src));
-	DBUG_ON((u64) dst != L1_CACHE_ALIGN((u64) dst));
-	DBUG_ON(cnt != L1_CACHE_ALIGN(cnt));
-
-	if (part->act_state == XPC_P_DEACTIVATING) {
-		return part->reason;
-	}
-
-	bte_ret = xp_bte_copy((u64) src, (u64) dst, (u64) cnt,
-					(BTE_NORMAL | BTE_WACQUIRE), NULL);
-	if (bte_ret == BTE_SUCCESS) {
-		return xpcSuccess;
-	}
-
-	dev_dbg(xpc_chan, "xp_bte_copy() from partition %d failed, ret=%d\n",
-		XPC_PARTID(part), bte_ret);
-
-	return xpc_map_bte_errors(bte_ret);
-}
-
-
-/*
- * Pull the remote per partition specific variables from the specified
- * partition.
- */
-enum xpc_retval
-xpc_pull_remote_vars_part(struct xpc_partition *part)
-{
-	u8 buffer[L1_CACHE_BYTES * 2];
-	struct xpc_vars_part *pulled_entry_cacheline =
-			(struct xpc_vars_part *) L1_CACHE_ALIGN((u64) buffer);
-	struct xpc_vars_part *pulled_entry;
-	u64 remote_entry_cacheline_pa, remote_entry_pa;
-	partid_t partid = XPC_PARTID(part);
-	enum xpc_retval ret;
-
-
-	/* pull the cacheline that contains the variables we're interested in */
-
-	DBUG_ON(part->remote_vars_part_pa !=
-				L1_CACHE_ALIGN(part->remote_vars_part_pa));
-	DBUG_ON(sizeof(struct xpc_vars_part) != L1_CACHE_BYTES / 2);
-
-	remote_entry_pa = part->remote_vars_part_pa +
-			sn_partition_id * sizeof(struct xpc_vars_part);
-
-	remote_entry_cacheline_pa = (remote_entry_pa & ~(L1_CACHE_BYTES - 1));
-
-	pulled_entry = (struct xpc_vars_part *) ((u64) pulled_entry_cacheline +
-				(remote_entry_pa & (L1_CACHE_BYTES - 1)));
-
-	ret = xpc_pull_remote_cachelines(part, pulled_entry_cacheline,
-					(void *) remote_entry_cacheline_pa,
-					L1_CACHE_BYTES);
-	if (ret != xpcSuccess) {
-		dev_dbg(xpc_chan, "failed to pull XPC vars_part from "
-			"partition %d, ret=%d\n", partid, ret);
-		return ret;
-	}
-
-
-	/* see if they've been set up yet */
-
-	if (pulled_entry->magic != XPC_VP_MAGIC1 &&
-				pulled_entry->magic != XPC_VP_MAGIC2) {
-
-		if (pulled_entry->magic != 0) {
-			dev_dbg(xpc_chan, "partition %d's XPC vars_part for "
-				"partition %d has bad magic value (=0x%lx)\n",
-				partid, sn_partition_id, pulled_entry->magic);
-			return xpcBadMagic;
-		}
-
-		/* they've not been initialized yet */
-		return xpcRetry;
-	}
-
-	if (xpc_vars_part[partid].magic == XPC_VP_MAGIC1) {
-
-		/* validate the variables */
-
-		if (pulled_entry->GPs_pa == 0 ||
-				pulled_entry->openclose_args_pa == 0 ||
-					pulled_entry->IPI_amo_pa == 0) {
-
-			dev_err(xpc_chan, "partition %d's XPC vars_part for "
-				"partition %d are not valid\n", partid,
-				sn_partition_id);
-			return xpcInvalidAddress;
-		}
-
-		/* the variables we imported look to be valid */
-
-		part->remote_GPs_pa = pulled_entry->GPs_pa;
-		part->remote_openclose_args_pa =
-					pulled_entry->openclose_args_pa;
-		part->remote_IPI_amo_va =
-				      (AMO_t *) __va(pulled_entry->IPI_amo_pa);
-		part->remote_IPI_nasid = pulled_entry->IPI_nasid;
-		part->remote_IPI_phys_cpuid = pulled_entry->IPI_phys_cpuid;
-
-		if (part->nchannels > pulled_entry->nchannels) {
-			part->nchannels = pulled_entry->nchannels;
-		}
-
-		/* let the other side know that we've pulled their variables */
-
-		xpc_vars_part[partid].magic = XPC_VP_MAGIC2;
-	}
-
-	if (pulled_entry->magic == XPC_VP_MAGIC1) {
-		return xpcRetry;
-	}
-
-	return xpcSuccess;
-}
-
-
-/*
- * Get the IPI flags and pull the openclose args and/or remote GPs as needed.
- */
-static u64
-xpc_get_IPI_flags(struct xpc_partition *part)
-{
-	unsigned long irq_flags;
-	u64 IPI_amo;
-	enum xpc_retval ret;
-
-
-	/*
-	 * See if there are any IPI flags to be handled.
-	 */
-
-	spin_lock_irqsave(&part->IPI_lock, irq_flags);
-	if ((IPI_amo = part->local_IPI_amo) != 0) {
-		part->local_IPI_amo = 0;
-	}
-	spin_unlock_irqrestore(&part->IPI_lock, irq_flags);
-
-
-	if (XPC_ANY_OPENCLOSE_IPI_FLAGS_SET(IPI_amo)) {
-		ret = xpc_pull_remote_cachelines(part,
-					part->remote_openclose_args,
-					(void *) part->remote_openclose_args_pa,
-					XPC_OPENCLOSE_ARGS_SIZE);
-		if (ret != xpcSuccess) {
-			XPC_DEACTIVATE_PARTITION(part, ret);
-
-			dev_dbg(xpc_chan, "failed to pull openclose args from "
-				"partition %d, ret=%d\n", XPC_PARTID(part),
-				ret);
-
-			/* don't bother processing IPIs anymore */
-			IPI_amo = 0;
-		}
-	}
-
-	if (XPC_ANY_MSG_IPI_FLAGS_SET(IPI_amo)) {
-		ret = xpc_pull_remote_cachelines(part, part->remote_GPs,
-						(void *) part->remote_GPs_pa,
-						XPC_GP_SIZE);
-		if (ret != xpcSuccess) {
-			XPC_DEACTIVATE_PARTITION(part, ret);
-
-			dev_dbg(xpc_chan, "failed to pull GPs from partition "
-				"%d, ret=%d\n", XPC_PARTID(part), ret);
-
-			/* don't bother processing IPIs anymore */
-			IPI_amo = 0;
-		}
-	}
-
-	return IPI_amo;
-}
-
-
-/*
- * Allocate the local message queue and the notify queue.
- */
-static enum xpc_retval
-xpc_allocate_local_msgqueue(struct xpc_channel *ch)
-{
-	unsigned long irq_flags;
-	int nentries;
-	size_t nbytes;
-
-
-	// >>> may want to check for ch->flags & XPC_C_DISCONNECTING between
-	// >>> iterations of the for-loop, bail if set?
-
-	// >>> should we impose a minimum #of entries? like 4 or 8?
-	for (nentries = ch->local_nentries; nentries > 0; nentries--) {
-
-		nbytes = nentries * ch->msg_size;
-		ch->local_msgqueue = xpc_kzalloc_cacheline_aligned(nbytes,
-						GFP_KERNEL,
-						&ch->local_msgqueue_base);
-		if (ch->local_msgqueue == NULL) {
-			continue;
-		}
-
-		nbytes = nentries * sizeof(struct xpc_notify);
-		ch->notify_queue = kzalloc(nbytes, GFP_KERNEL);
-		if (ch->notify_queue == NULL) {
-			kfree(ch->local_msgqueue_base);
-			ch->local_msgqueue = NULL;
-			continue;
-		}
-
-		spin_lock_irqsave(&ch->lock, irq_flags);
-		if (nentries < ch->local_nentries) {
-			dev_dbg(xpc_chan, "nentries=%d local_nentries=%d, "
-				"partid=%d, channel=%d\n", nentries,
-				ch->local_nentries, ch->partid, ch->number);
-
-			ch->local_nentries = nentries;
-		}
-		spin_unlock_irqrestore(&ch->lock, irq_flags);
-		return xpcSuccess;
-	}
-
-	dev_dbg(xpc_chan, "can't get memory for local message queue and notify "
-		"queue, partid=%d, channel=%d\n", ch->partid, ch->number);
-	return xpcNoMemory;
-}
-
-
-/*
- * Allocate the cached remote message queue.
- */
-static enum xpc_retval
-xpc_allocate_remote_msgqueue(struct xpc_channel *ch)
-{
-	unsigned long irq_flags;
-	int nentries;
-	size_t nbytes;
-
-
-	DBUG_ON(ch->remote_nentries <= 0);
-
-	// >>> may want to check for ch->flags & XPC_C_DISCONNECTING between
-	// >>> iterations of the for-loop, bail if set?
-
-	// >>> should we impose a minimum #of entries? like 4 or 8?
-	for (nentries = ch->remote_nentries; nentries > 0; nentries--) {
-
-		nbytes = nentries * ch->msg_size;
-		ch->remote_msgqueue = xpc_kzalloc_cacheline_aligned(nbytes,
-						GFP_KERNEL,
-						&ch->remote_msgqueue_base);
-		if (ch->remote_msgqueue == NULL) {
-			continue;
-		}
-
-		spin_lock_irqsave(&ch->lock, irq_flags);
-		if (nentries < ch->remote_nentries) {
-			dev_dbg(xpc_chan, "nentries=%d remote_nentries=%d, "
-				"partid=%d, channel=%d\n", nentries,
-				ch->remote_nentries, ch->partid, ch->number);
-
-			ch->remote_nentries = nentries;
-		}
-		spin_unlock_irqrestore(&ch->lock, irq_flags);
-		return xpcSuccess;
-	}
-
-	dev_dbg(xpc_chan, "can't get memory for cached remote message queue, "
-		"partid=%d, channel=%d\n", ch->partid, ch->number);
-	return xpcNoMemory;
-}
-
-
-/*
- * Allocate message queues and other stuff associated with a channel.
- *
- * Note: Assumes all of the channel sizes are filled in.
- */
-static enum xpc_retval
-xpc_allocate_msgqueues(struct xpc_channel *ch)
-{
-	unsigned long irq_flags;
-	enum xpc_retval ret;
-
-
-	DBUG_ON(ch->flags & XPC_C_SETUP);
-
-	if ((ret = xpc_allocate_local_msgqueue(ch)) != xpcSuccess) {
-		return ret;
-	}
-
-	if ((ret = xpc_allocate_remote_msgqueue(ch)) != xpcSuccess) {
-		kfree(ch->local_msgqueue_base);
-		ch->local_msgqueue = NULL;
-		kfree(ch->notify_queue);
-		ch->notify_queue = NULL;
-		return ret;
-	}
-
-	spin_lock_irqsave(&ch->lock, irq_flags);
-	ch->flags |= XPC_C_SETUP;
-	spin_unlock_irqrestore(&ch->lock, irq_flags);
-
-	return xpcSuccess;
-}
-
-
-/*
- * Process a connect message from a remote partition.
- *
- * Note: xpc_process_connect() is expecting to be called with the
- * spin_lock_irqsave held and will leave it locked upon return.
- */
-static void
-xpc_process_connect(struct xpc_channel *ch, unsigned long *irq_flags)
-{
-	enum xpc_retval ret;
-
-
-	DBUG_ON(!spin_is_locked(&ch->lock));
-
-	if (!(ch->flags & XPC_C_OPENREQUEST) ||
-				!(ch->flags & XPC_C_ROPENREQUEST)) {
-		/* nothing more to do for now */
-		return;
-	}
-	DBUG_ON(!(ch->flags & XPC_C_CONNECTING));
-
-	if (!(ch->flags & XPC_C_SETUP)) {
-		spin_unlock_irqrestore(&ch->lock, *irq_flags);
-		ret = xpc_allocate_msgqueues(ch);
-		spin_lock_irqsave(&ch->lock, *irq_flags);
-
-		if (ret != xpcSuccess) {
-			XPC_DISCONNECT_CHANNEL(ch, ret, irq_flags);
-		}
-		if (ch->flags & (XPC_C_CONNECTED | XPC_C_DISCONNECTING)) {
-			return;
-		}
-
-		DBUG_ON(!(ch->flags & XPC_C_SETUP));
-		DBUG_ON(ch->local_msgqueue == NULL);
-		DBUG_ON(ch->remote_msgqueue == NULL);
-	}
-
-	if (!(ch->flags & XPC_C_OPENREPLY)) {
-		ch->flags |= XPC_C_OPENREPLY;
-		xpc_IPI_send_openreply(ch, irq_flags);
-	}
-
-	if (!(ch->flags & XPC_C_ROPENREPLY)) {
-		return;
-	}
-
-	DBUG_ON(ch->remote_msgqueue_pa == 0);
-
-	ch->flags = (XPC_C_CONNECTED | XPC_C_SETUP);	/* clear all else */
-
-	dev_info(xpc_chan, "channel %d to partition %d connected\n",
-		ch->number, ch->partid);
-
-	spin_unlock_irqrestore(&ch->lock, *irq_flags);
-	xpc_create_kthreads(ch, 1, 0);
-	spin_lock_irqsave(&ch->lock, *irq_flags);
-}
-
-
-/*
- * Notify those who wanted to be notified upon delivery of their message.
- */
-static void
-xpc_notify_senders(struct xpc_channel *ch, enum xpc_retval reason, s64 put)
-{
-	struct xpc_notify *notify;
-	u8 notify_type;
-	s64 get = ch->w_remote_GP.get - 1;
-
-
-	while (++get < put && atomic_read(&ch->n_to_notify) > 0) {
-
-		notify = &ch->notify_queue[get % ch->local_nentries];
-
-		/*
-		 * See if the notify entry indicates it was associated with
-		 * a message who's sender wants to be notified. It is possible
-		 * that it is, but someone else is doing or has done the
-		 * notification.
-		 */
-		notify_type = notify->type;
-		if (notify_type == 0 ||
-				cmpxchg(&notify->type, notify_type, 0) !=
-								notify_type) {
-			continue;
-		}
-
-		DBUG_ON(notify_type != XPC_N_CALL);
-
-		atomic_dec(&ch->n_to_notify);
-
-		if (notify->func != NULL) {
-			dev_dbg(xpc_chan, "notify->func() called, notify=0x%p, "
-				"msg_number=%ld, partid=%d, channel=%d\n",
-				(void *) notify, get, ch->partid, ch->number);
-
-			notify->func(reason, ch->partid, ch->number,
-								notify->key);
-
-			dev_dbg(xpc_chan, "notify->func() returned, "
-				"notify=0x%p, msg_number=%ld, partid=%d, "
-				"channel=%d\n", (void *) notify, get,
-				ch->partid, ch->number);
-		}
-	}
-}
-
-
-/*
- * Free up message queues and other stuff that were allocated for the specified
- * channel.
- *
- * Note: ch->reason and ch->reason_line are left set for debugging purposes,
- * they're cleared when XPC_C_DISCONNECTED is cleared.
- */
-static void
-xpc_free_msgqueues(struct xpc_channel *ch)
-{
-	DBUG_ON(!spin_is_locked(&ch->lock));
-	DBUG_ON(atomic_read(&ch->n_to_notify) != 0);
-
-	ch->remote_msgqueue_pa = 0;
-	ch->func = NULL;
-	ch->key = NULL;
-	ch->msg_size = 0;
-	ch->local_nentries = 0;
-	ch->remote_nentries = 0;
-	ch->kthreads_assigned_limit = 0;
-	ch->kthreads_idle_limit = 0;
-
-	ch->local_GP->get = 0;
-	ch->local_GP->put = 0;
-	ch->remote_GP.get = 0;
-	ch->remote_GP.put = 0;
-	ch->w_local_GP.get = 0;
-	ch->w_local_GP.put = 0;
-	ch->w_remote_GP.get = 0;
-	ch->w_remote_GP.put = 0;
-	ch->next_msg_to_pull = 0;
-
-	if (ch->flags & XPC_C_SETUP) {
-		ch->flags &= ~XPC_C_SETUP;
-
-		dev_dbg(xpc_chan, "ch->flags=0x%x, partid=%d, channel=%d\n",
-			ch->flags, ch->partid, ch->number);
-
-		kfree(ch->local_msgqueue_base);
-		ch->local_msgqueue = NULL;
-		kfree(ch->remote_msgqueue_base);
-		ch->remote_msgqueue = NULL;
-		kfree(ch->notify_queue);
-		ch->notify_queue = NULL;
-	}
-}
-
-
-/*
- * spin_lock_irqsave() is expected to be held on entry.
- */
-static void
-xpc_process_disconnect(struct xpc_channel *ch, unsigned long *irq_flags)
-{
-	struct xpc_partition *part = &xpc_partitions[ch->partid];
-	u32 channel_was_connected = (ch->flags & XPC_C_WASCONNECTED);
-
-
-	DBUG_ON(!spin_is_locked(&ch->lock));
-
-	if (!(ch->flags & XPC_C_DISCONNECTING)) {
-		return;
-	}
-
-	DBUG_ON(!(ch->flags & XPC_C_CLOSEREQUEST));
-
-	/* make sure all activity has settled down first */
-
-	if (atomic_read(&ch->kthreads_assigned) > 0 ||
-				atomic_read(&ch->references) > 0) {
-		return;
-	}
-	DBUG_ON((ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) &&
-			!(ch->flags & XPC_C_DISCONNECTINGCALLOUT_MADE));
-
-	if (part->act_state == XPC_P_DEACTIVATING) {
-		/* can't proceed until the other side disengages from us */
-		if (xpc_partition_engaged(1UL << ch->partid)) {
-			return;
-		}
-
-	} else {
-
-		/* as long as the other side is up do the full protocol */
-
-		if (!(ch->flags & XPC_C_RCLOSEREQUEST)) {
-			return;
-		}
-
-		if (!(ch->flags & XPC_C_CLOSEREPLY)) {
-			ch->flags |= XPC_C_CLOSEREPLY;
-			xpc_IPI_send_closereply(ch, irq_flags);
-		}
-
-		if (!(ch->flags & XPC_C_RCLOSEREPLY)) {
-			return;
-		}
-	}
-
-	/* wake those waiting for notify completion */
-	if (atomic_read(&ch->n_to_notify) > 0) {
-		/* >>> we do callout while holding ch->lock */
-		xpc_notify_senders(ch, ch->reason, ch->w_local_GP.put);
-	}
-
-	/* both sides are disconnected now */
-
-	if (ch->flags & XPC_C_DISCONNECTINGCALLOUT_MADE) {
-		spin_unlock_irqrestore(&ch->lock, *irq_flags);
-		xpc_disconnect_callout(ch, xpcDisconnected);
-		spin_lock_irqsave(&ch->lock, *irq_flags);
-	}
-
-	/* it's now safe to free the channel's message queues */
-	xpc_free_msgqueues(ch);
-
-	/* mark disconnected, clear all other flags except XPC_C_WDISCONNECT */
-	ch->flags = (XPC_C_DISCONNECTED | (ch->flags & XPC_C_WDISCONNECT));
-
-	atomic_dec(&part->nchannels_active);
-
-	if (channel_was_connected) {
-		dev_info(xpc_chan, "channel %d to partition %d disconnected, "
-			"reason=%d\n", ch->number, ch->partid, ch->reason);
-	}
-
-	if (ch->flags & XPC_C_WDISCONNECT) {
-		/* we won't lose the CPU since we're holding ch->lock */
-		complete(&ch->wdisconnect_wait);
-	} else if (ch->delayed_IPI_flags) {
-		if (part->act_state != XPC_P_DEACTIVATING) {
-			/* time to take action on any delayed IPI flags */
-			spin_lock(&part->IPI_lock);
-			XPC_SET_IPI_FLAGS(part->local_IPI_amo, ch->number,
-							ch->delayed_IPI_flags);
-			spin_unlock(&part->IPI_lock);
-		}
-		ch->delayed_IPI_flags = 0;
-	}
-}
-
-
-/*
- * Process a change in the channel's remote connection state.
- */
-static void
-xpc_process_openclose_IPI(struct xpc_partition *part, int ch_number,
-				u8 IPI_flags)
-{
-	unsigned long irq_flags;
-	struct xpc_openclose_args *args =
-				&part->remote_openclose_args[ch_number];
-	struct xpc_channel *ch = &part->channels[ch_number];
-	enum xpc_retval reason;
-
-
-
-	spin_lock_irqsave(&ch->lock, irq_flags);
-
-again:
-
-	if ((ch->flags & XPC_C_DISCONNECTED) &&
-					(ch->flags & XPC_C_WDISCONNECT)) {
-		/*
-		 * Delay processing IPI flags until thread waiting disconnect
-		 * has had a chance to see that the channel is disconnected.
-		 */
-		ch->delayed_IPI_flags |= IPI_flags;
-		spin_unlock_irqrestore(&ch->lock, irq_flags);
-		return;
-	}
-
-
-	if (IPI_flags & XPC_IPI_CLOSEREQUEST) {
-
-		dev_dbg(xpc_chan, "XPC_IPI_CLOSEREQUEST (reason=%d) received "
-			"from partid=%d, channel=%d\n", args->reason,
-			ch->partid, ch->number);
-
-		/*
-		 * If RCLOSEREQUEST is set, we're probably waiting for
-		 * RCLOSEREPLY. We should find it and a ROPENREQUEST packed
-		 * with this RCLOSEREQUEST in the IPI_flags.
-		 */
-
-		if (ch->flags & XPC_C_RCLOSEREQUEST) {
-			DBUG_ON(!(ch->flags & XPC_C_DISCONNECTING));
-			DBUG_ON(!(ch->flags & XPC_C_CLOSEREQUEST));
-			DBUG_ON(!(ch->flags & XPC_C_CLOSEREPLY));
-			DBUG_ON(ch->flags & XPC_C_RCLOSEREPLY);
-
-			DBUG_ON(!(IPI_flags & XPC_IPI_CLOSEREPLY));
-			IPI_flags &= ~XPC_IPI_CLOSEREPLY;
-			ch->flags |= XPC_C_RCLOSEREPLY;
-
-			/* both sides have finished disconnecting */
-			xpc_process_disconnect(ch, &irq_flags);
-			DBUG_ON(!(ch->flags & XPC_C_DISCONNECTED));
-			goto again;
-		}
-
-		if (ch->flags & XPC_C_DISCONNECTED) {
-			if (!(IPI_flags & XPC_IPI_OPENREQUEST)) {
-				if ((XPC_GET_IPI_FLAGS(part->local_IPI_amo,
-					 ch_number) & XPC_IPI_OPENREQUEST)) {
-
-					DBUG_ON(ch->delayed_IPI_flags != 0);
-					spin_lock(&part->IPI_lock);
-					XPC_SET_IPI_FLAGS(part->local_IPI_amo,
-							ch_number,
-							XPC_IPI_CLOSEREQUEST);
-					spin_unlock(&part->IPI_lock);
-				}
-				spin_unlock_irqrestore(&ch->lock, irq_flags);
-				return;
-			}
-
-			XPC_SET_REASON(ch, 0, 0);
-			ch->flags &= ~XPC_C_DISCONNECTED;
-
-			atomic_inc(&part->nchannels_active);
-			ch->flags |= (XPC_C_CONNECTING | XPC_C_ROPENREQUEST);
-		}
-
-		IPI_flags &= ~(XPC_IPI_OPENREQUEST | XPC_IPI_OPENREPLY);
-
-		/*
-		 * The meaningful CLOSEREQUEST connection state fields are:
-		 *      reason = reason connection is to be closed
-		 */
-
-		ch->flags |= XPC_C_RCLOSEREQUEST;
-
-		if (!(ch->flags & XPC_C_DISCONNECTING)) {
-			reason = args->reason;
-			if (reason <= xpcSuccess || reason > xpcUnknownReason) {
-				reason = xpcUnknownReason;
-			} else if (reason == xpcUnregistering) {
-				reason = xpcOtherUnregistering;
-			}
-
-			XPC_DISCONNECT_CHANNEL(ch, reason, &irq_flags);
-
-			DBUG_ON(IPI_flags & XPC_IPI_CLOSEREPLY);
-			spin_unlock_irqrestore(&ch->lock, irq_flags);
-			return;
-		}
-
-		xpc_process_disconnect(ch, &irq_flags);
-	}
-
-
-	if (IPI_flags & XPC_IPI_CLOSEREPLY) {
-
-		dev_dbg(xpc_chan, "XPC_IPI_CLOSEREPLY received from partid=%d,"
-			" channel=%d\n", ch->partid, ch->number);
-
-		if (ch->flags & XPC_C_DISCONNECTED) {
-			DBUG_ON(part->act_state != XPC_P_DEACTIVATING);
-			spin_unlock_irqrestore(&ch->lock, irq_flags);
-			return;
-		}
-
-		DBUG_ON(!(ch->flags & XPC_C_CLOSEREQUEST));
-
-		if (!(ch->flags & XPC_C_RCLOSEREQUEST)) {
-			if ((XPC_GET_IPI_FLAGS(part->local_IPI_amo, ch_number)
-						& XPC_IPI_CLOSEREQUEST)) {
-
-				DBUG_ON(ch->delayed_IPI_flags != 0);
-				spin_lock(&part->IPI_lock);
-				XPC_SET_IPI_FLAGS(part->local_IPI_amo,
-						ch_number, XPC_IPI_CLOSEREPLY);
-				spin_unlock(&part->IPI_lock);
-			}
-			spin_unlock_irqrestore(&ch->lock, irq_flags);
-			return;
-		}
-
-		ch->flags |= XPC_C_RCLOSEREPLY;
-
-		if (ch->flags & XPC_C_CLOSEREPLY) {
-			/* both sides have finished disconnecting */
-			xpc_process_disconnect(ch, &irq_flags);
-		}
-	}
-
-
-	if (IPI_flags & XPC_IPI_OPENREQUEST) {
-
-		dev_dbg(xpc_chan, "XPC_IPI_OPENREQUEST (msg_size=%d, "
-			"local_nentries=%d) received from partid=%d, "
-			"channel=%d\n", args->msg_size, args->local_nentries,
-			ch->partid, ch->number);
-
-		if (part->act_state == XPC_P_DEACTIVATING ||
-					(ch->flags & XPC_C_ROPENREQUEST)) {
-			spin_unlock_irqrestore(&ch->lock, irq_flags);
-			return;
-		}
-
-		if (ch->flags & (XPC_C_DISCONNECTING | XPC_C_WDISCONNECT)) {
-			ch->delayed_IPI_flags |= XPC_IPI_OPENREQUEST;
-			spin_unlock_irqrestore(&ch->lock, irq_flags);
-			return;
-		}
-		DBUG_ON(!(ch->flags & (XPC_C_DISCONNECTED |
-							XPC_C_OPENREQUEST)));
-		DBUG_ON(ch->flags & (XPC_C_ROPENREQUEST | XPC_C_ROPENREPLY |
-					XPC_C_OPENREPLY | XPC_C_CONNECTED));
-
-		/*
-		 * The meaningful OPENREQUEST connection state fields are:
-		 *      msg_size = size of channel's messages in bytes
-		 *      local_nentries = remote partition's local_nentries
-		 */
-		if (args->msg_size == 0 || args->local_nentries == 0) {
-			/* assume OPENREQUEST was delayed by mistake */
-			spin_unlock_irqrestore(&ch->lock, irq_flags);
-			return;
-		}
-
-		ch->flags |= (XPC_C_ROPENREQUEST | XPC_C_CONNECTING);
-		ch->remote_nentries = args->local_nentries;
-
-
-		if (ch->flags & XPC_C_OPENREQUEST) {
-			if (args->msg_size != ch->msg_size) {
-				XPC_DISCONNECT_CHANNEL(ch, xpcUnequalMsgSizes,
-								&irq_flags);
-				spin_unlock_irqrestore(&ch->lock, irq_flags);
-				return;
-			}
-		} else {
-			ch->msg_size = args->msg_size;
-
-			XPC_SET_REASON(ch, 0, 0);
-			ch->flags &= ~XPC_C_DISCONNECTED;
-
-			atomic_inc(&part->nchannels_active);
-		}
-
-		xpc_process_connect(ch, &irq_flags);
-	}
-
-
-	if (IPI_flags & XPC_IPI_OPENREPLY) {
-
-		dev_dbg(xpc_chan, "XPC_IPI_OPENREPLY (local_msgqueue_pa=0x%lx, "
-			"local_nentries=%d, remote_nentries=%d) received from "
-			"partid=%d, channel=%d\n", args->local_msgqueue_pa,
-			args->local_nentries, args->remote_nentries,
-			ch->partid, ch->number);
-
-		if (ch->flags & (XPC_C_DISCONNECTING | XPC_C_DISCONNECTED)) {
-			spin_unlock_irqrestore(&ch->lock, irq_flags);
-			return;
-		}
-		if (!(ch->flags & XPC_C_OPENREQUEST)) {
-			XPC_DISCONNECT_CHANNEL(ch, xpcOpenCloseError,
-								&irq_flags);
-			spin_unlock_irqrestore(&ch->lock, irq_flags);
-			return;
-		}
-
-		DBUG_ON(!(ch->flags & XPC_C_ROPENREQUEST));
-		DBUG_ON(ch->flags & XPC_C_CONNECTED);
-
-		/*
-		 * The meaningful OPENREPLY connection state fields are:
-		 *      local_msgqueue_pa = physical address of remote
-		 *			    partition's local_msgqueue
-		 *      local_nentries = remote partition's local_nentries
-		 *      remote_nentries = remote partition's remote_nentries
-		 */
-		DBUG_ON(args->local_msgqueue_pa == 0);
-		DBUG_ON(args->local_nentries == 0);
-		DBUG_ON(args->remote_nentries == 0);
-
-		ch->flags |= XPC_C_ROPENREPLY;
-		ch->remote_msgqueue_pa = args->local_msgqueue_pa;
-
-		if (args->local_nentries < ch->remote_nentries) {
-			dev_dbg(xpc_chan, "XPC_IPI_OPENREPLY: new "
-				"remote_nentries=%d, old remote_nentries=%d, "
-				"partid=%d, channel=%d\n",
-				args->local_nentries, ch->remote_nentries,
-				ch->partid, ch->number);
-
-			ch->remote_nentries = args->local_nentries;
-		}
-		if (args->remote_nentries < ch->local_nentries) {
-			dev_dbg(xpc_chan, "XPC_IPI_OPENREPLY: new "
-				"local_nentries=%d, old local_nentries=%d, "
-				"partid=%d, channel=%d\n",
-				args->remote_nentries, ch->local_nentries,
-				ch->partid, ch->number);
-
-			ch->local_nentries = args->remote_nentries;
-		}
-
-		xpc_process_connect(ch, &irq_flags);
-	}
-
-	spin_unlock_irqrestore(&ch->lock, irq_flags);
-}
-
-
-/*
- * Attempt to establish a channel connection to a remote partition.
- */
-static enum xpc_retval
-xpc_connect_channel(struct xpc_channel *ch)
-{
-	unsigned long irq_flags;
-	struct xpc_registration *registration = &xpc_registrations[ch->number];
-
-
-	if (mutex_trylock(&registration->mutex) == 0) {
-		return xpcRetry;
-	}
-
-	if (!XPC_CHANNEL_REGISTERED(ch->number)) {
-		mutex_unlock(&registration->mutex);
-		return xpcUnregistered;
-	}
-
-	spin_lock_irqsave(&ch->lock, irq_flags);
-
-	DBUG_ON(ch->flags & XPC_C_CONNECTED);
-	DBUG_ON(ch->flags & XPC_C_OPENREQUEST);
-
-	if (ch->flags & XPC_C_DISCONNECTING) {
-		spin_unlock_irqrestore(&ch->lock, irq_flags);
-		mutex_unlock(&registration->mutex);
-		return ch->reason;
-	}
-
-
-	/* add info from the channel connect registration to the channel */
-
-	ch->kthreads_assigned_limit = registration->assigned_limit;
-	ch->kthreads_idle_limit = registration->idle_limit;
-	DBUG_ON(atomic_read(&ch->kthreads_assigned) != 0);
-	DBUG_ON(atomic_read(&ch->kthreads_idle) != 0);
-	DBUG_ON(atomic_read(&ch->kthreads_active) != 0);
-
-	ch->func = registration->func;
-	DBUG_ON(registration->func == NULL);
-	ch->key = registration->key;
-
-	ch->local_nentries = registration->nentries;
-
-	if (ch->flags & XPC_C_ROPENREQUEST) {
-		if (registration->msg_size != ch->msg_size) {
-			/* the local and remote sides aren't the same */
-
-			/*
-			 * Because XPC_DISCONNECT_CHANNEL() can block we're
-			 * forced to up the registration sema before we unlock
-			 * the channel lock. But that's okay here because we're
-			 * done with the part that required the registration
-			 * sema. XPC_DISCONNECT_CHANNEL() requires that the
-			 * channel lock be locked and will unlock and relock
-			 * the channel lock as needed.
-			 */
-			mutex_unlock(&registration->mutex);
-			XPC_DISCONNECT_CHANNEL(ch, xpcUnequalMsgSizes,
-								&irq_flags);
-			spin_unlock_irqrestore(&ch->lock, irq_flags);
-			return xpcUnequalMsgSizes;
-		}
-	} else {
-		ch->msg_size = registration->msg_size;
-
-		XPC_SET_REASON(ch, 0, 0);
-		ch->flags &= ~XPC_C_DISCONNECTED;
-
-		atomic_inc(&xpc_partitions[ch->partid].nchannels_active);
-	}
-
-	mutex_unlock(&registration->mutex);
-
-
-	/* initiate the connection */
-
-	ch->flags |= (XPC_C_OPENREQUEST | XPC_C_CONNECTING);
-	xpc_IPI_send_openrequest(ch, &irq_flags);
-
-	xpc_process_connect(ch, &irq_flags);
-
-	spin_unlock_irqrestore(&ch->lock, irq_flags);
-
-	return xpcSuccess;
-}
-
-
-/*
- * Clear some of the msg flags in the local message queue.
- */
-static inline void
-xpc_clear_local_msgqueue_flags(struct xpc_channel *ch)
-{
-	struct xpc_msg *msg;
-	s64 get;
-
-
-	get = ch->w_remote_GP.get;
-	do {
-		msg = (struct xpc_msg *) ((u64) ch->local_msgqueue +
-				(get % ch->local_nentries) * ch->msg_size);
-		msg->flags = 0;
-	} while (++get < (volatile s64) ch->remote_GP.get);
-}
-
-
-/*
- * Clear some of the msg flags in the remote message queue.
- */
-static inline void
-xpc_clear_remote_msgqueue_flags(struct xpc_channel *ch)
-{
-	struct xpc_msg *msg;
-	s64 put;
-
-
-	put = ch->w_remote_GP.put;
-	do {
-		msg = (struct xpc_msg *) ((u64) ch->remote_msgqueue +
-				(put % ch->remote_nentries) * ch->msg_size);
-		msg->flags = 0;
-	} while (++put < (volatile s64) ch->remote_GP.put);
-}
-
-
-static void
-xpc_process_msg_IPI(struct xpc_partition *part, int ch_number)
-{
-	struct xpc_channel *ch = &part->channels[ch_number];
-	int nmsgs_sent;
-
-
-	ch->remote_GP = part->remote_GPs[ch_number];
-
-
-	/* See what, if anything, has changed for each connected channel */
-
-	xpc_msgqueue_ref(ch);
-
-	if (ch->w_remote_GP.get == ch->remote_GP.get &&
-				ch->w_remote_GP.put == ch->remote_GP.put) {
-		/* nothing changed since GPs were last pulled */
-		xpc_msgqueue_deref(ch);
-		return;
-	}
-
-	if (!(ch->flags & XPC_C_CONNECTED)){
-		xpc_msgqueue_deref(ch);
-		return;
-	}
-
-
-	/*
-	 * First check to see if messages recently sent by us have been
-	 * received by the other side. (The remote GET value will have
-	 * changed since we last looked at it.)
-	 */
-
-	if (ch->w_remote_GP.get != ch->remote_GP.get) {
-
-		/*
-		 * We need to notify any senders that want to be notified
-		 * that their sent messages have been received by their
-		 * intended recipients. We need to do this before updating
-		 * w_remote_GP.get so that we don't allocate the same message
-		 * queue entries prematurely (see xpc_allocate_msg()).
-		 */
-		if (atomic_read(&ch->n_to_notify) > 0) {
-			/*
-			 * Notify senders that messages sent have been
-			 * received and delivered by the other side.
-			 */
-			xpc_notify_senders(ch, xpcMsgDelivered,
-							ch->remote_GP.get);
-		}
-
-		/*
-		 * Clear msg->flags in previously sent messages, so that
-		 * they're ready for xpc_allocate_msg().
-		 */
-		xpc_clear_local_msgqueue_flags(ch);
-
-		ch->w_remote_GP.get = ch->remote_GP.get;
-
-		dev_dbg(xpc_chan, "w_remote_GP.get changed to %ld, partid=%d, "
-			"channel=%d\n", ch->w_remote_GP.get, ch->partid,
-			ch->number);
-
-		/*
-		 * If anyone was waiting for message queue entries to become
-		 * available, wake them up.
-		 */
-		if (atomic_read(&ch->n_on_msg_allocate_wq) > 0) {
-			wake_up(&ch->msg_allocate_wq);
-		}
-	}
-
-
-	/*
-	 * Now check for newly sent messages by the other side. (The remote
-	 * PUT value will have changed since we last looked at it.)
-	 */
-
-	if (ch->w_remote_GP.put != ch->remote_GP.put) {
-		/*
-		 * Clear msg->flags in previously received messages, so that
-		 * they're ready for xpc_get_deliverable_msg().
-		 */
-		xpc_clear_remote_msgqueue_flags(ch);
-
-		ch->w_remote_GP.put = ch->remote_GP.put;
-
-		dev_dbg(xpc_chan, "w_remote_GP.put changed to %ld, partid=%d, "
-			"channel=%d\n", ch->w_remote_GP.put, ch->partid,
-			ch->number);
-
-		nmsgs_sent = ch->w_remote_GP.put - ch->w_local_GP.get;
-		if (nmsgs_sent > 0) {
-			dev_dbg(xpc_chan, "msgs waiting to be copied and "
-				"delivered=%d, partid=%d, channel=%d\n",
-				nmsgs_sent, ch->partid, ch->number);
-
-			if (ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) {
-				xpc_activate_kthreads(ch, nmsgs_sent);
-			}
-		}
-	}
-
-	xpc_msgqueue_deref(ch);
-}
-
-
-void
-xpc_process_channel_activity(struct xpc_partition *part)
-{
-	unsigned long irq_flags;
-	u64 IPI_amo, IPI_flags;
-	struct xpc_channel *ch;
-	int ch_number;
-	u32 ch_flags;
-
-
-	IPI_amo = xpc_get_IPI_flags(part);
-
-	/*
-	 * Initiate channel connections for registered channels.
-	 *
-	 * For each connected channel that has pending messages activate idle
-	 * kthreads and/or create new kthreads as needed.
-	 */
-
-	for (ch_number = 0; ch_number < part->nchannels; ch_number++) {
-		ch = &part->channels[ch_number];
-
-
-		/*
-		 * Process any open or close related IPI flags, and then deal
-		 * with connecting or disconnecting the channel as required.
-		 */
-
-		IPI_flags = XPC_GET_IPI_FLAGS(IPI_amo, ch_number);
-
-		if (XPC_ANY_OPENCLOSE_IPI_FLAGS_SET(IPI_flags)) {
-			xpc_process_openclose_IPI(part, ch_number, IPI_flags);
-		}
-
-		ch_flags = ch->flags;	/* need an atomic snapshot of flags */
-
-		if (ch_flags & XPC_C_DISCONNECTING) {
-			spin_lock_irqsave(&ch->lock, irq_flags);
-			xpc_process_disconnect(ch, &irq_flags);
-			spin_unlock_irqrestore(&ch->lock, irq_flags);
-			continue;
-		}
-
-		if (part->act_state == XPC_P_DEACTIVATING) {
-			continue;
-		}
-
-		if (!(ch_flags & XPC_C_CONNECTED)) {
-			if (!(ch_flags & XPC_C_OPENREQUEST)) {
-				DBUG_ON(ch_flags & XPC_C_SETUP);
-				(void) xpc_connect_channel(ch);
-			} else {
-				spin_lock_irqsave(&ch->lock, irq_flags);
-				xpc_process_connect(ch, &irq_flags);
-				spin_unlock_irqrestore(&ch->lock, irq_flags);
-			}
-			continue;
-		}
-
-
-		/*
-		 * Process any message related IPI flags, this may involve the
-		 * activation of kthreads to deliver any pending messages sent
-		 * from the other partition.
-		 */
-
-		if (XPC_ANY_MSG_IPI_FLAGS_SET(IPI_flags)) {
-			xpc_process_msg_IPI(part, ch_number);
-		}
-	}
-}
-
-
-/*
- * XPC's heartbeat code calls this function to inform XPC that a partition is
- * going down.  XPC responds by tearing down the XPartition Communication
- * infrastructure used for the just downed partition.
- *
- * XPC's heartbeat code will never call this function and xpc_partition_up()
- * at the same time. Nor will it ever make multiple calls to either function
- * at the same time.
- */
-void
-xpc_partition_going_down(struct xpc_partition *part, enum xpc_retval reason)
-{
-	unsigned long irq_flags;
-	int ch_number;
-	struct xpc_channel *ch;
-
-
-	dev_dbg(xpc_chan, "deactivating partition %d, reason=%d\n",
-		XPC_PARTID(part), reason);
-
-	if (!xpc_part_ref(part)) {
-		/* infrastructure for this partition isn't currently set up */
-		return;
-	}
-
-
-	/* disconnect channels associated with the partition going down */
-
-	for (ch_number = 0; ch_number < part->nchannels; ch_number++) {
-		ch = &part->channels[ch_number];
-
-		xpc_msgqueue_ref(ch);
-		spin_lock_irqsave(&ch->lock, irq_flags);
-
-		XPC_DISCONNECT_CHANNEL(ch, reason, &irq_flags);
-
-		spin_unlock_irqrestore(&ch->lock, irq_flags);
-		xpc_msgqueue_deref(ch);
-	}
-
-	xpc_wakeup_channel_mgr(part);
-
-	xpc_part_deref(part);
-}
-
-
-/*
- * Teardown the infrastructure necessary to support XPartition Communication
- * between the specified remote partition and the local one.
- */
-void
-xpc_teardown_infrastructure(struct xpc_partition *part)
-{
-	partid_t partid = XPC_PARTID(part);
-
-
-	/*
-	 * We start off by making this partition inaccessible to local
-	 * processes by marking it as no longer setup. Then we make it
-	 * inaccessible to remote processes by clearing the XPC per partition
-	 * specific variable's magic # (which indicates that these variables
-	 * are no longer valid) and by ignoring all XPC notify IPIs sent to
-	 * this partition.
-	 */
-
-	DBUG_ON(atomic_read(&part->nchannels_engaged) != 0);
-	DBUG_ON(atomic_read(&part->nchannels_active) != 0);
-	DBUG_ON(part->setup_state != XPC_P_SETUP);
-	part->setup_state = XPC_P_WTEARDOWN;
-
-	xpc_vars_part[partid].magic = 0;
-
-
-	free_irq(SGI_XPC_NOTIFY, (void *) (u64) partid);
-
-
-	/*
-	 * Before proceeding with the teardown we have to wait until all
-	 * existing references cease.
-	 */
-	wait_event(part->teardown_wq, (atomic_read(&part->references) == 0));
-
-
-	/* now we can begin tearing down the infrastructure */
-
-	part->setup_state = XPC_P_TORNDOWN;
-
-	/* in case we've still got outstanding timers registered... */
-	del_timer_sync(&part->dropped_IPI_timer);
-
-	kfree(part->remote_openclose_args_base);
-	part->remote_openclose_args = NULL;
-	kfree(part->local_openclose_args_base);
-	part->local_openclose_args = NULL;
-	kfree(part->remote_GPs_base);
-	part->remote_GPs = NULL;
-	kfree(part->local_GPs_base);
-	part->local_GPs = NULL;
-	kfree(part->channels);
-	part->channels = NULL;
-	part->local_IPI_amo_va = NULL;
-}
-
-
-/*
- * Called by XP at the time of channel connection registration to cause
- * XPC to establish connections to all currently active partitions.
- */
-void
-xpc_initiate_connect(int ch_number)
-{
-	partid_t partid;
-	struct xpc_partition *part;
-	struct xpc_channel *ch;
-
-
-	DBUG_ON(ch_number < 0 || ch_number >= XPC_NCHANNELS);
-
-	for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
-		part = &xpc_partitions[partid];
-
-		if (xpc_part_ref(part)) {
-			ch = &part->channels[ch_number];
-
-			/*
-			 * Initiate the establishment of a connection on the
-			 * newly registered channel to the remote partition.
-			 */
-			xpc_wakeup_channel_mgr(part);
-			xpc_part_deref(part);
-		}
-	}
-}
-
-
-void
-xpc_connected_callout(struct xpc_channel *ch)
-{
-	/* let the registerer know that a connection has been established */
-
-	if (ch->func != NULL) {
-		dev_dbg(xpc_chan, "ch->func() called, reason=xpcConnected, "
-			"partid=%d, channel=%d\n", ch->partid, ch->number);
-
-		ch->func(xpcConnected, ch->partid, ch->number,
-				(void *) (u64) ch->local_nentries, ch->key);
-
-		dev_dbg(xpc_chan, "ch->func() returned, reason=xpcConnected, "
-			"partid=%d, channel=%d\n", ch->partid, ch->number);
-	}
-}
-
-
-/*
- * Called by XP at the time of channel connection unregistration to cause
- * XPC to teardown all current connections for the specified channel.
- *
- * Before returning xpc_initiate_disconnect() will wait until all connections
- * on the specified channel have been closed/torndown. So the caller can be
- * assured that they will not be receiving any more callouts from XPC to the
- * function they registered via xpc_connect().
- *
- * Arguments:
- *
- *	ch_number - channel # to unregister.
- */
-void
-xpc_initiate_disconnect(int ch_number)
-{
-	unsigned long irq_flags;
-	partid_t partid;
-	struct xpc_partition *part;
-	struct xpc_channel *ch;
-
-
-	DBUG_ON(ch_number < 0 || ch_number >= XPC_NCHANNELS);
-
-	/* initiate the channel disconnect for every active partition */
-	for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
-		part = &xpc_partitions[partid];
-
-		if (xpc_part_ref(part)) {
-			ch = &part->channels[ch_number];
-			xpc_msgqueue_ref(ch);
-
-			spin_lock_irqsave(&ch->lock, irq_flags);
-
-			if (!(ch->flags & XPC_C_DISCONNECTED)) {
-				ch->flags |= XPC_C_WDISCONNECT;
-
-				XPC_DISCONNECT_CHANNEL(ch, xpcUnregistering,
-								&irq_flags);
-			}
-
-			spin_unlock_irqrestore(&ch->lock, irq_flags);
-
-			xpc_msgqueue_deref(ch);
-			xpc_part_deref(part);
-		}
-	}
-
-	xpc_disconnect_wait(ch_number);
-}
-
-
-/*
- * To disconnect a channel, and reflect it back to all who may be waiting.
- *
- * An OPEN is not allowed until XPC_C_DISCONNECTING is cleared by
- * xpc_process_disconnect(), and if set, XPC_C_WDISCONNECT is cleared by
- * xpc_disconnect_wait().
- *
- * THE CHANNEL IS TO BE LOCKED BY THE CALLER AND WILL REMAIN LOCKED UPON RETURN.
- */
-void
-xpc_disconnect_channel(const int line, struct xpc_channel *ch,
-			enum xpc_retval reason, unsigned long *irq_flags)
-{
-	u32 channel_was_connected = (ch->flags & XPC_C_CONNECTED);
-
-
-	DBUG_ON(!spin_is_locked(&ch->lock));
-
-	if (ch->flags & (XPC_C_DISCONNECTING | XPC_C_DISCONNECTED)) {
-		return;
-	}
-	DBUG_ON(!(ch->flags & (XPC_C_CONNECTING | XPC_C_CONNECTED)));
-
-	dev_dbg(xpc_chan, "reason=%d, line=%d, partid=%d, channel=%d\n",
-		reason, line, ch->partid, ch->number);
-
-	XPC_SET_REASON(ch, reason, line);
-
-	ch->flags |= (XPC_C_CLOSEREQUEST | XPC_C_DISCONNECTING);
-	/* some of these may not have been set */
-	ch->flags &= ~(XPC_C_OPENREQUEST | XPC_C_OPENREPLY |
-			XPC_C_ROPENREQUEST | XPC_C_ROPENREPLY |
-			XPC_C_CONNECTING | XPC_C_CONNECTED);
-
-	xpc_IPI_send_closerequest(ch, irq_flags);
-
-	if (channel_was_connected) {
-		ch->flags |= XPC_C_WASCONNECTED;
-	}
-
-	spin_unlock_irqrestore(&ch->lock, *irq_flags);
-
-	/* wake all idle kthreads so they can exit */
-	if (atomic_read(&ch->kthreads_idle) > 0) {
-		wake_up_all(&ch->idle_wq);
-
-	} else if ((ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) &&
-			!(ch->flags & XPC_C_DISCONNECTINGCALLOUT)) {
-		/* start a kthread that will do the xpcDisconnecting callout */
-		xpc_create_kthreads(ch, 1, 1);
-	}
-
-	/* wake those waiting to allocate an entry from the local msg queue */
-	if (atomic_read(&ch->n_on_msg_allocate_wq) > 0) {
-		wake_up(&ch->msg_allocate_wq);
-	}
-
-	spin_lock_irqsave(&ch->lock, *irq_flags);
-}
-
-
-void
-xpc_disconnect_callout(struct xpc_channel *ch, enum xpc_retval reason)
-{
-	/*
-	 * Let the channel's registerer know that the channel is being
-	 * disconnected. We don't want to do this if the registerer was never
-	 * informed of a connection being made.
-	 */
-
-	if (ch->func != NULL) {
-		dev_dbg(xpc_chan, "ch->func() called, reason=%d, partid=%d, "
-			"channel=%d\n", reason, ch->partid, ch->number);
-
-		ch->func(reason, ch->partid, ch->number, NULL, ch->key);
-
-		dev_dbg(xpc_chan, "ch->func() returned, reason=%d, partid=%d, "
-			"channel=%d\n", reason, ch->partid, ch->number);
-	}
-}
-
-
-/*
- * Wait for a message entry to become available for the specified channel,
- * but don't wait any longer than 1 jiffy.
- */
-static enum xpc_retval
-xpc_allocate_msg_wait(struct xpc_channel *ch)
-{
-	enum xpc_retval ret;
-
-
-	if (ch->flags & XPC_C_DISCONNECTING) {
-		DBUG_ON(ch->reason == xpcInterrupted);  // >>> Is this true?
-		return ch->reason;
-	}
-
-	atomic_inc(&ch->n_on_msg_allocate_wq);
-	ret = interruptible_sleep_on_timeout(&ch->msg_allocate_wq, 1);
-	atomic_dec(&ch->n_on_msg_allocate_wq);
-
-	if (ch->flags & XPC_C_DISCONNECTING) {
-		ret = ch->reason;
-		DBUG_ON(ch->reason == xpcInterrupted);  // >>> Is this true?
-	} else if (ret == 0) {
-		ret = xpcTimeout;
-	} else {
-		ret = xpcInterrupted;
-	}
-
-	return ret;
-}
-
-
-/*
- * Allocate an entry for a message from the message queue associated with the
- * specified channel.
- */
-static enum xpc_retval
-xpc_allocate_msg(struct xpc_channel *ch, u32 flags,
-			struct xpc_msg **address_of_msg)
-{
-	struct xpc_msg *msg;
-	enum xpc_retval ret;
-	s64 put;
-
-
-	/* this reference will be dropped in xpc_send_msg() */
-	xpc_msgqueue_ref(ch);
-
-	if (ch->flags & XPC_C_DISCONNECTING) {
-		xpc_msgqueue_deref(ch);
-		return ch->reason;
-	}
-	if (!(ch->flags & XPC_C_CONNECTED)) {
-		xpc_msgqueue_deref(ch);
-		return xpcNotConnected;
-	}
-
-
-	/*
-	 * Get the next available message entry from the local message queue.
-	 * If none are available, we'll make sure that we grab the latest
-	 * GP values.
-	 */
-	ret = xpcTimeout;
-
-	while (1) {
-
-		put = (volatile s64) ch->w_local_GP.put;
-		if (put - (volatile s64) ch->w_remote_GP.get <
-							ch->local_nentries) {
-
-			/* There are available message entries. We need to try
-			 * to secure one for ourselves. We'll do this by trying
-			 * to increment w_local_GP.put as long as someone else
-			 * doesn't beat us to it. If they do, we'll have to
-			 * try again.
-		 	 */
-			if (cmpxchg(&ch->w_local_GP.put, put, put + 1) ==
-									put) {
-				/* we got the entry referenced by put */
-				break;
-			}
-			continue;	/* try again */
-		}
-
-
-		/*
-		 * There aren't any available msg entries at this time.
-		 *
-		 * In waiting for a message entry to become available,
-		 * we set a timeout in case the other side is not
-		 * sending completion IPIs. This lets us fake an IPI
-		 * that will cause the IPI handler to fetch the latest
-		 * GP values as if an IPI was sent by the other side.
-		 */
-		if (ret == xpcTimeout) {
-			xpc_IPI_send_local_msgrequest(ch);
-		}
-
-		if (flags & XPC_NOWAIT) {
-			xpc_msgqueue_deref(ch);
-			return xpcNoWait;
-		}
-
-		ret = xpc_allocate_msg_wait(ch);
-		if (ret != xpcInterrupted && ret != xpcTimeout) {
-			xpc_msgqueue_deref(ch);
-			return ret;
-		}
-	}
-
-
-	/* get the message's address and initialize it */
-	msg = (struct xpc_msg *) ((u64) ch->local_msgqueue +
-				(put % ch->local_nentries) * ch->msg_size);
-
-
-	DBUG_ON(msg->flags != 0);
-	msg->number = put;
-
-	dev_dbg(xpc_chan, "w_local_GP.put changed to %ld; msg=0x%p, "
-		"msg_number=%ld, partid=%d, channel=%d\n", put + 1,
-		(void *) msg, msg->number, ch->partid, ch->number);
-
-	*address_of_msg = msg;
-
-	return xpcSuccess;
-}
-
-
-/*
- * Allocate an entry for a message from the message queue associated with the
- * specified channel. NOTE that this routine can sleep waiting for a message
- * entry to become available. To not sleep, pass in the XPC_NOWAIT flag.
- *
- * Arguments:
- *
- *	partid - ID of partition to which the channel is connected.
- *	ch_number - channel #.
- *	flags - see xpc.h for valid flags.
- *	payload - address of the allocated payload area pointer (filled in on
- * 	          return) in which the user-defined message is constructed.
- */
-enum xpc_retval
-xpc_initiate_allocate(partid_t partid, int ch_number, u32 flags, void **payload)
-{
-	struct xpc_partition *part = &xpc_partitions[partid];
-	enum xpc_retval ret = xpcUnknownReason;
-	struct xpc_msg *msg = NULL;
-
-
-	DBUG_ON(partid <= 0 || partid >= XP_MAX_PARTITIONS);
-	DBUG_ON(ch_number < 0 || ch_number >= part->nchannels);
-
-	*payload = NULL;
-
-	if (xpc_part_ref(part)) {
-		ret = xpc_allocate_msg(&part->channels[ch_number], flags, &msg);
-		xpc_part_deref(part);
-
-		if (msg != NULL) {
-			*payload = &msg->payload;
-		}
-	}
-
-	return ret;
-}
-
-
-/*
- * Now we actually send the messages that are ready to be sent by advancing
- * the local message queue's Put value and then send an IPI to the recipient
- * partition.
- */
-static void
-xpc_send_msgs(struct xpc_channel *ch, s64 initial_put)
-{
-	struct xpc_msg *msg;
-	s64 put = initial_put + 1;
-	int send_IPI = 0;
-
-
-	while (1) {
-
-		while (1) {
-			if (put == (volatile s64) ch->w_local_GP.put) {
-				break;
-			}
-
-			msg = (struct xpc_msg *) ((u64) ch->local_msgqueue +
-			       (put % ch->local_nentries) * ch->msg_size);
-
-			if (!(msg->flags & XPC_M_READY)) {
-				break;
-			}
-
-			put++;
-		}
-
-		if (put == initial_put) {
-			/* nothing's changed */
-			break;
-		}
-
-		if (cmpxchg_rel(&ch->local_GP->put, initial_put, put) !=
-								initial_put) {
-			/* someone else beat us to it */
-			DBUG_ON((volatile s64) ch->local_GP->put < initial_put);
-			break;
-		}
-
-		/* we just set the new value of local_GP->put */
-
-		dev_dbg(xpc_chan, "local_GP->put changed to %ld, partid=%d, "
-			"channel=%d\n", put, ch->partid, ch->number);
-
-		send_IPI = 1;
-
-		/*
-		 * We need to ensure that the message referenced by
-		 * local_GP->put is not XPC_M_READY or that local_GP->put
-		 * equals w_local_GP.put, so we'll go have a look.
-		 */
-		initial_put = put;
-	}
-
-	if (send_IPI) {
-		xpc_IPI_send_msgrequest(ch);
-	}
-}
-
-
-/*
- * Common code that does the actual sending of the message by advancing the
- * local message queue's Put value and sends an IPI to the partition the
- * message is being sent to.
- */
-static enum xpc_retval
-xpc_send_msg(struct xpc_channel *ch, struct xpc_msg *msg, u8 notify_type,
-			xpc_notify_func func, void *key)
-{
-	enum xpc_retval ret = xpcSuccess;
-	struct xpc_notify *notify = notify;
-	s64 put, msg_number = msg->number;
-
-
-	DBUG_ON(notify_type == XPC_N_CALL && func == NULL);
-	DBUG_ON((((u64) msg - (u64) ch->local_msgqueue) / ch->msg_size) !=
-					msg_number % ch->local_nentries);
-	DBUG_ON(msg->flags & XPC_M_READY);
-
-	if (ch->flags & XPC_C_DISCONNECTING) {
-		/* drop the reference grabbed in xpc_allocate_msg() */
-		xpc_msgqueue_deref(ch);
-		return ch->reason;
-	}
-
-	if (notify_type != 0) {
-		/*
-		 * Tell the remote side to send an ACK interrupt when the
-		 * message has been delivered.
-		 */
-		msg->flags |= XPC_M_INTERRUPT;
-
-		atomic_inc(&ch->n_to_notify);
-
-		notify = &ch->notify_queue[msg_number % ch->local_nentries];
-		notify->func = func;
-		notify->key = key;
-		notify->type = notify_type;
-
-		// >>> is a mb() needed here?
-
-		if (ch->flags & XPC_C_DISCONNECTING) {
-			/*
-			 * An error occurred between our last error check and
-			 * this one. We will try to clear the type field from
-			 * the notify entry. If we succeed then
-			 * xpc_disconnect_channel() didn't already process
-			 * the notify entry.
-			 */
-			if (cmpxchg(&notify->type, notify_type, 0) ==
-								notify_type) {
-				atomic_dec(&ch->n_to_notify);
-				ret = ch->reason;
-			}
-
-			/* drop the reference grabbed in xpc_allocate_msg() */
-			xpc_msgqueue_deref(ch);
-			return ret;
-		}
-	}
-
-	msg->flags |= XPC_M_READY;
-
-	/*
-	 * The preceding store of msg->flags must occur before the following
-	 * load of ch->local_GP->put.
-	 */
-	mb();
-
-	/* see if the message is next in line to be sent, if so send it */
-
-	put = ch->local_GP->put;
-	if (put == msg_number) {
-		xpc_send_msgs(ch, put);
-	}
-
-	/* drop the reference grabbed in xpc_allocate_msg() */
-	xpc_msgqueue_deref(ch);
-	return ret;
-}
-
-
-/*
- * Send a message previously allocated using xpc_initiate_allocate() on the
- * specified channel connected to the specified partition.
- *
- * This routine will not wait for the message to be received, nor will
- * notification be given when it does happen. Once this routine has returned
- * the message entry allocated via xpc_initiate_allocate() is no longer
- * accessable to the caller.
- *
- * This routine, although called by users, does not call xpc_part_ref() to
- * ensure that the partition infrastructure is in place. It relies on the
- * fact that we called xpc_msgqueue_ref() in xpc_allocate_msg().
- *
- * Arguments:
- *
- *	partid - ID of partition to which the channel is connected.
- *	ch_number - channel # to send message on.
- *	payload - pointer to the payload area allocated via
- *			xpc_initiate_allocate().
- */
-enum xpc_retval
-xpc_initiate_send(partid_t partid, int ch_number, void *payload)
-{
-	struct xpc_partition *part = &xpc_partitions[partid];
-	struct xpc_msg *msg = XPC_MSG_ADDRESS(payload);
-	enum xpc_retval ret;
-
-
-	dev_dbg(xpc_chan, "msg=0x%p, partid=%d, channel=%d\n", (void *) msg,
-		partid, ch_number);
-
-	DBUG_ON(partid <= 0 || partid >= XP_MAX_PARTITIONS);
-	DBUG_ON(ch_number < 0 || ch_number >= part->nchannels);
-	DBUG_ON(msg == NULL);
-
-	ret = xpc_send_msg(&part->channels[ch_number], msg, 0, NULL, NULL);
-
-	return ret;
-}
-
-
-/*
- * Send a message previously allocated using xpc_initiate_allocate on the
- * specified channel connected to the specified partition.
- *
- * This routine will not wait for the message to be sent. Once this routine
- * has returned the message entry allocated via xpc_initiate_allocate() is no
- * longer accessable to the caller.
- *
- * Once the remote end of the channel has received the message, the function
- * passed as an argument to xpc_initiate_send_notify() will be called. This
- * allows the sender to free up or re-use any buffers referenced by the
- * message, but does NOT mean the message has been processed at the remote
- * end by a receiver.
- *
- * If this routine returns an error, the caller's function will NOT be called.
- *
- * This routine, although called by users, does not call xpc_part_ref() to
- * ensure that the partition infrastructure is in place. It relies on the
- * fact that we called xpc_msgqueue_ref() in xpc_allocate_msg().
- *
- * Arguments:
- *
- *	partid - ID of partition to which the channel is connected.
- *	ch_number - channel # to send message on.
- *	payload - pointer to the payload area allocated via
- *			xpc_initiate_allocate().
- *	func - function to call with asynchronous notification of message
- *		  receipt. THIS FUNCTION MUST BE NON-BLOCKING.
- *	key - user-defined key to be passed to the function when it's called.
- */
-enum xpc_retval
-xpc_initiate_send_notify(partid_t partid, int ch_number, void *payload,
-				xpc_notify_func func, void *key)
-{
-	struct xpc_partition *part = &xpc_partitions[partid];
-	struct xpc_msg *msg = XPC_MSG_ADDRESS(payload);
-	enum xpc_retval ret;
-
-
-	dev_dbg(xpc_chan, "msg=0x%p, partid=%d, channel=%d\n", (void *) msg,
-		partid, ch_number);
-
-	DBUG_ON(partid <= 0 || partid >= XP_MAX_PARTITIONS);
-	DBUG_ON(ch_number < 0 || ch_number >= part->nchannels);
-	DBUG_ON(msg == NULL);
-	DBUG_ON(func == NULL);
-
-	ret = xpc_send_msg(&part->channels[ch_number], msg, XPC_N_CALL,
-								func, key);
-	return ret;
-}
-
-
-static struct xpc_msg *
-xpc_pull_remote_msg(struct xpc_channel *ch, s64 get)
-{
-	struct xpc_partition *part = &xpc_partitions[ch->partid];
-	struct xpc_msg *remote_msg, *msg;
-	u32 msg_index, nmsgs;
-	u64 msg_offset;
-	enum xpc_retval ret;
-
-
-	if (mutex_lock_interruptible(&ch->msg_to_pull_mutex) != 0) {
-		/* we were interrupted by a signal */
-		return NULL;
-	}
-
-	while (get >= ch->next_msg_to_pull) {
-
-		/* pull as many messages as are ready and able to be pulled */
-
-		msg_index = ch->next_msg_to_pull % ch->remote_nentries;
-
-		DBUG_ON(ch->next_msg_to_pull >=
-					(volatile s64) ch->w_remote_GP.put);
-		nmsgs =  (volatile s64) ch->w_remote_GP.put -
-						ch->next_msg_to_pull;
-		if (msg_index + nmsgs > ch->remote_nentries) {
-			/* ignore the ones that wrap the msg queue for now */
-			nmsgs = ch->remote_nentries - msg_index;
-		}
-
-		msg_offset = msg_index * ch->msg_size;
-		msg = (struct xpc_msg *) ((u64) ch->remote_msgqueue +
-								msg_offset);
-		remote_msg = (struct xpc_msg *) (ch->remote_msgqueue_pa +
-								msg_offset);
-
-		if ((ret = xpc_pull_remote_cachelines(part, msg, remote_msg,
-				nmsgs * ch->msg_size)) != xpcSuccess) {
-
-			dev_dbg(xpc_chan, "failed to pull %d msgs starting with"
-				" msg %ld from partition %d, channel=%d, "
-				"ret=%d\n", nmsgs, ch->next_msg_to_pull,
-				ch->partid, ch->number, ret);
-
-			XPC_DEACTIVATE_PARTITION(part, ret);
-
-			mutex_unlock(&ch->msg_to_pull_mutex);
-			return NULL;
-		}
-
-		mb();	/* >>> this may not be needed, we're not sure */
-
-		ch->next_msg_to_pull += nmsgs;
-	}
-
-	mutex_unlock(&ch->msg_to_pull_mutex);
-
-	/* return the message we were looking for */
-	msg_offset = (get % ch->remote_nentries) * ch->msg_size;
-	msg = (struct xpc_msg *) ((u64) ch->remote_msgqueue + msg_offset);
-
-	return msg;
-}
-
-
-/*
- * Get a message to be delivered.
- */
-static struct xpc_msg *
-xpc_get_deliverable_msg(struct xpc_channel *ch)
-{
-	struct xpc_msg *msg = NULL;
-	s64 get;
-
-
-	do {
-		if ((volatile u32) ch->flags & XPC_C_DISCONNECTING) {
-			break;
-		}
-
-		get = (volatile s64) ch->w_local_GP.get;
-		if (get == (volatile s64) ch->w_remote_GP.put) {
-			break;
-		}
-
-		/* There are messages waiting to be pulled and delivered.
-		 * We need to try to secure one for ourselves. We'll do this
-		 * by trying to increment w_local_GP.get and hope that no one
-		 * else beats us to it. If they do, we'll we'll simply have
-		 * to try again for the next one.
-	 	 */
-
-		if (cmpxchg(&ch->w_local_GP.get, get, get + 1) == get) {
-			/* we got the entry referenced by get */
-
-			dev_dbg(xpc_chan, "w_local_GP.get changed to %ld, "
-				"partid=%d, channel=%d\n", get + 1,
-				ch->partid, ch->number);
-
-			/* pull the message from the remote partition */
-
-			msg = xpc_pull_remote_msg(ch, get);
-
-			DBUG_ON(msg != NULL && msg->number != get);
-			DBUG_ON(msg != NULL && (msg->flags & XPC_M_DONE));
-			DBUG_ON(msg != NULL && !(msg->flags & XPC_M_READY));
-
-			break;
-		}
-
-	} while (1);
-
-	return msg;
-}
-
-
-/*
- * Deliver a message to its intended recipient.
- */
-void
-xpc_deliver_msg(struct xpc_channel *ch)
-{
-	struct xpc_msg *msg;
-
-
-	if ((msg = xpc_get_deliverable_msg(ch)) != NULL) {
-
-		/*
-		 * This ref is taken to protect the payload itself from being
-		 * freed before the user is finished with it, which the user
-		 * indicates by calling xpc_initiate_received().
-		 */
-		xpc_msgqueue_ref(ch);
-
-		atomic_inc(&ch->kthreads_active);
-
-		if (ch->func != NULL) {
-			dev_dbg(xpc_chan, "ch->func() called, msg=0x%p, "
-				"msg_number=%ld, partid=%d, channel=%d\n",
-				(void *) msg, msg->number, ch->partid,
-				ch->number);
-
-			/* deliver the message to its intended recipient */
-			ch->func(xpcMsgReceived, ch->partid, ch->number,
-					&msg->payload, ch->key);
-
-			dev_dbg(xpc_chan, "ch->func() returned, msg=0x%p, "
-				"msg_number=%ld, partid=%d, channel=%d\n",
-				(void *) msg, msg->number, ch->partid,
-				ch->number);
-		}
-
-		atomic_dec(&ch->kthreads_active);
-	}
-}
-
-
-/*
- * Now we actually acknowledge the messages that have been delivered and ack'd
- * by advancing the cached remote message queue's Get value and if requested
- * send an IPI to the message sender's partition.
- */
-static void
-xpc_acknowledge_msgs(struct xpc_channel *ch, s64 initial_get, u8 msg_flags)
-{
-	struct xpc_msg *msg;
-	s64 get = initial_get + 1;
-	int send_IPI = 0;
-
-
-	while (1) {
-
-		while (1) {
-			if (get == (volatile s64) ch->w_local_GP.get) {
-				break;
-			}
-
-			msg = (struct xpc_msg *) ((u64) ch->remote_msgqueue +
-			       (get % ch->remote_nentries) * ch->msg_size);
-
-			if (!(msg->flags & XPC_M_DONE)) {
-				break;
-			}
-
-			msg_flags |= msg->flags;
-			get++;
-		}
-
-		if (get == initial_get) {
-			/* nothing's changed */
-			break;
-		}
-
-		if (cmpxchg_rel(&ch->local_GP->get, initial_get, get) !=
-								initial_get) {
-			/* someone else beat us to it */
-			DBUG_ON((volatile s64) ch->local_GP->get <=
-								initial_get);
-			break;
-		}
-
-		/* we just set the new value of local_GP->get */
-
-		dev_dbg(xpc_chan, "local_GP->get changed to %ld, partid=%d, "
-			"channel=%d\n", get, ch->partid, ch->number);
-
-		send_IPI = (msg_flags & XPC_M_INTERRUPT);
-
-		/*
-		 * We need to ensure that the message referenced by
-		 * local_GP->get is not XPC_M_DONE or that local_GP->get
-		 * equals w_local_GP.get, so we'll go have a look.
-		 */
-		initial_get = get;
-	}
-
-	if (send_IPI) {
-		xpc_IPI_send_msgrequest(ch);
-	}
-}
-
-
-/*
- * Acknowledge receipt of a delivered message.
- *
- * If a message has XPC_M_INTERRUPT set, send an interrupt to the partition
- * that sent the message.
- *
- * This function, although called by users, does not call xpc_part_ref() to
- * ensure that the partition infrastructure is in place. It relies on the
- * fact that we called xpc_msgqueue_ref() in xpc_deliver_msg().
- *
- * Arguments:
- *
- *	partid - ID of partition to which the channel is connected.
- *	ch_number - channel # message received on.
- *	payload - pointer to the payload area allocated via
- *			xpc_initiate_allocate().
- */
-void
-xpc_initiate_received(partid_t partid, int ch_number, void *payload)
-{
-	struct xpc_partition *part = &xpc_partitions[partid];
-	struct xpc_channel *ch;
-	struct xpc_msg *msg = XPC_MSG_ADDRESS(payload);
-	s64 get, msg_number = msg->number;
-
-
-	DBUG_ON(partid <= 0 || partid >= XP_MAX_PARTITIONS);
-	DBUG_ON(ch_number < 0 || ch_number >= part->nchannels);
-
-	ch = &part->channels[ch_number];
-
-	dev_dbg(xpc_chan, "msg=0x%p, msg_number=%ld, partid=%d, channel=%d\n",
-		(void *) msg, msg_number, ch->partid, ch->number);
-
-	DBUG_ON((((u64) msg - (u64) ch->remote_msgqueue) / ch->msg_size) !=
-					msg_number % ch->remote_nentries);
-	DBUG_ON(msg->flags & XPC_M_DONE);
-
-	msg->flags |= XPC_M_DONE;
-
-	/*
-	 * The preceding store of msg->flags must occur before the following
-	 * load of ch->local_GP->get.
-	 */
-	mb();
-
-	/*
-	 * See if this message is next in line to be acknowledged as having
-	 * been delivered.
-	 */
-	get = ch->local_GP->get;
-	if (get == msg_number) {
-		xpc_acknowledge_msgs(ch, get, msg->flags);
-	}
-
-	/* the call to xpc_msgqueue_ref() was done by xpc_deliver_msg()  */
-	xpc_msgqueue_deref(ch);
-}
-
diff --git a/arch/ia64/sn/kernel/xpc_main.c b/arch/ia64/sn/kernel/xpc_main.c
deleted file mode 100644
index 81785b7..0000000
--- a/arch/ia64/sn/kernel/xpc_main.c
+++ /dev/null
@@ -1,1431 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (c) 2004-2007 Silicon Graphics, Inc.  All Rights Reserved.
- */
-
-
-/*
- * Cross Partition Communication (XPC) support - standard version.
- *
- *	XPC provides a message passing capability that crosses partition
- *	boundaries. This module is made up of two parts:
- *
- *	    partition	This part detects the presence/absence of other
- *			partitions. It provides a heartbeat and monitors
- *			the heartbeats of other partitions.
- *
- *	    channel	This part manages the channels and sends/receives
- *			messages across them to/from other partitions.
- *
- *	There are a couple of additional functions residing in XP, which
- *	provide an interface to XPC for its users.
- *
- *
- *	Caveats:
- *
- *	  . We currently have no way to determine which nasid an IPI came
- *	    from. Thus, xpc_IPI_send() does a remote AMO write followed by
- *	    an IPI. The AMO indicates where data is to be pulled from, so
- *	    after the IPI arrives, the remote partition checks the AMO word.
- *	    The IPI can actually arrive before the AMO however, so other code
- *	    must periodically check for this case. Also, remote AMO operations
- *	    do not reliably time out. Thus we do a remote PIO read solely to
- *	    know whether the remote partition is down and whether we should
- *	    stop sending IPIs to it. This remote PIO read operation is set up
- *	    in a special nofault region so SAL knows to ignore (and cleanup)
- *	    any errors due to the remote AMO write, PIO read, and/or PIO
- *	    write operations.
- *
- *	    If/when new hardware solves this IPI problem, we should abandon
- *	    the current approach.
- *
- */
-
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/sched.h>
-#include <linux/syscalls.h>
-#include <linux/cache.h>
-#include <linux/interrupt.h>
-#include <linux/delay.h>
-#include <linux/reboot.h>
-#include <linux/completion.h>
-#include <linux/kdebug.h>
-#include <asm/sn/intr.h>
-#include <asm/sn/sn_sal.h>
-#include <asm/uaccess.h>
-#include <asm/sn/xpc.h>
-
-
-/* define two XPC debug device structures to be used with dev_dbg() et al */
-
-struct device_driver xpc_dbg_name = {
-	.name = "xpc"
-};
-
-struct device xpc_part_dbg_subname = {
-	.bus_id = {0},		/* set to "part" at xpc_init() time */
-	.driver = &xpc_dbg_name
-};
-
-struct device xpc_chan_dbg_subname = {
-	.bus_id = {0},		/* set to "chan" at xpc_init() time */
-	.driver = &xpc_dbg_name
-};
-
-struct device *xpc_part = &xpc_part_dbg_subname;
-struct device *xpc_chan = &xpc_chan_dbg_subname;
-
-
-static int xpc_kdebug_ignore;
-
-
-/* systune related variables for /proc/sys directories */
-
-static int xpc_hb_interval = XPC_HB_DEFAULT_INTERVAL;
-static int xpc_hb_min_interval = 1;
-static int xpc_hb_max_interval = 10;
-
-static int xpc_hb_check_interval = XPC_HB_CHECK_DEFAULT_INTERVAL;
-static int xpc_hb_check_min_interval = 10;
-static int xpc_hb_check_max_interval = 120;
-
-int xpc_disengage_request_timelimit = XPC_DISENGAGE_REQUEST_DEFAULT_TIMELIMIT;
-static int xpc_disengage_request_min_timelimit = 0;
-static int xpc_disengage_request_max_timelimit = 120;
-
-static ctl_table xpc_sys_xpc_hb_dir[] = {
-	{
-		.ctl_name 	= CTL_UNNUMBERED,
-		.procname	= "hb_interval",
-		.data		= &xpc_hb_interval,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec_minmax,
-		.strategy	= &sysctl_intvec,
-		.extra1		= &xpc_hb_min_interval,
-		.extra2		= &xpc_hb_max_interval
-	},
-	{
-		.ctl_name	= CTL_UNNUMBERED,
-		.procname	= "hb_check_interval",
-		.data		= &xpc_hb_check_interval,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec_minmax,
-		.strategy	= &sysctl_intvec,
-		.extra1		= &xpc_hb_check_min_interval,
-		.extra2		= &xpc_hb_check_max_interval
-	},
-	{}
-};
-static ctl_table xpc_sys_xpc_dir[] = {
-	{
-		.ctl_name	= CTL_UNNUMBERED,
-		.procname	= "hb",
-		.mode		= 0555,
-		.child		= xpc_sys_xpc_hb_dir
-	},
-	{
-		.ctl_name	= CTL_UNNUMBERED,
-		.procname	= "disengage_request_timelimit",
-		.data		= &xpc_disengage_request_timelimit,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec_minmax,
-		.strategy	= &sysctl_intvec,
-		.extra1		= &xpc_disengage_request_min_timelimit,
-		.extra2		= &xpc_disengage_request_max_timelimit
-	},
-	{}
-};
-static ctl_table xpc_sys_dir[] = {
-	{
-		.ctl_name	= CTL_UNNUMBERED,
-		.procname	= "xpc",
-		.mode		= 0555,
-		.child		= xpc_sys_xpc_dir
-	},
-	{}
-};
-static struct ctl_table_header *xpc_sysctl;
-
-/* non-zero if any remote partition disengage request was timed out */
-int xpc_disengage_request_timedout;
-
-/* #of IRQs received */
-static atomic_t xpc_act_IRQ_rcvd;
-
-/* IRQ handler notifies this wait queue on receipt of an IRQ */
-static DECLARE_WAIT_QUEUE_HEAD(xpc_act_IRQ_wq);
-
-static unsigned long xpc_hb_check_timeout;
-
-/* notification that the xpc_hb_checker thread has exited */
-static DECLARE_COMPLETION(xpc_hb_checker_exited);
-
-/* notification that the xpc_discovery thread has exited */
-static DECLARE_COMPLETION(xpc_discovery_exited);
-
-
-static struct timer_list xpc_hb_timer;
-
-
-static void xpc_kthread_waitmsgs(struct xpc_partition *, struct xpc_channel *);
-
-
-static int xpc_system_reboot(struct notifier_block *, unsigned long, void *);
-static struct notifier_block xpc_reboot_notifier = {
-	.notifier_call = xpc_system_reboot,
-};
-
-static int xpc_system_die(struct notifier_block *, unsigned long, void *);
-static struct notifier_block xpc_die_notifier = {
-	.notifier_call = xpc_system_die,
-};
-
-
-/*
- * Timer function to enforce the timelimit on the partition disengage request.
- */
-static void
-xpc_timeout_partition_disengage_request(unsigned long data)
-{
-	struct xpc_partition *part = (struct xpc_partition *) data;
-
-
-	DBUG_ON(jiffies < part->disengage_request_timeout);
-
-	(void) xpc_partition_disengaged(part);
-
-	DBUG_ON(part->disengage_request_timeout != 0);
-	DBUG_ON(xpc_partition_engaged(1UL << XPC_PARTID(part)) != 0);
-}
-
-
-/*
- * Notify the heartbeat check thread that an IRQ has been received.
- */
-static irqreturn_t
-xpc_act_IRQ_handler(int irq, void *dev_id)
-{
-	atomic_inc(&xpc_act_IRQ_rcvd);
-	wake_up_interruptible(&xpc_act_IRQ_wq);
-	return IRQ_HANDLED;
-}
-
-
-/*
- * Timer to produce the heartbeat.  The timer structures function is
- * already set when this is initially called.  A tunable is used to
- * specify when the next timeout should occur.
- */
-static void
-xpc_hb_beater(unsigned long dummy)
-{
-	xpc_vars->heartbeat++;
-
-	if (jiffies >= xpc_hb_check_timeout) {
-		wake_up_interruptible(&xpc_act_IRQ_wq);
-	}
-
-	xpc_hb_timer.expires = jiffies + (xpc_hb_interval * HZ);
-	add_timer(&xpc_hb_timer);
-}
-
-
-/*
- * This thread is responsible for nearly all of the partition
- * activation/deactivation.
- */
-static int
-xpc_hb_checker(void *ignore)
-{
-	int last_IRQ_count = 0;
-	int new_IRQ_count;
-	int force_IRQ=0;
-
-
-	/* this thread was marked active by xpc_hb_init() */
-
-	daemonize(XPC_HB_CHECK_THREAD_NAME);
-
-	set_cpus_allowed(current, cpumask_of_cpu(XPC_HB_CHECK_CPU));
-
-	/* set our heartbeating to other partitions into motion */
-	xpc_hb_check_timeout = jiffies + (xpc_hb_check_interval * HZ);
-	xpc_hb_beater(0);
-
-	while (!(volatile int) xpc_exiting) {
-
-		dev_dbg(xpc_part, "woke up with %d ticks rem; %d IRQs have "
-			"been received\n",
-			(int) (xpc_hb_check_timeout - jiffies),
-			atomic_read(&xpc_act_IRQ_rcvd) - last_IRQ_count);
-
-
-		/* checking of remote heartbeats is skewed by IRQ handling */
-		if (jiffies >= xpc_hb_check_timeout) {
-			dev_dbg(xpc_part, "checking remote heartbeats\n");
-			xpc_check_remote_hb();
-
-			/*
-			 * We need to periodically recheck to ensure no
-			 * IPI/AMO pairs have been missed.  That check
-			 * must always reset xpc_hb_check_timeout.
-			 */
-			force_IRQ = 1;
-		}
-
-
-		/* check for outstanding IRQs */
-		new_IRQ_count = atomic_read(&xpc_act_IRQ_rcvd);
-		if (last_IRQ_count < new_IRQ_count || force_IRQ != 0) {
-			force_IRQ = 0;
-
-			dev_dbg(xpc_part, "found an IRQ to process; will be "
-				"resetting xpc_hb_check_timeout\n");
-
-			last_IRQ_count += xpc_identify_act_IRQ_sender();
-			if (last_IRQ_count < new_IRQ_count) {
-				/* retry once to help avoid missing AMO */
-				(void) xpc_identify_act_IRQ_sender();
-			}
-			last_IRQ_count = new_IRQ_count;
-
-			xpc_hb_check_timeout = jiffies +
-					   (xpc_hb_check_interval * HZ);
-		}
-
-		/* wait for IRQ or timeout */
-		(void) wait_event_interruptible(xpc_act_IRQ_wq,
-			    (last_IRQ_count < atomic_read(&xpc_act_IRQ_rcvd) ||
-					jiffies >= xpc_hb_check_timeout ||
-						(volatile int) xpc_exiting));
-	}
-
-	dev_dbg(xpc_part, "heartbeat checker is exiting\n");
-
-
-	/* mark this thread as having exited */
-	complete(&xpc_hb_checker_exited);
-	return 0;
-}
-
-
-/*
- * This thread will attempt to discover other partitions to activate
- * based on info provided by SAL. This new thread is short lived and
- * will exit once discovery is complete.
- */
-static int
-xpc_initiate_discovery(void *ignore)
-{
-	daemonize(XPC_DISCOVERY_THREAD_NAME);
-
-	xpc_discovery();
-
-	dev_dbg(xpc_part, "discovery thread is exiting\n");
-
-	/* mark this thread as having exited */
-	complete(&xpc_discovery_exited);
-	return 0;
-}
-
-
-/*
- * Establish first contact with the remote partititon. This involves pulling
- * the XPC per partition variables from the remote partition and waiting for
- * the remote partition to pull ours.
- */
-static enum xpc_retval
-xpc_make_first_contact(struct xpc_partition *part)
-{
-	enum xpc_retval ret;
-
-
-	while ((ret = xpc_pull_remote_vars_part(part)) != xpcSuccess) {
-		if (ret != xpcRetry) {
-			XPC_DEACTIVATE_PARTITION(part, ret);
-			return ret;
-		}
-
-		dev_dbg(xpc_chan, "waiting to make first contact with "
-			"partition %d\n", XPC_PARTID(part));
-
-		/* wait a 1/4 of a second or so */
-		(void) msleep_interruptible(250);
-
-		if (part->act_state == XPC_P_DEACTIVATING) {
-			return part->reason;
-		}
-	}
-
-	return xpc_mark_partition_active(part);
-}
-
-
-/*
- * The first kthread assigned to a newly activated partition is the one
- * created by XPC HB with which it calls xpc_partition_up(). XPC hangs on to
- * that kthread until the partition is brought down, at which time that kthread
- * returns back to XPC HB. (The return of that kthread will signify to XPC HB
- * that XPC has dismantled all communication infrastructure for the associated
- * partition.) This kthread becomes the channel manager for that partition.
- *
- * Each active partition has a channel manager, who, besides connecting and
- * disconnecting channels, will ensure that each of the partition's connected
- * channels has the required number of assigned kthreads to get the work done.
- */
-static void
-xpc_channel_mgr(struct xpc_partition *part)
-{
-	while (part->act_state != XPC_P_DEACTIVATING ||
-			atomic_read(&part->nchannels_active) > 0 ||
-					!xpc_partition_disengaged(part)) {
-
-		xpc_process_channel_activity(part);
-
-
-		/*
-		 * Wait until we've been requested to activate kthreads or
-		 * all of the channel's message queues have been torn down or
-		 * a signal is pending.
-		 *
-		 * The channel_mgr_requests is set to 1 after being awakened,
-		 * This is done to prevent the channel mgr from making one pass
-		 * through the loop for each request, since he will
-		 * be servicing all the requests in one pass. The reason it's
-		 * set to 1 instead of 0 is so that other kthreads will know
-		 * that the channel mgr is running and won't bother trying to
-		 * wake him up.
-		 */
-		atomic_dec(&part->channel_mgr_requests);
-		(void) wait_event_interruptible(part->channel_mgr_wq,
-				(atomic_read(&part->channel_mgr_requests) > 0 ||
-				(volatile u64) part->local_IPI_amo != 0 ||
-				((volatile u8) part->act_state ==
-							XPC_P_DEACTIVATING &&
-				atomic_read(&part->nchannels_active) == 0 &&
-				xpc_partition_disengaged(part))));
-		atomic_set(&part->channel_mgr_requests, 1);
-
-		// >>> Does it need to wakeup periodically as well? In case we
-		// >>> miscalculated the #of kthreads to wakeup or create?
-	}
-}
-
-
-/*
- * When XPC HB determines that a partition has come up, it will create a new
- * kthread and that kthread will call this function to attempt to set up the
- * basic infrastructure used for Cross Partition Communication with the newly
- * upped partition.
- *
- * The kthread that was created by XPC HB and which setup the XPC
- * infrastructure will remain assigned to the partition until the partition
- * goes down. At which time the kthread will teardown the XPC infrastructure
- * and then exit.
- *
- * XPC HB will put the remote partition's XPC per partition specific variables
- * physical address into xpc_partitions[partid].remote_vars_part_pa prior to
- * calling xpc_partition_up().
- */
-static void
-xpc_partition_up(struct xpc_partition *part)
-{
-	DBUG_ON(part->channels != NULL);
-
-	dev_dbg(xpc_chan, "activating partition %d\n", XPC_PARTID(part));
-
-	if (xpc_setup_infrastructure(part) != xpcSuccess) {
-		return;
-	}
-
-	/*
-	 * The kthread that XPC HB called us with will become the
-	 * channel manager for this partition. It will not return
-	 * back to XPC HB until the partition's XPC infrastructure
-	 * has been dismantled.
-	 */
-
-	(void) xpc_part_ref(part);	/* this will always succeed */
-
-	if (xpc_make_first_contact(part) == xpcSuccess) {
-		xpc_channel_mgr(part);
-	}
-
-	xpc_part_deref(part);
-
-	xpc_teardown_infrastructure(part);
-}
-
-
-static int
-xpc_activating(void *__partid)
-{
-	partid_t partid = (u64) __partid;
-	struct xpc_partition *part = &xpc_partitions[partid];
-	unsigned long irq_flags;
-	struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 };
-	int ret;
-
-
-	DBUG_ON(partid <= 0 || partid >= XP_MAX_PARTITIONS);
-
-	spin_lock_irqsave(&part->act_lock, irq_flags);
-
-	if (part->act_state == XPC_P_DEACTIVATING) {
-		part->act_state = XPC_P_INACTIVE;
-		spin_unlock_irqrestore(&part->act_lock, irq_flags);
-		part->remote_rp_pa = 0;
-		return 0;
-	}
-
-	/* indicate the thread is activating */
-	DBUG_ON(part->act_state != XPC_P_ACTIVATION_REQ);
-	part->act_state = XPC_P_ACTIVATING;
-
-	XPC_SET_REASON(part, 0, 0);
-	spin_unlock_irqrestore(&part->act_lock, irq_flags);
-
-	dev_dbg(xpc_part, "bringing partition %d up\n", partid);
-
-	daemonize("xpc%02d", partid);
-
-	/*
-	 * This thread needs to run at a realtime priority to prevent a
-	 * significant performance degradation.
-	 */
-	ret = sched_setscheduler(current, SCHED_FIFO, &param);
-	if (ret != 0) {
-		dev_warn(xpc_part, "unable to set pid %d to a realtime "
-			"priority, ret=%d\n", current->pid, ret);
-	}
-
-	/* allow this thread and its children to run on any CPU */
-	set_cpus_allowed(current, CPU_MASK_ALL);
-
-	/*
-	 * Register the remote partition's AMOs with SAL so it can handle
-	 * and cleanup errors within that address range should the remote
-	 * partition go down. We don't unregister this range because it is
-	 * difficult to tell when outstanding writes to the remote partition
-	 * are finished and thus when it is safe to unregister. This should
-	 * not result in wasted space in the SAL xp_addr_region table because
-	 * we should get the same page for remote_amos_page_pa after module
-	 * reloads and system reboots.
-	 */
-	if (sn_register_xp_addr_region(part->remote_amos_page_pa,
-							PAGE_SIZE, 1) < 0) {
-		dev_warn(xpc_part, "xpc_partition_up(%d) failed to register "
-			"xp_addr region\n", partid);
-
-		spin_lock_irqsave(&part->act_lock, irq_flags);
-		part->act_state = XPC_P_INACTIVE;
-		XPC_SET_REASON(part, xpcPhysAddrRegFailed, __LINE__);
-		spin_unlock_irqrestore(&part->act_lock, irq_flags);
-		part->remote_rp_pa = 0;
-		return 0;
-	}
-
-	xpc_allow_hb(partid, xpc_vars);
-	xpc_IPI_send_activated(part);
-
-
-	/*
-	 * xpc_partition_up() holds this thread and marks this partition as
-	 * XPC_P_ACTIVE by calling xpc_hb_mark_active().
-	 */
-	(void) xpc_partition_up(part);
-
-	xpc_disallow_hb(partid, xpc_vars);
-	xpc_mark_partition_inactive(part);
-
-	if (part->reason == xpcReactivating) {
-		/* interrupting ourselves results in activating partition */
-		xpc_IPI_send_reactivate(part);
-	}
-
-	return 0;
-}
-
-
-void
-xpc_activate_partition(struct xpc_partition *part)
-{
-	partid_t partid = XPC_PARTID(part);
-	unsigned long irq_flags;
-	pid_t pid;
-
-
-	spin_lock_irqsave(&part->act_lock, irq_flags);
-
-	DBUG_ON(part->act_state != XPC_P_INACTIVE);
-
-	part->act_state = XPC_P_ACTIVATION_REQ;
-	XPC_SET_REASON(part, xpcCloneKThread, __LINE__);
-
-	spin_unlock_irqrestore(&part->act_lock, irq_flags);
-
-	pid = kernel_thread(xpc_activating, (void *) ((u64) partid), 0);
-
-	if (unlikely(pid <= 0)) {
-		spin_lock_irqsave(&part->act_lock, irq_flags);
-		part->act_state = XPC_P_INACTIVE;
-		XPC_SET_REASON(part, xpcCloneKThreadFailed, __LINE__);
-		spin_unlock_irqrestore(&part->act_lock, irq_flags);
-	}
-}
-
-
-/*
- * Handle the receipt of a SGI_XPC_NOTIFY IRQ by seeing whether the specified
- * partition actually sent it. Since SGI_XPC_NOTIFY IRQs may be shared by more
- * than one partition, we use an AMO_t structure per partition to indicate
- * whether a partition has sent an IPI or not.  >>> If it has, then wake up the
- * associated kthread to handle it.
- *
- * All SGI_XPC_NOTIFY IRQs received by XPC are the result of IPIs sent by XPC
- * running on other partitions.
- *
- * Noteworthy Arguments:
- *
- *	irq - Interrupt ReQuest number. NOT USED.
- *
- *	dev_id - partid of IPI's potential sender.
- */
-irqreturn_t
-xpc_notify_IRQ_handler(int irq, void *dev_id)
-{
-	partid_t partid = (partid_t) (u64) dev_id;
-	struct xpc_partition *part = &xpc_partitions[partid];
-
-
-	DBUG_ON(partid <= 0 || partid >= XP_MAX_PARTITIONS);
-
-	if (xpc_part_ref(part)) {
-		xpc_check_for_channel_activity(part);
-
-		xpc_part_deref(part);
-	}
-	return IRQ_HANDLED;
-}
-
-
-/*
- * Check to see if xpc_notify_IRQ_handler() dropped any IPIs on the floor
- * because the write to their associated IPI amo completed after the IRQ/IPI
- * was received.
- */
-void
-xpc_dropped_IPI_check(struct xpc_partition *part)
-{
-	if (xpc_part_ref(part)) {
-		xpc_check_for_channel_activity(part);
-
-		part->dropped_IPI_timer.expires = jiffies +
-							XPC_P_DROPPED_IPI_WAIT;
-		add_timer(&part->dropped_IPI_timer);
-		xpc_part_deref(part);
-	}
-}
-
-
-void
-xpc_activate_kthreads(struct xpc_channel *ch, int needed)
-{
-	int idle = atomic_read(&ch->kthreads_idle);
-	int assigned = atomic_read(&ch->kthreads_assigned);
-	int wakeup;
-
-
-	DBUG_ON(needed <= 0);
-
-	if (idle > 0) {
-		wakeup = (needed > idle) ? idle : needed;
-		needed -= wakeup;
-
-		dev_dbg(xpc_chan, "wakeup %d idle kthreads, partid=%d, "
-			"channel=%d\n", wakeup, ch->partid, ch->number);
-
-		/* only wakeup the requested number of kthreads */
-		wake_up_nr(&ch->idle_wq, wakeup);
-	}
-
-	if (needed <= 0) {
-		return;
-	}
-
-	if (needed + assigned > ch->kthreads_assigned_limit) {
-		needed = ch->kthreads_assigned_limit - assigned;
-		// >>>should never be less than 0
-		if (needed <= 0) {
-			return;
-		}
-	}
-
-	dev_dbg(xpc_chan, "create %d new kthreads, partid=%d, channel=%d\n",
-		needed, ch->partid, ch->number);
-
-	xpc_create_kthreads(ch, needed, 0);
-}
-
-
-/*
- * This function is where XPC's kthreads wait for messages to deliver.
- */
-static void
-xpc_kthread_waitmsgs(struct xpc_partition *part, struct xpc_channel *ch)
-{
-	do {
-		/* deliver messages to their intended recipients */
-
-		while ((volatile s64) ch->w_local_GP.get <
-				(volatile s64) ch->w_remote_GP.put &&
-					!((volatile u32) ch->flags &
-						XPC_C_DISCONNECTING)) {
-			xpc_deliver_msg(ch);
-		}
-
-		if (atomic_inc_return(&ch->kthreads_idle) >
-						ch->kthreads_idle_limit) {
-			/* too many idle kthreads on this channel */
-			atomic_dec(&ch->kthreads_idle);
-			break;
-		}
-
-		dev_dbg(xpc_chan, "idle kthread calling "
-			"wait_event_interruptible_exclusive()\n");
-
-		(void) wait_event_interruptible_exclusive(ch->idle_wq,
-				((volatile s64) ch->w_local_GP.get <
-					(volatile s64) ch->w_remote_GP.put ||
-				((volatile u32) ch->flags &
-						XPC_C_DISCONNECTING)));
-
-		atomic_dec(&ch->kthreads_idle);
-
-	} while (!((volatile u32) ch->flags & XPC_C_DISCONNECTING));
-}
-
-
-static int
-xpc_daemonize_kthread(void *args)
-{
-	partid_t partid = XPC_UNPACK_ARG1(args);
-	u16 ch_number = XPC_UNPACK_ARG2(args);
-	struct xpc_partition *part = &xpc_partitions[partid];
-	struct xpc_channel *ch;
-	int n_needed;
-	unsigned long irq_flags;
-
-
-	daemonize("xpc%02dc%d", partid, ch_number);
-
-	dev_dbg(xpc_chan, "kthread starting, partid=%d, channel=%d\n",
-		partid, ch_number);
-
-	ch = &part->channels[ch_number];
-
-	if (!(ch->flags & XPC_C_DISCONNECTING)) {
-
-		/* let registerer know that connection has been established */
-
-		spin_lock_irqsave(&ch->lock, irq_flags);
-		if (!(ch->flags & XPC_C_CONNECTEDCALLOUT)) {
-			ch->flags |= XPC_C_CONNECTEDCALLOUT;
-			spin_unlock_irqrestore(&ch->lock, irq_flags);
-
-			xpc_connected_callout(ch);
-
-			spin_lock_irqsave(&ch->lock, irq_flags);
-			ch->flags |= XPC_C_CONNECTEDCALLOUT_MADE;
-			spin_unlock_irqrestore(&ch->lock, irq_flags);
-
-			/*
-			 * It is possible that while the callout was being
-			 * made that the remote partition sent some messages.
-			 * If that is the case, we may need to activate
-			 * additional kthreads to help deliver them. We only
-			 * need one less than total #of messages to deliver.
-			 */
-			n_needed = ch->w_remote_GP.put - ch->w_local_GP.get - 1;
-			if (n_needed > 0 &&
-					!(ch->flags & XPC_C_DISCONNECTING)) {
-				xpc_activate_kthreads(ch, n_needed);
-			}
-		} else {
-			spin_unlock_irqrestore(&ch->lock, irq_flags);
-		}
-
-		xpc_kthread_waitmsgs(part, ch);
-	}
-
-	/* let registerer know that connection is disconnecting */
-
-	spin_lock_irqsave(&ch->lock, irq_flags);
-	if ((ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) &&
-			!(ch->flags & XPC_C_DISCONNECTINGCALLOUT)) {
-		ch->flags |= XPC_C_DISCONNECTINGCALLOUT;
-		spin_unlock_irqrestore(&ch->lock, irq_flags);
-
-		xpc_disconnect_callout(ch, xpcDisconnecting);
-
-		spin_lock_irqsave(&ch->lock, irq_flags);
-		ch->flags |= XPC_C_DISCONNECTINGCALLOUT_MADE;
-	}
-	spin_unlock_irqrestore(&ch->lock, irq_flags);
-
-	if (atomic_dec_return(&ch->kthreads_assigned) == 0) {
-		if (atomic_dec_return(&part->nchannels_engaged) == 0) {
-			xpc_mark_partition_disengaged(part);
-			xpc_IPI_send_disengage(part);
-		}
-	}
-
-	xpc_msgqueue_deref(ch);
-
-	dev_dbg(xpc_chan, "kthread exiting, partid=%d, channel=%d\n",
-		partid, ch_number);
-
-	xpc_part_deref(part);
-	return 0;
-}
-
-
-/*
- * For each partition that XPC has established communications with, there is
- * a minimum of one kernel thread assigned to perform any operation that
- * may potentially sleep or block (basically the callouts to the asynchronous
- * functions registered via xpc_connect()).
- *
- * Additional kthreads are created and destroyed by XPC as the workload
- * demands.
- *
- * A kthread is assigned to one of the active channels that exists for a given
- * partition.
- */
-void
-xpc_create_kthreads(struct xpc_channel *ch, int needed,
-			int ignore_disconnecting)
-{
-	unsigned long irq_flags;
-	pid_t pid;
-	u64 args = XPC_PACK_ARGS(ch->partid, ch->number);
-	struct xpc_partition *part = &xpc_partitions[ch->partid];
-
-
-	while (needed-- > 0) {
-
-		/*
-		 * The following is done on behalf of the newly created
-		 * kthread. That kthread is responsible for doing the
-		 * counterpart to the following before it exits.
-		 */
-		if (ignore_disconnecting) {
-			if (!atomic_inc_not_zero(&ch->kthreads_assigned)) {
-				/* kthreads assigned had gone to zero */
-				BUG_ON(!(ch->flags &
-					XPC_C_DISCONNECTINGCALLOUT_MADE));
-				break;
-			}
-
-		} else if (ch->flags & XPC_C_DISCONNECTING) {
-			break;
-
-		} else if (atomic_inc_return(&ch->kthreads_assigned) == 1) {
-			if (atomic_inc_return(&part->nchannels_engaged) == 1)
-				xpc_mark_partition_engaged(part);
-		}
-		(void) xpc_part_ref(part);
-		xpc_msgqueue_ref(ch);
-
-		pid = kernel_thread(xpc_daemonize_kthread, (void *) args, 0);
-		if (pid < 0) {
-			/* the fork failed */
-
-			/*
-			 * NOTE: if (ignore_disconnecting &&
-			 * !(ch->flags & XPC_C_DISCONNECTINGCALLOUT)) is true,
-			 * then we'll deadlock if all other kthreads assigned
-			 * to this channel are blocked in the channel's
-			 * registerer, because the only thing that will unblock
-			 * them is the xpcDisconnecting callout that this
-			 * failed kernel_thread would have made.
-			 */
-
-			if (atomic_dec_return(&ch->kthreads_assigned) == 0 &&
-			    atomic_dec_return(&part->nchannels_engaged) == 0) {
-				xpc_mark_partition_disengaged(part);
-				xpc_IPI_send_disengage(part);
-			}
-			xpc_msgqueue_deref(ch);
-			xpc_part_deref(part);
-
-			if (atomic_read(&ch->kthreads_assigned) <
-						ch->kthreads_idle_limit) {
-				/*
-				 * Flag this as an error only if we have an
-				 * insufficient #of kthreads for the channel
-				 * to function.
-				 */
-				spin_lock_irqsave(&ch->lock, irq_flags);
-				XPC_DISCONNECT_CHANNEL(ch, xpcLackOfResources,
-								&irq_flags);
-				spin_unlock_irqrestore(&ch->lock, irq_flags);
-			}
-			break;
-		}
-
-		ch->kthreads_created++;	// >>> temporary debug only!!!
-	}
-}
-
-
-void
-xpc_disconnect_wait(int ch_number)
-{
-	unsigned long irq_flags;
-	partid_t partid;
-	struct xpc_partition *part;
-	struct xpc_channel *ch;
-	int wakeup_channel_mgr;
-
-
-	/* now wait for all callouts to the caller's function to cease */
-	for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
-		part = &xpc_partitions[partid];
-
-		if (!xpc_part_ref(part)) {
-			continue;
-		}
-
-		ch = &part->channels[ch_number];
-
-		if (!(ch->flags & XPC_C_WDISCONNECT)) {
-			xpc_part_deref(part);
-			continue;
-		}
-
-		wait_for_completion(&ch->wdisconnect_wait);
-
-		spin_lock_irqsave(&ch->lock, irq_flags);
-		DBUG_ON(!(ch->flags & XPC_C_DISCONNECTED));
-		wakeup_channel_mgr = 0;
-
-		if (ch->delayed_IPI_flags) {
-			if (part->act_state != XPC_P_DEACTIVATING) {
-				spin_lock(&part->IPI_lock);
-				XPC_SET_IPI_FLAGS(part->local_IPI_amo,
-					ch->number, ch->delayed_IPI_flags);
-				spin_unlock(&part->IPI_lock);
-				wakeup_channel_mgr = 1;
-			}
-			ch->delayed_IPI_flags = 0;
-		}
-
-		ch->flags &= ~XPC_C_WDISCONNECT;
-		spin_unlock_irqrestore(&ch->lock, irq_flags);
-
-		if (wakeup_channel_mgr) {
-			xpc_wakeup_channel_mgr(part);
-		}
-
-		xpc_part_deref(part);
-	}
-}
-
-
-static void
-xpc_do_exit(enum xpc_retval reason)
-{
-	partid_t partid;
-	int active_part_count, printed_waiting_msg = 0;
-	struct xpc_partition *part;
-	unsigned long printmsg_time, disengage_request_timeout = 0;
-
-
-	/* a 'rmmod XPC' and a 'reboot' cannot both end up here together */
-	DBUG_ON(xpc_exiting == 1);
-
-	/*
-	 * Let the heartbeat checker thread and the discovery thread
-	 * (if one is running) know that they should exit. Also wake up
-	 * the heartbeat checker thread in case it's sleeping.
-	 */
-	xpc_exiting = 1;
-	wake_up_interruptible(&xpc_act_IRQ_wq);
-
-	/* ignore all incoming interrupts */
-	free_irq(SGI_XPC_ACTIVATE, NULL);
-
-	/* wait for the discovery thread to exit */
-	wait_for_completion(&xpc_discovery_exited);
-
-	/* wait for the heartbeat checker thread to exit */
-	wait_for_completion(&xpc_hb_checker_exited);
-
-
-	/* sleep for a 1/3 of a second or so */
-	(void) msleep_interruptible(300);
-
-
-	/* wait for all partitions to become inactive */
-
-	printmsg_time = jiffies + (XPC_DISENGAGE_PRINTMSG_INTERVAL * HZ);
-	xpc_disengage_request_timedout = 0;
-
-	do {
-		active_part_count = 0;
-
-		for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
-			part = &xpc_partitions[partid];
-
-			if (xpc_partition_disengaged(part) &&
-					part->act_state == XPC_P_INACTIVE) {
-				continue;
-			}
-
-			active_part_count++;
-
-			XPC_DEACTIVATE_PARTITION(part, reason);
-
-			if (part->disengage_request_timeout >
-						disengage_request_timeout) {
-				disengage_request_timeout =
-						part->disengage_request_timeout;
-			}
-		}
-
-		if (xpc_partition_engaged(-1UL)) {
-			if (time_after(jiffies, printmsg_time)) {
-				dev_info(xpc_part, "waiting for remote "
-					"partitions to disengage, timeout in "
-					"%ld seconds\n",
-					(disengage_request_timeout - jiffies)
-									/ HZ);
-				printmsg_time = jiffies +
-					(XPC_DISENGAGE_PRINTMSG_INTERVAL * HZ);
-				printed_waiting_msg = 1;
-			}
-
-		} else if (active_part_count > 0) {
-			if (printed_waiting_msg) {
-				dev_info(xpc_part, "waiting for local partition"
-					" to disengage\n");
-				printed_waiting_msg = 0;
-			}
-
-		} else {
-			if (!xpc_disengage_request_timedout) {
-				dev_info(xpc_part, "all partitions have "
-					"disengaged\n");
-			}
-			break;
-		}
-
-		/* sleep for a 1/3 of a second or so */
-		(void) msleep_interruptible(300);
-
-	} while (1);
-
-	DBUG_ON(xpc_partition_engaged(-1UL));
-
-
-	/* indicate to others that our reserved page is uninitialized */
-	xpc_rsvd_page->vars_pa = 0;
-
-	/* now it's time to eliminate our heartbeat */
-	del_timer_sync(&xpc_hb_timer);
-	DBUG_ON(xpc_vars->heartbeating_to_mask != 0);
-
-	if (reason == xpcUnloading) {
-		/* take ourselves off of the reboot_notifier_list */
-		(void) unregister_reboot_notifier(&xpc_reboot_notifier);
-
-		/* take ourselves off of the die_notifier list */
-		(void) unregister_die_notifier(&xpc_die_notifier);
-	}
-
-	/* close down protections for IPI operations */
-	xpc_restrict_IPI_ops();
-
-
-	/* clear the interface to XPC's functions */
-	xpc_clear_interface();
-
-	if (xpc_sysctl) {
-		unregister_sysctl_table(xpc_sysctl);
-	}
-
-	kfree(xpc_remote_copy_buffer_base);
-}
-
-
-/*
- * This function is called when the system is being rebooted.
- */
-static int
-xpc_system_reboot(struct notifier_block *nb, unsigned long event, void *unused)
-{
-	enum xpc_retval reason;
-
-
-	switch (event) {
-	case SYS_RESTART:
-		reason = xpcSystemReboot;
-		break;
-	case SYS_HALT:
-		reason = xpcSystemHalt;
-		break;
-	case SYS_POWER_OFF:
-		reason = xpcSystemPoweroff;
-		break;
-	default:
-		reason = xpcSystemGoingDown;
-	}
-
-	xpc_do_exit(reason);
-	return NOTIFY_DONE;
-}
-
-
-/*
- * Notify other partitions to disengage from all references to our memory.
- */
-static void
-xpc_die_disengage(void)
-{
-	struct xpc_partition *part;
-	partid_t partid;
-	unsigned long engaged;
-	long time, printmsg_time, disengage_request_timeout;
-
-
-	/* keep xpc_hb_checker thread from doing anything (just in case) */
-	xpc_exiting = 1;
-
-	xpc_vars->heartbeating_to_mask = 0;  /* indicate we're deactivated */
-
-	for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
-		part = &xpc_partitions[partid];
-
-		if (!XPC_SUPPORTS_DISENGAGE_REQUEST(part->
-							remote_vars_version)) {
-
-			/* just in case it was left set by an earlier XPC */
-			xpc_clear_partition_engaged(1UL << partid);
-			continue;
-		}
-
-		if (xpc_partition_engaged(1UL << partid) ||
-					part->act_state != XPC_P_INACTIVE) {
-			xpc_request_partition_disengage(part);
-			xpc_mark_partition_disengaged(part);
-			xpc_IPI_send_disengage(part);
-		}
-	}
-
-	time = rtc_time();
-	printmsg_time = time +
-		(XPC_DISENGAGE_PRINTMSG_INTERVAL * sn_rtc_cycles_per_second);
-	disengage_request_timeout = time +
-		(xpc_disengage_request_timelimit * sn_rtc_cycles_per_second);
-
-	/* wait for all other partitions to disengage from us */
-
-	while (1) {
-		engaged = xpc_partition_engaged(-1UL);
-		if (!engaged) {
-			dev_info(xpc_part, "all partitions have disengaged\n");
-			break;
-		}
-
-		time = rtc_time();
-		if (time >= disengage_request_timeout) {
-			for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
-				if (engaged & (1UL << partid)) {
-					dev_info(xpc_part, "disengage from "
-						"remote partition %d timed "
-						"out\n", partid);
-				}
-			}
-			break;
-		}
-
-		if (time >= printmsg_time) {
-			dev_info(xpc_part, "waiting for remote partitions to "
-				"disengage, timeout in %ld seconds\n",
-				(disengage_request_timeout - time) /
-						sn_rtc_cycles_per_second);
-			printmsg_time = time +
-					(XPC_DISENGAGE_PRINTMSG_INTERVAL *
-						sn_rtc_cycles_per_second);
-		}
-	}
-}
-
-
-/*
- * This function is called when the system is being restarted or halted due
- * to some sort of system failure. If this is the case we need to notify the
- * other partitions to disengage from all references to our memory.
- * This function can also be called when our heartbeater could be offlined
- * for a time. In this case we need to notify other partitions to not worry
- * about the lack of a heartbeat.
- */
-static int
-xpc_system_die(struct notifier_block *nb, unsigned long event, void *unused)
-{
-	switch (event) {
-	case DIE_MACHINE_RESTART:
-	case DIE_MACHINE_HALT:
-		xpc_die_disengage();
-		break;
-
-	case DIE_KDEBUG_ENTER:
-		/* Should lack of heartbeat be ignored by other partitions? */
-		if (!xpc_kdebug_ignore) {
-			break;
-		}
-		/* fall through */
-	case DIE_MCA_MONARCH_ENTER:
-	case DIE_INIT_MONARCH_ENTER:
-		xpc_vars->heartbeat++;
-		xpc_vars->heartbeat_offline = 1;
-		break;
-
-	case DIE_KDEBUG_LEAVE:
-		/* Is lack of heartbeat being ignored by other partitions? */
-		if (!xpc_kdebug_ignore) {
-			break;
-		}
-		/* fall through */
-	case DIE_MCA_MONARCH_LEAVE:
-	case DIE_INIT_MONARCH_LEAVE:
-		xpc_vars->heartbeat++;
-		xpc_vars->heartbeat_offline = 0;
-		break;
-	}
-
-	return NOTIFY_DONE;
-}
-
-
-int __init
-xpc_init(void)
-{
-	int ret;
-	partid_t partid;
-	struct xpc_partition *part;
-	pid_t pid;
-	size_t buf_size;
-
-
-	if (!ia64_platform_is("sn2")) {
-		return -ENODEV;
-	}
-
-
-	buf_size = max(XPC_RP_VARS_SIZE,
-				XPC_RP_HEADER_SIZE + XP_NASID_MASK_BYTES);
-	xpc_remote_copy_buffer = xpc_kmalloc_cacheline_aligned(buf_size,
-				     GFP_KERNEL, &xpc_remote_copy_buffer_base);
-	if (xpc_remote_copy_buffer == NULL)
-		return -ENOMEM;
-
-	snprintf(xpc_part->bus_id, BUS_ID_SIZE, "part");
-	snprintf(xpc_chan->bus_id, BUS_ID_SIZE, "chan");
-
-	xpc_sysctl = register_sysctl_table(xpc_sys_dir);
-
-	/*
-	 * The first few fields of each entry of xpc_partitions[] need to
-	 * be initialized now so that calls to xpc_connect() and
-	 * xpc_disconnect() can be made prior to the activation of any remote
-	 * partition. NOTE THAT NONE OF THE OTHER FIELDS BELONGING TO THESE
-	 * ENTRIES ARE MEANINGFUL UNTIL AFTER AN ENTRY'S CORRESPONDING
-	 * PARTITION HAS BEEN ACTIVATED.
-	 */
-	for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
-		part = &xpc_partitions[partid];
-
-		DBUG_ON((u64) part != L1_CACHE_ALIGN((u64) part));
-
-		part->act_IRQ_rcvd = 0;
-		spin_lock_init(&part->act_lock);
-		part->act_state = XPC_P_INACTIVE;
-		XPC_SET_REASON(part, 0, 0);
-
-		init_timer(&part->disengage_request_timer);
-		part->disengage_request_timer.function =
-				xpc_timeout_partition_disengage_request;
-		part->disengage_request_timer.data = (unsigned long) part;
-
-		part->setup_state = XPC_P_UNSET;
-		init_waitqueue_head(&part->teardown_wq);
-		atomic_set(&part->references, 0);
-	}
-
-	/*
-	 * Open up protections for IPI operations (and AMO operations on
-	 * Shub 1.1 systems).
-	 */
-	xpc_allow_IPI_ops();
-
-	/*
-	 * Interrupts being processed will increment this atomic variable and
-	 * awaken the heartbeat thread which will process the interrupts.
-	 */
-	atomic_set(&xpc_act_IRQ_rcvd, 0);
-
-	/*
-	 * This is safe to do before the xpc_hb_checker thread has started
-	 * because the handler releases a wait queue.  If an interrupt is
-	 * received before the thread is waiting, it will not go to sleep,
-	 * but rather immediately process the interrupt.
-	 */
-	ret = request_irq(SGI_XPC_ACTIVATE, xpc_act_IRQ_handler, 0,
-							"xpc hb", NULL);
-	if (ret != 0) {
-		dev_err(xpc_part, "can't register ACTIVATE IRQ handler, "
-			"errno=%d\n", -ret);
-
-		xpc_restrict_IPI_ops();
-
-		if (xpc_sysctl) {
-			unregister_sysctl_table(xpc_sysctl);
-		}
-
-		kfree(xpc_remote_copy_buffer_base);
-		return -EBUSY;
-	}
-
-	/*
-	 * Fill the partition reserved page with the information needed by
-	 * other partitions to discover we are alive and establish initial
-	 * communications.
-	 */
-	xpc_rsvd_page = xpc_rsvd_page_init();
-	if (xpc_rsvd_page == NULL) {
-		dev_err(xpc_part, "could not setup our reserved page\n");
-
-		free_irq(SGI_XPC_ACTIVATE, NULL);
-		xpc_restrict_IPI_ops();
-
-		if (xpc_sysctl) {
-			unregister_sysctl_table(xpc_sysctl);
-		}
-
-		kfree(xpc_remote_copy_buffer_base);
-		return -EBUSY;
-	}
-
-
-	/* add ourselves to the reboot_notifier_list */
-	ret = register_reboot_notifier(&xpc_reboot_notifier);
-	if (ret != 0) {
-		dev_warn(xpc_part, "can't register reboot notifier\n");
-	}
-
-	/* add ourselves to the die_notifier list */
-	ret = register_die_notifier(&xpc_die_notifier);
-	if (ret != 0) {
-		dev_warn(xpc_part, "can't register die notifier\n");
-	}
-
-	init_timer(&xpc_hb_timer);
-	xpc_hb_timer.function = xpc_hb_beater;
-
-	/*
-	 * The real work-horse behind xpc.  This processes incoming
-	 * interrupts and monitors remote heartbeats.
-	 */
-	pid = kernel_thread(xpc_hb_checker, NULL, 0);
-	if (pid < 0) {
-		dev_err(xpc_part, "failed while forking hb check thread\n");
-
-		/* indicate to others that our reserved page is uninitialized */
-		xpc_rsvd_page->vars_pa = 0;
-
-		/* take ourselves off of the reboot_notifier_list */
-		(void) unregister_reboot_notifier(&xpc_reboot_notifier);
-
-		/* take ourselves off of the die_notifier list */
-		(void) unregister_die_notifier(&xpc_die_notifier);
-
-		del_timer_sync(&xpc_hb_timer);
-		free_irq(SGI_XPC_ACTIVATE, NULL);
-		xpc_restrict_IPI_ops();
-
-		if (xpc_sysctl) {
-			unregister_sysctl_table(xpc_sysctl);
-		}
-
-		kfree(xpc_remote_copy_buffer_base);
-		return -EBUSY;
-	}
-
-
-	/*
-	 * Startup a thread that will attempt to discover other partitions to
-	 * activate based on info provided by SAL. This new thread is short
-	 * lived and will exit once discovery is complete.
-	 */
-	pid = kernel_thread(xpc_initiate_discovery, NULL, 0);
-	if (pid < 0) {
-		dev_err(xpc_part, "failed while forking discovery thread\n");
-
-		/* mark this new thread as a non-starter */
-		complete(&xpc_discovery_exited);
-
-		xpc_do_exit(xpcUnloading);
-		return -EBUSY;
-	}
-
-
-	/* set the interface to point at XPC's functions */
-	xpc_set_interface(xpc_initiate_connect, xpc_initiate_disconnect,
-			  xpc_initiate_allocate, xpc_initiate_send,
-			  xpc_initiate_send_notify, xpc_initiate_received,
-			  xpc_initiate_partid_to_nasids);
-
-	return 0;
-}
-module_init(xpc_init);
-
-
-void __exit
-xpc_exit(void)
-{
-	xpc_do_exit(xpcUnloading);
-}
-module_exit(xpc_exit);
-
-
-MODULE_AUTHOR("Silicon Graphics, Inc.");
-MODULE_DESCRIPTION("Cross Partition Communication (XPC) support");
-MODULE_LICENSE("GPL");
-
-module_param(xpc_hb_interval, int, 0);
-MODULE_PARM_DESC(xpc_hb_interval, "Number of seconds between "
-		"heartbeat increments.");
-
-module_param(xpc_hb_check_interval, int, 0);
-MODULE_PARM_DESC(xpc_hb_check_interval, "Number of seconds between "
-		"heartbeat checks.");
-
-module_param(xpc_disengage_request_timelimit, int, 0);
-MODULE_PARM_DESC(xpc_disengage_request_timelimit, "Number of seconds to wait "
-		"for disengage request to complete.");
-
-module_param(xpc_kdebug_ignore, int, 0);
-MODULE_PARM_DESC(xpc_kdebug_ignore, "Should lack of heartbeat be ignored by "
-		"other partitions when dropping into kdebug.");
-
diff --git a/arch/ia64/sn/kernel/xpc_partition.c b/arch/ia64/sn/kernel/xpc_partition.c
deleted file mode 100644
index 7ba4032..0000000
--- a/arch/ia64/sn/kernel/xpc_partition.c
+++ /dev/null
@@ -1,1239 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (c) 2004-2006 Silicon Graphics, Inc.  All Rights Reserved.
- */
-
-
-/*
- * Cross Partition Communication (XPC) partition support.
- *
- *	This is the part of XPC that detects the presence/absence of
- *	other partitions. It provides a heartbeat and monitors the
- *	heartbeats of other partitions.
- *
- */
-
-
-#include <linux/kernel.h>
-#include <linux/sysctl.h>
-#include <linux/cache.h>
-#include <linux/mmzone.h>
-#include <linux/nodemask.h>
-#include <asm/uncached.h>
-#include <asm/sn/bte.h>
-#include <asm/sn/intr.h>
-#include <asm/sn/sn_sal.h>
-#include <asm/sn/nodepda.h>
-#include <asm/sn/addrs.h>
-#include <asm/sn/xpc.h>
-
-
-/* XPC is exiting flag */
-int xpc_exiting;
-
-
-/* SH_IPI_ACCESS shub register value on startup */
-static u64 xpc_sh1_IPI_access;
-static u64 xpc_sh2_IPI_access0;
-static u64 xpc_sh2_IPI_access1;
-static u64 xpc_sh2_IPI_access2;
-static u64 xpc_sh2_IPI_access3;
-
-
-/* original protection values for each node */
-u64 xpc_prot_vec[MAX_NUMNODES];
-
-
-/* this partition's reserved page pointers */
-struct xpc_rsvd_page *xpc_rsvd_page;
-static u64 *xpc_part_nasids;
-static u64 *xpc_mach_nasids;
-struct xpc_vars *xpc_vars;
-struct xpc_vars_part *xpc_vars_part;
-
-static int xp_nasid_mask_bytes;	/* actual size in bytes of nasid mask */
-static int xp_nasid_mask_words;	/* actual size in words of nasid mask */
-
-
-/*
- * For performance reasons, each entry of xpc_partitions[] is cacheline
- * aligned. And xpc_partitions[] is padded with an additional entry at the
- * end so that the last legitimate entry doesn't share its cacheline with
- * another variable.
- */
-struct xpc_partition xpc_partitions[XP_MAX_PARTITIONS + 1];
-
-
-/*
- * Generic buffer used to store a local copy of portions of a remote
- * partition's reserved page (either its header and part_nasids mask,
- * or its vars).
- */
-char *xpc_remote_copy_buffer;
-void *xpc_remote_copy_buffer_base;
-
-
-/*
- * Guarantee that the kmalloc'd memory is cacheline aligned.
- */
-void *
-xpc_kmalloc_cacheline_aligned(size_t size, gfp_t flags, void **base)
-{
-	/* see if kmalloc will give us cachline aligned memory by default */
-	*base = kmalloc(size, flags);
-	if (*base == NULL) {
-		return NULL;
-	}
-	if ((u64) *base == L1_CACHE_ALIGN((u64) *base)) {
-		return *base;
-	}
-	kfree(*base);
-
-	/* nope, we'll have to do it ourselves */
-	*base = kmalloc(size + L1_CACHE_BYTES, flags);
-	if (*base == NULL) {
-		return NULL;
-	}
-	return (void *) L1_CACHE_ALIGN((u64) *base);
-}
-
-
-/*
- * Given a nasid, get the physical address of the  partition's reserved page
- * for that nasid. This function returns 0 on any error.
- */
-static u64
-xpc_get_rsvd_page_pa(int nasid)
-{
-	bte_result_t bte_res;
-	s64 status;
-	u64 cookie = 0;
-	u64 rp_pa = nasid;	/* seed with nasid */
-	u64 len = 0;
-	u64 buf = buf;
-	u64 buf_len = 0;
-	void *buf_base = NULL;
-
-
-	while (1) {
-
-		status = sn_partition_reserved_page_pa(buf, &cookie, &rp_pa,
-								&len);
-
-		dev_dbg(xpc_part, "SAL returned with status=%li, cookie="
-			"0x%016lx, address=0x%016lx, len=0x%016lx\n",
-			status, cookie, rp_pa, len);
-
-		if (status != SALRET_MORE_PASSES) {
-			break;
-		}
-
-		if (L1_CACHE_ALIGN(len) > buf_len) {
-			kfree(buf_base);
-			buf_len = L1_CACHE_ALIGN(len);
-			buf = (u64) xpc_kmalloc_cacheline_aligned(buf_len,
-							GFP_KERNEL, &buf_base);
-			if (buf_base == NULL) {
-				dev_err(xpc_part, "unable to kmalloc "
-					"len=0x%016lx\n", buf_len);
-				status = SALRET_ERROR;
-				break;
-			}
-		}
-
-		bte_res = xp_bte_copy(rp_pa, buf, buf_len,
-					(BTE_NOTIFY | BTE_WACQUIRE), NULL);
-		if (bte_res != BTE_SUCCESS) {
-			dev_dbg(xpc_part, "xp_bte_copy failed %i\n", bte_res);
-			status = SALRET_ERROR;
-			break;
-		}
-	}
-
-	kfree(buf_base);
-
-	if (status != SALRET_OK) {
-		rp_pa = 0;
-	}
-	dev_dbg(xpc_part, "reserved page at phys address 0x%016lx\n", rp_pa);
-	return rp_pa;
-}
-
-
-/*
- * Fill the partition reserved page with the information needed by
- * other partitions to discover we are alive and establish initial
- * communications.
- */
-struct xpc_rsvd_page *
-xpc_rsvd_page_init(void)
-{
-	struct xpc_rsvd_page *rp;
-	AMO_t *amos_page;
-	u64 rp_pa, nasid_array = 0;
-	int i, ret;
-
-
-	/* get the local reserved page's address */
-
-	preempt_disable();
-	rp_pa = xpc_get_rsvd_page_pa(cpuid_to_nasid(smp_processor_id()));
-	preempt_enable();
-	if (rp_pa == 0) {
-		dev_err(xpc_part, "SAL failed to locate the reserved page\n");
-		return NULL;
-	}
-	rp = (struct xpc_rsvd_page *) __va(rp_pa);
-
-	if (rp->partid != sn_partition_id) {
-		dev_err(xpc_part, "the reserved page's partid of %d should be "
-			"%d\n", rp->partid, sn_partition_id);
-		return NULL;
-	}
-
-	rp->version = XPC_RP_VERSION;
-
-	/* establish the actual sizes of the nasid masks */
-	if (rp->SAL_version == 1) {
-		/* SAL_version 1 didn't set the nasids_size field */
-		rp->nasids_size = 128;
-	}
-	xp_nasid_mask_bytes = rp->nasids_size;
-	xp_nasid_mask_words = xp_nasid_mask_bytes / 8;
-
-	/* setup the pointers to the various items in the reserved page */
-	xpc_part_nasids = XPC_RP_PART_NASIDS(rp);
-	xpc_mach_nasids = XPC_RP_MACH_NASIDS(rp);
-	xpc_vars = XPC_RP_VARS(rp);
-	xpc_vars_part = XPC_RP_VARS_PART(rp);
-
-	/*
-	 * Before clearing xpc_vars, see if a page of AMOs had been previously
-	 * allocated. If not we'll need to allocate one and set permissions
-	 * so that cross-partition AMOs are allowed.
-	 *
-	 * The allocated AMO page needs MCA reporting to remain disabled after
-	 * XPC has unloaded.  To make this work, we keep a copy of the pointer
-	 * to this page (i.e., amos_page) in the struct xpc_vars structure,
-	 * which is pointed to by the reserved page, and re-use that saved copy
-	 * on subsequent loads of XPC. This AMO page is never freed, and its
-	 * memory protections are never restricted.
-	 */
-	if ((amos_page = xpc_vars->amos_page) == NULL) {
-		amos_page = (AMO_t *) TO_AMO(uncached_alloc_page(0));
-		if (amos_page == NULL) {
-			dev_err(xpc_part, "can't allocate page of AMOs\n");
-			return NULL;
-		}
-
-		/*
-		 * Open up AMO-R/W to cpu.  This is done for Shub 1.1 systems
-		 * when xpc_allow_IPI_ops() is called via xpc_hb_init().
-		 */
-		if (!enable_shub_wars_1_1()) {
-			ret = sn_change_memprotect(ia64_tpa((u64) amos_page),
-					PAGE_SIZE, SN_MEMPROT_ACCESS_CLASS_1,
-					&nasid_array);
-			if (ret != 0) {
-				dev_err(xpc_part, "can't change memory "
-					"protections\n");
-				uncached_free_page(__IA64_UNCACHED_OFFSET |
-						   TO_PHYS((u64) amos_page));
-				return NULL;
-			}
-		}
-	} else if (!IS_AMO_ADDRESS((u64) amos_page)) {
-		/*
-		 * EFI's XPBOOT can also set amos_page in the reserved page,
-		 * but it happens to leave it as an uncached physical address
-		 * and we need it to be an uncached virtual, so we'll have to
-		 * convert it.
-		 */
-		if (!IS_AMO_PHYS_ADDRESS((u64) amos_page)) {
-			dev_err(xpc_part, "previously used amos_page address "
-				"is bad = 0x%p\n", (void *) amos_page);
-			return NULL;
-		}
-		amos_page = (AMO_t *) TO_AMO((u64) amos_page);
-	}
-
-	/* clear xpc_vars */
-	memset(xpc_vars, 0, sizeof(struct xpc_vars));
-
-	xpc_vars->version = XPC_V_VERSION;
-	xpc_vars->act_nasid = cpuid_to_nasid(0);
-	xpc_vars->act_phys_cpuid = cpu_physical_id(0);
-	xpc_vars->vars_part_pa = __pa(xpc_vars_part);
-	xpc_vars->amos_page_pa = ia64_tpa((u64) amos_page);
-	xpc_vars->amos_page = amos_page;  /* save for next load of XPC */
-
-
-	/* clear xpc_vars_part */
-	memset((u64 *) xpc_vars_part, 0, sizeof(struct xpc_vars_part) *
-							XP_MAX_PARTITIONS);
-
-	/* initialize the activate IRQ related AMO variables */
-	for (i = 0; i < xp_nasid_mask_words; i++) {
-		(void) xpc_IPI_init(XPC_ACTIVATE_IRQ_AMOS + i);
-	}
-
-	/* initialize the engaged remote partitions related AMO variables */
-	(void) xpc_IPI_init(XPC_ENGAGED_PARTITIONS_AMO);
-	(void) xpc_IPI_init(XPC_DISENGAGE_REQUEST_AMO);
-
-	/* timestamp of when reserved page was setup by XPC */
-	rp->stamp = CURRENT_TIME;
-
-	/*
-	 * This signifies to the remote partition that our reserved
-	 * page is initialized.
-	 */
-	rp->vars_pa = __pa(xpc_vars);
-
-	return rp;
-}
-
-
-/*
- * Change protections to allow IPI operations (and AMO operations on
- * Shub 1.1 systems).
- */
-void
-xpc_allow_IPI_ops(void)
-{
-	int node;
-	int nasid;
-
-
-	// >>> Change SH_IPI_ACCESS code to use SAL call once it is available.
-
-	if (is_shub2()) {
-		xpc_sh2_IPI_access0 =
-			(u64) HUB_L((u64 *) LOCAL_MMR_ADDR(SH2_IPI_ACCESS0));
-		xpc_sh2_IPI_access1 =
-			(u64) HUB_L((u64 *) LOCAL_MMR_ADDR(SH2_IPI_ACCESS1));
-		xpc_sh2_IPI_access2 =
-			(u64) HUB_L((u64 *) LOCAL_MMR_ADDR(SH2_IPI_ACCESS2));
-		xpc_sh2_IPI_access3 =
-			(u64) HUB_L((u64 *) LOCAL_MMR_ADDR(SH2_IPI_ACCESS3));
-
-		for_each_online_node(node) {
-			nasid = cnodeid_to_nasid(node);
-			HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS0),
-								-1UL);
-			HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS1),
-								-1UL);
-			HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS2),
-								-1UL);
-			HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS3),
-								-1UL);
-		}
-
-	} else {
-		xpc_sh1_IPI_access =
-			(u64) HUB_L((u64 *) LOCAL_MMR_ADDR(SH1_IPI_ACCESS));
-
-		for_each_online_node(node) {
-			nasid = cnodeid_to_nasid(node);
-			HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH1_IPI_ACCESS),
-								-1UL);
-
-			/*
-			 * Since the BIST collides with memory operations on
-			 * SHUB 1.1 sn_change_memprotect() cannot be used.
-			 */
-			if (enable_shub_wars_1_1()) {
-				/* open up everything */
-				xpc_prot_vec[node] = (u64) HUB_L((u64 *)
-						GLOBAL_MMR_ADDR(nasid,
-						SH1_MD_DQLP_MMR_DIR_PRIVEC0));
-				HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid,
-						SH1_MD_DQLP_MMR_DIR_PRIVEC0),
-								-1UL);
-				HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid,
-						SH1_MD_DQRP_MMR_DIR_PRIVEC0),
-								-1UL);
-			}
-		}
-	}
-}
-
-
-/*
- * Restrict protections to disallow IPI operations (and AMO operations on
- * Shub 1.1 systems).
- */
-void
-xpc_restrict_IPI_ops(void)
-{
-	int node;
-	int nasid;
-
-
-	// >>> Change SH_IPI_ACCESS code to use SAL call once it is available.
-
-	if (is_shub2()) {
-
-		for_each_online_node(node) {
-			nasid = cnodeid_to_nasid(node);
-			HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS0),
-							xpc_sh2_IPI_access0);
-			HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS1),
-							xpc_sh2_IPI_access1);
-			HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS2),
-							xpc_sh2_IPI_access2);
-			HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS3),
-							xpc_sh2_IPI_access3);
-		}
-
-	} else {
-
-		for_each_online_node(node) {
-			nasid = cnodeid_to_nasid(node);
-			HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH1_IPI_ACCESS),
-							xpc_sh1_IPI_access);
-
-			if (enable_shub_wars_1_1()) {
-				HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid,
-						SH1_MD_DQLP_MMR_DIR_PRIVEC0),
-							xpc_prot_vec[node]);
-				HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid,
-						SH1_MD_DQRP_MMR_DIR_PRIVEC0),
-							xpc_prot_vec[node]);
-			}
-		}
-	}
-}
-
-
-/*
- * At periodic intervals, scan through all active partitions and ensure
- * their heartbeat is still active.  If not, the partition is deactivated.
- */
-void
-xpc_check_remote_hb(void)
-{
-	struct xpc_vars *remote_vars;
-	struct xpc_partition *part;
-	partid_t partid;
-	bte_result_t bres;
-
-
-	remote_vars = (struct xpc_vars *) xpc_remote_copy_buffer;
-
-	for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
-
-		if (xpc_exiting) {
-			break;
-		}
-
-		if (partid == sn_partition_id) {
-			continue;
-		}
-
-		part = &xpc_partitions[partid];
-
-		if (part->act_state == XPC_P_INACTIVE ||
-				part->act_state == XPC_P_DEACTIVATING) {
-			continue;
-		}
-
-		/* pull the remote_hb cache line */
-		bres = xp_bte_copy(part->remote_vars_pa,
-					(u64) remote_vars,
-					XPC_RP_VARS_SIZE,
-					(BTE_NOTIFY | BTE_WACQUIRE), NULL);
-		if (bres != BTE_SUCCESS) {
-			XPC_DEACTIVATE_PARTITION(part,
-						xpc_map_bte_errors(bres));
-			continue;
-		}
-
-		dev_dbg(xpc_part, "partid = %d, heartbeat = %ld, last_heartbeat"
-			" = %ld, heartbeat_offline = %ld, HB_mask = 0x%lx\n",
-			partid, remote_vars->heartbeat, part->last_heartbeat,
-			remote_vars->heartbeat_offline,
-			remote_vars->heartbeating_to_mask);
-
-		if (((remote_vars->heartbeat == part->last_heartbeat) &&
-			(remote_vars->heartbeat_offline == 0)) ||
-			     !xpc_hb_allowed(sn_partition_id, remote_vars)) {
-
-			XPC_DEACTIVATE_PARTITION(part, xpcNoHeartbeat);
-			continue;
-		}
-
-		part->last_heartbeat = remote_vars->heartbeat;
-	}
-}
-
-
-/*
- * Get a copy of a portion of the remote partition's rsvd page.
- *
- * remote_rp points to a buffer that is cacheline aligned for BTE copies and
- * is large enough to contain a copy of their reserved page header and
- * part_nasids mask.
- */
-static enum xpc_retval
-xpc_get_remote_rp(int nasid, u64 *discovered_nasids,
-		struct xpc_rsvd_page *remote_rp, u64 *remote_rp_pa)
-{
-	int bres, i;
-
-
-	/* get the reserved page's physical address */
-
-	*remote_rp_pa = xpc_get_rsvd_page_pa(nasid);
-	if (*remote_rp_pa == 0) {
-		return xpcNoRsvdPageAddr;
-	}
-
-
-	/* pull over the reserved page header and part_nasids mask */
-	bres = xp_bte_copy(*remote_rp_pa, (u64) remote_rp,
-				XPC_RP_HEADER_SIZE + xp_nasid_mask_bytes,
-				(BTE_NOTIFY | BTE_WACQUIRE), NULL);
-	if (bres != BTE_SUCCESS) {
-		return xpc_map_bte_errors(bres);
-	}
-
-
-	if (discovered_nasids != NULL) {
-		u64 *remote_part_nasids = XPC_RP_PART_NASIDS(remote_rp);
-
-
-		for (i = 0; i < xp_nasid_mask_words; i++) {
-			discovered_nasids[i] |= remote_part_nasids[i];
-		}
-	}
-
-
-	/* check that the partid is for another partition */
-
-	if (remote_rp->partid < 1 ||
-				remote_rp->partid > (XP_MAX_PARTITIONS - 1)) {
-		return xpcInvalidPartid;
-	}
-
-	if (remote_rp->partid == sn_partition_id) {
-		return xpcLocalPartid;
-	}
-
-
-	if (XPC_VERSION_MAJOR(remote_rp->version) !=
-					XPC_VERSION_MAJOR(XPC_RP_VERSION)) {
-		return xpcBadVersion;
-	}
-
-	return xpcSuccess;
-}
-
-
-/*
- * Get a copy of the remote partition's XPC variables from the reserved page.
- *
- * remote_vars points to a buffer that is cacheline aligned for BTE copies and
- * assumed to be of size XPC_RP_VARS_SIZE.
- */
-static enum xpc_retval
-xpc_get_remote_vars(u64 remote_vars_pa, struct xpc_vars *remote_vars)
-{
-	int bres;
-
-
-	if (remote_vars_pa == 0) {
-		return xpcVarsNotSet;
-	}
-
-	/* pull over the cross partition variables */
-	bres = xp_bte_copy(remote_vars_pa, (u64) remote_vars, XPC_RP_VARS_SIZE,
-				(BTE_NOTIFY | BTE_WACQUIRE), NULL);
-	if (bres != BTE_SUCCESS) {
-		return xpc_map_bte_errors(bres);
-	}
-
-	if (XPC_VERSION_MAJOR(remote_vars->version) !=
-					XPC_VERSION_MAJOR(XPC_V_VERSION)) {
-		return xpcBadVersion;
-	}
-
-	return xpcSuccess;
-}
-
-
-/*
- * Update the remote partition's info.
- */
-static void
-xpc_update_partition_info(struct xpc_partition *part, u8 remote_rp_version,
-		struct timespec *remote_rp_stamp, u64 remote_rp_pa,
-		u64 remote_vars_pa, struct xpc_vars *remote_vars)
-{
-	part->remote_rp_version = remote_rp_version;
-	dev_dbg(xpc_part, "  remote_rp_version = 0x%016x\n",
-		part->remote_rp_version);
-
-	part->remote_rp_stamp = *remote_rp_stamp;
-	dev_dbg(xpc_part, "  remote_rp_stamp (tv_sec = 0x%lx tv_nsec = 0x%lx\n",
-		part->remote_rp_stamp.tv_sec, part->remote_rp_stamp.tv_nsec);
-
-	part->remote_rp_pa = remote_rp_pa;
-	dev_dbg(xpc_part, "  remote_rp_pa = 0x%016lx\n", part->remote_rp_pa);
-
-	part->remote_vars_pa = remote_vars_pa;
-	dev_dbg(xpc_part, "  remote_vars_pa = 0x%016lx\n",
-		part->remote_vars_pa);
-
-	part->last_heartbeat = remote_vars->heartbeat;
-	dev_dbg(xpc_part, "  last_heartbeat = 0x%016lx\n",
-		part->last_heartbeat);
-
-	part->remote_vars_part_pa = remote_vars->vars_part_pa;
-	dev_dbg(xpc_part, "  remote_vars_part_pa = 0x%016lx\n",
-		part->remote_vars_part_pa);
-
-	part->remote_act_nasid = remote_vars->act_nasid;
-	dev_dbg(xpc_part, "  remote_act_nasid = 0x%x\n",
-		part->remote_act_nasid);
-
-	part->remote_act_phys_cpuid = remote_vars->act_phys_cpuid;
-	dev_dbg(xpc_part, "  remote_act_phys_cpuid = 0x%x\n",
-		part->remote_act_phys_cpuid);
-
-	part->remote_amos_page_pa = remote_vars->amos_page_pa;
-	dev_dbg(xpc_part, "  remote_amos_page_pa = 0x%lx\n",
-		part->remote_amos_page_pa);
-
-	part->remote_vars_version = remote_vars->version;
-	dev_dbg(xpc_part, "  remote_vars_version = 0x%x\n",
-		part->remote_vars_version);
-}
-
-
-/*
- * Prior code has determined the nasid which generated an IPI.  Inspect
- * that nasid to determine if its partition needs to be activated or
- * deactivated.
- *
- * A partition is consider "awaiting activation" if our partition
- * flags indicate it is not active and it has a heartbeat.  A
- * partition is considered "awaiting deactivation" if our partition
- * flags indicate it is active but it has no heartbeat or it is not
- * sending its heartbeat to us.
- *
- * To determine the heartbeat, the remote nasid must have a properly
- * initialized reserved page.
- */
-static void
-xpc_identify_act_IRQ_req(int nasid)
-{
-	struct xpc_rsvd_page *remote_rp;
-	struct xpc_vars *remote_vars;
-	u64 remote_rp_pa;
-	u64 remote_vars_pa;
-	int remote_rp_version;
-	int reactivate = 0;
-	int stamp_diff;
-	struct timespec remote_rp_stamp = { 0, 0 };
-	partid_t partid;
-	struct xpc_partition *part;
-	enum xpc_retval ret;
-
-
-	/* pull over the reserved page structure */
-
-	remote_rp = (struct xpc_rsvd_page *) xpc_remote_copy_buffer;
-
-	ret = xpc_get_remote_rp(nasid, NULL, remote_rp, &remote_rp_pa);
-	if (ret != xpcSuccess) {
-		dev_warn(xpc_part, "unable to get reserved page from nasid %d, "
-			"which sent interrupt, reason=%d\n", nasid, ret);
-		return;
-	}
-
-	remote_vars_pa = remote_rp->vars_pa;
-	remote_rp_version = remote_rp->version;
-	if (XPC_SUPPORTS_RP_STAMP(remote_rp_version)) {
-		remote_rp_stamp = remote_rp->stamp;
-	}
-	partid = remote_rp->partid;
-	part = &xpc_partitions[partid];
-
-
-	/* pull over the cross partition variables */
-
-	remote_vars = (struct xpc_vars *) xpc_remote_copy_buffer;
-
-	ret = xpc_get_remote_vars(remote_vars_pa, remote_vars);
-	if (ret != xpcSuccess) {
-
-		dev_warn(xpc_part, "unable to get XPC variables from nasid %d, "
-			"which sent interrupt, reason=%d\n", nasid, ret);
-
-		XPC_DEACTIVATE_PARTITION(part, ret);
-		return;
-	}
-
-
-	part->act_IRQ_rcvd++;
-
-	dev_dbg(xpc_part, "partid for nasid %d is %d; IRQs = %d; HB = "
-		"%ld:0x%lx\n", (int) nasid, (int) partid, part->act_IRQ_rcvd,
-		remote_vars->heartbeat, remote_vars->heartbeating_to_mask);
-
-	if (xpc_partition_disengaged(part) &&
-					part->act_state == XPC_P_INACTIVE) {
-
-		xpc_update_partition_info(part, remote_rp_version,
-					&remote_rp_stamp, remote_rp_pa,
-					remote_vars_pa, remote_vars);
-
-		if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version)) {
-			if (xpc_partition_disengage_requested(1UL << partid)) {
-				/*
-				 * Other side is waiting on us to disengage,
-				 * even though we already have.
-				 */
-				return;
-			}
-		} else {
-			/* other side doesn't support disengage requests */
-			xpc_clear_partition_disengage_request(1UL << partid);
-		}
-
-		xpc_activate_partition(part);
-		return;
-	}
-
-	DBUG_ON(part->remote_rp_version == 0);
-	DBUG_ON(part->remote_vars_version == 0);
-
-	if (!XPC_SUPPORTS_RP_STAMP(part->remote_rp_version)) {
-		DBUG_ON(XPC_SUPPORTS_DISENGAGE_REQUEST(part->
-							remote_vars_version));
-
-		if (!XPC_SUPPORTS_RP_STAMP(remote_rp_version)) {
-			DBUG_ON(XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->
-								version));
-			/* see if the other side rebooted */
-			if (part->remote_amos_page_pa ==
-				remote_vars->amos_page_pa &&
-					xpc_hb_allowed(sn_partition_id,
-								remote_vars)) {
-				/* doesn't look that way, so ignore the IPI */
-				return;
-			}
-		}
-
-		/*
-		 * Other side rebooted and previous XPC didn't support the
-		 * disengage request, so we don't need to do anything special.
-		 */
-
-		xpc_update_partition_info(part, remote_rp_version,
-						&remote_rp_stamp, remote_rp_pa,
-						remote_vars_pa, remote_vars);
-		part->reactivate_nasid = nasid;
-		XPC_DEACTIVATE_PARTITION(part, xpcReactivating);
-		return;
-	}
-
-	DBUG_ON(!XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version));
-
-	if (!XPC_SUPPORTS_RP_STAMP(remote_rp_version)) {
-		DBUG_ON(!XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->version));
-
-		/*
-		 * Other side rebooted and previous XPC did support the
-		 * disengage request, but the new one doesn't.
-		 */
-
-		xpc_clear_partition_engaged(1UL << partid);
-		xpc_clear_partition_disengage_request(1UL << partid);
-
-		xpc_update_partition_info(part, remote_rp_version,
-						&remote_rp_stamp, remote_rp_pa,
-						remote_vars_pa, remote_vars);
-		reactivate = 1;
-
-	} else {
-		DBUG_ON(!XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->version));
-
-		stamp_diff = xpc_compare_stamps(&part->remote_rp_stamp,
-							&remote_rp_stamp);
-		if (stamp_diff != 0) {
-			DBUG_ON(stamp_diff >= 0);
-
-			/*
-			 * Other side rebooted and the previous XPC did support
-			 * the disengage request, as does the new one.
-			 */
-
-			DBUG_ON(xpc_partition_engaged(1UL << partid));
-			DBUG_ON(xpc_partition_disengage_requested(1UL <<
-								partid));
-
-			xpc_update_partition_info(part, remote_rp_version,
-						&remote_rp_stamp, remote_rp_pa,
-						remote_vars_pa, remote_vars);
-			reactivate = 1;
-		}
-	}
-
-	if (part->disengage_request_timeout > 0 &&
-					!xpc_partition_disengaged(part)) {
-		/* still waiting on other side to disengage from us */
-		return;
-	}
-
-	if (reactivate) {
-		part->reactivate_nasid = nasid;
-		XPC_DEACTIVATE_PARTITION(part, xpcReactivating);
-
-	} else if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version) &&
-			xpc_partition_disengage_requested(1UL << partid)) {
-		XPC_DEACTIVATE_PARTITION(part, xpcOtherGoingDown);
-	}
-}
-
-
-/*
- * Loop through the activation AMO variables and process any bits
- * which are set.  Each bit indicates a nasid sending a partition
- * activation or deactivation request.
- *
- * Return #of IRQs detected.
- */
-int
-xpc_identify_act_IRQ_sender(void)
-{
-	int word, bit;
-	u64 nasid_mask;
-	u64 nasid;			/* remote nasid */
-	int n_IRQs_detected = 0;
-	AMO_t *act_amos;
-
-
-	act_amos = xpc_vars->amos_page + XPC_ACTIVATE_IRQ_AMOS;
-
-
-	/* scan through act AMO variable looking for non-zero entries */
-	for (word = 0; word < xp_nasid_mask_words; word++) {
-
-		if (xpc_exiting) {
-			break;
-		}
-
-		nasid_mask = xpc_IPI_receive(&act_amos[word]);
-		if (nasid_mask == 0) {
-			/* no IRQs from nasids in this variable */
-			continue;
-		}
-
-		dev_dbg(xpc_part, "AMO[%d] gave back 0x%lx\n", word,
-			nasid_mask);
-
-
-		/*
-		 * If this nasid has been added to the machine since
-		 * our partition was reset, this will retain the
-		 * remote nasid in our reserved pages machine mask.
-		 * This is used in the event of module reload.
-		 */
-		xpc_mach_nasids[word] |= nasid_mask;
-
-
-		/* locate the nasid(s) which sent interrupts */
-
-		for (bit = 0; bit < (8 * sizeof(u64)); bit++) {
-			if (nasid_mask & (1UL << bit)) {
-				n_IRQs_detected++;
-				nasid = XPC_NASID_FROM_W_B(word, bit);
-				dev_dbg(xpc_part, "interrupt from nasid %ld\n",
-					nasid);
-				xpc_identify_act_IRQ_req(nasid);
-			}
-		}
-	}
-	return n_IRQs_detected;
-}
-
-
-/*
- * See if the other side has responded to a partition disengage request
- * from us.
- */
-int
-xpc_partition_disengaged(struct xpc_partition *part)
-{
-	partid_t partid = XPC_PARTID(part);
-	int disengaged;
-
-
-	disengaged = (xpc_partition_engaged(1UL << partid) == 0);
-	if (part->disengage_request_timeout) {
-		if (!disengaged) {
-			if (jiffies < part->disengage_request_timeout) {
-				/* timelimit hasn't been reached yet */
-				return 0;
-			}
-
-			/*
-			 * Other side hasn't responded to our disengage
-			 * request in a timely fashion, so assume it's dead.
-			 */
-
-			dev_info(xpc_part, "disengage from remote partition %d "
-				"timed out\n", partid);
-			xpc_disengage_request_timedout = 1;
-			xpc_clear_partition_engaged(1UL << partid);
-			disengaged = 1;
-		}
-		part->disengage_request_timeout = 0;
-
-		/* cancel the timer function, provided it's not us */
-		if (!in_interrupt()) {
-			del_singleshot_timer_sync(&part->
-						      disengage_request_timer);
-		}
-
-		DBUG_ON(part->act_state != XPC_P_DEACTIVATING &&
-					part->act_state != XPC_P_INACTIVE);
-		if (part->act_state != XPC_P_INACTIVE) {
-			xpc_wakeup_channel_mgr(part);
-		}
-
-		if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version)) {
-			xpc_cancel_partition_disengage_request(part);
-		}
-	}
-	return disengaged;
-}
-
-
-/*
- * Mark specified partition as active.
- */
-enum xpc_retval
-xpc_mark_partition_active(struct xpc_partition *part)
-{
-	unsigned long irq_flags;
-	enum xpc_retval ret;
-
-
-	dev_dbg(xpc_part, "setting partition %d to ACTIVE\n", XPC_PARTID(part));
-
-	spin_lock_irqsave(&part->act_lock, irq_flags);
-	if (part->act_state == XPC_P_ACTIVATING) {
-		part->act_state = XPC_P_ACTIVE;
-		ret = xpcSuccess;
-	} else {
-		DBUG_ON(part->reason == xpcSuccess);
-		ret = part->reason;
-	}
-	spin_unlock_irqrestore(&part->act_lock, irq_flags);
-
-	return ret;
-}
-
-
-/*
- * Notify XPC that the partition is down.
- */
-void
-xpc_deactivate_partition(const int line, struct xpc_partition *part,
-				enum xpc_retval reason)
-{
-	unsigned long irq_flags;
-
-
-	spin_lock_irqsave(&part->act_lock, irq_flags);
-
-	if (part->act_state == XPC_P_INACTIVE) {
-		XPC_SET_REASON(part, reason, line);
-		spin_unlock_irqrestore(&part->act_lock, irq_flags);
-		if (reason == xpcReactivating) {
-			/* we interrupt ourselves to reactivate partition */
-			xpc_IPI_send_reactivate(part);
-		}
-		return;
-	}
-	if (part->act_state == XPC_P_DEACTIVATING) {
-		if ((part->reason == xpcUnloading && reason != xpcUnloading) ||
-					reason == xpcReactivating) {
-			XPC_SET_REASON(part, reason, line);
-		}
-		spin_unlock_irqrestore(&part->act_lock, irq_flags);
-		return;
-	}
-
-	part->act_state = XPC_P_DEACTIVATING;
-	XPC_SET_REASON(part, reason, line);
-
-	spin_unlock_irqrestore(&part->act_lock, irq_flags);
-
-	if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version)) {
-		xpc_request_partition_disengage(part);
-		xpc_IPI_send_disengage(part);
-
-		/* set a timelimit on the disengage request */
-		part->disengage_request_timeout = jiffies +
-					(xpc_disengage_request_timelimit * HZ);
-		part->disengage_request_timer.expires =
-					part->disengage_request_timeout;
-		add_timer(&part->disengage_request_timer);
-	}
-
-	dev_dbg(xpc_part, "bringing partition %d down, reason = %d\n",
-		XPC_PARTID(part), reason);
-
-	xpc_partition_going_down(part, reason);
-}
-
-
-/*
- * Mark specified partition as inactive.
- */
-void
-xpc_mark_partition_inactive(struct xpc_partition *part)
-{
-	unsigned long irq_flags;
-
-
-	dev_dbg(xpc_part, "setting partition %d to INACTIVE\n",
-		XPC_PARTID(part));
-
-	spin_lock_irqsave(&part->act_lock, irq_flags);
-	part->act_state = XPC_P_INACTIVE;
-	spin_unlock_irqrestore(&part->act_lock, irq_flags);
-	part->remote_rp_pa = 0;
-}
-
-
-/*
- * SAL has provided a partition and machine mask.  The partition mask
- * contains a bit for each even nasid in our partition.  The machine
- * mask contains a bit for each even nasid in the entire machine.
- *
- * Using those two bit arrays, we can determine which nasids are
- * known in the machine.  Each should also have a reserved page
- * initialized if they are available for partitioning.
- */
-void
-xpc_discovery(void)
-{
-	void *remote_rp_base;
-	struct xpc_rsvd_page *remote_rp;
-	struct xpc_vars *remote_vars;
-	u64 remote_rp_pa;
-	u64 remote_vars_pa;
-	int region;
-	int region_size;
-	int max_regions;
-	int nasid;
-	struct xpc_rsvd_page *rp;
-	partid_t partid;
-	struct xpc_partition *part;
-	u64 *discovered_nasids;
-	enum xpc_retval ret;
-
-
-	remote_rp = xpc_kmalloc_cacheline_aligned(XPC_RP_HEADER_SIZE +
-						xp_nasid_mask_bytes,
-						GFP_KERNEL, &remote_rp_base);
-	if (remote_rp == NULL) {
-		return;
-	}
-	remote_vars = (struct xpc_vars *) remote_rp;
-
-
-	discovered_nasids = kzalloc(sizeof(u64) * xp_nasid_mask_words,
-							GFP_KERNEL);
-	if (discovered_nasids == NULL) {
-		kfree(remote_rp_base);
-		return;
-	}
-
-	rp = (struct xpc_rsvd_page *) xpc_rsvd_page;
-
-	/*
-	 * The term 'region' in this context refers to the minimum number of
-	 * nodes that can comprise an access protection grouping. The access
-	 * protection is in regards to memory, IOI and IPI.
-	 */
-	max_regions = 64;
-	region_size = sn_region_size;
-
-	switch (region_size) {
-	case 128:
-		max_regions *= 2;
-	case 64:
-		max_regions *= 2;
-	case 32:
-		max_regions *= 2;
-		region_size = 16;
-		DBUG_ON(!is_shub2());
-	}
-
-	for (region = 0; region < max_regions; region++) {
-
-		if ((volatile int) xpc_exiting) {
-			break;
-		}
-
-		dev_dbg(xpc_part, "searching region %d\n", region);
-
-		for (nasid = (region * region_size * 2);
-		     nasid < ((region + 1) * region_size * 2);
-		     nasid += 2) {
-
-			if ((volatile int) xpc_exiting) {
-				break;
-			}
-
-			dev_dbg(xpc_part, "checking nasid %d\n", nasid);
-
-
-			if (XPC_NASID_IN_ARRAY(nasid, xpc_part_nasids)) {
-				dev_dbg(xpc_part, "PROM indicates Nasid %d is "
-					"part of the local partition; skipping "
-					"region\n", nasid);
-				break;
-			}
-
-			if (!(XPC_NASID_IN_ARRAY(nasid, xpc_mach_nasids))) {
-				dev_dbg(xpc_part, "PROM indicates Nasid %d was "
-					"not on Numa-Link network at reset\n",
-					nasid);
-				continue;
-			}
-
-			if (XPC_NASID_IN_ARRAY(nasid, discovered_nasids)) {
-				dev_dbg(xpc_part, "Nasid %d is part of a "
-					"partition which was previously "
-					"discovered\n", nasid);
-				continue;
-			}
-
-
-			/* pull over the reserved page structure */
-
-			ret = xpc_get_remote_rp(nasid, discovered_nasids,
-					      remote_rp, &remote_rp_pa);
-			if (ret != xpcSuccess) {
-				dev_dbg(xpc_part, "unable to get reserved page "
-					"from nasid %d, reason=%d\n", nasid,
-					ret);
-
-				if (ret == xpcLocalPartid) {
-					break;
-				}
-				continue;
-			}
-
-			remote_vars_pa = remote_rp->vars_pa;
-
-			partid = remote_rp->partid;
-			part = &xpc_partitions[partid];
-
-
-			/* pull over the cross partition variables */
-
-			ret = xpc_get_remote_vars(remote_vars_pa, remote_vars);
-			if (ret != xpcSuccess) {
-				dev_dbg(xpc_part, "unable to get XPC variables "
-					"from nasid %d, reason=%d\n", nasid,
-					ret);
-
-				XPC_DEACTIVATE_PARTITION(part, ret);
-				continue;
-			}
-
-			if (part->act_state != XPC_P_INACTIVE) {
-				dev_dbg(xpc_part, "partition %d on nasid %d is "
-					"already activating\n", partid, nasid);
-				break;
-			}
-
-			/*
-			 * Register the remote partition's AMOs with SAL so it
-			 * can handle and cleanup errors within that address
-			 * range should the remote partition go down. We don't
-			 * unregister this range because it is difficult to
-			 * tell when outstanding writes to the remote partition
-			 * are finished and thus when it is thus safe to
-			 * unregister. This should not result in wasted space
-			 * in the SAL xp_addr_region table because we should
-			 * get the same page for remote_act_amos_pa after
-			 * module reloads and system reboots.
-			 */
-			if (sn_register_xp_addr_region(
-					    remote_vars->amos_page_pa,
-							PAGE_SIZE, 1) < 0) {
-				dev_dbg(xpc_part, "partition %d failed to "
-					"register xp_addr region 0x%016lx\n",
-					partid, remote_vars->amos_page_pa);
-
-				XPC_SET_REASON(part, xpcPhysAddrRegFailed,
-						__LINE__);
-				break;
-			}
-
-			/*
-			 * The remote nasid is valid and available.
-			 * Send an interrupt to that nasid to notify
-			 * it that we are ready to begin activation.
-			 */
-			dev_dbg(xpc_part, "sending an interrupt to AMO 0x%lx, "
-				"nasid %d, phys_cpuid 0x%x\n",
-				remote_vars->amos_page_pa,
-				remote_vars->act_nasid,
-				remote_vars->act_phys_cpuid);
-
-			if (XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->
-								version)) {
-				part->remote_amos_page_pa =
-						remote_vars->amos_page_pa;
-				xpc_mark_partition_disengaged(part);
-				xpc_cancel_partition_disengage_request(part);
-			}
-			xpc_IPI_send_activate(remote_vars);
-		}
-	}
-
-	kfree(discovered_nasids);
-	kfree(remote_rp_base);
-}
-
-
-/*
- * Given a partid, get the nasids owned by that partition from the
- * remote partition's reserved page.
- */
-enum xpc_retval
-xpc_initiate_partid_to_nasids(partid_t partid, void *nasid_mask)
-{
-	struct xpc_partition *part;
-	u64 part_nasid_pa;
-	int bte_res;
-
-
-	part = &xpc_partitions[partid];
-	if (part->remote_rp_pa == 0) {
-		return xpcPartitionDown;
-	}
-
-	memset(nasid_mask, 0, XP_NASID_MASK_BYTES);
-
-	part_nasid_pa = (u64) XPC_RP_PART_NASIDS(part->remote_rp_pa);
-
-	bte_res = xp_bte_copy(part_nasid_pa, (u64) nasid_mask,
-			xp_nasid_mask_bytes, (BTE_NOTIFY | BTE_WACQUIRE), NULL);
-
-	return xpc_map_bte_errors(bte_res);
-}
-
diff --git a/arch/ia64/sn/kernel/xpnet.c b/arch/ia64/sn/kernel/xpnet.c
deleted file mode 100644
index a5df672..0000000
--- a/arch/ia64/sn/kernel/xpnet.c
+++ /dev/null
@@ -1,718 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 1999,2001-2005 Silicon Graphics, Inc. All rights reserved.
- */
-
-
-/*
- * Cross Partition Network Interface (XPNET) support
- *
- *	XPNET provides a virtual network layered on top of the Cross
- *	Partition communication layer.
- *
- *	XPNET provides direct point-to-point and broadcast-like support
- *	for an ethernet-like device.  The ethernet broadcast medium is
- *	replaced with a point-to-point message structure which passes
- *	pointers to a DMA-capable block that a remote partition should
- *	retrieve and pass to the upper level networking layer.
- *
- */
-
-
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/ioport.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/delay.h>
-#include <linux/ethtool.h>
-#include <linux/mii.h>
-#include <linux/smp.h>
-#include <linux/string.h>
-#include <asm/sn/bte.h>
-#include <asm/sn/io.h>
-#include <asm/sn/sn_sal.h>
-#include <asm/types.h>
-#include <asm/atomic.h>
-#include <asm/sn/xp.h>
-
-
-/*
- * The message payload transferred by XPC.
- *
- * buf_pa is the physical address where the DMA should pull from.
- *
- * NOTE: for performance reasons, buf_pa should _ALWAYS_ begin on a
- * cacheline boundary.  To accomplish this, we record the number of
- * bytes from the beginning of the first cacheline to the first useful
- * byte of the skb (leadin_ignore) and the number of bytes from the
- * last useful byte of the skb to the end of the last cacheline
- * (tailout_ignore).
- *
- * size is the number of bytes to transfer which includes the skb->len
- * (useful bytes of the senders skb) plus the leadin and tailout
- */
-struct xpnet_message {
-	u16 version;		/* Version for this message */
-	u16 embedded_bytes;	/* #of bytes embedded in XPC message */
-	u32 magic;		/* Special number indicating this is xpnet */
-	u64 buf_pa;		/* phys address of buffer to retrieve */
-	u32 size;		/* #of bytes in buffer */
-	u8 leadin_ignore;	/* #of bytes to ignore at the beginning */
-	u8 tailout_ignore;	/* #of bytes to ignore at the end */
-	unsigned char data;	/* body of small packets */
-};
-
-/*
- * Determine the size of our message, the cacheline aligned size,
- * and then the number of message will request from XPC.
- *
- * XPC expects each message to exist in an individual cacheline.
- */
-#define XPNET_MSG_SIZE		(L1_CACHE_BYTES - XPC_MSG_PAYLOAD_OFFSET)
-#define XPNET_MSG_DATA_MAX	\
-		(XPNET_MSG_SIZE - (u64)(&((struct xpnet_message *)0)->data))
-#define XPNET_MSG_ALIGNED_SIZE	(L1_CACHE_ALIGN(XPNET_MSG_SIZE))
-#define XPNET_MSG_NENTRIES	(PAGE_SIZE / XPNET_MSG_ALIGNED_SIZE)
-
-
-#define XPNET_MAX_KTHREADS	(XPNET_MSG_NENTRIES + 1)
-#define XPNET_MAX_IDLE_KTHREADS	(XPNET_MSG_NENTRIES + 1)
-
-/*
- * Version number of XPNET implementation. XPNET can always talk to versions
- * with same major #, and never talk to versions with a different version.
- */
-#define _XPNET_VERSION(_major, _minor)	(((_major) << 4) | (_minor))
-#define XPNET_VERSION_MAJOR(_v)		((_v) >> 4)
-#define XPNET_VERSION_MINOR(_v)		((_v) & 0xf)
-
-#define	XPNET_VERSION _XPNET_VERSION(1,0)		/* version 1.0 */
-#define	XPNET_VERSION_EMBED _XPNET_VERSION(1,1)		/* version 1.1 */
-#define XPNET_MAGIC	0x88786984 /* "XNET" */
-
-#define XPNET_VALID_MSG(_m)						     \
-   ((XPNET_VERSION_MAJOR(_m->version) == XPNET_VERSION_MAJOR(XPNET_VERSION)) \
-    && (msg->magic == XPNET_MAGIC))
-
-#define XPNET_DEVICE_NAME		"xp0"
-
-
-/*
- * When messages are queued with xpc_send_notify, a kmalloc'd buffer
- * of the following type is passed as a notification cookie.  When the
- * notification function is called, we use the cookie to decide
- * whether all outstanding message sends have completed.  The skb can
- * then be released.
- */
-struct xpnet_pending_msg {
-	struct list_head free_list;
-	struct sk_buff *skb;
-	atomic_t use_count;
-};
-
-/* driver specific structure pointed to by the device structure */
-struct xpnet_dev_private {
-	struct net_device_stats stats;
-};
-
-struct net_device *xpnet_device;
-
-/*
- * When we are notified of other partitions activating, we add them to
- * our bitmask of partitions to which we broadcast.
- */
-static u64 xpnet_broadcast_partitions;
-/* protect above */
-static DEFINE_SPINLOCK(xpnet_broadcast_lock);
-
-/*
- * Since the Block Transfer Engine (BTE) is being used for the transfer
- * and it relies upon cache-line size transfers, we need to reserve at
- * least one cache-line for head and tail alignment.  The BTE is
- * limited to 8MB transfers.
- *
- * Testing has shown that changing MTU to greater than 64KB has no effect
- * on TCP as the two sides negotiate a Max Segment Size that is limited
- * to 64K.  Other protocols May use packets greater than this, but for
- * now, the default is 64KB.
- */
-#define XPNET_MAX_MTU (0x800000UL - L1_CACHE_BYTES)
-/* 32KB has been determined to be the ideal */
-#define XPNET_DEF_MTU (0x8000UL)
-
-
-/*
- * The partition id is encapsulated in the MAC address.  The following
- * define locates the octet the partid is in.
- */
-#define XPNET_PARTID_OCTET	1
-#define XPNET_LICENSE_OCTET	2
-
-
-/*
- * Define the XPNET debug device structure that is to be used with dev_dbg(),
- * dev_err(), dev_warn(), and dev_info().
- */
-struct device_driver xpnet_dbg_name = {
-	.name = "xpnet"
-};
-
-struct device xpnet_dbg_subname = {
-	.bus_id = {0},			/* set to "" */
-	.driver = &xpnet_dbg_name
-};
-
-struct device *xpnet = &xpnet_dbg_subname;
-
-/*
- * Packet was recevied by XPC and forwarded to us.
- */
-static void
-xpnet_receive(partid_t partid, int channel, struct xpnet_message *msg)
-{
-	struct sk_buff *skb;
-	bte_result_t bret;
-	struct xpnet_dev_private *priv =
-		(struct xpnet_dev_private *) xpnet_device->priv;
-
-
-	if (!XPNET_VALID_MSG(msg)) {
-		/*
-		 * Packet with a different XPC version.  Ignore.
-		 */
-		xpc_received(partid, channel, (void *) msg);
-
-		priv->stats.rx_errors++;
-
-		return;
-	}
-	dev_dbg(xpnet, "received 0x%lx, %d, %d, %d\n", msg->buf_pa, msg->size,
-		msg->leadin_ignore, msg->tailout_ignore);
-
-
-	/* reserve an extra cache line */
-	skb = dev_alloc_skb(msg->size + L1_CACHE_BYTES);
-	if (!skb) {
-		dev_err(xpnet, "failed on dev_alloc_skb(%d)\n",
-			msg->size + L1_CACHE_BYTES);
-
-		xpc_received(partid, channel, (void *) msg);
-
-		priv->stats.rx_errors++;
-
-		return;
-	}
-
-	/*
-	 * The allocated skb has some reserved space.
-	 * In order to use bte_copy, we need to get the
-	 * skb->data pointer moved forward.
-	 */
-	skb_reserve(skb, (L1_CACHE_BYTES - ((u64)skb->data &
-					    (L1_CACHE_BYTES - 1)) +
-			  msg->leadin_ignore));
-
-	/*
-	 * Update the tail pointer to indicate data actually
-	 * transferred.
-	 */
-	skb_put(skb, (msg->size - msg->leadin_ignore - msg->tailout_ignore));
-
-	/*
-	 * Move the data over from the other side.
-	 */
-	if ((XPNET_VERSION_MINOR(msg->version) == 1) &&
-						(msg->embedded_bytes != 0)) {
-		dev_dbg(xpnet, "copying embedded message. memcpy(0x%p, 0x%p, "
-			"%lu)\n", skb->data, &msg->data,
-			(size_t) msg->embedded_bytes);
-
-		skb_copy_to_linear_data(skb, &msg->data, (size_t)msg->embedded_bytes);
-	} else {
-		dev_dbg(xpnet, "transferring buffer to the skb->data area;\n\t"
-			"bte_copy(0x%p, 0x%p, %hu)\n", (void *)msg->buf_pa,
-			(void *)__pa((u64)skb->data & ~(L1_CACHE_BYTES - 1)),
-			msg->size);
-
-		bret = bte_copy(msg->buf_pa,
-				__pa((u64)skb->data & ~(L1_CACHE_BYTES - 1)),
-				msg->size, (BTE_NOTIFY | BTE_WACQUIRE), NULL);
-
-		if (bret != BTE_SUCCESS) {
-			// >>> Need better way of cleaning skb.  Currently skb
-			// >>> appears in_use and we can't just call
-			// >>> dev_kfree_skb.
-			dev_err(xpnet, "bte_copy(0x%p, 0x%p, 0x%hx) returned "
-				"error=0x%x\n", (void *)msg->buf_pa,
-				(void *)__pa((u64)skb->data &
-							~(L1_CACHE_BYTES - 1)),
-				msg->size, bret);
-
-			xpc_received(partid, channel, (void *) msg);
-
-			priv->stats.rx_errors++;
-
-			return;
-		}
-	}
-
-	dev_dbg(xpnet, "<skb->head=0x%p skb->data=0x%p skb->tail=0x%p "
-		"skb->end=0x%p skb->len=%d\n", (void *) skb->head,
-		(void *)skb->data, skb_tail_pointer(skb), skb_end_pointer(skb),
-		skb->len);
-
-	skb->protocol = eth_type_trans(skb, xpnet_device);
-	skb->ip_summed = CHECKSUM_UNNECESSARY;
-
-	dev_dbg(xpnet, "passing skb to network layer\n"
-		KERN_DEBUG "\tskb->head=0x%p skb->data=0x%p skb->tail=0x%p "
-		"skb->end=0x%p skb->len=%d\n",
-		(void *)skb->head, (void *)skb->data, skb_tail_pointer(skb),
-		skb_end_pointer(skb), skb->len);
-
-
-	xpnet_device->last_rx = jiffies;
-	priv->stats.rx_packets++;
-	priv->stats.rx_bytes += skb->len + ETH_HLEN;
-
-	netif_rx_ni(skb);
-	xpc_received(partid, channel, (void *) msg);
-}
-
-
-/*
- * This is the handler which XPC calls during any sort of change in
- * state or message reception on a connection.
- */
-static void
-xpnet_connection_activity(enum xpc_retval reason, partid_t partid, int channel,
-			  void *data, void *key)
-{
-	long bp;
-
-
-	DBUG_ON(partid <= 0 || partid >= XP_MAX_PARTITIONS);
-	DBUG_ON(channel != XPC_NET_CHANNEL);
-
-	switch(reason) {
-	case xpcMsgReceived:	/* message received */
-		DBUG_ON(data == NULL);
-
-		xpnet_receive(partid, channel, (struct xpnet_message *) data);
-		break;
-
-	case xpcConnected:	/* connection completed to a partition */
-		spin_lock_bh(&xpnet_broadcast_lock);
-		xpnet_broadcast_partitions |= 1UL << (partid -1 );
-		bp = xpnet_broadcast_partitions;
-		spin_unlock_bh(&xpnet_broadcast_lock);
-
-		netif_carrier_on(xpnet_device);
-
-		dev_dbg(xpnet, "%s connection created to partition %d; "
-			"xpnet_broadcast_partitions=0x%lx\n",
-			xpnet_device->name, partid, bp);
-		break;
-
-	default:
-		spin_lock_bh(&xpnet_broadcast_lock);
-		xpnet_broadcast_partitions &= ~(1UL << (partid -1 ));
-		bp = xpnet_broadcast_partitions;
-		spin_unlock_bh(&xpnet_broadcast_lock);
-
-		if (bp == 0) {
-			netif_carrier_off(xpnet_device);
-		}
-
-		dev_dbg(xpnet, "%s disconnected from partition %d; "
-			"xpnet_broadcast_partitions=0x%lx\n",
-			xpnet_device->name, partid, bp);
-		break;
-
-	}
-}
-
-
-static int
-xpnet_dev_open(struct net_device *dev)
-{
-	enum xpc_retval ret;
-
-
-	dev_dbg(xpnet, "calling xpc_connect(%d, 0x%p, NULL, %ld, %ld, %ld, "
-		"%ld)\n", XPC_NET_CHANNEL, xpnet_connection_activity,
-		XPNET_MSG_SIZE, XPNET_MSG_NENTRIES, XPNET_MAX_KTHREADS,
-		XPNET_MAX_IDLE_KTHREADS);
-
-	ret = xpc_connect(XPC_NET_CHANNEL, xpnet_connection_activity, NULL,
-			  XPNET_MSG_SIZE, XPNET_MSG_NENTRIES,
-			  XPNET_MAX_KTHREADS, XPNET_MAX_IDLE_KTHREADS);
-	if (ret != xpcSuccess) {
-		dev_err(xpnet, "ifconfig up of %s failed on XPC connect, "
-			"ret=%d\n", dev->name, ret);
-
-		return -ENOMEM;
-	}
-
-	dev_dbg(xpnet, "ifconfig up of %s; XPC connected\n", dev->name);
-
-	return 0;
-}
-
-
-static int
-xpnet_dev_stop(struct net_device *dev)
-{
-	xpc_disconnect(XPC_NET_CHANNEL);
-
-	dev_dbg(xpnet, "ifconfig down of %s; XPC disconnected\n", dev->name);
-
-	return 0;
-}
-
-
-static int
-xpnet_dev_change_mtu(struct net_device *dev, int new_mtu)
-{
-	/* 68 comes from min TCP+IP+MAC header */
-	if ((new_mtu < 68) || (new_mtu > XPNET_MAX_MTU)) {
-		dev_err(xpnet, "ifconfig %s mtu %d failed; value must be "
-			"between 68 and %ld\n", dev->name, new_mtu,
-			XPNET_MAX_MTU);
-		return -EINVAL;
-	}
-
-	dev->mtu = new_mtu;
-	dev_dbg(xpnet, "ifconfig %s mtu set to %d\n", dev->name, new_mtu);
-	return 0;
-}
-
-
-/*
- * Required for the net_device structure.
- */
-static int
-xpnet_dev_set_config(struct net_device *dev, struct ifmap *new_map)
-{
-	return 0;
-}
-
-
-/*
- * Return statistics to the caller.
- */
-static struct net_device_stats *
-xpnet_dev_get_stats(struct net_device *dev)
-{
-	struct xpnet_dev_private *priv;
-
-
-	priv = (struct xpnet_dev_private *) dev->priv;
-
-	return &priv->stats;
-}
-
-
-/*
- * Notification that the other end has received the message and
- * DMA'd the skb information.  At this point, they are done with
- * our side.  When all recipients are done processing, we
- * release the skb and then release our pending message structure.
- */
-static void
-xpnet_send_completed(enum xpc_retval reason, partid_t partid, int channel,
-			void *__qm)
-{
-	struct xpnet_pending_msg *queued_msg =
-		(struct xpnet_pending_msg *) __qm;
-
-
-	DBUG_ON(queued_msg == NULL);
-
-	dev_dbg(xpnet, "message to %d notified with reason %d\n",
-		partid, reason);
-
-	if (atomic_dec_return(&queued_msg->use_count) == 0) {
-		dev_dbg(xpnet, "all acks for skb->head=-x%p\n",
-			(void *) queued_msg->skb->head);
-
-		dev_kfree_skb_any(queued_msg->skb);
-		kfree(queued_msg);
-	}
-}
-
-
-/*
- * Network layer has formatted a packet (skb) and is ready to place it
- * "on the wire".  Prepare and send an xpnet_message to all partitions
- * which have connected with us and are targets of this packet.
- *
- * MAC-NOTE:  For the XPNET driver, the MAC address contains the
- * destination partition_id.  If the destination partition id word
- * is 0xff, this packet is to broadcast to all partitions.
- */
-static int
-xpnet_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
-{
-	struct xpnet_pending_msg *queued_msg;
-	enum xpc_retval ret;
-	struct xpnet_message *msg;
-	u64 start_addr, end_addr;
-	long dp;
-	u8 second_mac_octet;
-	partid_t dest_partid;
-	struct xpnet_dev_private *priv;
-	u16 embedded_bytes;
-
-
-	priv = (struct xpnet_dev_private *) dev->priv;
-
-
-	dev_dbg(xpnet, ">skb->head=0x%p skb->data=0x%p skb->tail=0x%p "
-		"skb->end=0x%p skb->len=%d\n", (void *) skb->head,
-		(void *)skb->data, skb_tail_pointer(skb), skb_end_pointer(skb),
-		skb->len);
-
-
-	/*
-	 * The xpnet_pending_msg tracks how many outstanding
-	 * xpc_send_notifies are relying on this skb.  When none
-	 * remain, release the skb.
-	 */
-	queued_msg = kmalloc(sizeof(struct xpnet_pending_msg), GFP_ATOMIC);
-	if (queued_msg == NULL) {
-		dev_warn(xpnet, "failed to kmalloc %ld bytes; dropping "
-			"packet\n", sizeof(struct xpnet_pending_msg));
-
-		priv->stats.tx_errors++;
-
-		return -ENOMEM;
-	}
-
-
-	/* get the beginning of the first cacheline and end of last */
-	start_addr = ((u64) skb->data & ~(L1_CACHE_BYTES - 1));
-	end_addr = L1_CACHE_ALIGN((u64)skb_tail_pointer(skb));
-
-	/* calculate how many bytes to embed in the XPC message */
-	embedded_bytes = 0;
-	if (unlikely(skb->len <= XPNET_MSG_DATA_MAX)) {
-		/* skb->data does fit so embed */
-		embedded_bytes = skb->len;
-	}
-
-
-	/*
-	 * Since the send occurs asynchronously, we set the count to one
-	 * and begin sending.  Any sends that happen to complete before
-	 * we are done sending will not free the skb.  We will be left
-	 * with that task during exit.  This also handles the case of
-	 * a packet destined for a partition which is no longer up.
-	 */
-	atomic_set(&queued_msg->use_count, 1);
-	queued_msg->skb = skb;
-
-
-	second_mac_octet = skb->data[XPNET_PARTID_OCTET];
-	if (second_mac_octet == 0xff) {
-		/* we are being asked to broadcast to all partitions */
-		dp = xpnet_broadcast_partitions;
-	} else if (second_mac_octet != 0) {
-		dp = xpnet_broadcast_partitions &
-					(1UL << (second_mac_octet - 1));
-	} else {
-		/* 0 is an invalid partid.  Ignore */
-		dp = 0;
-	}
-	dev_dbg(xpnet, "destination Partitions mask (dp) = 0x%lx\n", dp);
-
-	/*
-	 * If we wanted to allow promiscuous mode to work like an
-	 * unswitched network, this would be a good point to OR in a
-	 * mask of partitions which should be receiving all packets.
-	 */
-
-	/*
-	 * Main send loop.
-	 */
-	for (dest_partid = 1; dp && dest_partid < XP_MAX_PARTITIONS;
-	     dest_partid++) {
-
-
-		if (!(dp & (1UL << (dest_partid - 1)))) {
-			/* not destined for this partition */
-			continue;
-		}
-
-		/* remove this partition from the destinations mask */
-		dp &= ~(1UL << (dest_partid - 1));
-
-
-		/* found a partition to send to */
-
-		ret = xpc_allocate(dest_partid, XPC_NET_CHANNEL,
-				   XPC_NOWAIT, (void **)&msg);
-		if (unlikely(ret != xpcSuccess)) {
-			continue;
-		}
-
-		msg->embedded_bytes = embedded_bytes;
-		if (unlikely(embedded_bytes != 0)) {
-			msg->version = XPNET_VERSION_EMBED;
-			dev_dbg(xpnet, "calling memcpy(0x%p, 0x%p, 0x%lx)\n",
-				&msg->data, skb->data, (size_t) embedded_bytes);
-			skb_copy_from_linear_data(skb, &msg->data,
-						  (size_t)embedded_bytes);
-		} else {
-			msg->version = XPNET_VERSION;
-		}
-		msg->magic = XPNET_MAGIC;
-		msg->size = end_addr - start_addr;
-		msg->leadin_ignore = (u64) skb->data - start_addr;
-		msg->tailout_ignore = end_addr - (u64)skb_tail_pointer(skb);
-		msg->buf_pa = __pa(start_addr);
-
-		dev_dbg(xpnet, "sending XPC message to %d:%d\n"
-			KERN_DEBUG "msg->buf_pa=0x%lx, msg->size=%u, "
-			"msg->leadin_ignore=%u, msg->tailout_ignore=%u\n",
-			dest_partid, XPC_NET_CHANNEL, msg->buf_pa, msg->size,
-			msg->leadin_ignore, msg->tailout_ignore);
-
-
-		atomic_inc(&queued_msg->use_count);
-
-		ret = xpc_send_notify(dest_partid, XPC_NET_CHANNEL, msg,
-				      xpnet_send_completed, queued_msg);
-		if (unlikely(ret != xpcSuccess)) {
-			atomic_dec(&queued_msg->use_count);
-			continue;
-		}
-
-	}
-
-	if (atomic_dec_return(&queued_msg->use_count) == 0) {
-		dev_dbg(xpnet, "no partitions to receive packet destined for "
-			"%d\n", dest_partid);
-
-
-		dev_kfree_skb(skb);
-		kfree(queued_msg);
-	}
-
-	priv->stats.tx_packets++;
-	priv->stats.tx_bytes += skb->len;
-
-	return 0;
-}
-
-
-/*
- * Deal with transmit timeouts coming from the network layer.
- */
-static void
-xpnet_dev_tx_timeout (struct net_device *dev)
-{
-	struct xpnet_dev_private *priv;
-
-
-	priv = (struct xpnet_dev_private *) dev->priv;
-
-	priv->stats.tx_errors++;
-	return;
-}
-
-
-static int __init
-xpnet_init(void)
-{
-	int i;
-	u32 license_num;
-	int result = -ENOMEM;
-
-
-	if (!ia64_platform_is("sn2")) {
-		return -ENODEV;
-	}
-
-	dev_info(xpnet, "registering network device %s\n", XPNET_DEVICE_NAME);
-
-	/*
-	 * use ether_setup() to init the majority of our device
-	 * structure and then override the necessary pieces.
-	 */
-	xpnet_device = alloc_netdev(sizeof(struct xpnet_dev_private),
-				    XPNET_DEVICE_NAME, ether_setup);
-	if (xpnet_device == NULL) {
-		return -ENOMEM;
-	}
-
-	netif_carrier_off(xpnet_device);
-
-	xpnet_device->mtu = XPNET_DEF_MTU;
-	xpnet_device->change_mtu = xpnet_dev_change_mtu;
-	xpnet_device->open = xpnet_dev_open;
-	xpnet_device->get_stats = xpnet_dev_get_stats;
-	xpnet_device->stop = xpnet_dev_stop;
-	xpnet_device->hard_start_xmit = xpnet_dev_hard_start_xmit;
-	xpnet_device->tx_timeout = xpnet_dev_tx_timeout;
-	xpnet_device->set_config = xpnet_dev_set_config;
-
-	/*
-	 * Multicast assumes the LSB of the first octet is set for multicast
-	 * MAC addresses.  We chose the first octet of the MAC to be unlikely
-	 * to collide with any vendor's officially issued MAC.
-	 */
-	xpnet_device->dev_addr[0] = 0xfe;
-	xpnet_device->dev_addr[XPNET_PARTID_OCTET] = sn_partition_id;
-	license_num = sn_partition_serial_number_val();
-	for (i = 3; i >= 0; i--) {
-		xpnet_device->dev_addr[XPNET_LICENSE_OCTET + i] =
-							license_num & 0xff;
-		license_num = license_num >> 8;
-	}
-
-	/*
-	 * ether_setup() sets this to a multicast device.  We are
-	 * really not supporting multicast at this time.
-	 */
-	xpnet_device->flags &= ~IFF_MULTICAST;
-
-	/*
-	 * No need to checksum as it is a DMA transfer.  The BTE will
-	 * report an error if the data is not retrievable and the
-	 * packet will be dropped.
-	 */
-	xpnet_device->features = NETIF_F_NO_CSUM;
-
-	result = register_netdev(xpnet_device);
-	if (result != 0) {
-		free_netdev(xpnet_device);
-	}
-
-	return result;
-}
-module_init(xpnet_init);
-
-
-static void __exit
-xpnet_exit(void)
-{
-	dev_info(xpnet, "unregistering network device %s\n",
-		xpnet_device[0].name);
-
-	unregister_netdev(xpnet_device);
-
-	free_netdev(xpnet_device);
-}
-module_exit(xpnet_exit);
-
-
-MODULE_AUTHOR("Silicon Graphics, Inc.");
-MODULE_DESCRIPTION("Cross Partition Network adapter (XPNET)");
-MODULE_LICENSE("GPL");
-
diff --git a/arch/ia64/sn/pci/pci_dma.c b/arch/ia64/sn/pci/pci_dma.c
index 18b94b7..52175af 100644
--- a/arch/ia64/sn/pci/pci_dma.c
+++ b/arch/ia64/sn/pci/pci_dma.c
@@ -10,6 +10,7 @@
  */
 
 #include <linux/module.h>
+#include <linux/dma-attrs.h>
 #include <asm/dma.h>
 #include <asm/sn/intr.h>
 #include <asm/sn/pcibus_provider_defs.h>
@@ -149,11 +150,12 @@ void sn_dma_free_coherent(struct device *dev, size_t size, void *cpu_addr,
 EXPORT_SYMBOL(sn_dma_free_coherent);
 
 /**
- * sn_dma_map_single - map a single page for DMA
+ * sn_dma_map_single_attrs - map a single page for DMA
  * @dev: device to map for
  * @cpu_addr: kernel virtual address of the region to map
  * @size: size of the region
  * @direction: DMA direction
+ * @attrs: optional dma attributes
  *
  * Map the region pointed to by @cpu_addr for DMA and return the
  * DMA address.
@@ -163,42 +165,59 @@ EXPORT_SYMBOL(sn_dma_free_coherent);
  * no way of saving the dmamap handle from the alloc to later free
  * (which is pretty much unacceptable).
  *
+ * mappings with the DMA_ATTR_WRITE_BARRIER get mapped with
+ * dma_map_consistent() so that writes force a flush of pending DMA.
+ * (See "SGI Altix Architecture Considerations for Linux Device Drivers",
+ * Document Number: 007-4763-001)
+ *
  * TODO: simplify our interface;
  *       figure out how to save dmamap handle so can use two step.
  */
-dma_addr_t sn_dma_map_single(struct device *dev, void *cpu_addr, size_t size,
-			     int direction)
+dma_addr_t sn_dma_map_single_attrs(struct device *dev, void *cpu_addr,
+				   size_t size, int direction,
+				   struct dma_attrs *attrs)
 {
 	dma_addr_t dma_addr;
 	unsigned long phys_addr;
 	struct pci_dev *pdev = to_pci_dev(dev);
 	struct sn_pcibus_provider *provider = SN_PCIDEV_BUSPROVIDER(pdev);
+	int dmabarr;
+
+	dmabarr = dma_get_attr(DMA_ATTR_WRITE_BARRIER, attrs);
 
 	BUG_ON(dev->bus != &pci_bus_type);
 
 	phys_addr = __pa(cpu_addr);
-	dma_addr = provider->dma_map(pdev, phys_addr, size, SN_DMA_ADDR_PHYS);
+	if (dmabarr)
+		dma_addr = provider->dma_map_consistent(pdev, phys_addr,
+							size, SN_DMA_ADDR_PHYS);
+	else
+		dma_addr = provider->dma_map(pdev, phys_addr, size,
+					     SN_DMA_ADDR_PHYS);
+
 	if (!dma_addr) {
 		printk(KERN_ERR "%s: out of ATEs\n", __func__);
 		return 0;
 	}
 	return dma_addr;
 }
-EXPORT_SYMBOL(sn_dma_map_single);
+EXPORT_SYMBOL(sn_dma_map_single_attrs);
 
 /**
- * sn_dma_unmap_single - unamp a DMA mapped page
+ * sn_dma_unmap_single_attrs - unamp a DMA mapped page
  * @dev: device to sync
  * @dma_addr: DMA address to sync
  * @size: size of region
  * @direction: DMA direction
+ * @attrs: optional dma attributes
  *
  * This routine is supposed to sync the DMA region specified
  * by @dma_handle into the coherence domain.  On SN, we're always cache
  * coherent, so we just need to free any ATEs associated with this mapping.
  */
-void sn_dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
-			 int direction)
+void sn_dma_unmap_single_attrs(struct device *dev, dma_addr_t dma_addr,
+			       size_t size, int direction,
+			       struct dma_attrs *attrs)
 {
 	struct pci_dev *pdev = to_pci_dev(dev);
 	struct sn_pcibus_provider *provider = SN_PCIDEV_BUSPROVIDER(pdev);
@@ -207,19 +226,21 @@ void sn_dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
 
 	provider->dma_unmap(pdev, dma_addr, direction);
 }
-EXPORT_SYMBOL(sn_dma_unmap_single);
+EXPORT_SYMBOL(sn_dma_unmap_single_attrs);
 
 /**
- * sn_dma_unmap_sg - unmap a DMA scatterlist
+ * sn_dma_unmap_sg_attrs - unmap a DMA scatterlist
  * @dev: device to unmap
  * @sg: scatterlist to unmap
  * @nhwentries: number of scatterlist entries
  * @direction: DMA direction
+ * @attrs: optional dma attributes
  *
  * Unmap a set of streaming mode DMA translations.
  */
-void sn_dma_unmap_sg(struct device *dev, struct scatterlist *sgl,
-		     int nhwentries, int direction)
+void sn_dma_unmap_sg_attrs(struct device *dev, struct scatterlist *sgl,
+			   int nhwentries, int direction,
+			   struct dma_attrs *attrs)
 {
 	int i;
 	struct pci_dev *pdev = to_pci_dev(dev);
@@ -234,25 +255,34 @@ void sn_dma_unmap_sg(struct device *dev, struct scatterlist *sgl,
 		sg->dma_length = 0;
 	}
 }
-EXPORT_SYMBOL(sn_dma_unmap_sg);
+EXPORT_SYMBOL(sn_dma_unmap_sg_attrs);
 
 /**
- * sn_dma_map_sg - map a scatterlist for DMA
+ * sn_dma_map_sg_attrs - map a scatterlist for DMA
  * @dev: device to map for
  * @sg: scatterlist to map
  * @nhwentries: number of entries
  * @direction: direction of the DMA transaction
+ * @attrs: optional dma attributes
+ *
+ * mappings with the DMA_ATTR_WRITE_BARRIER get mapped with
+ * dma_map_consistent() so that writes force a flush of pending DMA.
+ * (See "SGI Altix Architecture Considerations for Linux Device Drivers",
+ * Document Number: 007-4763-001)
  *
  * Maps each entry of @sg for DMA.
  */
-int sn_dma_map_sg(struct device *dev, struct scatterlist *sgl, int nhwentries,
-		  int direction)
+int sn_dma_map_sg_attrs(struct device *dev, struct scatterlist *sgl,
+			int nhwentries, int direction, struct dma_attrs *attrs)
 {
 	unsigned long phys_addr;
 	struct scatterlist *saved_sg = sgl, *sg;
 	struct pci_dev *pdev = to_pci_dev(dev);
 	struct sn_pcibus_provider *provider = SN_PCIDEV_BUSPROVIDER(pdev);
 	int i;
+	int dmabarr;
+
+	dmabarr = dma_get_attr(DMA_ATTR_WRITE_BARRIER, attrs);
 
 	BUG_ON(dev->bus != &pci_bus_type);
 
@@ -260,11 +290,19 @@ int sn_dma_map_sg(struct device *dev, struct scatterlist *sgl, int nhwentries,
 	 * Setup a DMA address for each entry in the scatterlist.
 	 */
 	for_each_sg(sgl, sg, nhwentries, i) {
+		dma_addr_t dma_addr;
 		phys_addr = SG_ENT_PHYS_ADDRESS(sg);
-		sg->dma_address = provider->dma_map(pdev,
-						    phys_addr, sg->length,
-						    SN_DMA_ADDR_PHYS);
+		if (dmabarr)
+			dma_addr = provider->dma_map_consistent(pdev,
+								phys_addr,
+								sg->length,
+								SN_DMA_ADDR_PHYS);
+		else
+			dma_addr = provider->dma_map(pdev, phys_addr,
+						     sg->length,
+						     SN_DMA_ADDR_PHYS);
 
+		sg->dma_address = dma_addr;
 		if (!sg->dma_address) {
 			printk(KERN_ERR "%s: out of ATEs\n", __func__);
 
@@ -272,7 +310,8 @@ int sn_dma_map_sg(struct device *dev, struct scatterlist *sgl, int nhwentries,
 			 * Free any successfully allocated entries.
 			 */
 			if (i > 0)
-				sn_dma_unmap_sg(dev, saved_sg, i, direction);
+				sn_dma_unmap_sg_attrs(dev, saved_sg, i,
+						      direction, attrs);
 			return 0;
 		}
 
@@ -281,7 +320,7 @@ int sn_dma_map_sg(struct device *dev, struct scatterlist *sgl, int nhwentries,
 
 	return nhwentries;
 }
-EXPORT_SYMBOL(sn_dma_map_sg);
+EXPORT_SYMBOL(sn_dma_map_sg_attrs);
 
 void sn_dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle,
 				size_t size, int direction)
diff --git a/arch/ia64/sn/pci/tioce_provider.c b/arch/ia64/sn/pci/tioce_provider.c
index 9b3c113..94e5845 100644
--- a/arch/ia64/sn/pci/tioce_provider.c
+++ b/arch/ia64/sn/pci/tioce_provider.c
@@ -655,7 +655,8 @@ tioce_dma(struct pci_dev *pdev, u64 paddr, size_t byte_count, int dma_flags)
  *
  * Simply call tioce_do_dma_map() to create a map with the barrier bit set
  * in the address.
- */ static u64
+ */
+static u64
 tioce_dma_consistent(struct pci_dev *pdev, u64 paddr, size_t byte_count, int dma_flags)
 {
 	return tioce_do_dma_map(pdev, paddr, byte_count, 1, dma_flags);
@@ -668,7 +669,8 @@ tioce_dma_consistent(struct pci_dev *pdev, u64 paddr, size_t byte_count, int dma
  *
  * Handle a CE error interrupt.  Simply a wrapper around a SAL call which
  * defers processing to the SGI prom.
- */ static irqreturn_t
+ */
+static irqreturn_t
 tioce_error_intr_handler(int irq, void *arg)
 {
 	struct tioce_common *soft = arg;