288 files changed, 15680 insertions, 2398 deletions
diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig
index 3d74801..56a4df9 100644
--- a/arch/alpha/Kconfig
+++ b/arch/alpha/Kconfig
@@ -70,10 +70,6 @@ config GENERIC_ISA_DMA
 	bool
 	default y
 
-config GENERIC_IOMAP
-	bool
-	default n
-
 source "init/Kconfig"
 source "kernel/Kconfig.freezer"
 
@@ -319,6 +315,7 @@ config ISA_DMA_API
 config PCI
 	bool
 	depends on !ALPHA_JENSEN
+	select GENERIC_PCI_IOMAP
 	default y
 	help
 	  Find out whether you have a PCI motherboard. PCI is the name of a
diff --git a/arch/alpha/kernel/pci-noop.c b/arch/alpha/kernel/pci-noop.c
index 246100e..04eea48 100644
--- a/arch/alpha/kernel/pci-noop.c
+++ b/arch/alpha/kernel/pci-noop.c
@@ -185,15 +185,3 @@ struct dma_map_ops alpha_noop_ops = {
 
 struct dma_map_ops *dma_ops = &alpha_noop_ops;
 EXPORT_SYMBOL(dma_ops);
-
-void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen)
-{
-	return NULL;
-}
-
-void pci_iounmap(struct pci_dev *dev, void __iomem * addr)
-{
-}
-
-EXPORT_SYMBOL(pci_iomap);
-EXPORT_SYMBOL(pci_iounmap);
diff --git a/arch/alpha/kernel/pci.c b/arch/alpha/kernel/pci.c
index c9ab94e..f3cae27 100644
--- a/arch/alpha/kernel/pci.c
+++ b/arch/alpha/kernel/pci.c
@@ -508,30 +508,7 @@ sys_pciconfig_iobase(long which, unsigned long bus, unsigned long dfn)
 	return -EOPNOTSUPP;
 }
 
-/* Create an __iomem token from a PCI BAR.  Copied from lib/iomap.c with
-   no changes, since we don't want the other things in that object file.  */
-
-void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen)
-{
-	resource_size_t start = pci_resource_start(dev, bar);
-	resource_size_t len = pci_resource_len(dev, bar);
-	unsigned long flags = pci_resource_flags(dev, bar);
-
-	if (!len || !start)
-		return NULL;
-	if (maxlen && len > maxlen)
-		len = maxlen;
-	if (flags & IORESOURCE_IO)
-		return ioport_map(start, len);
-	if (flags & IORESOURCE_MEM) {
-		/* Not checking IORESOURCE_CACHEABLE because alpha does
-		   not distinguish between ioremap and ioremap_nocache.  */
-		return ioremap(start, len);
-	}
-	return NULL;
-}
-
-/* Destroy that token.  Not copied from lib/iomap.c.  */
+/* Destroy an __iomem token.  Not copied from lib/iomap.c.  */
 
 void pci_iounmap(struct pci_dev *dev, void __iomem * addr)
 {
@@ -539,7 +516,6 @@ void pci_iounmap(struct pci_dev *dev, void __iomem * addr)
 		iounmap(addr);
 }
 
-EXPORT_SYMBOL(pci_iomap);
 EXPORT_SYMBOL(pci_iounmap);
 
 /* FIXME: Some boxes have multiple ISA bridges! */
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 9d66dfc..24626b0 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -16,6 +16,7 @@ config ARM
 	select HAVE_FTRACE_MCOUNT_RECORD if (!XIP_KERNEL)
 	select HAVE_DYNAMIC_FTRACE if (!XIP_KERNEL)
 	select HAVE_FUNCTION_GRAPH_TRACER if (!THUMB2_KERNEL)
+	select ARCH_BINFMT_ELF_RANDOMIZE_PIE
 	select HAVE_GENERIC_DMA_COHERENT
 	select HAVE_KERNEL_GZIP
 	select HAVE_KERNEL_LZO
@@ -30,6 +31,7 @@ config ARM
 	select HAVE_SPARSE_IRQ
 	select GENERIC_IRQ_SHOW
 	select CPU_PM if (SUSPEND || CPU_IDLE)
+	select GENERIC_PCI_IOMAP
 	help
 	  The ARM series is a line of low-power-consumption RISC chip designs
 	  licensed by ARM Ltd and targeted at embedded applications and
diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h
index 065d100..9275828 100644
--- a/arch/arm/include/asm/io.h
+++ b/arch/arm/include/asm/io.h
@@ -27,6 +27,7 @@
 #include <asm/byteorder.h>
 #include <asm/memory.h>
 #include <asm/system.h>
+#include <asm-generic/pci_iomap.h>
 
 /*
  * ISA I/O bus memory addresses are 1:1 with the physical address.
@@ -306,7 +307,6 @@ extern void ioport_unmap(void __iomem *addr);
 
 struct pci_dev;
 
-extern void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen);
 extern void pci_iounmap(struct pci_dev *dev, void __iomem *addr);
 
 /*
diff --git a/arch/arm/mach-davinci/board-da850-evm.c b/arch/arm/mach-davinci/board-da850-evm.c
index f8a682f..6b22b54 100644
--- a/arch/arm/mach-davinci/board-da850-evm.c
+++ b/arch/arm/mach-davinci/board-da850-evm.c
@@ -127,7 +127,7 @@ static void da850_evm_m25p80_notify_add(struct mtd_info *mtd)
 	size_t retlen;
 
 	if (!strcmp(mtd->name, "MAC-Address")) {
-		mtd->read(mtd, 0, ETH_ALEN, &retlen, mac_addr);
+		mtd_read(mtd, 0, ETH_ALEN, &retlen, mac_addr);
 		if (retlen == ETH_ALEN)
 			pr_info("Read MAC addr from SPI Flash: %pM\n",
 				mac_addr);
diff --git a/arch/arm/mach-omap2/devices.c b/arch/arm/mach-omap2/devices.c
index 35d5dff..46dfd1a 100644
--- a/arch/arm/mach-omap2/devices.c
+++ b/arch/arm/mach-omap2/devices.c
@@ -28,6 +28,7 @@
 #include <plat/board.h>
 #include <plat/mcbsp.h>
 #include <plat/mmc.h>
+#include <plat/iommu.h>
 #include <plat/dma.h>
 #include <plat/omap_hwmod.h>
 #include <plat/omap_device.h>
@@ -211,9 +212,15 @@ static struct platform_device omap3isp_device = {
 	.resource	= omap3isp_resources,
 };
 
+static struct omap_iommu_arch_data omap3_isp_iommu = {
+	.name = "isp",
+};
+
 int omap3_init_camera(struct isp_platform_data *pdata)
 {
 	omap3isp_device.dev.platform_data = pdata;
+	omap3isp_device.dev.archdata.iommu = &omap3_isp_iommu;
+
 	return platform_device_register(&omap3isp_device);
 }
 
diff --git a/arch/arm/mach-tegra/include/mach/kbc.h b/arch/arm/mach-tegra/include/mach/kbc.h
index 4f3572a..20bb054 100644
--- a/arch/arm/mach-tegra/include/mach/kbc.h
+++ b/arch/arm/mach-tegra/include/mach/kbc.h
@@ -53,6 +53,7 @@ struct tegra_kbc_platform_data {
 	struct tegra_kbc_pin_cfg pin_cfg[KBC_MAX_GPIO];
 	const struct matrix_keymap_data *keymap_data;
 
+	u32 wakeup_key;
 	bool wakeup;
 	bool use_fn_map;
 	bool use_ghost_filter;
diff --git a/arch/arm/mach-u300/Kconfig b/arch/arm/mach-u300/Kconfig
index 1cbcd4f..54d8f34 100644
--- a/arch/arm/mach-u300/Kconfig
+++ b/arch/arm/mach-u300/Kconfig
@@ -7,8 +7,8 @@ comment "ST-Ericsson Mobile Platform Products"
 config MACH_U300
 	bool "U300"
 	select PINCTRL
-	select PINMUX_U300
-	select GPIO_U300
+	select PINCTRL_U300
+	select PINCTRL_COH901
 
 comment "ST-Ericsson U300/U330/U335/U365 Feature Selections"
 
diff --git a/arch/arm/mach-u300/core.c b/arch/arm/mach-u300/core.c
index 6979307..b4c6926 100644
--- a/arch/arm/mach-u300/core.c
+++ b/arch/arm/mach-u300/core.c
@@ -1605,15 +1605,15 @@ static struct platform_device pinmux_device = {
 };
 
 /* Pinmux settings */
-static struct pinmux_map u300_pinmux_map[] = {
+static struct pinmux_map __initdata u300_pinmux_map[] = {
 	/* anonymous maps for chip power and EMIFs */
-	PINMUX_MAP_PRIMARY_SYS_HOG("POWER", "power"),
-	PINMUX_MAP_PRIMARY_SYS_HOG("EMIF0", "emif0"),
-	PINMUX_MAP_PRIMARY_SYS_HOG("EMIF1", "emif1"),
+	PINMUX_MAP_SYS_HOG("POWER", "pinmux-u300", "power"),
+	PINMUX_MAP_SYS_HOG("EMIF0", "pinmux-u300", "emif0"),
+	PINMUX_MAP_SYS_HOG("EMIF1", "pinmux-u300", "emif1"),
 	/* per-device maps for MMC/SD, SPI and UART */
-	PINMUX_MAP_PRIMARY("MMCSD", "mmc0", "mmci"),
-	PINMUX_MAP_PRIMARY("SPI", "spi0", "pl022"),
-	PINMUX_MAP_PRIMARY("UART0", "uart0", "uart0"),
+	PINMUX_MAP("MMCSD", "pinmux-u300", "mmc0", "mmci"),
+	PINMUX_MAP("SPI", "pinmux-u300", "spi0", "pl022"),
+	PINMUX_MAP("UART0", "pinmux-u300", "uart0", "uart0"),
 };
 
 struct u300_mux_hog {
diff --git a/arch/arm/mach-u300/include/mach/gpio-u300.h b/arch/arm/mach-u300/include/mach/gpio-u300.h
index 0c2b202..bf4c793 100644
--- a/arch/arm/mach-u300/include/mach/gpio-u300.h
+++ b/arch/arm/mach-u300/include/mach/gpio-u300.h
@@ -9,121 +9,6 @@
 #ifndef __MACH_U300_GPIO_U300_H
 #define __MACH_U300_GPIO_U300_H
 
-/*
- * Individual pin assignments for the B26/S26. Notice that the
- * actual usage of these pins depends on the PAD MUX settings, that
- * is why the same number can potentially appear several times.
- * In the reference design each pin is only used for one purpose.
- * These were determined by inspecting the B26/S26 schematic:
- * 2/1911-ROA 128 1603
- */
-#ifdef CONFIG_MACH_U300_BS2X
-#define U300_GPIO_PIN_UART_RX		0
-#define U300_GPIO_PIN_UART_TX		1
-#define U300_GPIO_PIN_GPIO02		2  /* Unrouted */
-#define U300_GPIO_PIN_GPIO03		3  /* Unrouted */
-#define U300_GPIO_PIN_CAM_SLEEP		4
-#define U300_GPIO_PIN_CAM_REG_EN	5
-#define U300_GPIO_PIN_GPIO06		6  /* Unrouted */
-#define U300_GPIO_PIN_GPIO07		7  /* Unrouted */
-
-#define U300_GPIO_PIN_GPIO08		8  /* Service point SP2321 */
-#define U300_GPIO_PIN_GPIO09		9  /* Service point SP2322 */
-#define U300_GPIO_PIN_PHFSENSE		10 /* Headphone jack sensing */
-#define U300_GPIO_PIN_MMC_CLKRET	11 /* Clock return from MMC/SD card */
-#define U300_GPIO_PIN_MMC_CD		12 /* MMC Card insertion detection */
-#define U300_GPIO_PIN_FLIPSENSE		13 /* Mechanical flip sensing */
-#define U300_GPIO_PIN_GPIO14		14 /* DSP JTAG Port RTCK */
-#define U300_GPIO_PIN_GPIO15		15 /* Unrouted */
-
-#define U300_GPIO_PIN_GPIO16		16 /* Unrouted */
-#define U300_GPIO_PIN_GPIO17		17 /* Unrouted */
-#define U300_GPIO_PIN_GPIO18		18 /* Unrouted */
-#define U300_GPIO_PIN_GPIO19		19 /* Unrouted */
-#define U300_GPIO_PIN_GPIO20		20 /* Unrouted */
-#define U300_GPIO_PIN_GPIO21		21 /* Unrouted */
-#define U300_GPIO_PIN_GPIO22		22 /* Unrouted */
-#define U300_GPIO_PIN_GPIO23		23 /* Unrouted */
-#endif
-
-/*
- * Individual pin assignments for the B330/S330 and B365/S365.
- * Notice that the actual usage of these pins depends on the
- * PAD MUX settings, that is why the same number can potentially
- * appear several times. In the reference design each pin is only
- * used for one purpose. These were determined by inspecting the
- * S365 schematic.
- */
-#if defined(CONFIG_MACH_U300_BS330) || defined(CONFIG_MACH_U300_BS365) || \
-    defined(CONFIG_MACH_U300_BS335)
-#define U300_GPIO_PIN_UART_RX		0
-#define U300_GPIO_PIN_UART_TX		1
-#define U300_GPIO_PIN_UART_CTS		2
-#define U300_GPIO_PIN_UART_RTS		3
-#define U300_GPIO_PIN_CAM_MAIN_STANDBY	4 /* Camera MAIN standby */
-#define U300_GPIO_PIN_GPIO05		5 /* Unrouted */
-#define U300_GPIO_PIN_MS_CD		6 /* Memory Stick Card insertion */
-#define U300_GPIO_PIN_GPIO07		7 /* Test point TP2430 */
-
-#define U300_GPIO_PIN_GPIO08		8 /* Test point TP2437 */
-#define U300_GPIO_PIN_GPIO09		9 /* Test point TP2431 */
-#define U300_GPIO_PIN_GPIO10		10 /* Test point TP2432 */
-#define U300_GPIO_PIN_MMC_CLKRET	11 /* Clock return from MMC/SD card */
-#define U300_GPIO_PIN_MMC_CD		12 /* MMC Card insertion detection */
-#define U300_GPIO_PIN_CAM_SUB_STANDBY	13 /* Camera SUB standby */
-#define U300_GPIO_PIN_GPIO14		14 /* Test point TP2436 */
-#define U300_GPIO_PIN_GPIO15		15 /* Unrouted */
-
-#define U300_GPIO_PIN_GPIO16		16 /* Test point TP2438 */
-#define U300_GPIO_PIN_PHFSENSE		17 /* Headphone jack sensing */
-#define U300_GPIO_PIN_GPIO18		18 /* Test point TP2439 */
-#define U300_GPIO_PIN_GPIO19		19 /* Routed somewhere */
-#define U300_GPIO_PIN_GPIO20		20 /* Unrouted */
-#define U300_GPIO_PIN_GPIO21		21 /* Unrouted */
-#define U300_GPIO_PIN_GPIO22		22 /* Unrouted */
-#define U300_GPIO_PIN_GPIO23		23 /* Unrouted */
-
-#define U300_GPIO_PIN_GPIO24		24 /* Unrouted */
-#define U300_GPIO_PIN_GPIO25		25 /* Unrouted */
-#define U300_GPIO_PIN_GPIO26		26 /* Unrouted */
-#define U300_GPIO_PIN_GPIO27		27 /* Unrouted */
-#define U300_GPIO_PIN_GPIO28		28 /* Unrouted */
-#define U300_GPIO_PIN_GPIO29		29 /* Unrouted */
-#define U300_GPIO_PIN_GPIO30		30 /* Unrouted */
-#define U300_GPIO_PIN_GPIO31		31 /* Unrouted */
-
-#define U300_GPIO_PIN_GPIO32		32 /* Unrouted */
-#define U300_GPIO_PIN_GPIO33		33 /* Unrouted */
-#define U300_GPIO_PIN_GPIO34		34 /* Unrouted */
-#define U300_GPIO_PIN_GPIO35		35 /* Unrouted */
-#define U300_GPIO_PIN_GPIO36		36 /* Unrouted */
-#define U300_GPIO_PIN_GPIO37		37 /* Unrouted */
-#define U300_GPIO_PIN_GPIO38		38 /* Unrouted */
-#define U300_GPIO_PIN_GPIO39		39 /* Unrouted */
-
-#ifdef CONFIG_MACH_U300_BS335
-
-#define U300_GPIO_PIN_GPIO40		40 /* Unrouted */
-#define U300_GPIO_PIN_GPIO41		41 /* Unrouted */
-#define U300_GPIO_PIN_GPIO42		42 /* Unrouted */
-#define U300_GPIO_PIN_GPIO43		43 /* Unrouted */
-#define U300_GPIO_PIN_GPIO44		44 /* Unrouted */
-#define U300_GPIO_PIN_GPIO45		45 /* Unrouted */
-#define U300_GPIO_PIN_GPIO46		46 /* Unrouted */
-#define U300_GPIO_PIN_GPIO47		47 /* Unrouted */
-
-#define U300_GPIO_PIN_GPIO48		48 /* Unrouted */
-#define U300_GPIO_PIN_GPIO49		49 /* Unrouted */
-#define U300_GPIO_PIN_GPIO50		50 /* Unrouted */
-#define U300_GPIO_PIN_GPIO51		51 /* Unrouted */
-#define U300_GPIO_PIN_GPIO52		52 /* Unrouted */
-#define U300_GPIO_PIN_GPIO53		53 /* Unrouted */
-#define U300_GPIO_PIN_GPIO54		54 /* Unrouted */
-#define U300_GPIO_PIN_GPIO55		55 /* Unrouted */
-#endif
-
-#endif
-
 /**
  * enum u300_gpio_variant - the type of U300 GPIO employed
  */
diff --git a/arch/arm/mach-u300/include/mach/irqs.h b/arch/arm/mach-u300/include/mach/irqs.h
index db3fbfa..ee78a26 100644
--- a/arch/arm/mach-u300/include/mach/irqs.h
+++ b/arch/arm/mach-u300/include/mach/irqs.h
@@ -110,7 +110,7 @@
 #endif
 
 /* Maximum 8*7 GPIO lines */
-#ifdef CONFIG_GPIO_U300
+#ifdef CONFIG_PINCTRL_COH901
 #define IRQ_U300_GPIO_BASE		(U300_VIC_IRQS_END)
 #define IRQ_U300_GPIO_END		(IRQ_U300_GPIO_BASE + 56)
 #else
diff --git a/arch/arm/mach-u300/mmc.c b/arch/arm/mach-u300/mmc.c
index 4d482aa..05abd6a 100644
--- a/arch/arm/mach-u300/mmc.c
+++ b/arch/arm/mach-u300/mmc.c
@@ -18,8 +18,8 @@
 #include <linux/slab.h>
 #include <mach/coh901318.h>
 #include <mach/dma_channels.h>
-#include <mach/gpio-u300.h>
 
+#include "u300-gpio.h"
 #include "mmc.h"
 
 static struct mmci_platform_data mmc0_plat_data = {
diff --git a/arch/arm/mach-u300/u300-gpio.h b/arch/arm/mach-u300/u300-gpio.h
new file mode 100644
index 0000000..847dc25
--- /dev/null
+++ b/arch/arm/mach-u300/u300-gpio.h
@@ -0,0 +1,114 @@
+/*
+ * Individual pin assignments for the B26/S26. Notice that the
+ * actual usage of these pins depends on the PAD MUX settings, that
+ * is why the same number can potentially appear several times.
+ * In the reference design each pin is only used for one purpose.
+ * These were determined by inspecting the B26/S26 schematic:
+ * 2/1911-ROA 128 1603
+ */
+#ifdef CONFIG_MACH_U300_BS2X
+#define U300_GPIO_PIN_UART_RX		0
+#define U300_GPIO_PIN_UART_TX		1
+#define U300_GPIO_PIN_GPIO02		2  /* Unrouted */
+#define U300_GPIO_PIN_GPIO03		3  /* Unrouted */
+#define U300_GPIO_PIN_CAM_SLEEP		4
+#define U300_GPIO_PIN_CAM_REG_EN	5
+#define U300_GPIO_PIN_GPIO06		6  /* Unrouted */
+#define U300_GPIO_PIN_GPIO07		7  /* Unrouted */
+
+#define U300_GPIO_PIN_GPIO08		8  /* Service point SP2321 */
+#define U300_GPIO_PIN_GPIO09		9  /* Service point SP2322 */
+#define U300_GPIO_PIN_PHFSENSE		10 /* Headphone jack sensing */
+#define U300_GPIO_PIN_MMC_CLKRET	11 /* Clock return from MMC/SD card */
+#define U300_GPIO_PIN_MMC_CD		12 /* MMC Card insertion detection */
+#define U300_GPIO_PIN_FLIPSENSE		13 /* Mechanical flip sensing */
+#define U300_GPIO_PIN_GPIO14		14 /* DSP JTAG Port RTCK */
+#define U300_GPIO_PIN_GPIO15		15 /* Unrouted */
+
+#define U300_GPIO_PIN_GPIO16		16 /* Unrouted */
+#define U300_GPIO_PIN_GPIO17		17 /* Unrouted */
+#define U300_GPIO_PIN_GPIO18		18 /* Unrouted */
+#define U300_GPIO_PIN_GPIO19		19 /* Unrouted */
+#define U300_GPIO_PIN_GPIO20		20 /* Unrouted */
+#define U300_GPIO_PIN_GPIO21		21 /* Unrouted */
+#define U300_GPIO_PIN_GPIO22		22 /* Unrouted */
+#define U300_GPIO_PIN_GPIO23		23 /* Unrouted */
+#endif
+
+/*
+ * Individual pin assignments for the B330/S330 and B365/S365.
+ * Notice that the actual usage of these pins depends on the
+ * PAD MUX settings, that is why the same number can potentially
+ * appear several times. In the reference design each pin is only
+ * used for one purpose. These were determined by inspecting the
+ * S365 schematic.
+ */
+#if defined(CONFIG_MACH_U300_BS330) || defined(CONFIG_MACH_U300_BS365) || \
+    defined(CONFIG_MACH_U300_BS335)
+#define U300_GPIO_PIN_UART_RX		0
+#define U300_GPIO_PIN_UART_TX		1
+#define U300_GPIO_PIN_UART_CTS		2
+#define U300_GPIO_PIN_UART_RTS		3
+#define U300_GPIO_PIN_CAM_MAIN_STANDBY	4 /* Camera MAIN standby */
+#define U300_GPIO_PIN_GPIO05		5 /* Unrouted */
+#define U300_GPIO_PIN_MS_CD		6 /* Memory Stick Card insertion */
+#define U300_GPIO_PIN_GPIO07		7 /* Test point TP2430 */
+
+#define U300_GPIO_PIN_GPIO08		8 /* Test point TP2437 */
+#define U300_GPIO_PIN_GPIO09		9 /* Test point TP2431 */
+#define U300_GPIO_PIN_GPIO10		10 /* Test point TP2432 */
+#define U300_GPIO_PIN_MMC_CLKRET	11 /* Clock return from MMC/SD card */
+#define U300_GPIO_PIN_MMC_CD		12 /* MMC Card insertion detection */
+#define U300_GPIO_PIN_CAM_SUB_STANDBY	13 /* Camera SUB standby */
+#define U300_GPIO_PIN_GPIO14		14 /* Test point TP2436 */
+#define U300_GPIO_PIN_GPIO15		15 /* Unrouted */
+
+#define U300_GPIO_PIN_GPIO16		16 /* Test point TP2438 */
+#define U300_GPIO_PIN_PHFSENSE		17 /* Headphone jack sensing */
+#define U300_GPIO_PIN_GPIO18		18 /* Test point TP2439 */
+#define U300_GPIO_PIN_GPIO19		19 /* Routed somewhere */
+#define U300_GPIO_PIN_GPIO20		20 /* Unrouted */
+#define U300_GPIO_PIN_GPIO21		21 /* Unrouted */
+#define U300_GPIO_PIN_GPIO22		22 /* Unrouted */
+#define U300_GPIO_PIN_GPIO23		23 /* Unrouted */
+
+#define U300_GPIO_PIN_GPIO24		24 /* Unrouted */
+#define U300_GPIO_PIN_GPIO25		25 /* Unrouted */
+#define U300_GPIO_PIN_GPIO26		26 /* Unrouted */
+#define U300_GPIO_PIN_GPIO27		27 /* Unrouted */
+#define U300_GPIO_PIN_GPIO28		28 /* Unrouted */
+#define U300_GPIO_PIN_GPIO29		29 /* Unrouted */
+#define U300_GPIO_PIN_GPIO30		30 /* Unrouted */
+#define U300_GPIO_PIN_GPIO31		31 /* Unrouted */
+
+#define U300_GPIO_PIN_GPIO32		32 /* Unrouted */
+#define U300_GPIO_PIN_GPIO33		33 /* Unrouted */
+#define U300_GPIO_PIN_GPIO34		34 /* Unrouted */
+#define U300_GPIO_PIN_GPIO35		35 /* Unrouted */
+#define U300_GPIO_PIN_GPIO36		36 /* Unrouted */
+#define U300_GPIO_PIN_GPIO37		37 /* Unrouted */
+#define U300_GPIO_PIN_GPIO38		38 /* Unrouted */
+#define U300_GPIO_PIN_GPIO39		39 /* Unrouted */
+
+#ifdef CONFIG_MACH_U300_BS335
+
+#define U300_GPIO_PIN_GPIO40		40 /* Unrouted */
+#define U300_GPIO_PIN_GPIO41		41 /* Unrouted */
+#define U300_GPIO_PIN_GPIO42		42 /* Unrouted */
+#define U300_GPIO_PIN_GPIO43		43 /* Unrouted */
+#define U300_GPIO_PIN_GPIO44		44 /* Unrouted */
+#define U300_GPIO_PIN_GPIO45		45 /* Unrouted */
+#define U300_GPIO_PIN_GPIO46		46 /* Unrouted */
+#define U300_GPIO_PIN_GPIO47		47 /* Unrouted */
+
+#define U300_GPIO_PIN_GPIO48		48 /* Unrouted */
+#define U300_GPIO_PIN_GPIO49		49 /* Unrouted */
+#define U300_GPIO_PIN_GPIO50		50 /* Unrouted */
+#define U300_GPIO_PIN_GPIO51		51 /* Unrouted */
+#define U300_GPIO_PIN_GPIO52		52 /* Unrouted */
+#define U300_GPIO_PIN_GPIO53		53 /* Unrouted */
+#define U300_GPIO_PIN_GPIO54		54 /* Unrouted */
+#define U300_GPIO_PIN_GPIO55		55 /* Unrouted */
+#endif
+
+#endif
diff --git a/arch/arm/mm/iomap.c b/arch/arm/mm/iomap.c
index 430df1a..e62956e 100644
--- a/arch/arm/mm/iomap.c
+++ b/arch/arm/mm/iomap.c
@@ -35,27 +35,6 @@ EXPORT_SYMBOL(pcibios_min_mem);
 unsigned int pci_flags = PCI_REASSIGN_ALL_RSRC;
 EXPORT_SYMBOL(pci_flags);
 
-void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen)
-{
-	resource_size_t start = pci_resource_start(dev, bar);
-	resource_size_t len   = pci_resource_len(dev, bar);
-	unsigned long flags = pci_resource_flags(dev, bar);
-
-	if (!len || !start)
-		return NULL;
-	if (maxlen && len > maxlen)
-		len = maxlen;
-	if (flags & IORESOURCE_IO)
-		return ioport_map(start, len);
-	if (flags & IORESOURCE_MEM) {
-		if (flags & IORESOURCE_CACHEABLE)
-			return ioremap(start, len);
-		return ioremap_nocache(start, len);
-	}
-	return NULL;
-}
-EXPORT_SYMBOL(pci_iomap);
-
 void pci_iounmap(struct pci_dev *dev, void __iomem *addr)
 {
 	if ((unsigned long)addr >= VMALLOC_START &&
diff --git a/arch/arm/plat-omap/include/plat/iommu.h b/arch/arm/plat-omap/include/plat/iommu.h
index a1d79ee..88be3e6 100644
--- a/arch/arm/plat-omap/include/plat/iommu.h
+++ b/arch/arm/plat-omap/include/plat/iommu.h
@@ -111,6 +111,32 @@ struct iommu_platform_data {
 	u32 da_end;
 };
 
+/**
+ * struct iommu_arch_data - omap iommu private data
+ * @name: name of the iommu device
+ * @iommu_dev: handle of the iommu device
+ *
+ * This is an omap iommu private data object, which binds an iommu user
+ * to its iommu device. This object should be placed at the iommu user's
+ * dev_archdata so generic IOMMU API can be used without having to
+ * utilize omap-specific plumbing anymore.
+ */
+struct omap_iommu_arch_data {
+	const char *name;
+	struct omap_iommu *iommu_dev;
+};
+
+/**
+ * dev_to_omap_iommu() - retrieves an omap iommu object from a user device
+ * @dev: iommu client device
+ */
+static inline struct omap_iommu *dev_to_omap_iommu(struct device *dev)
+{
+	struct omap_iommu_arch_data *arch_data = dev->archdata.iommu;
+
+	return arch_data->iommu_dev;
+}
+
 /* IOMMU errors */
 #define OMAP_IOMMU_ERR_TLB_MISS		(1 << 0)
 #define OMAP_IOMMU_ERR_TRANS_FAULT	(1 << 1)
@@ -163,8 +189,8 @@ extern int omap_iommu_set_isr(const char *name,
 				    void *priv),
 			 void *isr_priv);
 
-extern void omap_iommu_save_ctx(struct omap_iommu *obj);
-extern void omap_iommu_restore_ctx(struct omap_iommu *obj);
+extern void omap_iommu_save_ctx(struct device *dev);
+extern void omap_iommu_restore_ctx(struct device *dev);
 
 extern int omap_install_iommu_arch(const struct iommu_functions *ops);
 extern void omap_uninstall_iommu_arch(const struct iommu_functions *ops);
@@ -176,6 +202,5 @@ extern ssize_t
 omap_iommu_dump_ctx(struct omap_iommu *obj, char *buf, ssize_t len);
 extern size_t
 omap_dump_tlb_entries(struct omap_iommu *obj, char *buf, ssize_t len);
-struct device *omap_find_iommu_device(const char *name);
 
 #endif /* __MACH_IOMMU_H */
diff --git a/arch/arm/plat-omap/include/plat/iovmm.h b/arch/arm/plat-omap/include/plat/iovmm.h
index 6af1a91..498e57c 100644
--- a/arch/arm/plat-omap/include/plat/iovmm.h
+++ b/arch/arm/plat-omap/include/plat/iovmm.h
@@ -72,18 +72,18 @@ struct iovm_struct {
 #define IOVMF_DA_FIXED		(1 << (4 + IOVMF_SW_SHIFT))
 
 
-extern struct iovm_struct *omap_find_iovm_area(struct omap_iommu *obj, u32 da);
+extern struct iovm_struct *omap_find_iovm_area(struct device *dev, u32 da);
 extern u32
-omap_iommu_vmap(struct iommu_domain *domain, struct omap_iommu *obj, u32 da,
+omap_iommu_vmap(struct iommu_domain *domain, struct device *dev, u32 da,
 			const struct sg_table *sgt, u32 flags);
 extern struct sg_table *omap_iommu_vunmap(struct iommu_domain *domain,
-				struct omap_iommu *obj, u32 da);
+				struct device *dev, u32 da);
 extern u32
-omap_iommu_vmalloc(struct iommu_domain *domain, struct omap_iommu *obj,
+omap_iommu_vmalloc(struct iommu_domain *domain, struct device *dev,
 				u32 da, size_t bytes, u32 flags);
 extern void
-omap_iommu_vfree(struct iommu_domain *domain, struct omap_iommu *obj,
+omap_iommu_vfree(struct iommu_domain *domain, struct device *dev,
 				const u32 da);
-extern void *omap_da_to_va(struct omap_iommu *obj, u32 da);
+extern void *omap_da_to_va(struct device *dev, u32 da);
 
 #endif /* __IOMMU_MMAP_H */
diff --git a/arch/arm/plat-samsung/include/plat/keypad.h b/arch/arm/plat-samsung/include/plat/keypad.h
index b59a648..c81ace3 100644
--- a/arch/arm/plat-samsung/include/plat/keypad.h
+++ b/arch/arm/plat-samsung/include/plat/keypad.h
@@ -13,32 +13,7 @@
 #ifndef __PLAT_SAMSUNG_KEYPAD_H
 #define __PLAT_SAMSUNG_KEYPAD_H
 
-#include <linux/input/matrix_keypad.h>
-
-#define SAMSUNG_MAX_ROWS	8
-#define SAMSUNG_MAX_COLS	8
-
-/**
- * struct samsung_keypad_platdata - Platform device data for Samsung Keypad.
- * @keymap_data: pointer to &matrix_keymap_data.
- * @rows: number of keypad row supported.
- * @cols: number of keypad col supported.
- * @no_autorepeat: disable key autorepeat.
- * @wakeup: controls whether the device should be set up as wakeup source.
- * @cfg_gpio: configure the GPIO.
- *
- * Initialisation data specific to either the machine or the platform
- * for the device driver to use or call-back when configuring gpio.
- */
-struct samsung_keypad_platdata {
-	const struct matrix_keymap_data	*keymap_data;
-	unsigned int rows;
-	unsigned int cols;
-	bool no_autorepeat;
-	bool wakeup;
-
-	void (*cfg_gpio)(unsigned int rows, unsigned int cols);
-};
+#include <linux/input/samsung-keypad.h>
 
 /**
  * samsung_keypad_set_platdata - Set platform data for Samsung Keypad device.
diff --git a/arch/blackfin/configs/BF518F-EZBRD_defconfig b/arch/blackfin/configs/BF518F-EZBRD_defconfig
index 5edcb58..0b7039c 100644
--- a/arch/blackfin/configs/BF518F-EZBRD_defconfig
+++ b/arch/blackfin/configs/BF518F-EZBRD_defconfig
@@ -80,7 +80,7 @@ CONFIG_I2C_CHARDEV=y
 CONFIG_I2C_BLACKFIN_TWI=y
 CONFIG_I2C_BLACKFIN_TWI_CLK_KHZ=100
 CONFIG_SPI=y
-CONFIG_SPI_BFIN=y
+CONFIG_SPI_BFIN5XX=y
 CONFIG_GPIOLIB=y
 CONFIG_GPIO_SYSFS=y
 # CONFIG_HWMON is not set
diff --git a/arch/blackfin/configs/BF526-EZBRD_defconfig b/arch/blackfin/configs/BF526-EZBRD_defconfig
index 2e54957..5553205 100644
--- a/arch/blackfin/configs/BF526-EZBRD_defconfig
+++ b/arch/blackfin/configs/BF526-EZBRD_defconfig
@@ -97,7 +97,7 @@ CONFIG_I2C_CHARDEV=m
 CONFIG_I2C_BLACKFIN_TWI=y
 CONFIG_I2C_BLACKFIN_TWI_CLK_KHZ=100
 CONFIG_SPI=y
-CONFIG_SPI_BFIN=y
+CONFIG_SPI_BFIN5XX=y
 CONFIG_GPIOLIB=y
 CONFIG_GPIO_SYSFS=y
 CONFIG_WATCHDOG=y
diff --git a/arch/blackfin/configs/BF527-AD7160-EVAL_defconfig b/arch/blackfin/configs/BF527-AD7160-EVAL_defconfig
index ad0881b..d95658f 100644
--- a/arch/blackfin/configs/BF527-AD7160-EVAL_defconfig
+++ b/arch/blackfin/configs/BF527-AD7160-EVAL_defconfig
@@ -68,7 +68,7 @@ CONFIG_I2C_ALGOBIT=y
 CONFIG_I2C_BLACKFIN_TWI=y
 CONFIG_I2C_BLACKFIN_TWI_CLK_KHZ=400
 CONFIG_SPI=y
-CONFIG_SPI_BFIN=y
+CONFIG_SPI_BFIN5XX=y
 CONFIG_GPIOLIB=y
 CONFIG_GPIO_SYSFS=y
 # CONFIG_HWMON is not set
diff --git a/arch/blackfin/configs/BF527-EZKIT-V2_defconfig b/arch/blackfin/configs/BF527-EZKIT-V2_defconfig
index 8465b3e..498f64a 100644
--- a/arch/blackfin/configs/BF527-EZKIT-V2_defconfig
+++ b/arch/blackfin/configs/BF527-EZKIT-V2_defconfig
@@ -105,7 +105,7 @@ CONFIG_I2C_CHARDEV=m
 CONFIG_I2C_BLACKFIN_TWI=y
 CONFIG_I2C_BLACKFIN_TWI_CLK_KHZ=100
 CONFIG_SPI=y
-CONFIG_SPI_BFIN=y
+CONFIG_SPI_BFIN5XX=y
 CONFIG_GPIOLIB=y
 CONFIG_GPIO_SYSFS=y
 # CONFIG_HWMON is not set
diff --git a/arch/blackfin/configs/BF527-EZKIT_defconfig b/arch/blackfin/configs/BF527-EZKIT_defconfig
index 5e7321b..72e0317 100644
--- a/arch/blackfin/configs/BF527-EZKIT_defconfig
+++ b/arch/blackfin/configs/BF527-EZKIT_defconfig
@@ -99,7 +99,7 @@ CONFIG_I2C_CHARDEV=m
 CONFIG_I2C_BLACKFIN_TWI=y
 CONFIG_I2C_BLACKFIN_TWI_CLK_KHZ=100
 CONFIG_SPI=y
-CONFIG_SPI_BFIN=y
+CONFIG_SPI_BFIN5XX=y
 CONFIG_GPIOLIB=y
 CONFIG_GPIO_SYSFS=y
 # CONFIG_HWMON is not set
diff --git a/arch/blackfin/configs/BF533-EZKIT_defconfig b/arch/blackfin/configs/BF533-EZKIT_defconfig
index a7eb54b..2f075e0 100644
--- a/arch/blackfin/configs/BF533-EZKIT_defconfig
+++ b/arch/blackfin/configs/BF533-EZKIT_defconfig
@@ -81,7 +81,7 @@ CONFIG_SERIAL_BFIN_CONSOLE=y
 # CONFIG_LEGACY_PTYS is not set
 # CONFIG_HW_RANDOM is not set
 CONFIG_SPI=y
-CONFIG_SPI_BFIN=y
+CONFIG_SPI_BFIN5XX=y
 CONFIG_GPIOLIB=y
 CONFIG_GPIO_SYSFS=y
 # CONFIG_HWMON is not set
diff --git a/arch/blackfin/configs/BF533-STAMP_defconfig b/arch/blackfin/configs/BF533-STAMP_defconfig
index b90d379..ab38a82 100644
--- a/arch/blackfin/configs/BF533-STAMP_defconfig
+++ b/arch/blackfin/configs/BF533-STAMP_defconfig
@@ -84,7 +84,7 @@ CONFIG_I2C=m
 CONFIG_I2C_CHARDEV=m
 CONFIG_I2C_GPIO=m
 CONFIG_SPI=y
-CONFIG_SPI_BFIN=y
+CONFIG_SPI_BFIN5XX=y
 CONFIG_GPIOLIB=y
 CONFIG_GPIO_SYSFS=y
 # CONFIG_HWMON is not set
diff --git a/arch/blackfin/configs/BF537-STAMP_defconfig b/arch/blackfin/configs/BF537-STAMP_defconfig
index 0053625..5c802d6 100644
--- a/arch/blackfin/configs/BF537-STAMP_defconfig
+++ b/arch/blackfin/configs/BF537-STAMP_defconfig
@@ -94,7 +94,7 @@ CONFIG_I2C_CHARDEV=m
 CONFIG_I2C_BLACKFIN_TWI=m
 CONFIG_I2C_BLACKFIN_TWI_CLK_KHZ=100
 CONFIG_SPI=y
-CONFIG_SPI_BFIN=y
+CONFIG_SPI_BFIN5XX=y
 CONFIG_GPIOLIB=y
 CONFIG_GPIO_SYSFS=y
 # CONFIG_HWMON is not set
diff --git a/arch/blackfin/configs/BF538-EZKIT_defconfig b/arch/blackfin/configs/BF538-EZKIT_defconfig
index 580bf42..972aa62 100644
--- a/arch/blackfin/configs/BF538-EZKIT_defconfig
+++ b/arch/blackfin/configs/BF538-EZKIT_defconfig
@@ -101,7 +101,7 @@ CONFIG_I2C=m
 CONFIG_I2C_BLACKFIN_TWI=m
 CONFIG_I2C_BLACKFIN_TWI_CLK_KHZ=100
 CONFIG_SPI=y
-CONFIG_SPI_BFIN=y
+CONFIG_SPI_BFIN5XX=y
 CONFIG_GPIOLIB=y
 CONFIG_GPIO_SYSFS=y
 # CONFIG_HWMON is not set
diff --git a/arch/blackfin/configs/BF548-EZKIT_defconfig b/arch/blackfin/configs/BF548-EZKIT_defconfig
index 0e6d841..7a1e3bf 100644
--- a/arch/blackfin/configs/BF548-EZKIT_defconfig
+++ b/arch/blackfin/configs/BF548-EZKIT_defconfig
@@ -113,7 +113,7 @@ CONFIG_I2C_CHARDEV=y
 CONFIG_I2C_BLACKFIN_TWI=y
 CONFIG_I2C_BLACKFIN_TWI_CLK_KHZ=100
 CONFIG_SPI=y
-CONFIG_SPI_BFIN=y
+CONFIG_SPI_BFIN5XX=y
 CONFIG_GPIOLIB=y
 CONFIG_GPIO_SYSFS=y
 # CONFIG_HWMON is not set
diff --git a/arch/blackfin/configs/BF561-ACVILON_defconfig b/arch/blackfin/configs/BF561-ACVILON_defconfig
index 77a27e3..0fdc4ec 100644
--- a/arch/blackfin/configs/BF561-ACVILON_defconfig
+++ b/arch/blackfin/configs/BF561-ACVILON_defconfig
@@ -85,7 +85,7 @@ CONFIG_I2C=y
 CONFIG_I2C_CHARDEV=y
 CONFIG_I2C_PCA_PLATFORM=y
 CONFIG_SPI=y
-CONFIG_SPI_BFIN=y
+CONFIG_SPI_BFIN5XX=y
 CONFIG_SPI_SPIDEV=y
 CONFIG_GPIOLIB=y
 CONFIG_GPIO_SYSFS=y
diff --git a/arch/blackfin/configs/BF561-EZKIT-SMP_defconfig b/arch/blackfin/configs/BF561-EZKIT-SMP_defconfig
index f5ed34e..78adbbf 100644
--- a/arch/blackfin/configs/BF561-EZKIT-SMP_defconfig
+++ b/arch/blackfin/configs/BF561-EZKIT-SMP_defconfig
@@ -84,7 +84,7 @@ CONFIG_SERIAL_BFIN_CONSOLE=y
 # CONFIG_LEGACY_PTYS is not set
 # CONFIG_HW_RANDOM is not set
 CONFIG_SPI=y
-CONFIG_SPI_BFIN=y
+CONFIG_SPI_BFIN5XX=y
 CONFIG_GPIOLIB=y
 CONFIG_GPIO_SYSFS=y
 # CONFIG_HWMON is not set
diff --git a/arch/blackfin/configs/BF561-EZKIT_defconfig b/arch/blackfin/configs/BF561-EZKIT_defconfig
index d7ff2ae..d3cd0f5 100644
--- a/arch/blackfin/configs/BF561-EZKIT_defconfig
+++ b/arch/blackfin/configs/BF561-EZKIT_defconfig
@@ -86,7 +86,7 @@ CONFIG_SERIAL_BFIN_CONSOLE=y
 # CONFIG_LEGACY_PTYS is not set
 # CONFIG_HW_RANDOM is not set
 CONFIG_SPI=y
-CONFIG_SPI_BFIN=y
+CONFIG_SPI_BFIN5XX=y
 CONFIG_GPIOLIB=y
 CONFIG_GPIO_SYSFS=y
 # CONFIG_HWMON is not set
diff --git a/arch/blackfin/configs/BlackStamp_defconfig b/arch/blackfin/configs/BlackStamp_defconfig
index 8501431..7b982d0 100644
--- a/arch/blackfin/configs/BlackStamp_defconfig
+++ b/arch/blackfin/configs/BlackStamp_defconfig
@@ -80,7 +80,7 @@ CONFIG_I2C=m
 CONFIG_I2C_CHARDEV=m
 CONFIG_I2C_GPIO=m
 CONFIG_SPI=y
-CONFIG_SPI_BFIN=y
+CONFIG_SPI_BFIN5XX=y
 CONFIG_SPI_SPIDEV=m
 # CONFIG_HWMON is not set
 CONFIG_WATCHDOG=y
diff --git a/arch/blackfin/configs/CM-BF527_defconfig b/arch/blackfin/configs/CM-BF527_defconfig
index dbf750c..c280a50 100644
--- a/arch/blackfin/configs/CM-BF527_defconfig
+++ b/arch/blackfin/configs/CM-BF527_defconfig
@@ -88,7 +88,7 @@ CONFIG_I2C_CHARDEV=m
 CONFIG_I2C_BLACKFIN_TWI=m
 CONFIG_I2C_BLACKFIN_TWI_CLK_KHZ=100
 CONFIG_SPI=y
-CONFIG_SPI_BFIN=y
+CONFIG_SPI_BFIN5XX=y
 CONFIG_GPIOLIB=y
 CONFIG_GPIO_SYSFS=y
 CONFIG_WATCHDOG=y
diff --git a/arch/blackfin/configs/CM-BF533_defconfig b/arch/blackfin/configs/CM-BF533_defconfig
index 07ffbda..c940a1e 100644
--- a/arch/blackfin/configs/CM-BF533_defconfig
+++ b/arch/blackfin/configs/CM-BF533_defconfig
@@ -57,7 +57,7 @@ CONFIG_SERIAL_BFIN_CONSOLE=y
 # CONFIG_LEGACY_PTYS is not set
 # CONFIG_HW_RANDOM is not set
 CONFIG_SPI=y
-CONFIG_SPI_BFIN=y
+CONFIG_SPI_BFIN5XX=y
 # CONFIG_HWMON is not set
 # CONFIG_USB_SUPPORT is not set
 CONFIG_MMC=y
diff --git a/arch/blackfin/configs/CM-BF537E_defconfig b/arch/blackfin/configs/CM-BF537E_defconfig
index 707cbf8..2e47df7 100644
--- a/arch/blackfin/configs/CM-BF537E_defconfig
+++ b/arch/blackfin/configs/CM-BF537E_defconfig
@@ -78,7 +78,7 @@ CONFIG_SERIAL_BFIN_UART1=y
 # CONFIG_LEGACY_PTYS is not set
 # CONFIG_HW_RANDOM is not set
 CONFIG_SPI=y
-CONFIG_SPI_BFIN=y
+CONFIG_SPI_BFIN5XX=y
 CONFIG_GPIOLIB=y
 CONFIG_GPIO_SYSFS=y
 CONFIG_USB_GADGET=m
diff --git a/arch/blackfin/configs/CM-BF537U_defconfig b/arch/blackfin/configs/CM-BF537U_defconfig
index 4596935..6da629f 100644
--- a/arch/blackfin/configs/CM-BF537U_defconfig
+++ b/arch/blackfin/configs/CM-BF537U_defconfig
@@ -72,7 +72,7 @@ CONFIG_SERIAL_BFIN_UART1=y
 # CONFIG_LEGACY_PTYS is not set
 # CONFIG_HW_RANDOM is not set
 CONFIG_SPI=y
-CONFIG_SPI_BFIN=y
+CONFIG_SPI_BFIN5XX=y
 CONFIG_GPIOLIB=y
 CONFIG_GPIO_SYSFS=y
 CONFIG_USB_GADGET=y
diff --git a/arch/blackfin/configs/CM-BF548_defconfig b/arch/blackfin/configs/CM-BF548_defconfig
index 9f1d084..349922b 100644
--- a/arch/blackfin/configs/CM-BF548_defconfig
+++ b/arch/blackfin/configs/CM-BF548_defconfig
@@ -89,7 +89,7 @@ CONFIG_I2C=y
 CONFIG_I2C_CHARDEV=y
 CONFIG_I2C_BLACKFIN_TWI=y
 CONFIG_SPI=y
-CONFIG_SPI_BFIN=y
+CONFIG_SPI_BFIN5XX=y
 # CONFIG_HWMON is not set
 CONFIG_WATCHDOG=y
 CONFIG_BFIN_WDT=y
diff --git a/arch/blackfin/configs/CM-BF561_defconfig b/arch/blackfin/configs/CM-BF561_defconfig
index 6c7b215..0456dea 100644
--- a/arch/blackfin/configs/CM-BF561_defconfig
+++ b/arch/blackfin/configs/CM-BF561_defconfig
@@ -78,7 +78,7 @@ CONFIG_SERIAL_BFIN_CONSOLE=y
 # CONFIG_LEGACY_PTYS is not set
 # CONFIG_HW_RANDOM is not set
 CONFIG_SPI=y
-CONFIG_SPI_BFIN=y
+CONFIG_SPI_BFIN5XX=y
 CONFIG_GPIOLIB=y
 CONFIG_GPIO_SYSFS=y
 CONFIG_USB_GADGET=m
diff --git a/arch/blackfin/configs/DNP5370_defconfig b/arch/blackfin/configs/DNP5370_defconfig
index b192acf..89162d0 100644
--- a/arch/blackfin/configs/DNP5370_defconfig
+++ b/arch/blackfin/configs/DNP5370_defconfig
@@ -78,7 +78,7 @@ CONFIG_I2C=y
 CONFIG_I2C_CHARDEV=y
 CONFIG_I2C_BLACKFIN_TWI=y
 CONFIG_SPI=y
-CONFIG_SPI_BFIN=y
+CONFIG_SPI_BFIN5XX=y
 CONFIG_SPI_SPIDEV=y
 CONFIG_GPIOLIB=y
 CONFIG_GPIO_SYSFS=y
diff --git a/arch/blackfin/configs/H8606_defconfig b/arch/blackfin/configs/H8606_defconfig
index 06e9f49..a26436b 100644
--- a/arch/blackfin/configs/H8606_defconfig
+++ b/arch/blackfin/configs/H8606_defconfig
@@ -68,7 +68,7 @@ CONFIG_SERIAL_BFIN_CONSOLE=y
 # CONFIG_LEGACY_PTYS is not set
 # CONFIG_HW_RANDOM is not set
 CONFIG_SPI=y
-CONFIG_SPI_BFIN=y
+CONFIG_SPI_BFIN5XX=y
 CONFIG_SPI_SPIDEV=y
 CONFIG_WATCHDOG=y
 CONFIG_SOUND=m
diff --git a/arch/blackfin/configs/IP0X_defconfig b/arch/blackfin/configs/IP0X_defconfig
index 5e797cf..6479915 100644
--- a/arch/blackfin/configs/IP0X_defconfig
+++ b/arch/blackfin/configs/IP0X_defconfig
@@ -70,7 +70,7 @@ CONFIG_SERIAL_BFIN_CONSOLE=y
 # CONFIG_LEGACY_PTYS is not set
 CONFIG_HW_RANDOM=y
 CONFIG_SPI=y
-CONFIG_SPI_BFIN=y
+CONFIG_SPI_BFIN5XX=y
 # CONFIG_HWMON is not set
 CONFIG_WATCHDOG=y
 CONFIG_USB=y
diff --git a/arch/blackfin/configs/PNAV-10_defconfig b/arch/blackfin/configs/PNAV-10_defconfig
index a566a2f..8fd9b44 100644
--- a/arch/blackfin/configs/PNAV-10_defconfig
+++ b/arch/blackfin/configs/PNAV-10_defconfig
@@ -84,7 +84,7 @@ CONFIG_I2C_CHARDEV=y
 CONFIG_I2C_BLACKFIN_TWI=y
 CONFIG_I2C_BLACKFIN_TWI_CLK_KHZ=100
 CONFIG_SPI=y
-CONFIG_SPI_BFIN=y
+CONFIG_SPI_BFIN5XX=y
 CONFIG_FB=y
 CONFIG_FIRMWARE_EDID=y
 CONFIG_BACKLIGHT_LCD_SUPPORT=y
diff --git a/arch/blackfin/configs/SRV1_defconfig b/arch/blackfin/configs/SRV1_defconfig
index 12e66cd..0520c16 100644
--- a/arch/blackfin/configs/SRV1_defconfig
+++ b/arch/blackfin/configs/SRV1_defconfig
@@ -71,7 +71,7 @@ CONFIG_I2C_CHARDEV=y
 CONFIG_I2C_BLACKFIN_TWI=y
 CONFIG_I2C_BLACKFIN_TWI_CLK_KHZ=100
 CONFIG_SPI=y
-CONFIG_SPI_BFIN=y
+CONFIG_SPI_BFIN5XX=y
 CONFIG_HWMON=m
 CONFIG_WATCHDOG=y
 CONFIG_BFIN_WDT=y
diff --git a/arch/blackfin/configs/TCM-BF518_defconfig b/arch/blackfin/configs/TCM-BF518_defconfig
index d496ae9..e4ed865 100644
--- a/arch/blackfin/configs/TCM-BF518_defconfig
+++ b/arch/blackfin/configs/TCM-BF518_defconfig
@@ -92,7 +92,7 @@ CONFIG_I2C_CHARDEV=y
 CONFIG_I2C_BLACKFIN_TWI=y
 CONFIG_I2C_BLACKFIN_TWI_CLK_KHZ=100
 CONFIG_SPI=y
-CONFIG_SPI_BFIN=y
+CONFIG_SPI_BFIN5XX=y
 CONFIG_GPIOLIB=y
 CONFIG_GPIO_SYSFS=y
 # CONFIG_HWMON is not set
diff --git a/arch/blackfin/configs/TCM-BF537_defconfig b/arch/blackfin/configs/TCM-BF537_defconfig
index 65f6421..c1f45f1 100644
--- a/arch/blackfin/configs/TCM-BF537_defconfig
+++ b/arch/blackfin/configs/TCM-BF537_defconfig
@@ -70,7 +70,7 @@ CONFIG_SERIAL_BFIN_UART1=y
 # CONFIG_LEGACY_PTYS is not set
 # CONFIG_HW_RANDOM is not set
 CONFIG_SPI=y
-CONFIG_SPI_BFIN=y
+CONFIG_SPI_BFIN5XX=y
 # CONFIG_HWMON is not set
 CONFIG_WATCHDOG=y
 CONFIG_BFIN_WDT=y
diff --git a/arch/blackfin/include/asm/bfin_serial.h b/arch/blackfin/include/asm/bfin_serial.h
index ecacdf3..68bcc3d 100644
--- a/arch/blackfin/include/asm/bfin_serial.h
+++ b/arch/blackfin/include/asm/bfin_serial.h
@@ -51,9 +51,6 @@ struct bfin_serial_port {
 #elif ANOMALY_05000363
 	unsigned int anomaly_threshold;
 #endif
-#ifdef CONFIG_SERIAL_BFIN_HARD_CTSRTS
-	int scts;
-#endif
 #if defined(CONFIG_SERIAL_BFIN_CTSRTS) || \
 	defined(CONFIG_SERIAL_BFIN_HARD_CTSRTS)
 	int cts_pin;
diff --git a/arch/blackfin/include/asm/cpu.h b/arch/blackfin/include/asm/cpu.h
index 0504378..e349631 100644
--- a/arch/blackfin/include/asm/cpu.h
+++ b/arch/blackfin/include/asm/cpu.h
@@ -14,6 +14,9 @@ struct blackfin_cpudata {
 	struct cpu cpu;
 	unsigned int imemctl;
 	unsigned int dmemctl;
+#ifdef CONFIG_SMP
+	struct task_struct *idle;
+#endif
 };
 
 DECLARE_PER_CPU(struct blackfin_cpudata, cpu_data);
diff --git a/arch/blackfin/include/asm/smp.h b/arch/blackfin/include/asm/smp.h
index af6c0aa..dc3d144 100644
--- a/arch/blackfin/include/asm/smp.h
+++ b/arch/blackfin/include/asm/smp.h
@@ -37,7 +37,7 @@ extern unsigned long dcache_invld_count[NR_CPUS];
 #endif
 
 void smp_icache_flush_range_others(unsigned long start,
-				   unsigned long end);
+					unsigned long end);
 #ifdef CONFIG_HOTPLUG_CPU
 void coreb_die(void);
 void cpu_die(void);
@@ -46,4 +46,7 @@ int __cpu_disable(void);
 int __cpu_die(unsigned int cpu);
 #endif
 
+void smp_timer_broadcast(const struct cpumask *mask);
+
+
 #endif /* !__ASM_BLACKFIN_SMP_H */
diff --git a/arch/blackfin/kernel/setup.c b/arch/blackfin/kernel/setup.c
index dfa2525..d6102c8 100644
--- a/arch/blackfin/kernel/setup.c
+++ b/arch/blackfin/kernel/setup.c
@@ -828,10 +828,18 @@ static inline int __init get_mem_size(void)
 	u32 ddrctl = bfin_read_EBIU_DDRCTL1();
 	int ret = 0;
 	switch (ddrctl & 0xc0000) {
-		case DEVSZ_64:  ret = 64 / 8;
-		case DEVSZ_128: ret = 128 / 8;
-		case DEVSZ_256: ret = 256 / 8;
-		case DEVSZ_512: ret = 512 / 8;
+	case DEVSZ_64:
+		ret = 64 / 8;
+		break;
+	case DEVSZ_128:
+		ret = 128 / 8;
+		break;
+	case DEVSZ_256:
+		ret = 256 / 8;
+		break;
+	case DEVSZ_512:
+		ret = 512 / 8;
+		break;
 	}
 	switch (ddrctl & 0x30000) {
 		case DEVWD_4:  ret *= 2;
diff --git a/arch/blackfin/kernel/time-ts.c b/arch/blackfin/kernel/time-ts.c
index 1bcf3a3..d98f2d6 100644
--- a/arch/blackfin/kernel/time-ts.c
+++ b/arch/blackfin/kernel/time-ts.c
@@ -219,7 +219,7 @@ static void __init bfin_gptmr0_clockevent_init(struct clock_event_device *evt)
 
 #if defined(CONFIG_TICKSOURCE_CORETMR)
 /* per-cpu local core timer */
-static DEFINE_PER_CPU(struct clock_event_device, coretmr_events);
+DEFINE_PER_CPU(struct clock_event_device, coretmr_events);
 
 static int bfin_coretmr_set_next_event(unsigned long cycles,
 				struct clock_event_device *evt)
@@ -281,6 +281,7 @@ void bfin_coretmr_init(void)
 #ifdef CONFIG_CORE_TIMER_IRQ_L1
 __attribute__((l1_text))
 #endif
+
 irqreturn_t bfin_coretmr_interrupt(int irq, void *dev_id)
 {
 	int cpu = smp_processor_id();
@@ -306,6 +307,11 @@ void bfin_coretmr_clockevent_init(void)
 	unsigned int cpu = smp_processor_id();
 	struct clock_event_device *evt = &per_cpu(coretmr_events, cpu);
 
+#ifdef CONFIG_SMP
+	evt->broadcast = smp_timer_broadcast;
+#endif
+
+
 	evt->name = "bfin_core_timer";
 	evt->rating = 350;
 	evt->irq = -1;
diff --git a/arch/blackfin/mach-bf518/boards/ezbrd.c b/arch/blackfin/mach-bf518/boards/ezbrd.c
index d1c0c0c..a2d96d3 100644
--- a/arch/blackfin/mach-bf518/boards/ezbrd.c
+++ b/arch/blackfin/mach-bf518/boards/ezbrd.c
@@ -61,7 +61,7 @@ static struct physmap_flash_data ezbrd_flash_data = {
 
 static struct resource ezbrd_flash_resource = {
 	.start = 0x20000000,
-#if defined(CONFIG_SPI_BFIN) || defined(CONFIG_SPI_BFIN_MODULE)
+#if defined(CONFIG_SPI_BFIN5XX) || defined(CONFIG_SPI_BFIN5XX_MODULE)
 	.end   = 0x202fffff,
 #else
 	.end   = 0x203fffff,
@@ -122,6 +122,8 @@ static struct bfin_mii_bus_platform_data bfin_mii_bus_data = {
 #if defined(CONFIG_NET_DSA_KSZ8893M) || defined(CONFIG_NET_DSA_KSZ8893M_MODULE)
 	.phy_mask = 0xfff7, /* Only probe the port phy connect to the on chip MAC */
 #endif
+	.vlan1_mask = 1,
+	.vlan2_mask = 2,
 };
 
 static struct platform_device bfin_mii_bus = {
@@ -292,7 +294,7 @@ static struct spi_board_info bfin_spi_board_info[] __initdata = {
 };
 
 /* SPI controller data */
-#if defined(CONFIG_SPI_BFIN) || defined(CONFIG_SPI_BFIN_MODULE)
+#if defined(CONFIG_SPI_BFIN5XX) || defined(CONFIG_SPI_BFIN5XX_MODULE)
 /* SPI (0) */
 static struct bfin5xx_spi_master bfin_spi0_info = {
 	.num_chipselect = 6,
@@ -715,7 +717,7 @@ static struct platform_device *stamp_devices[] __initdata = {
 #endif
 #endif
 
-#if defined(CONFIG_SPI_BFIN) || defined(CONFIG_SPI_BFIN_MODULE)
+#if defined(CONFIG_SPI_BFIN5XX) || defined(CONFIG_SPI_BFIN5XX_MODULE)
 	&bfin_spi0_device,
 	&bfin_spi1_device,
 #endif
@@ -777,7 +779,7 @@ static int __init ezbrd_init(void)
 	spi_register_board_info(bfin_spi_board_info, ARRAY_SIZE(bfin_spi_board_info));
 	/* setup BF518-EZBRD GPIO pin PG11 to AMS2, PG15 to AMS3. */
 	peripheral_request(P_AMS2, "ParaFlash");
-#if !defined(CONFIG_SPI_BFIN) && !defined(CONFIG_SPI_BFIN_MODULE)
+#if !defined(CONFIG_SPI_BFIN5XX) && !defined(CONFIG_SPI_BFIN5XX_MODULE)
 	peripheral_request(P_AMS3, "ParaFlash");
 #endif
 	return 0;
diff --git a/arch/blackfin/mach-bf518/boards/tcm-bf518.c b/arch/blackfin/mach-bf518/boards/tcm-bf518.c
index 5470bf8..f271310 100644
--- a/arch/blackfin/mach-bf518/boards/tcm-bf518.c
+++ b/arch/blackfin/mach-bf518/boards/tcm-bf518.c
@@ -228,7 +228,7 @@ static struct spi_board_info bfin_spi_board_info[] __initdata = {
 };
 
 /* SPI controller data */
-#if defined(CONFIG_SPI_BFIN) || defined(CONFIG_SPI_BFIN_MODULE)
+#if defined(CONFIG_SPI_BFIN5XX) || defined(CONFIG_SPI_BFIN5XX_MODULE)
 /* SPI (0) */
 static struct bfin5xx_spi_master bfin_spi0_info = {
 	.num_chipselect = 6,
@@ -635,7 +635,7 @@ static struct platform_device *tcm_devices[] __initdata = {
 	&bfin_mac_device,
 #endif
 
-#if defined(CONFIG_SPI_BFIN) || defined(CONFIG_SPI_BFIN_MODULE)
+#if defined(CONFIG_SPI_BFIN5XX) || defined(CONFIG_SPI_BFIN5XX_MODULE)
 	&bfin_spi0_device,
 	&bfin_spi1_device,
 #endif
diff --git a/arch/blackfin/mach-bf527/boards/ad7160eval.c b/arch/blackfin/mach-bf527/boards/ad7160eval.c
index 5bc6938..c8d5d2b 100644
--- a/arch/blackfin/mach-bf527/boards/ad7160eval.c
+++ b/arch/blackfin/mach-bf527/boards/ad7160eval.c
@@ -334,7 +334,7 @@ static struct spi_board_info bfin_spi_board_info[] __initdata = {
 #endif
 };
 
-#if defined(CONFIG_SPI_BFIN) || defined(CONFIG_SPI_BFIN_MODULE)
+#if defined(CONFIG_SPI_BFIN5XX) || defined(CONFIG_SPI_BFIN5XX_MODULE)
 /* SPI controller data */
 static struct bfin5xx_spi_master bfin_spi0_info = {
 	.num_chipselect = MAX_CTRL_CS + MAX_BLACKFIN_GPIOS,
@@ -744,7 +744,7 @@ static struct platform_device *stamp_devices[] __initdata = {
 	&bfin_mac_device,
 #endif
 
-#if defined(CONFIG_SPI_BFIN) || defined(CONFIG_SPI_BFIN_MODULE)
+#if defined(CONFIG_SPI_BFIN5XX) || defined(CONFIG_SPI_BFIN5XX_MODULE)
 	&bfin_spi0_device,
 #endif
 
diff --git a/arch/blackfin/mach-bf527/boards/cm_bf527.c b/arch/blackfin/mach-bf527/boards/cm_bf527.c
index cd28969..7330607 100644
--- a/arch/blackfin/mach-bf527/boards/cm_bf527.c
+++ b/arch/blackfin/mach-bf527/boards/cm_bf527.c
@@ -444,7 +444,7 @@ static struct spi_board_info bfin_spi_board_info[] __initdata = {
 #endif
 };
 
-#if defined(CONFIG_SPI_BFIN) || defined(CONFIG_SPI_BFIN_MODULE)
+#if defined(CONFIG_SPI_BFIN5XX) || defined(CONFIG_SPI_BFIN5XX_MODULE)
 /* SPI controller data */
 static struct bfin5xx_spi_master bfin_spi0_info = {
 	.num_chipselect = 8,
@@ -893,7 +893,7 @@ static struct platform_device *cmbf527_devices[] __initdata = {
 	&net2272_bfin_device,
 #endif
 
-#if defined(CONFIG_SPI_BFIN) || defined(CONFIG_SPI_BFIN_MODULE)
+#if defined(CONFIG_SPI_BFIN5XX) || defined(CONFIG_SPI_BFIN5XX_MODULE)
 	&bfin_spi0_device,
 #endif
 
diff --git a/arch/blackfin/mach-bf527/boards/ezbrd.c b/arch/blackfin/mach-bf527/boards/ezbrd.c
index 9f792ea..db3ecfc 100644
--- a/arch/blackfin/mach-bf527/boards/ezbrd.c
+++ b/arch/blackfin/mach-bf527/boards/ezbrd.c
@@ -371,7 +371,7 @@ static struct spi_board_info bfin_spi_board_info[] __initdata = {
 #endif
 };
 
-#if defined(CONFIG_SPI_BFIN) || defined(CONFIG_SPI_BFIN_MODULE)
+#if defined(CONFIG_SPI_BFIN5XX) || defined(CONFIG_SPI_BFIN5XX_MODULE)
 /* SPI controller data */
 static struct bfin5xx_spi_master bfin_spi0_info = {
 	.num_chipselect = 8,
@@ -776,7 +776,7 @@ static struct platform_device *stamp_devices[] __initdata = {
 	&bfin_mac_device,
 #endif
 
-#if defined(CONFIG_SPI_BFIN) || defined(CONFIG_SPI_BFIN_MODULE)
+#if defined(CONFIG_SPI_BFIN5XX) || defined(CONFIG_SPI_BFIN5XX_MODULE)
 	&bfin_spi0_device,
 #endif
 
diff --git a/arch/blackfin/mach-bf527/boards/ezkit.c b/arch/blackfin/mach-bf527/boards/ezkit.c
index 3ecafff..dfdd8e6 100644
--- a/arch/blackfin/mach-bf527/boards/ezkit.c
+++ b/arch/blackfin/mach-bf527/boards/ezkit.c
@@ -664,7 +664,7 @@ static struct spi_board_info bfin_spi_board_info[] __initdata = {
 #endif
 };
 
-#if defined(CONFIG_SPI_BFIN) || defined(CONFIG_SPI_BFIN_MODULE)
+#if defined(CONFIG_SPI_BFIN5XX) || defined(CONFIG_SPI_BFIN5XX_MODULE)
 /* SPI controller data */
 static struct bfin5xx_spi_master bfin_spi0_info = {
 	.num_chipselect = 8,
@@ -1189,7 +1189,7 @@ static struct platform_device *stamp_devices[] __initdata = {
 	&net2272_bfin_device,
 #endif
 
-#if defined(CONFIG_SPI_BFIN) || defined(CONFIG_SPI_BFIN_MODULE)
+#if defined(CONFIG_SPI_BFIN5XX) || defined(CONFIG_SPI_BFIN5XX_MODULE)
 	&bfin_spi0_device,
 #endif
 
diff --git a/arch/blackfin/mach-bf527/boards/tll6527m.c b/arch/blackfin/mach-bf527/boards/tll6527m.c
index 3a92c43..360e97f 100644
--- a/arch/blackfin/mach-bf527/boards/tll6527m.c
+++ b/arch/blackfin/mach-bf527/boards/tll6527m.c
@@ -448,7 +448,7 @@ static struct spi_board_info bfin_spi_board_info[] __initdata = {
 #endif
 };
 
-#if defined(CONFIG_SPI_BFIN) || defined(CONFIG_SPI_BFIN_MODULE)
+#if defined(CONFIG_SPI_BFIN5XX) || defined(CONFIG_SPI_BFIN5XX_MODULE)
 /* SPI controller data */
 static struct bfin5xx_spi_master bfin_spi0_info = {
 	.num_chipselect = EXP_GPIO_SPISEL_BASE + 8 + MAX_CTRL_CS,
@@ -831,7 +831,7 @@ static struct platform_device *tll6527m_devices[] __initdata = {
 	&bfin_mac_device,
 #endif
 
-#if defined(CONFIG_SPI_BFIN) || defined(CONFIG_SPI_BFIN_MODULE)
+#if defined(CONFIG_SPI_BFIN5XX) || defined(CONFIG_SPI_BFIN5XX_MODULE)
 	&bfin_spi0_device,
 #endif
 
diff --git a/arch/blackfin/mach-bf533/boards/H8606.c b/arch/blackfin/mach-bf533/boards/H8606.c
index 47cadd3..6cb7b3e 100644
--- a/arch/blackfin/mach-bf533/boards/H8606.c
+++ b/arch/blackfin/mach-bf533/boards/H8606.c
@@ -125,7 +125,7 @@ static struct platform_device net2272_bfin_device = {
 };
 #endif
 
-#if defined(CONFIG_SPI_BFIN) || defined(CONFIG_SPI_BFIN_MODULE)
+#if defined(CONFIG_SPI_BFIN5XX) || defined(CONFIG_SPI_BFIN5XX_MODULE)
 /* all SPI peripherals info goes here */
 
 #if defined(CONFIG_MTD_M25P80) || defined(CONFIG_MTD_M25P80_MODULE)
@@ -398,7 +398,7 @@ static struct platform_device *h8606_devices[] __initdata = {
 	&net2272_bfin_device,
 #endif
 
-#if defined(CONFIG_SPI_BFIN) || defined(CONFIG_SPI_BFIN_MODULE)
+#if defined(CONFIG_SPI_BFIN5XX) || defined(CONFIG_SPI_BFIN5XX_MODULE)
 	&bfin_spi0_device,
 #endif
 
@@ -428,7 +428,7 @@ static int __init H8606_init(void)
 	printk(KERN_INFO "HV Sistemas H8606 board support by http://www.hvsistemas.com\n");
 	printk(KERN_INFO "%s(): registering device resources\n", __func__);
 	platform_add_devices(h8606_devices, ARRAY_SIZE(h8606_devices));
-#if defined(CONFIG_SPI_BFIN) || defined(CONFIG_SPI_BFIN_MODULE)
+#if defined(CONFIG_SPI_BFIN5XX) || defined(CONFIG_SPI_BFIN5XX_MODULE)
 	spi_register_board_info(bfin_spi_board_info, ARRAY_SIZE(bfin_spi_board_info));
 #endif
 	return 0;
diff --git a/arch/blackfin/mach-bf533/boards/blackstamp.c b/arch/blackfin/mach-bf533/boards/blackstamp.c
index 18817d5..de44a37 100644
--- a/arch/blackfin/mach-bf533/boards/blackstamp.c
+++ b/arch/blackfin/mach-bf533/boards/blackstamp.c
@@ -146,7 +146,7 @@ static struct spi_board_info bfin_spi_board_info[] __initdata = {
 #endif
 };
 
-#if defined(CONFIG_SPI_BFIN) || defined(CONFIG_SPI_BFIN_MODULE)
+#if defined(CONFIG_SPI_BFIN5XX) || defined(CONFIG_SPI_BFIN5XX_MODULE)
 /* SPI (0) */
 static struct resource bfin_spi0_resource[] = {
 	[0] = {
@@ -422,7 +422,7 @@ static struct platform_device *stamp_devices[] __initdata = {
 #endif
 
 
-#if defined(CONFIG_SPI_BFIN) || defined(CONFIG_SPI_BFIN_MODULE)
+#if defined(CONFIG_SPI_BFIN5XX) || defined(CONFIG_SPI_BFIN5XX_MODULE)
 	&bfin_spi0_device,
 #endif
 
diff --git a/arch/blackfin/mach-bf533/boards/cm_bf533.c b/arch/blackfin/mach-bf533/boards/cm_bf533.c
index 2c8f30e..fe47e04 100644
--- a/arch/blackfin/mach-bf533/boards/cm_bf533.c
+++ b/arch/blackfin/mach-bf533/boards/cm_bf533.c
@@ -29,7 +29,7 @@
  */
 const char bfin_board_name[] = "Bluetechnix CM BF533";
 
-#if defined(CONFIG_SPI_BFIN) || defined(CONFIG_SPI_BFIN_MODULE)
+#if defined(CONFIG_SPI_BFIN5XX) || defined(CONFIG_SPI_BFIN5XX_MODULE)
 /* all SPI peripherals info goes here */
 #if defined(CONFIG_MTD_M25P80) || defined(CONFIG_MTD_M25P80_MODULE)
 static struct mtd_partition bfin_spi_flash_partitions[] = {
@@ -536,7 +536,7 @@ static struct platform_device *cm_bf533_devices[] __initdata = {
 	&net2272_bfin_device,
 #endif
 
-#if defined(CONFIG_SPI_BFIN) || defined(CONFIG_SPI_BFIN_MODULE)
+#if defined(CONFIG_SPI_BFIN5XX) || defined(CONFIG_SPI_BFIN5XX_MODULE)
 	&bfin_spi0_device,
 #endif
 
@@ -549,7 +549,7 @@ static int __init cm_bf533_init(void)
 {
 	printk(KERN_INFO "%s(): registering device resources\n", __func__);
 	platform_add_devices(cm_bf533_devices, ARRAY_SIZE(cm_bf533_devices));
-#if defined(CONFIG_SPI_BFIN) || defined(CONFIG_SPI_BFIN_MODULE)
+#if defined(CONFIG_SPI_BFIN5XX) || defined(CONFIG_SPI_BFIN5XX_MODULE)
 	spi_register_board_info(bfin_spi_board_info, ARRAY_SIZE(bfin_spi_board_info));
 #endif
 	return 0;
diff --git a/arch/blackfin/mach-bf533/boards/ezkit.c b/arch/blackfin/mach-bf533/boards/ezkit.c
index 144556e..07811c2 100644
--- a/arch/blackfin/mach-bf533/boards/ezkit.c
+++ b/arch/blackfin/mach-bf533/boards/ezkit.c
@@ -245,7 +245,7 @@ static struct spi_board_info bfin_spi_board_info[] __initdata = {
 #endif
 };
 
-#if defined(CONFIG_SPI_BFIN) || defined(CONFIG_SPI_BFIN_MODULE)
+#if defined(CONFIG_SPI_BFIN5XX) || defined(CONFIG_SPI_BFIN5XX_MODULE)
 /* SPI (0) */
 static struct resource bfin_spi0_resource[] = {
 	[0] = {
@@ -484,7 +484,7 @@ static struct platform_device *ezkit_devices[] __initdata = {
 	&smc91x_device,
 #endif
 
-#if defined(CONFIG_SPI_BFIN) || defined(CONFIG_SPI_BFIN_MODULE)
+#if defined(CONFIG_SPI_BFIN5XX) || defined(CONFIG_SPI_BFIN5XX_MODULE)
 	&bfin_spi0_device,
 #endif
 
diff --git a/arch/blackfin/mach-bf533/boards/ip0x.c b/arch/blackfin/mach-bf533/boards/ip0x.c
index b597d4e..e303dae 100644
--- a/arch/blackfin/mach-bf533/boards/ip0x.c
+++ b/arch/blackfin/mach-bf533/boards/ip0x.c
@@ -104,7 +104,7 @@ static struct platform_device dm9000_device2 = {
 #endif
 
 
-#if defined(CONFIG_SPI_BFIN) || defined(CONFIG_SPI_BFIN_MODULE)
+#if defined(CONFIG_SPI_BFIN5XX) || defined(CONFIG_SPI_BFIN5XX_MODULE)
 /* all SPI peripherals info goes here */
 
 #if defined(CONFIG_MMC_SPI) || defined(CONFIG_MMC_SPI_MODULE)
@@ -270,7 +270,7 @@ static struct platform_device *ip0x_devices[] __initdata = {
 #endif
 #endif
 
-#if defined(CONFIG_SPI_BFIN) || defined(CONFIG_SPI_BFIN_MODULE)
+#if defined(CONFIG_SPI_BFIN5XX) || defined(CONFIG_SPI_BFIN5XX_MODULE)
 	&spi_bfin_master_device,
 #endif
 
diff --git a/arch/blackfin/mach-bf533/boards/stamp.c b/arch/blackfin/mach-bf533/boards/stamp.c
index 2afd02e..ce88a71 100644
--- a/arch/blackfin/mach-bf533/boards/stamp.c
+++ b/arch/blackfin/mach-bf533/boards/stamp.c
@@ -219,9 +219,10 @@ static struct spi_board_info bfin_spi_board_info[] __initdata = {
 	},
 #endif
 
-#if defined(CONFIG_SND_BF5XX_SOC_AD183X) || defined(CONFIG_SND_BF5XX_SOC_AD183X_MODULE)
+#if defined(CONFIG_SND_BF5XX_SOC_AD1836) || \
+	defined(CONFIG_SND_BF5XX_SOC_AD1836_MODULE)
 	{
-		.modalias = "ad183x",
+		.modalias = "ad1836",
 		.max_speed_hz = 3125000,     /* max spi clock (SCK) speed in HZ */
 		.bus_num = 0,
 		.chip_select = 4,
@@ -251,7 +252,7 @@ static struct spi_board_info bfin_spi_board_info[] __initdata = {
 #endif
 };
 
-#if defined(CONFIG_SPI_BFIN) || defined(CONFIG_SPI_BFIN_MODULE)
+#if defined(CONFIG_SPI_BFIN5XX) || defined(CONFIG_SPI_BFIN5XX_MODULE)
 /* SPI (0) */
 static struct resource bfin_spi0_resource[] = {
 	[0] = {
@@ -471,7 +472,7 @@ static struct i2c_gpio_platform_data i2c_gpio_data = {
 	.scl_pin		= GPIO_PF3,
 	.sda_is_open_drain	= 0,
 	.scl_is_open_drain	= 0,
-	.udelay			= 40,
+	.udelay			= 10,
 };
 
 static struct platform_device i2c_gpio_device = {
@@ -540,27 +541,150 @@ static struct platform_device bfin_dpmc = {
 	},
 };
 
+#if defined(CONFIG_SND_BF5XX_I2S) || defined(CONFIG_SND_BF5XX_I2S_MODULE) || \
+	defined(CONFIG_SND_BF5XX_TDM) || defined(CONFIG_SND_BF5XX_TDM_MODULE) \
+	|| defined(CONFIG_SND_BF5XX_AC97) || \
+	defined(CONFIG_SND_BF5XX_AC97_MODULE)
+
+#include <asm/bfin_sport.h>
+
+#define SPORT_REQ(x) \
+	[x] = {P_SPORT##x##_TFS, P_SPORT##x##_DTPRI, P_SPORT##x##_TSCLK, \
+		P_SPORT##x##_RFS, P_SPORT##x##_DRPRI, P_SPORT##x##_RSCLK, 0}
+
+static const u16 bfin_snd_pin[][7] = {
+	SPORT_REQ(0),
+	SPORT_REQ(1),
+};
+
+static struct bfin_snd_platform_data bfin_snd_data[] = {
+	{
+		.pin_req = &bfin_snd_pin[0][0],
+	},
+	{
+		.pin_req = &bfin_snd_pin[1][0],
+	},
+};
+
+#define BFIN_SND_RES(x) \
+	[x] = { \
+		{ \
+			.start = SPORT##x##_TCR1, \
+			.end = SPORT##x##_TCR1, \
+			.flags = IORESOURCE_MEM \
+		}, \
+		{ \
+			.start = CH_SPORT##x##_RX, \
+			.end = CH_SPORT##x##_RX, \
+			.flags = IORESOURCE_DMA, \
+		}, \
+		{ \
+			.start = CH_SPORT##x##_TX, \
+			.end = CH_SPORT##x##_TX, \
+			.flags = IORESOURCE_DMA, \
+		}, \
+		{ \
+			.start = IRQ_SPORT##x##_ERROR, \
+			.end = IRQ_SPORT##x##_ERROR, \
+			.flags = IORESOURCE_IRQ, \
+		} \
+	}
+
+static struct resource bfin_snd_resources[][4] = {
+	BFIN_SND_RES(0),
+	BFIN_SND_RES(1),
+};
+#endif
+
 #if defined(CONFIG_SND_BF5XX_I2S) || defined(CONFIG_SND_BF5XX_I2S_MODULE)
+static struct platform_device bfin_i2s_pcm = {
+	.name = "bfin-i2s-pcm-audio",
+	.id = -1,
+};
+#endif
+
+#if defined(CONFIG_SND_BF5XX_TDM) || defined(CONFIG_SND_BF5XX_TDM_MODULE)
+static struct platform_device bfin_tdm_pcm = {
+	.name = "bfin-tdm-pcm-audio",
+	.id = -1,
+};
+#endif
+
+#if defined(CONFIG_SND_BF5XX_AC97) || defined(CONFIG_SND_BF5XX_AC97_MODULE)
+static struct platform_device bfin_ac97_pcm = {
+	.name = "bfin-ac97-pcm-audio",
+	.id = -1,
+};
+#endif
+
+#if defined(CONFIG_SND_BF5XX_SOC_AD73311) || \
+	defined(CONFIG_SND_BF5XX_SOC_AD73311_MODULE)
+static const unsigned ad73311_gpio[] = {
+	GPIO_PF4,
+};
+
+static struct platform_device bfin_ad73311_machine = {
+	.name = "bfin-snd-ad73311",
+	.id = 1,
+	.dev = {
+		.platform_data = (void *)ad73311_gpio,
+	},
+};
+#endif
+
+#if defined(CONFIG_SND_SOC_AD73311) || defined(CONFIG_SND_SOC_AD73311_MODULE)
+static struct platform_device bfin_ad73311_codec_device = {
+	.name = "ad73311",
+	.id = -1,
+};
+#endif
+
+#if defined(CONFIG_SND_SOC_AD74111) || defined(CONFIG_SND_SOC_AD74111_MODULE)
+static struct platform_device bfin_ad74111_codec_device = {
+	.name = "ad74111",
+	.id = -1,
+};
+#endif
+
+#if defined(CONFIG_SND_BF5XX_SOC_I2S) || \
+	defined(CONFIG_SND_BF5XX_SOC_I2S_MODULE)
 static struct platform_device bfin_i2s = {
 	.name = "bfin-i2s",
 	.id = CONFIG_SND_BF5XX_SPORT_NUM,
-	/* TODO: add platform data here */
+	.num_resources =
+		ARRAY_SIZE(bfin_snd_resources[CONFIG_SND_BF5XX_SPORT_NUM]),
+	.resource = bfin_snd_resources[CONFIG_SND_BF5XX_SPORT_NUM],
+	.dev = {
+		.platform_data = &bfin_snd_data[CONFIG_SND_BF5XX_SPORT_NUM],
+	},
 };
 #endif
 
-#if defined(CONFIG_SND_BF5XX_TDM) || defined(CONFIG_SND_BF5XX_TDM_MODULE)
+#if defined(CONFIG_SND_BF5XX_SOC_TDM) || \
+	defined(CONFIG_SND_BF5XX_SOC_TDM_MODULE)
 static struct platform_device bfin_tdm = {
 	.name = "bfin-tdm",
 	.id = CONFIG_SND_BF5XX_SPORT_NUM,
-	/* TODO: add platform data here */
+	.num_resources =
+		ARRAY_SIZE(bfin_snd_resources[CONFIG_SND_BF5XX_SPORT_NUM]),
+	.resource = bfin_snd_resources[CONFIG_SND_BF5XX_SPORT_NUM],
+	.dev = {
+		.platform_data = &bfin_snd_data[CONFIG_SND_BF5XX_SPORT_NUM],
+	},
 };
 #endif
 
-#if defined(CONFIG_SND_BF5XX_AC97) || defined(CONFIG_SND_BF5XX_AC97_MODULE)
+#if defined(CONFIG_SND_BF5XX_SOC_AC97) || \
+	defined(CONFIG_SND_BF5XX_SOC_AC97_MODULE)
 static struct platform_device bfin_ac97 = {
 	.name = "bfin-ac97",
 	.id = CONFIG_SND_BF5XX_SPORT_NUM,
-	/* TODO: add platform data here */
+	.num_resources =
+		ARRAY_SIZE(bfin_snd_resources[CONFIG_SND_BF5XX_SPORT_NUM]),
+	.resource = bfin_snd_resources[CONFIG_SND_BF5XX_SPORT_NUM],
+	.dev = {
+		.platform_data = &bfin_snd_data[CONFIG_SND_BF5XX_SPORT_NUM],
+	},
 };
 #endif
 
@@ -580,7 +704,7 @@ static struct platform_device *stamp_devices[] __initdata = {
 	&net2272_bfin_device,
 #endif
 
-#if defined(CONFIG_SPI_BFIN) || defined(CONFIG_SPI_BFIN_MODULE)
+#if defined(CONFIG_SPI_BFIN5XX) || defined(CONFIG_SPI_BFIN5XX_MODULE)
 	&bfin_spi0_device,
 #endif
 
@@ -596,7 +720,8 @@ static struct platform_device *stamp_devices[] __initdata = {
 #endif
 #endif
 
-#if defined(CONFIG_SERIAL_BFIN_SPORT) || defined(CONFIG_SERIAL_BFIN_SPORT_MODULE)
+#if defined(CONFIG_SERIAL_BFIN_SPORT) || \
+	defined(CONFIG_SERIAL_BFIN_SPORT_MODULE)
 #ifdef CONFIG_SERIAL_BFIN_SPORT0_UART
 	&bfin_sport0_uart_device,
 #endif
@@ -618,14 +743,42 @@ static struct platform_device *stamp_devices[] __initdata = {
 #endif
 
 #if defined(CONFIG_SND_BF5XX_I2S) || defined(CONFIG_SND_BF5XX_I2S_MODULE)
-	&bfin_i2s,
+	&bfin_i2s_pcm,
 #endif
 
 #if defined(CONFIG_SND_BF5XX_TDM) || defined(CONFIG_SND_BF5XX_TDM_MODULE)
-	&bfin_tdm,
+	&bfin_tdm_pcm,
 #endif
 
 #if defined(CONFIG_SND_BF5XX_AC97) || defined(CONFIG_SND_BF5XX_AC97_MODULE)
+	&bfin_ac97_pcm,
+#endif
+
+#if defined(CONFIG_SND_BF5XX_SOC_AD73311) || \
+	defined(CONFIG_SND_BF5XX_SOC_AD73311_MODULE)
+	&bfin_ad73311_machine,
+#endif
+
+#if defined(CONFIG_SND_SOC_AD73311) || defined(CONFIG_SND_SOC_AD73311_MODULE)
+	&bfin_ad73311_codec_device,
+#endif
+
+#if defined(CONFIG_SND_SOC_AD74111) || defined(CONFIG_SND_SOC_AD74111_MODULE)
+	&bfin_ad74111_codec_device,
+#endif
+
+#if defined(CONFIG_SND_BF5XX_SOC_I2S) || \
+	defined(CONFIG_SND_BF5XX_SOC_I2S_MODULE)
+	&bfin_i2s,
+#endif
+
+#if defined(CONFIG_SND_BF5XX_SOC_TDM) || \
+	defined(CONFIG_SND_BF5XX_SOC_TDM_MODULE)
+	&bfin_tdm,
+#endif
+
+#if defined(CONFIG_SND_BF5XX_SOC_AC97) || \
+	defined(CONFIG_SND_BF5XX_SOC_AC97_MODULE)
 	&bfin_ac97,
 #endif
 };
diff --git a/arch/blackfin/mach-bf537/boards/cm_bf537e.c b/arch/blackfin/mach-bf537/boards/cm_bf537e.c
index 604a430..0d4a2f6 100644
--- a/arch/blackfin/mach-bf537/boards/cm_bf537e.c
+++ b/arch/blackfin/mach-bf537/boards/cm_bf537e.c
@@ -31,7 +31,7 @@
  */
 const char bfin_board_name[] = "Bluetechnix CM BF537E";
 
-#if defined(CONFIG_SPI_BFIN) || defined(CONFIG_SPI_BFIN_MODULE)
+#if defined(CONFIG_SPI_BFIN5XX) || defined(CONFIG_SPI_BFIN5XX_MODULE)
 /* all SPI peripherals info goes here */
 
 #if defined(CONFIG_MTD_M25P80) || defined(CONFIG_MTD_M25P80_MODULE)
@@ -735,7 +735,7 @@ static struct platform_device *cm_bf537e_devices[] __initdata = {
 	&net2272_bfin_device,
 #endif
 
-#if defined(CONFIG_SPI_BFIN) || defined(CONFIG_SPI_BFIN_MODULE)
+#if defined(CONFIG_SPI_BFIN5XX) || defined(CONFIG_SPI_BFIN5XX_MODULE)
 	&bfin_spi0_device,
 #endif
 
@@ -770,7 +770,7 @@ static int __init cm_bf537e_init(void)
 {
 	printk(KERN_INFO "%s(): registering device resources\n", __func__);
 	platform_add_devices(cm_bf537e_devices, ARRAY_SIZE(cm_bf537e_devices));
-#if defined(CONFIG_SPI_BFIN) || defined(CONFIG_SPI_BFIN_MODULE)
+#if defined(CONFIG_SPI_BFIN5XX) || defined(CONFIG_SPI_BFIN5XX_MODULE)
 	spi_register_board_info(bfin_spi_board_info, ARRAY_SIZE(bfin_spi_board_info));
 #endif
 
diff --git a/arch/blackfin/mach-bf537/boards/cm_bf537u.c b/arch/blackfin/mach-bf537/boards/cm_bf537u.c
index d916b46..f553698 100644
--- a/arch/blackfin/mach-bf537/boards/cm_bf537u.c
+++ b/arch/blackfin/mach-bf537/boards/cm_bf537u.c
@@ -32,7 +32,7 @@
  */
 const char bfin_board_name[] = "Bluetechnix CM BF537U";
 
-#if defined(CONFIG_SPI_BFIN) || defined(CONFIG_SPI_BFIN_MODULE)
+#if defined(CONFIG_SPI_BFIN5XX) || defined(CONFIG_SPI_BFIN5XX_MODULE)
 /* all SPI peripherals info goes here */
 
 #if defined(CONFIG_MTD_M25P80) || defined(CONFIG_MTD_M25P80_MODULE)
@@ -700,7 +700,7 @@ static struct platform_device *cm_bf537u_devices[] __initdata = {
 	&net2272_bfin_device,
 #endif
 
-#if defined(CONFIG_SPI_BFIN) || defined(CONFIG_SPI_BFIN_MODULE)
+#if defined(CONFIG_SPI_BFIN5XX) || defined(CONFIG_SPI_BFIN5XX_MODULE)
 	&bfin_spi0_device,
 #endif
 
@@ -747,7 +747,7 @@ static int __init cm_bf537u_init(void)
 {
 	printk(KERN_INFO "%s(): registering device resources\n", __func__);
 	platform_add_devices(cm_bf537u_devices, ARRAY_SIZE(cm_bf537u_devices));
-#if defined(CONFIG_SPI_BFIN) || defined(CONFIG_SPI_BFIN_MODULE)
+#if defined(CONFIG_SPI_BFIN5XX) || defined(CONFIG_SPI_BFIN5XX_MODULE)
 	spi_register_board_info(bfin_spi_board_info, ARRAY_SIZE(bfin_spi_board_info));
 #endif
 
diff --git a/arch/blackfin/mach-bf537/boards/dnp5370.c b/arch/blackfin/mach-bf537/boards/dnp5370.c
index 5f30722..11dadeb 100644
--- a/arch/blackfin/mach-bf537/boards/dnp5370.c
+++ b/arch/blackfin/mach-bf537/boards/dnp5370.c
@@ -125,7 +125,7 @@ static struct platform_device asmb_flash_device = {
 };
 #endif
 
-#if defined(CONFIG_SPI_BFIN) || defined(CONFIG_SPI_BFIN_MODULE)
+#if defined(CONFIG_SPI_BFIN5XX) || defined(CONFIG_SPI_BFIN5XX_MODULE)
 
 #if defined(CONFIG_MMC_SPI) || defined(CONFIG_MMC_SPI_MODULE)
 
@@ -370,7 +370,7 @@ static struct platform_device *dnp5370_devices[] __initdata = {
 	&bfin_mac_device,
 #endif
 
-#if defined(CONFIG_SPI_BFIN) || defined(CONFIG_SPI_BFIN_MODULE)
+#if defined(CONFIG_SPI_BFIN5XX) || defined(CONFIG_SPI_BFIN5XX_MODULE)
 	&spi_bfin_master_device,
 #endif
 
diff --git a/arch/blackfin/mach-bf537/boards/minotaur.c b/arch/blackfin/mach-bf537/boards/minotaur.c
index 3901dd0..d2d7128 100644
--- a/arch/blackfin/mach-bf537/boards/minotaur.c
+++ b/arch/blackfin/mach-bf537/boards/minotaur.c
@@ -121,7 +121,7 @@ static struct platform_device net2272_bfin_device = {
 };
 #endif
 
-#if defined(CONFIG_SPI_BFIN) || defined(CONFIG_SPI_BFIN_MODULE)
+#if defined(CONFIG_SPI_BFIN5XX) || defined(CONFIG_SPI_BFIN5XX_MODULE)
 /* all SPI peripherals info goes here */
 
 #if defined(CONFIG_MTD_M25P80) \
@@ -496,7 +496,7 @@ static struct platform_device *minotaur_devices[] __initdata = {
 	&net2272_bfin_device,
 #endif
 
-#if defined(CONFIG_SPI_BFIN) || defined(CONFIG_SPI_BFIN_MODULE)
+#if defined(CONFIG_SPI_BFIN5XX) || defined(CONFIG_SPI_BFIN5XX_MODULE)
 	&bfin_spi0_device,
 #endif
 
@@ -537,7 +537,7 @@ static int __init minotaur_init(void)
 {
 	printk(KERN_INFO "%s(): registering device resources\n", __func__);
 	platform_add_devices(minotaur_devices, ARRAY_SIZE(minotaur_devices));
-#if defined(CONFIG_SPI_BFIN) || defined(CONFIG_SPI_BFIN_MODULE)
+#if defined(CONFIG_SPI_BFIN5XX) || defined(CONFIG_SPI_BFIN5XX_MODULE)
 	spi_register_board_info(bfin_spi_board_info,
 				ARRAY_SIZE(bfin_spi_board_info));
 #endif
diff --git a/arch/blackfin/mach-bf537/boards/pnav10.c b/arch/blackfin/mach-bf537/boards/pnav10.c
index aebd31c..6fd8470 100644
--- a/arch/blackfin/mach-bf537/boards/pnav10.c
+++ b/arch/blackfin/mach-bf537/boards/pnav10.c
@@ -154,7 +154,7 @@ static struct platform_device net2272_bfin_device = {
 };
 #endif
 
-#if defined(CONFIG_SPI_BFIN) || defined(CONFIG_SPI_BFIN_MODULE)
+#if defined(CONFIG_SPI_BFIN5XX) || defined(CONFIG_SPI_BFIN5XX_MODULE)
 /* all SPI peripherals info goes here */
 
 #if defined(CONFIG_MTD_M25P80) \
@@ -477,7 +477,7 @@ static struct platform_device *stamp_devices[] __initdata = {
 	&net2272_bfin_device,
 #endif
 
-#if defined(CONFIG_SPI_BFIN) || defined(CONFIG_SPI_BFIN_MODULE)
+#if defined(CONFIG_SPI_BFIN5XX) || defined(CONFIG_SPI_BFIN5XX_MODULE)
 	&bfin_spi0_device,
 #endif
 
@@ -508,7 +508,7 @@ static int __init pnav_init(void)
 {
 	printk(KERN_INFO "%s(): registering device resources\n", __func__);
 	platform_add_devices(stamp_devices, ARRAY_SIZE(stamp_devices));
-#if defined(CONFIG_SPI_BFIN) || defined(CONFIG_SPI_BFIN_MODULE)
+#if defined(CONFIG_SPI_BFIN5XX) || defined(CONFIG_SPI_BFIN5XX_MODULE)
 	spi_register_board_info(bfin_spi_board_info,
 				ARRAY_SIZE(bfin_spi_board_info));
 #endif
diff --git a/arch/blackfin/mach-bf537/boards/stamp.c b/arch/blackfin/mach-bf537/boards/stamp.c
index 7fbb0bb..2221173 100644
--- a/arch/blackfin/mach-bf537/boards/stamp.c
+++ b/arch/blackfin/mach-bf537/boards/stamp.c
@@ -1420,7 +1420,7 @@ static struct spi_board_info bfin_spi_board_info[] __initdata = {
 #endif
 };
 
-#if defined(CONFIG_SPI_BFIN) || defined(CONFIG_SPI_BFIN_MODULE)
+#if defined(CONFIG_SPI_BFIN5XX) || defined(CONFIG_SPI_BFIN5XX_MODULE)
 /* SPI controller data */
 static struct bfin5xx_spi_master bfin_spi0_info = {
 	.num_chipselect = MAX_CTRL_CS + MAX_BLACKFIN_GPIOS,
@@ -1462,7 +1462,7 @@ static struct platform_device bfin_spi0_device = {
 
 /* SPORT SPI controller data */
 static struct bfin5xx_spi_master bfin_sport_spi0_info = {
-	.num_chipselect = 1, /* master only supports one device */
+	.num_chipselect = MAX_BLACKFIN_GPIOS,
 	.enable_dma = 0,  /* master don't support DMA */
 	.pin_req = {P_SPORT0_DTPRI, P_SPORT0_TSCLK, P_SPORT0_DRPRI,
 		P_SPORT0_RSCLK, P_SPORT0_TFS, P_SPORT0_RFS, 0},
@@ -1492,7 +1492,7 @@ static struct platform_device bfin_sport_spi0_device = {
 };
 
 static struct bfin5xx_spi_master bfin_sport_spi1_info = {
-	.num_chipselect = 1, /* master only supports one device */
+	.num_chipselect = MAX_BLACKFIN_GPIOS,
 	.enable_dma = 0,  /* master don't support DMA */
 	.pin_req = {P_SPORT1_DTPRI, P_SPORT1_TSCLK, P_SPORT1_DRPRI,
 		P_SPORT1_RSCLK, P_SPORT1_TFS, P_SPORT1_RFS, 0},
@@ -1558,6 +1558,71 @@ static struct platform_device bfin_lq035q1_device = {
 };
 #endif
 
+#if defined(CONFIG_VIDEO_BLACKFIN_CAPTURE) \
+	|| defined(CONFIG_VIDEO_BLACKFIN_CAPTURE_MODULE)
+#include <linux/videodev2.h>
+#include <media/blackfin/bfin_capture.h>
+#include <media/blackfin/ppi.h>
+
+static const unsigned short ppi_req[] = {
+	P_PPI0_D0, P_PPI0_D1, P_PPI0_D2, P_PPI0_D3,
+	P_PPI0_D4, P_PPI0_D5, P_PPI0_D6, P_PPI0_D7,
+	P_PPI0_CLK, P_PPI0_FS1, P_PPI0_FS2,
+	0,
+};
+
+static const struct ppi_info ppi_info = {
+	.type = PPI_TYPE_PPI,
+	.dma_ch = CH_PPI,
+	.irq_err = IRQ_PPI_ERROR,
+	.base = (void __iomem *)PPI_CONTROL,
+	.pin_req = ppi_req,
+};
+
+#if defined(CONFIG_VIDEO_VS6624) \
+	|| defined(CONFIG_VIDEO_VS6624_MODULE)
+static struct v4l2_input vs6624_inputs[] = {
+	{
+		.index = 0,
+		.name = "Camera",
+		.type = V4L2_INPUT_TYPE_CAMERA,
+		.std = V4L2_STD_UNKNOWN,
+	},
+};
+
+static struct bcap_route vs6624_routes[] = {
+	{
+		.input = 0,
+		.output = 0,
+	},
+};
+
+static const unsigned vs6624_ce_pin = GPIO_PF10;
+
+static struct bfin_capture_config bfin_capture_data = {
+	.card_name = "BF537",
+	.inputs = vs6624_inputs,
+	.num_inputs = ARRAY_SIZE(vs6624_inputs),
+	.routes = vs6624_routes,
+	.i2c_adapter_id = 0,
+	.board_info = {
+		.type = "vs6624",
+		.addr = 0x10,
+		.platform_data = (void *)&vs6624_ce_pin,
+	},
+	.ppi_info = &ppi_info,
+	.ppi_control = (PACK_EN | DLEN_8 | XFR_TYPE | 0x0020),
+};
+#endif
+
+static struct platform_device bfin_capture_device = {
+	.name = "bfin_capture",
+	.dev = {
+		.platform_data = &bfin_capture_data,
+	},
+};
+#endif
+
 #if defined(CONFIG_SERIAL_BFIN) || defined(CONFIG_SERIAL_BFIN_MODULE)
 #ifdef CONFIG_SERIAL_BFIN_UART0
 static struct resource bfin_uart0_resources[] = {
@@ -2716,7 +2781,7 @@ static struct platform_device *stamp_devices[] __initdata = {
 	&net2272_bfin_device,
 #endif
 
-#if defined(CONFIG_SPI_BFIN) || defined(CONFIG_SPI_BFIN_MODULE)
+#if defined(CONFIG_SPI_BFIN5XX) || defined(CONFIG_SPI_BFIN5XX_MODULE)
 	&bfin_spi0_device,
 #endif
 
@@ -2733,6 +2798,11 @@ static struct platform_device *stamp_devices[] __initdata = {
 	&bfin_lq035q1_device,
 #endif
 
+#if defined(CONFIG_VIDEO_BLACKFIN_CAPTURE) \
+	|| defined(CONFIG_VIDEO_BLACKFIN_CAPTURE_MODULE)
+	&bfin_capture_device,
+#endif
+
 #if defined(CONFIG_SERIAL_BFIN) || defined(CONFIG_SERIAL_BFIN_MODULE)
 #ifdef CONFIG_SERIAL_BFIN_UART0
 	&bfin_uart0_device,
diff --git a/arch/blackfin/mach-bf537/boards/tcm_bf537.c b/arch/blackfin/mach-bf537/boards/tcm_bf537.c
index 6917ce2..9885176 100644
--- a/arch/blackfin/mach-bf537/boards/tcm_bf537.c
+++ b/arch/blackfin/mach-bf537/boards/tcm_bf537.c
@@ -32,7 +32,7 @@
  */
 const char bfin_board_name[] = "Bluetechnix TCM BF537";
 
-#if defined(CONFIG_SPI_BFIN) || defined(CONFIG_SPI_BFIN_MODULE)
+#if defined(CONFIG_SPI_BFIN5XX) || defined(CONFIG_SPI_BFIN5XX_MODULE)
 /* all SPI peripherals info goes here */
 
 #if defined(CONFIG_MTD_M25P80) || defined(CONFIG_MTD_M25P80_MODULE)
@@ -702,7 +702,7 @@ static struct platform_device *cm_bf537_devices[] __initdata = {
 	&net2272_bfin_device,
 #endif
 
-#if defined(CONFIG_SPI_BFIN) || defined(CONFIG_SPI_BFIN_MODULE)
+#if defined(CONFIG_SPI_BFIN5XX) || defined(CONFIG_SPI_BFIN5XX_MODULE)
 	&bfin_spi0_device,
 #endif
 
@@ -737,7 +737,7 @@ static int __init tcm_bf537_init(void)
 {
 	printk(KERN_INFO "%s(): registering device resources\n", __func__);
 	platform_add_devices(cm_bf537_devices, ARRAY_SIZE(cm_bf537_devices));
-#if defined(CONFIG_SPI_BFIN) || defined(CONFIG_SPI_BFIN_MODULE)
+#if defined(CONFIG_SPI_BFIN5XX) || defined(CONFIG_SPI_BFIN5XX_MODULE)
 	spi_register_board_info(bfin_spi_board_info, ARRAY_SIZE(bfin_spi_board_info));
 #endif
 
diff --git a/arch/blackfin/mach-bf538/boards/ezkit.c b/arch/blackfin/mach-bf538/boards/ezkit.c
index 8356eb5..1633a6f 100644
--- a/arch/blackfin/mach-bf538/boards/ezkit.c
+++ b/arch/blackfin/mach-bf538/boards/ezkit.c
@@ -490,7 +490,7 @@ static struct platform_device smc91x_device = {
 };
 #endif
 
-#if defined(CONFIG_SPI_BFIN) || defined(CONFIG_SPI_BFIN_MODULE)
+#if defined(CONFIG_SPI_BFIN5XX) || defined(CONFIG_SPI_BFIN5XX_MODULE)
 /* all SPI peripherals info goes here */
 #if defined(CONFIG_MTD_M25P80) \
 	|| defined(CONFIG_MTD_M25P80_MODULE)
@@ -874,7 +874,7 @@ static struct platform_device *cm_bf538_devices[] __initdata = {
 #endif
 #endif
 
-#if defined(CONFIG_SPI_BFIN) || defined(CONFIG_SPI_BFIN_MODULE)
+#if defined(CONFIG_SPI_BFIN5XX) || defined(CONFIG_SPI_BFIN5XX_MODULE)
 	&bf538_spi_master0,
 	&bf538_spi_master1,
 	&bf538_spi_master2,
@@ -938,7 +938,7 @@ static int __init ezkit_init(void)
 	printk(KERN_INFO "%s(): registering device resources\n", __func__);
 	platform_add_devices(cm_bf538_devices, ARRAY_SIZE(cm_bf538_devices));
 
-#if defined(CONFIG_SPI_BFIN) || defined(CONFIG_SPI_BFIN_MODULE)
+#if defined(CONFIG_SPI_BFIN5XX) || defined(CONFIG_SPI_BFIN5XX_MODULE)
 	spi_register_board_info(bf538_spi_board_info,
 			ARRAY_SIZE(bf538_spi_board_info));
 #endif
diff --git a/arch/blackfin/mach-bf548/boards/cm_bf548.c b/arch/blackfin/mach-bf548/boards/cm_bf548.c
index 0350eac..68af594 100644
--- a/arch/blackfin/mach-bf548/boards/cm_bf548.c
+++ b/arch/blackfin/mach-bf548/boards/cm_bf548.c
@@ -854,7 +854,7 @@ static struct platform_device para_flash_device = {
 };
 #endif
 
-#if defined(CONFIG_SPI_BFIN) || defined(CONFIG_SPI_BFIN_MODULE)
+#if defined(CONFIG_SPI_BFIN5XX) || defined(CONFIG_SPI_BFIN5XX_MODULE)
 /* all SPI peripherals info goes here */
 #if defined(CONFIG_MTD_M25P80) \
 	|| defined(CONFIG_MTD_M25P80_MODULE)
@@ -1175,7 +1175,7 @@ static struct platform_device *cm_bf548_devices[] __initdata = {
 	&bf54x_sdh_device,
 #endif
 
-#if defined(CONFIG_SPI_BFIN) || defined(CONFIG_SPI_BFIN_MODULE)
+#if defined(CONFIG_SPI_BFIN5XX) || defined(CONFIG_SPI_BFIN5XX_MODULE)
 	&bf54x_spi_master0,
 	&bf54x_spi_master1,
 #endif
@@ -1210,7 +1210,7 @@ static int __init cm_bf548_init(void)
 	printk(KERN_INFO "%s(): registering device resources\n", __func__);
 	platform_add_devices(cm_bf548_devices, ARRAY_SIZE(cm_bf548_devices));
 
-#if defined(CONFIG_SPI_BFIN) || defined(CONFIG_SPI_BFIN_MODULE)
+#if defined(CONFIG_SPI_BFIN5XX) || defined(CONFIG_SPI_BFIN5XX_MODULE)
 	spi_register_board_info(bf54x_spi_board_info,
 			ARRAY_SIZE(bf54x_spi_board_info));
 #endif
diff --git a/arch/blackfin/mach-bf548/boards/ezkit.c b/arch/blackfin/mach-bf548/boards/ezkit.c
index bb868ac..3ea45f8 100644
--- a/arch/blackfin/mach-bf548/boards/ezkit.c
+++ b/arch/blackfin/mach-bf548/boards/ezkit.c
@@ -1110,7 +1110,7 @@ static struct spi_board_info bfin_spi_board_info[] __initdata = {
 	},
 #endif
 };
-#if defined(CONFIG_SPI_BFIN) || defined(CONFIG_SPI_BFIN_MODULE)
+#if defined(CONFIG_SPI_BFIN5XX) || defined(CONFIG_SPI_BFIN5XX_MODULE)
 /* SPI (0) */
 static struct resource bfin_spi0_resource[] = {
 	[0] = {
@@ -1183,6 +1183,71 @@ static struct platform_device bf54x_spi_master1 = {
 };
 #endif  /* spi master and devices */
 
+#if defined(CONFIG_VIDEO_BLACKFIN_CAPTURE) \
+	|| defined(CONFIG_VIDEO_BLACKFIN_CAPTURE_MODULE)
+#include <linux/videodev2.h>
+#include <media/blackfin/bfin_capture.h>
+#include <media/blackfin/ppi.h>
+
+static const unsigned short ppi_req[] = {
+	P_PPI1_D0, P_PPI1_D1, P_PPI1_D2, P_PPI1_D3,
+	P_PPI1_D4, P_PPI1_D5, P_PPI1_D6, P_PPI1_D7,
+	P_PPI1_CLK, P_PPI1_FS1, P_PPI1_FS2,
+	0,
+};
+
+static const struct ppi_info ppi_info = {
+	.type = PPI_TYPE_EPPI,
+	.dma_ch = CH_EPPI1,
+	.irq_err = IRQ_EPPI1_ERROR,
+	.base = (void __iomem *)EPPI1_STATUS,
+	.pin_req = ppi_req,
+};
+
+#if defined(CONFIG_VIDEO_VS6624) \
+	|| defined(CONFIG_VIDEO_VS6624_MODULE)
+static struct v4l2_input vs6624_inputs[] = {
+	{
+		.index = 0,
+		.name = "Camera",
+		.type = V4L2_INPUT_TYPE_CAMERA,
+		.std = V4L2_STD_UNKNOWN,
+	},
+};
+
+static struct bcap_route vs6624_routes[] = {
+	{
+		.input = 0,
+		.output = 0,
+	},
+};
+
+static const unsigned vs6624_ce_pin = GPIO_PG6;
+
+static struct bfin_capture_config bfin_capture_data = {
+	.card_name = "BF548",
+	.inputs = vs6624_inputs,
+	.num_inputs = ARRAY_SIZE(vs6624_inputs),
+	.routes = vs6624_routes,
+	.i2c_adapter_id = 0,
+	.board_info = {
+		.type = "vs6624",
+		.addr = 0x10,
+		.platform_data = (void *)&vs6624_ce_pin,
+	},
+	.ppi_info = &ppi_info,
+	.ppi_control = (POLC | PACKEN | DLEN_8 | XFR_TYPE | 0x20),
+};
+#endif
+
+static struct platform_device bfin_capture_device = {
+	.name = "bfin_capture",
+	.dev = {
+		.platform_data = &bfin_capture_data,
+	},
+};
+#endif
+
 #if defined(CONFIG_I2C_BLACKFIN_TWI) || defined(CONFIG_I2C_BLACKFIN_TWI_MODULE)
 static struct resource bfin_twi0_resource[] = {
 	[0] = {
@@ -1502,10 +1567,14 @@ static struct platform_device *ezkit_devices[] __initdata = {
 	&bf54x_sdh_device,
 #endif
 
-#if defined(CONFIG_SPI_BFIN) || defined(CONFIG_SPI_BFIN_MODULE)
+#if defined(CONFIG_SPI_BFIN5XX) || defined(CONFIG_SPI_BFIN5XX_MODULE)
 	&bf54x_spi_master0,
 	&bf54x_spi_master1,
 #endif
+#if defined(CONFIG_VIDEO_BLACKFIN_CAPTURE) \
+	|| defined(CONFIG_VIDEO_BLACKFIN_CAPTURE_MODULE)
+	&bfin_capture_device,
+#endif
 
 #if defined(CONFIG_KEYBOARD_BFIN) || defined(CONFIG_KEYBOARD_BFIN_MODULE)
 	&bf54x_kpad_device,
diff --git a/arch/blackfin/mach-bf561/boards/acvilon.c b/arch/blackfin/mach-bf561/boards/acvilon.c
index b1b7339..f6ffd6f 100644
--- a/arch/blackfin/mach-bf561/boards/acvilon.c
+++ b/arch/blackfin/mach-bf561/boards/acvilon.c
@@ -372,7 +372,7 @@ static struct bfin5xx_spi_chip data_flash_chip_info = {
 };
 #endif
 
-#if defined(CONFIG_SPI_BFIN) || defined(CONFIG_SPI_BFIN_MODULE)
+#if defined(CONFIG_SPI_BFIN5XX) || defined(CONFIG_SPI_BFIN5XX_MODULE)
 /* SPI (0) */
 static struct resource bfin_spi0_resource[] = {
 	[0] = {
@@ -475,7 +475,7 @@ static struct platform_device bfin_dpmc = {
 static struct platform_device *acvilon_devices[] __initdata = {
 	&bfin_dpmc,
 
-#if defined(CONFIG_SPI_BFIN) || defined(CONFIG_SPI_BFIN_MODULE)
+#if defined(CONFIG_SPI_BFIN5XX) || defined(CONFIG_SPI_BFIN5XX_MODULE)
 	&bfin_spi0_device,
 #endif
 
diff --git a/arch/blackfin/mach-bf561/boards/cm_bf561.c b/arch/blackfin/mach-bf561/boards/cm_bf561.c
index c017cf0..d81450f 100644
--- a/arch/blackfin/mach-bf561/boards/cm_bf561.c
+++ b/arch/blackfin/mach-bf561/boards/cm_bf561.c
@@ -29,7 +29,7 @@
  */
 const char bfin_board_name[] = "Bluetechnix CM BF561";
 
-#if defined(CONFIG_SPI_BFIN) || defined(CONFIG_SPI_BFIN_MODULE)
+#if defined(CONFIG_SPI_BFIN5XX) || defined(CONFIG_SPI_BFIN5XX_MODULE)
 /* all SPI peripherals info goes here */
 
 #if defined(CONFIG_MTD_M25P80) || defined(CONFIG_MTD_M25P80_MODULE)
@@ -488,7 +488,7 @@ static struct platform_device *cm_bf561_devices[] __initdata = {
 	&net2272_bfin_device,
 #endif
 
-#if defined(CONFIG_SPI_BFIN) || defined(CONFIG_SPI_BFIN_MODULE)
+#if defined(CONFIG_SPI_BFIN5XX) || defined(CONFIG_SPI_BFIN5XX_MODULE)
 	&bfin_spi0_device,
 #endif
 
@@ -523,7 +523,7 @@ static int __init cm_bf561_init(void)
 {
 	printk(KERN_INFO "%s(): registering device resources\n", __func__);
 	platform_add_devices(cm_bf561_devices, ARRAY_SIZE(cm_bf561_devices));
-#if defined(CONFIG_SPI_BFIN) || defined(CONFIG_SPI_BFIN_MODULE)
+#if defined(CONFIG_SPI_BFIN5XX) || defined(CONFIG_SPI_BFIN5XX_MODULE)
 	spi_register_board_info(bfin_spi_board_info, ARRAY_SIZE(bfin_spi_board_info));
 #endif
 
diff --git a/arch/blackfin/mach-bf561/boards/ezkit.c b/arch/blackfin/mach-bf561/boards/ezkit.c
index 27f22ed..8389788 100644
--- a/arch/blackfin/mach-bf561/boards/ezkit.c
+++ b/arch/blackfin/mach-bf561/boards/ezkit.c
@@ -291,7 +291,7 @@ static struct platform_device ezkit_flash_device = {
 };
 #endif
 
-#if defined(CONFIG_SPI_BFIN) || defined(CONFIG_SPI_BFIN_MODULE)
+#if defined(CONFIG_SPI_BFIN5XX) || defined(CONFIG_SPI_BFIN5XX_MODULE)
 /* SPI (0) */
 static struct resource bfin_spi0_resource[] = {
 	[0] = {
@@ -383,7 +383,7 @@ static struct i2c_gpio_platform_data i2c_gpio_data = {
 	.scl_pin		= GPIO_PF0,
 	.sda_is_open_drain	= 0,
 	.scl_is_open_drain	= 0,
-	.udelay			= 40,
+	.udelay			= 10,
 };
 
 static struct platform_device i2c_gpio_device = {
@@ -422,6 +422,96 @@ static struct platform_device bfin_dpmc = {
 	},
 };
 
+#if defined(CONFIG_VIDEO_BLACKFIN_CAPTURE) \
+	|| defined(CONFIG_VIDEO_BLACKFIN_CAPTURE_MODULE)
+#include <linux/videodev2.h>
+#include <media/blackfin/bfin_capture.h>
+#include <media/blackfin/ppi.h>
+
+static const unsigned short ppi_req[] = {
+	P_PPI0_D0, P_PPI0_D1, P_PPI0_D2, P_PPI0_D3,
+	P_PPI0_D4, P_PPI0_D5, P_PPI0_D6, P_PPI0_D7,
+	P_PPI0_CLK, P_PPI0_FS1, P_PPI0_FS2,
+	0,
+};
+
+static const struct ppi_info ppi_info = {
+	.type = PPI_TYPE_PPI,
+	.dma_ch = CH_PPI0,
+	.irq_err = IRQ_PPI1_ERROR,
+	.base = (void __iomem *)PPI0_CONTROL,
+	.pin_req = ppi_req,
+};
+
+#if defined(CONFIG_VIDEO_ADV7183) \
+	|| defined(CONFIG_VIDEO_ADV7183_MODULE)
+#include <media/adv7183.h>
+static struct v4l2_input adv7183_inputs[] = {
+	{
+		.index = 0,
+		.name = "Composite",
+		.type = V4L2_INPUT_TYPE_CAMERA,
+		.std = V4L2_STD_ALL,
+	},
+	{
+		.index = 1,
+		.name = "S-Video",
+		.type = V4L2_INPUT_TYPE_CAMERA,
+		.std = V4L2_STD_ALL,
+	},
+	{
+		.index = 2,
+		.name = "Component",
+		.type = V4L2_INPUT_TYPE_CAMERA,
+		.std = V4L2_STD_ALL,
+	},
+};
+
+static struct bcap_route adv7183_routes[] = {
+	{
+		.input = ADV7183_COMPOSITE4,
+		.output = ADV7183_8BIT_OUT,
+	},
+	{
+		.input = ADV7183_SVIDEO0,
+		.output = ADV7183_8BIT_OUT,
+	},
+	{
+		.input = ADV7183_COMPONENT0,
+		.output = ADV7183_8BIT_OUT,
+	},
+};
+
+
+static const unsigned adv7183_gpio[] = {
+	GPIO_PF13, /* reset pin */
+	GPIO_PF2,  /* output enable pin */
+};
+
+static struct bfin_capture_config bfin_capture_data = {
+	.card_name = "BF561",
+	.inputs = adv7183_inputs,
+	.num_inputs = ARRAY_SIZE(adv7183_inputs),
+	.routes = adv7183_routes,
+	.i2c_adapter_id = 0,
+	.board_info = {
+		.type = "adv7183",
+		.addr = 0x20,
+		.platform_data = (void *)adv7183_gpio,
+	},
+	.ppi_info = &ppi_info,
+	.ppi_control = (PACK_EN | DLEN_8 | DMA32 | FLD_SEL),
+};
+#endif
+
+static struct platform_device bfin_capture_device = {
+	.name = "bfin_capture",
+	.dev = {
+		.platform_data = &bfin_capture_data,
+	},
+};
+#endif
+
 #if defined(CONFIG_SND_BF5XX_I2S) || defined(CONFIG_SND_BF5XX_I2S_MODULE)
 static struct platform_device bfin_i2s = {
 	.name = "bfin-i2s",
@@ -462,7 +552,7 @@ static struct platform_device *ezkit_devices[] __initdata = {
 	&bfin_isp1760_device,
 #endif
 
-#if defined(CONFIG_SPI_BFIN) || defined(CONFIG_SPI_BFIN_MODULE)
+#if defined(CONFIG_SPI_BFIN5XX) || defined(CONFIG_SPI_BFIN5XX_MODULE)
 	&bfin_spi0_device,
 #endif
 
@@ -494,6 +584,11 @@ static struct platform_device *ezkit_devices[] __initdata = {
 	&ezkit_flash_device,
 #endif
 
+#if defined(CONFIG_VIDEO_BLACKFIN_CAPTURE) \
+	|| defined(CONFIG_VIDEO_BLACKFIN_CAPTURE_MODULE)
+	&bfin_capture_device,
+#endif
+
 #if defined(CONFIG_SND_BF5XX_I2S) || defined(CONFIG_SND_BF5XX_I2S_MODULE)
 	&bfin_i2s,
 #endif
diff --git a/arch/blackfin/mach-bf561/include/mach/pll.h b/arch/blackfin/mach-bf561/include/mach/pll.h
index 7977db2..00bdace 100644
--- a/arch/blackfin/mach-bf561/include/mach/pll.h
+++ b/arch/blackfin/mach-bf561/include/mach/pll.h
@@ -16,6 +16,7 @@
 #include <mach/irq.h>
 
 #define SUPPLE_0_WAKEUP ((IRQ_SUPPLE_0 - (IRQ_CORETMR + 1)) % 32)
+#define SUPPLE_1_WAKEUP ((IRQ_SUPPLE_1 - (IRQ_CORETMR + 1)) % 32)
 
 static inline void
 bfin_iwr_restore(unsigned long iwr0, unsigned long iwr1, unsigned long iwr2)
@@ -42,7 +43,8 @@ bfin_iwr_save(unsigned long niwr0, unsigned long niwr1, unsigned long niwr2,
 static inline void
 bfin_iwr_set_sup0(unsigned long *iwr0, unsigned long *iwr1, unsigned long *iwr2)
 {
-	bfin_iwr_save(0, IWR_ENABLE(SUPPLE_0_WAKEUP), 0, iwr0, iwr1, iwr2);
+	bfin_iwr_save(0, IWR_ENABLE(SUPPLE_0_WAKEUP) |
+			IWR_ENABLE(SUPPLE_1_WAKEUP), 0, iwr0, iwr1, iwr2);
 }
 
 #endif
diff --git a/arch/blackfin/mach-bf561/smp.c b/arch/blackfin/mach-bf561/smp.c
index db22401..ab1c617 100644
--- a/arch/blackfin/mach-bf561/smp.c
+++ b/arch/blackfin/mach-bf561/smp.c
@@ -84,7 +84,7 @@ int __cpuinit platform_boot_secondary(unsigned int cpu, struct task_struct *idle
 
 	if ((bfin_read_SYSCR() & COREB_SRAM_INIT) == 0) {
 		/* CoreB already running, sending ipi to wakeup it */
-		platform_send_ipi_cpu(cpu, IRQ_SUPPLE_0);
+		smp_send_reschedule(cpu);
 	} else {
 		/* Kick CoreB, which should start execution from CORE_SRAM_BASE. */
 		bfin_write_SYSCR(bfin_read_SYSCR() & ~COREB_SRAM_INIT);
@@ -114,7 +114,8 @@ void __init platform_request_ipi(int irq, void *handler)
 	int ret;
 	const char *name = (irq == IRQ_SUPPLE_0) ? supple0 : supple1;
 
-	ret = request_irq(irq, handler, IRQF_PERCPU, name, handler);
+	ret = request_irq(irq, handler, IRQF_PERCPU | IRQF_NO_SUSPEND |
+			IRQF_FORCE_RESUME, name, handler);
 	if (ret)
 		panic("Cannot request %s for IPI service", name);
 }
diff --git a/arch/blackfin/mach-common/smp.c b/arch/blackfin/mach-common/smp.c
index 0784a52..ac8f8a4 100644
--- a/arch/blackfin/mach-common/smp.c
+++ b/arch/blackfin/mach-common/smp.c
@@ -14,6 +14,7 @@
 #include <linux/sched.h>
 #include <linux/interrupt.h>
 #include <linux/cache.h>
+#include <linux/clockchips.h>
 #include <linux/profile.h>
 #include <linux/errno.h>
 #include <linux/mm.h>
@@ -47,9 +48,10 @@ unsigned long blackfin_iflush_l1_entry[NR_CPUS];
 
 struct blackfin_initial_pda __cpuinitdata initial_pda_coreb;
 
-#define BFIN_IPI_RESCHEDULE   0
-#define BFIN_IPI_CALL_FUNC    1
-#define BFIN_IPI_CPU_STOP     2
+#define BFIN_IPI_TIMER	      0
+#define BFIN_IPI_RESCHEDULE   1
+#define BFIN_IPI_CALL_FUNC    2
+#define BFIN_IPI_CPU_STOP     3
 
 struct blackfin_flush_data {
 	unsigned long start;
@@ -160,6 +162,14 @@ static irqreturn_t ipi_handler_int0(int irq, void *dev_instance)
 	return IRQ_HANDLED;
 }
 
+DECLARE_PER_CPU(struct clock_event_device, coretmr_events);
+void ipi_timer(void)
+{
+	int cpu = smp_processor_id();
+	struct clock_event_device *evt = &per_cpu(coretmr_events, cpu);
+	evt->event_handler(evt);
+}
+
 static irqreturn_t ipi_handler_int1(int irq, void *dev_instance)
 {
 	struct ipi_message *msg;
@@ -176,18 +186,17 @@ static irqreturn_t ipi_handler_int1(int irq, void *dev_instance)
 	while (msg_queue->count) {
 		msg = &msg_queue->ipi_message[msg_queue->head];
 		switch (msg->type) {
+		case BFIN_IPI_TIMER:
+			ipi_timer();
+			break;
 		case BFIN_IPI_RESCHEDULE:
 			scheduler_ipi();
 			break;
 		case BFIN_IPI_CALL_FUNC:
-			spin_unlock_irqrestore(&msg_queue->lock, flags);
 			ipi_call_function(cpu, msg);
-			spin_lock_irqsave(&msg_queue->lock, flags);
 			break;
 		case BFIN_IPI_CPU_STOP:
-			spin_unlock_irqrestore(&msg_queue->lock, flags);
 			ipi_cpu_stop(cpu);
-			spin_lock_irqsave(&msg_queue->lock, flags);
 			break;
 		default:
 			printk(KERN_CRIT "CPU%u: Unknown IPI message 0x%lx\n",
@@ -297,8 +306,6 @@ void smp_send_reschedule(int cpu)
 {
 	cpumask_t callmap;
 	/* simply trigger an ipi */
-	if (cpu_is_offline(cpu))
-		return;
 
 	cpumask_clear(&callmap);
 	cpumask_set_cpu(cpu, &callmap);
@@ -308,6 +315,16 @@ void smp_send_reschedule(int cpu)
 	return;
 }
 
+void smp_send_msg(const struct cpumask *mask, unsigned long type)
+{
+	smp_send_message(*mask, type, NULL, NULL, 0);
+}
+
+void smp_timer_broadcast(const struct cpumask *mask)
+{
+	smp_send_msg(mask, BFIN_IPI_TIMER);
+}
+
 void smp_send_stop(void)
 {
 	cpumask_t callmap;
@@ -326,17 +343,24 @@ void smp_send_stop(void)
 int __cpuinit __cpu_up(unsigned int cpu)
 {
 	int ret;
-	static struct task_struct *idle;
+	struct blackfin_cpudata *ci = &per_cpu(cpu_data, cpu);
+	struct task_struct *idle = ci->idle;
 
-	if (idle)
+	if (idle) {
 		free_task(idle);
-
-	idle = fork_idle(cpu);
-	if (IS_ERR(idle)) {
-		printk(KERN_ERR "CPU%u: fork() failed\n", cpu);
-		return PTR_ERR(idle);
+		idle = NULL;
 	}
 
+	if (!idle) {
+		idle = fork_idle(cpu);
+		if (IS_ERR(idle)) {
+			printk(KERN_ERR "CPU%u: fork() failed\n", cpu);
+			return PTR_ERR(idle);
+		}
+		ci->idle = idle;
+	} else {
+		init_idle(idle, cpu);
+	}
 	secondary_stack = task_stack_page(idle) + THREAD_SIZE;
 
 	ret = platform_boot_secondary(cpu, idle);
@@ -411,6 +435,7 @@ void __cpuinit secondary_start_kernel(void)
 
 	bfin_setup_caches(cpu);
 
+	notify_cpu_starting(cpu);
 	/*
 	 * Calibrate loops per jiffy value.
 	 * IRQs need to be enabled here - D-cache can be invalidated
@@ -453,8 +478,10 @@ void smp_icache_flush_range_others(unsigned long start, unsigned long end)
 	smp_flush_data.start = start;
 	smp_flush_data.end = end;
 
-	if (smp_call_function(&ipi_flush_icache, &smp_flush_data, 0))
+	preempt_disable();
+	if (smp_call_function(&ipi_flush_icache, &smp_flush_data, 1))
 		printk(KERN_WARNING "SMP: failed to run I-cache flush request on other CPUs\n");
+	preempt_enable();
 }
 EXPORT_SYMBOL_GPL(smp_icache_flush_range_others);
 
diff --git a/arch/c6x/Kconfig b/arch/c6x/Kconfig
new file mode 100644
index 0000000..26e67f0
--- /dev/null
+++ b/arch/c6x/Kconfig
@@ -0,0 +1,174 @@
+#
+# For a description of the syntax of this configuration file,
+# see Documentation/kbuild/kconfig-language.txt.
+#
+
+config TMS320C6X
+	def_bool y
+	select CLKDEV_LOOKUP
+	select GENERIC_IRQ_SHOW
+	select HAVE_ARCH_TRACEHOOK
+	select HAVE_DMA_API_DEBUG
+	select HAVE_GENERIC_HARDIRQS
+	select HAVE_MEMBLOCK
+	select HAVE_SPARSE_IRQ
+	select OF
+	select OF_EARLY_FLATTREE
+
+config MMU
+	def_bool n
+
+config ZONE_DMA
+	def_bool y
+
+config FPU
+	def_bool n
+
+config HIGHMEM
+	def_bool n
+
+config NUMA
+	def_bool n
+
+config RWSEM_GENERIC_SPINLOCK
+	def_bool y
+
+config RWSEM_XCHGADD_ALGORITHM
+	def_bool n
+
+config GENERIC_CALIBRATE_DELAY
+	def_bool y
+
+config GENERIC_HWEIGHT
+	def_bool y
+
+config GENERIC_CLOCKEVENTS
+	def_bool y
+
+config GENERIC_CLOCKEVENTS_BROADCAST
+	bool
+
+config GENERIC_BUG
+	def_bool y
+
+config COMMON_CLKDEV
+	def_bool y
+
+config C6X_BIG_KERNEL
+	bool "Build a big kernel"
+	help
+	  The C6X function call instruction has a limited range of +/- 2MiB.
+	  This is sufficient for most kernels, but some kernel configurations
+	  with lots of compiled-in functionality may require a larger range
+	  for function calls. Use this option to have the compiler generate
+	  function calls with 32-bit range. This will make the kernel both
+	  larger and slower.
+
+	  If unsure, say N.
+
+source "init/Kconfig"
+
+# Use the generic interrupt handling code in kernel/irq/
+
+source "kernel/Kconfig.freezer"
+
+config CMDLINE_BOOL
+	bool "Default bootloader kernel arguments"
+
+config CMDLINE
+	string "Kernel command line"
+	depends on CMDLINE_BOOL
+	default "console=ttyS0,57600"
+	help
+	  On some architectures there is currently no way for the boot loader
+	  to pass arguments to the kernel. For these architectures, you should
+	  supply some command-line options at build time by entering them
+	  here.
+
+config CMDLINE_FORCE
+	bool "Force default kernel command string"
+	depends on CMDLINE_BOOL
+	default n
+	help
+	  Set this to have arguments from the default kernel command string
+	  override those passed by the boot loader.
+
+config CPU_BIG_ENDIAN
+	bool "Build big-endian kernel"
+	default n
+	help
+	  Say Y if you plan on running a kernel in big-endian mode.
+	  Note that your board must be properly built and your board
+	  port must properly enable any big-endian related features
+	  of your chipset/board/processor.
+
+config FORCE_MAX_ZONEORDER
+	int "Maximum zone order"
+	default "13"
+	help
+	  The kernel memory allocator divides physically contiguous memory
+	  blocks into "zones", where each zone is a power of two number of
+	  pages.  This option selects the largest power of two that the kernel
+	  keeps in the memory allocator.  If you need to allocate very large
+	  blocks of physically contiguous memory, then you may need to
+	  increase this value.
+
+	  This config option is actually maximum order plus one. For example,
+	  a value of 11 means that the largest free memory block is 2^10 pages.
+
+menu "Processor type and features"
+
+source "arch/c6x/platforms/Kconfig"
+
+config TMS320C6X_CACHES_ON
+	bool "L2 cache support"
+	default y
+
+config KERNEL_RAM_BASE_ADDRESS
+	hex "Virtual address of memory base"
+	default 0xe0000000 if SOC_TMS320C6455
+	default 0xe0000000 if SOC_TMS320C6457
+	default 0xe0000000 if SOC_TMS320C6472
+	default 0x80000000
+
+source "mm/Kconfig"
+
+source "kernel/Kconfig.preempt"
+
+source "kernel/Kconfig.hz"
+source "kernel/time/Kconfig"
+
+endmenu
+
+menu "Executable file formats"
+
+source "fs/Kconfig.binfmt"
+
+endmenu
+
+source "net/Kconfig"
+
+source "drivers/Kconfig"
+
+source "fs/Kconfig"
+
+source "security/Kconfig"
+
+source "crypto/Kconfig"
+
+source "lib/Kconfig"
+
+menu "Kernel hacking"
+
+source "lib/Kconfig.debug"
+
+config ACCESS_CHECK
+	bool "Check the user pointer address"
+	default y
+	help
+	  Usually the pointer transfer from user space is checked to see if its
+	  address is in the kernel space.
+
+	  Say N here to disable that check to improve the performance.
+
+endmenu
diff --git a/arch/c6x/Makefile b/arch/c6x/Makefile
new file mode 100644
index 0000000..1d08dd0
--- /dev/null
+++ b/arch/c6x/Makefile
@@ -0,0 +1,60 @@
+#
+# linux/arch/c6x/Makefile
+#
+# This file is subject to the terms and conditions of the GNU General Public
+# License.  See the file "COPYING" in the main directory of this archive
+# for more details.
+#
+
+cflags-y += -mno-dsbt -msdata=none
+
+cflags-$(CONFIG_C6X_BIG_KERNEL) += -mlong-calls
+
+CFLAGS_MODULE   += -mlong-calls -mno-dsbt -msdata=none
+
+CHECKFLAGS      +=
+
+KBUILD_CFLAGS   += $(cflags-y)
+KBUILD_AFLAGS   += $(cflags-y)
+
+ifdef CONFIG_CPU_BIG_ENDIAN
+KBUILD_CFLAGS   += -mbig-endian
+KBUILD_AFLAGS   += -mbig-endian
+LINKFLAGS       += -mbig-endian
+KBUILD_LDFLAGS  += -mbig-endian
+LDFLAGS += -EB
+endif
+
+head-y          := arch/c6x/kernel/head.o
+core-y          += arch/c6x/kernel/ arch/c6x/mm/ arch/c6x/platforms/
+libs-y          += arch/c6x/lib/
+
+# Default to vmlinux.bin, override when needed
+all: vmlinux.bin
+
+boot := arch/$(ARCH)/boot
+
+# Are we making a dtbImage.<boardname> target? If so, crack out the boardname
+DTB:=$(subst dtbImage.,,$(filter dtbImage.%, $(MAKECMDGOALS)))
+export DTB
+
+ifneq ($(DTB),)
+core-y	+= $(boot)/
+endif
+
+# With make 3.82 we cannot mix normal and wildcard targets
+
+vmlinux.bin: vmlinux
+	$(Q)$(MAKE) $(build)=$(boot) $(patsubst %,$(boot)/%,$@)
+
+dtbImage.%: vmlinux
+	$(Q)$(MAKE) $(build)=$(boot) $(patsubst %,$(boot)/%,$@)
+
+archclean:
+	$(Q)$(MAKE) $(clean)=$(boot)
+
+define archhelp
+  @echo '  vmlinux.bin     - Binary kernel image (arch/$(ARCH)/boot/vmlinux.bin)'
+  @echo '  dtbImage.<dt>   - ELF image with $(arch)/boot/dts/<dt>.dts linked in'
+  @echo '                  - stripped elf with fdt blob'
+endef
diff --git a/arch/c6x/boot/Makefile b/arch/c6x/boot/Makefile
new file mode 100644
index 0000000..ecca820
--- /dev/null
+++ b/arch/c6x/boot/Makefile
@@ -0,0 +1,30 @@
+#
+# Makefile for bootable kernel images
+#
+
+OBJCOPYFLAGS_vmlinux.bin := -O binary
+$(obj)/vmlinux.bin: vmlinux FORCE
+	$(call if_changed,objcopy)
+
+DTC_FLAGS ?= -p 1024
+
+ifneq ($(DTB),)
+obj-y += linked_dtb.o
+endif
+
+$(obj)/%.dtb: $(src)/dts/%.dts FORCE
+	$(call cmd,dtc)
+
+quiet_cmd_cp = CP      $< $@$2
+	cmd_cp = cat $< >$@$2 || (rm -f $@ && echo false)
+
+# Generate builtin.dtb from $(DTB).dtb
+$(obj)/builtin.dtb: $(obj)/$(DTB).dtb
+	$(call if_changed,cp)
+
+$(obj)/linked_dtb.o: $(obj)/builtin.dtb
+
+$(obj)/dtbImage.%: vmlinux
+	$(call if_changed,objcopy)
+
+clean-files := $(obj)/*.dtb
diff --git a/arch/c6x/boot/dts/dsk6455.dts b/arch/c6x/boot/dts/dsk6455.dts
new file mode 100644
index 0000000..2b71f80
--- /dev/null
+++ b/arch/c6x/boot/dts/dsk6455.dts
@@ -0,0 +1,62 @@
+/*
+ * arch/c6x/boot/dts/dsk6455.dts
+ *
+ * DSK6455 Evaluation Platform For TMS320C6455
+ * Copyright (C) 2011 Texas Instruments Incorporated
+ *
+ * Author: Mark Salter <msalter@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ */
+
+/dts-v1/;
+
+/include/ "tms320c6455.dtsi"
+
+/ {
+	model = "Spectrum Digital DSK6455";
+	compatible = "spectrum-digital,dsk6455";
+
+	chosen {
+		bootargs = "root=/dev/nfs ip=dhcp rw";
+	};
+
+	memory {
+		device_type = "memory";
+		reg = <0xE0000000 0x08000000>;
+	};
+
+	soc {
+		megamod_pic: interrupt-controller@1800000 {
+			interrupts = < 12 13 14 15 >;
+		};
+
+		emifa@70000000 {
+			flash@3,0 {
+				  #address-cells = <1>;
+				#size-cells = <1>;
+				compatible = "cfi-flash";
+				reg = <0x3 0x0 0x400000>;
+				bank-width = <1>;
+				device-width = <1>;
+				partition@0 {
+					reg = <0x0 0x400000>;
+					label = "NOR";
+				};
+			};
+		};
+
+		timer1: timer@2980000 {
+			interrupt-parent = <&megamod_pic>;
+			interrupts = < 69 >;
+		};
+
+		clock-controller@029a0000 {
+			clock-frequency = <50000000>;
+		};
+	};
+};
diff --git a/arch/c6x/boot/dts/evmc6457.dts b/arch/c6x/boot/dts/evmc6457.dts
new file mode 100644
index 0000000..0301eb9a
--- /dev/null
+++ b/arch/c6x/boot/dts/evmc6457.dts
@@ -0,0 +1,48 @@
+/*
+ * arch/c6x/boot/dts/evmc6457.dts
+ *
+ * EVMC6457 Evaluation Platform For TMS320C6457
+ *
+ * Copyright (C) 2011 Texas Instruments Incorporated
+ *
+ * Author: Mark Salter <msalter@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ */
+
+/dts-v1/;
+
+/include/ "tms320c6457.dtsi"
+
+/ {
+	model = "eInfochips EVMC6457";
+	compatible = "einfochips,evmc6457";
+
+	chosen {
+		bootargs = "console=hvc root=/dev/nfs ip=dhcp rw";
+	};
+
+	memory {
+		device_type = "memory";
+		reg = <0xE0000000 0x10000000>;
+	};
+
+	soc {
+		megamod_pic: interrupt-controller@1800000 {
+		       interrupts = < 12 13 14 15 >;
+		};
+
+		timer0: timer@2940000 {
+			interrupt-parent = <&megamod_pic>;
+			interrupts = < 67 >;
+		};
+
+		clock-controller@29a0000 {
+			clock-frequency = <60000000>;
+		};
+	};
+};
diff --git a/arch/c6x/boot/dts/evmc6472.dts b/arch/c6x/boot/dts/evmc6472.dts
new file mode 100644
index 0000000..3e207b4
--- /dev/null
+++ b/arch/c6x/boot/dts/evmc6472.dts
@@ -0,0 +1,73 @@
+/*
+ * arch/c6x/boot/dts/evmc6472.dts
+ *
+ * EVMC6472 Evaluation Platform For TMS320C6472
+ *
+ * Copyright (C) 2011 Texas Instruments Incorporated
+ *
+ * Author: Mark Salter <msalter@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ */
+
+/dts-v1/;
+
+/include/ "tms320c6472.dtsi"
+
+/ {
+	model = "eInfochips EVMC6472";
+	compatible = "einfochips,evmc6472";
+
+	chosen {
+		bootargs = "console=hvc root=/dev/nfs ip=dhcp rw";
+	};
+
+	memory {
+		device_type = "memory";
+		reg = <0xE0000000 0x10000000>;
+	};
+
+	soc {
+		megamod_pic: interrupt-controller@1800000 {
+		       interrupts = < 12 13 14 15 >;
+		};
+
+		timer0: timer@25e0000 {
+			interrupt-parent = <&megamod_pic>;
+			interrupts = < 16 >;
+		};
+
+		timer1: timer@25f0000 {
+			interrupt-parent = <&megamod_pic>;
+			interrupts = < 16 >;
+		};
+
+		timer2: timer@2600000 {
+			interrupt-parent = <&megamod_pic>;
+			interrupts = < 16 >;
+		};
+
+		timer3: timer@2610000 {
+			interrupt-parent = <&megamod_pic>;
+			interrupts = < 16 >;
+		};
+
+		timer4: timer@2620000 {
+			interrupt-parent = <&megamod_pic>;
+			interrupts = < 16 >;
+		};
+
+		timer5: timer@2630000 {
+			interrupt-parent = <&megamod_pic>;
+			interrupts = < 16 >;
+		};
+
+		clock-controller@29a0000 {
+			clock-frequency = <25000000>;
+		};
+	};
+};
diff --git a/arch/c6x/boot/dts/evmc6474.dts b/arch/c6x/boot/dts/evmc6474.dts
new file mode 100644
index 0000000..4dc2912
--- /dev/null
+++ b/arch/c6x/boot/dts/evmc6474.dts
@@ -0,0 +1,58 @@
+/*
+ * arch/c6x/boot/dts/evmc6474.dts
+ *
+ * EVMC6474 Evaluation Platform For TMS320C6474
+ *
+ * Copyright (C) 2011 Texas Instruments Incorporated
+ *
+ * Author: Mark Salter <msalter@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ */
+
+/dts-v1/;
+
+/include/ "tms320c6474.dtsi"
+
+/ {
+	model = "Spectrum Digital EVMC6474";
+	compatible = "spectrum-digital,evmc6474";
+
+	chosen {
+		bootargs = "console=hvc root=/dev/nfs ip=dhcp rw";
+	};
+
+	memory {
+		device_type = "memory";
+		reg = <0x80000000 0x08000000>;
+	};
+
+	soc {
+		megamod_pic: interrupt-controller@1800000 {
+		       interrupts = < 12 13 14 15 >;
+		};
+
+		timer3: timer@2940000 {
+			interrupt-parent = <&megamod_pic>;
+			interrupts = < 39 >;
+		};
+
+		timer4: timer@2950000 {
+			interrupt-parent = <&megamod_pic>;
+			interrupts = < 41 >;
+		};
+
+		timer5: timer@2960000 {
+			interrupt-parent = <&megamod_pic>;
+			interrupts = < 43 >;
+		};
+
+		clock-controller@29a0000 {
+			clock-frequency = <50000000>;
+		};
+	};
+};
diff --git a/arch/c6x/boot/dts/tms320c6455.dtsi b/arch/c6x/boot/dts/tms320c6455.dtsi
new file mode 100644
index 0000000..a804ec1
--- /dev/null
+++ b/arch/c6x/boot/dts/tms320c6455.dtsi
@@ -0,0 +1,96 @@
+
+/ {
+	#address-cells = <1>;
+	#size-cells = <1>;
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		cpu@0 {
+			device_type = "cpu";
+			model = "ti,c64x+";
+			reg = <0>;
+		};
+	};
+
+	soc {
+		compatible = "simple-bus";
+		model = "tms320c6455";
+		#address-cells = <1>;
+		#size-cells = <1>;
+		ranges;
+
+		core_pic: interrupt-controller {
+			  interrupt-controller;
+			  #interrupt-cells = <1>;
+			  compatible = "ti,c64x+core-pic";
+		};
+
+		/*
+		 * Megamodule interrupt controller
+		 */
+		megamod_pic: interrupt-controller@1800000 {
+		       compatible = "ti,c64x+megamod-pic";
+		       interrupt-controller;
+		       #interrupt-cells = <1>;
+		       reg = <0x1800000 0x1000>;
+		       interrupt-parent = <&core_pic>;
+		};
+
+		cache-controller@1840000 {
+			compatible = "ti,c64x+cache";
+			reg = <0x01840000 0x8400>;
+		};
+
+		emifa@70000000 {
+			compatible = "ti,c64x+emifa", "simple-bus";
+			#address-cells = <2>;
+			#size-cells = <1>;
+			reg = <0x70000000 0x100>;
+			ranges = <0x2 0x0 0xa0000000 0x00000008
+			          0x3 0x0 0xb0000000 0x00400000
+				  0x4 0x0 0xc0000000 0x10000000
+				  0x5 0x0 0xD0000000 0x10000000>;
+
+			ti,dscr-dev-enable = <13>;
+			ti,emifa-burst-priority = <255>;
+			ti,emifa-ce-config = <0x00240120
+					      0x00240120
+					      0x00240122
+					      0x00240122>;
+		};
+
+		timer1: timer@2980000 {
+			compatible = "ti,c64x+timer64";
+			reg = <0x2980000 0x40>;
+			ti,dscr-dev-enable = <4>;
+		};
+
+		clock-controller@029a0000 {
+			compatible = "ti,c6455-pll", "ti,c64x+pll";
+			reg = <0x029a0000 0x200>;
+			ti,c64x+pll-bypass-delay = <1440>;
+			ti,c64x+pll-reset-delay = <15360>;
+			ti,c64x+pll-lock-delay = <24000>;
+		};
+
+		device-state-config-regs@2a80000 {
+			compatible = "ti,c64x+dscr";
+			reg = <0x02a80000 0x41000>;
+
+			ti,dscr-devstat = <0>;
+			ti,dscr-silicon-rev = <8 28 0xf>;
+			ti,dscr-rmii-resets = <0 0x40020 0x00040000>;
+
+			ti,dscr-locked-regs = <0x40008 0x40004 0x0f0a0b00>;
+			ti,dscr-devstate-ctl-regs =
+				 <0 12 0x40008 1 0  0  2
+				  12 1 0x40008 3 0 30  2
+				  13 2 0x4002c 1 0xffffffff 0 1>;
+			ti,dscr-devstate-stat-regs =
+				<0 10 0x40014 1 0  0  3
+				 10 2 0x40018 1 0  0  3>;
+		};
+	};
+};
diff --git a/arch/c6x/boot/dts/tms320c6457.dtsi b/arch/c6x/boot/dts/tms320c6457.dtsi
new file mode 100644
index 0000000..35f4070
--- /dev/null
+++ b/arch/c6x/boot/dts/tms320c6457.dtsi
@@ -0,0 +1,68 @@
+
+/ {
+	#address-cells = <1>;
+	#size-cells = <1>;
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		cpu@0 {
+			device_type = "cpu";
+			model = "ti,c64x+";
+			reg = <0>;
+		};
+	};
+
+	soc {
+		compatible = "simple-bus";
+		model = "tms320c6457";
+		#address-cells = <1>;
+		#size-cells = <1>;
+		ranges;
+
+		core_pic: interrupt-controller {
+			interrupt-controller;
+			#interrupt-cells = <1>;
+			compatible = "ti,c64x+core-pic";
+		};
+
+		megamod_pic: interrupt-controller@1800000 {
+			compatible = "ti,c64x+megamod-pic";
+			interrupt-controller;
+			#interrupt-cells = <1>;
+			interrupt-parent = <&core_pic>;
+			reg = <0x1800000 0x1000>;
+		};
+
+		cache-controller@1840000 {
+			compatible = "ti,c64x+cache";
+			reg = <0x01840000 0x8400>;
+		};
+
+		device-state-controller@2880800 {
+			compatible = "ti,c64x+dscr";
+			reg = <0x02880800 0x400>;
+
+			ti,dscr-devstat = <0x20>;
+			ti,dscr-silicon-rev = <0x18 28 0xf>;
+			ti,dscr-mac-fuse-regs = <0x114 3 4 5 6
+						 0x118 0 0 1 2>;
+			ti,dscr-kick-regs = <0x38 0x83E70B13
+					     0x3c 0x95A4F1E0>;
+		};
+
+		timer0: timer@2940000 {
+			compatible = "ti,c64x+timer64";
+			reg = <0x2940000 0x40>;
+		};
+
+		clock-controller@29a0000 {
+			compatible = "ti,c6457-pll", "ti,c64x+pll";
+			reg = <0x029a0000 0x200>;
+			ti,c64x+pll-bypass-delay = <300>;
+			ti,c64x+pll-reset-delay = <24000>;
+			ti,c64x+pll-lock-delay = <50000>;
+		};
+	};
+};
diff --git a/arch/c6x/boot/dts/tms320c6472.dtsi b/arch/c6x/boot/dts/tms320c6472.dtsi
new file mode 100644
index 0000000..b488aae
--- /dev/null
+++ b/arch/c6x/boot/dts/tms320c6472.dtsi
@@ -0,0 +1,134 @@
+
+/ {
+	#address-cells = <1>;
+	#size-cells = <1>;
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		cpu@0 {
+			device_type = "cpu";
+			reg = <0>;
+			model = "ti,c64x+";
+		};
+		cpu@1 {
+			device_type = "cpu";
+			reg = <1>;
+			model = "ti,c64x+";
+		};
+		cpu@2 {
+			device_type = "cpu";
+			reg = <2>;
+			model = "ti,c64x+";
+		};
+		cpu@3 {
+			device_type = "cpu";
+			reg = <3>;
+			model = "ti,c64x+";
+		};
+		cpu@4 {
+			device_type = "cpu";
+			reg = <4>;
+			model = "ti,c64x+";
+		};
+		cpu@5 {
+			device_type = "cpu";
+			reg = <5>;
+			model = "ti,c64x+";
+		};
+	};
+
+	soc {
+		compatible = "simple-bus";
+		model = "tms320c6472";
+		#address-cells = <1>;
+		#size-cells = <1>;
+		ranges;
+
+		core_pic: interrupt-controller {
+			compatible = "ti,c64x+core-pic";
+			interrupt-controller;
+			#interrupt-cells = <1>;
+		};
+
+		megamod_pic: interrupt-controller@1800000 {
+		       compatible = "ti,c64x+megamod-pic";
+		       interrupt-controller;
+		       #interrupt-cells = <1>;
+		       reg = <0x1800000 0x1000>;
+		       interrupt-parent = <&core_pic>;
+		};
+
+		cache-controller@1840000 {
+			compatible = "ti,c64x+cache";
+			reg = <0x01840000 0x8400>;
+		};
+
+		timer0: timer@25e0000 {
+			compatible = "ti,c64x+timer64";
+			ti,core-mask = < 0x01 >;
+			reg = <0x25e0000 0x40>;
+		};
+
+		timer1: timer@25f0000 {
+			compatible = "ti,c64x+timer64";
+			ti,core-mask = < 0x02 >;
+			reg = <0x25f0000 0x40>;
+		};
+
+		timer2: timer@2600000 {
+			compatible = "ti,c64x+timer64";
+			ti,core-mask = < 0x04 >;
+			reg = <0x2600000 0x40>;
+		};
+
+		timer3: timer@2610000 {
+			compatible = "ti,c64x+timer64";
+			ti,core-mask = < 0x08 >;
+			reg = <0x2610000 0x40>;
+		};
+
+		timer4: timer@2620000 {
+			compatible = "ti,c64x+timer64";
+			ti,core-mask = < 0x10 >;
+			reg = <0x2620000 0x40>;
+		};
+
+		timer5: timer@2630000 {
+			compatible = "ti,c64x+timer64";
+			ti,core-mask = < 0x20 >;
+			reg = <0x2630000 0x40>;
+		};
+
+		clock-controller@29a0000 {
+			compatible = "ti,c6472-pll", "ti,c64x+pll";
+			reg = <0x029a0000 0x200>;
+			ti,c64x+pll-bypass-delay = <200>;
+			ti,c64x+pll-reset-delay = <12000>;
+			ti,c64x+pll-lock-delay = <80000>;
+		};
+
+		device-state-controller@2a80000 {
+			compatible = "ti,c64x+dscr";
+			reg = <0x02a80000 0x1000>;
+
+			ti,dscr-devstat = <0>;
+			ti,dscr-silicon-rev = <0x70c 16 0xff>;
+
+			ti,dscr-mac-fuse-regs = <0x700 1 2 3 4
+						 0x704 5 6 0 0>;
+
+			ti,dscr-rmii-resets = <0x208 1
+					       0x20c 1>;
+
+			ti,dscr-locked-regs = <0x200 0x204 0x0a1e183a
+					       0x40c 0x420 0xbea7
+					       0x41c 0x420 0xbea7>;
+
+			ti,dscr-privperm = <0x41c 0xaaaaaaaa>;
+
+			ti,dscr-devstate-ctl-regs = <0 13 0x200 1 0 0 1>;
+		};
+	};
+};
diff --git a/arch/c6x/boot/dts/tms320c6474.dtsi b/arch/c6x/boot/dts/tms320c6474.dtsi
new file mode 100644
index 0000000..cc601bf
--- /dev/null
+++ b/arch/c6x/boot/dts/tms320c6474.dtsi
@@ -0,0 +1,89 @@
+
+/ {
+	#address-cells = <1>;
+	#size-cells = <1>;
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		cpu@0 {
+			device_type = "cpu";
+			reg = <0>;
+			model = "ti,c64x+";
+		};
+		cpu@1 {
+			device_type = "cpu";
+			reg = <1>;
+			model = "ti,c64x+";
+		};
+		cpu@2 {
+			device_type = "cpu";
+			reg = <2>;
+			model = "ti,c64x+";
+		};
+	};
+
+	soc {
+		compatible = "simple-bus";
+		model = "tms320c6474";
+		#address-cells = <1>;
+		#size-cells = <1>;
+		ranges;
+
+		core_pic: interrupt-controller {
+			interrupt-controller;
+			#interrupt-cells = <1>;
+			compatible = "ti,c64x+core-pic";
+		};
+
+		megamod_pic: interrupt-controller@1800000 {
+		       compatible = "ti,c64x+megamod-pic";
+		       interrupt-controller;
+		       #interrupt-cells = <1>;
+		       reg = <0x1800000 0x1000>;
+		       interrupt-parent = <&core_pic>;
+		};
+
+		cache-controller@1840000 {
+			compatible = "ti,c64x+cache";
+			reg = <0x01840000 0x8400>;
+		};
+
+		timer3: timer@2940000 {
+			compatible = "ti,c64x+timer64";
+			ti,core-mask = < 0x04 >;
+			reg = <0x2940000 0x40>;
+		};
+
+		timer4: timer@2950000 {
+			compatible = "ti,c64x+timer64";
+			ti,core-mask = < 0x02 >;
+			reg = <0x2950000 0x40>;
+		};
+
+		timer5: timer@2960000 {
+			compatible = "ti,c64x+timer64";
+			ti,core-mask = < 0x01 >;
+			reg = <0x2960000 0x40>;
+		};
+
+		device-state-controller@2880800 {
+			compatible = "ti,c64x+dscr";
+			reg = <0x02880800 0x400>;
+
+			ti,dscr-devstat = <0x004>;
+			ti,dscr-silicon-rev = <0x014 28 0xf>;
+			ti,dscr-mac-fuse-regs = <0x34 3 4 5 6
+						 0x38 0 0 1 2>;
+		};
+
+		clock-controller@29a0000 {
+			compatible = "ti,c6474-pll", "ti,c64x+pll";
+			reg = <0x029a0000 0x200>;
+			ti,c64x+pll-bypass-delay = <120>;
+			ti,c64x+pll-reset-delay = <30000>;
+			ti,c64x+pll-lock-delay = <60000>;
+		};
+	};
+};
diff --git a/arch/c6x/boot/linked_dtb.S b/arch/c6x/boot/linked_dtb.S
new file mode 100644
index 0000000..57a4454
--- /dev/null
+++ b/arch/c6x/boot/linked_dtb.S
@@ -0,0 +1,2 @@
+.section __fdt_blob,"a"
+.incbin "arch/c6x/boot/builtin.dtb"
diff --git a/arch/c6x/configs/dsk6455_defconfig b/arch/c6x/configs/dsk6455_defconfig
new file mode 100644
index 0000000..4663487
--- /dev/null
+++ b/arch/c6x/configs/dsk6455_defconfig
@@ -0,0 +1,44 @@
+CONFIG_SOC_TMS320C6455=y
+CONFIG_EXPERIMENTAL=y
+# CONFIG_LOCALVERSION_AUTO is not set
+CONFIG_SYSVIPC=y
+CONFIG_SPARSE_IRQ=y
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_NAMESPACES=y
+# CONFIG_UTS_NS is not set
+# CONFIG_USER_NS is not set
+# CONFIG_PID_NS is not set
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_CC_OPTIMIZE_FOR_SIZE=y
+CONFIG_EXPERT=y
+# CONFIG_FUTEX is not set
+# CONFIG_SLUB_DEBUG is not set
+CONFIG_MODULES=y
+CONFIG_MODULE_FORCE_LOAD=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_MODULE_FORCE_UNLOAD=y
+CONFIG_CMDLINE_BOOL=y
+CONFIG_CMDLINE=""
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_COUNT=2
+CONFIG_BLK_DEV_RAM_SIZE=17000
+CONFIG_MISC_DEVICES=y
+# CONFIG_INPUT is not set
+# CONFIG_SERIO is not set
+# CONFIG_VT is not set
+# CONFIG_HW_RANDOM is not set
+# CONFIG_HWMON is not set
+# CONFIG_USB_SUPPORT is not set
+# CONFIG_IOMMU_SUPPORT is not set
+# CONFIG_MISC_FILESYSTEMS is not set
+CONFIG_CRC16=y
+# CONFIG_ENABLE_MUST_CHECK is not set
+# CONFIG_SCHED_DEBUG is not set
+# CONFIG_DEBUG_BUGVERBOSE is not set
+CONFIG_MTD=y
+CONFIG_MTD_CFI=y
+CONFIG_MTD_CFI_AMDSTD=y
+CONFIG_MTD_PHYSMAP_OF=y
diff --git a/arch/c6x/configs/evmc6457_defconfig b/arch/c6x/configs/evmc6457_defconfig
new file mode 100644
index 0000000..bba40e1
--- /dev/null
+++ b/arch/c6x/configs/evmc6457_defconfig
@@ -0,0 +1,41 @@
+CONFIG_SOC_TMS320C6457=y
+CONFIG_EXPERIMENTAL=y
+# CONFIG_LOCALVERSION_AUTO is not set
+CONFIG_SYSVIPC=y
+CONFIG_SPARSE_IRQ=y
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_NAMESPACES=y
+# CONFIG_UTS_NS is not set
+# CONFIG_USER_NS is not set
+# CONFIG_PID_NS is not set
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_CC_OPTIMIZE_FOR_SIZE=y
+CONFIG_EXPERT=y
+# CONFIG_FUTEX is not set
+# CONFIG_SLUB_DEBUG is not set
+CONFIG_MODULES=y
+CONFIG_MODULE_FORCE_LOAD=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_MODULE_FORCE_UNLOAD=y
+CONFIG_CMDLINE_BOOL=y
+CONFIG_CMDLINE=""
+CONFIG_BOARD_EVM6457=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_COUNT=2
+CONFIG_BLK_DEV_RAM_SIZE=17000
+CONFIG_MISC_DEVICES=y
+# CONFIG_INPUT is not set
+# CONFIG_SERIO is not set
+# CONFIG_VT is not set
+# CONFIG_HW_RANDOM is not set
+# CONFIG_HWMON is not set
+# CONFIG_USB_SUPPORT is not set
+# CONFIG_IOMMU_SUPPORT is not set
+# CONFIG_MISC_FILESYSTEMS is not set
+CONFIG_CRC16=y
+# CONFIG_ENABLE_MUST_CHECK is not set
+# CONFIG_SCHED_DEBUG is not set
+# CONFIG_DEBUG_BUGVERBOSE is not set
diff --git a/arch/c6x/configs/evmc6472_defconfig b/arch/c6x/configs/evmc6472_defconfig
new file mode 100644
index 0000000..8c46155
--- /dev/null
+++ b/arch/c6x/configs/evmc6472_defconfig
@@ -0,0 +1,42 @@
+CONFIG_SOC_TMS320C6472=y
+CONFIG_EXPERIMENTAL=y
+# CONFIG_LOCALVERSION_AUTO is not set
+CONFIG_SYSVIPC=y
+CONFIG_SPARSE_IRQ=y
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_NAMESPACES=y
+# CONFIG_UTS_NS is not set
+# CONFIG_USER_NS is not set
+# CONFIG_PID_NS is not set
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_CC_OPTIMIZE_FOR_SIZE=y
+CONFIG_EXPERT=y
+# CONFIG_FUTEX is not set
+# CONFIG_SLUB_DEBUG is not set
+CONFIG_MODULES=y
+CONFIG_MODULE_FORCE_LOAD=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_MODULE_FORCE_UNLOAD=y
+CONFIG_CMDLINE_BOOL=y
+CONFIG_CMDLINE=""
+# CONFIG_CMDLINE_FORCE is not set
+CONFIG_BOARD_EVM6472=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_COUNT=2
+CONFIG_BLK_DEV_RAM_SIZE=17000
+CONFIG_MISC_DEVICES=y
+# CONFIG_INPUT is not set
+# CONFIG_SERIO is not set
+# CONFIG_VT is not set
+# CONFIG_HW_RANDOM is not set
+# CONFIG_HWMON is not set
+# CONFIG_USB_SUPPORT is not set
+# CONFIG_IOMMU_SUPPORT is not set
+# CONFIG_MISC_FILESYSTEMS is not set
+CONFIG_CRC16=y
+# CONFIG_ENABLE_MUST_CHECK is not set
+# CONFIG_SCHED_DEBUG is not set
+# CONFIG_DEBUG_BUGVERBOSE is not set
diff --git a/arch/c6x/configs/evmc6474_defconfig b/arch/c6x/configs/evmc6474_defconfig
new file mode 100644
index 0000000..15533f6
--- /dev/null
+++ b/arch/c6x/configs/evmc6474_defconfig
@@ -0,0 +1,42 @@
+CONFIG_SOC_TMS320C6474=y
+CONFIG_EXPERIMENTAL=y
+# CONFIG_LOCALVERSION_AUTO is not set
+CONFIG_SYSVIPC=y
+CONFIG_SPARSE_IRQ=y
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_NAMESPACES=y
+# CONFIG_UTS_NS is not set
+# CONFIG_USER_NS is not set
+# CONFIG_PID_NS is not set
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_CC_OPTIMIZE_FOR_SIZE=y
+CONFIG_EXPERT=y
+# CONFIG_FUTEX is not set
+# CONFIG_SLUB_DEBUG is not set
+CONFIG_MODULES=y
+CONFIG_MODULE_FORCE_LOAD=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_MODULE_FORCE_UNLOAD=y
+CONFIG_CMDLINE_BOOL=y
+CONFIG_CMDLINE=""
+# CONFIG_CMDLINE_FORCE is not set
+CONFIG_BOARD_EVM6474=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_COUNT=2
+CONFIG_BLK_DEV_RAM_SIZE=17000
+CONFIG_MISC_DEVICES=y
+# CONFIG_INPUT is not set
+# CONFIG_SERIO is not set
+# CONFIG_VT is not set
+# CONFIG_HW_RANDOM is not set
+# CONFIG_HWMON is not set
+# CONFIG_USB_SUPPORT is not set
+# CONFIG_IOMMU_SUPPORT is not set
+# CONFIG_MISC_FILESYSTEMS is not set
+CONFIG_CRC16=y
+# CONFIG_ENABLE_MUST_CHECK is not set
+# CONFIG_SCHED_DEBUG is not set
+# CONFIG_DEBUG_BUGVERBOSE is not set
diff --git a/arch/c6x/include/asm/Kbuild b/arch/c6x/include/asm/Kbuild
new file mode 100644
index 0000000..13dcf78
--- /dev/null
+++ b/arch/c6x/include/asm/Kbuild
@@ -0,0 +1,54 @@
+include include/asm-generic/Kbuild.asm
+
+generic-y += atomic.h
+generic-y += auxvec.h
+generic-y += bitsperlong.h
+generic-y += bug.h
+generic-y += bugs.h
+generic-y += cputime.h
+generic-y += current.h
+generic-y += device.h
+generic-y += div64.h
+generic-y += dma.h
+generic-y += emergency-restart.h
+generic-y += errno.h
+generic-y += fb.h
+generic-y += fcntl.h
+generic-y += futex.h
+generic-y += hw_irq.h
+generic-y += io.h
+generic-y += ioctl.h
+generic-y += ioctls.h
+generic-y += ipcbuf.h
+generic-y += irq_regs.h
+generic-y += kdebug.h
+generic-y += kmap_types.h
+generic-y += local.h
+generic-y += mman.h
+generic-y += mmu_context.h
+generic-y += msgbuf.h
+generic-y += param.h
+generic-y += pci.h
+generic-y += percpu.h
+generic-y += pgalloc.h
+generic-y += poll.h
+generic-y += posix_types.h
+generic-y += resource.h
+generic-y += scatterlist.h
+generic-y += segment.h
+generic-y += sembuf.h
+generic-y += shmbuf.h
+generic-y += shmparam.h
+generic-y += siginfo.h
+generic-y += socket.h
+generic-y += sockios.h
+generic-y += stat.h
+generic-y += statfs.h
+generic-y += termbits.h
+generic-y += termios.h
+generic-y += tlbflush.h
+generic-y += topology.h
+generic-y += types.h
+generic-y += ucontext.h
+generic-y += user.h
+generic-y += vga.h
diff --git a/arch/c6x/include/asm/asm-offsets.h b/arch/c6x/include/asm/asm-offsets.h
new file mode 100644
index 0000000..d370ee3
--- /dev/null
+++ b/arch/c6x/include/asm/asm-offsets.h
@@ -0,0 +1 @@
+#include <generated/asm-offsets.h>
diff --git a/arch/c6x/include/asm/bitops.h b/arch/c6x/include/asm/bitops.h
new file mode 100644
index 0000000..39ab7e8
--- /dev/null
+++ b/arch/c6x/include/asm/bitops.h
@@ -0,0 +1,105 @@
+/*
+ *  Port on Texas Instruments TMS320C6x architecture
+ *
+ *  Copyright (C) 2004, 2009, 2010 Texas Instruments Incorporated
+ *  Author: Aurelien Jacquiot (aurelien.jacquiot@jaluna.com)
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ */
+#ifndef _ASM_C6X_BITOPS_H
+#define _ASM_C6X_BITOPS_H
+
+#ifdef __KERNEL__
+
+#include <linux/bitops.h>
+
+#include <asm/system.h>
+#include <asm/byteorder.h>
+
+/*
+ * clear_bit() doesn't provide any barrier for the compiler.
+ */
+#define smp_mb__before_clear_bit() barrier()
+#define smp_mb__after_clear_bit()  barrier()
+
+/*
+ * We are lucky, DSP is perfect for bitops: do it in 3 cycles
+ */
+
+/**
+ * __ffs - find first bit in word.
+ * @word: The word to search
+ *
+ * Undefined if no bit exists, so code should check against 0 first.
+ * Note __ffs(0) = undef, __ffs(1) = 0, __ffs(0x80000000) = 31.
+ *
+ */
+static inline unsigned long __ffs(unsigned long x)
+{
+	asm (" bitr  .M1  %0,%0\n"
+	     " nop\n"
+	     " lmbd  .L1  1,%0,%0\n"
+	     : "+a"(x));
+
+	return x;
+}
+
+/*
+ * ffz - find first zero in word.
+ * @word: The word to search
+ *
+ * Undefined if no zero exists, so code should check against ~0UL first.
+ */
+#define ffz(x) __ffs(~(x))
+
+/**
+ * fls - find last (most-significant) bit set
+ * @x: the word to search
+ *
+ * This is defined the same way as ffs.
+ * Note fls(0) = 0, fls(1) = 1, fls(0x80000000) = 32.
+ */
+static inline int fls(int x)
+{
+	if (!x)
+		return 0;
+
+	asm (" lmbd  .L1  1,%0,%0\n" : "+a"(x));
+
+	return 32 - x;
+}
+
+/**
+ * ffs - find first bit set
+ * @x: the word to search
+ *
+ * This is defined the same way as
+ * the libc and compiler builtin ffs routines, therefore
+ * differs in spirit from the above ffz (man ffs).
+ * Note ffs(0) = 0, ffs(1) = 1, ffs(0x80000000) = 32.
+ */
+static inline int ffs(int x)
+{
+	if (!x)
+		return 0;
+
+	return __ffs(x) + 1;
+}
+
+#include <asm-generic/bitops/__fls.h>
+#include <asm-generic/bitops/fls64.h>
+#include <asm-generic/bitops/find.h>
+
+#include <asm-generic/bitops/sched.h>
+#include <asm-generic/bitops/hweight.h>
+#include <asm-generic/bitops/lock.h>
+
+#include <asm-generic/bitops/atomic.h>
+#include <asm-generic/bitops/non-atomic.h>
+#include <asm-generic/bitops/le.h>
+#include <asm-generic/bitops/ext2-atomic.h>
+
+#endif /* __KERNEL__ */
+#endif /* _ASM_C6X_BITOPS_H */
diff --git a/arch/c6x/include/asm/byteorder.h b/arch/c6x/include/asm/byteorder.h
new file mode 100644
index 0000000..166038d
--- /dev/null
+++ b/arch/c6x/include/asm/byteorder.h
@@ -0,0 +1,12 @@
+#ifndef _ASM_C6X_BYTEORDER_H
+#define _ASM_C6X_BYTEORDER_H
+
+#include <asm/types.h>
+
+#ifdef _BIG_ENDIAN
+#include <linux/byteorder/big_endian.h>
+#else /* _BIG_ENDIAN */
+#include <linux/byteorder/little_endian.h>
+#endif /* _BIG_ENDIAN */
+
+#endif /* _ASM_BYTEORDER_H */
diff --git a/arch/c6x/include/asm/cache.h b/arch/c6x/include/asm/cache.h
new file mode 100644
index 0000000..6d521d9
--- /dev/null
+++ b/arch/c6x/include/asm/cache.h
@@ -0,0 +1,90 @@
+/*
+ *  Port on Texas Instruments TMS320C6x architecture
+ *
+ *  Copyright (C) 2005, 2006, 2009, 2010 Texas Instruments Incorporated
+ *  Author: Aurelien Jacquiot (aurelien.jacquiot@jaluna.com)
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ */
+#ifndef _ASM_C6X_CACHE_H
+#define _ASM_C6X_CACHE_H
+
+#include <linux/irqflags.h>
+
+/*
+ * Cache line size
+ */
+#define L1D_CACHE_BYTES   64
+#define L1P_CACHE_BYTES   32
+#define L2_CACHE_BYTES	  128
+
+/*
+ * L2 used as cache
+ */
+#define L2MODE_SIZE	  L2MODE_256K_CACHE
+
+/*
+ * For practical reasons the L1_CACHE_BYTES defines should not be smaller than
+ * the L2 line size
+ */
+#define L1_CACHE_BYTES        L2_CACHE_BYTES
+
+#define L2_CACHE_ALIGN_LOW(x) \
+	(((x) & ~(L2_CACHE_BYTES - 1)))
+#define L2_CACHE_ALIGN_UP(x) \
+	(((x) + (L2_CACHE_BYTES - 1)) & ~(L2_CACHE_BYTES - 1))
+#define L2_CACHE_ALIGN_CNT(x) \
+	(((x) + (sizeof(int) - 1)) & ~(sizeof(int) - 1))
+
+#define ARCH_DMA_MINALIGN	L1_CACHE_BYTES
+#define ARCH_SLAB_MINALIGN	L1_CACHE_BYTES
+
+/*
+ * This is the granularity of hardware cacheability control.
+ */
+#define CACHEABILITY_ALIGN	0x01000000
+
+/*
+ * Align a physical address to MAR regions
+ */
+#define CACHE_REGION_START(v) \
+	(((u32) (v)) & ~(CACHEABILITY_ALIGN - 1))
+#define CACHE_REGION_END(v) \
+	(((u32) (v) + (CACHEABILITY_ALIGN - 1)) & ~(CACHEABILITY_ALIGN - 1))
+
+extern void __init c6x_cache_init(void);
+
+extern void enable_caching(unsigned long start, unsigned long end);
+extern void disable_caching(unsigned long start, unsigned long end);
+
+extern void L1_cache_off(void);
+extern void L1_cache_on(void);
+
+extern void L1P_cache_global_invalidate(void);
+extern void L1D_cache_global_invalidate(void);
+extern void L1D_cache_global_writeback(void);
+extern void L1D_cache_global_writeback_invalidate(void);
+extern void L2_cache_set_mode(unsigned int mode);
+extern void L2_cache_global_writeback_invalidate(void);
+extern void L2_cache_global_writeback(void);
+
+extern void L1P_cache_block_invalidate(unsigned int start, unsigned int end);
+extern void L1D_cache_block_invalidate(unsigned int start, unsigned int end);
+extern void L1D_cache_block_writeback_invalidate(unsigned int start,
+						 unsigned int end);
+extern void L1D_cache_block_writeback(unsigned int start, unsigned int end);
+extern void L2_cache_block_invalidate(unsigned int start, unsigned int end);
+extern void L2_cache_block_writeback(unsigned int start, unsigned int end);
+extern void L2_cache_block_writeback_invalidate(unsigned int start,
+						unsigned int end);
+extern void L2_cache_block_invalidate_nowait(unsigned int start,
+					     unsigned int end);
+extern void L2_cache_block_writeback_nowait(unsigned int start,
+					    unsigned int end);
+
+extern void L2_cache_block_writeback_invalidate_nowait(unsigned int start,
+						       unsigned int end);
+
+#endif /* _ASM_C6X_CACHE_H */
diff --git a/arch/c6x/include/asm/cacheflush.h b/arch/c6x/include/asm/cacheflush.h
new file mode 100644
index 0000000..df5db90
--- /dev/null
+++ b/arch/c6x/include/asm/cacheflush.h
@@ -0,0 +1,65 @@
+/*
+ *  Port on Texas Instruments TMS320C6x architecture
+ *
+ *  Copyright (C) 2004, 2009, 2010 Texas Instruments Incorporated
+ *  Author: Aurelien Jacquiot (aurelien.jacquiot@jaluna.com)
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ */
+#ifndef _ASM_C6X_CACHEFLUSH_H
+#define _ASM_C6X_CACHEFLUSH_H
+
+#include <linux/spinlock.h>
+
+#include <asm/setup.h>
+#include <asm/cache.h>
+#include <asm/mman.h>
+#include <asm/page.h>
+#include <asm/string.h>
+
+/*
+ * virtually-indexed cache management (our cache is physically indexed)
+ */
+#define flush_cache_all()			do {} while (0)
+#define flush_cache_mm(mm)			do {} while (0)
+#define flush_cache_dup_mm(mm)			do {} while (0)
+#define flush_cache_range(mm, start, end)	do {} while (0)
+#define flush_cache_page(vma, vmaddr, pfn)	do {} while (0)
+#define flush_cache_vmap(start, end)		do {} while (0)
+#define flush_cache_vunmap(start, end)		do {} while (0)
+#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0
+#define flush_dcache_page(page)			do {} while (0)
+#define flush_dcache_mmap_lock(mapping)		do {} while (0)
+#define flush_dcache_mmap_unlock(mapping)	do {} while (0)
+
+/*
+ * physically-indexed cache management
+ */
+#define flush_icache_range(s, e)				  \
+do {								  \
+		L1D_cache_block_writeback((s), (e));		  \
+		L1P_cache_block_invalidate((s), (e));		  \
+} while (0)
+
+#define flush_icache_page(vma, page)					  \
+do {								  \
+	if ((vma)->vm_flags & PROT_EXEC)				  \
+		L1D_cache_block_writeback_invalidate(page_address(page),  \
+			(unsigned long) page_address(page) + PAGE_SIZE)); \
+		L1P_cache_block_invalidate(page_address(page),		  \
+			(unsigned long) page_address(page) + PAGE_SIZE)); \
+} while (0)
+
+
+#define copy_to_user_page(vma, page, vaddr, dst, src, len) \
+do {						     \
+	memcpy(dst, src, len);			     \
+	flush_icache_range((unsigned) (dst), (unsigned) (dst) + (len)); \
+} while (0)
+
+#define copy_from_user_page(vma, page, vaddr, dst, src, len) \
+	memcpy(dst, src, len)
+
+#endif /* _ASM_C6X_CACHEFLUSH_H */
diff --git a/arch/c6x/include/asm/checksum.h b/arch/c6x/include/asm/checksum.h
new file mode 100644
index 0000000..7246816
--- /dev/null
+++ b/arch/c6x/include/asm/checksum.h
@@ -0,0 +1,34 @@
+/*
+ *  Copyright (C) 2011 Texas Instruments Incorporated
+ *  Author: Mark Salter <msalter@redhat.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ */
+#ifndef _ASM_C6X_CHECKSUM_H
+#define _ASM_C6X_CHECKSUM_H
+
+static inline __wsum
+csum_tcpudp_nofold(__be32 saddr, __be32 daddr, unsigned short len,
+		   unsigned short proto, __wsum sum)
+{
+	unsigned long long tmp;
+
+	asm ("add     .d1   %1,%5,%1\n"
+	     "|| addu .l1   %3,%4,%0\n"
+	     "addu    .l1   %2,%0,%0\n"
+#ifndef CONFIG_CPU_BIG_ENDIAN
+	     "|| shl  .s1   %1,8,%1\n"
+#endif
+	     "addu    .l1   %1,%0,%0\n"
+	     "add     .l1   %P0,%p0,%2\n"
+	     : "=&a"(tmp), "+a"(len), "+a"(sum)
+	     : "a" (saddr), "a" (daddr), "a" (proto));
+	return sum;
+}
+#define csum_tcpudp_nofold csum_tcpudp_nofold
+
+#include <asm-generic/checksum.h>
+
+#endif /* _ASM_C6X_CHECKSUM_H */
diff --git a/arch/c6x/include/asm/clkdev.h b/arch/c6x/include/asm/clkdev.h
new file mode 100644
index 0000000..76a070b
--- /dev/null
+++ b/arch/c6x/include/asm/clkdev.h
@@ -0,0 +1,22 @@
+#ifndef _ASM_CLKDEV_H
+#define _ASM_CLKDEV_H
+
+#include <linux/slab.h>
+
+struct clk;
+
+static inline int __clk_get(struct clk *clk)
+{
+	return 1;
+}
+
+static inline void __clk_put(struct clk *clk)
+{
+}
+
+static inline struct clk_lookup_alloc *__clkdev_alloc(size_t size)
+{
+	return kzalloc(size, GFP_KERNEL);
+}
+
+#endif /* _ASM_CLKDEV_H */
diff --git a/arch/c6x/include/asm/clock.h b/arch/c6x/include/asm/clock.h
new file mode 100644
index 0000000..bcf42b2
--- /dev/null
+++ b/arch/c6x/include/asm/clock.h
@@ -0,0 +1,148 @@
+/*
+ * TI C64X clock definitions
+ *
+ * Copyright (C) 2010, 2011 Texas Instruments.
+ * Contributed by: Mark Salter <msalter@redhat.com>
+ *
+ * Copied heavily from arm/mach-davinci/clock.h, so:
+ *
+ * Copyright (C) 2006-2007 Texas Instruments.
+ * Copyright (C) 2008-2009 Deep Root Systems, LLC
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef _ASM_C6X_CLOCK_H
+#define _ASM_C6X_CLOCK_H
+
+#ifndef __ASSEMBLER__
+
+#include <linux/list.h>
+
+/* PLL/Reset register offsets */
+#define PLLCTL		0x100
+#define PLLM		0x110
+#define PLLPRE		0x114
+#define PLLDIV1		0x118
+#define PLLDIV2		0x11c
+#define PLLDIV3		0x120
+#define PLLPOST		0x128
+#define PLLCMD		0x138
+#define PLLSTAT		0x13c
+#define PLLALNCTL	0x140
+#define PLLDCHANGE	0x144
+#define PLLCKEN		0x148
+#define PLLCKSTAT	0x14c
+#define PLLSYSTAT	0x150
+#define PLLDIV4		0x160
+#define PLLDIV5		0x164
+#define PLLDIV6		0x168
+#define PLLDIV7		0x16c
+#define PLLDIV8		0x170
+#define PLLDIV9		0x174
+#define PLLDIV10	0x178
+#define PLLDIV11	0x17c
+#define PLLDIV12	0x180
+#define PLLDIV13	0x184
+#define PLLDIV14	0x188
+#define PLLDIV15	0x18c
+#define PLLDIV16	0x190
+
+/* PLLM register bits */
+#define PLLM_PLLM_MASK	0xff
+#define PLLM_VAL(x)	((x) - 1)
+
+/* PREDIV register bits */
+#define PLLPREDIV_EN	BIT(15)
+#define PLLPREDIV_VAL(x) ((x) - 1)
+
+/* PLLCTL register bits */
+#define PLLCTL_PLLEN	BIT(0)
+#define PLLCTL_PLLPWRDN	BIT(1)
+#define PLLCTL_PLLRST	BIT(3)
+#define PLLCTL_PLLDIS	BIT(4)
+#define PLLCTL_PLLENSRC	BIT(5)
+#define PLLCTL_CLKMODE	BIT(8)
+
+/* PLLCMD register bits */
+#define PLLCMD_GOSTAT	BIT(0)
+
+/* PLLSTAT register bits */
+#define PLLSTAT_GOSTAT	BIT(0)
+
+/* PLLDIV register bits */
+#define PLLDIV_EN	BIT(15)
+#define PLLDIV_RATIO_MASK 0x1f
+#define PLLDIV_RATIO(x) ((x) - 1)
+
+struct pll_data;
+
+struct clk {
+	struct list_head	node;
+	struct module		*owner;
+	const char		*name;
+	unsigned long		rate;
+	int			usecount;
+	u32			flags;
+	struct clk		*parent;
+	struct list_head	children;	/* list of children */
+	struct list_head	childnode;	/* parent's child list node */
+	struct pll_data		*pll_data;
+	u32			div;
+	unsigned long (*recalc) (struct clk *);
+	int (*set_rate) (struct clk *clk, unsigned long rate);
+	int (*round_rate) (struct clk *clk, unsigned long rate);
+};
+
+/* Clock flags: SoC-specific flags start at BIT(16) */
+#define ALWAYS_ENABLED		BIT(1)
+#define CLK_PLL			BIT(2) /* PLL-derived clock */
+#define PRE_PLL			BIT(3) /* source is before PLL mult/div */
+#define FIXED_DIV_PLL		BIT(4) /* fixed divisor from PLL */
+#define FIXED_RATE_PLL		BIT(5) /* fixed ouput rate PLL */
+
+#define MAX_PLL_SYSCLKS 16
+
+struct pll_data {
+	void __iomem *base;
+	u32 num;
+	u32 flags;
+	u32 input_rate;
+	u32 bypass_delay; /* in loops */
+	u32 reset_delay;  /* in loops */
+	u32 lock_delay;   /* in loops */
+	struct clk sysclks[MAX_PLL_SYSCLKS + 1];
+};
+
+/* pll_data flag bit */
+#define PLL_HAS_PRE	BIT(0)
+#define PLL_HAS_MUL	BIT(1)
+#define PLL_HAS_POST	BIT(2)
+
+#define CLK(dev, con, ck)	\
+	{			\
+		.dev_id = dev,	\
+		.con_id = con,	\
+		.clk = ck,	\
+	}			\
+
+extern void c6x_clks_init(struct clk_lookup *clocks);
+extern int clk_register(struct clk *clk);
+extern void clk_unregister(struct clk *clk);
+extern void c64x_setup_clocks(void);
+
+extern struct pll_data c6x_soc_pll1;
+
+extern struct clk clkin1;
+extern struct clk c6x_core_clk;
+extern struct clk c6x_i2c_clk;
+extern struct clk c6x_watchdog_clk;
+extern struct clk c6x_mcbsp1_clk;
+extern struct clk c6x_mcbsp2_clk;
+extern struct clk c6x_mdio_clk;
+
+#endif
+
+#endif /* _ASM_C6X_CLOCK_H */
diff --git a/arch/c6x/include/asm/delay.h b/arch/c6x/include/asm/delay.h
new file mode 100644
index 0000000..f314c2e
--- /dev/null
+++ b/arch/c6x/include/asm/delay.h
@@ -0,0 +1,67 @@
+/*
+ *  Port on Texas Instruments TMS320C6x architecture
+ *
+ *  Copyright (C) 2004, 2009, 2010, 2011 Texas Instruments Incorporated
+ *  Author: Aurelien Jacquiot (aurelien.jacquiot@jaluna.com)
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ */
+#ifndef _ASM_C6X_DELAY_H
+#define _ASM_C6X_DELAY_H
+
+#include <linux/kernel.h>
+
+extern unsigned int ticks_per_ns_scaled;
+
+static inline void __delay(unsigned long loops)
+{
+	uint32_t tmp;
+
+	/* 6 cycles per loop */
+	asm volatile ("        mv    .s1  %0,%1\n"
+		      "0: [%1] b     .s1  0b\n"
+		      "        add   .l1  -6,%0,%0\n"
+		      "        cmplt .l1  1,%0,%1\n"
+		      "        nop   3\n"
+		      : "+a"(loops), "=A"(tmp));
+}
+
+static inline void _c6x_tickdelay(unsigned int x)
+{
+	uint32_t cnt, endcnt;
+
+	asm volatile ("        mvc   .s2   TSCL,%0\n"
+		      "        add   .s2x  %0,%1,%2\n"
+		      " ||     mvk   .l2   1,B0\n"
+		      "0: [B0] b     .s2   0b\n"
+		      "        mvc   .s2   TSCL,%0\n"
+		      "        sub   .s2   %0,%2,%0\n"
+		      "        cmpgt .l2   0,%0,B0\n"
+		      "        nop   2\n"
+		      : "=b"(cnt), "+a"(x), "=b"(endcnt) : : "B0");
+}
+
+/* use scaled math to avoid slow division */
+#define C6X_NDELAY_SCALE 10
+
+static inline void _ndelay(unsigned int n)
+{
+	_c6x_tickdelay((ticks_per_ns_scaled * n) >> C6X_NDELAY_SCALE);
+}
+
+static inline void _udelay(unsigned int n)
+{
+	while (n >= 10) {
+		_ndelay(10000);
+		n -= 10;
+	}
+	while (n-- > 0)
+		_ndelay(1000);
+}
+
+#define udelay(x) _udelay((unsigned int)(x))
+#define ndelay(x) _ndelay((unsigned int)(x))
+
+#endif /* _ASM_C6X_DELAY_H */
diff --git a/arch/c6x/include/asm/dma-mapping.h b/arch/c6x/include/asm/dma-mapping.h
new file mode 100644
index 0000000..03579fd
--- /dev/null
+++ b/arch/c6x/include/asm/dma-mapping.h
@@ -0,0 +1,91 @@
+/*
+ *  Port on Texas Instruments TMS320C6x architecture
+ *
+ *  Copyright (C) 2004, 2009, 2010, 2011 Texas Instruments Incorporated
+ *  Author: Aurelien Jacquiot <aurelien.jacquiot@ti.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ *
+ */
+#ifndef _ASM_C6X_DMA_MAPPING_H
+#define _ASM_C6X_DMA_MAPPING_H
+
+#include <linux/dma-debug.h>
+#include <asm-generic/dma-coherent.h>
+
+#define dma_supported(d, m)	1
+
+static inline int dma_set_mask(struct device *dev, u64 dma_mask)
+{
+	if (!dev->dma_mask || !dma_supported(dev, dma_mask))
+		return -EIO;
+
+	*dev->dma_mask = dma_mask;
+
+	return 0;
+}
+
+/*
+ * DMA errors are defined by all-bits-set in the DMA address.
+ */
+static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
+{
+	return dma_addr == ~0;
+}
+
+extern dma_addr_t dma_map_single(struct device *dev, void *cpu_addr,
+				 size_t size, enum dma_data_direction dir);
+
+extern void dma_unmap_single(struct device *dev, dma_addr_t handle,
+			     size_t size, enum dma_data_direction dir);
+
+extern int dma_map_sg(struct device *dev, struct scatterlist *sglist,
+		      int nents, enum dma_data_direction direction);
+
+extern void dma_unmap_sg(struct device *dev, struct scatterlist *sglist,
+			 int nents, enum dma_data_direction direction);
+
+static inline dma_addr_t dma_map_page(struct device *dev, struct page *page,
+				      unsigned long offset, size_t size,
+				      enum dma_data_direction dir)
+{
+	dma_addr_t handle;
+
+	handle = dma_map_single(dev, page_address(page) + offset, size, dir);
+
+	debug_dma_map_page(dev, page, offset, size, dir, handle, false);
+
+	return handle;
+}
+
+static inline void dma_unmap_page(struct device *dev, dma_addr_t handle,
+		size_t size, enum dma_data_direction dir)
+{
+	dma_unmap_single(dev, handle, size, dir);
+
+	debug_dma_unmap_page(dev, handle, size, dir, false);
+}
+
+extern void dma_sync_single_for_cpu(struct device *dev, dma_addr_t handle,
+				    size_t size, enum dma_data_direction dir);
+
+extern void dma_sync_single_for_device(struct device *dev, dma_addr_t handle,
+				       size_t size,
+				       enum dma_data_direction dir);
+
+extern void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
+				int nents, enum dma_data_direction dir);
+
+extern void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
+				   int nents, enum dma_data_direction dir);
+
+extern void coherent_mem_init(u32 start, u32 size);
+extern void *dma_alloc_coherent(struct device *, size_t, dma_addr_t *, gfp_t);
+extern void dma_free_coherent(struct device *, size_t, void *, dma_addr_t);
+
+#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent((d), (s), (h), (f))
+#define dma_free_noncoherent(d, s, v, h)  dma_free_coherent((d), (s), (v), (h))
+
+#endif	/* _ASM_C6X_DMA_MAPPING_H */
diff --git a/arch/c6x/include/asm/dscr.h b/arch/c6x/include/asm/dscr.h
new file mode 100644
index 0000000..561ba83
--- /dev/null
+++ b/arch/c6x/include/asm/dscr.h
@@ -0,0 +1,34 @@
+/*
+ *  Copyright (C) 2011 Texas Instruments Incorporated
+ *  Author: Mark Salter <msalter@redhat.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ *
+ */
+#ifndef _ASM_C6X_DSCR_H
+#define _ASM_C6X_DSCR_H
+
+enum dscr_devstate_t {
+	DSCR_DEVSTATE_ENABLED,
+	DSCR_DEVSTATE_DISABLED,
+};
+
+/*
+ * Set the device state of the device with the given ID.
+ *
+ * Individual drivers should use this to enable or disable the
+ * hardware device. The devid used to identify the device being
+ * controlled should be a property in the device's tree node.
+ */
+extern void dscr_set_devstate(int devid, enum dscr_devstate_t state);
+
+/*
+ * Assert or de-assert an RMII reset.
+ */
+extern void dscr_rmii_reset(int id, int assert);
+
+extern void dscr_probe(void);
+
+#endif /* _ASM_C6X_DSCR_H */
diff --git a/arch/c6x/include/asm/elf.h b/arch/c6x/include/asm/elf.h
new file mode 100644
index 0000000..d57865b
--- /dev/null
+++ b/arch/c6x/include/asm/elf.h
@@ -0,0 +1,113 @@
+/*
+ *  Port on Texas Instruments TMS320C6x architecture
+ *
+ *  Copyright (C) 2004, 2009, 2010 Texas Instruments Incorporated
+ *  Author: Aurelien Jacquiot (aurelien.jacquiot@jaluna.com)
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ */
+#ifndef _ASM_C6X_ELF_H
+#define _ASM_C6X_ELF_H
+
+/*
+ * ELF register definitions..
+ */
+#include <asm/ptrace.h>
+
+typedef unsigned long elf_greg_t;
+typedef unsigned long elf_fpreg_t;
+
+#define ELF_NGREG  58
+#define ELF_NFPREG 1
+
+typedef elf_greg_t elf_gregset_t[ELF_NGREG];
+typedef elf_fpreg_t elf_fpregset_t[ELF_NFPREG];
+
+/*
+ * This is used to ensure we don't load something for the wrong architecture.
+ */
+#define elf_check_arch(x) ((x)->e_machine == EM_TI_C6000)
+
+#define elf_check_const_displacement(x) (1)
+
+/*
+ * These are used to set parameters in the core dumps.
+ */
+#ifdef __LITTLE_ENDIAN__
+#define ELF_DATA	ELFDATA2LSB
+#else
+#define ELF_DATA	ELFDATA2MSB
+#endif
+
+#define ELF_CLASS	ELFCLASS32
+#define ELF_ARCH	EM_TI_C6000
+
+/* Nothing for now. Need to setup DP... */
+#define ELF_PLAT_INIT(_r)
+
+#define USE_ELF_CORE_DUMP
+#define ELF_EXEC_PAGESIZE	4096
+
+#define ELF_CORE_COPY_REGS(_dest, _regs)		\
+	memcpy((char *) &_dest, (char *) _regs,		\
+	sizeof(struct pt_regs));
+
+/* This yields a mask that user programs can use to figure out what
+   instruction set this cpu supports.  */
+
+#define ELF_HWCAP	(0)
+
+/* This yields a string that ld.so will use to load implementation
+   specific libraries for optimization.  This is more specific in
+   intent than poking at uname or /proc/cpuinfo.  */
+
+#define ELF_PLATFORM  (NULL)
+
+#define SET_PERSONALITY(ex) set_personality(PER_LINUX)
+
+/* C6X specific section types */
+#define SHT_C6000_UNWIND	0x70000001
+#define SHT_C6000_PREEMPTMAP	0x70000002
+#define SHT_C6000_ATTRIBUTES	0x70000003
+
+/* C6X specific DT_ tags */
+#define DT_C6000_DSBT_BASE	0x70000000
+#define DT_C6000_DSBT_SIZE	0x70000001
+#define DT_C6000_PREEMPTMAP	0x70000002
+#define DT_C6000_DSBT_INDEX	0x70000003
+
+/* C6X specific relocs */
+#define R_C6000_NONE		0
+#define R_C6000_ABS32		1
+#define R_C6000_ABS16		2
+#define R_C6000_ABS8		3
+#define R_C6000_PCR_S21		4
+#define R_C6000_PCR_S12		5
+#define R_C6000_PCR_S10		6
+#define R_C6000_PCR_S7		7
+#define R_C6000_ABS_S16		8
+#define R_C6000_ABS_L16		9
+#define R_C6000_ABS_H16		10
+#define R_C6000_SBR_U15_B	11
+#define R_C6000_SBR_U15_H	12
+#define R_C6000_SBR_U15_W	13
+#define R_C6000_SBR_S16		14
+#define R_C6000_SBR_L16_B	15
+#define R_C6000_SBR_L16_H	16
+#define R_C6000_SBR_L16_W	17
+#define R_C6000_SBR_H16_B	18
+#define R_C6000_SBR_H16_H	19
+#define R_C6000_SBR_H16_W	20
+#define R_C6000_SBR_GOT_U15_W	21
+#define R_C6000_SBR_GOT_L16_W	22
+#define R_C6000_SBR_GOT_H16_W	23
+#define R_C6000_DSBT_INDEX	24
+#define R_C6000_PREL31		25
+#define R_C6000_COPY		26
+#define R_C6000_ALIGN		253
+#define R_C6000_FPHEAD		254
+#define R_C6000_NOCMP		255
+
+#endif /*_ASM_C6X_ELF_H */
diff --git a/arch/c6x/include/asm/ftrace.h b/arch/c6x/include/asm/ftrace.h
new file mode 100644
index 0000000..3701958
--- /dev/null
+++ b/arch/c6x/include/asm/ftrace.h
@@ -0,0 +1,6 @@
+#ifndef _ASM_C6X_FTRACE_H
+#define _ASM_C6X_FTRACE_H
+
+/* empty */
+
+#endif /* _ASM_C6X_FTRACE_H */
diff --git a/arch/c6x/include/asm/hardirq.h b/arch/c6x/include/asm/hardirq.h
new file mode 100644
index 0000000..9621954
--- /dev/null
+++ b/arch/c6x/include/asm/hardirq.h
@@ -0,0 +1,20 @@
+/*
+ *  Port on Texas Instruments TMS320C6x architecture
+ *
+ *  Copyright (C) 2004, 2009, 2010 Texas Instruments Incorporated
+ *  Author: Aurelien Jacquiot (aurelien.jacquiot@jaluna.com)
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ */
+
+#ifndef _ASM_C6X_HARDIRQ_H
+#define _ASM_C6X_HARDIRQ_H
+
+extern void ack_bad_irq(int irq);
+#define ack_bad_irq ack_bad_irq
+
+#include <asm-generic/hardirq.h>
+
+#endif /* _ASM_C6X_HARDIRQ_H */
diff --git a/arch/c6x/include/asm/irq.h b/arch/c6x/include/asm/irq.h
new file mode 100644
index 0000000..a6ae3c9
--- /dev/null
+++ b/arch/c6x/include/asm/irq.h
@@ -0,0 +1,302 @@
+/*
+ *  Port on Texas Instruments TMS320C6x architecture
+ *
+ *  Copyright (C) 2004, 2006, 2009, 2010, 2011 Texas Instruments Incorporated
+ *  Author: Aurelien Jacquiot (aurelien.jacquiot@jaluna.com)
+ *
+ *  Large parts taken directly from powerpc.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ */
+#ifndef _ASM_C6X_IRQ_H
+#define _ASM_C6X_IRQ_H
+
+#include <linux/threads.h>
+#include <linux/list.h>
+#include <linux/radix-tree.h>
+#include <asm/percpu.h>
+
+#define irq_canonicalize(irq)  (irq)
+
+/*
+ * The C64X+ core has 16 IRQ vectors. One each is used by Reset and NMI. Two
+ * are reserved. The remaining 12 vectors are used to route SoC interrupts.
+ * These interrupt vectors are prioritized with IRQ 4 having the highest
+ * priority and IRQ 15 having the lowest.
+ *
+ * The C64x+ megamodule provides a PIC which combines SoC IRQ sources into a
+ * single core IRQ vector. There are four combined sources, each of which
+ * feed into one of the 12 general interrupt vectors. The remaining 8 vectors
+ * can each route a single SoC interrupt directly.
+ */
+#define NR_PRIORITY_IRQS 16
+
+#define NR_IRQS_LEGACY	NR_PRIORITY_IRQS
+
+/* Total number of virq in the platform */
+#define NR_IRQS		256
+
+/* This number is used when no interrupt has been assigned */
+#define NO_IRQ		0
+
+/* This type is the placeholder for a hardware interrupt number. It has to
+ * be big enough to enclose whatever representation is used by a given
+ * platform.
+ */
+typedef unsigned long irq_hw_number_t;
+
+/* Interrupt controller "host" data structure. This could be defined as a
+ * irq domain controller. That is, it handles the mapping between hardware
+ * and virtual interrupt numbers for a given interrupt domain. The host
+ * structure is generally created by the PIC code for a given PIC instance
+ * (though a host can cover more than one PIC if they have a flat number
+ * model). It's the host callbacks that are responsible for setting the
+ * irq_chip on a given irq_desc after it's been mapped.
+ *
+ * The host code and data structures are fairly agnostic to the fact that
+ * we use an open firmware device-tree. We do have references to struct
+ * device_node in two places: in irq_find_host() to find the host matching
+ * a given interrupt controller node, and of course as an argument to its
+ * counterpart host->ops->match() callback. However, those are treated as
+ * generic pointers by the core and the fact that it's actually a device-node
+ * pointer is purely a convention between callers and implementation. This
+ * code could thus be used on other architectures by replacing those two
+ * by some sort of arch-specific void * "token" used to identify interrupt
+ * controllers.
+ */
+struct irq_host;
+struct radix_tree_root;
+struct device_node;
+
+/* Functions below are provided by the host and called whenever a new mapping
+ * is created or an old mapping is disposed. The host can then proceed to
+ * whatever internal data structures management is required. It also needs
+ * to setup the irq_desc when returning from map().
+ */
+struct irq_host_ops {
+	/* Match an interrupt controller device node to a host, returns
+	 * 1 on a match
+	 */
+	int (*match)(struct irq_host *h, struct device_node *node);
+
+	/* Create or update a mapping between a virtual irq number and a hw
+	 * irq number. This is called only once for a given mapping.
+	 */
+	int (*map)(struct irq_host *h, unsigned int virq, irq_hw_number_t hw);
+
+	/* Dispose of such a mapping */
+	void (*unmap)(struct irq_host *h, unsigned int virq);
+
+	/* Translate device-tree interrupt specifier from raw format coming
+	 * from the firmware to a irq_hw_number_t (interrupt line number) and
+	 * type (sense) that can be passed to set_irq_type(). In the absence
+	 * of this callback, irq_create_of_mapping() and irq_of_parse_and_map()
+	 * will return the hw number in the first cell and IRQ_TYPE_NONE for
+	 * the type (which amount to keeping whatever default value the
+	 * interrupt controller has for that line)
+	 */
+	int (*xlate)(struct irq_host *h, struct device_node *ctrler,
+		     const u32 *intspec, unsigned int intsize,
+		     irq_hw_number_t *out_hwirq, unsigned int *out_type);
+};
+
+struct irq_host {
+	struct list_head	link;
+
+	/* type of reverse mapping technique */
+	unsigned int		revmap_type;
+#define IRQ_HOST_MAP_PRIORITY   0 /* core priority irqs, get irqs 1..15 */
+#define IRQ_HOST_MAP_NOMAP	1 /* no fast reverse mapping */
+#define IRQ_HOST_MAP_LINEAR	2 /* linear map of interrupts */
+#define IRQ_HOST_MAP_TREE	3 /* radix tree */
+	union {
+		struct {
+			unsigned int size;
+			unsigned int *revmap;
+		} linear;
+		struct radix_tree_root tree;
+	} revmap_data;
+	struct irq_host_ops	*ops;
+	void			*host_data;
+	irq_hw_number_t		inval_irq;
+
+	/* Optional device node pointer */
+	struct device_node	*of_node;
+};
+
+struct irq_data;
+extern irq_hw_number_t irqd_to_hwirq(struct irq_data *d);
+extern irq_hw_number_t virq_to_hw(unsigned int virq);
+extern bool virq_is_host(unsigned int virq, struct irq_host *host);
+
+/**
+ * irq_alloc_host - Allocate a new irq_host data structure
+ * @of_node: optional device-tree node of the interrupt controller
+ * @revmap_type: type of reverse mapping to use
+ * @revmap_arg: for IRQ_HOST_MAP_LINEAR linear only: size of the map
+ * @ops: map/unmap host callbacks
+ * @inval_irq: provide a hw number in that host space that is always invalid
+ *
+ * Allocates and initialize and irq_host structure. Note that in the case of
+ * IRQ_HOST_MAP_LEGACY, the map() callback will be called before this returns
+ * for all legacy interrupts except 0 (which is always the invalid irq for
+ * a legacy controller). For a IRQ_HOST_MAP_LINEAR, the map is allocated by
+ * this call as well. For a IRQ_HOST_MAP_TREE, the radix tree will be allocated
+ * later during boot automatically (the reverse mapping will use the slow path
+ * until that happens).
+ */
+extern struct irq_host *irq_alloc_host(struct device_node *of_node,
+				       unsigned int revmap_type,
+				       unsigned int revmap_arg,
+				       struct irq_host_ops *ops,
+				       irq_hw_number_t inval_irq);
+
+
+/**
+ * irq_find_host - Locates a host for a given device node
+ * @node: device-tree node of the interrupt controller
+ */
+extern struct irq_host *irq_find_host(struct device_node *node);
+
+
+/**
+ * irq_set_default_host - Set a "default" host
+ * @host: default host pointer
+ *
+ * For convenience, it's possible to set a "default" host that will be used
+ * whenever NULL is passed to irq_create_mapping(). It makes life easier for
+ * platforms that want to manipulate a few hard coded interrupt numbers that
+ * aren't properly represented in the device-tree.
+ */
+extern void irq_set_default_host(struct irq_host *host);
+
+
+/**
+ * irq_set_virq_count - Set the maximum number of virt irqs
+ * @count: number of linux virtual irqs, capped with NR_IRQS
+ *
+ * This is mainly for use by platforms like iSeries who want to program
+ * the virtual irq number in the controller to avoid the reverse mapping
+ */
+extern void irq_set_virq_count(unsigned int count);
+
+
+/**
+ * irq_create_mapping - Map a hardware interrupt into linux virq space
+ * @host: host owning this hardware interrupt or NULL for default host
+ * @hwirq: hardware irq number in that host space
+ *
+ * Only one mapping per hardware interrupt is permitted. Returns a linux
+ * virq number.
+ * If the sense/trigger is to be specified, set_irq_type() should be called
+ * on the number returned from that call.
+ */
+extern unsigned int irq_create_mapping(struct irq_host *host,
+				       irq_hw_number_t hwirq);
+
+
+/**
+ * irq_dispose_mapping - Unmap an interrupt
+ * @virq: linux virq number of the interrupt to unmap
+ */
+extern void irq_dispose_mapping(unsigned int virq);
+
+/**
+ * irq_find_mapping - Find a linux virq from an hw irq number.
+ * @host: host owning this hardware interrupt
+ * @hwirq: hardware irq number in that host space
+ *
+ * This is a slow path, for use by generic code. It's expected that an
+ * irq controller implementation directly calls the appropriate low level
+ * mapping function.
+ */
+extern unsigned int irq_find_mapping(struct irq_host *host,
+				     irq_hw_number_t hwirq);
+
+/**
+ * irq_create_direct_mapping - Allocate a virq for direct mapping
+ * @host: host to allocate the virq for or NULL for default host
+ *
+ * This routine is used for irq controllers which can choose the hardware
+ * interrupt numbers they generate. In such a case it's simplest to use
+ * the linux virq as the hardware interrupt number.
+ */
+extern unsigned int irq_create_direct_mapping(struct irq_host *host);
+
+/**
+ * irq_radix_revmap_insert - Insert a hw irq to linux virq number mapping.
+ * @host: host owning this hardware interrupt
+ * @virq: linux irq number
+ * @hwirq: hardware irq number in that host space
+ *
+ * This is for use by irq controllers that use a radix tree reverse
+ * mapping for fast lookup.
+ */
+extern void irq_radix_revmap_insert(struct irq_host *host, unsigned int virq,
+				    irq_hw_number_t hwirq);
+
+/**
+ * irq_radix_revmap_lookup - Find a linux virq from a hw irq number.
+ * @host: host owning this hardware interrupt
+ * @hwirq: hardware irq number in that host space
+ *
+ * This is a fast path, for use by irq controller code that uses radix tree
+ * revmaps
+ */
+extern unsigned int irq_radix_revmap_lookup(struct irq_host *host,
+					    irq_hw_number_t hwirq);
+
+/**
+ * irq_linear_revmap - Find a linux virq from a hw irq number.
+ * @host: host owning this hardware interrupt
+ * @hwirq: hardware irq number in that host space
+ *
+ * This is a fast path, for use by irq controller code that uses linear
+ * revmaps. It does fallback to the slow path if the revmap doesn't exist
+ * yet and will create the revmap entry with appropriate locking
+ */
+
+extern unsigned int irq_linear_revmap(struct irq_host *host,
+				      irq_hw_number_t hwirq);
+
+
+
+/**
+ * irq_alloc_virt - Allocate virtual irq numbers
+ * @host: host owning these new virtual irqs
+ * @count: number of consecutive numbers to allocate
+ * @hint: pass a hint number, the allocator will try to use a 1:1 mapping
+ *
+ * This is a low level function that is used internally by irq_create_mapping()
+ * and that can be used by some irq controllers implementations for things
+ * like allocating ranges of numbers for MSIs. The revmaps are left untouched.
+ */
+extern unsigned int irq_alloc_virt(struct irq_host *host,
+				   unsigned int count,
+				   unsigned int hint);
+
+/**
+ * irq_free_virt - Free virtual irq numbers
+ * @virq: virtual irq number of the first interrupt to free
+ * @count: number of interrupts to free
+ *
+ * This function is the opposite of irq_alloc_virt. It will not clear reverse
+ * maps, this should be done previously by unmap'ing the interrupt. In fact,
+ * all interrupts covered by the range being freed should have been unmapped
+ * prior to calling this.
+ */
+extern void irq_free_virt(unsigned int virq, unsigned int count);
+
+extern void __init init_pic_c64xplus(void);
+
+extern void init_IRQ(void);
+
+struct pt_regs;
+
+extern asmlinkage void c6x_do_IRQ(unsigned int prio, struct pt_regs *regs);
+
+extern unsigned long irq_err_count;
+
+#endif /* _ASM_C6X_IRQ_H */
diff --git a/arch/c6x/include/asm/irqflags.h b/arch/c6x/include/asm/irqflags.h
new file mode 100644
index 0000000..cf78e09
--- /dev/null
+++ b/arch/c6x/include/asm/irqflags.h
@@ -0,0 +1,72 @@
+/*
+ *  C6X IRQ flag handling
+ *
+ * Copyright (C) 2010 Texas Instruments Incorporated
+ * Written by Mark Salter (msalter@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#ifndef _ASM_IRQFLAGS_H
+#define _ASM_IRQFLAGS_H
+
+#ifndef __ASSEMBLY__
+
+/* read interrupt enabled status */
+static inline unsigned long arch_local_save_flags(void)
+{
+	unsigned long flags;
+
+	asm volatile (" mvc .s2 CSR,%0\n" : "=b"(flags));
+	return flags;
+}
+
+/* set interrupt enabled status */
+static inline void arch_local_irq_restore(unsigned long flags)
+{
+	asm volatile (" mvc .s2 %0,CSR\n" : : "b"(flags));
+}
+
+/* unconditionally enable interrupts */
+static inline void arch_local_irq_enable(void)
+{
+	unsigned long flags = arch_local_save_flags();
+	flags |= 1;
+	arch_local_irq_restore(flags);
+}
+
+/* unconditionally disable interrupts */
+static inline void arch_local_irq_disable(void)
+{
+	unsigned long flags = arch_local_save_flags();
+	flags &= ~1;
+	arch_local_irq_restore(flags);
+}
+
+/* get status and disable interrupts */
+static inline unsigned long arch_local_irq_save(void)
+{
+	unsigned long flags;
+
+	flags = arch_local_save_flags();
+	arch_local_irq_restore(flags & ~1);
+	return flags;
+}
+
+/* test flags */
+static inline int arch_irqs_disabled_flags(unsigned long flags)
+{
+	return (flags & 1) == 0;
+}
+
+/* test hardware interrupt enable bit */
+static inline int arch_irqs_disabled(void)
+{
+	return arch_irqs_disabled_flags(arch_local_save_flags());
+}
+
+#endif /* __ASSEMBLY__ */
+#endif /* __ASM_IRQFLAGS_H */
diff --git a/arch/c6x/include/asm/linkage.h b/arch/c6x/include/asm/linkage.h
new file mode 100644
index 0000000..376925c
--- /dev/null
+++ b/arch/c6x/include/asm/linkage.h
@@ -0,0 +1,30 @@
+#ifndef _ASM_C6X_LINKAGE_H
+#define _ASM_C6X_LINKAGE_H
+
+#ifdef __ASSEMBLER__
+
+#define __ALIGN		.align 2
+#define __ALIGN_STR	".align 2"
+
+#ifndef __DSBT__
+#define ENTRY(name)		\
+	.global name @		\
+	__ALIGN @		\
+name:
+#else
+#define ENTRY(name)		\
+	.global name @		\
+	.hidden name @		\
+	__ALIGN @		\
+name:
+#endif
+
+#define ENDPROC(name)		\
+	.type name, @function @	\
+	.size name, . - name
+
+#endif
+
+#include <asm-generic/linkage.h>
+
+#endif /* _ASM_C6X_LINKAGE_H */
diff --git a/arch/c6x/include/asm/megamod-pic.h b/arch/c6x/include/asm/megamod-pic.h
new file mode 100644
index 0000000..eca0a86
--- /dev/null
+++ b/arch/c6x/include/asm/megamod-pic.h
@@ -0,0 +1,9 @@
+#ifndef _C6X_MEGAMOD_PIC_H
+#define _C6X_MEGAMOD_PIC_H
+
+#ifdef __KERNEL__
+
+extern void __init megamod_pic_init(void);
+
+#endif /* __KERNEL__ */
+#endif /* _C6X_MEGAMOD_PIC_H */
diff --git a/arch/c6x/include/asm/mmu.h b/arch/c6x/include/asm/mmu.h
new file mode 100644
index 0000000..41592bf
--- /dev/null
+++ b/arch/c6x/include/asm/mmu.h
@@ -0,0 +1,18 @@
+/*
+ *  Port on Texas Instruments TMS320C6x architecture
+ *
+ *  Copyright (C) 2004, 2009, 2010 Texas Instruments Incorporated
+ *  Author: Aurelien Jacquiot (aurelien.jacquiot@jaluna.com)
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ */
+#ifndef _ASM_C6X_MMU_H
+#define _ASM_C6X_MMU_H
+
+typedef struct {
+	unsigned long		end_brk;
+} mm_context_t;
+
+#endif /* _ASM_C6X_MMU_H */
diff --git a/arch/c6x/include/asm/module.h b/arch/c6x/include/asm/module.h
new file mode 100644
index 0000000..a453f97
--- /dev/null
+++ b/arch/c6x/include/asm/module.h
@@ -0,0 +1,33 @@
+/*
+ *  Port on Texas Instruments TMS320C6x architecture
+ *
+ *  Copyright (C) 2004, 2009, 2010 Texas Instruments Incorporated
+ *  Author: Aurelien Jacquiot (aurelien.jacquiot@jaluna.com)
+ *
+ *  Updated for 2.6.34 by: Mark Salter (msalter@redhat.com)
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ */
+#ifndef _ASM_C6X_MODULE_H
+#define _ASM_C6X_MODULE_H
+
+#define Elf_Shdr	Elf32_Shdr
+#define Elf_Sym		Elf32_Sym
+#define Elf_Ehdr	Elf32_Ehdr
+#define Elf_Addr	Elf32_Addr
+#define Elf_Word	Elf32_Word
+
+/*
+ * This file contains the C6x architecture specific module code.
+ */
+struct mod_arch_specific {
+};
+
+struct loaded_sections {
+	unsigned int new_vaddr;
+	unsigned int loaded;
+};
+
+#endif /* _ASM_C6X_MODULE_H */
diff --git a/arch/c6x/include/asm/mutex.h b/arch/c6x/include/asm/mutex.h
new file mode 100644
index 0000000..7a7248e
--- /dev/null
+++ b/arch/c6x/include/asm/mutex.h
@@ -0,0 +1,6 @@
+#ifndef _ASM_C6X_MUTEX_H
+#define _ASM_C6X_MUTEX_H
+
+#include <asm-generic/mutex-null.h>
+
+#endif /* _ASM_C6X_MUTEX_H */
diff --git a/arch/c6x/include/asm/page.h b/arch/c6x/include/asm/page.h
new file mode 100644
index 0000000..d18e2b0
--- /dev/null
+++ b/arch/c6x/include/asm/page.h
@@ -0,0 +1,11 @@
+#ifndef _ASM_C6X_PAGE_H
+#define _ASM_C6X_PAGE_H
+
+#define VM_DATA_DEFAULT_FLAGS \
+	(VM_READ | VM_WRITE | \
+	((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0) | \
+		 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
+
+#include <asm-generic/page.h>
+
+#endif /* _ASM_C6X_PAGE_H */
diff --git a/arch/c6x/include/asm/pgtable.h b/arch/c6x/include/asm/pgtable.h
new file mode 100644
index 0000000..68c8af4
--- /dev/null
+++ b/arch/c6x/include/asm/pgtable.h
@@ -0,0 +1,81 @@
+/*
+ *  Port on Texas Instruments TMS320C6x architecture
+ *
+ *  Copyright (C) 2004, 2009, 2010 Texas Instruments Incorporated
+ *  Author: Aurelien Jacquiot (aurelien.jacquiot@jaluna.com)
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ */
+#ifndef _ASM_C6X_PGTABLE_H
+#define _ASM_C6X_PGTABLE_H
+
+#include <asm-generic/4level-fixup.h>
+
+#include <asm/setup.h>
+#include <asm/page.h>
+
+/*
+ * All 32bit addresses are effectively valid for vmalloc...
+ * Sort of meaningless for non-VM targets.
+ */
+#define	VMALLOC_START	0
+#define	VMALLOC_END	0xffffffff
+
+#define pgd_present(pgd)	(1)
+#define pgd_none(pgd)		(0)
+#define pgd_bad(pgd)		(0)
+#define pgd_clear(pgdp)
+#define kern_addr_valid(addr) (1)
+
+#define pmd_offset(a, b)	((void *)0)
+#define pmd_none(x)		(!pmd_val(x))
+#define pmd_present(x)		(pmd_val(x))
+#define pmd_clear(xp)		do { set_pmd(xp, __pmd(0)); } while (0)
+#define pmd_bad(x)		(pmd_val(x) & ~PAGE_MASK)
+
+#define PAGE_NONE		__pgprot(0)    /* these mean nothing to NO_MM */
+#define PAGE_SHARED		__pgprot(0)    /* these mean nothing to NO_MM */
+#define PAGE_COPY		__pgprot(0)    /* these mean nothing to NO_MM */
+#define PAGE_READONLY	        __pgprot(0)    /* these mean nothing to NO_MM */
+#define PAGE_KERNEL		__pgprot(0)    /* these mean nothing to NO_MM */
+#define pgprot_noncached(prot)	(prot)
+
+extern void paging_init(void);
+
+#define __swp_type(x)		(0)
+#define __swp_offset(x)		(0)
+#define __swp_entry(typ, off)	((swp_entry_t) { ((typ) | ((off) << 7)) })
+#define __pte_to_swp_entry(pte)	((swp_entry_t) { pte_val(pte) })
+#define __swp_entry_to_pte(x)	((pte_t) { (x).val })
+
+static inline int pte_file(pte_t pte)
+{
+	return 0;
+}
+
+#define set_pte(pteptr, pteval) (*(pteptr) = pteval)
+#define set_pte_at(mm, addr, ptep, pteval) set_pte(ptep, pteval)
+
+/*
+ * ZERO_PAGE is a global shared page that is always zero: used
+ * for zero-mapped memory areas etc..
+ */
+#define ZERO_PAGE(vaddr)	virt_to_page(empty_zero_page)
+extern unsigned long empty_zero_page;
+
+#define swapper_pg_dir ((pgd_t *) 0)
+
+/*
+ * No page table caches to initialise
+ */
+#define pgtable_cache_init()   do { } while (0)
+#define io_remap_pfn_range      remap_pfn_range
+
+#define io_remap_page_range(vma, vaddr, paddr, size, prot)		\
+		remap_pfn_range(vma, vaddr, (paddr) >> PAGE_SHIFT, size, prot)
+
+#include <asm-generic/pgtable.h>
+
+#endif /* _ASM_C6X_PGTABLE_H */
diff --git a/arch/c6x/include/asm/processor.h b/arch/c6x/include/asm/processor.h
new file mode 100644
index 0000000..8154c4e
--- /dev/null
+++ b/arch/c6x/include/asm/processor.h
@@ -0,0 +1,132 @@
+/*
+ *  Port on Texas Instruments TMS320C6x architecture
+ *
+ *  Copyright (C) 2004, 2009, 2010, 2011 Texas Instruments Incorporated
+ *  Author: Aurelien Jacquiot (aurelien.jacquiot@jaluna.com)
+ *
+ *  Updated for 2.6.34: Mark Salter <msalter@redhat.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ */
+#ifndef _ASM_C6X_PROCESSOR_H
+#define _ASM_C6X_PROCESSOR_H
+
+#include <asm/ptrace.h>
+#include <asm/page.h>
+#include <asm/current.h>
+
+/*
+ * Default implementation of macro that returns current
+ * instruction pointer ("program counter").
+ */
+#define current_text_addr()			\
+({						\
+	void *__pc;				\
+	asm("mvc .S2 pce1,%0\n" : "=b"(__pc));	\
+	__pc;					\
+})
+
+/*
+ * User space process size. This is mostly meaningless for NOMMU
+ * but some C6X processors may have RAM addresses up to 0xFFFFFFFF.
+ * Since calls like mmap() can return an address or an error, we
+ * have to allow room for error returns when code does something
+ * like:
+ *
+ *       addr = do_mmap(...)
+ *       if ((unsigned long)addr >= TASK_SIZE)
+ *            ... its an error code, not an address ...
+ *
+ * Here, we allow for 4096 error codes which means we really can't
+ * use the last 4K page on systems with RAM extending all the way
+ * to the end of the 32-bit address space.
+ */
+#define TASK_SIZE	0xFFFFF000
+
+/*
+ * This decides where the kernel will search for a free chunk of vm
+ * space during mmap's. We won't be using it
+ */
+#define TASK_UNMAPPED_BASE	0
+
+struct thread_struct {
+	unsigned long long b15_14;
+	unsigned long long a15_14;
+	unsigned long long b13_12;
+	unsigned long long a13_12;
+	unsigned long long b11_10;
+	unsigned long long a11_10;
+	unsigned long long ricl_icl;
+	unsigned long  usp;		/* user stack pointer */
+	unsigned long  pc;		/* kernel pc */
+	unsigned long  wchan;
+};
+
+#define INIT_THREAD					\
+{							\
+	.usp = 0,					\
+	.wchan = 0,					\
+}
+
+#define INIT_MMAP { \
+	&init_mm, 0, 0, NULL, PAGE_SHARED, VM_READ | VM_WRITE | VM_EXEC, 1, \
+	NULL, NULL }
+
+#define task_pt_regs(task) \
+	((struct pt_regs *)(THREAD_START_SP + task_stack_page(task)) - 1)
+
+#define alloc_kernel_stack()	__get_free_page(GFP_KERNEL)
+#define free_kernel_stack(page) free_page((page))
+
+
+/* Forward declaration, a strange C thing */
+struct task_struct;
+
+extern void start_thread(struct pt_regs *regs, unsigned int pc,
+			 unsigned long usp);
+
+/* Free all resources held by a thread. */
+static inline void release_thread(struct task_struct *dead_task)
+{
+}
+
+/* Prepare to copy thread state - unlazy all lazy status */
+#define prepare_to_copy(tsk)	do { } while (0)
+
+extern int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags);
+
+#define copy_segments(tsk, mm)		do { } while (0)
+#define release_segments(mm)		do { } while (0)
+
+/*
+ * saved PC of a blocked thread.
+ */
+#define thread_saved_pc(tsk) (task_pt_regs(tsk)->pc)
+
+/*
+ * saved kernel SP and DP of a blocked thread.
+ */
+#ifdef _BIG_ENDIAN
+#define thread_saved_ksp(tsk) \
+	(*(unsigned long *)&(tsk)->thread.b15_14)
+#define thread_saved_dp(tsk) \
+	(*(((unsigned long *)&(tsk)->thread.b15_14) + 1))
+#else
+#define thread_saved_ksp(tsk) \
+	(*(((unsigned long *)&(tsk)->thread.b15_14) + 1))
+#define thread_saved_dp(tsk) \
+	(*(unsigned long *)&(tsk)->thread.b15_14)
+#endif
+
+extern unsigned long get_wchan(struct task_struct *p);
+
+#define KSTK_EIP(tsk)	(task_pt_regs(task)->pc)
+#define	KSTK_ESP(tsk)	(task_pt_regs(task)->sp)
+
+#define cpu_relax()		do { } while (0)
+
+extern const struct seq_operations cpuinfo_op;
+
+#endif /* ASM_C6X_PROCESSOR_H */
diff --git a/arch/c6x/include/asm/procinfo.h b/arch/c6x/include/asm/procinfo.h
new file mode 100644
index 0000000..c139d1e
--- /dev/null
+++ b/arch/c6x/include/asm/procinfo.h
@@ -0,0 +1,28 @@
+/*
+ *  Copyright (C) 2010 Texas Instruments Incorporated
+ *  Author: Mark Salter (msalter@redhat.com)
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef _ASM_C6X_PROCINFO_H
+#define _ASM_C6X_PROCINFO_H
+
+#ifdef __KERNEL__
+
+struct proc_info_list {
+	unsigned int		cpu_val;
+	unsigned int		cpu_mask;
+	const char		*arch_name;
+	const char		*elf_name;
+	unsigned int		elf_hwcap;
+};
+
+#else	/* __KERNEL__ */
+#include <asm/elf.h>
+#warning "Please include asm/elf.h instead"
+#endif	/* __KERNEL__ */
+
+#endif	/* _ASM_C6X_PROCINFO_H */
diff --git a/arch/c6x/include/asm/prom.h b/arch/c6x/include/asm/prom.h
new file mode 100644
index 0000000..b4ec95f
--- /dev/null
+++ b/arch/c6x/include/asm/prom.h
@@ -0,0 +1 @@
+/* dummy prom.h; here to make linux/of.h's #includes happy */
diff --git a/arch/c6x/include/asm/ptrace.h b/arch/c6x/include/asm/ptrace.h
new file mode 100644
index 0000000..21e8d79
--- /dev/null
+++ b/arch/c6x/include/asm/ptrace.h
@@ -0,0 +1,174 @@
+/*
+ *  Copyright (C) 2004, 2006, 2009, 2010 Texas Instruments Incorporated
+ *  Author: Aurelien Jacquiot (aurelien.jacquiot@jaluna.com)
+ *
+ *  Updated for 2.6.34: Mark Salter <msalter@redhat.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ */
+#ifndef _ASM_C6X_PTRACE_H
+#define _ASM_C6X_PTRACE_H
+
+#define BKPT_OPCODE	0x56454314	/* illegal opcode */
+
+#ifdef _BIG_ENDIAN
+#define PT_LO(odd, even)  odd
+#define PT_HI(odd, even)  even
+#else
+#define PT_LO(odd, even)  even
+#define PT_HI(odd, even)  odd
+#endif
+
+#define PT_A4_ORG  PT_LO(1, 0)
+#define PT_TSR	   PT_HI(1, 0)
+#define PT_ILC	   PT_LO(3, 2)
+#define PT_RILC    PT_HI(3, 2)
+#define PT_CSR	   PT_LO(5, 4)
+#define PT_PC	   PT_HI(5, 4)
+#define PT_B16	   PT_LO(7, 6)
+#define PT_B17	   PT_HI(7, 6)
+#define PT_B18	   PT_LO(9, 8)
+#define PT_B19	   PT_HI(9, 8)
+#define PT_B20	   PT_LO(11, 10)
+#define PT_B21	   PT_HI(11, 10)
+#define PT_B22	   PT_LO(13, 12)
+#define PT_B23	   PT_HI(13, 12)
+#define PT_B24	   PT_LO(15, 14)
+#define PT_B25	   PT_HI(15, 14)
+#define PT_B26	   PT_LO(17, 16)
+#define PT_B27	   PT_HI(17, 16)
+#define PT_B28	   PT_LO(19, 18)
+#define PT_B29	   PT_HI(19, 18)
+#define PT_B30	   PT_LO(21, 20)
+#define PT_B31	   PT_HI(21, 20)
+#define PT_B0	   PT_LO(23, 22)
+#define PT_B1	   PT_HI(23, 22)
+#define PT_B2	   PT_LO(25, 24)
+#define PT_B3	   PT_HI(25, 24)
+#define PT_B4	   PT_LO(27, 26)
+#define PT_B5	   PT_HI(27, 26)
+#define PT_B6	   PT_LO(29, 28)
+#define PT_B7	   PT_HI(29, 28)
+#define PT_B8	   PT_LO(31, 30)
+#define PT_B9	   PT_HI(31, 30)
+#define PT_B10	   PT_LO(33, 32)
+#define PT_B11	   PT_HI(33, 32)
+#define PT_B12	   PT_LO(35, 34)
+#define PT_B13	   PT_HI(35, 34)
+#define PT_A16	   PT_LO(37, 36)
+#define PT_A17	   PT_HI(37, 36)
+#define PT_A18	   PT_LO(39, 38)
+#define PT_A19	   PT_HI(39, 38)
+#define PT_A20	   PT_LO(41, 40)
+#define PT_A21	   PT_HI(41, 40)
+#define PT_A22	   PT_LO(43, 42)
+#define PT_A23	   PT_HI(43, 42)
+#define PT_A24	   PT_LO(45, 44)
+#define PT_A25	   PT_HI(45, 44)
+#define PT_A26	   PT_LO(47, 46)
+#define PT_A27	   PT_HI(47, 46)
+#define PT_A28	   PT_LO(49, 48)
+#define PT_A29	   PT_HI(49, 48)
+#define PT_A30	   PT_LO(51, 50)
+#define PT_A31	   PT_HI(51, 50)
+#define PT_A0	   PT_LO(53, 52)
+#define PT_A1	   PT_HI(53, 52)
+#define PT_A2	   PT_LO(55, 54)
+#define PT_A3	   PT_HI(55, 54)
+#define PT_A4	   PT_LO(57, 56)
+#define PT_A5	   PT_HI(57, 56)
+#define PT_A6	   PT_LO(59, 58)
+#define PT_A7	   PT_HI(59, 58)
+#define PT_A8	   PT_LO(61, 60)
+#define PT_A9	   PT_HI(61, 60)
+#define PT_A10	   PT_LO(63, 62)
+#define PT_A11	   PT_HI(63, 62)
+#define PT_A12	   PT_LO(65, 64)
+#define PT_A13	   PT_HI(65, 64)
+#define PT_A14	   PT_LO(67, 66)
+#define PT_A15	   PT_HI(67, 66)
+#define PT_B14	   PT_LO(69, 68)
+#define PT_B15	   PT_HI(69, 68)
+
+#define NR_PTREGS  70
+
+#define PT_DP	   PT_B14  /* Data Segment Pointer (B14) */
+#define PT_SP	   PT_B15  /* Stack Pointer (B15)  */
+
+#ifndef __ASSEMBLY__
+
+#ifdef _BIG_ENDIAN
+#define REG_PAIR(odd, even) unsigned long odd; unsigned long even
+#else
+#define REG_PAIR(odd, even) unsigned long even; unsigned long odd
+#endif
+
+/*
+ * this struct defines the way the registers are stored on the
+ * stack during a system call. fields defined with REG_PAIR
+ * are saved and restored using double-word memory operations
+ * which means the word ordering of the pair depends on endianess.
+ */
+struct pt_regs {
+	REG_PAIR(tsr, orig_a4);
+	REG_PAIR(rilc, ilc);
+	REG_PAIR(pc, csr);
+
+	REG_PAIR(b17, b16);
+	REG_PAIR(b19, b18);
+	REG_PAIR(b21, b20);
+	REG_PAIR(b23, b22);
+	REG_PAIR(b25, b24);
+	REG_PAIR(b27, b26);
+	REG_PAIR(b29, b28);
+	REG_PAIR(b31, b30);
+
+	REG_PAIR(b1, b0);
+	REG_PAIR(b3, b2);
+	REG_PAIR(b5, b4);
+	REG_PAIR(b7, b6);
+	REG_PAIR(b9, b8);
+	REG_PAIR(b11, b10);
+	REG_PAIR(b13, b12);
+
+	REG_PAIR(a17, a16);
+	REG_PAIR(a19, a18);
+	REG_PAIR(a21, a20);
+	REG_PAIR(a23, a22);
+	REG_PAIR(a25, a24);
+	REG_PAIR(a27, a26);
+	REG_PAIR(a29, a28);
+	REG_PAIR(a31, a30);
+
+	REG_PAIR(a1, a0);
+	REG_PAIR(a3, a2);
+	REG_PAIR(a5, a4);
+	REG_PAIR(a7, a6);
+	REG_PAIR(a9, a8);
+	REG_PAIR(a11, a10);
+	REG_PAIR(a13, a12);
+
+	REG_PAIR(a15, a14);
+	REG_PAIR(sp, dp);
+};
+
+#ifdef __KERNEL__
+
+#include <linux/linkage.h>
+
+#define user_mode(regs)	((((regs)->tsr) & 0x40) != 0)
+
+#define instruction_pointer(regs) ((regs)->pc)
+#define profile_pc(regs) instruction_pointer(regs)
+#define user_stack_pointer(regs) ((regs)->sp)
+
+extern void show_regs(struct pt_regs *);
+
+extern asmlinkage unsigned long syscall_trace_entry(struct pt_regs *regs);
+extern asmlinkage void syscall_trace_exit(struct pt_regs *regs);
+
+#endif /* __KERNEL__ */
+#endif /* __ASSEMBLY__ */
+#endif /* _ASM_C6X_PTRACE_H */
diff --git a/arch/c6x/include/asm/sections.h b/arch/c6x/include/asm/sections.h
new file mode 100644
index 0000000..f703989
--- /dev/null
+++ b/arch/c6x/include/asm/sections.h
@@ -0,0 +1,12 @@
+#ifndef _ASM_C6X_SECTIONS_H
+#define _ASM_C6X_SECTIONS_H
+
+#include <asm-generic/sections.h>
+
+extern char _vectors_start[];
+extern char _vectors_end[];
+
+extern char _data_lma[];
+extern char _fdt_start[], _fdt_end[];
+
+#endif /* _ASM_C6X_SECTIONS_H */
diff --git a/arch/c6x/include/asm/setup.h b/arch/c6x/include/asm/setup.h
new file mode 100644
index 0000000..1808f27
--- /dev/null
+++ b/arch/c6x/include/asm/setup.h
@@ -0,0 +1,32 @@
+/*
+ *  Port on Texas Instruments TMS320C6x architecture
+ *
+ *  Copyright (C) 2004, 2009, 2010 2011 Texas Instruments Incorporated
+ *  Author: Aurelien Jacquiot (aurelien.jacquiot@jaluna.com)
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ */
+#ifndef _ASM_C6X_SETUP_H
+#define _ASM_C6X_SETUP_H
+
+#define COMMAND_LINE_SIZE   1024
+
+#ifndef __ASSEMBLY__
+extern char c6x_command_line[COMMAND_LINE_SIZE];
+
+extern int c6x_add_memory(phys_addr_t start, unsigned long size);
+
+extern unsigned long ram_start;
+extern unsigned long ram_end;
+
+extern int c6x_num_cores;
+extern unsigned int c6x_silicon_rev;
+extern unsigned int c6x_devstat;
+extern unsigned char c6x_fuse_mac[6];
+
+extern void machine_init(unsigned long dt_ptr);
+
+#endif /* !__ASSEMBLY__ */
+#endif /* _ASM_C6X_SETUP_H */
diff --git a/arch/c6x/include/asm/sigcontext.h b/arch/c6x/include/asm/sigcontext.h
new file mode 100644
index 0000000..eb702f3
--- /dev/null
+++ b/arch/c6x/include/asm/sigcontext.h
@@ -0,0 +1,80 @@
+/*
+ *  Port on Texas Instruments TMS320C6x architecture
+ *
+ *  Copyright (C) 2004, 2009 Texas Instruments Incorporated
+ *  Author: Aurelien Jacquiot (aurelien.jacquiot@jaluna.com)
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ */
+#ifndef _ASM_C6X_SIGCONTEXT_H
+#define _ASM_C6X_SIGCONTEXT_H
+
+
+struct sigcontext {
+	unsigned long  sc_mask;		/* old sigmask */
+	unsigned long  sc_sp;		/* old user stack pointer */
+
+	unsigned long  sc_a4;
+	unsigned long  sc_b4;
+	unsigned long  sc_a6;
+	unsigned long  sc_b6;
+	unsigned long  sc_a8;
+	unsigned long  sc_b8;
+
+	unsigned long  sc_a0;
+	unsigned long  sc_a1;
+	unsigned long  sc_a2;
+	unsigned long  sc_a3;
+	unsigned long  sc_a5;
+	unsigned long  sc_a7;
+	unsigned long  sc_a9;
+
+	unsigned long  sc_b0;
+	unsigned long  sc_b1;
+	unsigned long  sc_b2;
+	unsigned long  sc_b3;
+	unsigned long  sc_b5;
+	unsigned long  sc_b7;
+	unsigned long  sc_b9;
+
+	unsigned long  sc_a16;
+	unsigned long  sc_a17;
+	unsigned long  sc_a18;
+	unsigned long  sc_a19;
+	unsigned long  sc_a20;
+	unsigned long  sc_a21;
+	unsigned long  sc_a22;
+	unsigned long  sc_a23;
+	unsigned long  sc_a24;
+	unsigned long  sc_a25;
+	unsigned long  sc_a26;
+	unsigned long  sc_a27;
+	unsigned long  sc_a28;
+	unsigned long  sc_a29;
+	unsigned long  sc_a30;
+	unsigned long  sc_a31;
+
+	unsigned long  sc_b16;
+	unsigned long  sc_b17;
+	unsigned long  sc_b18;
+	unsigned long  sc_b19;
+	unsigned long  sc_b20;
+	unsigned long  sc_b21;
+	unsigned long  sc_b22;
+	unsigned long  sc_b23;
+	unsigned long  sc_b24;
+	unsigned long  sc_b25;
+	unsigned long  sc_b26;
+	unsigned long  sc_b27;
+	unsigned long  sc_b28;
+	unsigned long  sc_b29;
+	unsigned long  sc_b30;
+	unsigned long  sc_b31;
+
+	unsigned long  sc_csr;
+	unsigned long  sc_pc;
+};
+
+#endif /* _ASM_C6X_SIGCONTEXT_H */
diff --git a/arch/c6x/include/asm/signal.h b/arch/c6x/include/asm/signal.h
new file mode 100644
index 0000000..f1cd870
--- /dev/null
+++ b/arch/c6x/include/asm/signal.h
@@ -0,0 +1,17 @@
+#ifndef _ASM_C6X_SIGNAL_H
+#define _ASM_C6X_SIGNAL_H
+
+#include <asm-generic/signal.h>
+
+#ifndef __ASSEMBLY__
+#include <linux/linkage.h>
+
+struct pt_regs;
+
+extern asmlinkage int do_rt_sigreturn(struct pt_regs *regs);
+extern asmlinkage void do_notify_resume(struct pt_regs *regs,
+					u32 thread_info_flags,
+					int syscall);
+#endif
+
+#endif /* _ASM_C6X_SIGNAL_H */
diff --git a/arch/c6x/include/asm/soc.h b/arch/c6x/include/asm/soc.h
new file mode 100644
index 0000000..43f5015
--- /dev/null
+++ b/arch/c6x/include/asm/soc.h
@@ -0,0 +1,35 @@
+/*
+ * Miscellaneous SoC-specific hooks.
+ *
+ * Copyright (C) 2011 Texas Instruments Incorporated
+ *
+ * Author: Mark Salter <msalter@redhat.com>
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2. This program is licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ */
+#ifndef _ASM_C6X_SOC_H
+#define _ASM_C6X_SOC_H
+
+struct soc_ops {
+	/* Return active exception event or -1 if none */
+	int		(*get_exception)(void);
+
+	/* Assert an event */
+	void		(*assert_event)(unsigned int evt);
+};
+
+extern struct soc_ops soc_ops;
+
+extern int soc_get_exception(void);
+extern void soc_assert_event(unsigned int event);
+extern int soc_mac_addr(unsigned int index, u8 *addr);
+
+/*
+ * for mmio on SoC devices. regs are always same byte order as cpu.
+ */
+#define soc_readl(addr)    __raw_readl(addr)
+#define soc_writel(b, addr) __raw_writel((b), (addr))
+
+#endif /* _ASM_C6X_SOC_H */
diff --git a/arch/c6x/include/asm/string.h b/arch/c6x/include/asm/string.h
new file mode 100644
index 0000000..b21517c
--- /dev/null
+++ b/arch/c6x/include/asm/string.h
@@ -0,0 +1,21 @@
+/*
+ *  Port on Texas Instruments TMS320C6x architecture
+ *
+ *  Copyright (C) 2004, 2009, 2011 Texas Instruments Incorporated
+ *  Author: Aurelien Jacquiot (aurelien.jacquiot@jaluna.com)
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ */
+#ifndef _ASM_C6X_STRING_H
+#define _ASM_C6X_STRING_H
+
+#include <asm/page.h>
+#include <linux/linkage.h>
+
+asmlinkage extern void *memcpy(void *to, const void *from, size_t n);
+
+#define __HAVE_ARCH_MEMCPY
+
+#endif /* _ASM_C6X_STRING_H */
diff --git a/arch/c6x/include/asm/swab.h b/arch/c6x/include/asm/swab.h
new file mode 100644
index 0000000..fd4bb05
--- /dev/null
+++ b/arch/c6x/include/asm/swab.h
@@ -0,0 +1,54 @@
+/*
+ *  Copyright (C) 2011 Texas Instruments Incorporated
+ *  Author: Mark Salter <msalter@redhat.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ */
+#ifndef _ASM_C6X_SWAB_H
+#define _ASM_C6X_SWAB_H
+
+static inline __attribute_const__ __u16 __c6x_swab16(__u16 val)
+{
+	asm("swap4 .l1 %0,%0\n" : "+a"(val));
+	return val;
+}
+
+static inline __attribute_const__ __u32 __c6x_swab32(__u32 val)
+{
+	asm("swap4 .l1 %0,%0\n"
+	    "swap2 .l1 %0,%0\n"
+	    : "+a"(val));
+	return val;
+}
+
+static inline __attribute_const__ __u64 __c6x_swab64(__u64 val)
+{
+	asm("   swap2 .s1 %p0,%P0\n"
+	    "|| swap2 .l1 %P0,%p0\n"
+	    "   swap4 .l1 %p0,%p0\n"
+	    "   swap4 .l1 %P0,%P0\n"
+	    : "+a"(val));
+	return val;
+}
+
+static inline __attribute_const__ __u32 __c6x_swahw32(__u32 val)
+{
+	asm("swap2 .l1 %0,%0\n" : "+a"(val));
+	return val;
+}
+
+static inline __attribute_const__ __u32 __c6x_swahb32(__u32 val)
+{
+	asm("swap4 .l1 %0,%0\n" : "+a"(val));
+	return val;
+}
+
+#define __arch_swab16 __c6x_swab16
+#define __arch_swab32 __c6x_swab32
+#define __arch_swab64 __c6x_swab64
+#define __arch_swahw32 __c6x_swahw32
+#define __arch_swahb32 __c6x_swahb32
+
+#endif /* _ASM_C6X_SWAB_H */
diff --git a/arch/c6x/include/asm/syscall.h b/arch/c6x/include/asm/syscall.h
new file mode 100644
index 0000000..ae2be31
--- /dev/null
+++ b/arch/c6x/include/asm/syscall.h
@@ -0,0 +1,123 @@
+/*
+ * Copyright (C) 2011 Texas Instruments Incorporated
+ * Author: Mark Salter <msalter@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef __ASM_C6X_SYSCALL_H
+#define __ASM_C6X_SYSCALL_H
+
+#include <linux/err.h>
+#include <linux/sched.h>
+
+static inline int syscall_get_nr(struct task_struct *task,
+				 struct pt_regs *regs)
+{
+	return regs->b0;
+}
+
+static inline void syscall_rollback(struct task_struct *task,
+				    struct pt_regs *regs)
+{
+	/* do nothing */
+}
+
+static inline long syscall_get_error(struct task_struct *task,
+				     struct pt_regs *regs)
+{
+	return IS_ERR_VALUE(regs->a4) ? regs->a4 : 0;
+}
+
+static inline long syscall_get_return_value(struct task_struct *task,
+					    struct pt_regs *regs)
+{
+	return regs->a4;
+}
+
+static inline void syscall_set_return_value(struct task_struct *task,
+					    struct pt_regs *regs,
+					    int error, long val)
+{
+	regs->a4 = error ?: val;
+}
+
+static inline void syscall_get_arguments(struct task_struct *task,
+					 struct pt_regs *regs, unsigned int i,
+					 unsigned int n, unsigned long *args)
+{
+	switch (i) {
+	case 0:
+		if (!n--)
+			break;
+		*args++ = regs->a4;
+	case 1:
+		if (!n--)
+			break;
+		*args++ = regs->b4;
+	case 2:
+		if (!n--)
+			break;
+		*args++ = regs->a6;
+	case 3:
+		if (!n--)
+			break;
+		*args++ = regs->b6;
+	case 4:
+		if (!n--)
+			break;
+		*args++ = regs->a8;
+	case 5:
+		if (!n--)
+			break;
+		*args++ = regs->b8;
+	case 6:
+		if (!n--)
+			break;
+	default:
+		BUG();
+	}
+}
+
+static inline void syscall_set_arguments(struct task_struct *task,
+					 struct pt_regs *regs,
+					 unsigned int i, unsigned int n,
+					 const unsigned long *args)
+{
+	switch (i) {
+	case 0:
+		if (!n--)
+			break;
+		regs->a4 = *args++;
+	case 1:
+		if (!n--)
+			break;
+		regs->b4 = *args++;
+	case 2:
+		if (!n--)
+			break;
+		regs->a6 = *args++;
+	case 3:
+		if (!n--)
+			break;
+		regs->b6 = *args++;
+	case 4:
+		if (!n--)
+			break;
+		regs->a8 = *args++;
+	case 5:
+		if (!n--)
+			break;
+		regs->a9 = *args++;
+	case 6:
+		if (!n)
+			break;
+	default:
+		BUG();
+	}
+}
+
+#endif /* __ASM_C6X_SYSCALLS_H */
diff --git a/arch/c6x/include/asm/syscalls.h b/arch/c6x/include/asm/syscalls.h
new file mode 100644
index 0000000..aed53da
--- /dev/null
+++ b/arch/c6x/include/asm/syscalls.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (C) 2011 Texas Instruments Incorporated
+ * Author: Mark Salter <msalter@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT.  See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef __ASM_C6X_SYSCALLS_H
+#define __ASM_C6X_SYSCALLS_H
+
+#include <linux/compiler.h>
+#include <linux/linkage.h>
+#include <linux/types.h>
+
+/* The array of function pointers for syscalls. */
+extern void *sys_call_table[];
+
+/* The following are trampolines in entry.S to handle 64-bit arguments */
+extern long sys_pread_c6x(unsigned int fd, char __user *buf,
+			  size_t count, off_t pos_low, off_t pos_high);
+extern long sys_pwrite_c6x(unsigned int fd, const char __user *buf,
+			   size_t count, off_t pos_low, off_t pos_high);
+extern long sys_truncate64_c6x(const char __user *path,
+			       off_t length_low, off_t length_high);
+extern long sys_ftruncate64_c6x(unsigned int fd,
+			       off_t length_low, off_t length_high);
+extern long sys_fadvise64_c6x(int fd, u32 offset_lo, u32 offset_hi,
+			      u32 len, int advice);
+extern long sys_fadvise64_64_c6x(int fd, u32 offset_lo, u32 offset_hi,
+				u32 len_lo, u32 len_hi, int advice);
+extern long sys_fallocate_c6x(int fd, int mode,
+			      u32 offset_lo, u32 offset_hi,
+			      u32 len_lo, u32 len_hi);
+extern int sys_cache_sync(unsigned long s, unsigned long e);
+
+struct pt_regs;
+
+extern asmlinkage long sys_c6x_clone(struct pt_regs *regs);
+extern asmlinkage long sys_c6x_execve(const char __user *name,
+				      const char __user *const __user *argv,
+				      const char __user *const __user *envp,
+				      struct pt_regs *regs);
+
+
+#include <asm-generic/syscalls.h>
+
+#endif /* __ASM_C6X_SYSCALLS_H */
diff --git a/arch/c6x/include/asm/system.h b/arch/c6x/include/asm/system.h
new file mode 100644
index 0000000..e076dc0
--- /dev/null
+++ b/arch/c6x/include/asm/system.h
@@ -0,0 +1,168 @@
+/*
+ *  Port on Texas Instruments TMS320C6x architecture
+ *
+ *  Copyright (C) 2004, 2009, 2010, 2011 Texas Instruments Incorporated
+ *  Author: Aurelien Jacquiot (aurelien.jacquiot@jaluna.com)
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ */
+#ifndef _ASM_C6X_SYSTEM_H
+#define _ASM_C6X_SYSTEM_H
+
+#include <linux/linkage.h>
+#include <linux/irqflags.h>
+
+#define prepare_to_switch()    do { } while (0)
+
+struct task_struct;
+struct thread_struct;
+asmlinkage void *__switch_to(struct thread_struct *prev,
+			     struct thread_struct *next,
+			     struct task_struct *tsk);
+
+#define switch_to(prev, next, last)				\
+	do {							\
+		current->thread.wchan = (u_long) __builtin_return_address(0); \
+		(last) = __switch_to(&(prev)->thread,		\
+				     &(next)->thread, (prev));	\
+		mb();						\
+		current->thread.wchan = 0;			\
+	} while (0)
+
+/* Reset the board */
+#define HARD_RESET_NOW()
+
+#define get_creg(reg) \
+	({ unsigned int __x; \
+	   asm volatile ("mvc .s2 " #reg ",%0\n" : "=b"(__x)); __x; })
+
+#define set_creg(reg, v) \
+	do { unsigned int __x = (unsigned int)(v); \
+		asm volatile ("mvc .s2 %0," #reg "\n" : : "b"(__x)); \
+	} while (0)
+
+#define or_creg(reg, n) \
+	do { unsigned __x, __n = (unsigned)(n);		  \
+		asm volatile ("mvc .s2 " #reg ",%0\n"	  \
+			      "or  .l2 %1,%0,%0\n"	  \
+			      "mvc .s2 %0," #reg "\n"	  \
+			      "nop\n"			  \
+			      : "=&b"(__x) : "b"(__n));	  \
+	} while (0)
+
+#define and_creg(reg, n) \
+	do { unsigned __x, __n = (unsigned)(n);		  \
+		asm volatile ("mvc .s2 " #reg ",%0\n"	  \
+			      "and .l2 %1,%0,%0\n"	  \
+			      "mvc .s2 %0," #reg "\n"	  \
+			      "nop\n"    \
+			      : "=&b"(__x) : "b"(__n));	  \
+	} while (0)
+
+#define get_coreid() (get_creg(DNUM) & 0xff)
+
+/* Set/get IST */
+#define set_ist(x)	set_creg(ISTP, x)
+#define get_ist()       get_creg(ISTP)
+
+/*
+ * Exception management
+ */
+asmlinkage void enable_exception(void);
+#define disable_exception()
+#define get_except_type()        get_creg(EFR)
+#define ack_exception(type)      set_creg(ECR, 1 << (type))
+#define get_iexcept()            get_creg(IERR)
+#define set_iexcept(mask)        set_creg(IERR, (mask))
+
+/*
+ * Misc. functions
+ */
+#define nop()                    asm("NOP\n");
+#define mb()                     barrier()
+#define rmb()                    barrier()
+#define wmb()                    barrier()
+#define set_mb(var, value)       do { var = value;  mb(); } while (0)
+#define set_wmb(var, value)      do { var = value; wmb(); } while (0)
+
+#define smp_mb()	         barrier()
+#define smp_rmb()	         barrier()
+#define smp_wmb()	         barrier()
+#define smp_read_barrier_depends()	do { } while (0)
+
+#define xchg(ptr, x) \
+	((__typeof__(*(ptr)))__xchg((unsigned int)(x), (void *) (ptr), \
+				    sizeof(*(ptr))))
+#define tas(ptr)    xchg((ptr), 1)
+
+unsigned int _lmbd(unsigned int, unsigned int);
+unsigned int _bitr(unsigned int);
+
+struct __xchg_dummy { unsigned int a[100]; };
+#define __xg(x) ((volatile struct __xchg_dummy *)(x))
+
+static inline unsigned int __xchg(unsigned int x, volatile void *ptr, int size)
+{
+	unsigned int tmp;
+	unsigned long flags;
+
+	local_irq_save(flags);
+
+	switch (size) {
+	case 1:
+		tmp = 0;
+		tmp = *((unsigned char *) ptr);
+		*((unsigned char *) ptr) = (unsigned char) x;
+		break;
+	case 2:
+		tmp = 0;
+		tmp = *((unsigned short *) ptr);
+		*((unsigned short *) ptr) = x;
+		break;
+	case 4:
+		tmp = 0;
+		tmp = *((unsigned int *) ptr);
+		*((unsigned int *) ptr) = x;
+		break;
+	}
+	local_irq_restore(flags);
+	return tmp;
+}
+
+#include <asm-generic/cmpxchg-local.h>
+
+/*
+ * cmpxchg_local and cmpxchg64_local are atomic wrt current CPU. Always make
+ * them available.
+ */
+#define cmpxchg_local(ptr, o, n)					\
+	((__typeof__(*(ptr)))__cmpxchg_local_generic((ptr),		\
+						     (unsigned long)(o), \
+						     (unsigned long)(n), \
+						     sizeof(*(ptr))))
+#define cmpxchg64_local(ptr, o, n) __cmpxchg64_local_generic((ptr), (o), (n))
+
+#include <asm-generic/cmpxchg.h>
+
+#define _extu(x, s, e)							\
+	({      unsigned int __x;					\
+		asm volatile ("extu .S2 %3,%1,%2,%0\n" :		\
+			      "=b"(__x) : "n"(s), "n"(e), "b"(x));	\
+	       __x; })
+
+
+extern unsigned int c6x_core_freq;
+
+struct pt_regs;
+
+extern void die(char *str, struct pt_regs *fp, int nr);
+extern asmlinkage int process_exception(struct pt_regs *regs);
+extern void time_init(void);
+extern void free_initmem(void);
+
+extern void (*c6x_restart)(void);
+extern void (*c6x_halt)(void);
+
+#endif /* _ASM_C6X_SYSTEM_H */
diff --git a/arch/c6x/include/asm/thread_info.h b/arch/c6x/include/asm/thread_info.h
new file mode 100644
index 0000000..fd99148
--- /dev/null
+++ b/arch/c6x/include/asm/thread_info.h
@@ -0,0 +1,121 @@
+/*
+ *  Port on Texas Instruments TMS320C6x architecture
+ *
+ *  Copyright (C) 2004, 2009, 2010, 2011 Texas Instruments Incorporated
+ *  Author: Aurelien Jacquiot (aurelien.jacquiot@jaluna.com)
+ *
+ *  Updated for 2.6.3x: Mark Salter <msalter@redhat.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ */
+#ifndef _ASM_C6X_THREAD_INFO_H
+#define _ASM_C6X_THREAD_INFO_H
+
+#ifdef __KERNEL__
+
+#include <asm/page.h>
+
+#ifdef CONFIG_4KSTACKS
+#define THREAD_SIZE		4096
+#define THREAD_SHIFT		12
+#define THREAD_ORDER		0
+#else
+#define THREAD_SIZE		8192
+#define THREAD_SHIFT		13
+#define THREAD_ORDER		1
+#endif
+
+#define THREAD_START_SP		(THREAD_SIZE - 8)
+
+#ifndef __ASSEMBLY__
+
+typedef struct {
+	unsigned long seg;
+} mm_segment_t;
+
+/*
+ * low level task data.
+ */
+struct thread_info {
+	struct task_struct	*task;		/* main task structure */
+	struct exec_domain	*exec_domain;	/* execution domain */
+	unsigned long		flags;		/* low level flags */
+	int			cpu;		/* cpu we're on */
+	int			preempt_count;	/* 0 = preemptable, <0 = BUG */
+	mm_segment_t		addr_limit;	/* thread address space */
+	struct restart_block	restart_block;
+};
+
+/*
+ * macros/functions for gaining access to the thread information structure
+ *
+ * preempt_count needs to be 1 initially, until the scheduler is functional.
+ */
+#define INIT_THREAD_INFO(tsk)			\
+{						\
+	.task		= &tsk,			\
+	.exec_domain	= &default_exec_domain,	\
+	.flags		= 0,			\
+	.cpu		= 0,			\
+	.preempt_count	= INIT_PREEMPT_COUNT,	\
+	.addr_limit	= KERNEL_DS,		\
+	.restart_block	= {			\
+		.fn = do_no_restart_syscall,	\
+	},					\
+}
+
+#define init_thread_info	(init_thread_union.thread_info)
+#define init_stack		(init_thread_union.stack)
+
+/* get the thread information struct of current task */
+static inline __attribute__((const))
+struct thread_info *current_thread_info(void)
+{
+	struct thread_info *ti;
+	asm volatile (" clr   .s2 B15,0,%1,%0\n"
+		      : "=b" (ti)
+		      : "Iu5" (THREAD_SHIFT - 1));
+	return ti;
+}
+
+#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR
+
+/* thread information allocation */
+#ifdef CONFIG_DEBUG_STACK_USAGE
+#define THREAD_FLAGS (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO)
+#else
+#define THREAD_FLAGS (GFP_KERNEL | __GFP_NOTRACK)
+#endif
+
+#define alloc_thread_info_node(tsk, node)	\
+	((struct thread_info *)__get_free_pages(THREAD_FLAGS, THREAD_ORDER))
+
+#define free_thread_info(ti)	free_pages((unsigned long) (ti), THREAD_ORDER)
+#define get_thread_info(ti)	get_task_struct((ti)->task)
+#define put_thread_info(ti)	put_task_struct((ti)->task)
+#endif /* __ASSEMBLY__ */
+
+#define	PREEMPT_ACTIVE	0x10000000
+
+/*
+ * thread information flag bit numbers
+ * - pending work-to-be-done flags are in LSW
+ * - other flags in MSW
+ */
+#define TIF_SYSCALL_TRACE	0	/* syscall trace active */
+#define TIF_NOTIFY_RESUME	1	/* resumption notification requested */
+#define TIF_SIGPENDING		2	/* signal pending */
+#define TIF_NEED_RESCHED	3	/* rescheduling necessary */
+#define TIF_RESTORE_SIGMASK	4	/* restore signal mask in do_signal() */
+
+#define TIF_POLLING_NRFLAG	16	/* true if polling TIF_NEED_RESCHED */
+#define TIF_MEMDIE		17	/* OOM killer killed process */
+
+#define TIF_WORK_MASK		0x00007FFE /* work on irq/exception return */
+#define TIF_ALLWORK_MASK	0x00007FFF /* work on any return to u-space */
+
+#endif /* __KERNEL__ */
+
+#endif /* _ASM_C6X_THREAD_INFO_H */
diff --git a/arch/c6x/include/asm/timer64.h b/arch/c6x/include/asm/timer64.h
new file mode 100644
index 0000000..bbe27bb
--- /dev/null
+++ b/arch/c6x/include/asm/timer64.h
@@ -0,0 +1,6 @@
+#ifndef _C6X_TIMER64_H
+#define _C6X_TIMER64_H
+
+extern void __init timer64_init(void);
+
+#endif /* _C6X_TIMER64_H */
diff --git a/arch/c6x/include/asm/timex.h b/arch/c6x/include/asm/timex.h
new file mode 100644
index 0000000..508c3ec
--- /dev/null
+++ b/arch/c6x/include/asm/timex.h
@@ -0,0 +1,33 @@
+/*
+ *  Port on Texas Instruments TMS320C6x architecture
+ *
+ *  Copyright (C) 2004, 2009, 2010, 2011 Texas Instruments Incorporated
+ *  Author: Aurelien Jacquiot (aurelien.jacquiot@jaluna.com)
+ *
+ *  Modified for 2.6.34: Mark Salter <msalter@redhat.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ */
+#ifndef _ASM_C6X_TIMEX_H
+#define _ASM_C6X_TIMEX_H
+
+#define CLOCK_TICK_RATE ((1000 * 1000000UL) / 6)
+
+/* 64-bit timestamp */
+typedef unsigned long long cycles_t;
+
+static inline cycles_t get_cycles(void)
+{
+	unsigned l, h;
+
+	asm volatile (" dint\n"
+		      " mvc .s2 TSCL,%0\n"
+		      " mvc .s2 TSCH,%1\n"
+		      " rint\n"
+		      : "=b"(l), "=b"(h));
+	return ((cycles_t)h << 32) | l;
+}
+
+#endif /* _ASM_C6X_TIMEX_H */
diff --git a/arch/c6x/include/asm/tlb.h b/arch/c6x/include/asm/tlb.h
new file mode 100644
index 0000000..8709e5e
--- /dev/null
+++ b/arch/c6x/include/asm/tlb.h
@@ -0,0 +1,8 @@
+#ifndef _ASM_C6X_TLB_H
+#define _ASM_C6X_TLB_H
+
+#define tlb_flush(tlb) flush_tlb_mm((tlb)->mm)
+
+#include <asm-generic/tlb.h>
+
+#endif /* _ASM_C6X_TLB_H */
diff --git a/arch/c6x/include/asm/traps.h b/arch/c6x/include/asm/traps.h
new file mode 100644
index 0000000..62124d7
--- /dev/null
+++ b/arch/c6x/include/asm/traps.h
@@ -0,0 +1,36 @@
+/*
+ *  Port on Texas Instruments TMS320C6x architecture
+ *
+ *  Copyright (C) 2004, 2009, 2011 Texas Instruments Incorporated
+ *  Author: Aurelien Jacquiot (aurelien.jacquiot@jaluna.com)
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ */
+#ifndef _ASM_C6X_TRAPS_H
+#define _ASM_C6X_TRAPS_H
+
+#define EXCEPT_TYPE_NXF   31	   /* NMI */
+#define EXCEPT_TYPE_EXC   30	   /* external exception */
+#define EXCEPT_TYPE_IXF   1	   /* internal exception */
+#define EXCEPT_TYPE_SXF   0	   /* software exception */
+
+#define EXCEPT_CAUSE_LBX  (1 << 7) /* loop buffer exception */
+#define EXCEPT_CAUSE_PRX  (1 << 6) /* privilege exception */
+#define EXCEPT_CAUSE_RAX  (1 << 5) /* resource access exception */
+#define EXCEPT_CAUSE_RCX  (1 << 4) /* resource conflict exception */
+#define EXCEPT_CAUSE_OPX  (1 << 3) /* opcode exception */
+#define EXCEPT_CAUSE_EPX  (1 << 2) /* execute packet exception */
+#define EXCEPT_CAUSE_FPX  (1 << 1) /* fetch packet exception */
+#define EXCEPT_CAUSE_IFX  (1 << 0) /* instruction fetch exception */
+
+struct exception_info {
+	char *kernel_str;
+	int  signo;
+	int  code;
+};
+
+extern int (*c6x_nmi_handler)(struct pt_regs *regs);
+
+#endif /* _ASM_C6X_TRAPS_H */
diff --git a/arch/c6x/include/asm/uaccess.h b/arch/c6x/include/asm/uaccess.h
new file mode 100644
index 0000000..453dd26
--- /dev/null
+++ b/arch/c6x/include/asm/uaccess.h
@@ -0,0 +1,107 @@
+/*
+ *  Copyright (C) 2011 Texas Instruments Incorporated
+ *  Author: Mark Salter <msalter@redhat.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ */
+#ifndef _ASM_C6X_UACCESS_H
+#define _ASM_C6X_UACCESS_H
+
+#include <linux/types.h>
+#include <linux/compiler.h>
+#include <linux/string.h>
+
+#ifdef CONFIG_ACCESS_CHECK
+#define __access_ok _access_ok
+#endif
+
+/*
+ * __copy_from_user/copy_to_user are based on ones in asm-generic/uaccess.h
+ *
+ * C6X supports unaligned 32 and 64 bit loads and stores.
+ */
+static inline __must_check long __copy_from_user(void *to,
+		const void __user *from, unsigned long n)
+{
+	u32 tmp32;
+	u64 tmp64;
+
+	if (__builtin_constant_p(n)) {
+		switch (n) {
+		case 1:
+			*(u8 *)to = *(u8 __force *)from;
+			return 0;
+		case 4:
+			asm volatile ("ldnw .d1t1 *%2,%0\n"
+				      "nop  4\n"
+				      "stnw .d1t1 %0,*%1\n"
+				      : "=&a"(tmp32)
+				      : "A"(to), "a"(from)
+				      : "memory");
+			return 0;
+		case 8:
+			asm volatile ("ldndw .d1t1 *%2,%0\n"
+				      "nop   4\n"
+				      "stndw .d1t1 %0,*%1\n"
+				      : "=&a"(tmp64)
+				      : "a"(to), "a"(from)
+				      : "memory");
+			return 0;
+		default:
+			break;
+		}
+	}
+
+	memcpy(to, (const void __force *)from, n);
+	return 0;
+}
+
+static inline __must_check long __copy_to_user(void __user *to,
+		const void *from, unsigned long n)
+{
+	u32 tmp32;
+	u64 tmp64;
+
+	if (__builtin_constant_p(n)) {
+		switch (n) {
+		case 1:
+			*(u8 __force *)to = *(u8 *)from;
+			return 0;
+		case 4:
+			asm volatile ("ldnw .d1t1 *%2,%0\n"
+				      "nop  4\n"
+				      "stnw .d1t1 %0,*%1\n"
+				      : "=&a"(tmp32)
+				      : "a"(to), "a"(from)
+				      : "memory");
+			return 0;
+		case 8:
+			asm volatile ("ldndw .d1t1 *%2,%0\n"
+				      "nop   4\n"
+				      "stndw .d1t1 %0,*%1\n"
+				      : "=&a"(tmp64)
+				      : "a"(to), "a"(from)
+				      : "memory");
+			return 0;
+		default:
+			break;
+		}
+	}
+
+	memcpy((void __force *)to, from, n);
+	return 0;
+}
+
+#define __copy_to_user   __copy_to_user
+#define __copy_from_user __copy_from_user
+
+extern int _access_ok(unsigned long addr, unsigned long size);
+#ifdef CONFIG_ACCESS_CHECK
+#define __access_ok _access_ok
+#endif
+
+#include <asm-generic/uaccess.h>
+
+#endif /* _ASM_C6X_UACCESS_H */
diff --git a/arch/c6x/include/asm/unaligned.h b/arch/c6x/include/asm/unaligned.h
new file mode 100644
index 0000000..b976cb7
--- /dev/null
+++ b/arch/c6x/include/asm/unaligned.h
@@ -0,0 +1,170 @@
+/*
+ *  Port on Texas Instruments TMS320C6x architecture
+ *
+ *  Copyright (C) 2004, 2009, 2010 Texas Instruments Incorporated
+ *  Author: Aurelien Jacquiot (aurelien.jacquiot@jaluna.com)
+ *  Rewritten for 2.6.3x: Mark Salter <msalter@redhat.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ */
+#ifndef _ASM_C6X_UNALIGNED_H
+#define _ASM_C6X_UNALIGNED_H
+
+#include <linux/swab.h>
+
+/*
+ * The C64x+ can do unaligned word and dword accesses in hardware
+ * using special load/store instructions.
+ */
+
+static inline u16 get_unaligned_le16(const void *p)
+{
+	const u8 *_p = p;
+	return _p[0] | _p[1] << 8;
+}
+
+static inline u16 get_unaligned_be16(const void *p)
+{
+	const u8 *_p = p;
+	return _p[0] << 8 | _p[1];
+}
+
+static inline void put_unaligned_le16(u16 val, void *p)
+{
+	u8 *_p = p;
+	_p[0] = val;
+	_p[1] = val >> 8;
+}
+
+static inline void put_unaligned_be16(u16 val, void *p)
+{
+	u8 *_p = p;
+	_p[0] = val >> 8;
+	_p[1] = val;
+}
+
+static inline u32 get_unaligned32(const void *p)
+{
+	u32 val = (u32) p;
+	asm (" ldnw	.d1t1	*%0,%0\n"
+	     " nop     4\n"
+	     : "+a"(val));
+	return val;
+}
+
+static inline void put_unaligned32(u32 val, void *p)
+{
+	asm volatile (" stnw	.d2t1	%0,*%1\n"
+		      : : "a"(val), "b"(p) : "memory");
+}
+
+static inline u64 get_unaligned64(const void *p)
+{
+	u64 val;
+	asm volatile (" ldndw	.d1t1	*%1,%0\n"
+		      " nop     4\n"
+		      : "=a"(val) : "a"(p));
+	return val;
+}
+
+static inline void put_unaligned64(u64 val, const void *p)
+{
+	asm volatile (" stndw	.d2t1	%0,*%1\n"
+		      : : "a"(val), "b"(p) : "memory");
+}
+
+#ifdef CONFIG_CPU_BIG_ENDIAN
+
+#define get_unaligned_le32(p)	 __swab32(get_unaligned32(p))
+#define get_unaligned_le64(p)	 __swab64(get_unaligned64(p))
+#define get_unaligned_be32(p)	 get_unaligned32(p)
+#define get_unaligned_be64(p)	 get_unaligned64(p)
+#define put_unaligned_le32(v, p) put_unaligned32(__swab32(v), (p))
+#define put_unaligned_le64(v, p) put_unaligned64(__swab64(v), (p))
+#define put_unaligned_be32(v, p) put_unaligned32((v), (p))
+#define put_unaligned_be64(v, p) put_unaligned64((v), (p))
+#define get_unaligned	__get_unaligned_be
+#define put_unaligned	__put_unaligned_be
+
+#else
+
+#define get_unaligned_le32(p)	 get_unaligned32(p)
+#define get_unaligned_le64(p)	 get_unaligned64(p)
+#define get_unaligned_be32(p)	 __swab32(get_unaligned32(p))
+#define get_unaligned_be64(p)	 __swab64(get_unaligned64(p))
+#define put_unaligned_le32(v, p) put_unaligned32((v), (p))
+#define put_unaligned_le64(v, p) put_unaligned64((v), (p))
+#define put_unaligned_be32(v, p) put_unaligned32(__swab32(v), (p))
+#define put_unaligned_be64(v, p) put_unaligned64(__swab64(v), (p))
+#define get_unaligned	__get_unaligned_le
+#define put_unaligned	__put_unaligned_le
+
+#endif
+
+/*
+ * Cause a link-time error if we try an unaligned access other than
+ * 1,2,4 or 8 bytes long
+ */
+extern int __bad_unaligned_access_size(void);
+
+#define __get_unaligned_le(ptr) (typeof(*(ptr)))({			\
+	sizeof(*(ptr)) == 1 ? *(ptr) :					\
+	  (sizeof(*(ptr)) == 2 ? get_unaligned_le16((ptr)) :		\
+	     (sizeof(*(ptr)) == 4 ? get_unaligned_le32((ptr)) :		\
+		(sizeof(*(ptr)) == 8 ? get_unaligned_le64((ptr)) :	\
+		   __bad_unaligned_access_size())));			\
+	})
+
+#define __get_unaligned_be(ptr) (__force typeof(*(ptr)))({	\
+	sizeof(*(ptr)) == 1 ? *(ptr) :					\
+	  (sizeof(*(ptr)) == 2 ? get_unaligned_be16((ptr)) :		\
+	     (sizeof(*(ptr)) == 4 ? get_unaligned_be32((ptr)) :		\
+		(sizeof(*(ptr)) == 8 ? get_unaligned_be64((ptr)) :	\
+		   __bad_unaligned_access_size())));			\
+	})
+
+#define __put_unaligned_le(val, ptr) ({					\
+	void *__gu_p = (ptr);						\
+	switch (sizeof(*(ptr))) {					\
+	case 1:								\
+		*(u8 *)__gu_p = (__force u8)(val);			\
+		break;							\
+	case 2:								\
+		put_unaligned_le16((__force u16)(val), __gu_p);		\
+		break;							\
+	case 4:								\
+		put_unaligned_le32((__force u32)(val), __gu_p);		\
+		break;							\
+	case 8:								\
+		put_unaligned_le64((__force u64)(val), __gu_p);		\
+		break;							\
+	default:							\
+		__bad_unaligned_access_size();				\
+		break;							\
+	}								\
+	(void)0; })
+
+#define __put_unaligned_be(val, ptr) ({					\
+	void *__gu_p = (ptr);						\
+	switch (sizeof(*(ptr))) {					\
+	case 1:								\
+		*(u8 *)__gu_p = (__force u8)(val);			\
+		break;							\
+	case 2:								\
+		put_unaligned_be16((__force u16)(val), __gu_p);		\
+		break;							\
+	case 4:								\
+		put_unaligned_be32((__force u32)(val), __gu_p);		\
+		break;							\
+	case 8:								\
+		put_unaligned_be64((__force u64)(val), __gu_p);		\
+		break;							\
+	default:							\
+		__bad_unaligned_access_size();				\
+		break;							\
+	}								\
+	(void)0; })
+
+#endif /* _ASM_C6X_UNALIGNED_H */
diff --git a/arch/c6x/include/asm/unistd.h b/arch/c6x/include/asm/unistd.h
new file mode 100644
index 0000000..6d54ea4
--- /dev/null
+++ b/arch/c6x/include/asm/unistd.h
@@ -0,0 +1,26 @@
+/*
+ * Copyright (C) 2011 Texas Instruments Incorporated
+ *
+ * Based on arch/tile version.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.	See the GNU General Public License for
+ *   more details.
+ */
+#if !defined(_ASM_C6X_UNISTD_H) || defined(__SYSCALL)
+#define _ASM_C6X_UNISTD_H
+
+/* Use the standard ABI for syscalls. */
+#include <asm-generic/unistd.h>
+
+/* C6X-specific syscalls. */
+#define __NR_cache_sync	(__NR_arch_specific_syscall + 0)
+__SYSCALL(__NR_cache_sync, sys_cache_sync)
+
+#endif /* _ASM_C6X_UNISTD_H */
diff --git a/arch/c6x/kernel/Makefile b/arch/c6x/kernel/Makefile
new file mode 100644
index 0000000..580a515
--- /dev/null
+++ b/arch/c6x/kernel/Makefile
@@ -0,0 +1,12 @@
+#
+# Makefile for arch/c6x/kernel/
+#
+
+extra-y := head.o vmlinux.lds
+
+obj-y := process.o traps.o irq.o signal.o ptrace.o
+obj-y += setup.o sys_c6x.o time.o devicetree.o
+obj-y += switch_to.o entry.o vectors.o c6x_ksyms.o
+obj-y += soc.o dma.o
+
+obj-$(CONFIG_MODULES)           += module.o
diff --git a/arch/c6x/kernel/asm-offsets.c b/arch/c6x/kernel/asm-offsets.c
new file mode 100644
index 0000000..759ad6d
--- /dev/null
+++ b/arch/c6x/kernel/asm-offsets.c
@@ -0,0 +1,123 @@
+/*
+ * Generate definitions needed by assembly language modules.
+ * This code generates raw asm output which is post-processed
+ * to extract and format the required data.
+ */
+
+#include <linux/sched.h>
+#include <linux/thread_info.h>
+#include <asm/procinfo.h>
+#include <linux/kbuild.h>
+#include <linux/unistd.h>
+
+void foo(void)
+{
+	OFFSET(REGS_A16,	pt_regs, a16);
+	OFFSET(REGS_A17,	pt_regs, a17);
+	OFFSET(REGS_A18,	pt_regs, a18);
+	OFFSET(REGS_A19,	pt_regs, a19);
+	OFFSET(REGS_A20,	pt_regs, a20);
+	OFFSET(REGS_A21,	pt_regs, a21);
+	OFFSET(REGS_A22,	pt_regs, a22);
+	OFFSET(REGS_A23,	pt_regs, a23);
+	OFFSET(REGS_A24,	pt_regs, a24);
+	OFFSET(REGS_A25,	pt_regs, a25);
+	OFFSET(REGS_A26,	pt_regs, a26);
+	OFFSET(REGS_A27,	pt_regs, a27);
+	OFFSET(REGS_A28,	pt_regs, a28);
+	OFFSET(REGS_A29,	pt_regs, a29);
+	OFFSET(REGS_A30,	pt_regs, a30);
+	OFFSET(REGS_A31,	pt_regs, a31);
+
+	OFFSET(REGS_B16,	pt_regs, b16);
+	OFFSET(REGS_B17,	pt_regs, b17);
+	OFFSET(REGS_B18,	pt_regs, b18);
+	OFFSET(REGS_B19,	pt_regs, b19);
+	OFFSET(REGS_B20,	pt_regs, b20);
+	OFFSET(REGS_B21,	pt_regs, b21);
+	OFFSET(REGS_B22,	pt_regs, b22);
+	OFFSET(REGS_B23,	pt_regs, b23);
+	OFFSET(REGS_B24,	pt_regs, b24);
+	OFFSET(REGS_B25,	pt_regs, b25);
+	OFFSET(REGS_B26,	pt_regs, b26);
+	OFFSET(REGS_B27,	pt_regs, b27);
+	OFFSET(REGS_B28,	pt_regs, b28);
+	OFFSET(REGS_B29,	pt_regs, b29);
+	OFFSET(REGS_B30,	pt_regs, b30);
+	OFFSET(REGS_B31,	pt_regs, b31);
+
+	OFFSET(REGS_A0,		pt_regs, a0);
+	OFFSET(REGS_A1,		pt_regs, a1);
+	OFFSET(REGS_A2,		pt_regs, a2);
+	OFFSET(REGS_A3,		pt_regs, a3);
+	OFFSET(REGS_A4,		pt_regs, a4);
+	OFFSET(REGS_A5,		pt_regs, a5);
+	OFFSET(REGS_A6,		pt_regs, a6);
+	OFFSET(REGS_A7,		pt_regs, a7);
+	OFFSET(REGS_A8,		pt_regs, a8);
+	OFFSET(REGS_A9,		pt_regs, a9);
+	OFFSET(REGS_A10,	pt_regs, a10);
+	OFFSET(REGS_A11,	pt_regs, a11);
+	OFFSET(REGS_A12,	pt_regs, a12);
+	OFFSET(REGS_A13,	pt_regs, a13);
+	OFFSET(REGS_A14,	pt_regs, a14);
+	OFFSET(REGS_A15,	pt_regs, a15);
+
+	OFFSET(REGS_B0,		pt_regs, b0);
+	OFFSET(REGS_B1,		pt_regs, b1);
+	OFFSET(REGS_B2,		pt_regs, b2);
+	OFFSET(REGS_B3,		pt_regs, b3);
+	OFFSET(REGS_B4,		pt_regs, b4);
+	OFFSET(REGS_B5,		pt_regs, b5);
+	OFFSET(REGS_B6,		pt_regs, b6);
+	OFFSET(REGS_B7,		pt_regs, b7);
+	OFFSET(REGS_B8,		pt_regs, b8);
+	OFFSET(REGS_B9,		pt_regs, b9);
+	OFFSET(REGS_B10,	pt_regs, b10);
+	OFFSET(REGS_B11,	pt_regs, b11);
+	OFFSET(REGS_B12,	pt_regs, b12);
+	OFFSET(REGS_B13,	pt_regs, b13);
+	OFFSET(REGS_DP,		pt_regs, dp);
+	OFFSET(REGS_SP,		pt_regs, sp);
+
+	OFFSET(REGS_TSR,	pt_regs, tsr);
+	OFFSET(REGS_ORIG_A4,	pt_regs, orig_a4);
+
+	DEFINE(REGS__END,	sizeof(struct pt_regs));
+	BLANK();
+
+	OFFSET(THREAD_PC,	thread_struct, pc);
+	OFFSET(THREAD_B15_14,	thread_struct, b15_14);
+	OFFSET(THREAD_A15_14,	thread_struct, a15_14);
+	OFFSET(THREAD_B13_12,	thread_struct, b13_12);
+	OFFSET(THREAD_A13_12,	thread_struct, a13_12);
+	OFFSET(THREAD_B11_10,	thread_struct, b11_10);
+	OFFSET(THREAD_A11_10,	thread_struct, a11_10);
+	OFFSET(THREAD_RICL_ICL,	thread_struct, ricl_icl);
+	BLANK();
+
+	OFFSET(TASK_STATE,	task_struct, state);
+	BLANK();
+
+	OFFSET(THREAD_INFO_FLAGS,	thread_info, flags);
+	OFFSET(THREAD_INFO_PREEMPT_COUNT, thread_info, preempt_count);
+	BLANK();
+
+	/* These would be unneccessary if we ran asm files
+	 * through the preprocessor.
+	 */
+	DEFINE(KTHREAD_SIZE, THREAD_SIZE);
+	DEFINE(KTHREAD_SHIFT, THREAD_SHIFT);
+	DEFINE(KTHREAD_START_SP, THREAD_START_SP);
+	DEFINE(ENOSYS_, ENOSYS);
+	DEFINE(NR_SYSCALLS_, __NR_syscalls);
+
+	DEFINE(_TIF_SYSCALL_TRACE, (1<<TIF_SYSCALL_TRACE));
+	DEFINE(_TIF_NOTIFY_RESUME, (1<<TIF_NOTIFY_RESUME));
+	DEFINE(_TIF_SIGPENDING, (1<<TIF_SIGPENDING));
+	DEFINE(_TIF_NEED_RESCHED, (1<<TIF_NEED_RESCHED));
+	DEFINE(_TIF_POLLING_NRFLAG, (1<<TIF_POLLING_NRFLAG));
+
+	DEFINE(_TIF_ALLWORK_MASK, TIF_ALLWORK_MASK);
+	DEFINE(_TIF_WORK_MASK, TIF_WORK_MASK);
+}
diff --git a/arch/c6x/kernel/c6x_ksyms.c b/arch/c6x/kernel/c6x_ksyms.c
new file mode 100644
index 0000000..0ba3e0b
--- /dev/null
+++ b/arch/c6x/kernel/c6x_ksyms.c
@@ -0,0 +1,66 @@
+/*
+ *  Port on Texas Instruments TMS320C6x architecture
+ *
+ *  Copyright (C) 2004, 2009, 2010, 2011 Texas Instruments Incorporated
+ *  Author: Aurelien Jacquiot (aurelien.jacquiot@jaluna.com)
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ *
+ */
+#include <linux/module.h>
+#include <asm/checksum.h>
+#include <linux/io.h>
+
+/*
+ * libgcc functions - used internally by the compiler...
+ */
+extern int __c6xabi_divi(int dividend, int divisor);
+EXPORT_SYMBOL(__c6xabi_divi);
+
+extern unsigned __c6xabi_divu(unsigned	dividend, unsigned divisor);
+EXPORT_SYMBOL(__c6xabi_divu);
+
+extern int __c6xabi_remi(int dividend, int divisor);
+EXPORT_SYMBOL(__c6xabi_remi);
+
+extern unsigned __c6xabi_remu(unsigned	dividend, unsigned divisor);
+EXPORT_SYMBOL(__c6xabi_remu);
+
+extern int __c6xabi_divremi(int dividend, int divisor);
+EXPORT_SYMBOL(__c6xabi_divremi);
+
+extern unsigned __c6xabi_divremu(unsigned  dividend, unsigned divisor);
+EXPORT_SYMBOL(__c6xabi_divremu);
+
+extern unsigned long long __c6xabi_mpyll(unsigned long long src1,
+					 unsigned long long src2);
+EXPORT_SYMBOL(__c6xabi_mpyll);
+
+extern long long __c6xabi_negll(long long src);
+EXPORT_SYMBOL(__c6xabi_negll);
+
+extern unsigned long long __c6xabi_llshl(unsigned long long src1, uint src2);
+EXPORT_SYMBOL(__c6xabi_llshl);
+
+extern long long __c6xabi_llshr(long long src1, uint src2);
+EXPORT_SYMBOL(__c6xabi_llshr);
+
+extern unsigned long long __c6xabi_llshru(unsigned long long src1, uint src2);
+EXPORT_SYMBOL(__c6xabi_llshru);
+
+extern void __c6xabi_strasgi(int *dst, const int *src, unsigned cnt);
+EXPORT_SYMBOL(__c6xabi_strasgi);
+
+extern void __c6xabi_push_rts(void);
+EXPORT_SYMBOL(__c6xabi_push_rts);
+
+extern void __c6xabi_pop_rts(void);
+EXPORT_SYMBOL(__c6xabi_pop_rts);
+
+extern void __c6xabi_strasgi_64plus(int *dst, const int *src, unsigned cnt);
+EXPORT_SYMBOL(__c6xabi_strasgi_64plus);
+
+/* lib functions */
+EXPORT_SYMBOL(memcpy);
diff --git a/arch/c6x/kernel/devicetree.c b/arch/c6x/kernel/devicetree.c
new file mode 100644
index 0000000..bdb56f0
--- /dev/null
+++ b/arch/c6x/kernel/devicetree.c
@@ -0,0 +1,53 @@
+/*
+ *  Architecture specific OF callbacks.
+ *
+ *  Copyright (C) 2011 Texas Instruments Incorporated
+ *  Author: Mark Salter <msalter@redhat.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ *
+ */
+#include <linux/init.h>
+#include <linux/of.h>
+#include <linux/of_fdt.h>
+#include <linux/initrd.h>
+#include <linux/memblock.h>
+
+void __init early_init_devtree(void *params)
+{
+	/* Setup flat device-tree pointer */
+	initial_boot_params = params;
+
+	/* Retrieve various informations from the /chosen node of the
+	 * device-tree, including the platform type, initrd location and
+	 * size and more ...
+	 */
+	of_scan_flat_dt(early_init_dt_scan_chosen, c6x_command_line);
+
+	/* Scan memory nodes and rebuild MEMBLOCKs */
+	of_scan_flat_dt(early_init_dt_scan_root, NULL);
+	of_scan_flat_dt(early_init_dt_scan_memory, NULL);
+}
+
+
+#ifdef CONFIG_BLK_DEV_INITRD
+void __init early_init_dt_setup_initrd_arch(unsigned long start,
+		unsigned long end)
+{
+	initrd_start = (unsigned long)__va(start);
+	initrd_end = (unsigned long)__va(end);
+	initrd_below_start_ok = 1;
+}
+#endif
+
+void __init early_init_dt_add_memory_arch(u64 base, u64 size)
+{
+	c6x_add_memory(base, size);
+}
+
+void * __init early_init_dt_alloc_memory_arch(u64 size, u64 align)
+{
+	return __va(memblock_alloc(size, align));
+}
diff --git a/arch/c6x/kernel/dma.c b/arch/c6x/kernel/dma.c
new file mode 100644
index 0000000..ab7b12d
--- /dev/null
+++ b/arch/c6x/kernel/dma.c
@@ -0,0 +1,153 @@
+/*
+ *  Copyright (C) 2011 Texas Instruments Incorporated
+ *  Author: Mark Salter <msalter@redhat.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ */
+#include <linux/module.h>
+#include <linux/dma-mapping.h>
+#include <linux/mm.h>
+#include <linux/mm_types.h>
+#include <linux/scatterlist.h>
+
+#include <asm/cacheflush.h>
+
+static void c6x_dma_sync(dma_addr_t handle, size_t size,
+			 enum dma_data_direction dir)
+{
+	unsigned long paddr = handle;
+
+	BUG_ON(!valid_dma_direction(dir));
+
+	switch (dir) {
+	case DMA_FROM_DEVICE:
+		L2_cache_block_invalidate(paddr, paddr + size);
+		break;
+	case DMA_TO_DEVICE:
+		L2_cache_block_writeback(paddr, paddr + size);
+		break;
+	case DMA_BIDIRECTIONAL:
+		L2_cache_block_writeback_invalidate(paddr, paddr + size);
+		break;
+	default:
+		break;
+	}
+}
+
+dma_addr_t dma_map_single(struct device *dev, void *ptr, size_t size,
+			  enum dma_data_direction dir)
+{
+	dma_addr_t addr = virt_to_phys(ptr);
+
+	c6x_dma_sync(addr, size, dir);
+
+	debug_dma_map_page(dev, virt_to_page(ptr),
+			   (unsigned long)ptr & ~PAGE_MASK, size,
+			   dir, addr, true);
+	return addr;
+}
+EXPORT_SYMBOL(dma_map_single);
+
+
+void dma_unmap_single(struct device *dev, dma_addr_t handle,
+		      size_t size, enum dma_data_direction dir)
+{
+	c6x_dma_sync(handle, size, dir);
+
+	debug_dma_unmap_page(dev, handle, size, dir, true);
+}
+EXPORT_SYMBOL(dma_unmap_single);
+
+
+int dma_map_sg(struct device *dev, struct scatterlist *sglist,
+	       int nents, enum dma_data_direction dir)
+{
+	struct scatterlist *sg;
+	int i;
+
+	for_each_sg(sglist, sg, nents, i)
+		sg->dma_address = dma_map_single(dev, sg_virt(sg), sg->length,
+						 dir);
+
+	debug_dma_map_sg(dev, sglist, nents, nents, dir);
+
+	return nents;
+}
+EXPORT_SYMBOL(dma_map_sg);
+
+
+void dma_unmap_sg(struct device *dev, struct scatterlist *sglist,
+		  int nents, enum dma_data_direction dir)
+{
+	struct scatterlist *sg;
+	int i;
+
+	for_each_sg(sglist, sg, nents, i)
+		dma_unmap_single(dev, sg_dma_address(sg), sg->length, dir);
+
+	debug_dma_unmap_sg(dev, sglist,	nents, dir);
+}
+EXPORT_SYMBOL(dma_unmap_sg);
+
+void dma_sync_single_for_cpu(struct device *dev, dma_addr_t handle,
+			     size_t size, enum dma_data_direction dir)
+{
+	c6x_dma_sync(handle, size, dir);
+
+	debug_dma_sync_single_for_cpu(dev, handle, size, dir);
+}
+EXPORT_SYMBOL(dma_sync_single_for_cpu);
+
+
+void dma_sync_single_for_device(struct device *dev, dma_addr_t handle,
+				size_t size, enum dma_data_direction dir)
+{
+	c6x_dma_sync(handle, size, dir);
+
+	debug_dma_sync_single_for_device(dev, handle, size, dir);
+}
+EXPORT_SYMBOL(dma_sync_single_for_device);
+
+
+void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sglist,
+			 int nents, enum dma_data_direction dir)
+{
+	struct scatterlist *sg;
+	int i;
+
+	for_each_sg(sglist, sg, nents, i)
+		dma_sync_single_for_cpu(dev, sg_dma_address(sg),
+					sg->length, dir);
+
+	debug_dma_sync_sg_for_cpu(dev, sglist, nents, dir);
+}
+EXPORT_SYMBOL(dma_sync_sg_for_cpu);
+
+
+void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sglist,
+			    int nents, enum dma_data_direction dir)
+{
+	struct scatterlist *sg;
+	int i;
+
+	for_each_sg(sglist, sg, nents, i)
+		dma_sync_single_for_device(dev, sg_dma_address(sg),
+					   sg->length, dir);
+
+	debug_dma_sync_sg_for_device(dev, sglist, nents, dir);
+}
+EXPORT_SYMBOL(dma_sync_sg_for_device);
+
+
+/* Number of entries preallocated for DMA-API debugging */
+#define PREALLOC_DMA_DEBUG_ENTRIES (1 << 16)
+
+static int __init dma_init(void)
+{
+	dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES);
+
+	return 0;
+}
+fs_initcall(dma_init);
diff --git a/arch/c6x/kernel/entry.S b/arch/c6x/kernel/entry.S
new file mode 100644
index 0000000..3e977cc
--- /dev/null
+++ b/arch/c6x/kernel/entry.S
@@ -0,0 +1,803 @@
+;
+;  Port on Texas Instruments TMS320C6x architecture
+;
+;  Copyright (C) 2004-2011 Texas Instruments Incorporated
+;  Author: Aurelien Jacquiot (aurelien.jacquiot@virtuallogix.com)
+;  Updated for 2.6.34: Mark Salter <msalter@redhat.com>
+;
+;  This program is free software; you can redistribute it and/or modify
+;  it under the terms of the GNU General Public License version 2 as
+;  published by the Free Software Foundation.
+;
+
+#include <linux/sys.h>
+#include <linux/linkage.h>
+#include <asm/thread_info.h>
+#include <asm/asm-offsets.h>
+#include <asm/unistd.h>
+#include <asm/errno.h>
+
+; Registers naming
+#define DP	B14
+#define SP	B15
+
+#ifndef CONFIG_PREEMPT
+#define resume_kernel restore_all
+#endif
+
+	.altmacro
+
+	.macro MASK_INT reg
+	MVC	.S2	CSR,reg
+	CLR	.S2	reg,0,0,reg
+	MVC	.S2	reg,CSR
+	.endm
+
+	.macro UNMASK_INT reg
+	MVC	.S2	CSR,reg
+	SET	.S2	reg,0,0,reg
+	MVC	.S2	reg,CSR
+	.endm
+
+	.macro GET_THREAD_INFO reg
+	SHR	.S1X	SP,THREAD_SHIFT,reg
+	SHL	.S1	reg,THREAD_SHIFT,reg
+	.endm
+
+	;;
+	;;  This defines the normal kernel pt_regs layout.
+	;;
+	.macro SAVE_ALL __rp __tsr
+	STW	.D2T2	B0,*SP--[2]		; save original B0
+	MVKL	.S2	current_ksp,B0
+	MVKH	.S2	current_ksp,B0
+	LDW	.D2T2	*B0,B1			; KSP
+
+	NOP	3
+	STW	.D2T2	B1,*+SP[1]		; save original B1
+	XOR	.D2	SP,B1,B0		; (SP ^ KSP)
+	LDW	.D2T2	*+SP[1],B1		; restore B0/B1
+	LDW	.D2T2	*++SP[2],B0
+	SHR	.S2	B0,THREAD_SHIFT,B0	; 0 if already using kstack
+  [B0]	STDW	.D2T2	SP:DP,*--B1[1]		; user: save user sp/dp kstack
+  [B0]	MV	.S2	B1,SP			;    and switch to kstack
+||[!B0] STDW	.D2T2	SP:DP,*--SP[1]		; kernel: save on current stack
+
+	SUBAW	.D2	SP,2,SP
+
+	ADD	.D1X	SP,-8,A15
+ ||	STDW	.D2T1	A15:A14,*SP--[16]	; save A15:A14
+
+	STDW	.D2T2	B13:B12,*SP--[1]
+ ||	STDW	.D1T1	A13:A12,*A15--[1]
+ ||	MVC	.S2	__rp,B13
+
+	STDW	.D2T2	B11:B10,*SP--[1]
+ ||	STDW	.D1T1	A11:A10,*A15--[1]
+ ||	MVC	.S2	CSR,B12
+
+	STDW	.D2T2	B9:B8,*SP--[1]
+ ||	STDW	.D1T1	A9:A8,*A15--[1]
+ ||	MVC	.S2	RILC,B11
+	STDW	.D2T2	B7:B6,*SP--[1]
+ ||	STDW	.D1T1	A7:A6,*A15--[1]
+ ||	MVC	.S2	ILC,B10
+
+	STDW	.D2T2	B5:B4,*SP--[1]
+ ||	STDW	.D1T1	A5:A4,*A15--[1]
+
+	STDW	.D2T2	B3:B2,*SP--[1]
+ ||	STDW	.D1T1	A3:A2,*A15--[1]
+ ||	MVC	.S2	__tsr,B5
+
+	STDW	.D2T2	B1:B0,*SP--[1]
+ ||	STDW	.D1T1	A1:A0,*A15--[1]
+ ||	MV	.S1X	B5,A5
+
+	STDW	.D2T2	B31:B30,*SP--[1]
+ ||	STDW	.D1T1	A31:A30,*A15--[1]
+	STDW	.D2T2	B29:B28,*SP--[1]
+ ||	STDW	.D1T1	A29:A28,*A15--[1]
+	STDW	.D2T2	B27:B26,*SP--[1]
+ ||	STDW	.D1T1	A27:A26,*A15--[1]
+	STDW	.D2T2	B25:B24,*SP--[1]
+ ||	STDW	.D1T1	A25:A24,*A15--[1]
+	STDW	.D2T2	B23:B22,*SP--[1]
+ ||	STDW	.D1T1	A23:A22,*A15--[1]
+	STDW	.D2T2	B21:B20,*SP--[1]
+ ||	STDW	.D1T1	A21:A20,*A15--[1]
+	STDW	.D2T2	B19:B18,*SP--[1]
+ ||	STDW	.D1T1	A19:A18,*A15--[1]
+	STDW	.D2T2	B17:B16,*SP--[1]
+ ||	STDW	.D1T1	A17:A16,*A15--[1]
+
+	STDW	.D2T2	B13:B12,*SP--[1]	; save PC and CSR
+
+	STDW	.D2T2	B11:B10,*SP--[1]	; save RILC and ILC
+	STDW	.D2T1	A5:A4,*SP--[1]		; save TSR and orig A4
+
+	;; We left an unused word on the stack just above pt_regs.
+	;; It is used to save whether or not this frame is due to
+	;; a syscall. It is cleared here, but the syscall handler
+	;; sets it to a non-zero value.
+	MVK	.L2	0,B1
+	STW	.D2T2	B1,*+SP(REGS__END+8)	; clear syscall flag
+	.endm
+
+	.macro RESTORE_ALL __rp __tsr
+	LDDW	.D2T2	*++SP[1],B9:B8		; get TSR (B9)
+	LDDW	.D2T2	*++SP[1],B11:B10	; get RILC (B11) and ILC (B10)
+	LDDW	.D2T2	*++SP[1],B13:B12	; get PC (B13) and CSR (B12)
+
+	ADDAW	.D1X	SP,30,A15
+
+	LDDW	.D1T1	*++A15[1],A17:A16
+ ||	LDDW	.D2T2	*++SP[1],B17:B16
+	LDDW	.D1T1	*++A15[1],A19:A18
+ ||	LDDW	.D2T2	*++SP[1],B19:B18
+	LDDW	.D1T1	*++A15[1],A21:A20
+ ||	LDDW	.D2T2	*++SP[1],B21:B20
+	LDDW	.D1T1	*++A15[1],A23:A22
+ ||	LDDW	.D2T2	*++SP[1],B23:B22
+	LDDW	.D1T1	*++A15[1],A25:A24
+ ||	LDDW	.D2T2	*++SP[1],B25:B24
+	LDDW	.D1T1	*++A15[1],A27:A26
+ ||	LDDW	.D2T2	*++SP[1],B27:B26
+	LDDW	.D1T1	*++A15[1],A29:A28
+ ||	LDDW	.D2T2	*++SP[1],B29:B28
+	LDDW	.D1T1	*++A15[1],A31:A30
+ ||	LDDW	.D2T2	*++SP[1],B31:B30
+
+	LDDW	.D1T1	*++A15[1],A1:A0
+ ||	LDDW	.D2T2	*++SP[1],B1:B0
+
+	LDDW	.D1T1	*++A15[1],A3:A2
+ ||	LDDW	.D2T2	*++SP[1],B3:B2
+ ||	MVC	.S2	B9,__tsr
+	LDDW	.D1T1	*++A15[1],A5:A4
+ ||	LDDW	.D2T2	*++SP[1],B5:B4
+ ||	MVC	.S2	B11,RILC
+	LDDW	.D1T1	*++A15[1],A7:A6
+ ||	LDDW	.D2T2	*++SP[1],B7:B6
+ ||	MVC	.S2	B10,ILC
+
+	LDDW	.D1T1	*++A15[1],A9:A8
+ ||	LDDW	.D2T2	*++SP[1],B9:B8
+ ||	MVC	.S2	B13,__rp
+
+	LDDW	.D1T1	*++A15[1],A11:A10
+ ||	LDDW	.D2T2	*++SP[1],B11:B10
+ ||	MVC	.S2	B12,CSR
+
+	LDDW	.D1T1	*++A15[1],A13:A12
+ ||	LDDW	.D2T2	*++SP[1],B13:B12
+
+	MV	.D2X	A15,SP
+ ||	MVKL	.S1	current_ksp,A15
+	MVKH	.S1	current_ksp,A15
+ ||	ADDAW	.D1X	SP,6,A14
+	STW	.D1T1	A14,*A15	; save kernel stack pointer
+
+	LDDW	.D2T1	*++SP[1],A15:A14
+
+	B	.S2	__rp		; return from interruption
+	LDDW	.D2T2	*+SP[1],SP:DP
+	NOP	4
+	.endm
+
+	.section .text
+
+	;;
+	;; Jump to schedule() then return to ret_from_exception
+	;;
+_reschedule:
+#ifdef CONFIG_C6X_BIG_KERNEL
+	MVKL	.S1	schedule,A0
+	MVKH	.S1	schedule,A0
+	B	.S2X	A0
+#else
+	B	.S1	schedule
+#endif
+	ADDKPC	.S2	ret_from_exception,B3,4
+
+	;;
+	;; Called before syscall handler when process is being debugged
+	;;
+tracesys_on:
+#ifdef CONFIG_C6X_BIG_KERNEL
+	MVKL	.S1	syscall_trace_entry,A0
+	MVKH	.S1	syscall_trace_entry,A0
+	B	.S2X	A0
+#else
+	B	.S1	syscall_trace_entry
+#endif
+	ADDKPC	.S2	ret_from_syscall_trace,B3,3
+	ADD	.S1X	8,SP,A4
+
+ret_from_syscall_trace:
+	;; tracing returns (possibly new) syscall number
+	MV	.D2X	A4,B0
+ ||	MVK	.S2	__NR_syscalls,B1
+	CMPLTU	.L2	B0,B1,B1
+
+ [!B1]	BNOP	.S2	ret_from_syscall_function,5
+ ||	MVK	.S1	-ENOSYS,A4
+
+	;; reload syscall args from (possibly modified) stack frame
+	;; and get syscall handler addr from sys_call_table:
+	LDW	.D2T2	*+SP(REGS_B4+8),B4
+ ||	MVKL	.S2	sys_call_table,B1
+	LDW	.D2T1	*+SP(REGS_A6+8),A6
+ ||	MVKH	.S2	sys_call_table,B1
+	LDW	.D2T2	*+B1[B0],B0
+ ||	MVKL	.S2	ret_from_syscall_function,B3
+	LDW	.D2T2	*+SP(REGS_B6+8),B6
+ ||	MVKH	.S2	ret_from_syscall_function,B3
+	LDW	.D2T1	*+SP(REGS_A8+8),A8
+	LDW	.D2T2	*+SP(REGS_B8+8),B8
+	NOP
+	; B0 = sys_call_table[__NR_*]
+	BNOP	.S2	B0,5			; branch to syscall handler
+ ||	LDW	.D2T1	*+SP(REGS_ORIG_A4+8),A4
+
+syscall_exit_work:
+	AND	.D1	_TIF_SYSCALL_TRACE,A2,A0
+ [!A0]	BNOP	.S1	work_pending,5
+ [A0]	B	.S2	syscall_trace_exit
+	ADDKPC	.S2	resume_userspace,B3,1
+	MVC	.S2	CSR,B1
+	SET	.S2	B1,0,0,B1
+	MVC	.S2	B1,CSR		; enable ints
+
+work_pending:
+	AND	.D1	_TIF_NEED_RESCHED,A2,A0
+ [!A0]	BNOP	.S1	work_notifysig,5
+
+work_resched:
+#ifdef CONFIG_C6X_BIG_KERNEL
+	MVKL	.S1	schedule,A1
+	MVKH	.S1	schedule,A1
+	B	.S2X	A1
+#else
+	B	.S2	schedule
+#endif
+	ADDKPC	.S2	work_rescheduled,B3,4
+work_rescheduled:
+	;; make sure we don't miss an interrupt setting need_resched or
+	;; sigpending between sampling and the rti
+	MASK_INT B2
+	GET_THREAD_INFO A12
+	LDW	.D1T1	*+A12(THREAD_INFO_FLAGS),A2
+	MVK	.S1	_TIF_WORK_MASK,A1
+	MVK	.S1	_TIF_NEED_RESCHED,A3
+	NOP	2
+	AND	.D1	A1,A2,A0
+ ||	AND	.S1	A3,A2,A1
+ [!A0]	BNOP	.S1	restore_all,5
+ [A1]	BNOP	.S1	work_resched,5
+
+work_notifysig:
+	B	.S2	do_notify_resume
+	LDW	.D2T1	*+SP(REGS__END+8),A6 ; syscall flag
+	ADDKPC	.S2	resume_userspace,B3,1
+	ADD	.S1X	8,SP,A4		; pt_regs pointer is first arg
+	MV	.D2X	A2,B4		; thread_info flags is second arg
+
+	;;
+	;; On C64x+, the return way from exception and interrupt
+	;; is a little bit different
+	;;
+ENTRY(ret_from_exception)
+#ifdef CONFIG_PREEMPT
+	MASK_INT B2
+#endif
+
+ENTRY(ret_from_interrupt)
+	;;
+	;; Check if we are comming from user mode.
+	;;
+	LDW	.D2T2	*+SP(REGS_TSR+8),B0
+	MVK	.S2	0x40,B1
+	NOP	3
+	AND	.D2	B0,B1,B0
+ [!B0]	BNOP	.S2	resume_kernel,5
+
+resume_userspace:
+	;; make sure we don't miss an interrupt setting need_resched or
+	;; sigpending between sampling and the rti
+	MASK_INT B2
+	GET_THREAD_INFO A12
+	LDW	.D1T1	*+A12(THREAD_INFO_FLAGS),A2
+	MVK	.S1	_TIF_WORK_MASK,A1
+	MVK	.S1	_TIF_NEED_RESCHED,A3
+	NOP	2
+	AND	.D1	A1,A2,A0
+ [A0]	BNOP	.S1	work_pending,5
+	BNOP	.S1	restore_all,5
+
+	;;
+	;; System call handling
+	;; B0 = syscall number (in sys_call_table)
+	;; A4,B4,A6,B6,A8,B8 = arguments of the syscall function
+	;; A4 is the return value register
+	;;
+system_call_saved:
+	MVK	.L2	1,B2
+	STW	.D2T2	B2,*+SP(REGS__END+8)	; set syscall flag
+	MVC	.S2	B2,ECR			; ack the software exception
+
+	UNMASK_INT B2			; re-enable global IT
+
+system_call_saved_noack:
+	;; Check system call number
+	MVK	.S2	__NR_syscalls,B1
+#ifdef CONFIG_C6X_BIG_KERNEL
+ ||	MVKL	.S1	sys_ni_syscall,A0
+#endif
+	CMPLTU	.L2	B0,B1,B1
+#ifdef CONFIG_C6X_BIG_KERNEL
+ ||	MVKH	.S1	sys_ni_syscall,A0
+#endif
+
+	;; Check for ptrace
+	GET_THREAD_INFO A12
+
+#ifdef CONFIG_C6X_BIG_KERNEL
+ [!B1]	B	.S2X	A0
+#else
+ [!B1]	B	.S2	sys_ni_syscall
+#endif
+ [!B1]	ADDKPC	.S2	ret_from_syscall_function,B3,4
+
+	;; Get syscall handler addr from sys_call_table
+	;; call tracesys_on or call syscall handler
+	LDW	.D1T1	*+A12(THREAD_INFO_FLAGS),A2
+ ||	MVKL	.S2	sys_call_table,B1
+	MVKH	.S2	sys_call_table,B1
+	LDW	.D2T2	*+B1[B0],B0
+	NOP	2
+	; A2 = thread_info flags
+	AND	.D1	_TIF_SYSCALL_TRACE,A2,A2
+ [A2]	BNOP	.S1	tracesys_on,5
+	;; B0 = _sys_call_table[__NR_*]
+	B	.S2	B0
+	ADDKPC	.S2	ret_from_syscall_function,B3,4
+
+ret_from_syscall_function:
+	STW	.D2T1	A4,*+SP(REGS_A4+8)	; save return value in A4
+						; original A4 is in orig_A4
+syscall_exit:
+	;; make sure we don't miss an interrupt setting need_resched or
+	;; sigpending between sampling and the rti
+	MASK_INT B2
+	LDW	.D1T1	*+A12(THREAD_INFO_FLAGS),A2
+	MVK	.S1	_TIF_ALLWORK_MASK,A1
+	NOP	3
+	AND	.D1	A1,A2,A2 ; check for work to do
+ [A2]	BNOP	.S1	syscall_exit_work,5
+
+restore_all:
+	RESTORE_ALL NRP,NTSR
+
+	;;
+	;; After a fork we jump here directly from resume,
+	;; so that A4 contains the previous task structure.
+	;;
+ENTRY(ret_from_fork)
+#ifdef CONFIG_C6X_BIG_KERNEL
+	MVKL	.S1	schedule_tail,A0
+	MVKH	.S1	schedule_tail,A0
+	B	.S2X	A0
+#else
+	B	.S2	schedule_tail
+#endif
+	ADDKPC	.S2	ret_from_fork_2,B3,4
+ret_from_fork_2:
+	;; return 0 in A4 for child process
+	GET_THREAD_INFO A12
+	BNOP	.S2	syscall_exit,3
+	MVK	.L2	0,B0
+	STW	.D2T2	B0,*+SP(REGS_A4+8)
+ENDPROC(ret_from_fork)
+
+	;;
+	;; These are the interrupt handlers, responsible for calling __do_IRQ()
+	;; int6 is used for syscalls (see _system_call entry)
+	;;
+	.macro SAVE_ALL_INT
+	SAVE_ALL IRP,ITSR
+	.endm
+
+	.macro CALL_INT int
+#ifdef CONFIG_C6X_BIG_KERNEL
+	MVKL	.S1	c6x_do_IRQ,A0
+	MVKH	.S1	c6x_do_IRQ,A0
+	BNOP	.S2X	A0,1
+	MVK	.S1	int,A4
+	ADDAW	.D2	SP,2,B4
+	MVKL	.S2	ret_from_interrupt,B3
+	MVKH	.S2	ret_from_interrupt,B3
+#else
+	CALLP   .S2	c6x_do_IRQ,B3
+ ||	MVK	.S1	int,A4
+ ||	ADDAW	.D2	SP,2,B4
+	B	.S1	ret_from_interrupt
+	NOP	5
+#endif
+	.endm
+
+ENTRY(_int4_handler)
+	SAVE_ALL_INT
+	CALL_INT 4
+ENDPROC(_int4_handler)
+
+ENTRY(_int5_handler)
+	SAVE_ALL_INT
+	CALL_INT 5
+ENDPROC(_int5_handler)
+
+ENTRY(_int6_handler)
+	SAVE_ALL_INT
+	CALL_INT 6
+ENDPROC(_int6_handler)
+
+ENTRY(_int7_handler)
+	SAVE_ALL_INT
+	CALL_INT 7
+ENDPROC(_int7_handler)
+
+ENTRY(_int8_handler)
+	SAVE_ALL_INT
+	CALL_INT 8
+ENDPROC(_int8_handler)
+
+ENTRY(_int9_handler)
+	SAVE_ALL_INT
+	CALL_INT 9
+ENDPROC(_int9_handler)
+
+ENTRY(_int10_handler)
+	SAVE_ALL_INT
+	CALL_INT 10
+ENDPROC(_int10_handler)
+
+ENTRY(_int11_handler)
+	SAVE_ALL_INT
+	CALL_INT 11
+ENDPROC(_int11_handler)
+
+ENTRY(_int12_handler)
+	SAVE_ALL_INT
+	CALL_INT 12
+ENDPROC(_int12_handler)
+
+ENTRY(_int13_handler)
+	SAVE_ALL_INT
+	CALL_INT 13
+ENDPROC(_int13_handler)
+
+ENTRY(_int14_handler)
+	SAVE_ALL_INT
+	CALL_INT 14
+ENDPROC(_int14_handler)
+
+ENTRY(_int15_handler)
+	SAVE_ALL_INT
+	CALL_INT 15
+ENDPROC(_int15_handler)
+
+	;;
+	;; Handler for uninitialized and spurious interrupts
+	;;
+ENTRY(_bad_interrupt)
+	B	.S2	IRP
+	NOP	5
+ENDPROC(_bad_interrupt)
+
+	;;
+	;; Entry for NMI/exceptions/syscall
+	;;
+ENTRY(_nmi_handler)
+	SAVE_ALL NRP,NTSR
+
+	MVC	.S2	EFR,B2
+	CMPEQ	.L2	1,B2,B2
+ ||	MVC	.S2	TSR,B1
+	CLR	.S2	B1,10,10,B1
+	MVC	.S2	B1,TSR
+#ifdef CONFIG_C6X_BIG_KERNEL
+ [!B2]	MVKL	.S1	process_exception,A0
+ [!B2]	MVKH	.S1	process_exception,A0
+ [!B2]	B	.S2X	A0
+#else
+ [!B2]	B	.S2	process_exception
+#endif
+ [B2]	B	.S2	system_call_saved
+ [!B2]	ADDAW	.D2	SP,2,B1
+ [!B2]	MV	.D1X	B1,A4
+	ADDKPC	.S2	ret_from_trap,B3,2
+
+ret_from_trap:
+	MV	.D2X	A4,B0
+ [!B0]	BNOP	.S2	ret_from_exception,5
+
+#ifdef CONFIG_C6X_BIG_KERNEL
+	MVKL	.S2	system_call_saved_noack,B3
+	MVKH	.S2	system_call_saved_noack,B3
+#endif
+	LDW	.D2T2	*+SP(REGS_B0+8),B0
+	LDW	.D2T1	*+SP(REGS_A4+8),A4
+	LDW	.D2T2	*+SP(REGS_B4+8),B4
+	LDW	.D2T1	*+SP(REGS_A6+8),A6
+	LDW	.D2T2	*+SP(REGS_B6+8),B6
+	LDW	.D2T1	*+SP(REGS_A8+8),A8
+#ifdef CONFIG_C6X_BIG_KERNEL
+ ||	B	.S2	B3
+#else
+ ||	B	.S2	system_call_saved_noack
+#endif
+	LDW	.D2T2	*+SP(REGS_B8+8),B8
+	NOP	4
+ENDPROC(_nmi_handler)
+
+	;;
+	;; Jump to schedule() then return to ret_from_isr
+	;;
+#ifdef	CONFIG_PREEMPT
+resume_kernel:
+	GET_THREAD_INFO A12
+	LDW	.D1T1	*+A12(THREAD_INFO_PREEMPT_COUNT),A1
+	NOP	4
+ [A1]	BNOP	.S2	restore_all,5
+
+preempt_schedule:
+	GET_THREAD_INFO A2
+	LDW	.D1T1	*+A2(THREAD_INFO_FLAGS),A1
+#ifdef CONFIG_C6X_BIG_KERNEL
+	MVKL	.S2	preempt_schedule_irq,B0
+	MVKH	.S2	preempt_schedule_irq,B0
+	NOP	2
+#else
+	NOP	4
+#endif
+	AND	.D1	_TIF_NEED_RESCHED,A1,A1
+ [!A1]	BNOP	.S2	restore_all,5
+#ifdef CONFIG_C6X_BIG_KERNEL
+	B	.S2	B0
+#else
+	B	.S2	preempt_schedule_irq
+#endif
+	ADDKPC	.S2	preempt_schedule,B3,4
+#endif /* CONFIG_PREEMPT */
+
+ENTRY(enable_exception)
+	DINT
+	MVC	.S2	TSR,B0
+	MVC	.S2	B3,NRP
+	MVK	.L2	0xc,B1
+	OR	.D2	B0,B1,B0
+	MVC	.S2	B0,TSR			;  Set GEE and XEN in TSR
+	B	.S2	NRP
+	NOP	5
+ENDPROC(enable_exception)
+
+ENTRY(sys_sigaltstack)
+#ifdef CONFIG_C6X_BIG_KERNEL
+	MVKL	.S1	do_sigaltstack,A0	; branch to do_sigaltstack
+	MVKH	.S1	do_sigaltstack,A0
+	B	.S2X	A0
+#else
+	B	.S2	do_sigaltstack
+#endif
+	LDW	.D2T1	*+SP(REGS_SP+8),A6
+	NOP	4
+ENDPROC(sys_sigaltstack)
+
+	;; kernel_execve
+ENTRY(kernel_execve)
+	MVK	.S2	__NR_execve,B0
+	SWE
+	BNOP	.S2	B3,5
+ENDPROC(kernel_execve)
+
+	;;
+	;; Special system calls
+	;; return address is in B3
+	;;
+ENTRY(sys_clone)
+	ADD	.D1X	SP,8,A4
+#ifdef CONFIG_C6X_BIG_KERNEL
+ ||	MVKL	.S1	sys_c6x_clone,A0
+	MVKH	.S1	sys_c6x_clone,A0
+	BNOP	.S2X	A0,5
+#else
+ ||	B	.S2	sys_c6x_clone
+	NOP	5
+#endif
+ENDPROC(sys_clone)
+
+ENTRY(sys_rt_sigreturn)
+	ADD	.D1X	SP,8,A4
+#ifdef CONFIG_C6X_BIG_KERNEL
+ ||	MVKL	.S1	do_rt_sigreturn,A0
+	MVKH	.S1	do_rt_sigreturn,A0
+	BNOP	.S2X	A0,5
+#else
+ ||	B	.S2	do_rt_sigreturn
+	NOP	5
+#endif
+ENDPROC(sys_rt_sigreturn)
+
+ENTRY(sys_execve)
+	ADDAW	.D2	SP,2,B6		; put regs addr in 4th parameter
+					; & adjust regs stack addr
+	LDW	.D2T2	*+SP(REGS_B4+8),B4
+
+	;; c6x_execve(char *name, char **argv,
+	;;            char **envp, struct pt_regs *regs)
+#ifdef CONFIG_C6X_BIG_KERNEL
+ ||	MVKL	.S1	sys_c6x_execve,A0
+	MVKH	.S1	sys_c6x_execve,A0
+	B	.S2X	A0
+#else
+ ||	B	.S2	sys_c6x_execve
+#endif
+	STW	.D2T2	B3,*SP--[2]
+	ADDKPC	.S2	ret_from_c6x_execve,B3,3
+
+ret_from_c6x_execve:
+	LDW	.D2T2	*++SP[2],B3
+	NOP	4
+	BNOP	.S2	B3,5
+ENDPROC(sys_execve)
+
+ENTRY(sys_pread_c6x)
+	MV	.D2X	A8,B7
+#ifdef CONFIG_C6X_BIG_KERNEL
+ ||	MVKL	.S1	sys_pread64,A0
+	MVKH	.S1	sys_pread64,A0
+	BNOP	.S2X	A0,5
+#else
+ ||	B	.S2	sys_pread64
+	NOP	5
+#endif
+ENDPROC(sys_pread_c6x)
+
+ENTRY(sys_pwrite_c6x)
+	MV	.D2X	A8,B7
+#ifdef CONFIG_C6X_BIG_KERNEL
+ ||	MVKL	.S1	sys_pwrite64,A0
+	MVKH	.S1	sys_pwrite64,A0
+	BNOP	.S2X	A0,5
+#else
+ ||	B	.S2	sys_pwrite64
+	NOP	5
+#endif
+ENDPROC(sys_pwrite_c6x)
+
+;; On Entry
+;;   A4 - path
+;;   B4 - offset_lo (LE), offset_hi (BE)
+;;   A6 - offset_lo (BE), offset_hi (LE)
+ENTRY(sys_truncate64_c6x)
+#ifdef CONFIG_CPU_BIG_ENDIAN
+	MV	.S2	B4,B5
+	MV	.D2X	A6,B4
+#else
+	MV	.D2X	A6,B5
+#endif
+#ifdef CONFIG_C6X_BIG_KERNEL
+ ||	MVKL	.S1	sys_truncate64,A0
+	MVKH	.S1	sys_truncate64,A0
+	BNOP	.S2X	A0,5
+#else
+ ||	B	.S2	sys_truncate64
+	NOP	5
+#endif
+ENDPROC(sys_truncate64_c6x)
+
+;; On Entry
+;;   A4 - fd
+;;   B4 - offset_lo (LE), offset_hi (BE)
+;;   A6 - offset_lo (BE), offset_hi (LE)
+ENTRY(sys_ftruncate64_c6x)
+#ifdef CONFIG_CPU_BIG_ENDIAN
+	MV	.S2	B4,B5
+	MV	.D2X	A6,B4
+#else
+	MV	.D2X	A6,B5
+#endif
+#ifdef CONFIG_C6X_BIG_KERNEL
+ ||	MVKL	.S1	sys_ftruncate64,A0
+	MVKH	.S1	sys_ftruncate64,A0
+	BNOP	.S2X	A0,5
+#else
+ ||	B	.S2	sys_ftruncate64
+	NOP	5
+#endif
+ENDPROC(sys_ftruncate64_c6x)
+
+#ifdef __ARCH_WANT_SYSCALL_OFF_T
+;; On Entry
+;;   A4 - fd
+;;   B4 - offset_lo (LE), offset_hi (BE)
+;;   A6 - offset_lo (BE), offset_hi (LE)
+;;   B6 - len
+;;   A8 - advice
+ENTRY(sys_fadvise64_c6x)
+#ifdef CONFIG_C6X_BIG_KERNEL
+	MVKL	.S1	sys_fadvise64,A0
+	MVKH	.S1	sys_fadvise64,A0
+	BNOP	.S2X	A0,2
+#else
+	B	.S2	sys_fadvise64
+	NOP	2
+#endif
+#ifdef CONFIG_CPU_BIG_ENDIAN
+	MV	.L2	B4,B5
+ ||	MV	.D2X	A6,B4
+#else
+	MV	.D2X	A6,B5
+#endif
+	MV	.D1X	B6,A6
+	MV	.D2X	A8,B6
+#endif
+ENDPROC(sys_fadvise64_c6x)
+
+;; On Entry
+;;   A4 - fd
+;;   B4 - offset_lo (LE), offset_hi (BE)
+;;   A6 - offset_lo (BE), offset_hi (LE)
+;;   B6 - len_lo (LE), len_hi (BE)
+;;   A8 - len_lo (BE), len_hi (LE)
+;;   B8 - advice
+ENTRY(sys_fadvise64_64_c6x)
+#ifdef CONFIG_C6X_BIG_KERNEL
+	MVKL	.S1	sys_fadvise64_64,A0
+	MVKH	.S1	sys_fadvise64_64,A0
+	BNOP	.S2X	A0,2
+#else
+	B	.S2	sys_fadvise64_64
+	NOP	2
+#endif
+#ifdef CONFIG_CPU_BIG_ENDIAN
+	MV	.L2	B4,B5
+ ||	MV	.D2X	A6,B4
+	MV	.L1	A8,A6
+ ||	MV	.D1X	B6,A7
+#else
+	MV	.D2X	A6,B5
+	MV	.L1	A8,A7
+ ||	MV	.D1X	B6,A6
+#endif
+	MV	.L2	B8,B6
+ENDPROC(sys_fadvise64_64_c6x)
+
+;; On Entry
+;;   A4 - fd
+;;   B4 - mode
+;;   A6 - offset_hi
+;;   B6 - offset_lo
+;;   A8 - len_hi
+;;   B8 - len_lo
+ENTRY(sys_fallocate_c6x)
+#ifdef CONFIG_C6X_BIG_KERNEL
+	MVKL	.S1	sys_fallocate,A0
+	MVKH	.S1	sys_fallocate,A0
+	BNOP	.S2X	A0,1
+#else
+	B	.S2	sys_fallocate
+	NOP
+#endif
+	MV	.D1	A6,A7
+	MV	.D1X	B6,A6
+	MV	.D2X	A8,B7
+	MV	.D2	B8,B6
+ENDPROC(sys_fallocate_c6x)
+
+	;; put this in .neardata for faster access when using DSBT mode
+	.section .neardata,"aw",@progbits
+	.global	current_ksp
+	.hidden	current_ksp
+current_ksp:
+	.word	init_thread_union + THREAD_START_SP
diff --git a/arch/c6x/kernel/head.S b/arch/c6x/kernel/head.S
new file mode 100644
index 0000000..133eab6
--- /dev/null
+++ b/arch/c6x/kernel/head.S
@@ -0,0 +1,84 @@
+;
+;  Port on Texas Instruments TMS320C6x architecture
+;
+;  Copyright (C) 2004, 2009, 2010, 2011 Texas Instruments Incorporated
+;  Author: Aurelien Jacquiot (aurelien.jacquiot@jaluna.com)
+;
+;  This program is free software; you can redistribute it and/or modify
+;  it under the terms of the GNU General Public License version 2 as
+;  published by the Free Software Foundation.
+;
+#include <linux/linkage.h>
+#include <linux/of_fdt.h>
+#include <asm/asm-offsets.h>
+
+	__HEAD
+ENTRY(_c_int00)
+	;; Save magic and pointer
+	MV	.S1	A4,A10
+	MV	.S2	B4,B10
+	MVKL	.S2	__bss_start,B5
+	MVKH	.S2	__bss_start,B5
+	MVKL	.S2	__bss_stop,B6
+	MVKH	.S2	__bss_stop,B6
+	SUB	.L2	B6,B5,B6 ; bss size
+
+	;; Set the stack pointer
+	MVKL	.S2	current_ksp,B0
+	MVKH	.S2	current_ksp,B0
+	LDW	.D2T2	*B0,B15
+
+	;; clear bss
+	SHR	.S2	B6,3,B0	  ; number of dwords to clear
+	ZERO	.L2	B13
+	ZERO	.L2	B12
+bss_loop:
+	BDEC	.S2	bss_loop,B0
+	NOP	3
+	CMPLT	.L2	B0,0,B1
+ [!B1]	STDW	.D2T2	B13:B12,*B5++[1]
+
+	NOP	4
+	AND	.D2	~7,B15,B15
+
+	;; Clear GIE and PGIE
+	MVC	.S2	CSR,B2
+	CLR	.S2	B2,0,1,B2
+	MVC	.S2	B2,CSR
+	MVC	.S2	TSR,B2
+	CLR	.S2	B2,0,1,B2
+	MVC	.S2	B2,TSR
+	MVC	.S2	ITSR,B2
+	CLR	.S2	B2,0,1,B2
+	MVC	.S2	B2,ITSR
+	MVC	.S2	NTSR,B2
+	CLR	.S2	B2,0,1,B2
+	MVC	.S2	B2,NTSR
+
+	;; pass DTB pointer to machine_init (or zero if none)
+	MVKL	.S1	OF_DT_HEADER,A0
+	MVKH	.S1	OF_DT_HEADER,A0
+	CMPEQ	.L1	A10,A0,A0
+  [A0]	MV	.S1X	B10,A4
+  [!A0] MVK	.S1	0,A4
+
+#ifdef CONFIG_C6X_BIG_KERNEL
+	MVKL	.S1	machine_init,A0
+	MVKH	.S1	machine_init,A0
+	B	.S2X	A0
+	ADDKPC  .S2     0f,B3,4
+0:
+#else
+	CALLP	.S2	machine_init,B3
+#endif
+
+	;; Jump to Linux init
+#ifdef CONFIG_C6X_BIG_KERNEL
+	MVKL	.S1	start_kernel,A0
+	MVKH	.S1	start_kernel,A0
+	B	.S2X	A0
+#else
+	B	.S2	start_kernel
+#endif
+	NOP	5
+L1:	BNOP	.S2	L1,5
diff --git a/arch/c6x/kernel/irq.c b/arch/c6x/kernel/irq.c
new file mode 100644
index 0000000..0929e4b
--- /dev/null
+++ b/arch/c6x/kernel/irq.c
@@ -0,0 +1,728 @@
+/*
+ *  Copyright (C) 2011 Texas Instruments Incorporated
+ *
+ *  This borrows heavily from powerpc version, which is:
+ *
+ *  Derived from arch/i386/kernel/irq.c
+ *    Copyright (C) 1992 Linus Torvalds
+ *  Adapted from arch/i386 by Gary Thomas
+ *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *  Updated and modified by Cort Dougan <cort@fsmlabs.com>
+ *    Copyright (C) 1996-2001 Cort Dougan
+ *  Adapted for Power Macintosh by Paul Mackerras
+ *    Copyright (C) 1996 Paul Mackerras (paulus@cs.anu.edu.au)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/slab.h>
+#include <linux/seq_file.h>
+#include <linux/radix-tree.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_irq.h>
+#include <linux/interrupt.h>
+#include <linux/kernel_stat.h>
+
+#include <asm/megamod-pic.h>
+
+unsigned long irq_err_count;
+
+static DEFINE_RAW_SPINLOCK(core_irq_lock);
+
+static void mask_core_irq(struct irq_data *data)
+{
+	unsigned int prio = data->irq;
+
+	BUG_ON(prio < 4 || prio >= NR_PRIORITY_IRQS);
+
+	raw_spin_lock(&core_irq_lock);
+	and_creg(IER, ~(1 << prio));
+	raw_spin_unlock(&core_irq_lock);
+}
+
+static void unmask_core_irq(struct irq_data *data)
+{
+	unsigned int prio = data->irq;
+
+	raw_spin_lock(&core_irq_lock);
+	or_creg(IER, 1 << prio);
+	raw_spin_unlock(&core_irq_lock);
+}
+
+static struct irq_chip core_chip = {
+	.name		= "core",
+	.irq_mask	= mask_core_irq,
+	.irq_unmask	= unmask_core_irq,
+};
+
+asmlinkage void c6x_do_IRQ(unsigned int prio, struct pt_regs *regs)
+{
+	struct pt_regs *old_regs = set_irq_regs(regs);
+
+	irq_enter();
+
+	BUG_ON(prio < 4 || prio >= NR_PRIORITY_IRQS);
+
+	generic_handle_irq(prio);
+
+	irq_exit();
+
+	set_irq_regs(old_regs);
+}
+
+static struct irq_host *core_host;
+
+static int core_host_map(struct irq_host *h, unsigned int virq,
+			 irq_hw_number_t hw)
+{
+	if (hw < 4 || hw >= NR_PRIORITY_IRQS)
+		return -EINVAL;
+
+	irq_set_status_flags(virq, IRQ_LEVEL);
+	irq_set_chip_and_handler(virq, &core_chip, handle_level_irq);
+	return 0;
+}
+
+static struct irq_host_ops core_host_ops = {
+	.map = core_host_map,
+};
+
+void __init init_IRQ(void)
+{
+	struct device_node *np;
+
+	/* Mask all priority IRQs */
+	and_creg(IER, ~0xfff0);
+
+	np = of_find_compatible_node(NULL, NULL, "ti,c64x+core-pic");
+	if (np != NULL) {
+		/* create the core host */
+		core_host = irq_alloc_host(np, IRQ_HOST_MAP_PRIORITY, 0,
+					   &core_host_ops, 0);
+		if (core_host)
+			irq_set_default_host(core_host);
+		of_node_put(np);
+	}
+
+	printk(KERN_INFO "Core interrupt controller initialized\n");
+
+	/* now we're ready for other SoC controllers */
+	megamod_pic_init();
+
+	/* Clear all general IRQ flags */
+	set_creg(ICR, 0xfff0);
+}
+
+void ack_bad_irq(int irq)
+{
+	printk(KERN_ERR "IRQ: spurious interrupt %d\n", irq);
+	irq_err_count++;
+}
+
+int arch_show_interrupts(struct seq_file *p, int prec)
+{
+	seq_printf(p, "%*s: %10lu\n", prec, "Err", irq_err_count);
+	return 0;
+}
+
+/*
+ * IRQ controller and virtual interrupts
+ */
+
+/* The main irq map itself is an array of NR_IRQ entries containing the
+ * associate host and irq number. An entry with a host of NULL is free.
+ * An entry can be allocated if it's free, the allocator always then sets
+ * hwirq first to the host's invalid irq number and then fills ops.
+ */
+struct irq_map_entry {
+	irq_hw_number_t	hwirq;
+	struct irq_host	*host;
+};
+
+static LIST_HEAD(irq_hosts);
+static DEFINE_RAW_SPINLOCK(irq_big_lock);
+static DEFINE_MUTEX(revmap_trees_mutex);
+static struct irq_map_entry irq_map[NR_IRQS];
+static unsigned int irq_virq_count = NR_IRQS;
+static struct irq_host *irq_default_host;
+
+irq_hw_number_t irqd_to_hwirq(struct irq_data *d)
+{
+	return irq_map[d->irq].hwirq;
+}
+EXPORT_SYMBOL_GPL(irqd_to_hwirq);
+
+irq_hw_number_t virq_to_hw(unsigned int virq)
+{
+	return irq_map[virq].hwirq;
+}
+EXPORT_SYMBOL_GPL(virq_to_hw);
+
+bool virq_is_host(unsigned int virq, struct irq_host *host)
+{
+	return irq_map[virq].host == host;
+}
+EXPORT_SYMBOL_GPL(virq_is_host);
+
+static int default_irq_host_match(struct irq_host *h, struct device_node *np)
+{
+	return h->of_node != NULL && h->of_node == np;
+}
+
+struct irq_host *irq_alloc_host(struct device_node *of_node,
+				unsigned int revmap_type,
+				unsigned int revmap_arg,
+				struct irq_host_ops *ops,
+				irq_hw_number_t inval_irq)
+{
+	struct irq_host *host;
+	unsigned int size = sizeof(struct irq_host);
+	unsigned int i;
+	unsigned int *rmap;
+	unsigned long flags;
+
+	/* Allocate structure and revmap table if using linear mapping */
+	if (revmap_type == IRQ_HOST_MAP_LINEAR)
+		size += revmap_arg * sizeof(unsigned int);
+	host = kzalloc(size, GFP_KERNEL);
+	if (host == NULL)
+		return NULL;
+
+	/* Fill structure */
+	host->revmap_type = revmap_type;
+	host->inval_irq = inval_irq;
+	host->ops = ops;
+	host->of_node = of_node_get(of_node);
+
+	if (host->ops->match == NULL)
+		host->ops->match = default_irq_host_match;
+
+	raw_spin_lock_irqsave(&irq_big_lock, flags);
+
+	/* Check for the priority controller. */
+	if (revmap_type == IRQ_HOST_MAP_PRIORITY) {
+		if (irq_map[0].host != NULL) {
+			raw_spin_unlock_irqrestore(&irq_big_lock, flags);
+			of_node_put(host->of_node);
+			kfree(host);
+			return NULL;
+		}
+		irq_map[0].host = host;
+	}
+
+	list_add(&host->link, &irq_hosts);
+	raw_spin_unlock_irqrestore(&irq_big_lock, flags);
+
+	/* Additional setups per revmap type */
+	switch (revmap_type) {
+	case IRQ_HOST_MAP_PRIORITY:
+		/* 0 is always the invalid number for priority */
+		host->inval_irq = 0;
+		/* setup us as the host for all priority interrupts */
+		for (i = 1; i < NR_PRIORITY_IRQS; i++) {
+			irq_map[i].hwirq = i;
+			smp_wmb();
+			irq_map[i].host = host;
+			smp_wmb();
+
+			ops->map(host, i, i);
+		}
+		break;
+	case IRQ_HOST_MAP_LINEAR:
+		rmap = (unsigned int *)(host + 1);
+		for (i = 0; i < revmap_arg; i++)
+			rmap[i] = NO_IRQ;
+		host->revmap_data.linear.size = revmap_arg;
+		smp_wmb();
+		host->revmap_data.linear.revmap = rmap;
+		break;
+	case IRQ_HOST_MAP_TREE:
+		INIT_RADIX_TREE(&host->revmap_data.tree, GFP_KERNEL);
+		break;
+	default:
+		break;
+	}
+
+	pr_debug("irq: Allocated host of type %d @0x%p\n", revmap_type, host);
+
+	return host;
+}
+
+struct irq_host *irq_find_host(struct device_node *node)
+{
+	struct irq_host *h, *found = NULL;
+	unsigned long flags;
+
+	/* We might want to match the legacy controller last since
+	 * it might potentially be set to match all interrupts in
+	 * the absence of a device node. This isn't a problem so far
+	 * yet though...
+	 */
+	raw_spin_lock_irqsave(&irq_big_lock, flags);
+	list_for_each_entry(h, &irq_hosts, link)
+		if (h->ops->match(h, node)) {
+			found = h;
+			break;
+		}
+	raw_spin_unlock_irqrestore(&irq_big_lock, flags);
+	return found;
+}
+EXPORT_SYMBOL_GPL(irq_find_host);
+
+void irq_set_default_host(struct irq_host *host)
+{
+	pr_debug("irq: Default host set to @0x%p\n", host);
+
+	irq_default_host = host;
+}
+
+void irq_set_virq_count(unsigned int count)
+{
+	pr_debug("irq: Trying to set virq count to %d\n", count);
+
+	BUG_ON(count < NR_PRIORITY_IRQS);
+	if (count < NR_IRQS)
+		irq_virq_count = count;
+}
+
+static int irq_setup_virq(struct irq_host *host, unsigned int virq,
+			    irq_hw_number_t hwirq)
+{
+	int res;
+
+	res = irq_alloc_desc_at(virq, 0);
+	if (res != virq) {
+		pr_debug("irq: -> allocating desc failed\n");
+		goto error;
+	}
+
+	/* map it */
+	smp_wmb();
+	irq_map[virq].hwirq = hwirq;
+	smp_mb();
+
+	if (host->ops->map(host, virq, hwirq)) {
+		pr_debug("irq: -> mapping failed, freeing\n");
+		goto errdesc;
+	}
+
+	irq_clear_status_flags(virq, IRQ_NOREQUEST);
+
+	return 0;
+
+errdesc:
+	irq_free_descs(virq, 1);
+error:
+	irq_free_virt(virq, 1);
+	return -1;
+}
+
+unsigned int irq_create_direct_mapping(struct irq_host *host)
+{
+	unsigned int virq;
+
+	if (host == NULL)
+		host = irq_default_host;
+
+	BUG_ON(host == NULL);
+	WARN_ON(host->revmap_type != IRQ_HOST_MAP_NOMAP);
+
+	virq = irq_alloc_virt(host, 1, 0);
+	if (virq == NO_IRQ) {
+		pr_debug("irq: create_direct virq allocation failed\n");
+		return NO_IRQ;
+	}
+
+	pr_debug("irq: create_direct obtained virq %d\n", virq);
+
+	if (irq_setup_virq(host, virq, virq))
+		return NO_IRQ;
+
+	return virq;
+}
+
+unsigned int irq_create_mapping(struct irq_host *host,
+				irq_hw_number_t hwirq)
+{
+	unsigned int virq, hint;
+
+	pr_debug("irq: irq_create_mapping(0x%p, 0x%lx)\n", host, hwirq);
+
+	/* Look for default host if nececssary */
+	if (host == NULL)
+		host = irq_default_host;
+	if (host == NULL) {
+		printk(KERN_WARNING "irq_create_mapping called for"
+		       " NULL host, hwirq=%lx\n", hwirq);
+		WARN_ON(1);
+		return NO_IRQ;
+	}
+	pr_debug("irq: -> using host @%p\n", host);
+
+	/* Check if mapping already exists */
+	virq = irq_find_mapping(host, hwirq);
+	if (virq != NO_IRQ) {
+		pr_debug("irq: -> existing mapping on virq %d\n", virq);
+		return virq;
+	}
+
+	/* Allocate a virtual interrupt number */
+	hint = hwirq % irq_virq_count;
+	virq = irq_alloc_virt(host, 1, hint);
+	if (virq == NO_IRQ) {
+		pr_debug("irq: -> virq allocation failed\n");
+		return NO_IRQ;
+	}
+
+	if (irq_setup_virq(host, virq, hwirq))
+		return NO_IRQ;
+
+	pr_debug("irq: irq %lu on host %s mapped to virtual irq %u\n",
+		hwirq, host->of_node ? host->of_node->full_name : "null", virq);
+
+	return virq;
+}
+EXPORT_SYMBOL_GPL(irq_create_mapping);
+
+unsigned int irq_create_of_mapping(struct device_node *controller,
+				   const u32 *intspec, unsigned int intsize)
+{
+	struct irq_host *host;
+	irq_hw_number_t hwirq;
+	unsigned int type = IRQ_TYPE_NONE;
+	unsigned int virq;
+
+	if (controller == NULL)
+		host = irq_default_host;
+	else
+		host = irq_find_host(controller);
+	if (host == NULL) {
+		printk(KERN_WARNING "irq: no irq host found for %s !\n",
+		       controller->full_name);
+		return NO_IRQ;
+	}
+
+	/* If host has no translation, then we assume interrupt line */
+	if (host->ops->xlate == NULL)
+		hwirq = intspec[0];
+	else {
+		if (host->ops->xlate(host, controller, intspec, intsize,
+				     &hwirq, &type))
+			return NO_IRQ;
+	}
+
+	/* Create mapping */
+	virq = irq_create_mapping(host, hwirq);
+	if (virq == NO_IRQ)
+		return virq;
+
+	/* Set type if specified and different than the current one */
+	if (type != IRQ_TYPE_NONE &&
+	    type != (irqd_get_trigger_type(irq_get_irq_data(virq))))
+		irq_set_irq_type(virq, type);
+	return virq;
+}
+EXPORT_SYMBOL_GPL(irq_create_of_mapping);
+
+void irq_dispose_mapping(unsigned int virq)
+{
+	struct irq_host *host;
+	irq_hw_number_t hwirq;
+
+	if (virq == NO_IRQ)
+		return;
+
+	/* Never unmap priority interrupts */
+	if (virq < NR_PRIORITY_IRQS)
+		return;
+
+	host = irq_map[virq].host;
+	if (WARN_ON(host == NULL))
+		return;
+
+	irq_set_status_flags(virq, IRQ_NOREQUEST);
+
+	/* remove chip and handler */
+	irq_set_chip_and_handler(virq, NULL, NULL);
+
+	/* Make sure it's completed */
+	synchronize_irq(virq);
+
+	/* Tell the PIC about it */
+	if (host->ops->unmap)
+		host->ops->unmap(host, virq);
+	smp_mb();
+
+	/* Clear reverse map */
+	hwirq = irq_map[virq].hwirq;
+	switch (host->revmap_type) {
+	case IRQ_HOST_MAP_LINEAR:
+		if (hwirq < host->revmap_data.linear.size)
+			host->revmap_data.linear.revmap[hwirq] = NO_IRQ;
+		break;
+	case IRQ_HOST_MAP_TREE:
+		mutex_lock(&revmap_trees_mutex);
+		radix_tree_delete(&host->revmap_data.tree, hwirq);
+		mutex_unlock(&revmap_trees_mutex);
+		break;
+	}
+
+	/* Destroy map */
+	smp_mb();
+	irq_map[virq].hwirq = host->inval_irq;
+
+	irq_free_descs(virq, 1);
+	/* Free it */
+	irq_free_virt(virq, 1);
+}
+EXPORT_SYMBOL_GPL(irq_dispose_mapping);
+
+unsigned int irq_find_mapping(struct irq_host *host,
+			      irq_hw_number_t hwirq)
+{
+	unsigned int i;
+	unsigned int hint = hwirq % irq_virq_count;
+
+	/* Look for default host if nececssary */
+	if (host == NULL)
+		host = irq_default_host;
+	if (host == NULL)
+		return NO_IRQ;
+
+	/* Slow path does a linear search of the map */
+	i = hint;
+	do  {
+		if (irq_map[i].host == host &&
+		    irq_map[i].hwirq == hwirq)
+			return i;
+		i++;
+		if (i >= irq_virq_count)
+			i = 4;
+	} while (i != hint);
+	return NO_IRQ;
+}
+EXPORT_SYMBOL_GPL(irq_find_mapping);
+
+unsigned int irq_radix_revmap_lookup(struct irq_host *host,
+				     irq_hw_number_t hwirq)
+{
+	struct irq_map_entry *ptr;
+	unsigned int virq;
+
+	if (WARN_ON_ONCE(host->revmap_type != IRQ_HOST_MAP_TREE))
+		return irq_find_mapping(host, hwirq);
+
+	/*
+	 * The ptr returned references the static global irq_map.
+	 * but freeing an irq can delete nodes along the path to
+	 * do the lookup via call_rcu.
+	 */
+	rcu_read_lock();
+	ptr = radix_tree_lookup(&host->revmap_data.tree, hwirq);
+	rcu_read_unlock();
+
+	/*
+	 * If found in radix tree, then fine.
+	 * Else fallback to linear lookup - this should not happen in practice
+	 * as it means that we failed to insert the node in the radix tree.
+	 */
+	if (ptr)
+		virq = ptr - irq_map;
+	else
+		virq = irq_find_mapping(host, hwirq);
+
+	return virq;
+}
+
+void irq_radix_revmap_insert(struct irq_host *host, unsigned int virq,
+			     irq_hw_number_t hwirq)
+{
+	if (WARN_ON(host->revmap_type != IRQ_HOST_MAP_TREE))
+		return;
+
+	if (virq != NO_IRQ) {
+		mutex_lock(&revmap_trees_mutex);
+		radix_tree_insert(&host->revmap_data.tree, hwirq,
+				  &irq_map[virq]);
+		mutex_unlock(&revmap_trees_mutex);
+	}
+}
+
+unsigned int irq_linear_revmap(struct irq_host *host,
+			       irq_hw_number_t hwirq)
+{
+	unsigned int *revmap;
+
+	if (WARN_ON_ONCE(host->revmap_type != IRQ_HOST_MAP_LINEAR))
+		return irq_find_mapping(host, hwirq);
+
+	/* Check revmap bounds */
+	if (unlikely(hwirq >= host->revmap_data.linear.size))
+		return irq_find_mapping(host, hwirq);
+
+	/* Check if revmap was allocated */
+	revmap = host->revmap_data.linear.revmap;
+	if (unlikely(revmap == NULL))
+		return irq_find_mapping(host, hwirq);
+
+	/* Fill up revmap with slow path if no mapping found */
+	if (unlikely(revmap[hwirq] == NO_IRQ))
+		revmap[hwirq] = irq_find_mapping(host, hwirq);
+
+	return revmap[hwirq];
+}
+
+unsigned int irq_alloc_virt(struct irq_host *host,
+			    unsigned int count,
+			    unsigned int hint)
+{
+	unsigned long flags;
+	unsigned int i, j, found = NO_IRQ;
+
+	if (count == 0 || count > (irq_virq_count - NR_PRIORITY_IRQS))
+		return NO_IRQ;
+
+	raw_spin_lock_irqsave(&irq_big_lock, flags);
+
+	/* Use hint for 1 interrupt if any */
+	if (count == 1 && hint >= NR_PRIORITY_IRQS &&
+	    hint < irq_virq_count && irq_map[hint].host == NULL) {
+		found = hint;
+		goto hint_found;
+	}
+
+	/* Look for count consecutive numbers in the allocatable
+	 * (non-legacy) space
+	 */
+	for (i = NR_PRIORITY_IRQS, j = 0; i < irq_virq_count; i++) {
+		if (irq_map[i].host != NULL)
+			j = 0;
+		else
+			j++;
+
+		if (j == count) {
+			found = i - count + 1;
+			break;
+		}
+	}
+	if (found == NO_IRQ) {
+		raw_spin_unlock_irqrestore(&irq_big_lock, flags);
+		return NO_IRQ;
+	}
+ hint_found:
+	for (i = found; i < (found + count); i++) {
+		irq_map[i].hwirq = host->inval_irq;
+		smp_wmb();
+		irq_map[i].host = host;
+	}
+	raw_spin_unlock_irqrestore(&irq_big_lock, flags);
+	return found;
+}
+
+void irq_free_virt(unsigned int virq, unsigned int count)
+{
+	unsigned long flags;
+	unsigned int i;
+
+	WARN_ON(virq < NR_PRIORITY_IRQS);
+	WARN_ON(count == 0 || (virq + count) > irq_virq_count);
+
+	if (virq < NR_PRIORITY_IRQS) {
+		if (virq + count < NR_PRIORITY_IRQS)
+			return;
+		count  -= NR_PRIORITY_IRQS - virq;
+		virq = NR_PRIORITY_IRQS;
+	}
+
+	if (count > irq_virq_count || virq > irq_virq_count - count) {
+		if (virq > irq_virq_count)
+			return;
+		count = irq_virq_count - virq;
+	}
+
+	raw_spin_lock_irqsave(&irq_big_lock, flags);
+	for (i = virq; i < (virq + count); i++) {
+		struct irq_host *host;
+
+		host = irq_map[i].host;
+		irq_map[i].hwirq = host->inval_irq;
+		smp_wmb();
+		irq_map[i].host = NULL;
+	}
+	raw_spin_unlock_irqrestore(&irq_big_lock, flags);
+}
+
+#ifdef CONFIG_VIRQ_DEBUG
+static int virq_debug_show(struct seq_file *m, void *private)
+{
+	unsigned long flags;
+	struct irq_desc *desc;
+	const char *p;
+	static const char none[] = "none";
+	void *data;
+	int i;
+
+	seq_printf(m, "%-5s  %-7s  %-15s  %-18s  %s\n", "virq", "hwirq",
+		      "chip name", "chip data", "host name");
+
+	for (i = 1; i < nr_irqs; i++) {
+		desc = irq_to_desc(i);
+		if (!desc)
+			continue;
+
+		raw_spin_lock_irqsave(&desc->lock, flags);
+
+		if (desc->action && desc->action->handler) {
+			struct irq_chip *chip;
+
+			seq_printf(m, "%5d  ", i);
+			seq_printf(m, "0x%05lx  ", irq_map[i].hwirq);
+
+			chip = irq_desc_get_chip(desc);
+			if (chip && chip->name)
+				p = chip->name;
+			else
+				p = none;
+			seq_printf(m, "%-15s  ", p);
+
+			data = irq_desc_get_chip_data(desc);
+			seq_printf(m, "0x%16p  ", data);
+
+			if (irq_map[i].host && irq_map[i].host->of_node)
+				p = irq_map[i].host->of_node->full_name;
+			else
+				p = none;
+			seq_printf(m, "%s\n", p);
+		}
+
+		raw_spin_unlock_irqrestore(&desc->lock, flags);
+	}
+
+	return 0;
+}
+
+static int virq_debug_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, virq_debug_show, inode->i_private);
+}
+
+static const struct file_operations virq_debug_fops = {
+	.open = virq_debug_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+static int __init irq_debugfs_init(void)
+{
+	if (debugfs_create_file("virq_mapping", S_IRUGO, powerpc_debugfs_root,
+				 NULL, &virq_debug_fops) == NULL)
+		return -ENOMEM;
+
+	return 0;
+}
+device_initcall(irq_debugfs_init);
+#endif /* CONFIG_VIRQ_DEBUG */
diff --git a/arch/c6x/kernel/module.c b/arch/c6x/kernel/module.c
new file mode 100644
index 0000000..5fc03f1
--- /dev/null
+++ b/arch/c6x/kernel/module.c
@@ -0,0 +1,123 @@
+/*
+ *  Port on Texas Instruments TMS320C6x architecture
+ *
+ *  Copyright (C) 2005, 2009, 2010, 2011 Texas Instruments Incorporated
+ *  Author: Thomas Charleux (thomas.charleux@jaluna.com)
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ *
+ */
+#include <linux/moduleloader.h>
+#include <linux/elf.h>
+#include <linux/vmalloc.h>
+#include <linux/kernel.h>
+
+static inline int fixup_pcr(u32 *ip, Elf32_Addr dest, u32 maskbits, int shift)
+{
+	u32 opcode;
+	long ep = (long)ip & ~31;
+	long delta = ((long)dest - ep) >> 2;
+	long mask = (1 << maskbits) - 1;
+
+	if ((delta >> (maskbits - 1)) == 0 ||
+	    (delta >> (maskbits - 1)) == -1) {
+		opcode = *ip;
+		opcode &= ~(mask << shift);
+		opcode |= ((delta & mask) << shift);
+		*ip = opcode;
+
+		pr_debug("REL PCR_S%d[%p] dest[%p] opcode[%08x]\n",
+			 maskbits, ip, (void *)dest, opcode);
+
+		return 0;
+	}
+	pr_err("PCR_S%d reloc %p -> %p out of range!\n",
+	       maskbits, ip, (void *)dest);
+
+	return -1;
+}
+
+/*
+ * apply a RELA relocation
+ */
+int apply_relocate_add(Elf32_Shdr *sechdrs,
+		       const char *strtab,
+		       unsigned int symindex,
+		       unsigned int relsec,
+		       struct module *me)
+{
+	Elf32_Rela *rel = (void *) sechdrs[relsec].sh_addr;
+	Elf_Sym *sym;
+	u32 *location, opcode;
+	unsigned int i;
+	Elf32_Addr v;
+	Elf_Addr offset = 0;
+
+	pr_debug("Applying relocate section %u to %u with offset 0x%x\n",
+		 relsec, sechdrs[relsec].sh_info, offset);
+
+	for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) {
+		/* This is where to make the change */
+		location = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr
+			+ rel[i].r_offset - offset;
+
+		/* This is the symbol it is referring to.  Note that all
+		   undefined symbols have been resolved.  */
+		sym = (Elf_Sym *)sechdrs[symindex].sh_addr
+			+ ELF32_R_SYM(rel[i].r_info);
+
+		/* this is the adjustment to be made */
+		v = sym->st_value + rel[i].r_addend;
+
+		switch (ELF32_R_TYPE(rel[i].r_info)) {
+		case R_C6000_ABS32:
+			pr_debug("RELA ABS32: [%p] = 0x%x\n", location, v);
+			*location = v;
+			break;
+		case R_C6000_ABS16:
+			pr_debug("RELA ABS16: [%p] = 0x%x\n", location, v);
+			*(u16 *)location = v;
+			break;
+		case R_C6000_ABS8:
+			pr_debug("RELA ABS8: [%p] = 0x%x\n", location, v);
+			*(u8 *)location = v;
+			break;
+		case R_C6000_ABS_L16:
+			opcode = *location;
+			opcode &= ~0x7fff80;
+			opcode |= ((v & 0xffff) << 7);
+			pr_debug("RELA ABS_L16[%p] v[0x%x] opcode[0x%x]\n",
+				 location, v, opcode);
+			*location = opcode;
+			break;
+		case R_C6000_ABS_H16:
+			opcode = *location;
+			opcode &= ~0x7fff80;
+			opcode |= ((v >> 9) & 0x7fff80);
+			pr_debug("RELA ABS_H16[%p] v[0x%x] opcode[0x%x]\n",
+				 location, v, opcode);
+			*location = opcode;
+			break;
+		case R_C6000_PCR_S21:
+			if (fixup_pcr(location, v, 21, 7))
+				return -ENOEXEC;
+			break;
+		case R_C6000_PCR_S12:
+			if (fixup_pcr(location, v, 12, 16))
+				return -ENOEXEC;
+			break;
+		case R_C6000_PCR_S10:
+			if (fixup_pcr(location, v, 10, 13))
+				return -ENOEXEC;
+			break;
+		default:
+			pr_err("module %s: Unknown RELA relocation: %u\n",
+			       me->name, ELF32_R_TYPE(rel[i].r_info));
+			return -ENOEXEC;
+		}
+	}
+
+	return 0;
+}
diff --git a/arch/c6x/kernel/process.c b/arch/c6x/kernel/process.c
new file mode 100644
index 0000000..7ca8c41
--- /dev/null
+++ b/arch/c6x/kernel/process.c
@@ -0,0 +1,265 @@
+/*
+ *  Port on Texas Instruments TMS320C6x architecture
+ *
+ *  Copyright (C) 2004, 2006, 2009, 2010, 2011 Texas Instruments Incorporated
+ *  Author: Aurelien Jacquiot (aurelien.jacquiot@jaluna.com)
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ *
+ */
+#include <linux/module.h>
+#include <linux/unistd.h>
+#include <linux/ptrace.h>
+#include <linux/init_task.h>
+#include <linux/tick.h>
+#include <linux/mqueue.h>
+#include <linux/syscalls.h>
+#include <linux/reboot.h>
+
+#include <asm/syscalls.h>
+
+/* hooks for board specific support */
+void	(*c6x_restart)(void);
+void	(*c6x_halt)(void);
+
+extern asmlinkage void ret_from_fork(void);
+
+static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
+static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
+
+/*
+ * Initial thread structure.
+ */
+union thread_union init_thread_union __init_task_data =	{
+	INIT_THREAD_INFO(init_task)
+};
+
+/*
+ * Initial task structure.
+ */
+struct task_struct init_task = INIT_TASK(init_task);
+EXPORT_SYMBOL(init_task);
+
+/*
+ * power off function, if any
+ */
+void (*pm_power_off)(void);
+EXPORT_SYMBOL(pm_power_off);
+
+static void c6x_idle(void)
+{
+	unsigned long tmp;
+
+	/*
+	 * Put local_irq_enable and idle in same execute packet
+	 * to make them atomic and avoid race to idle with
+	 * interrupts enabled.
+	 */
+	asm volatile ("   mvc .s2 CSR,%0\n"
+		      "   or  .d2 1,%0,%0\n"
+		      "   mvc .s2 %0,CSR\n"
+		      "|| idle\n"
+		      : "=b"(tmp));
+}
+
+/*
+ * The idle loop for C64x
+ */
+void cpu_idle(void)
+{
+	/* endless idle loop with no priority at all */
+	while (1) {
+		tick_nohz_idle_enter();
+		rcu_idle_enter();
+		while (1) {
+			local_irq_disable();
+			if (need_resched()) {
+				local_irq_enable();
+				break;
+			}
+			c6x_idle(); /* enables local irqs */
+		}
+		rcu_idle_exit();
+		tick_nohz_idle_exit();
+
+		preempt_enable_no_resched();
+		schedule();
+		preempt_disable();
+	}
+}
+
+static void halt_loop(void)
+{
+	printk(KERN_EMERG "System Halted, OK to turn off power\n");
+	local_irq_disable();
+	while (1)
+		asm volatile("idle\n");
+}
+
+void machine_restart(char *__unused)
+{
+	if (c6x_restart)
+		c6x_restart();
+	halt_loop();
+}
+
+void machine_halt(void)
+{
+	if (c6x_halt)
+		c6x_halt();
+	halt_loop();
+}
+
+void machine_power_off(void)
+{
+	if (pm_power_off)
+		pm_power_off();
+	halt_loop();
+}
+
+static void kernel_thread_helper(int dummy, void *arg, int (*fn)(void *))
+{
+	do_exit(fn(arg));
+}
+
+/*
+ * Create a kernel thread
+ */
+int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
+{
+	struct pt_regs regs;
+
+	/*
+	 * copy_thread sets a4 to zero (child return from fork)
+	 * so we can't just set things up to directly return to
+	 * fn.
+	 */
+	memset(&regs, 0, sizeof(regs));
+	regs.b4 = (unsigned long) arg;
+	regs.a6 = (unsigned long) fn;
+	regs.pc = (unsigned long) kernel_thread_helper;
+	local_save_flags(regs.csr);
+	regs.csr |= 1;
+	regs.tsr = 5; /* Set GEE and GIE in TSR */
+
+	/* Ok, create the new process.. */
+	return do_fork(flags | CLONE_VM | CLONE_UNTRACED, -1, &regs,
+		       0, NULL, NULL);
+}
+EXPORT_SYMBOL(kernel_thread);
+
+void flush_thread(void)
+{
+}
+
+void exit_thread(void)
+{
+}
+
+SYSCALL_DEFINE1(c6x_clone, struct pt_regs *, regs)
+{
+	unsigned long clone_flags;
+	unsigned long newsp;
+
+	/* syscall puts clone_flags in A4 and usp in B4 */
+	clone_flags = regs->orig_a4;
+	if (regs->b4)
+		newsp = regs->b4;
+	else
+		newsp = regs->sp;
+
+	return do_fork(clone_flags, newsp, regs, 0, (int __user *)regs->a6,
+		       (int __user *)regs->b6);
+}
+
+/*
+ * Do necessary setup to start up a newly executed thread.
+ */
+void start_thread(struct pt_regs *regs, unsigned int pc, unsigned long usp)
+{
+	/*
+	 * The binfmt loader will setup a "full" stack, but the C6X
+	 * operates an "empty" stack. So we adjust the usp so that
+	 * argc doesn't get destroyed if an interrupt is taken before
+	 * it is read from the stack.
+	 *
+	 * NB: Library startup code needs to match this.
+	 */
+	usp -= 8;
+
+	set_fs(USER_DS);
+	regs->pc  = pc;
+	regs->sp  = usp;
+	regs->tsr |= 0x40; /* set user mode */
+	current->thread.usp = usp;
+}
+
+/*
+ * Copy a new thread context in its stack.
+ */
+int copy_thread(unsigned long clone_flags, unsigned long usp,
+		unsigned long ustk_size,
+		struct task_struct *p, struct pt_regs *regs)
+{
+	struct pt_regs *childregs;
+
+	childregs = task_pt_regs(p);
+
+	*childregs = *regs;
+	childregs->a4 = 0;
+
+	if (usp == -1)
+		/* case of  __kernel_thread: we return to supervisor space */
+		childregs->sp = (unsigned long)(childregs + 1);
+	else
+		/* Otherwise use the given stack */
+		childregs->sp = usp;
+
+	/* Set usp/ksp */
+	p->thread.usp = childregs->sp;
+	/* switch_to uses stack to save/restore 14 callee-saved regs */
+	thread_saved_ksp(p) = (unsigned long)childregs - 8;
+	p->thread.pc = (unsigned int) ret_from_fork;
+	p->thread.wchan	= (unsigned long) ret_from_fork;
+#ifdef __DSBT__
+	{
+		unsigned long dp;
+
+		asm volatile ("mv .S2 b14,%0\n" : "=b"(dp));
+
+		thread_saved_dp(p) = dp;
+		if (usp == -1)
+			childregs->dp = dp;
+	}
+#endif
+	return 0;
+}
+
+/*
+ * c6x_execve() executes a new program.
+ */
+SYSCALL_DEFINE4(c6x_execve, const char __user *, name,
+		const char __user *const __user *, argv,
+		const char __user *const __user *, envp,
+		struct pt_regs *, regs)
+{
+	int error;
+	char *filename;
+
+	filename = getname(name);
+	error = PTR_ERR(filename);
+	if (IS_ERR(filename))
+		goto out;
+
+	error = do_execve(filename, argv, envp, regs);
+	putname(filename);
+out:
+	return error;
+}
+
+unsigned long get_wchan(struct task_struct *p)
+{
+	return p->thread.wchan;
+}
diff --git a/arch/c6x/kernel/ptrace.c b/arch/c6x/kernel/ptrace.c
new file mode 100644
index 0000000..3c494e8
--- /dev/null
+++ b/arch/c6x/kernel/ptrace.c
@@ -0,0 +1,187 @@
+/*
+ *  Port on Texas Instruments TMS320C6x architecture
+ *
+ *  Copyright (C) 2004, 2006, 2009, 2010, 2011 Texas Instruments Incorporated
+ *  Author: Aurelien Jacquiot (aurelien.jacquiot@jaluna.com)
+ *
+ *  Updated for 2.6.34: Mark Salter <msalter@redhat.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ */
+#include <linux/ptrace.h>
+#include <linux/tracehook.h>
+#include <linux/regset.h>
+#include <linux/elf.h>
+
+#include <asm/cacheflush.h>
+
+#define PT_REG_SIZE	  (sizeof(struct pt_regs))
+
+/*
+ * Called by kernel/ptrace.c when detaching.
+ */
+void ptrace_disable(struct task_struct *child)
+{
+	/* nothing to do */
+}
+
+/*
+ * Get a register number from live pt_regs for the specified task.
+ */
+static inline long get_reg(struct task_struct *task, int regno)
+{
+	long *addr = (long *)task_pt_regs(task);
+
+	if (regno == PT_TSR || regno == PT_CSR)
+		return 0;
+
+	return addr[regno];
+}
+
+/*
+ * Write contents of register REGNO in task TASK.
+ */
+static inline int put_reg(struct task_struct *task,
+			  int regno,
+			  unsigned long data)
+{
+	unsigned long *addr = (unsigned long *)task_pt_regs(task);
+
+	if (regno != PT_TSR && regno != PT_CSR)
+		addr[regno] = data;
+
+	return 0;
+}
+
+/* regset get/set implementations */
+
+static int gpr_get(struct task_struct *target,
+		   const struct user_regset *regset,
+		   unsigned int pos, unsigned int count,
+		   void *kbuf, void __user *ubuf)
+{
+	struct pt_regs *regs = task_pt_regs(target);
+
+	return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+				   regs,
+				   0, sizeof(*regs));
+}
+
+static int gpr_set(struct task_struct *target,
+		   const struct user_regset *regset,
+		   unsigned int pos, unsigned int count,
+		   const void *kbuf, const void __user *ubuf)
+{
+	int ret;
+	struct pt_regs *regs = task_pt_regs(target);
+
+	/* Don't copyin TSR or CSR */
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				 &regs,
+				 0, PT_TSR * sizeof(long));
+	if (ret)
+		return ret;
+
+	ret = user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf,
+					PT_TSR * sizeof(long),
+					(PT_TSR + 1) * sizeof(long));
+	if (ret)
+		return ret;
+
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				 &regs,
+				 (PT_TSR + 1) * sizeof(long),
+				 PT_CSR * sizeof(long));
+	if (ret)
+		return ret;
+
+	ret = user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf,
+					PT_CSR * sizeof(long),
+					(PT_CSR + 1) * sizeof(long));
+	if (ret)
+		return ret;
+
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				 &regs,
+				 (PT_CSR + 1) * sizeof(long), -1);
+	return ret;
+}
+
+enum c6x_regset {
+	REGSET_GPR,
+};
+
+static const struct user_regset c6x_regsets[] = {
+	[REGSET_GPR] = {
+		.core_note_type = NT_PRSTATUS,
+		.n = ELF_NGREG,
+		.size = sizeof(u32),
+		.align = sizeof(u32),
+		.get = gpr_get,
+		.set = gpr_set
+	},
+};
+
+static const struct user_regset_view user_c6x_native_view = {
+	.name		= "tic6x",
+	.e_machine	= EM_TI_C6000,
+	.regsets	= c6x_regsets,
+	.n		= ARRAY_SIZE(c6x_regsets),
+};
+
+const struct user_regset_view *task_user_regset_view(struct task_struct *task)
+{
+	return &user_c6x_native_view;
+}
+
+/*
+ * Perform ptrace request
+ */
+long arch_ptrace(struct task_struct *child, long request,
+		 unsigned long addr, unsigned long data)
+{
+	int ret = 0;
+
+	switch (request) {
+		/*
+		 * write the word at location addr.
+		 */
+	case PTRACE_POKETEXT:
+		ret = generic_ptrace_pokedata(child, addr, data);
+		if (ret == 0 && request == PTRACE_POKETEXT)
+			flush_icache_range(addr, addr + 4);
+		break;
+	default:
+		ret = ptrace_request(child, request, addr, data);
+		break;
+	}
+
+	return ret;
+}
+
+/*
+ * handle tracing of system call entry
+ * - return the revised system call number or ULONG_MAX to cause ENOSYS
+ */
+asmlinkage unsigned long syscall_trace_entry(struct pt_regs *regs)
+{
+	if (tracehook_report_syscall_entry(regs))
+		/* tracing decided this syscall should not happen, so
+		 * We'll return a bogus call number to get an ENOSYS
+		 * error, but leave the original number in
+		 * regs->orig_a4
+		 */
+		return ULONG_MAX;
+
+	return regs->b0;
+}
+
+/*
+ * handle tracing of system call exit
+ */
+asmlinkage void syscall_trace_exit(struct pt_regs *regs)
+{
+	tracehook_report_syscall_exit(regs, 0);
+}
diff --git a/arch/c6x/kernel/setup.c b/arch/c6x/kernel/setup.c
new file mode 100644
index 0000000..0c07921
--- /dev/null
+++ b/arch/c6x/kernel/setup.c
@@ -0,0 +1,510 @@
+/*
+ *  Port on Texas Instruments TMS320C6x architecture
+ *
+ *  Copyright (C) 2004, 2006, 2009, 2010, 2011 Texas Instruments Incorporated
+ *  Author: Aurelien Jacquiot (aurelien.jacquiot@jaluna.com)
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ */
+#include <linux/dma-mapping.h>
+#include <linux/memblock.h>
+#include <linux/seq_file.h>
+#include <linux/bootmem.h>
+#include <linux/clkdev.h>
+#include <linux/initrd.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of_fdt.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/cache.h>
+#include <linux/delay.h>
+#include <linux/sched.h>
+#include <linux/clk.h>
+#include <linux/cpu.h>
+#include <linux/fs.h>
+#include <linux/of.h>
+
+
+#include <asm/sections.h>
+#include <asm/div64.h>
+#include <asm/setup.h>
+#include <asm/dscr.h>
+#include <asm/clock.h>
+#include <asm/soc.h>
+
+static const char *c6x_soc_name;
+
+int c6x_num_cores;
+EXPORT_SYMBOL_GPL(c6x_num_cores);
+
+unsigned int c6x_silicon_rev;
+EXPORT_SYMBOL_GPL(c6x_silicon_rev);
+
+/*
+ * Device status register. This holds information
+ * about device configuration needed by some drivers.
+ */
+unsigned int c6x_devstat;
+EXPORT_SYMBOL_GPL(c6x_devstat);
+
+/*
+ * Some SoCs have fuse registers holding a unique MAC
+ * address. This is parsed out of the device tree with
+ * the resulting MAC being held here.
+ */
+unsigned char c6x_fuse_mac[6];
+
+unsigned long memory_start;
+unsigned long memory_end;
+
+unsigned long ram_start;
+unsigned long ram_end;
+
+/* Uncached memory for DMA consistent use (memdma=) */
+static unsigned long dma_start __initdata;
+static unsigned long dma_size __initdata;
+
+char c6x_command_line[COMMAND_LINE_SIZE];
+
+#if defined(CONFIG_CMDLINE_BOOL)
+static const char default_command_line[COMMAND_LINE_SIZE] __section(.cmdline) =
+	CONFIG_CMDLINE;
+#endif
+
+struct cpuinfo_c6x {
+	const char *cpu_name;
+	const char *cpu_voltage;
+	const char *mmu;
+	const char *fpu;
+	char *cpu_rev;
+	unsigned int core_id;
+	char __cpu_rev[5];
+};
+
+static DEFINE_PER_CPU(struct cpuinfo_c6x, cpu_data);
+
+unsigned int ticks_per_ns_scaled;
+EXPORT_SYMBOL(ticks_per_ns_scaled);
+
+unsigned int c6x_core_freq;
+
+static void __init get_cpuinfo(void)
+{
+	unsigned cpu_id, rev_id, csr;
+	struct clk *coreclk = clk_get_sys(NULL, "core");
+	unsigned long core_khz;
+	u64 tmp;
+	struct cpuinfo_c6x *p;
+	struct device_node *node, *np;
+
+	p = &per_cpu(cpu_data, smp_processor_id());
+
+	if (!IS_ERR(coreclk))
+		c6x_core_freq = clk_get_rate(coreclk);
+	else {
+		printk(KERN_WARNING
+		       "Cannot find core clock frequency. Using 700MHz\n");
+		c6x_core_freq = 700000000;
+	}
+
+	core_khz = c6x_core_freq / 1000;
+
+	tmp = (uint64_t)core_khz << C6X_NDELAY_SCALE;
+	do_div(tmp, 1000000);
+	ticks_per_ns_scaled = tmp;
+
+	csr = get_creg(CSR);
+	cpu_id = csr >> 24;
+	rev_id = (csr >> 16) & 0xff;
+
+	p->mmu = "none";
+	p->fpu = "none";
+	p->cpu_voltage = "unknown";
+
+	switch (cpu_id) {
+	case 0:
+		p->cpu_name = "C67x";
+		p->fpu = "yes";
+		break;
+	case 2:
+		p->cpu_name = "C62x";
+		break;
+	case 8:
+		p->cpu_name = "C64x";
+		break;
+	case 12:
+		p->cpu_name = "C64x";
+		break;
+	case 16:
+		p->cpu_name = "C64x+";
+		p->cpu_voltage = "1.2";
+		break;
+	default:
+		p->cpu_name = "unknown";
+		break;
+	}
+
+	if (cpu_id < 16) {
+		switch (rev_id) {
+		case 0x1:
+			if (cpu_id > 8) {
+				p->cpu_rev = "DM640/DM641/DM642/DM643";
+				p->cpu_voltage = "1.2 - 1.4";
+			} else {
+				p->cpu_rev = "C6201";
+				p->cpu_voltage = "2.5";
+			}
+			break;
+		case 0x2:
+			p->cpu_rev = "C6201B/C6202/C6211";
+			p->cpu_voltage = "1.8";
+			break;
+		case 0x3:
+			p->cpu_rev = "C6202B/C6203/C6204/C6205";
+			p->cpu_voltage = "1.5";
+			break;
+		case 0x201:
+			p->cpu_rev = "C6701 revision 0 (early CPU)";
+			p->cpu_voltage = "1.8";
+			break;
+		case 0x202:
+			p->cpu_rev = "C6701/C6711/C6712";
+			p->cpu_voltage = "1.8";
+			break;
+		case 0x801:
+			p->cpu_rev = "C64x";
+			p->cpu_voltage = "1.5";
+			break;
+		default:
+			p->cpu_rev = "unknown";
+		}
+	} else {
+		p->cpu_rev = p->__cpu_rev;
+		snprintf(p->__cpu_rev, sizeof(p->__cpu_rev), "0x%x", cpu_id);
+	}
+
+	p->core_id = get_coreid();
+
+	node = of_find_node_by_name(NULL, "cpus");
+	if (node) {
+		for_each_child_of_node(node, np)
+			if (!strcmp("cpu", np->name))
+				++c6x_num_cores;
+		of_node_put(node);
+	}
+
+	node = of_find_node_by_name(NULL, "soc");
+	if (node) {
+		if (of_property_read_string(node, "model", &c6x_soc_name))
+			c6x_soc_name = "unknown";
+		of_node_put(node);
+	} else
+		c6x_soc_name = "unknown";
+
+	printk(KERN_INFO "CPU%d: %s rev %s, %s volts, %uMHz\n",
+	       p->core_id, p->cpu_name, p->cpu_rev,
+	       p->cpu_voltage, c6x_core_freq / 1000000);
+}
+
+/*
+ * Early parsing of the command line
+ */
+static u32 mem_size __initdata;
+
+/* "mem=" parsing. */
+static int __init early_mem(char *p)
+{
+	if (!p)
+		return -EINVAL;
+
+	mem_size = memparse(p, &p);
+	/* don't remove all of memory when handling "mem={invalid}" */
+	if (mem_size == 0)
+		return -EINVAL;
+
+	return 0;
+}
+early_param("mem", early_mem);
+
+/* "memdma=<size>[@<address>]" parsing. */
+static int __init early_memdma(char *p)
+{
+	if (!p)
+		return -EINVAL;
+
+	dma_size = memparse(p, &p);
+	if (*p == '@')
+		dma_start = memparse(p, &p);
+
+	return 0;
+}
+early_param("memdma", early_memdma);
+
+int __init c6x_add_memory(phys_addr_t start, unsigned long size)
+{
+	static int ram_found __initdata;
+
+	/* We only handle one bank (the one with PAGE_OFFSET) for now */
+	if (ram_found)
+		return -EINVAL;
+
+	if (start > PAGE_OFFSET || PAGE_OFFSET >= (start + size))
+		return 0;
+
+	ram_start = start;
+	ram_end = start + size;
+
+	ram_found = 1;
+	return 0;
+}
+
+/*
+ * Do early machine setup and device tree parsing. This is called very
+ * early on the boot process.
+ */
+notrace void __init machine_init(unsigned long dt_ptr)
+{
+	struct boot_param_header *dtb = __va(dt_ptr);
+	struct boot_param_header *fdt = (struct boot_param_header *)_fdt_start;
+
+	/* interrupts must be masked */
+	set_creg(IER, 2);
+
+	/*
+	 * Set the Interrupt Service Table (IST) to the beginning of the
+	 * vector table.
+	 */
+	set_ist(_vectors_start);
+
+	lockdep_init();
+
+	/*
+	 * dtb is passed in from bootloader.
+	 * fdt is linked in blob.
+	 */
+	if (dtb && dtb != fdt)
+		fdt = dtb;
+
+	/* Do some early initialization based on the flat device tree */
+	early_init_devtree(fdt);
+
+	/* parse_early_param needs a boot_command_line */
+	strlcpy(boot_command_line, c6x_command_line, COMMAND_LINE_SIZE);
+	parse_early_param();
+}
+
+void __init setup_arch(char **cmdline_p)
+{
+	int bootmap_size;
+	struct memblock_region *reg;
+
+	printk(KERN_INFO "Initializing kernel\n");
+
+	/* Initialize command line */
+	*cmdline_p = c6x_command_line;
+
+	memory_end = ram_end;
+	memory_end &= ~(PAGE_SIZE - 1);
+
+	if (mem_size && (PAGE_OFFSET + PAGE_ALIGN(mem_size)) < memory_end)
+		memory_end = PAGE_OFFSET + PAGE_ALIGN(mem_size);
+
+	/* add block that this kernel can use */
+	memblock_add(PAGE_OFFSET, memory_end - PAGE_OFFSET);
+
+	/* reserve kernel text/data/bss */
+	memblock_reserve(PAGE_OFFSET,
+			 PAGE_ALIGN((unsigned long)&_end - PAGE_OFFSET));
+
+	if (dma_size) {
+		/* align to cacheability granularity */
+		dma_size = CACHE_REGION_END(dma_size);
+
+		if (!dma_start)
+			dma_start = memory_end - dma_size;
+
+		/* align to cacheability granularity */
+		dma_start = CACHE_REGION_START(dma_start);
+
+		/* reserve DMA memory taken from kernel memory */
+		if (memblock_is_region_memory(dma_start, dma_size))
+			memblock_reserve(dma_start, dma_size);
+	}
+
+	memory_start = PAGE_ALIGN((unsigned int) &_end);
+
+	printk(KERN_INFO "Memory Start=%08lx, Memory End=%08lx\n",
+	       memory_start, memory_end);
+
+#ifdef CONFIG_BLK_DEV_INITRD
+	/*
+	 * Reserve initrd memory if in kernel memory.
+	 */
+	if (initrd_start < initrd_end)
+		if (memblock_is_region_memory(initrd_start,
+					      initrd_end - initrd_start))
+			memblock_reserve(initrd_start,
+					 initrd_end - initrd_start);
+#endif
+
+	init_mm.start_code = (unsigned long) &_stext;
+	init_mm.end_code   = (unsigned long) &_etext;
+	init_mm.end_data   = memory_start;
+	init_mm.brk        = memory_start;
+
+	/*
+	 * Give all the memory to the bootmap allocator,  tell it to put the
+	 * boot mem_map at the start of memory
+	 */
+	bootmap_size = init_bootmem_node(NODE_DATA(0),
+					 memory_start >> PAGE_SHIFT,
+					 PAGE_OFFSET >> PAGE_SHIFT,
+					 memory_end >> PAGE_SHIFT);
+	memblock_reserve(memory_start, bootmap_size);
+
+	unflatten_device_tree();
+
+	c6x_cache_init();
+
+	/* Set the whole external memory as non-cacheable */
+	disable_caching(ram_start, ram_end - 1);
+
+	/* Set caching of external RAM used by Linux */
+	for_each_memblock(memory, reg)
+		enable_caching(CACHE_REGION_START(reg->base),
+			       CACHE_REGION_START(reg->base + reg->size - 1));
+
+#ifdef CONFIG_BLK_DEV_INITRD
+	/*
+	 * Enable caching for initrd which falls outside kernel memory.
+	 */
+	if (initrd_start < initrd_end) {
+		if (!memblock_is_region_memory(initrd_start,
+					       initrd_end - initrd_start))
+			enable_caching(CACHE_REGION_START(initrd_start),
+				       CACHE_REGION_START(initrd_end - 1));
+	}
+#endif
+
+	/*
+	 * Disable caching for dma coherent memory taken from kernel memory.
+	 */
+	if (dma_size && memblock_is_region_memory(dma_start, dma_size))
+		disable_caching(dma_start,
+				CACHE_REGION_START(dma_start + dma_size - 1));
+
+	/* Initialize the coherent memory allocator */
+	coherent_mem_init(dma_start, dma_size);
+
+	/*
+	 * Free all memory as a starting point.
+	 */
+	free_bootmem(PAGE_OFFSET, memory_end - PAGE_OFFSET);
+
+	/*
+	 * Then reserve memory which is already being used.
+	 */
+	for_each_memblock(reserved, reg) {
+		pr_debug("reserved - 0x%08x-0x%08x\n",
+			 (u32) reg->base, (u32) reg->size);
+		reserve_bootmem(reg->base, reg->size, BOOTMEM_DEFAULT);
+	}
+
+	max_low_pfn = PFN_DOWN(memory_end);
+	min_low_pfn = PFN_UP(memory_start);
+	max_mapnr = max_low_pfn - min_low_pfn;
+
+	/* Get kmalloc into gear */
+	paging_init();
+
+	/*
+	 * Probe for Device State Configuration Registers.
+	 * We have to do this early in case timer needs to be enabled
+	 * through DSCR.
+	 */
+	dscr_probe();
+
+	/* We do this early for timer and core clock frequency */
+	c64x_setup_clocks();
+
+	/* Get CPU info */
+	get_cpuinfo();
+
+#if defined(CONFIG_VT) && defined(CONFIG_DUMMY_CONSOLE)
+	conswitchp = &dummy_con;
+#endif
+}
+
+#define cpu_to_ptr(n) ((void *)((long)(n)+1))
+#define ptr_to_cpu(p) ((long)(p) - 1)
+
+static int show_cpuinfo(struct seq_file *m, void *v)
+{
+	int n = ptr_to_cpu(v);
+	struct cpuinfo_c6x *p = &per_cpu(cpu_data, n);
+
+	if (n == 0) {
+		seq_printf(m,
+			   "soc\t\t: %s\n"
+			   "soc revision\t: 0x%x\n"
+			   "soc cores\t: %d\n",
+			   c6x_soc_name, c6x_silicon_rev, c6x_num_cores);
+	}
+
+	seq_printf(m,
+		   "\n"
+		   "processor\t: %d\n"
+		   "cpu\t\t: %s\n"
+		   "core revision\t: %s\n"
+		   "core voltage\t: %s\n"
+		   "core id\t\t: %d\n"
+		   "mmu\t\t: %s\n"
+		   "fpu\t\t: %s\n"
+		   "cpu MHz\t\t: %u\n"
+		   "bogomips\t: %lu.%02lu\n\n",
+		   n,
+		   p->cpu_name, p->cpu_rev, p->cpu_voltage,
+		   p->core_id, p->mmu, p->fpu,
+		   (c6x_core_freq + 500000) / 1000000,
+		   (loops_per_jiffy/(500000/HZ)),
+		   (loops_per_jiffy/(5000/HZ))%100);
+
+	return 0;
+}
+
+static void *c_start(struct seq_file *m, loff_t *pos)
+{
+	return *pos < nr_cpu_ids ? cpu_to_ptr(*pos) : NULL;
+}
+static void *c_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	++*pos;
+	return NULL;
+}
+static void c_stop(struct seq_file *m, void *v)
+{
+}
+
+const struct seq_operations cpuinfo_op = {
+	c_start,
+	c_stop,
+	c_next,
+	show_cpuinfo
+};
+
+static struct cpu cpu_devices[NR_CPUS];
+
+static int __init topology_init(void)
+{
+	int i;
+
+	for_each_present_cpu(i)
+		register_cpu(&cpu_devices[i], i);
+
+	return 0;
+}
+
+subsys_initcall(topology_init);
diff --git a/arch/c6x/kernel/signal.c b/arch/c6x/kernel/signal.c
new file mode 100644
index 0000000..304f675
--- /dev/null
+++ b/arch/c6x/kernel/signal.c
@@ -0,0 +1,377 @@
+/*
+ *  Port on Texas Instruments TMS320C6x architecture
+ *
+ *  Copyright (C) 2004, 2006, 2009, 2010, 2011 Texas Instruments Incorporated
+ *  Author: Aurelien Jacquiot (aurelien.jacquiot@jaluna.com)
+ *
+ *  Updated for 2.6.34: Mark Salter <msalter@redhat.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/uaccess.h>
+#include <linux/syscalls.h>
+#include <linux/tracehook.h>
+
+#include <asm/ucontext.h>
+#include <asm/cacheflush.h>
+
+
+#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
+
+/*
+ * Do a signal return, undo the signal stack.
+ */
+
+#define RETCODE_SIZE (9 << 2)	/* 9 instructions = 36 bytes */
+
+struct rt_sigframe {
+	struct siginfo __user *pinfo;
+	void __user *puc;
+	struct siginfo info;
+	struct ucontext uc;
+	unsigned long retcode[RETCODE_SIZE >> 2];
+};
+
+static int restore_sigcontext(struct pt_regs *regs,
+			      struct sigcontext __user *sc)
+{
+	int err = 0;
+
+	/* The access_ok check was done by caller, so use __get_user here */
+#define COPY(x)  (err |= __get_user(regs->x, &sc->sc_##x))
+
+	COPY(sp); COPY(a4); COPY(b4); COPY(a6); COPY(b6); COPY(a8); COPY(b8);
+	COPY(a0); COPY(a1); COPY(a2); COPY(a3); COPY(a5); COPY(a7); COPY(a9);
+	COPY(b0); COPY(b1); COPY(b2); COPY(b3); COPY(b5); COPY(b7); COPY(b9);
+
+	COPY(a16); COPY(a17); COPY(a18); COPY(a19);
+	COPY(a20); COPY(a21); COPY(a22); COPY(a23);
+	COPY(a24); COPY(a25); COPY(a26); COPY(a27);
+	COPY(a28); COPY(a29); COPY(a30); COPY(a31);
+	COPY(b16); COPY(b17); COPY(b18); COPY(b19);
+	COPY(b20); COPY(b21); COPY(b22); COPY(b23);
+	COPY(b24); COPY(b25); COPY(b26); COPY(b27);
+	COPY(b28); COPY(b29); COPY(b30); COPY(b31);
+
+	COPY(csr); COPY(pc);
+
+#undef COPY
+
+	return err;
+}
+
+asmlinkage int do_rt_sigreturn(struct pt_regs *regs)
+{
+	struct rt_sigframe __user *frame;
+	sigset_t set;
+
+	/*
+	 * Since we stacked the signal on a dword boundary,
+	 * 'sp' should be dword aligned here.  If it's
+	 * not, then the user is trying to mess with us.
+	 */
+	if (regs->sp & 7)
+		goto badframe;
+
+	frame = (struct rt_sigframe __user *) ((unsigned long) regs->sp + 8);
+
+	if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
+		goto badframe;
+	if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
+		goto badframe;
+
+	sigdelsetmask(&set, ~_BLOCKABLE);
+	spin_lock_irq(&current->sighand->siglock);
+	current->blocked = set;
+	recalc_sigpending();
+	spin_unlock_irq(&current->sighand->siglock);
+
+	if (restore_sigcontext(regs, &frame->uc.uc_mcontext))
+		goto badframe;
+
+	return regs->a4;
+
+badframe:
+	force_sig(SIGSEGV, current);
+	return 0;
+}
+
+static int setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs,
+			    unsigned long mask)
+{
+	int err = 0;
+
+	err |= __put_user(mask, &sc->sc_mask);
+
+	/* The access_ok check was done by caller, so use __put_user here */
+#define COPY(x) (err |= __put_user(regs->x, &sc->sc_##x))
+
+	COPY(sp); COPY(a4); COPY(b4); COPY(a6); COPY(b6); COPY(a8); COPY(b8);
+	COPY(a0); COPY(a1); COPY(a2); COPY(a3); COPY(a5); COPY(a7); COPY(a9);
+	COPY(b0); COPY(b1); COPY(b2); COPY(b3); COPY(b5); COPY(b7); COPY(b9);
+
+	COPY(a16); COPY(a17); COPY(a18); COPY(a19);
+	COPY(a20); COPY(a21); COPY(a22); COPY(a23);
+	COPY(a24); COPY(a25); COPY(a26); COPY(a27);
+	COPY(a28); COPY(a29); COPY(a30); COPY(a31);
+	COPY(b16); COPY(b17); COPY(b18); COPY(b19);
+	COPY(b20); COPY(b21); COPY(b22); COPY(b23);
+	COPY(b24); COPY(b25); COPY(b26); COPY(b27);
+	COPY(b28); COPY(b29); COPY(b30); COPY(b31);
+
+	COPY(csr); COPY(pc);
+
+#undef COPY
+
+	return err;
+}
+
+static inline void __user *get_sigframe(struct k_sigaction *ka,
+					struct pt_regs *regs,
+					unsigned long framesize)
+{
+	unsigned long sp = regs->sp;
+
+	/*
+	 * This is the X/Open sanctioned signal stack switching.
+	 */
+	if ((ka->sa.sa_flags & SA_ONSTACK) && sas_ss_flags(sp) == 0)
+		sp = current->sas_ss_sp + current->sas_ss_size;
+
+	/*
+	 * No matter what happens, 'sp' must be dword
+	 * aligned. Otherwise, nasty things will happen
+	 */
+	return (void __user *)((sp - framesize) & ~7);
+}
+
+static int setup_rt_frame(int signr, struct k_sigaction *ka, siginfo_t *info,
+			   sigset_t *set, struct pt_regs *regs)
+{
+	struct rt_sigframe __user *frame;
+	unsigned long __user *retcode;
+	int err = 0;
+
+	frame = get_sigframe(ka, regs, sizeof(*frame));
+
+	if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
+		goto segv_and_exit;
+
+	err |= __put_user(&frame->info, &frame->pinfo);
+	err |= __put_user(&frame->uc, &frame->puc);
+	err |= copy_siginfo_to_user(&frame->info, info);
+
+	/* Clear all the bits of the ucontext we don't use.  */
+	err |= __clear_user(&frame->uc, offsetof(struct ucontext, uc_mcontext));
+
+	err |= setup_sigcontext(&frame->uc.uc_mcontext,	regs, set->sig[0]);
+	err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
+
+	/* Set up to return from userspace */
+	retcode = (unsigned long __user *) &frame->retcode;
+
+	/* The access_ok check was done above, so use __put_user here */
+#define COPY(x) (err |= __put_user(x, retcode++))
+
+	COPY(0x0000002AUL | (__NR_rt_sigreturn << 7));
+				/* MVK __NR_rt_sigreturn,B0 */
+	COPY(0x10000000UL);	/* SWE */
+	COPY(0x00006000UL);	/* NOP 4 */
+	COPY(0x00006000UL);	/* NOP 4 */
+	COPY(0x00006000UL);	/* NOP 4 */
+	COPY(0x00006000UL);	/* NOP 4 */
+	COPY(0x00006000UL);	/* NOP 4 */
+	COPY(0x00006000UL);	/* NOP 4 */
+	COPY(0x00006000UL);	/* NOP 4 */
+
+#undef COPY
+
+	if (err)
+		goto segv_and_exit;
+
+	flush_icache_range((unsigned long) &frame->retcode,
+			   (unsigned long) &frame->retcode + RETCODE_SIZE);
+
+	retcode = (unsigned long __user *) &frame->retcode;
+
+	/* Change user context to branch to signal handler */
+	regs->sp = (unsigned long) frame - 8;
+	regs->b3 = (unsigned long) retcode;
+	regs->pc = (unsigned long) ka->sa.sa_handler;
+
+	/* Give the signal number to the handler */
+	regs->a4 = signr;
+
+	/*
+	 * For realtime signals we must also set the second and third
+	 * arguments for the signal handler.
+	 *   -- Peter Maydell <pmaydell@chiark.greenend.org.uk> 2000-12-06
+	 */
+	regs->b4 = (unsigned long)&frame->info;
+	regs->a6 = (unsigned long)&frame->uc;
+
+	return 0;
+
+segv_and_exit:
+	force_sigsegv(signr, current);
+	return -EFAULT;
+}
+
+static inline void
+handle_restart(struct pt_regs *regs, struct k_sigaction *ka, int has_handler)
+{
+	switch (regs->a4) {
+	case -ERESTARTNOHAND:
+		if (!has_handler)
+			goto do_restart;
+		regs->a4 = -EINTR;
+		break;
+
+	case -ERESTARTSYS:
+		if (has_handler && !(ka->sa.sa_flags & SA_RESTART)) {
+			regs->a4 = -EINTR;
+			break;
+		}
+	/* fallthrough */
+	case -ERESTARTNOINTR:
+do_restart:
+		regs->a4 = regs->orig_a4;
+		regs->pc -= 4;
+		break;
+	}
+}
+
+/*
+ * handle the actual delivery of a signal to userspace
+ */
+static int handle_signal(int sig,
+			 siginfo_t *info, struct k_sigaction *ka,
+			 sigset_t *oldset, struct pt_regs *regs,
+			 int syscall)
+{
+	int ret;
+
+	/* Are we from a system call? */
+	if (syscall) {
+		/* If so, check system call restarting.. */
+		switch (regs->a4) {
+		case -ERESTART_RESTARTBLOCK:
+		case -ERESTARTNOHAND:
+			regs->a4 = -EINTR;
+			break;
+
+		case -ERESTARTSYS:
+			if (!(ka->sa.sa_flags & SA_RESTART)) {
+				regs->a4 = -EINTR;
+				break;
+			}
+
+			/* fallthrough */
+		case -ERESTARTNOINTR:
+			regs->a4 = regs->orig_a4;
+			regs->pc -= 4;
+		}
+	}
+
+	/* Set up the stack frame */
+	ret = setup_rt_frame(sig, ka, info, oldset, regs);
+	if (ret == 0) {
+		spin_lock_irq(&current->sighand->siglock);
+		sigorsets(&current->blocked, &current->blocked,
+			  &ka->sa.sa_mask);
+		if (!(ka->sa.sa_flags & SA_NODEFER))
+			sigaddset(&current->blocked, sig);
+		recalc_sigpending();
+		spin_unlock_irq(&current->sighand->siglock);
+	}
+
+	return ret;
+}
+
+/*
+ * handle a potential signal
+ */
+static void do_signal(struct pt_regs *regs, int syscall)
+{
+	struct k_sigaction ka;
+	siginfo_t info;
+	sigset_t *oldset;
+	int signr;
+
+	/* we want the common case to go fast, which is why we may in certain
+	 * cases get here from kernel mode */
+	if (!user_mode(regs))
+		return;
+
+	if (test_thread_flag(TIF_RESTORE_SIGMASK))
+		oldset = &current->saved_sigmask;
+	else
+		oldset = &current->blocked;
+
+	signr = get_signal_to_deliver(&info, &ka, regs, NULL);
+	if (signr > 0) {
+		if (handle_signal(signr, &info, &ka, oldset,
+				  regs, syscall) == 0) {
+			/* a signal was successfully delivered; the saved
+			 * sigmask will have been stored in the signal frame,
+			 * and will be restored by sigreturn, so we can simply
+			 * clear the TIF_RESTORE_SIGMASK flag */
+			if (test_thread_flag(TIF_RESTORE_SIGMASK))
+				clear_thread_flag(TIF_RESTORE_SIGMASK);
+
+			tracehook_signal_handler(signr, &info, &ka, regs, 0);
+		}
+
+		return;
+	}
+
+	/* did we come from a system call? */
+	if (syscall) {
+		/* restart the system call - no handlers present */
+		switch (regs->a4) {
+		case -ERESTARTNOHAND:
+		case -ERESTARTSYS:
+		case -ERESTARTNOINTR:
+			regs->a4 = regs->orig_a4;
+			regs->pc -= 4;
+			break;
+
+		case -ERESTART_RESTARTBLOCK:
+			regs->a4 = regs->orig_a4;
+			regs->b0 = __NR_restart_syscall;
+			regs->pc -= 4;
+			break;
+		}
+	}
+
+	/* if there's no signal to deliver, we just put the saved sigmask
+	 * back */
+	if (test_thread_flag(TIF_RESTORE_SIGMASK)) {
+		clear_thread_flag(TIF_RESTORE_SIGMASK);
+		sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL);
+	}
+}
+
+/*
+ * notification of userspace execution resumption
+ * - triggered by current->work.notify_resume
+ */
+asmlinkage void do_notify_resume(struct pt_regs *regs, u32 thread_info_flags,
+				 int syscall)
+{
+	/* deal with pending signal delivery */
+	if (thread_info_flags & ((1 << TIF_SIGPENDING) |
+				 (1 << TIF_RESTORE_SIGMASK)))
+		do_signal(regs, syscall);
+
+	if (thread_info_flags & (1 << TIF_NOTIFY_RESUME)) {
+		clear_thread_flag(TIF_NOTIFY_RESUME);
+		tracehook_notify_resume(regs);
+		if (current->replacement_session_keyring)
+			key_replace_session_keyring();
+	}
+}
diff --git a/arch/c6x/kernel/soc.c b/arch/c6x/kernel/soc.c
new file mode 100644
index 0000000..dd45bc3
--- /dev/null
+++ b/arch/c6x/kernel/soc.c
@@ -0,0 +1,91 @@
+/*
+ *  Miscellaneous SoC-specific hooks.
+ *
+ *  Copyright (C) 2011 Texas Instruments Incorporated
+ *  Author: Mark Salter <msalter@redhat.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ */
+#include <linux/module.h>
+#include <linux/ctype.h>
+#include <linux/etherdevice.h>
+#include <asm/system.h>
+#include <asm/setup.h>
+#include <asm/soc.h>
+
+struct soc_ops soc_ops;
+
+int soc_get_exception(void)
+{
+	if (!soc_ops.get_exception)
+		return -1;
+	return soc_ops.get_exception();
+}
+
+void soc_assert_event(unsigned int evt)
+{
+	if (soc_ops.assert_event)
+		soc_ops.assert_event(evt);
+}
+
+static u8 cmdline_mac[6];
+
+static int __init get_mac_addr_from_cmdline(char *str)
+{
+	int count, i, val;
+
+	for (count = 0; count < 6 && *str; count++, str += 3) {
+		if (!isxdigit(str[0]) || !isxdigit(str[1]))
+			return 0;
+		if (str[2] != ((count < 5) ? ':' : '\0'))
+			return 0;
+
+		for (i = 0, val = 0; i < 2; i++) {
+			val = val << 4;
+			val |= isdigit(str[i]) ?
+				str[i] - '0' : toupper(str[i]) - 'A' + 10;
+		}
+		cmdline_mac[count] = val;
+	}
+	return 1;
+}
+__setup("emac_addr=", get_mac_addr_from_cmdline);
+
+/*
+ * Setup the MAC address for SoC ethernet devices.
+ *
+ * Before calling this function, the ethernet driver will have
+ * initialized the addr with local-mac-address from the device
+ * tree (if found). Allow command line to override, but not
+ * the fused address.
+ */
+int soc_mac_addr(unsigned int index, u8 *addr)
+{
+	int i, have_dt_mac = 0, have_cmdline_mac = 0, have_fuse_mac = 0;
+
+	for (i = 0; i < 6; i++) {
+		if (cmdline_mac[i])
+			have_cmdline_mac = 1;
+		if (c6x_fuse_mac[i])
+			have_fuse_mac = 1;
+		if (addr[i])
+			have_dt_mac = 1;
+	}
+
+	/* cmdline overrides all */
+	if (have_cmdline_mac)
+		memcpy(addr, cmdline_mac, 6);
+	else if (!have_dt_mac) {
+		if (have_fuse_mac)
+			memcpy(addr, c6x_fuse_mac, 6);
+		else
+			random_ether_addr(addr);
+	}
+
+	/* adjust for specific EMAC device */
+	addr[5] += index * c6x_num_cores;
+	return 1;
+}
+EXPORT_SYMBOL_GPL(soc_mac_addr);
diff --git a/arch/c6x/kernel/switch_to.S b/arch/c6x/kernel/switch_to.S
new file mode 100644
index 0000000..09177ed
--- /dev/null
+++ b/arch/c6x/kernel/switch_to.S
@@ -0,0 +1,74 @@
+/*
+ *  Copyright (C) 2011 Texas Instruments Incorporated
+ *  Author: Mark Salter (msalter@redhat.com)
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+#include <asm/asm-offsets.h>
+
+#define SP	B15
+
+	/*
+	 * void __switch_to(struct thread_info *prev,
+	 *      	    struct thread_info *next,
+	 *		    struct task_struct *tsk) ;
+	 */
+ENTRY(__switch_to)
+	LDDW	.D2T2	*+B4(THREAD_B15_14),B7:B6
+ ||	MV	.L2X	A4,B5	; prev
+ ||	MV	.L1X	B4,A5	; next
+ ||	MVC	.S2	RILC,B1
+
+	STW	.D2T2	B3,*+B5(THREAD_PC)
+ ||	STDW	.D1T1	A13:A12,*+A4(THREAD_A13_12)
+ ||	MVC	.S2	ILC,B0
+
+	LDW	.D2T2	*+B4(THREAD_PC),B3
+ ||	LDDW	.D1T1	*+A5(THREAD_A13_12),A13:A12
+
+	STDW	.D1T1	A11:A10,*+A4(THREAD_A11_10)
+ ||	STDW	.D2T2	B1:B0,*+B5(THREAD_RICL_ICL)
+#ifndef __DSBT__
+ ||	MVKL	.S2	current_ksp,B1
+#endif
+
+	STDW	.D2T2	B15:B14,*+B5(THREAD_B15_14)
+ ||	STDW	.D1T1	A15:A14,*+A4(THREAD_A15_14)
+#ifndef __DSBT__
+ ||	MVKH	.S2	current_ksp,B1
+#endif
+
+	;; Switch to next SP
+	MV	.S2	B7,SP
+#ifdef __DSBT__
+ ||	STW	.D2T2	B7,*+B14(current_ksp)
+#else
+ ||	STW	.D2T2	B7,*B1
+ ||	MV	.L2	B6,B14
+#endif
+ ||	LDDW	.D1T1	*+A5(THREAD_RICL_ICL),A1:A0
+
+	STDW	.D2T2	B11:B10,*+B5(THREAD_B11_10)
+ ||	LDDW	.D1T1	*+A5(THREAD_A15_14),A15:A14
+
+	STDW	.D2T2	B13:B12,*+B5(THREAD_B13_12)
+ ||	LDDW	.D1T1	*+A5(THREAD_A11_10),A11:A10
+
+	B	.S2	B3		; return in next E1
+ ||	LDDW	.D2T2	*+B4(THREAD_B13_12),B13:B12
+
+	LDDW	.D2T2	*+B4(THREAD_B11_10),B11:B10
+	NOP
+
+	MV	.L2X	A0,B0
+ ||	MV	.S1	A6,A4
+
+	MVC	.S2	B0,ILC
+ ||	MV	.L2X	A1,B1
+
+	MVC	.S2	B1,RILC
+ENDPROC(__switch_to)
diff --git a/arch/c6x/kernel/sys_c6x.c b/arch/c6x/kernel/sys_c6x.c
new file mode 100644
index 0000000..3e9bdfb
--- /dev/null
+++ b/arch/c6x/kernel/sys_c6x.c
@@ -0,0 +1,74 @@
+/*
+ *  Port on Texas Instruments TMS320C6x architecture
+ *
+ *  Copyright (C) 2004, 2009, 2010, 2011 Texas Instruments Incorporated
+ *  Author: Aurelien Jacquiot (aurelien.jacquiot@jaluna.com)
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ */
+#include <linux/module.h>
+#include <linux/syscalls.h>
+#include <linux/uaccess.h>
+
+#include <asm/syscalls.h>
+
+#ifdef CONFIG_ACCESS_CHECK
+int _access_ok(unsigned long addr, unsigned long size)
+{
+	if (!size)
+		return 1;
+
+	if (!addr || addr > (0xffffffffUL - (size - 1)))
+		goto _bad_access;
+
+	if (segment_eq(get_fs(), KERNEL_DS))
+		return 1;
+
+	if (memory_start <= addr && (addr + size - 1) < memory_end)
+		return 1;
+
+_bad_access:
+	pr_debug("Bad access attempt: pid[%d] addr[%08lx] size[0x%lx]\n",
+		 current->pid, addr, size);
+	return 0;
+}
+EXPORT_SYMBOL(_access_ok);
+#endif
+
+/* sys_cache_sync -- sync caches over given range */
+asmlinkage int sys_cache_sync(unsigned long s, unsigned long e)
+{
+	L1D_cache_block_writeback_invalidate(s, e);
+	L1P_cache_block_invalidate(s, e);
+
+	return 0;
+}
+
+/* Provide the actual syscall number to call mapping. */
+#undef __SYSCALL
+#define __SYSCALL(nr, call) [nr] = (call),
+
+/*
+ * Use trampolines
+ */
+#define sys_pread64		sys_pread_c6x
+#define sys_pwrite64		sys_pwrite_c6x
+#define sys_truncate64		sys_truncate64_c6x
+#define sys_ftruncate64		sys_ftruncate64_c6x
+#define sys_fadvise64		sys_fadvise64_c6x
+#define sys_fadvise64_64	sys_fadvise64_64_c6x
+#define sys_fallocate		sys_fallocate_c6x
+
+/* Use sys_mmap_pgoff directly */
+#define sys_mmap2 sys_mmap_pgoff
+
+/*
+ * Note that we can't include <linux/unistd.h> here since the header
+ * guard will defeat us; <asm/unistd.h> checks for __SYSCALL as well.
+ */
+void *sys_call_table[__NR_syscalls] = {
+	[0 ... __NR_syscalls-1] = sys_ni_syscall,
+#include <asm/unistd.h>
+};
diff --git a/arch/c6x/kernel/time.c b/arch/c6x/kernel/time.c
new file mode 100644
index 0000000..4c9f136
--- /dev/null
+++ b/arch/c6x/kernel/time.c
@@ -0,0 +1,65 @@
+/*
+ *  Port on Texas Instruments TMS320C6x architecture
+ *
+ *  Copyright (C) 2004, 2009, 2010, 2011 Texas Instruments Incorporated
+ *  Author: Aurelien Jacquiot (aurelien.jacquiot@jaluna.com)
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/clocksource.h>
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/param.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/timex.h>
+#include <linux/profile.h>
+
+#include <asm/timer64.h>
+
+static u32 sched_clock_multiplier;
+#define SCHED_CLOCK_SHIFT 16
+
+static cycle_t tsc_read(struct clocksource *cs)
+{
+	return get_cycles();
+}
+
+static struct clocksource clocksource_tsc = {
+	.name		= "timestamp",
+	.rating		= 300,
+	.read		= tsc_read,
+	.mask		= CLOCKSOURCE_MASK(64),
+	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
+};
+
+/*
+ * scheduler clock - returns current time in nanoseconds.
+ */
+u64 sched_clock(void)
+{
+	u64 tsc = get_cycles();
+
+	return (tsc * sched_clock_multiplier) >> SCHED_CLOCK_SHIFT;
+}
+
+void time_init(void)
+{
+	u64 tmp = (u64)NSEC_PER_SEC << SCHED_CLOCK_SHIFT;
+
+	do_div(tmp, c6x_core_freq);
+	sched_clock_multiplier = tmp;
+
+	clocksource_register_hz(&clocksource_tsc, c6x_core_freq);
+
+	/* write anything into TSCL to enable counting */
+	set_creg(TSCL, 0);
+
+	/* probe for timer64 event timer */
+	timer64_init();
+}
diff --git a/arch/c6x/kernel/traps.c b/arch/c6x/kernel/traps.c
new file mode 100644
index 0000000..f50e3ed
--- /dev/null
+++ b/arch/c6x/kernel/traps.c
@@ -0,0 +1,423 @@
+/*
+ *  Port on Texas Instruments TMS320C6x architecture
+ *
+ *  Copyright (C) 2004, 2006, 2009, 2010, 2011 Texas Instruments Incorporated
+ *  Author: Aurelien Jacquiot (aurelien.jacquiot@jaluna.com)
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ */
+#include <linux/module.h>
+#include <linux/ptrace.h>
+#include <linux/kallsyms.h>
+#include <linux/bug.h>
+
+#include <asm/soc.h>
+#include <asm/traps.h>
+
+int (*c6x_nmi_handler)(struct pt_regs *regs);
+
+void __init trap_init(void)
+{
+	ack_exception(EXCEPT_TYPE_NXF);
+	ack_exception(EXCEPT_TYPE_EXC);
+	ack_exception(EXCEPT_TYPE_IXF);
+	ack_exception(EXCEPT_TYPE_SXF);
+	enable_exception();
+}
+
+void show_regs(struct pt_regs *regs)
+{
+	pr_err("\n");
+	pr_err("PC: %08lx SP: %08lx\n", regs->pc, regs->sp);
+	pr_err("Status: %08lx ORIG_A4: %08lx\n", regs->csr, regs->orig_a4);
+	pr_err("A0: %08lx  B0: %08lx\n", regs->a0, regs->b0);
+	pr_err("A1: %08lx  B1: %08lx\n", regs->a1, regs->b1);
+	pr_err("A2: %08lx  B2: %08lx\n", regs->a2, regs->b2);
+	pr_err("A3: %08lx  B3: %08lx\n", regs->a3, regs->b3);
+	pr_err("A4: %08lx  B4: %08lx\n", regs->a4, regs->b4);
+	pr_err("A5: %08lx  B5: %08lx\n", regs->a5, regs->b5);
+	pr_err("A6: %08lx  B6: %08lx\n", regs->a6, regs->b6);
+	pr_err("A7: %08lx  B7: %08lx\n", regs->a7, regs->b7);
+	pr_err("A8: %08lx  B8: %08lx\n", regs->a8, regs->b8);
+	pr_err("A9: %08lx  B9: %08lx\n", regs->a9, regs->b9);
+	pr_err("A10: %08lx  B10: %08lx\n", regs->a10, regs->b10);
+	pr_err("A11: %08lx  B11: %08lx\n", regs->a11, regs->b11);
+	pr_err("A12: %08lx  B12: %08lx\n", regs->a12, regs->b12);
+	pr_err("A13: %08lx  B13: %08lx\n", regs->a13, regs->b13);
+	pr_err("A14: %08lx  B14: %08lx\n", regs->a14, regs->dp);
+	pr_err("A15: %08lx  B15: %08lx\n", regs->a15, regs->sp);
+	pr_err("A16: %08lx  B16: %08lx\n", regs->a16, regs->b16);
+	pr_err("A17: %08lx  B17: %08lx\n", regs->a17, regs->b17);
+	pr_err("A18: %08lx  B18: %08lx\n", regs->a18, regs->b18);
+	pr_err("A19: %08lx  B19: %08lx\n", regs->a19, regs->b19);
+	pr_err("A20: %08lx  B20: %08lx\n", regs->a20, regs->b20);
+	pr_err("A21: %08lx  B21: %08lx\n", regs->a21, regs->b21);
+	pr_err("A22: %08lx  B22: %08lx\n", regs->a22, regs->b22);
+	pr_err("A23: %08lx  B23: %08lx\n", regs->a23, regs->b23);
+	pr_err("A24: %08lx  B24: %08lx\n", regs->a24, regs->b24);
+	pr_err("A25: %08lx  B25: %08lx\n", regs->a25, regs->b25);
+	pr_err("A26: %08lx  B26: %08lx\n", regs->a26, regs->b26);
+	pr_err("A27: %08lx  B27: %08lx\n", regs->a27, regs->b27);
+	pr_err("A28: %08lx  B28: %08lx\n", regs->a28, regs->b28);
+	pr_err("A29: %08lx  B29: %08lx\n", regs->a29, regs->b29);
+	pr_err("A30: %08lx  B30: %08lx\n", regs->a30, regs->b30);
+	pr_err("A31: %08lx  B31: %08lx\n", regs->a31, regs->b31);
+}
+
+void dump_stack(void)
+{
+	unsigned long stack;
+
+	show_stack(current, &stack);
+}
+EXPORT_SYMBOL(dump_stack);
+
+
+void die(char *str, struct pt_regs *fp, int nr)
+{
+	console_verbose();
+	pr_err("%s: %08x\n", str, nr);
+	show_regs(fp);
+
+	pr_err("Process %s (pid: %d, stackpage=%08lx)\n",
+	       current->comm, current->pid, (PAGE_SIZE +
+					     (unsigned long) current));
+
+	dump_stack();
+	while (1)
+		;
+}
+
+static void die_if_kernel(char *str, struct pt_regs *fp, int nr)
+{
+	if (user_mode(fp))
+		return;
+
+	die(str, fp, nr);
+}
+
+
+/* Internal exceptions */
+static struct exception_info iexcept_table[10] = {
+	{ "Oops - instruction fetch", SIGBUS, BUS_ADRERR },
+	{ "Oops - fetch packet", SIGBUS, BUS_ADRERR },
+	{ "Oops - execute packet", SIGILL, ILL_ILLOPC },
+	{ "Oops - undefined instruction", SIGILL, ILL_ILLOPC },
+	{ "Oops - resource conflict", SIGILL, ILL_ILLOPC },
+	{ "Oops - resource access", SIGILL, ILL_PRVREG },
+	{ "Oops - privilege", SIGILL, ILL_PRVOPC },
+	{ "Oops - loops buffer", SIGILL, ILL_ILLOPC },
+	{ "Oops - software exception", SIGILL, ILL_ILLTRP },
+	{ "Oops - unknown exception", SIGILL, ILL_ILLOPC }
+};
+
+/* External exceptions */
+static struct exception_info eexcept_table[128] = {
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - external exception", SIGBUS, BUS_ADRERR },
+	{ "Oops - CPU memory protection fault", SIGSEGV, SEGV_ACCERR },
+	{ "Oops - CPU memory protection fault in L1P", SIGSEGV, SEGV_ACCERR },
+	{ "Oops - DMA memory protection fault in L1P", SIGSEGV, SEGV_ACCERR },
+	{ "Oops - CPU memory protection fault in L1D", SIGSEGV, SEGV_ACCERR },
+	{ "Oops - DMA memory protection fault in L1D", SIGSEGV, SEGV_ACCERR },
+	{ "Oops - CPU memory protection fault in L2", SIGSEGV, SEGV_ACCERR },
+	{ "Oops - DMA memory protection fault in L2", SIGSEGV, SEGV_ACCERR },
+	{ "Oops - EMC CPU memory protection fault", SIGSEGV, SEGV_ACCERR },
+	{ "Oops - EMC bus error", SIGBUS, BUS_ADRERR }
+};
+
+static void do_trap(struct exception_info *except_info, struct pt_regs *regs)
+{
+	unsigned long addr = instruction_pointer(regs);
+	siginfo_t info;
+
+	if (except_info->code != TRAP_BRKPT)
+		pr_err("TRAP: %s PC[0x%lx] signo[%d] code[%d]\n",
+		       except_info->kernel_str, regs->pc,
+		       except_info->signo, except_info->code);
+
+	die_if_kernel(except_info->kernel_str, regs, addr);
+
+	info.si_signo = except_info->signo;
+	info.si_errno = 0;
+	info.si_code  = except_info->code;
+	info.si_addr  = (void __user *)addr;
+
+	force_sig_info(except_info->signo, &info, current);
+}
+
+/*
+ * Process an internal exception (non maskable)
+ */
+static int process_iexcept(struct pt_regs *regs)
+{
+	unsigned int iexcept_report = get_iexcept();
+	unsigned int iexcept_num;
+
+	ack_exception(EXCEPT_TYPE_IXF);
+
+	pr_err("IEXCEPT: PC[0x%lx]\n", regs->pc);
+
+	while (iexcept_report) {
+		iexcept_num = __ffs(iexcept_report);
+		iexcept_report &= ~(1 << iexcept_num);
+		set_iexcept(iexcept_report);
+		if (*(unsigned int *)regs->pc == BKPT_OPCODE) {
+			/* This is a breakpoint */
+			struct exception_info bkpt_exception = {
+				"Oops - undefined instruction",
+				  SIGTRAP, TRAP_BRKPT
+			};
+			do_trap(&bkpt_exception, regs);
+			iexcept_report &= ~(0xFF);
+			set_iexcept(iexcept_report);
+			continue;
+		}
+
+		do_trap(&iexcept_table[iexcept_num], regs);
+	}
+	return 0;
+}
+
+/*
+ * Process an external exception (maskable)
+ */
+static void process_eexcept(struct pt_regs *regs)
+{
+	int evt;
+
+	pr_err("EEXCEPT: PC[0x%lx]\n", regs->pc);
+
+	while ((evt = soc_get_exception()) >= 0)
+		do_trap(&eexcept_table[evt], regs);
+
+	ack_exception(EXCEPT_TYPE_EXC);
+}
+
+/*
+ * Main exception processing
+ */
+asmlinkage int process_exception(struct pt_regs *regs)
+{
+	unsigned int type;
+	unsigned int type_num;
+	unsigned int ie_num = 9; /* default is unknown exception */
+
+	while ((type = get_except_type()) != 0) {
+		type_num = fls(type) - 1;
+
+		switch (type_num) {
+		case EXCEPT_TYPE_NXF:
+			ack_exception(EXCEPT_TYPE_NXF);
+			if (c6x_nmi_handler)
+				(c6x_nmi_handler)(regs);
+			else
+				pr_alert("NMI interrupt!\n");
+			break;
+
+		case EXCEPT_TYPE_IXF:
+			if (process_iexcept(regs))
+				return 1;
+			break;
+
+		case EXCEPT_TYPE_EXC:
+			process_eexcept(regs);
+			break;
+
+		case EXCEPT_TYPE_SXF:
+			ie_num = 8;
+		default:
+			ack_exception(type_num);
+			do_trap(&iexcept_table[ie_num], regs);
+			break;
+		}
+	}
+	return 0;
+}
+
+static int kstack_depth_to_print = 48;
+
+static void show_trace(unsigned long *stack, unsigned long *endstack)
+{
+	unsigned long addr;
+	int i;
+
+	pr_debug("Call trace:");
+	i = 0;
+	while (stack + 1 <= endstack) {
+		addr = *stack++;
+		/*
+		 * If the address is either in the text segment of the
+		 * kernel, or in the region which contains vmalloc'ed
+		 * memory, it *may* be the address of a calling
+		 * routine; if so, print it so that someone tracing
+		 * down the cause of the crash will be able to figure
+		 * out the call path that was taken.
+		 */
+		if (__kernel_text_address(addr)) {
+#ifndef CONFIG_KALLSYMS
+			if (i % 5 == 0)
+				pr_debug("\n	    ");
+#endif
+			pr_debug(" [<%08lx>]", addr);
+			print_symbol(" %s\n", addr);
+			i++;
+		}
+	}
+	pr_debug("\n");
+}
+
+void show_stack(struct task_struct *task, unsigned long *stack)
+{
+	unsigned long *p, *endstack;
+	int i;
+
+	if (!stack) {
+		if (task && task != current)
+			/* We know this is a kernel stack,
+			   so this is the start/end */
+			stack = (unsigned long *)thread_saved_ksp(task);
+		else
+			stack = (unsigned long *)&stack;
+	}
+	endstack = (unsigned long *)(((unsigned long)stack + THREAD_SIZE - 1)
+				     & -THREAD_SIZE);
+
+	pr_debug("Stack from %08lx:", (unsigned long)stack);
+	for (i = 0, p = stack; i < kstack_depth_to_print; i++) {
+		if (p + 1 > endstack)
+			break;
+		if (i % 8 == 0)
+			pr_cont("\n	    ");
+		pr_cont(" %08lx", *p++);
+	}
+	pr_cont("\n");
+	show_trace(stack, endstack);
+}
+
+int is_valid_bugaddr(unsigned long addr)
+{
+	return __kernel_text_address(addr);
+}
diff --git a/arch/c6x/kernel/vectors.S b/arch/c6x/kernel/vectors.S
new file mode 100644
index 0000000..c95c66f
--- /dev/null
+++ b/arch/c6x/kernel/vectors.S
@@ -0,0 +1,81 @@
+;
+;  Port on Texas Instruments TMS320C6x architecture
+;
+;  Copyright (C) 2004, 2006, 2009, 2010, 2011 Texas Instruments Incorporated
+;  Author: Aurelien Jacquiot (aurelien.jacquiot@jaluna.com)
+;
+;  This program is free software; you can redistribute it and/or modify
+;  it under the terms of the GNU General Public License version 2 as
+;  published by the Free Software Foundation.
+;
+;  This section handles all the interrupt vector routines.
+;  At RESET the processor sets up the DRAM timing parameters and
+;  branches to the label _c_int00 which handles initialization for the C code.
+;
+
+#define ALIGNMENT 5
+
+	.macro IRQVEC name, handler
+	.align ALIGNMENT
+	.hidden \name
+	.global \name
+\name:
+#ifdef CONFIG_C6X_BIG_KERNEL
+	STW	.D2T1	A0,*B15--[2]
+ ||	MVKL	.S1	\handler,A0
+	MVKH	.S1	\handler,A0
+	B	.S2X	A0
+	LDW	.D2T1	*++B15[2],A0
+	NOP	4
+	NOP
+	NOP
+	.endm
+#else /* CONFIG_C6X_BIG_KERNEL */
+	B	.S2	\handler
+	NOP
+	NOP
+	NOP
+	NOP
+	NOP
+	NOP
+	NOP
+	.endm
+#endif /* CONFIG_C6X_BIG_KERNEL */
+
+	   .sect ".vectors","ax"
+	   .align ALIGNMENT
+	   .global RESET
+	   .hidden RESET
+RESET:
+#ifdef CONFIG_C6X_BIG_KERNEL
+	   MVKL	.S1	_c_int00,A0		; branch to _c_int00
+	   MVKH	.S1	_c_int00,A0
+	   B	.S2X	A0
+#else
+	   B	.S2	_c_int00
+	   NOP
+	   NOP
+#endif
+	   NOP
+	   NOP
+	   NOP
+	   NOP
+	   NOP
+
+
+	   IRQVEC NMI,_nmi_handler		; NMI interrupt
+	   IRQVEC AINT,_bad_interrupt		; reserved
+	   IRQVEC MSGINT,_bad_interrupt		; reserved
+
+	   IRQVEC INT4,_int4_handler
+	   IRQVEC INT5,_int5_handler
+	   IRQVEC INT6,_int6_handler
+	   IRQVEC INT7,_int7_handler
+	   IRQVEC INT8,_int8_handler
+	   IRQVEC INT9,_int9_handler
+	   IRQVEC INT10,_int10_handler
+	   IRQVEC INT11,_int11_handler
+	   IRQVEC INT12,_int12_handler
+	   IRQVEC INT13,_int13_handler
+	   IRQVEC INT14,_int14_handler
+	   IRQVEC INT15,_int15_handler
diff --git a/arch/c6x/kernel/vmlinux.lds.S b/arch/c6x/kernel/vmlinux.lds.S
new file mode 100644
index 0000000..1d81c4c
--- /dev/null
+++ b/arch/c6x/kernel/vmlinux.lds.S
@@ -0,0 +1,162 @@
+/*
+ * ld script for the c6x kernel
+ *
+ *  Copyright (C) 2010, 2011 Texas Instruments Incorporated
+ *  Mark Salter <msalter@redhat.com>
+ */
+#include <asm-generic/vmlinux.lds.h>
+#include <asm/thread_info.h>
+#include <asm/page.h>
+
+ENTRY(_c_int00)
+
+#if defined(CONFIG_CPU_BIG_ENDIAN)
+jiffies = jiffies_64 + 4;
+#else
+jiffies = jiffies_64;
+#endif
+
+#define	READONLY_SEGMENT_START	\
+	. = PAGE_OFFSET;
+#define	READWRITE_SEGMENT_START	\
+	. = ALIGN(128);		\
+	_data_lma = .;
+
+SECTIONS
+{
+	/*
+	 * Start kernel read only segment
+	 */
+	READONLY_SEGMENT_START
+
+	.vectors :
+	{
+		_vectors_start = .;
+		*(.vectors)
+		. = ALIGN(0x400);
+		_vectors_end = .;
+	}
+
+	. = ALIGN(0x1000);
+	.cmdline :
+	{
+		*(.cmdline)
+	}
+
+	/*
+	 * This section contains data which may be shared with other
+	 * cores. It needs to be a fixed offset from PAGE_OFFSET
+	 * regardless of kernel configuration.
+	 */
+	.virtio_ipc_dev :
+	{
+		*(.virtio_ipc_dev)
+	}
+
+	. = ALIGN(PAGE_SIZE);
+	.init :
+	{
+		_stext = .;
+		_sinittext = .;
+		HEAD_TEXT
+		INIT_TEXT
+		_einittext = .;
+	}
+
+	__init_begin = _stext;
+	INIT_DATA_SECTION(16)
+
+	PERCPU_SECTION(128)
+
+	. = ALIGN(PAGE_SIZE);
+	__init_end = .;
+
+	.text :
+	{
+		_text = .;
+		TEXT_TEXT
+		SCHED_TEXT
+		LOCK_TEXT
+		IRQENTRY_TEXT
+		KPROBES_TEXT
+		*(.fixup)
+		*(.gnu.warning)
+	}
+
+	EXCEPTION_TABLE(16)
+	NOTES
+
+	RO_DATA_SECTION(PAGE_SIZE)
+	.const :
+	{
+		*(.const .const.* .gnu.linkonce.r.*)
+		*(.switch)
+	}
+
+	. = ALIGN (8) ;
+	__fdt_blob : AT(ADDR(__fdt_blob) - LOAD_OFFSET)
+	{
+		_fdt_start = . ;	/* place for fdt blob */
+		*(__fdt_blob) ;		/* Any link-placed DTB */
+		BYTE(0);		/* section always has contents */
+	        . = _fdt_start + 0x4000;	/* Pad up to 16kbyte */
+		_fdt_end = . ;
+	}
+
+	_etext = .;
+
+	/*
+	 * Start kernel read-write segment.
+	 */
+	READWRITE_SEGMENT_START
+	_sdata = .;
+
+	.fardata : AT(ADDR(.fardata) - LOAD_OFFSET)
+	{
+		INIT_TASK_DATA(THREAD_SIZE)
+		NOSAVE_DATA
+		PAGE_ALIGNED_DATA(PAGE_SIZE)
+		CACHELINE_ALIGNED_DATA(128)
+		READ_MOSTLY_DATA(128)
+		DATA_DATA
+		CONSTRUCTORS
+		*(.data1)
+		*(.fardata .fardata.*)
+		*(.data.debug_bpt)
+	}
+
+	.neardata ALIGN(8) : AT(ADDR(.neardata) - LOAD_OFFSET)
+	{
+		*(.neardata2 .neardata2.* .gnu.linkonce.s2.*)
+		*(.neardata .neardata.* .gnu.linkonce.s.*)
+		. = ALIGN(8);
+	}
+
+	_edata = .;
+
+	__bss_start = .;
+	SBSS(8)
+	BSS(8)
+	.far :
+	{
+		. = ALIGN(8);
+		*(.dynfar)
+		*(.far .far.* .gnu.linkonce.b.*)
+		. = ALIGN(8);
+	}
+	__bss_stop = .;
+
+	_end = .;
+
+	DWARF_DEBUG
+
+	/DISCARD/ :
+	{
+		  EXIT_TEXT
+		  EXIT_DATA
+		  EXIT_CALL
+		  *(.discard)
+		  *(.discard.*)
+		  *(.interp)
+	}
+}
diff --git a/arch/c6x/lib/Makefile b/arch/c6x/lib/Makefile
new file mode 100644
index 0000000..ffd3c65
--- /dev/null
+++ b/arch/c6x/lib/Makefile
@@ -0,0 +1,7 @@
+#
+# Makefile for arch/c6x/lib/
+#
+
+lib-y := divu.o divi.o pop_rts.o push_rts.o remi.o remu.o strasgi.o llshru.o
+lib-y += llshr.o llshl.o negll.o mpyll.o divremi.o divremu.o
+lib-y += checksum.o csum_64plus.o memcpy_64plus.o strasgi_64plus.o
diff --git a/arch/c6x/lib/checksum.c b/arch/c6x/lib/checksum.c
new file mode 100644
index 0000000..67cc93b
--- /dev/null
+++ b/arch/c6x/lib/checksum.c
@@ -0,0 +1,36 @@
+/*
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ */
+#include <linux/module.h>
+#include <net/checksum.h>
+
+#include <asm/byteorder.h>
+
+/*
+ * copy from fs while checksumming, otherwise like csum_partial
+ */
+__wsum
+csum_partial_copy_from_user(const void __user *src, void *dst, int len,
+			    __wsum sum, int *csum_err)
+{
+	int missing;
+
+	missing = __copy_from_user(dst, src, len);
+	if (missing) {
+		memset(dst + len - missing, 0, missing);
+		*csum_err = -EFAULT;
+	} else
+		*csum_err = 0;
+
+	return csum_partial(dst, len, sum);
+}
+EXPORT_SYMBOL(csum_partial_copy_from_user);
+
+/* These are from csum_64plus.S */
+EXPORT_SYMBOL(csum_partial);
+EXPORT_SYMBOL(csum_partial_copy);
+EXPORT_SYMBOL(ip_compute_csum);
+EXPORT_SYMBOL(ip_fast_csum);
diff --git a/arch/c6x/lib/csum_64plus.S b/arch/c6x/lib/csum_64plus.S
new file mode 100644
index 0000000..6d25896
--- /dev/null
+++ b/arch/c6x/lib/csum_64plus.S
@@ -0,0 +1,419 @@
+;
+;  linux/arch/c6x/lib/csum_64plus.s
+;
+;  Port on Texas Instruments TMS320C6x architecture
+;
+;  Copyright (C) 2006, 2009, 2010, 2011 Texas Instruments Incorporated
+;  Author: Aurelien Jacquiot (aurelien.jacquiot@jaluna.com)
+;
+;  This program is free software; you can redistribute it and/or modify
+;  it under the terms of the GNU General Public License version 2 as
+;  published by the Free Software Foundation.
+;
+#include <linux/linkage.h>
+
+;
+;unsigned int csum_partial_copy(const char *src, char * dst,
+;				int len, int sum)
+;
+; A4:	src
+; B4:	dst
+; A6:	len
+; B6:	sum
+; return csum in A4
+;
+
+	.text
+ENTRY(csum_partial_copy)
+	MVC	.S2	ILC,B30
+
+	MV	.D1X	B6,A31		; given csum
+	ZERO	.D1	A9		; csum (a side)
+||	ZERO	.D2	B9		; csum (b side)
+||	SHRU	.S2X	A6,2,B5		; len / 4
+
+	;; Check alignment and size
+	AND	.S1	3,A4,A1
+||	AND	.S2	3,B4,B0
+	OR	.L2X	B0,A1,B0	; non aligned condition
+||	MVC	.S2	B5,ILC
+||	MVK	.D2	1,B2
+||	MV	.D1X	B5,A1		; words condition
+  [!A1]	B	.S1	L8
+   [B0] BNOP	.S1	L6,5
+
+	SPLOOP		1
+
+	;; Main loop for aligned words
+	LDW	.D1T1	*A4++,A7
+	NOP	4
+	MV	.S2X	A7,B7
+||	EXTU	.S1	A7,0,16,A16
+	STW	.D2T2	B7,*B4++
+||	MPYU	.M2	B7,B2,B8
+||	ADD	.L1	A16,A9,A9
+	NOP
+	SPKERNEL	8,0
+||	ADD	.L2	B8,B9,B9
+
+	ZERO	.D1	A1
+||	ADD	.L1X	A9,B9,A9	;  add csum from a and b sides
+
+L6:
+  [!A1]	BNOP	.S1	L8,5
+
+	;; Main loop for non-aligned words
+	SPLOOP		2
+ ||	MVK	.L1	1,A2
+
+	LDNW	.D1T1	*A4++,A7
+	NOP		3
+
+	NOP
+	MV	.S2X	A7,B7
+ ||	EXTU	.S1	A7,0,16,A16
+ ||	MPYU	.M1	A7,A2,A8
+
+	ADD	.L1	A16,A9,A9
+	SPKERNEL	6,0
+ ||	STNW	.D2T2	B7,*B4++
+ ||	ADD	.L1	A8,A9,A9
+
+L8:	AND	.S2X	2,A6,B5
+	CMPGT	.L2	B5,0,B0
+  [!B0]	BNOP	.S1	L82,4
+
+	;; Manage half-word
+	ZERO	.L1	A7
+||	ZERO	.D1	A8
+
+#ifdef CONFIG_CPU_BIG_ENDIAN
+
+	LDBU	.D1T1	*A4++,A7
+	LDBU	.D1T1	*A4++,A8
+	NOP		3
+	SHL	.S1	A7,8,A0
+	ADD	.S1	A8,A9,A9
+	STB	.D2T1	A7,*B4++
+||	ADD	.S1	A0,A9,A9
+	STB	.D2T1	A8,*B4++
+
+#else
+
+	LDBU	.D1T1	*A4++,A7
+	LDBU	.D1T1	*A4++,A8
+	NOP		3
+	ADD	.S1	A7,A9,A9
+	SHL	.S1	A8,8,A0
+
+	STB	.D2T1	A7,*B4++
+||	ADD	.S1	A0,A9,A9
+	STB	.D2T1	A8,*B4++
+
+#endif
+
+	;; Manage eventually the last byte
+L82:	AND	.S2X	1,A6,B0
+  [!B0]	BNOP	.S1	L9,5
+
+||	ZERO	.L1	A7
+
+L83:	LDBU	.D1T1	*A4++,A7
+	NOP		4
+
+	MV	.L2X	A7,B7
+
+#ifdef CONFIG_CPU_BIG_ENDIAN
+
+	STB	.D2T2	B7,*B4++
+||	SHL	.S1	A7,8,A7
+	ADD	.S1	A7,A9,A9
+
+#else
+
+	STB	.D2T2	B7,*B4++
+||	ADD	.S1	A7,A9,A9
+
+#endif
+
+	;; Fold the csum
+L9:	SHRU	.S2X	A9,16,B0
+  [!B0]	BNOP	.S1	L10,5
+
+L91:	SHRU	.S2X	A9,16,B4
+||	EXTU	.S1	A9,16,16,A3
+	ADD	.D1X	A3,B4,A9
+
+	SHRU	.S1	A9,16,A0
+   [A0]	BNOP	.S1	L91,5
+
+L10:	ADD	.D1	A31,A9,A9
+	MV	.D1	A9,A4
+
+	BNOP	.S2	B3,4
+	MVC	.S2	B30,ILC
+ENDPROC(csum_partial_copy)
+
+;
+;unsigned short
+;ip_fast_csum(unsigned char *iph, unsigned int ihl)
+;{
+;	unsigned int checksum = 0;
+;	unsigned short *tosum = (unsigned short *) iph;
+;	int len;
+;
+;	len = ihl*4;
+;
+;	if (len <= 0)
+;		return 0;
+;
+;	while(len) {
+;		len -= 2;
+;		checksum += *tosum++;
+;	}
+;	if (len & 1)
+;		checksum += *(unsigned char*) tosum;
+;
+;	while(checksum >> 16)
+;		checksum = (checksum & 0xffff) + (checksum >> 16);
+;
+;	return ~checksum;
+;}
+;
+; A4:	iph
+; B4:	ihl
+; return checksum in A4
+;
+	.text
+
+ENTRY(ip_fast_csum)
+	ZERO	.D1	A5
+ ||	MVC	.S2	ILC,B30
+	SHL	.S2	B4,2,B0
+	CMPGT	.L2	B0,0,B1
+  [!B1] BNOP	.S1	L15,4
+  [!B1]	ZERO	.D1	A3
+
+  [!B0]	B	.S1	L12
+	SHRU	.S2	B0,1,B0
+	MVC	.S2	B0,ILC
+	NOP	3
+
+	SPLOOP	1
+	LDHU	.D1T1	*A4++,A3
+	NOP	3
+	NOP
+	SPKERNEL	5,0
+ ||	ADD	.L1	A3,A5,A5
+
+L12:	SHRU	.S1	A5,16,A0
+  [!A0]	BNOP	.S1	L14,5
+
+L13:	SHRU	.S2X	A5,16,B4
+	EXTU	.S1	A5,16,16,A3
+	ADD	.D1X	A3,B4,A5
+	SHRU	.S1	A5,16,A0
+  [A0]	BNOP	.S1	L13,5
+
+L14:	NOT	.D1	A5,A3
+	EXTU	.S1	A3,16,16,A3
+
+L15:	BNOP	.S2	B3,3
+	MVC	.S2	B30,ILC
+	MV	.D1	A3,A4
+ENDPROC(ip_fast_csum)
+
+;
+;unsigned short
+;do_csum(unsigned char *buff, unsigned int len)
+;{
+;	int odd, count;
+;	unsigned int result = 0;
+;
+;	if (len <= 0)
+;		goto out;
+;	odd = 1 & (unsigned long) buff;
+;	if (odd) {
+;#ifdef __LITTLE_ENDIAN
+;		result += (*buff << 8);
+;#else
+;		result = *buff;
+;#endif
+;		len--;
+;		buff++;
+;	}
+;	count = len >> 1;		/* nr of 16-bit words.. */
+;	if (count) {
+;		if (2 & (unsigned long) buff) {
+;			result += *(unsigned short *) buff;
+;			count--;
+;			len -= 2;
+;			buff += 2;
+;		}
+;		count >>= 1;		/* nr of 32-bit words.. */
+;		if (count) {
+;			unsigned int carry = 0;
+;			do {
+;				unsigned int w = *(unsigned int *) buff;
+;				count--;
+;				buff += 4;
+;				result += carry;
+;				result += w;
+;				carry = (w > result);
+;			} while (count);
+;			result += carry;
+;			result = (result & 0xffff) + (result >> 16);
+;		}
+;		if (len & 2) {
+;			result += *(unsigned short *) buff;
+;			buff += 2;
+;		}
+;	}
+;	if (len & 1)
+;#ifdef __LITTLE_ENDIAN
+;		result += *buff;
+;#else
+;		result += (*buff << 8);
+;#endif
+;	result = (result & 0xffff) + (result >> 16);
+;	/* add up carry.. */
+;	result = (result & 0xffff) + (result >> 16);
+;	if (odd)
+;		result = ((result >> 8) & 0xff) | ((result & 0xff) << 8);
+;out:
+;	return result;
+;}
+;
+; A4:	buff
+; B4:	len
+; return checksum in A4
+;
+
+ENTRY(do_csum)
+	   CMPGT   .L2	   B4,0,B0
+   [!B0]   BNOP    .S1	   L26,3
+	   EXTU    .S1	   A4,31,31,A0
+
+	   MV	   .L1	   A0,A3
+||	   MV	   .S1X    B3,A5
+||	   MV	   .L2	   B4,B3
+||	   ZERO    .D1	   A1
+
+#ifdef CONFIG_CPU_BIG_ENDIAN
+   [A0]    SUB	   .L2	   B3,1,B3
+|| [A0]    LDBU    .D1T1   *A4++,A1
+#else
+   [!A0]   BNOP    .S1	   L21,5
+|| [A0]    LDBU    .D1T1   *A4++,A0
+	   SUB	   .L2	   B3,1,B3
+||	   SHL	   .S1	   A0,8,A1
+L21:
+#endif
+	   SHR	   .S2	   B3,1,B0
+   [!B0]   BNOP    .S1	   L24,3
+	   MVK	   .L1	   2,A0
+	   AND	   .L1	   A4,A0,A0
+
+   [!A0]   BNOP    .S1	   L22,5
+|| [A0]    LDHU    .D1T1   *A4++,A0
+	   SUB	   .L2	   B0,1,B0
+||	   SUB	   .S2	   B3,2,B3
+||	   ADD	   .L1	   A0,A1,A1
+L22:
+	   SHR	   .S2	   B0,1,B0
+||	   ZERO    .L1	   A0
+
+   [!B0]   BNOP    .S1	   L23,5
+|| [B0]    MVC	   .S2	   B0,ILC
+
+	   SPLOOP  3
+	   SPMASK  L1
+||	   MV	   .L1	   A1,A2
+||	   LDW	   .D1T1   *A4++,A1
+
+	   NOP	   4
+	   ADD	   .L1	   A0,A1,A0
+	   ADD	   .L1	   A2,A0,A2
+
+	   SPKERNEL 1,2
+||	   CMPGTU  .L1	   A1,A2,A0
+
+	   ADD	   .L1	   A0,A2,A6
+	   EXTU    .S1	   A6,16,16,A7
+	   SHRU    .S2X    A6,16,B0
+	   NOP		   1
+	   ADD	   .L1X    A7,B0,A1
+L23:
+	   MVK	   .L2	   2,B0
+	   AND	   .L2	   B3,B0,B0
+   [B0]    LDHU    .D1T1   *A4++,A0
+	   NOP	   4
+   [B0]    ADD	   .L1	   A0,A1,A1
+L24:
+	   EXTU    .S2	   B3,31,31,B0
+#ifdef CONFIG_CPU_BIG_ENDIAN
+   [!B0]   BNOP    .S1	   L25,4
+|| [B0]    LDBU    .D1T1   *A4,A0
+	   SHL	   .S1	   A0,8,A0
+	   ADD	   .L1	   A0,A1,A1
+L25:
+#else
+   [B0]    LDBU    .D1T1   *A4,A0
+	   NOP	   4
+   [B0]    ADD	   .L1	   A0,A1,A1
+#endif
+	   EXTU    .S1	   A1,16,16,A0
+	   SHRU    .S2X    A1,16,B0
+	   NOP	   1
+	   ADD	   .L1X    A0,B0,A0
+	   SHRU    .S1	   A0,16,A1
+	   ADD	   .L1	   A0,A1,A0
+	   EXTU    .S1	   A0,16,16,A1
+	   EXTU    .S1	   A1,16,24,A2
+
+	   EXTU    .S1	   A1,24,16,A0
+||	   MV	   .L2X    A3,B0
+
+   [B0]    OR	   .L1	   A0,A2,A1
+L26:
+	   NOP	   1
+	   BNOP    .S2X    A5,4
+	   MV	   .L1	   A1,A4
+ENDPROC(do_csum)
+
+;__wsum csum_partial(const void *buff, int len, __wsum wsum)
+;{
+;	unsigned int sum = (__force unsigned int)wsum;
+;	unsigned int result = do_csum(buff, len);
+;
+;	/* add in old sum, and carry.. */
+;	result += sum;
+;	if (sum > result)
+;		result += 1;
+;	return (__force __wsum)result;
+;}
+;
+ENTRY(csum_partial)
+	   MV	   .L1X    B3,A9
+||	   CALLP   .S2	   do_csum,B3
+||	   MV	   .S1	   A6,A8
+	   BNOP    .S2X    A9,2
+	   ADD	   .L1	   A8,A4,A1
+	   CMPGTU  .L1	   A8,A1,A0
+	   ADD	   .L1	   A1,A0,A4
+ENDPROC(csum_partial)
+
+;unsigned short
+;ip_compute_csum(unsigned char *buff, unsigned int len)
+;
+; A4:	buff
+; B4:	len
+; return checksum in A4
+
+ENTRY(ip_compute_csum)
+	   MV	   .L1X    B3,A9
+||	   CALLP   .S2	   do_csum,B3
+	   BNOP    .S2X    A9,3
+	   NOT	   .S1	   A4,A4
+	   CLR     .S1	   A4,16,31,A4
+ENDPROC(ip_compute_csum)
diff --git a/arch/c6x/lib/divi.S b/arch/c6x/lib/divi.S
new file mode 100644
index 0000000..4bde924
--- /dev/null
+++ b/arch/c6x/lib/divi.S
@@ -0,0 +1,53 @@
+;;  Copyright 2010  Free Software Foundation, Inc.
+;;  Contributed by Bernd Schmidt <bernds@codesourcery.com>.
+;;
+;; This program is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 2 of the License, or
+;; (at your option) any later version.
+;;
+;; This program is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with this program; if not, write to the Free Software
+;; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+#include <linux/linkage.h>
+
+	;; ABI considerations for the divide functions
+	;; The following registers are call-used:
+	;; __c6xabi_divi A0,A1,A2,A4,A6,B0,B1,B2,B4,B5
+	;; __c6xabi_divu A0,A1,A2,A4,A6,B0,B1,B2,B4
+	;; __c6xabi_remi A1,A2,A4,A5,A6,B0,B1,B2,B4
+	;; __c6xabi_remu A1,A4,A5,A7,B0,B1,B2,B4
+	;;
+	;; In our implementation, divu and remu are leaf functions,
+	;; while both divi and remi call into divu.
+	;; A0 is not clobbered by any of the functions.
+	;; divu does not clobber B2 either, which is taken advantage of
+	;; in remi.
+	;; divi uses B5 to hold the original return address during
+	;; the call to divu.
+	;; remi uses B2 and A5 to hold the input values during the
+	;; call to divu.  It stores B3 in on the stack.
+
+	.text
+ENTRY(__c6xabi_divi)
+	call	.s2	__c6xabi_divu
+||	mv	.d2	B3, B5
+||	cmpgt	.l1	0, A4, A1
+||	cmpgt	.l2	0, B4, B1
+
+   [A1]	neg	.l1	A4, A4
+|| [B1]	neg	.l2	B4, B4
+||	xor	.s1x	A1, B1, A1
+   [A1] addkpc	.s2	_divu_ret, B3, 4
+_divu_ret:
+	neg	.l1	A4, A4
+||	mv	.l2	B3,B5
+||	ret	.s2	B5
+	nop		5
+ENDPROC(__c6xabi_divi)
diff --git a/arch/c6x/lib/divremi.S b/arch/c6x/lib/divremi.S
new file mode 100644
index 0000000..64bc5aa
--- /dev/null
+++ b/arch/c6x/lib/divremi.S
@@ -0,0 +1,46 @@
+;;  Copyright 2010  Free Software Foundation, Inc.
+;;  Contributed by Bernd Schmidt <bernds@codesourcery.com>.
+;;
+;; This program is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 2 of the License, or
+;; (at your option) any later version.
+;;
+;; This program is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with this program; if not, write to the Free Software
+;; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+#include <linux/linkage.h>
+
+	.text
+ENTRY(__c6xabi_divremi)
+	stw	.d2t2	B3, *B15--[2]
+||	cmpgt	.l1	0, A4, A1
+||	cmpgt	.l2	0, B4, B2
+||	mv	.s1	A4, A5
+||	call	.s2	__c6xabi_divu
+
+   [A1]	neg	.l1	A4, A4
+|| [B2]	neg	.l2	B4, B4
+||	xor	.s2x	B2, A1, B0
+||	mv	.d2	B4, B2
+
+   [B0]	addkpc	.s2	_divu_ret_1, B3, 1
+  [!B0] addkpc	.s2	_divu_ret_2, B3, 1
+	nop	2
+_divu_ret_1:
+	neg	.l1	A4, A4
+_divu_ret_2:
+	ldw	.d2t2	*++B15[2], B3
+
+	mpy32	.m1x	A4, B2, A6
+	nop		3
+	ret	.s2	B3
+	sub	.l1	A5, A6, A5
+	nop	4
+ENDPROC(__c6xabi_divremi)
diff --git a/arch/c6x/lib/divremu.S b/arch/c6x/lib/divremu.S
new file mode 100644
index 0000000..caa9f23
--- /dev/null
+++ b/arch/c6x/lib/divremu.S
@@ -0,0 +1,87 @@
+;;  Copyright 2011  Free Software Foundation, Inc.
+;;  Contributed by Bernd Schmidt <bernds@codesourcery.com>.
+;;
+;; This program is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 2 of the License, or
+;; (at your option) any later version.
+;;
+;; This program is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with this program; if not, write to the Free Software
+;; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+#include <linux/linkage.h>
+
+	.text
+ENTRY(__c6xabi_divremu)
+	;; We use a series of up to 31 subc instructions.  First, we find
+	;; out how many leading zero bits there are in the divisor.  This
+	;; gives us both a shift count for aligning (shifting) the divisor
+	;; to the, and the number of times we have to execute subc.
+
+	;; At the end, we have both the remainder and most of the quotient
+	;; in A4.  The top bit of the quotient is computed first and is
+	;; placed in A2.
+
+	;; Return immediately if the dividend is zero.	Setting B4 to 1
+	;; is a trick to allow us to leave the following insns in the jump
+	;; delay slot without affecting the result.
+	mv	.s2x	A4, B1
+
+  [b1]	lmbd	.l2	1, B4, B1
+||[!b1] b	.s2	B3	; RETURN A
+||[!b1] mvk	.d2	1, B4
+
+||[!b1] zero	.s1	A5
+	mv	.l1x	B1, A6
+||	shl	.s2	B4, B1, B4
+
+	;; The loop performs a maximum of 28 steps, so we do the
+	;; first 3 here.
+	cmpltu	.l1x	A4, B4, A2
+  [!A2]	sub	.l1x	A4, B4, A4
+||	shru	.s2	B4, 1, B4
+||	xor	.s1	1, A2, A2
+
+	shl	.s1	A2, 31, A2
+|| [b1]	subc	.l1x	A4,B4,A4
+|| [b1]	add	.s2	-1, B1, B1
+   [b1]	subc	.l1x	A4,B4,A4
+|| [b1]	add	.s2	-1, B1, B1
+
+	;; RETURN A may happen here (note: must happen before the next branch)
+__divremu0:
+	cmpgt	.l2	B1, 7, B0
+|| [b1]	subc	.l1x	A4,B4,A4
+|| [b1]	add	.s2	-1, B1, B1
+   [b1]	subc	.l1x	A4,B4,A4
+|| [b1]	add	.s2	-1, B1, B1
+|| [b0] b	.s1	__divremu0
+   [b1]	subc	.l1x	A4,B4,A4
+|| [b1]	add	.s2	-1, B1, B1
+   [b1]	subc	.l1x	A4,B4,A4
+|| [b1]	add	.s2	-1, B1, B1
+   [b1]	subc	.l1x	A4,B4,A4
+|| [b1]	add	.s2	-1, B1, B1
+   [b1]	subc	.l1x	A4,B4,A4
+|| [b1]	add	.s2	-1, B1, B1
+   [b1]	subc	.l1x	A4,B4,A4
+|| [b1]	add	.s2	-1, B1, B1
+	;; loop backwards branch happens here
+
+	ret	.s2	B3
+||	mvk	.s1	32, A1
+	sub	.l1	A1, A6, A6
+||	extu	.s1	A4, A6, A5
+	shl	.s1	A4, A6, A4
+	shru	.s1	A4, 1, A4
+||	sub	.l1	A6, 1, A6
+	or	.l1	A2, A4, A4
+	shru	.s1	A4, A6, A4
+	nop
+ENDPROC(__c6xabi_divremu)
diff --git a/arch/c6x/lib/divu.S b/arch/c6x/lib/divu.S
new file mode 100644
index 0000000..64af3c0
--- /dev/null
+++ b/arch/c6x/lib/divu.S
@@ -0,0 +1,98 @@
+;;  Copyright 2010  Free Software Foundation, Inc.
+;;  Contributed by Bernd Schmidt <bernds@codesourcery.com>.
+;;
+;; This program is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 2 of the License, or
+;; (at your option) any later version.
+;;
+;; This program is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with this program; if not, write to the Free Software
+;; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+#include <linux/linkage.h>
+
+	;; ABI considerations for the divide functions
+	;; The following registers are call-used:
+	;; __c6xabi_divi A0,A1,A2,A4,A6,B0,B1,B2,B4,B5
+	;; __c6xabi_divu A0,A1,A2,A4,A6,B0,B1,B2,B4
+	;; __c6xabi_remi A1,A2,A4,A5,A6,B0,B1,B2,B4
+	;; __c6xabi_remu A1,A4,A5,A7,B0,B1,B2,B4
+	;;
+	;; In our implementation, divu and remu are leaf functions,
+	;; while both divi and remi call into divu.
+	;; A0 is not clobbered by any of the functions.
+	;; divu does not clobber B2 either, which is taken advantage of
+	;; in remi.
+	;; divi uses B5 to hold the original return address during
+	;; the call to divu.
+	;; remi uses B2 and A5 to hold the input values during the
+	;; call to divu.  It stores B3 in on the stack.
+
+	.text
+ENTRY(__c6xabi_divu)
+	;; We use a series of up to 31 subc instructions.  First, we find
+	;; out how many leading zero bits there are in the divisor.  This
+	;; gives us both a shift count for aligning (shifting) the divisor
+	;; to the, and the number of times we have to execute subc.
+
+	;; At the end, we have both the remainder and most of the quotient
+	;; in A4.  The top bit of the quotient is computed first and is
+	;; placed in A2.
+
+	;; Return immediately if the dividend is zero.
+	 mv	.s2x	A4, B1
+   [B1]	 lmbd	.l2	1, B4, B1
+|| [!B1] b	.s2	B3	; RETURN A
+|| [!B1] mvk	.d2	1, B4
+	 mv	.l1x	B1, A6
+||	 shl	.s2	B4, B1, B4
+
+	;; The loop performs a maximum of 28 steps, so we do the
+	;; first 3 here.
+	 cmpltu	.l1x	A4, B4, A2
+   [!A2] sub	.l1x	A4, B4, A4
+||	 shru	.s2	B4, 1, B4
+||	 xor	.s1	1, A2, A2
+
+	 shl	.s1	A2, 31, A2
+|| [B1]	 subc	.l1x	A4,B4,A4
+|| [B1]	 add	.s2	-1, B1, B1
+   [B1]	 subc	.l1x	A4,B4,A4
+|| [B1]	 add	.s2	-1, B1, B1
+
+	;; RETURN A may happen here (note: must happen before the next branch)
+_divu_loop:
+	 cmpgt	.l2	B1, 7, B0
+|| [B1]	 subc	.l1x	A4,B4,A4
+|| [B1]	 add	.s2	-1, B1, B1
+   [B1]	 subc	.l1x	A4,B4,A4
+|| [B1]	 add	.s2	-1, B1, B1
+|| [B0]  b	.s1	_divu_loop
+   [B1]	 subc	.l1x	A4,B4,A4
+|| [B1]	 add	.s2	-1, B1, B1
+   [B1]	 subc	.l1x	A4,B4,A4
+|| [B1]	 add	.s2	-1, B1, B1
+   [B1]	 subc	.l1x	A4,B4,A4
+|| [B1]	 add	.s2	-1, B1, B1
+   [B1]	 subc	.l1x	A4,B4,A4
+|| [B1]	 add	.s2	-1, B1, B1
+   [B1]	 subc	.l1x	A4,B4,A4
+|| [B1]	 add	.s2	-1, B1, B1
+	;; loop backwards branch happens here
+
+	 ret	.s2	B3
+||	 mvk	.s1	32, A1
+	 sub	.l1	A1, A6, A6
+	 shl	.s1	A4, A6, A4
+	 shru	.s1	A4, 1, A4
+||	 sub	.l1	A6, 1, A6
+	 or	.l1	A2, A4, A4
+	 shru	.s1	A4, A6, A4
+	 nop
+ENDPROC(__c6xabi_divu)
diff --git a/arch/c6x/lib/llshl.S b/arch/c6x/lib/llshl.S
new file mode 100644
index 0000000..7b105e2
--- /dev/null
+++ b/arch/c6x/lib/llshl.S
@@ -0,0 +1,37 @@
+;;  Copyright (C) 2010 Texas Instruments Incorporated
+;;  Contributed by Mark Salter <msalter@redhat.com>.
+;;
+;; This program is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 2 of the License, or
+;; (at your option) any later version.
+;;
+;; This program is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with this program; if not, write to the Free Software
+;; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+;;  uint64_t __c6xabi_llshl(uint64_t val, uint shift)
+
+#include <linux/linkage.h>
+
+	.text
+ENTRY(__c6xabi_llshl)
+	 mv	.l1x	B4,A1
+   [!A1] b	.s2	B3		; just return if zero shift
+	 mvk	.s1	32,A0
+	 sub	.d1	A0,A1,A0
+	 cmplt	.l1	0,A0,A2
+   [A2]	 shru	.s1	A4,A0,A0
+   [!A2] neg	.l1	A0,A5
+|| [A2]  shl	.s1	A5,A1,A5
+   [!A2] shl	.s1	A4,A5,A5
+|| [A2]  or	.d1	A5,A0,A5
+|| [!A2] mvk	.l1	0,A4
+   [A2]	 shl	.s1	A4,A1,A4
+	 bnop	.s2	B3,5
+ENDPROC(__c6xabi_llshl)
diff --git a/arch/c6x/lib/llshr.S b/arch/c6x/lib/llshr.S
new file mode 100644
index 0000000..fde1bec
--- /dev/null
+++ b/arch/c6x/lib/llshr.S
@@ -0,0 +1,38 @@
+;;  Copyright (C) 2010 Texas Instruments Incorporated
+;;  Contributed by Mark Salter <msalter@redhat.com>.
+;;
+;; This program is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 2 of the License, or
+;; (at your option) any later version.
+;;
+;; This program is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with this program; if not, write to the Free Software
+;; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+;;  uint64_t __c6xabi_llshr(uint64_t val, uint shift)
+
+#include <linux/linkage.h>
+
+	.text
+ENTRY(__c6xabi_llshr)
+	 mv	.l1x	B4,A1
+   [!A1] b	.s2	B3		; return if zero shift count
+	 mvk	.s1	32,A0
+	 sub	.d1	A0,A1,A0
+	 cmplt	.l1	0,A0,A2
+   [A2]  shl	.s1	A5,A0,A0
+	 nop
+   [!A2] neg	.l1	A0,A4
+|| [A2]  shru	.s1	A4,A1,A4
+   [!A2] shr	.s1	A5,A4,A4
+|| [A2]  or	.d1	A4,A0,A4
+   [!A2] shr	.s1	A5,0x1f,A5
+   [A2]  shr	.s1	A5,A1,A5
+	 bnop	.s2	B3,5
+ENDPROC(__c6xabi_llshr)
diff --git a/arch/c6x/lib/llshru.S b/arch/c6x/lib/llshru.S
new file mode 100644
index 0000000..596ae3f
--- /dev/null
+++ b/arch/c6x/lib/llshru.S
@@ -0,0 +1,38 @@
+;;  Copyright (C) 2010 Texas Instruments Incorporated
+;;  Contributed by Mark Salter <msalter@redhat.com>.
+;;
+;; This program is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 2 of the License, or
+;; (at your option) any later version.
+;;
+;; This program is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with this program; if not, write to the Free Software
+;; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+;;  uint64_t __c6xabi_llshru(uint64_t val, uint shift)
+
+#include <linux/linkage.h>
+
+	.text
+ENTRY(__c6xabi_llshru)
+	 mv	.l1x	B4,A1
+   [!A1] b	.s2	B3		; return if zero shift count
+	 mvk	.s1	32,A0
+	 sub	.d1	A0,A1,A0
+	 cmplt	.l1	0,A0,A2
+   [A2]  shl	.s1	A5,A0,A0
+	 nop
+   [!A2] neg	.l1	A0,A4
+|| [A2]  shru	.s1	A4,A1,A4
+   [!A2] shru	.s1	A5,A4,A4
+|| [A2]  or	.d1	A4,A0,A4
+|| [!A2] mvk	.l1	0,A5
+   [A2]  shru	.s1	A5,A1,A5
+	 bnop	.s2	B3,5
+ENDPROC(__c6xabi_llshru)
diff --git a/arch/c6x/lib/memcpy_64plus.S b/arch/c6x/lib/memcpy_64plus.S
new file mode 100644
index 0000000..0bbc2cb
--- /dev/null
+++ b/arch/c6x/lib/memcpy_64plus.S
@@ -0,0 +1,46 @@
+;  Port on Texas Instruments TMS320C6x architecture
+;
+;  Copyright (C) 2006, 2009, 2010 Texas Instruments Incorporated
+;  Author: Aurelien Jacquiot (aurelien.jacquiot@jaluna.com)
+;
+;  This program is free software; you can redistribute it and/or modify
+;  it under the terms of the GNU General Public License version 2 as
+;  published by the Free Software Foundation.
+;
+
+#include <linux/linkage.h>
+
+	.text
+
+ENTRY(memcpy)
+	AND	.L1	0x1,A6,A0
+ ||	AND	.S1	0x2,A6,A1
+ ||	AND	.L2X	0x4,A6,B0
+ ||	MV	.D1	A4,A3
+ ||	MVC	.S2	ILC,B2
+
+   [A0] LDB	.D2T1	*B4++,A5
+   [A1] LDB	.D2T1	*B4++,A7
+   [A1] LDB	.D2T1	*B4++,A8
+   [B0] LDNW	.D2T1	*B4++,A9
+ ||	SHRU	.S2X	A6,0x3,B1
+  [!B1] BNOP	.S2	B3,1
+
+   [A0] STB	.D1T1	A5,*A3++
+ ||[B1] MVC	.S2	B1,ILC
+   [A1] STB	.D1T1	A7,*A3++
+   [A1] STB	.D1T1	A8,*A3++
+   [B0] STNW	.D1T1	A9,*A3++	; return when len < 8
+
+	SPLOOP	2
+
+	LDNDW	.D2T1	*B4++,A9:A8
+	NOP	3
+
+	NOP
+	SPKERNEL	0,0
+ ||	STNDW	.D1T1	A9:A8,*A3++
+
+	BNOP	.S2	B3,4
+	MVC	.S2	B2,ILC
+ENDPROC(memcpy)
diff --git a/arch/c6x/lib/mpyll.S b/arch/c6x/lib/mpyll.S
new file mode 100644
index 0000000..f103441
--- /dev/null
+++ b/arch/c6x/lib/mpyll.S
@@ -0,0 +1,49 @@
+;;  Copyright (C) 2010 Texas Instruments Incorporated
+;;  Contributed by Mark Salter <msalter@redhat.com>.
+;;
+;; This program is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 2 of the License, or
+;; (at your option) any later version.
+;;
+;; This program is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with this program; if not, write to the Free Software
+;; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+#include <linux/linkage.h>
+
+	;; uint64_t __c6xabi_mpyll(uint64_t x, uint64_t y)
+	;;
+	;; 64x64 multiply
+	;; First compute partial results using 32-bit parts of x and y:
+	;;
+	;;   b63	 b32 b31	  b0
+	;;    -----------------------------
+	;;    |      1	    |	   0	  |
+	;;    -----------------------------
+	;;
+	;;   P0 = X0*Y0
+	;;   P1 = X0*Y1 + X1*Y0
+	;;   P2 = X1*Y1
+	;;
+	;;   result = (P2 << 64) + (P1 << 32) + P0
+	;;
+	;; Since the result is also 64-bit, we can skip the P2 term.
+
+	.text
+ENTRY(__c6xabi_mpyll)
+	mpy32u	.m1x	A4,B4,A1:A0	; X0*Y0
+	b	.s2	B3
+ ||	mpy32u	.m2x	B5,A4,B1:B0	; X0*Y1 (don't need upper 32-bits)
+ ||	mpy32u	.m1x	A5,B4,A3:A2	; X1*Y0 (don't need upper 32-bits)
+	nop
+	nop
+	mv	.s1	A0,A4
+	add	.l1x	A2,B0,A5
+	add	.s1	A1,A5,A5
+ENDPROC(__c6xabi_mpyll)
diff --git a/arch/c6x/lib/negll.S b/arch/c6x/lib/negll.S
new file mode 100644
index 0000000..82f4bce
--- /dev/null
+++ b/arch/c6x/lib/negll.S
@@ -0,0 +1,31 @@
+;;  Copyright (C) 2010 Texas Instruments Incorporated
+;;  Contributed by Mark Salter <msalter@redhat.com>.
+;;
+;; This program is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 2 of the License, or
+;; (at your option) any later version.
+;;
+;; This program is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with this program; if not, write to the Free Software
+;; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+;;  int64_t __c6xabi_negll(int64_t val)
+
+#include <linux/linkage.h>
+
+	.text
+ENTRY(__c6xabi_negll)
+	b	.s2	B3
+	mvk	.l1	0,A0
+	subu	.l1	A0,A4,A3:A2
+	sub	.l1	A0,A5,A0
+||	ext	.s1	A3,24,24,A5
+	add	.l1	A5,A0,A5
+	mv	.s1	A2,A4
+ENDPROC(__c6xabi_negll)
diff --git a/arch/c6x/lib/pop_rts.S b/arch/c6x/lib/pop_rts.S
new file mode 100644
index 0000000..d7d96c7
--- /dev/null
+++ b/arch/c6x/lib/pop_rts.S
@@ -0,0 +1,32 @@
+;;  Copyright 2010  Free Software Foundation, Inc.
+;;  Contributed by Bernd Schmidt <bernds@codesourcery.com>.
+;;
+;; This program is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 2 of the License, or
+;; (at your option) any later version.
+;;
+;; This program is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with this program; if not, write to the Free Software
+;; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+#include <linux/linkage.h>
+
+	.text
+
+ENTRY(__c6xabi_pop_rts)
+	lddw	.d2t2	*++B15, B3:B2
+	lddw	.d2t1	*++B15, A11:A10
+	lddw	.d2t2	*++B15, B11:B10
+	lddw	.d2t1	*++B15, A13:A12
+	lddw	.d2t2	*++B15, B13:B12
+	lddw	.d2t1	*++B15, A15:A14
+||	b	.s2	B3
+	ldw	.d2t2	*++B15[2], B14
+	nop	4
+ENDPROC(__c6xabi_pop_rts)
diff --git a/arch/c6x/lib/push_rts.S b/arch/c6x/lib/push_rts.S
new file mode 100644
index 0000000..f6e3db3
--- /dev/null
+++ b/arch/c6x/lib/push_rts.S
@@ -0,0 +1,31 @@
+;;  Copyright 2010  Free Software Foundation, Inc.
+;;  Contributed by Bernd Schmidt <bernds@codesourcery.com>.
+;;
+;; This program is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 2 of the License, or
+;; (at your option) any later version.
+;;
+;; This program is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with this program; if not, write to the Free Software
+;; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+#include <linux/linkage.h>
+
+	.text
+
+ENTRY(__c6xabi_push_rts)
+	stw	.d2t2	B14, *B15--[2]
+	stdw	.d2t1	A15:A14, *B15--
+||	b	.s2x	A3
+	stdw	.d2t2	B13:B12, *B15--
+	stdw	.d2t1	A13:A12, *B15--
+	stdw	.d2t2	B11:B10, *B15--
+	stdw	.d2t1	A11:A10, *B15--
+	stdw	.d2t2	B3:B2, *B15--
+ENDPROC(__c6xabi_push_rts)
diff --git a/arch/c6x/lib/remi.S b/arch/c6x/lib/remi.S
new file mode 100644
index 0000000..6f2ca18
--- /dev/null
+++ b/arch/c6x/lib/remi.S
@@ -0,0 +1,64 @@
+;;  Copyright 2010  Free Software Foundation, Inc.
+;;  Contributed by Bernd Schmidt <bernds@codesourcery.com>.
+;;
+;; This program is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 2 of the License, or
+;; (at your option) any later version.
+;;
+;; This program is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with this program; if not, write to the Free Software
+;; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+#include <linux/linkage.h>
+
+	;; ABI considerations for the divide functions
+	;; The following registers are call-used:
+	;; __c6xabi_divi A0,A1,A2,A4,A6,B0,B1,B2,B4,B5
+	;; __c6xabi_divu A0,A1,A2,A4,A6,B0,B1,B2,B4
+	;; __c6xabi_remi A1,A2,A4,A5,A6,B0,B1,B2,B4
+	;; __c6xabi_remu A1,A4,A5,A7,B0,B1,B2,B4
+	;;
+	;; In our implementation, divu and remu are leaf functions,
+	;; while both divi and remi call into divu.
+	;; A0 is not clobbered by any of the functions.
+	;; divu does not clobber B2 either, which is taken advantage of
+	;; in remi.
+	;; divi uses B5 to hold the original return address during
+	;; the call to divu.
+	;; remi uses B2 and A5 to hold the input values during the
+	;; call to divu.  It stores B3 in on the stack.
+
+	.text
+
+ENTRY(__c6xabi_remi)
+	stw	.d2t2	B3, *B15--[2]
+||	cmpgt	.l1	0, A4, A1
+||	cmpgt	.l2	0, B4, B2
+||	mv	.s1	A4, A5
+||	call	.s2	__c6xabi_divu
+
+   [A1]	neg	.l1	A4, A4
+|| [B2]	neg	.l2	B4, B4
+||	xor	.s2x	B2, A1, B0
+||	mv	.d2	B4, B2
+
+   [B0]	addkpc	.s2	_divu_ret_1, B3, 1
+  [!B0] addkpc	.s2	_divu_ret_2, B3, 1
+	nop	2
+_divu_ret_1:
+	neg	.l1	A4, A4
+_divu_ret_2:
+	ldw	.d2t2	*++B15[2], B3
+
+	mpy32	.m1x	A4, B2, A6
+	nop		3
+	ret	.s2	B3
+	sub	.l1	A5, A6, A4
+	nop	4
+ENDPROC(__c6xabi_remi)
diff --git a/arch/c6x/lib/remu.S b/arch/c6x/lib/remu.S
new file mode 100644
index 0000000..3fae719
--- /dev/null
+++ b/arch/c6x/lib/remu.S
@@ -0,0 +1,82 @@
+;;  Copyright 2010  Free Software Foundation, Inc.
+;;  Contributed by Bernd Schmidt <bernds@codesourcery.com>.
+;;
+;; This program is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 2 of the License, or
+;; (at your option) any later version.
+;;
+;; This program is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with this program; if not, write to the Free Software
+;; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+#include <linux/linkage.h>
+
+	;; ABI considerations for the divide functions
+	;; The following registers are call-used:
+	;; __c6xabi_divi A0,A1,A2,A4,A6,B0,B1,B2,B4,B5
+	;; __c6xabi_divu A0,A1,A2,A4,A6,B0,B1,B2,B4
+	;; __c6xabi_remi A1,A2,A4,A5,A6,B0,B1,B2,B4
+	;; __c6xabi_remu A1,A4,A5,A7,B0,B1,B2,B4
+	;;
+	;; In our implementation, divu and remu are leaf functions,
+	;; while both divi and remi call into divu.
+	;; A0 is not clobbered by any of the functions.
+	;; divu does not clobber B2 either, which is taken advantage of
+	;; in remi.
+	;; divi uses B5 to hold the original return address during
+	;; the call to divu.
+	;; remi uses B2 and A5 to hold the input values during the
+	;; call to divu.  It stores B3 in on the stack.
+
+
+	.text
+
+ENTRY(__c6xabi_remu)
+	;; The ABI seems designed to prevent these functions calling each other,
+	;; so we duplicate most of the divsi3 code here.
+	 mv	.s2x	A4, B1
+	 lmbd	.l2	1, B4, B1
+|| [!B1] b	.s2	B3	; RETURN A
+|| [!B1] mvk	.d2	1, B4
+
+	 mv	.l1x	B1, A7
+||	 shl	.s2	B4, B1, B4
+
+	 cmpltu	.l1x	A4, B4, A1
+   [!A1] sub	.l1x	A4, B4, A4
+	 shru	.s2	B4, 1, B4
+
+_remu_loop:
+	 cmpgt	.l2	B1, 7, B0
+|| [B1]	 subc	.l1x	A4,B4,A4
+|| [B1]	 add	.s2	-1, B1, B1
+	;; RETURN A may happen here (note: must happen before the next branch)
+   [B1]	 subc	.l1x	A4,B4,A4
+|| [B1]	 add	.s2	-1, B1, B1
+|| [B0]	 b	.s1	_remu_loop
+   [B1]	 subc	.l1x	A4,B4,A4
+|| [B1]	 add	.s2	-1, B1, B1
+   [B1]	 subc	.l1x	A4,B4,A4
+|| [B1]	 add	.s2	-1, B1, B1
+   [B1]	 subc	.l1x	A4,B4,A4
+|| [B1]	 add	.s2	-1, B1, B1
+   [B1]	 subc	.l1x	A4,B4,A4
+|| [B1]	 add	.s2	-1, B1, B1
+   [B1]	 subc	.l1x	A4,B4,A4
+|| [B1]	 add	.s2	-1, B1, B1
+	;; loop backwards branch happens here
+
+	 ret	.s2	B3
+   [B1]	 subc	.l1x	A4,B4,A4
+|| [B1]	 add	.s2	-1, B1, B1
+   [B1]	 subc	.l1x	A4,B4,A4
+
+	 extu	.s1	A4, A7, A4
+	 nop	2
+ENDPROC(__c6xabi_remu)
diff --git a/arch/c6x/lib/strasgi.S b/arch/c6x/lib/strasgi.S
new file mode 100644
index 0000000..de274076
--- /dev/null
+++ b/arch/c6x/lib/strasgi.S
@@ -0,0 +1,89 @@
+;;  Copyright 2010  Free Software Foundation, Inc.
+;;  Contributed by Bernd Schmidt <bernds@codesourcery.com>.
+;;
+;; This program is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 2 of the License, or
+;; (at your option) any later version.
+;;
+;; This program is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with this program; if not, write to the Free Software
+;; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+#include <linux/linkage.h>
+
+	.text
+
+ENTRY(__c6xabi_strasgi)
+	;; This is essentially memcpy, with alignment known to be at least
+	;; 4, and the size a multiple of 4 greater than or equal to 28.
+	 ldw	.d2t1	*B4++, A0
+||	 mvk	.s2	16, B1
+	 ldw	.d2t1	*B4++, A1
+||	 mvk	.s2	20, B2
+||	 sub	.d1	A6, 24, A6
+	 ldw	.d2t1	*B4++, A5
+	 ldw	.d2t1	*B4++, A7
+||	 mv	.l2x	A6, B7
+	 ldw	.d2t1	*B4++, A8
+	 ldw	.d2t1	*B4++, A9
+||	 mv	.s2x	A0, B5
+||	 cmpltu	.l2	B2, B7, B0
+
+_strasgi_loop:
+	 stw	.d1t2	B5, *A4++
+|| [B0]	 ldw	.d2t1	*B4++, A0
+||	 mv	.s2x	A1, B5
+||	 mv	.l2	B7, B6
+
+   [B0]	 sub	.d2	B6, 24, B7
+|| [B0]	 b	.s2	_strasgi_loop
+||	 cmpltu	.l2	B1, B6, B0
+
+   [B0]	 ldw	.d2t1	*B4++, A1
+||	 stw	.d1t2	B5, *A4++
+||	 mv	.s2x	A5, B5
+||	 cmpltu	.l2	12, B6, B0
+
+   [B0]	 ldw	.d2t1	*B4++, A5
+||	 stw	.d1t2	B5, *A4++
+||	 mv	.s2x	A7, B5
+||	 cmpltu	.l2	8, B6, B0
+
+   [B0]	 ldw	.d2t1	*B4++, A7
+||	 stw	.d1t2	B5, *A4++
+||	 mv	.s2x	A8, B5
+||	 cmpltu	.l2	4, B6, B0
+
+   [B0]	 ldw	.d2t1	*B4++, A8
+||	 stw	.d1t2	B5, *A4++
+||	 mv	.s2x	A9, B5
+||	 cmpltu	.l2	0, B6, B0
+
+   [B0]	 ldw	.d2t1	*B4++, A9
+||	 stw	.d1t2	B5, *A4++
+||	 mv	.s2x	A0, B5
+||	 cmpltu	.l2	B2, B7, B0
+
+	;; loop back branch happens here
+
+	 cmpltu	.l2	B1, B6, B0
+||	 ret	.s2	b3
+
+   [B0]	 stw	.d1t1	A1, *A4++
+||	 cmpltu	.l2	12, B6, B0
+   [B0]	 stw	.d1t1	A5, *A4++
+||	 cmpltu	.l2	8, B6, B0
+   [B0]	 stw	.d1t1	A7, *A4++
+||	 cmpltu	.l2	4, B6, B0
+   [B0]	 stw	.d1t1	A8, *A4++
+||	 cmpltu	.l2	0, B6, B0
+   [B0]	 stw	.d1t1	A9, *A4++
+
+	;; return happens here
+ENDPROC(__c6xabi_strasgi)
diff --git a/arch/c6x/lib/strasgi_64plus.S b/arch/c6x/lib/strasgi_64plus.S
new file mode 100644
index 0000000..c9fd159
--- /dev/null
+++ b/arch/c6x/lib/strasgi_64plus.S
@@ -0,0 +1,39 @@
+;;  Copyright 2010  Free Software Foundation, Inc.
+;;  Contributed by Bernd Schmidt <bernds@codesourcery.com>.
+;;
+;; This program is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 2 of the License, or
+;; (at your option) any later version.
+;;
+;; This program is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with this program; if not, write to the Free Software
+;; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+#include <linux/linkage.h>
+
+	.text
+
+ENTRY(__c6xabi_strasgi_64plus)
+	shru	.s2x	a6, 2, b31
+||	mv	.s1	a4, a30
+||	mv	.d2	b4, b30
+
+	add	.s2	-4, b31, b31
+
+	sploopd		1
+||	mvc	.s2	b31, ilc
+	ldw	.d2t2	*b30++, b31
+	nop	4
+	mv	.s1x	b31,a31
+	spkernel	6, 0
+||	stw	.d1t1	a31, *a30++
+
+	ret	.s2	b3
+	nop 5
+ENDPROC(__c6xabi_strasgi_64plus)
diff --git a/arch/c6x/mm/Makefile b/arch/c6x/mm/Makefile
new file mode 100644
index 0000000..136a975
--- /dev/null
+++ b/arch/c6x/mm/Makefile
@@ -0,0 +1,5 @@
+#
+# Makefile for the linux c6x-specific parts of the memory manager.
+#
+
+obj-y := init.o dma-coherent.o
diff --git a/arch/c6x/mm/dma-coherent.c b/arch/c6x/mm/dma-coherent.c
new file mode 100644
index 0000000..4187e51
--- /dev/null
+++ b/arch/c6x/mm/dma-coherent.c
@@ -0,0 +1,143 @@
+/*
+ *  Port on Texas Instruments TMS320C6x architecture
+ *
+ *  Copyright (C) 2004, 2009, 2010, 2011 Texas Instruments Incorporated
+ *  Author: Aurelien Jacquiot <aurelien.jacquiot@ti.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ *
+ *  DMA uncached mapping support.
+ *
+ *  Using code pulled from ARM
+ *  Copyright (C) 2000-2004 Russell King
+ *
+ */
+#include <linux/slab.h>
+#include <linux/bitmap.h>
+#include <linux/bitops.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/dma-mapping.h>
+#include <linux/memblock.h>
+
+#include <asm/page.h>
+
+/*
+ * DMA coherent memory management, can be redefined using the memdma=
+ * kernel command line
+ */
+
+/* none by default */
+static phys_addr_t dma_base;
+static u32 dma_size;
+static u32 dma_pages;
+
+static unsigned long *dma_bitmap;
+
+/* bitmap lock */
+static DEFINE_SPINLOCK(dma_lock);
+
+/*
+ * Return a DMA coherent and contiguous memory chunk from the DMA memory
+ */
+static inline u32 __alloc_dma_pages(int order)
+{
+	unsigned long flags;
+	u32 pos;
+
+	spin_lock_irqsave(&dma_lock, flags);
+	pos = bitmap_find_free_region(dma_bitmap, dma_pages, order);
+	spin_unlock_irqrestore(&dma_lock, flags);
+
+	return dma_base + (pos << PAGE_SHIFT);
+}
+
+static void __free_dma_pages(u32 addr, int order)
+{
+	unsigned long flags;
+	u32 pos = (addr - dma_base) >> PAGE_SHIFT;
+
+	if (addr < dma_base || (pos + (1 << order)) >= dma_pages) {
+		printk(KERN_ERR "%s: freeing outside range.\n", __func__);
+		BUG();
+	}
+
+	spin_lock_irqsave(&dma_lock, flags);
+	bitmap_release_region(dma_bitmap, pos, order);
+	spin_unlock_irqrestore(&dma_lock, flags);
+}
+
+/*
+ * Allocate DMA coherent memory space and return both the kernel
+ * virtual and DMA address for that space.
+ */
+void *dma_alloc_coherent(struct device *dev, size_t size,
+			 dma_addr_t *handle, gfp_t gfp)
+{
+	u32 paddr;
+	int order;
+
+	if (!dma_size || !size)
+		return NULL;
+
+	order = get_count_order(((size - 1) >> PAGE_SHIFT) + 1);
+
+	paddr = __alloc_dma_pages(order);
+
+	if (handle)
+		*handle = paddr;
+
+	if (!paddr)
+		return NULL;
+
+	return phys_to_virt(paddr);
+}
+EXPORT_SYMBOL(dma_alloc_coherent);
+
+/*
+ * Free DMA coherent memory as defined by the above mapping.
+ */
+void dma_free_coherent(struct device *dev, size_t size, void *vaddr,
+		       dma_addr_t dma_handle)
+{
+	int order;
+
+	if (!dma_size || !size)
+		return;
+
+	order = get_count_order(((size - 1) >> PAGE_SHIFT) + 1);
+
+	__free_dma_pages(virt_to_phys(vaddr), order);
+}
+EXPORT_SYMBOL(dma_free_coherent);
+
+/*
+ * Initialise the coherent DMA memory allocator using the given uncached region.
+ */
+void __init coherent_mem_init(phys_addr_t start, u32 size)
+{
+	phys_addr_t bitmap_phys;
+
+	if (!size)
+		return;
+
+	printk(KERN_INFO
+	       "Coherent memory (DMA) region start=0x%x size=0x%x\n",
+	       start, size);
+
+	dma_base = start;
+	dma_size = size;
+
+	/* allocate bitmap */
+	dma_pages = dma_size >> PAGE_SHIFT;
+	if (dma_size & (PAGE_SIZE - 1))
+		++dma_pages;
+
+	bitmap_phys = memblock_alloc(BITS_TO_LONGS(dma_pages) * sizeof(long),
+				     sizeof(long));
+
+	dma_bitmap = phys_to_virt(bitmap_phys);
+	memset(dma_bitmap, 0, dma_pages * PAGE_SIZE);
+}
diff --git a/arch/c6x/mm/init.c b/arch/c6x/mm/init.c
new file mode 100644
index 0000000..89395f0
--- /dev/null
+++ b/arch/c6x/mm/init.c
@@ -0,0 +1,113 @@
+/*
+ *  Port on Texas Instruments TMS320C6x architecture
+ *
+ *  Copyright (C) 2004, 2009, 2010, 2011 Texas Instruments Incorporated
+ *  Author: Aurelien Jacquiot (aurelien.jacquiot@jaluna.com)
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ */
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/module.h>
+#include <linux/bootmem.h>
+#ifdef CONFIG_BLK_DEV_RAM
+#include <linux/blkdev.h>
+#endif
+#include <linux/initrd.h>
+
+#include <asm/sections.h>
+
+/*
+ * ZERO_PAGE is a special page that is used for zero-initialized
+ * data and COW.
+ */
+unsigned long empty_zero_page;
+EXPORT_SYMBOL(empty_zero_page);
+
+/*
+ * paging_init() continues the virtual memory environment setup which
+ * was begun by the code in arch/head.S.
+ * The parameters are pointers to where to stick the starting and ending
+ * addresses  of available kernel virtual memory.
+ */
+void __init paging_init(void)
+{
+	struct pglist_data *pgdat = NODE_DATA(0);
+	unsigned long zones_size[MAX_NR_ZONES] = {0, };
+
+	empty_zero_page      = (unsigned long) alloc_bootmem_pages(PAGE_SIZE);
+	memset((void *)empty_zero_page, 0, PAGE_SIZE);
+
+	/*
+	 * Set up user data space
+	 */
+	set_fs(KERNEL_DS);
+
+	/*
+	 * Define zones
+	 */
+	zones_size[ZONE_NORMAL] = (memory_end - PAGE_OFFSET) >> PAGE_SHIFT;
+	pgdat->node_zones[ZONE_NORMAL].zone_start_pfn =
+		__pa(PAGE_OFFSET) >> PAGE_SHIFT;
+
+	free_area_init(zones_size);
+}
+
+void __init mem_init(void)
+{
+	int codek, datak;
+	unsigned long tmp;
+	unsigned long len = memory_end - memory_start;
+
+	high_memory = (void *)(memory_end & PAGE_MASK);
+
+	/* this will put all memory onto the freelists */
+	totalram_pages = free_all_bootmem();
+
+	codek = (_etext - _stext) >> 10;
+	datak = (_end - _sdata) >> 10;
+
+	tmp = nr_free_pages() << PAGE_SHIFT;
+	printk(KERN_INFO "Memory: %luk/%luk RAM (%dk kernel code, %dk data)\n",
+	       tmp >> 10, len >> 10, codek, datak);
+}
+
+#ifdef CONFIG_BLK_DEV_INITRD
+void __init free_initrd_mem(unsigned long start, unsigned long end)
+{
+	int pages = 0;
+	for (; start < end; start += PAGE_SIZE) {
+		ClearPageReserved(virt_to_page(start));
+		init_page_count(virt_to_page(start));
+		free_page(start);
+		totalram_pages++;
+		pages++;
+	}
+	printk(KERN_INFO "Freeing initrd memory: %luk freed\n",
+	       (pages * PAGE_SIZE) >> 10);
+}
+#endif
+
+void __init free_initmem(void)
+{
+	unsigned long addr;
+
+	/*
+	 * The following code should be cool even if these sections
+	 * are not page aligned.
+	 */
+	addr = PAGE_ALIGN((unsigned long)(__init_begin));
+
+	/* next to check that the page we free is not a partial page */
+	for (; addr + PAGE_SIZE < (unsigned long)(__init_end);
+	     addr += PAGE_SIZE) {
+		ClearPageReserved(virt_to_page(addr));
+		init_page_count(virt_to_page(addr));
+		free_page(addr);
+		totalram_pages++;
+	}
+	printk(KERN_INFO "Freeing unused kernel memory: %dK freed\n",
+	       (int) ((addr - PAGE_ALIGN((long) &__init_begin)) >> 10));
+}
diff --git a/arch/c6x/platforms/Kconfig b/arch/c6x/platforms/Kconfig
new file mode 100644
index 0000000..401ee67
--- /dev/null
+++ b/arch/c6x/platforms/Kconfig
@@ -0,0 +1,16 @@
+
+config SOC_TMS320C6455
+	bool "TMS320C6455"
+	default n
+
+config SOC_TMS320C6457
+	bool "TMS320C6457"
+	default n
+
+config SOC_TMS320C6472
+	bool "TMS320C6472"
+	default n
+
+config SOC_TMS320C6474
+	bool "TMS320C6474"
+	default n
diff --git a/arch/c6x/platforms/Makefile b/arch/c6x/platforms/Makefile
new file mode 100644
index 0000000..9a95b9b
--- /dev/null
+++ b/arch/c6x/platforms/Makefile
@@ -0,0 +1,12 @@
+#
+# Makefile for arch/c6x/platforms
+#
+# Copyright 2010, 2011 Texas Instruments Incorporated
+#
+
+obj-y = platform.o cache.o megamod-pic.o pll.o plldata.o timer64.o
+obj-y += dscr.o
+
+# SoC objects
+obj-$(CONFIG_SOC_TMS320C6455)   += emif.o
+obj-$(CONFIG_SOC_TMS320C6457)   += emif.o
diff --git a/arch/c6x/platforms/cache.c b/arch/c6x/platforms/cache.c
new file mode 100644
index 0000000..86318a1
--- /dev/null
+++ b/arch/c6x/platforms/cache.c
@@ -0,0 +1,445 @@
+/*
+ *  Copyright (C) 2011 Texas Instruments Incorporated
+ *  Author: Mark Salter <msalter@redhat.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ */
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/io.h>
+
+#include <asm/cache.h>
+#include <asm/soc.h>
+
+/*
+ * Internal Memory Control Registers for caches
+ */
+#define IMCR_CCFG	  0x0000
+#define IMCR_L1PCFG	  0x0020
+#define IMCR_L1PCC	  0x0024
+#define IMCR_L1DCFG	  0x0040
+#define IMCR_L1DCC	  0x0044
+#define IMCR_L2ALLOC0	  0x2000
+#define IMCR_L2ALLOC1	  0x2004
+#define IMCR_L2ALLOC2	  0x2008
+#define IMCR_L2ALLOC3	  0x200c
+#define IMCR_L2WBAR	  0x4000
+#define IMCR_L2WWC	  0x4004
+#define IMCR_L2WIBAR	  0x4010
+#define IMCR_L2WIWC	  0x4014
+#define IMCR_L2IBAR	  0x4018
+#define IMCR_L2IWC	  0x401c
+#define IMCR_L1PIBAR	  0x4020
+#define IMCR_L1PIWC	  0x4024
+#define IMCR_L1DWIBAR	  0x4030
+#define IMCR_L1DWIWC	  0x4034
+#define IMCR_L1DWBAR	  0x4040
+#define IMCR_L1DWWC	  0x4044
+#define IMCR_L1DIBAR	  0x4048
+#define IMCR_L1DIWC	  0x404c
+#define IMCR_L2WB	  0x5000
+#define IMCR_L2WBINV	  0x5004
+#define IMCR_L2INV	  0x5008
+#define IMCR_L1PINV	  0x5028
+#define IMCR_L1DWB	  0x5040
+#define IMCR_L1DWBINV	  0x5044
+#define IMCR_L1DINV	  0x5048
+#define IMCR_MAR_BASE	  0x8000
+#define IMCR_MAR96_111	  0x8180
+#define IMCR_MAR128_191   0x8200
+#define IMCR_MAR224_239   0x8380
+#define IMCR_L2MPFAR	  0xa000
+#define IMCR_L2MPFSR	  0xa004
+#define IMCR_L2MPFCR	  0xa008
+#define IMCR_L2MPLK0	  0xa100
+#define IMCR_L2MPLK1	  0xa104
+#define IMCR_L2MPLK2	  0xa108
+#define IMCR_L2MPLK3	  0xa10c
+#define IMCR_L2MPLKCMD	  0xa110
+#define IMCR_L2MPLKSTAT   0xa114
+#define IMCR_L2MPPA_BASE  0xa200
+#define IMCR_L1PMPFAR	  0xa400
+#define IMCR_L1PMPFSR	  0xa404
+#define IMCR_L1PMPFCR	  0xa408
+#define IMCR_L1PMPLK0	  0xa500
+#define IMCR_L1PMPLK1	  0xa504
+#define IMCR_L1PMPLK2	  0xa508
+#define IMCR_L1PMPLK3	  0xa50c
+#define IMCR_L1PMPLKCMD   0xa510
+#define IMCR_L1PMPLKSTAT  0xa514
+#define IMCR_L1PMPPA_BASE 0xa600
+#define IMCR_L1DMPFAR	  0xac00
+#define IMCR_L1DMPFSR	  0xac04
+#define IMCR_L1DMPFCR	  0xac08
+#define IMCR_L1DMPLK0	  0xad00
+#define IMCR_L1DMPLK1	  0xad04
+#define IMCR_L1DMPLK2	  0xad08
+#define IMCR_L1DMPLK3	  0xad0c
+#define IMCR_L1DMPLKCMD   0xad10
+#define IMCR_L1DMPLKSTAT  0xad14
+#define IMCR_L1DMPPA_BASE 0xae00
+#define IMCR_L2PDWAKE0	  0xc040
+#define IMCR_L2PDWAKE1	  0xc044
+#define IMCR_L2PDSLEEP0   0xc050
+#define IMCR_L2PDSLEEP1   0xc054
+#define IMCR_L2PDSTAT0	  0xc060
+#define IMCR_L2PDSTAT1	  0xc064
+
+/*
+ * CCFG register values and bits
+ */
+#define L2MODE_0K_CACHE   0x0
+#define L2MODE_32K_CACHE  0x1
+#define L2MODE_64K_CACHE  0x2
+#define L2MODE_128K_CACHE 0x3
+#define L2MODE_256K_CACHE 0x7
+
+#define L2PRIO_URGENT     0x0
+#define L2PRIO_HIGH       0x1
+#define L2PRIO_MEDIUM     0x2
+#define L2PRIO_LOW        0x3
+
+#define CCFG_ID           0x100   /* Invalidate L1P bit */
+#define CCFG_IP           0x200   /* Invalidate L1D bit */
+
+static void __iomem *cache_base;
+
+/*
+ * L1 & L2 caches generic functions
+ */
+#define imcr_get(reg) soc_readl(cache_base + (reg))
+#define imcr_set(reg, value) \
+do {								\
+	soc_writel((value), cache_base + (reg));		\
+	soc_readl(cache_base + (reg));				\
+} while (0)
+
+static void cache_block_operation_wait(unsigned int wc_reg)
+{
+	/* Wait for completion */
+	while (imcr_get(wc_reg))
+		cpu_relax();
+}
+
+static DEFINE_SPINLOCK(cache_lock);
+
+/*
+ * Generic function to perform a block cache operation as
+ * invalidate or writeback/invalidate
+ */
+static void cache_block_operation(unsigned int *start,
+				  unsigned int *end,
+				  unsigned int bar_reg,
+				  unsigned int wc_reg)
+{
+	unsigned long flags;
+	unsigned int wcnt =
+		(L2_CACHE_ALIGN_CNT((unsigned int) end)
+		 - L2_CACHE_ALIGN_LOW((unsigned int) start)) >> 2;
+	unsigned int wc = 0;
+
+	for (; wcnt; wcnt -= wc, start += wc) {
+loop:
+		spin_lock_irqsave(&cache_lock, flags);
+
+		/*
+		 * If another cache operation is occuring
+		 */
+		if (unlikely(imcr_get(wc_reg))) {
+			spin_unlock_irqrestore(&cache_lock, flags);
+
+			/* Wait for previous operation completion */
+			cache_block_operation_wait(wc_reg);
+
+			/* Try again */
+			goto loop;
+		}
+
+		imcr_set(bar_reg, L2_CACHE_ALIGN_LOW((unsigned int) start));
+
+		if (wcnt > 0xffff)
+			wc = 0xffff;
+		else
+			wc = wcnt;
+
+		/* Set word count value in the WC register */
+		imcr_set(wc_reg, wc & 0xffff);
+
+		spin_unlock_irqrestore(&cache_lock, flags);
+
+		/* Wait for completion */
+		cache_block_operation_wait(wc_reg);
+	}
+}
+
+static void cache_block_operation_nowait(unsigned int *start,
+					 unsigned int *end,
+					 unsigned int bar_reg,
+					 unsigned int wc_reg)
+{
+	unsigned long flags;
+	unsigned int wcnt =
+		(L2_CACHE_ALIGN_CNT((unsigned int) end)
+		 - L2_CACHE_ALIGN_LOW((unsigned int) start)) >> 2;
+	unsigned int wc = 0;
+
+	for (; wcnt; wcnt -= wc, start += wc) {
+
+		spin_lock_irqsave(&cache_lock, flags);
+
+		imcr_set(bar_reg, L2_CACHE_ALIGN_LOW((unsigned int) start));
+
+		if (wcnt > 0xffff)
+			wc = 0xffff;
+		else
+			wc = wcnt;
+
+		/* Set word count value in the WC register */
+		imcr_set(wc_reg, wc & 0xffff);
+
+		spin_unlock_irqrestore(&cache_lock, flags);
+
+		/* Don't wait for completion on last cache operation */
+		if (wcnt > 0xffff)
+			cache_block_operation_wait(wc_reg);
+	}
+}
+
+/*
+ * L1 caches management
+ */
+
+/*
+ * Disable L1 caches
+ */
+void L1_cache_off(void)
+{
+	unsigned int dummy;
+
+	imcr_set(IMCR_L1PCFG, 0);
+	dummy = imcr_get(IMCR_L1PCFG);
+
+	imcr_set(IMCR_L1DCFG, 0);
+	dummy = imcr_get(IMCR_L1DCFG);
+}
+
+/*
+ * Enable L1 caches
+ */
+void L1_cache_on(void)
+{
+	unsigned int dummy;
+
+	imcr_set(IMCR_L1PCFG, 7);
+	dummy = imcr_get(IMCR_L1PCFG);
+
+	imcr_set(IMCR_L1DCFG, 7);
+	dummy = imcr_get(IMCR_L1DCFG);
+}
+
+/*
+ *  L1P global-invalidate all
+ */
+void L1P_cache_global_invalidate(void)
+{
+	unsigned int set = 1;
+	imcr_set(IMCR_L1PINV, set);
+	while (imcr_get(IMCR_L1PINV) & 1)
+		cpu_relax();
+}
+
+/*
+ *  L1D global-invalidate all
+ *
+ * Warning: this operation causes all updated data in L1D to
+ * be discarded rather than written back to the lower levels of
+ * memory
+ */
+void L1D_cache_global_invalidate(void)
+{
+	unsigned int set = 1;
+	imcr_set(IMCR_L1DINV, set);
+	while (imcr_get(IMCR_L1DINV) & 1)
+		cpu_relax();
+}
+
+void L1D_cache_global_writeback(void)
+{
+	unsigned int set = 1;
+	imcr_set(IMCR_L1DWB, set);
+	while (imcr_get(IMCR_L1DWB) & 1)
+		cpu_relax();
+}
+
+void L1D_cache_global_writeback_invalidate(void)
+{
+	unsigned int set = 1;
+	imcr_set(IMCR_L1DWBINV, set);
+	while (imcr_get(IMCR_L1DWBINV) & 1)
+		cpu_relax();
+}
+
+/*
+ * L2 caches management
+ */
+
+/*
+ * Set L2 operation mode
+ */
+void L2_cache_set_mode(unsigned int mode)
+{
+	unsigned int ccfg = imcr_get(IMCR_CCFG);
+
+	/* Clear and set the L2MODE bits in CCFG */
+	ccfg &= ~7;
+	ccfg |= (mode & 7);
+	imcr_set(IMCR_CCFG, ccfg);
+	ccfg = imcr_get(IMCR_CCFG);
+}
+
+/*
+ *  L2 global-writeback and global-invalidate all
+ */
+void L2_cache_global_writeback_invalidate(void)
+{
+	imcr_set(IMCR_L2WBINV, 1);
+	while (imcr_get(IMCR_L2WBINV))
+		cpu_relax();
+}
+
+/*
+ *  L2 global-writeback all
+ */
+void L2_cache_global_writeback(void)
+{
+	imcr_set(IMCR_L2WB, 1);
+	while (imcr_get(IMCR_L2WB))
+		cpu_relax();
+}
+
+/*
+ * Cacheability controls
+ */
+void enable_caching(unsigned long start, unsigned long end)
+{
+	unsigned int mar = IMCR_MAR_BASE + ((start >> 24) << 2);
+	unsigned int mar_e = IMCR_MAR_BASE + ((end >> 24) << 2);
+
+	for (; mar <= mar_e; mar += 4)
+		imcr_set(mar, imcr_get(mar) | 1);
+}
+
+void disable_caching(unsigned long start, unsigned long end)
+{
+	unsigned int mar = IMCR_MAR_BASE + ((start >> 24) << 2);
+	unsigned int mar_e = IMCR_MAR_BASE + ((end >> 24) << 2);
+
+	for (; mar <= mar_e; mar += 4)
+		imcr_set(mar, imcr_get(mar) & ~1);
+}
+
+
+/*
+ *  L1 block operations
+ */
+void L1P_cache_block_invalidate(unsigned int start, unsigned int end)
+{
+	cache_block_operation((unsigned int *) start,
+			      (unsigned int *) end,
+			      IMCR_L1PIBAR, IMCR_L1PIWC);
+}
+
+void L1D_cache_block_invalidate(unsigned int start, unsigned int end)
+{
+	cache_block_operation((unsigned int *) start,
+			      (unsigned int *) end,
+			      IMCR_L1DIBAR, IMCR_L1DIWC);
+}
+
+void L1D_cache_block_writeback_invalidate(unsigned int start, unsigned int end)
+{
+	cache_block_operation((unsigned int *) start,
+			      (unsigned int *) end,
+			      IMCR_L1DWIBAR, IMCR_L1DWIWC);
+}
+
+void L1D_cache_block_writeback(unsigned int start, unsigned int end)
+{
+	cache_block_operation((unsigned int *) start,
+			      (unsigned int *) end,
+			      IMCR_L1DWBAR, IMCR_L1DWWC);
+}
+
+/*
+ *  L2 block operations
+ */
+void L2_cache_block_invalidate(unsigned int start, unsigned int end)
+{
+	cache_block_operation((unsigned int *) start,
+			      (unsigned int *) end,
+			      IMCR_L2IBAR, IMCR_L2IWC);
+}
+
+void L2_cache_block_writeback(unsigned int start, unsigned int end)
+{
+	cache_block_operation((unsigned int *) start,
+			      (unsigned int *) end,
+			      IMCR_L2WBAR, IMCR_L2WWC);
+}
+
+void L2_cache_block_writeback_invalidate(unsigned int start, unsigned int end)
+{
+	cache_block_operation((unsigned int *) start,
+			      (unsigned int *) end,
+			      IMCR_L2WIBAR, IMCR_L2WIWC);
+}
+
+void L2_cache_block_invalidate_nowait(unsigned int start, unsigned int end)
+{
+	cache_block_operation_nowait((unsigned int *) start,
+				     (unsigned int *) end,
+				     IMCR_L2IBAR, IMCR_L2IWC);
+}
+
+void L2_cache_block_writeback_nowait(unsigned int start, unsigned int end)
+{
+	cache_block_operation_nowait((unsigned int *) start,
+				     (unsigned int *) end,
+				     IMCR_L2WBAR, IMCR_L2WWC);
+}
+
+void L2_cache_block_writeback_invalidate_nowait(unsigned int start,
+						unsigned int end)
+{
+	cache_block_operation_nowait((unsigned int *) start,
+				     (unsigned int *) end,
+				     IMCR_L2WIBAR, IMCR_L2WIWC);
+}
+
+
+/*
+ * L1 and L2 caches configuration
+ */
+void __init c6x_cache_init(void)
+{
+	struct device_node *node;
+
+	node = of_find_compatible_node(NULL, NULL, "ti,c64x+cache");
+	if (!node)
+		return;
+
+	cache_base = of_iomap(node, 0);
+
+	of_node_put(node);
+
+	if (!cache_base)
+		return;
+
+	/* Set L2 caches on the the whole L2 SRAM memory */
+	L2_cache_set_mode(L2MODE_SIZE);
+
+	/* Enable L1 */
+	L1_cache_on();
+}
diff --git a/arch/c6x/platforms/dscr.c b/arch/c6x/platforms/dscr.c
new file mode 100644
index 0000000..f848a65
--- /dev/null
+++ b/arch/c6x/platforms/dscr.c
@@ -0,0 +1,598 @@
+/*
+ *  Device State Control Registers driver
+ *
+ *  Copyright (C) 2011 Texas Instruments Incorporated
+ *  Author: Mark Salter <msalter@redhat.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ */
+
+/*
+ * The Device State Control Registers (DSCR) provide SoC level control over
+ * a number of peripherals. Details vary considerably among the various SoC
+ * parts. In general, the DSCR block will provide one or more configuration
+ * registers often protected by a lock register. One or more key values must
+ * be written to a lock register in order to unlock the configuration register.
+ * The configuration register may be used to enable (and disable in some
+ * cases) SoC pin drivers, peripheral clock sources (internal or pin), etc.
+ * In some cases, a configuration register is write once or the individual
+ * bits are write once. That is, you may be able to enable a device, but
+ * will not be able to disable it.
+ *
+ * In addition to device configuration, the DSCR block may provide registers
+ * which are used to reset SoC peripherals, provide device ID information,
+ * provide MAC addresses, and other miscellaneous functions.
+ */
+
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_platform.h>
+#include <linux/module.h>
+#include <linux/io.h>
+#include <linux/delay.h>
+#include <asm/soc.h>
+#include <asm/dscr.h>
+
+#define MAX_DEVSTATE_IDS   32
+#define MAX_DEVCTL_REGS     8
+#define MAX_DEVSTAT_REGS    8
+#define MAX_LOCKED_REGS     4
+#define MAX_SOC_EMACS       2
+
+struct rmii_reset_reg {
+	u32 reg;
+	u32 mask;
+};
+
+/*
+ * Some registerd may be locked. In order to write to these
+ * registers, the key value must first be written to the lockreg.
+ */
+struct locked_reg {
+	u32 reg;	/* offset from base */
+	u32 lockreg;	/* offset from base */
+	u32 key;	/* unlock key */
+};
+
+/*
+ * This describes a contiguous area of like control bits used to enable/disable
+ * SoC devices. Each controllable device is given an ID which is used by the
+ * individual device drivers to control the device state. These IDs start at
+ * zero and are assigned sequentially to the control bitfield ranges described
+ * by this structure.
+ */
+struct devstate_ctl_reg {
+	u32 reg;		/* register holding the control bits */
+	u8  start_id;		/* start id of this range */
+	u8  num_ids;		/* number of devices in this range */
+	u8  enable_only;	/* bits are write-once to enable only */
+	u8  enable;		/* value used to enable device */
+	u8  disable;		/* value used to disable device */
+	u8  shift;		/* starting (rightmost) bit in range */
+	u8  nbits;		/* number of bits per device */
+};
+
+
+/*
+ * This describes a region of status bits indicating the state of
+ * various devices. This is used internally to wait for status
+ * change completion when enabling/disabling a device. Status is
+ * optional and not all device controls will have a corresponding
+ * status.
+ */
+struct devstate_stat_reg {
+	u32 reg;		/* register holding the status bits */
+	u8  start_id;		/* start id of this range */
+	u8  num_ids;		/* number of devices in this range */
+	u8  enable;		/* value indicating enabled state */
+	u8  disable;		/* value indicating disabled state */
+	u8  shift;		/* starting (rightmost) bit in range */
+	u8  nbits;		/* number of bits per device */
+};
+
+struct devstate_info {
+	struct devstate_ctl_reg *ctl;
+	struct devstate_stat_reg *stat;
+};
+
+/* These are callbacks to SOC-specific code. */
+struct dscr_ops {
+	void (*init)(struct device_node *node);
+};
+
+struct dscr_regs {
+	spinlock_t		lock;
+	void __iomem		*base;
+	u32			kick_reg[2];
+	u32			kick_key[2];
+	struct locked_reg	locked[MAX_LOCKED_REGS];
+	struct devstate_info	devstate_info[MAX_DEVSTATE_IDS];
+	struct rmii_reset_reg   rmii_resets[MAX_SOC_EMACS];
+	struct devstate_ctl_reg devctl[MAX_DEVCTL_REGS];
+	struct devstate_stat_reg devstat[MAX_DEVSTAT_REGS];
+};
+
+static struct dscr_regs	dscr;
+
+static struct locked_reg *find_locked_reg(u32 reg)
+{
+	int i;
+
+	for (i = 0; i < MAX_LOCKED_REGS; i++)
+		if (dscr.locked[i].key && reg == dscr.locked[i].reg)
+			return &dscr.locked[i];
+	return NULL;
+}
+
+/*
+ * Write to a register with one lock
+ */
+static void dscr_write_locked1(u32 reg, u32 val,
+			       u32 lock, u32 key)
+{
+	void __iomem *reg_addr = dscr.base + reg;
+	void __iomem *lock_addr = dscr.base + lock;
+
+	/*
+	 * For some registers, the lock is relocked after a short number
+	 * of cycles. We have to put the lock write and register write in
+	 * the same fetch packet to meet this timing. The .align ensures
+	 * the two stw instructions are in the same fetch packet.
+	 */
+	asm volatile ("b	.s2	0f\n"
+		      "nop	5\n"
+		      "    .align 5\n"
+		      "0:\n"
+		      "stw	.D1T2	%3,*%2\n"
+		      "stw	.D1T2	%1,*%0\n"
+		      :
+		      : "a"(reg_addr), "b"(val), "a"(lock_addr), "b"(key)
+		);
+
+	/* in case the hw doesn't reset the lock */
+	soc_writel(0, lock_addr);
+}
+
+/*
+ * Write to a register protected by two lock registers
+ */
+static void dscr_write_locked2(u32 reg, u32 val,
+			       u32 lock0, u32 key0,
+			       u32 lock1, u32 key1)
+{
+	soc_writel(key0, dscr.base + lock0);
+	soc_writel(key1, dscr.base + lock1);
+	soc_writel(val, dscr.base + reg);
+	soc_writel(0, dscr.base + lock0);
+	soc_writel(0, dscr.base + lock1);
+}
+
+static void dscr_write(u32 reg, u32 val)
+{
+	struct locked_reg *lock;
+
+	lock = find_locked_reg(reg);
+	if (lock)
+		dscr_write_locked1(reg, val, lock->lockreg, lock->key);
+	else if (dscr.kick_key[0])
+		dscr_write_locked2(reg, val, dscr.kick_reg[0], dscr.kick_key[0],
+				   dscr.kick_reg[1], dscr.kick_key[1]);
+	else
+		soc_writel(val, dscr.base + reg);
+}
+
+
+/*
+ * Drivers can use this interface to enable/disable SoC IP blocks.
+ */
+void dscr_set_devstate(int id, enum dscr_devstate_t state)
+{
+	struct devstate_ctl_reg *ctl;
+	struct devstate_stat_reg *stat;
+	struct devstate_info *info;
+	u32 ctl_val, val;
+	int ctl_shift, ctl_mask;
+	unsigned long flags;
+
+	if (!dscr.base)
+		return;
+
+	if (id < 0 || id >= MAX_DEVSTATE_IDS)
+		return;
+
+	info = &dscr.devstate_info[id];
+	ctl = info->ctl;
+	stat = info->stat;
+
+	if (ctl == NULL)
+		return;
+
+	ctl_shift = ctl->shift + ctl->nbits * (id - ctl->start_id);
+	ctl_mask = ((1 << ctl->nbits) - 1) << ctl_shift;
+
+	switch (state) {
+	case DSCR_DEVSTATE_ENABLED:
+		ctl_val = ctl->enable << ctl_shift;
+		break;
+	case DSCR_DEVSTATE_DISABLED:
+		if (ctl->enable_only)
+			return;
+		ctl_val = ctl->disable << ctl_shift;
+		break;
+	default:
+		return;
+	}
+
+	spin_lock_irqsave(&dscr.lock, flags);
+
+	val = soc_readl(dscr.base + ctl->reg);
+	val &= ~ctl_mask;
+	val |= ctl_val;
+
+	dscr_write(ctl->reg, val);
+
+	spin_unlock_irqrestore(&dscr.lock, flags);
+
+	if (!stat)
+		return;
+
+	ctl_shift = stat->shift + stat->nbits * (id - stat->start_id);
+
+	if (state == DSCR_DEVSTATE_ENABLED)
+		ctl_val = stat->enable;
+	else
+		ctl_val = stat->disable;
+
+	do {
+		val = soc_readl(dscr.base + stat->reg);
+		val >>= ctl_shift;
+		val &= ((1 << stat->nbits) - 1);
+	} while (val != ctl_val);
+}
+EXPORT_SYMBOL(dscr_set_devstate);
+
+/*
+ * Drivers can use this to reset RMII module.
+ */
+void dscr_rmii_reset(int id, int assert)
+{
+	struct rmii_reset_reg *r;
+	unsigned long flags;
+	u32 val;
+
+	if (id < 0 || id >= MAX_SOC_EMACS)
+		return;
+
+	r = &dscr.rmii_resets[id];
+	if (r->mask == 0)
+		return;
+
+	spin_lock_irqsave(&dscr.lock, flags);
+
+	val = soc_readl(dscr.base + r->reg);
+	if (assert)
+		dscr_write(r->reg, val | r->mask);
+	else
+		dscr_write(r->reg, val & ~(r->mask));
+
+	spin_unlock_irqrestore(&dscr.lock, flags);
+}
+EXPORT_SYMBOL(dscr_rmii_reset);
+
+static void __init dscr_parse_devstat(struct device_node *node,
+				      void __iomem *base)
+{
+	u32 val;
+	int err;
+
+	err = of_property_read_u32_array(node, "ti,dscr-devstat", &val, 1);
+	if (!err)
+		c6x_devstat = soc_readl(base + val);
+	printk(KERN_INFO "DEVSTAT: %08x\n", c6x_devstat);
+}
+
+static void __init dscr_parse_silicon_rev(struct device_node *node,
+					 void __iomem *base)
+{
+	u32 vals[3];
+	int err;
+
+	err = of_property_read_u32_array(node, "ti,dscr-silicon-rev", vals, 3);
+	if (!err) {
+		c6x_silicon_rev = soc_readl(base + vals[0]);
+		c6x_silicon_rev >>= vals[1];
+		c6x_silicon_rev &= vals[2];
+	}
+}
+
+/*
+ * Some SoCs will have a pair of fuse registers which hold
+ * an ethernet MAC address. The "ti,dscr-mac-fuse-regs"
+ * property is a mapping from fuse register bytes to MAC
+ * address bytes. The expected format is:
+ *
+ *	ti,dscr-mac-fuse-regs = <reg0 b3 b2 b1 b0
+ *				 reg1 b3 b2 b1 b0>
+ *
+ * reg0 and reg1 are the offsets of the two fuse registers.
+ * b3-b0 positionally represent bytes within the fuse register.
+ * b3 is the most significant byte and b0 is the least.
+ * Allowable values for b3-b0 are:
+ *
+ *	  0 = fuse register byte not used in MAC address
+ *      1-6 = index+1 into c6x_fuse_mac[]
+ */
+static void __init dscr_parse_mac_fuse(struct device_node *node,
+				       void __iomem *base)
+{
+	u32 vals[10], fuse;
+	int f, i, j, err;
+
+	err = of_property_read_u32_array(node, "ti,dscr-mac-fuse-regs",
+					 vals, 10);
+	if (err)
+		return;
+
+	for (f = 0; f < 2; f++) {
+		fuse = soc_readl(base + vals[f * 5]);
+		for (j = (f * 5) + 1, i = 24; i >= 0; i -= 8, j++)
+			if (vals[j] && vals[j] <= 6)
+				c6x_fuse_mac[vals[j] - 1] = fuse >> i;
+	}
+}
+
+static void __init dscr_parse_rmii_resets(struct device_node *node,
+					  void __iomem *base)
+{
+	const __be32 *p;
+	int i, size;
+
+	/* look for RMII reset registers */
+	p = of_get_property(node, "ti,dscr-rmii-resets", &size);
+	if (p) {
+		/* parse all the reg/mask pairs we can handle */
+		size /= (sizeof(*p) * 2);
+		if (size > MAX_SOC_EMACS)
+			size = MAX_SOC_EMACS;
+
+		for (i = 0; i < size; i++) {
+			dscr.rmii_resets[i].reg = be32_to_cpup(p++);
+			dscr.rmii_resets[i].mask = be32_to_cpup(p++);
+		}
+	}
+}
+
+
+static void __init dscr_parse_privperm(struct device_node *node,
+				       void __iomem *base)
+{
+	u32 vals[2];
+	int err;
+
+	err = of_property_read_u32_array(node, "ti,dscr-privperm", vals, 2);
+	if (err)
+		return;
+	dscr_write(vals[0], vals[1]);
+}
+
+/*
+ * SoCs may have "locked" DSCR registers which can only be written
+ * to only after writing a key value to a lock registers. These
+ * regisers can be described with the "ti,dscr-locked-regs" property.
+ * This property provides a list of register descriptions with each
+ * description consisting of three values.
+ *
+ *	ti,dscr-locked-regs = <reg0 lockreg0 key0
+ *                               ...
+ *                             regN lockregN keyN>;
+ *
+ * reg is the offset of the locked register
+ * lockreg is the offset of the lock register
+ * key is the unlock key written to lockreg
+ *
+ */
+static void __init dscr_parse_locked_regs(struct device_node *node,
+					  void __iomem *base)
+{
+	struct locked_reg *r;
+	const __be32 *p;
+	int i, size;
+
+	p = of_get_property(node, "ti,dscr-locked-regs", &size);
+	if (p) {
+		/* parse all the register descriptions we can handle */
+		size /= (sizeof(*p) * 3);
+		if (size > MAX_LOCKED_REGS)
+			size = MAX_LOCKED_REGS;
+
+		for (i = 0; i < size; i++) {
+			r = &dscr.locked[i];
+
+			r->reg = be32_to_cpup(p++);
+			r->lockreg = be32_to_cpup(p++);
+			r->key = be32_to_cpup(p++);
+		}
+	}
+}
+
+/*
+ * SoCs may have DSCR registers which are only write enabled after
+ * writing specific key values to two registers. The two key registers
+ * and the key values can be parsed from a "ti,dscr-kick-regs"
+ * propety with the following layout:
+ *
+ *	ti,dscr-kick-regs = <kickreg0 key0 kickreg1 key1>
+ *
+ * kickreg is the offset of the "kick" register
+ * key is the value which unlocks writing for protected regs
+ */
+static void __init dscr_parse_kick_regs(struct device_node *node,
+					void __iomem *base)
+{
+	u32 vals[4];
+	int err;
+
+	err = of_property_read_u32_array(node, "ti,dscr-kick-regs", vals, 4);
+	if (!err) {
+		dscr.kick_reg[0] = vals[0];
+		dscr.kick_key[0] = vals[1];
+		dscr.kick_reg[1] = vals[2];
+		dscr.kick_key[1] = vals[3];
+	}
+}
+
+
+/*
+ * SoCs may provide controls to enable/disable individual IP blocks. These
+ * controls in the DSCR usually control pin drivers but also may control
+ * clocking and or resets. The device tree is used to describe the bitfields
+ * in registers used to control device state. The number of bits and their
+ * values may vary even within the same register.
+ *
+ * The layout of these bitfields is described by the ti,dscr-devstate-ctl-regs
+ * property. This property is a list where each element describes a contiguous
+ * range of control fields with like properties. Each element of the list
+ * consists of 7 cells with the following values:
+ *
+ *   start_id num_ids reg enable disable start_bit nbits
+ *
+ * start_id is device id for the first device control in the range
+ * num_ids is the number of device controls in the range
+ * reg is the offset of the register holding the control bits
+ * enable is the value to enable a device
+ * disable is the value to disable a device (0xffffffff if cannot disable)
+ * start_bit is the bit number of the first bit in the range
+ * nbits is the number of bits per device control
+ */
+static void __init dscr_parse_devstate_ctl_regs(struct device_node *node,
+						void __iomem *base)
+{
+	struct devstate_ctl_reg *r;
+	const __be32 *p;
+	int i, j, size;
+
+	p = of_get_property(node, "ti,dscr-devstate-ctl-regs", &size);
+	if (p) {
+		/* parse all the ranges we can handle */
+		size /= (sizeof(*p) * 7);
+		if (size > MAX_DEVCTL_REGS)
+			size = MAX_DEVCTL_REGS;
+
+		for (i = 0; i < size; i++) {
+			r = &dscr.devctl[i];
+
+			r->start_id = be32_to_cpup(p++);
+			r->num_ids = be32_to_cpup(p++);
+			r->reg = be32_to_cpup(p++);
+			r->enable = be32_to_cpup(p++);
+			r->disable = be32_to_cpup(p++);
+			if (r->disable == 0xffffffff)
+				r->enable_only = 1;
+			r->shift = be32_to_cpup(p++);
+			r->nbits = be32_to_cpup(p++);
+
+			for (j = r->start_id;
+			     j < (r->start_id + r->num_ids);
+			     j++)
+				dscr.devstate_info[j].ctl = r;
+		}
+	}
+}
+
+/*
+ * SoCs may provide status registers indicating the state (enabled/disabled) of
+ * devices on the SoC. The device tree is used to describe the bitfields in
+ * registers used to provide device status. The number of bits and their
+ * values used to provide status may vary even within the same register.
+ *
+ * The layout of these bitfields is described by the ti,dscr-devstate-stat-regs
+ * property. This property is a list where each element describes a contiguous
+ * range of status fields with like properties. Each element of the list
+ * consists of 7 cells with the following values:
+ *
+ *   start_id num_ids reg enable disable start_bit nbits
+ *
+ * start_id is device id for the first device status in the range
+ * num_ids is the number of devices covered by the range
+ * reg is the offset of the register holding the status bits
+ * enable is the value indicating device is enabled
+ * disable is the value indicating device is disabled
+ * start_bit is the bit number of the first bit in the range
+ * nbits is the number of bits per device status
+ */
+static void __init dscr_parse_devstate_stat_regs(struct device_node *node,
+						 void __iomem *base)
+{
+	struct devstate_stat_reg *r;
+	const __be32 *p;
+	int i, j, size;
+
+	p = of_get_property(node, "ti,dscr-devstate-stat-regs", &size);
+	if (p) {
+		/* parse all the ranges we can handle */
+		size /= (sizeof(*p) * 7);
+		if (size > MAX_DEVSTAT_REGS)
+			size = MAX_DEVSTAT_REGS;
+
+		for (i = 0; i < size; i++) {
+			r = &dscr.devstat[i];
+
+			r->start_id = be32_to_cpup(p++);
+			r->num_ids = be32_to_cpup(p++);
+			r->reg = be32_to_cpup(p++);
+			r->enable = be32_to_cpup(p++);
+			r->disable = be32_to_cpup(p++);
+			r->shift = be32_to_cpup(p++);
+			r->nbits = be32_to_cpup(p++);
+
+			for (j = r->start_id;
+			     j < (r->start_id + r->num_ids);
+			     j++)
+				dscr.devstate_info[j].stat = r;
+		}
+	}
+}
+
+static struct of_device_id dscr_ids[] __initdata = {
+	{ .compatible = "ti,c64x+dscr" },
+	{}
+};
+
+/*
+ * Probe for DSCR area.
+ *
+ * This has to be done early on in case timer or interrupt controller
+ * needs something. e.g. On C6455 SoC, timer must be enabled through
+ * DSCR before it is functional.
+ */
+void __init dscr_probe(void)
+{
+	struct device_node *node;
+	void __iomem *base;
+
+	spin_lock_init(&dscr.lock);
+
+	node = of_find_matching_node(NULL, dscr_ids);
+	if (!node)
+		return;
+
+	base = of_iomap(node, 0);
+	if (!base) {
+		of_node_put(node);
+		return;
+	}
+
+	dscr.base = base;
+
+	dscr_parse_devstat(node, base);
+	dscr_parse_silicon_rev(node, base);
+	dscr_parse_mac_fuse(node, base);
+	dscr_parse_rmii_resets(node, base);
+	dscr_parse_locked_regs(node, base);
+	dscr_parse_kick_regs(node, base);
+	dscr_parse_devstate_ctl_regs(node, base);
+	dscr_parse_devstate_stat_regs(node, base);
+	dscr_parse_privperm(node, base);
+}
diff --git a/arch/c6x/platforms/emif.c b/arch/c6x/platforms/emif.c
new file mode 100644
index 0000000..8b564de
--- /dev/null
+++ b/arch/c6x/platforms/emif.c
@@ -0,0 +1,87 @@
+/*
+ *  External Memory Interface
+ *
+ *  Copyright (C) 2011 Texas Instruments Incorporated
+ *  Author: Mark Salter <msalter@redhat.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ */
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/io.h>
+#include <asm/soc.h>
+#include <asm/dscr.h>
+
+#define NUM_EMIFA_CHIP_ENABLES 4
+
+struct emifa_regs {
+	u32	midr;
+	u32	stat;
+	u32	reserved1[6];
+	u32	bprio;
+	u32	reserved2[23];
+	u32	cecfg[NUM_EMIFA_CHIP_ENABLES];
+	u32	reserved3[4];
+	u32	awcc;
+	u32	reserved4[7];
+	u32	intraw;
+	u32	intmsk;
+	u32	intmskset;
+	u32	intmskclr;
+};
+
+static struct of_device_id emifa_match[] __initdata = {
+	{ .compatible = "ti,c64x+emifa"	},
+	{}
+};
+
+/*
+ * Parse device tree for existence of an EMIF (External Memory Interface)
+ * and initialize it if found.
+ */
+static int __init c6x_emifa_init(void)
+{
+	struct emifa_regs __iomem *regs;
+	struct device_node *node;
+	const __be32 *p;
+	u32 val;
+	int i, len, err;
+
+	node = of_find_matching_node(NULL, emifa_match);
+	if (!node)
+		return 0;
+
+	regs = of_iomap(node, 0);
+	if (!regs)
+		return 0;
+
+	/* look for a dscr-based enable for emifa pin buffers */
+	err = of_property_read_u32_array(node, "ti,dscr-dev-enable", &val, 1);
+	if (!err)
+		dscr_set_devstate(val, DSCR_DEVSTATE_ENABLED);
+
+	/* set up the chip enables */
+	p = of_get_property(node, "ti,emifa-ce-config", &len);
+	if (p) {
+		len /= sizeof(u32);
+		if (len > NUM_EMIFA_CHIP_ENABLES)
+			len = NUM_EMIFA_CHIP_ENABLES;
+		for (i = 0; i <= len; i++)
+			soc_writel(be32_to_cpup(&p[i]), &regs->cecfg[i]);
+	}
+
+	err = of_property_read_u32_array(node, "ti,emifa-burst-priority", &val, 1);
+	if (!err)
+		soc_writel(val, &regs->bprio);
+
+	err = of_property_read_u32_array(node, "ti,emifa-async-wait-control", &val, 1);
+	if (!err)
+		soc_writel(val, &regs->awcc);
+
+	iounmap(regs);
+	of_node_put(node);
+	return 0;
+}
+pure_initcall(c6x_emifa_init);
diff --git a/arch/c6x/platforms/megamod-pic.c b/arch/c6x/platforms/megamod-pic.c
new file mode 100644
index 0000000..7c37a94
--- /dev/null
+++ b/arch/c6x/platforms/megamod-pic.c
@@ -0,0 +1,349 @@
+/*
+ *  Support for C64x+ Megamodule Interrupt Controller
+ *
+ *  Copyright (C) 2010, 2011 Texas Instruments Incorporated
+ *  Contributed by: Mark Salter <msalter@redhat.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ */
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/of.h>
+#include <linux/of_irq.h>
+#include <linux/of_address.h>
+#include <linux/slab.h>
+#include <asm/soc.h>
+#include <asm/megamod-pic.h>
+
+#define NR_COMBINERS	4
+#define NR_MUX_OUTPUTS  12
+
+#define IRQ_UNMAPPED 0xffff
+
+/*
+ * Megamodule Interrupt Controller register layout
+ */
+struct megamod_regs {
+	u32	evtflag[8];
+	u32	evtset[8];
+	u32	evtclr[8];
+	u32	reserved0[8];
+	u32	evtmask[8];
+	u32	mevtflag[8];
+	u32	expmask[8];
+	u32	mexpflag[8];
+	u32	intmux_unused;
+	u32	intmux[7];
+	u32	reserved1[8];
+	u32	aegmux[2];
+	u32	reserved2[14];
+	u32	intxstat;
+	u32	intxclr;
+	u32	intdmask;
+	u32	reserved3[13];
+	u32	evtasrt;
+};
+
+struct megamod_pic {
+	struct irq_host	*irqhost;
+	struct megamod_regs __iomem *regs;
+	raw_spinlock_t lock;
+
+	/* hw mux mapping */
+	unsigned int output_to_irq[NR_MUX_OUTPUTS];
+};
+
+static struct megamod_pic *mm_pic;
+
+struct megamod_cascade_data {
+	struct megamod_pic *pic;
+	int index;
+};
+
+static struct megamod_cascade_data cascade_data[NR_COMBINERS];
+
+static void mask_megamod(struct irq_data *data)
+{
+	struct megamod_pic *pic = irq_data_get_irq_chip_data(data);
+	irq_hw_number_t src = irqd_to_hwirq(data);
+	u32 __iomem *evtmask = &pic->regs->evtmask[src / 32];
+
+	raw_spin_lock(&pic->lock);
+	soc_writel(soc_readl(evtmask) | (1 << (src & 31)), evtmask);
+	raw_spin_unlock(&pic->lock);
+}
+
+static void unmask_megamod(struct irq_data *data)
+{
+	struct megamod_pic *pic = irq_data_get_irq_chip_data(data);
+	irq_hw_number_t src = irqd_to_hwirq(data);
+	u32 __iomem *evtmask = &pic->regs->evtmask[src / 32];
+
+	raw_spin_lock(&pic->lock);
+	soc_writel(soc_readl(evtmask) & ~(1 << (src & 31)), evtmask);
+	raw_spin_unlock(&pic->lock);
+}
+
+static struct irq_chip megamod_chip = {
+	.name		= "megamod",
+	.irq_mask	= mask_megamod,
+	.irq_unmask	= unmask_megamod,
+};
+
+static void megamod_irq_cascade(unsigned int irq, struct irq_desc *desc)
+{
+	struct megamod_cascade_data *cascade;
+	struct megamod_pic *pic;
+	u32 events;
+	int n, idx;
+
+	cascade = irq_desc_get_handler_data(desc);
+
+	pic = cascade->pic;
+	idx = cascade->index;
+
+	while ((events = soc_readl(&pic->regs->mevtflag[idx])) != 0) {
+		n = __ffs(events);
+
+		irq = irq_linear_revmap(pic->irqhost, idx * 32 + n);
+
+		soc_writel(1 << n, &pic->regs->evtclr[idx]);
+
+		generic_handle_irq(irq);
+	}
+}
+
+static int megamod_map(struct irq_host *h, unsigned int virq,
+		       irq_hw_number_t hw)
+{
+	struct megamod_pic *pic = h->host_data;
+	int i;
+
+	/* We shouldn't see a hwirq which is muxed to core controller */
+	for (i = 0; i < NR_MUX_OUTPUTS; i++)
+		if (pic->output_to_irq[i] == hw)
+			return -1;
+
+	irq_set_chip_data(virq, pic);
+	irq_set_chip_and_handler(virq, &megamod_chip, handle_level_irq);
+
+	/* Set default irq type */
+	irq_set_irq_type(virq, IRQ_TYPE_NONE);
+
+	return 0;
+}
+
+static int megamod_xlate(struct irq_host *h, struct device_node *ct,
+			 const u32 *intspec, unsigned int intsize,
+			 irq_hw_number_t *out_hwirq, unsigned int *out_type)
+
+{
+	/* megamod intspecs must have 1 cell */
+	BUG_ON(intsize != 1);
+	*out_hwirq = intspec[0];
+	*out_type = IRQ_TYPE_NONE;
+	return 0;
+}
+
+static struct irq_host_ops megamod_host_ops = {
+	.map	= megamod_map,
+	.xlate	= megamod_xlate,
+};
+
+static void __init set_megamod_mux(struct megamod_pic *pic, int src, int output)
+{
+	int index, offset;
+	u32 val;
+
+	if (src < 0 || src >= (NR_COMBINERS * 32)) {
+		pic->output_to_irq[output] = IRQ_UNMAPPED;
+		return;
+	}
+
+	/* four mappings per mux register */
+	index = output / 4;
+	offset = (output & 3) * 8;
+
+	val = soc_readl(&pic->regs->intmux[index]);
+	val &= ~(0xff << offset);
+	val |= src << offset;
+	soc_writel(val, &pic->regs->intmux[index]);
+}
+
+/*
+ * Parse the MUX mapping, if one exists.
+ *
+ * The MUX map is an array of up to 12 cells; one for each usable core priority
+ * interrupt. The value of a given cell is the megamodule interrupt source
+ * which is to me MUXed to the output corresponding to the cell position
+ * withing the array. The first cell in the array corresponds to priority
+ * 4 and the last (12th) cell corresponds to priority 15. The allowed
+ * values are 4 - ((NR_COMBINERS * 32) - 1). Note that the combined interrupt
+ * sources (0 - 3) are not allowed to be mapped through this property. They
+ * are handled through the "interrupts" property. This allows us to use a
+ * value of zero as a "do not map" placeholder.
+ */
+static void __init parse_priority_map(struct megamod_pic *pic,
+				      int *mapping, int size)
+{
+	struct device_node *np = pic->irqhost->of_node;
+	const __be32 *map;
+	int i, maplen;
+	u32 val;
+
+	map = of_get_property(np, "ti,c64x+megamod-pic-mux", &maplen);
+	if (map) {
+		maplen /= 4;
+		if (maplen > size)
+			maplen = size;
+
+		for (i = 0; i < maplen; i++) {
+			val = be32_to_cpup(map);
+			if (val && val >= 4)
+				mapping[i] = val;
+			++map;
+		}
+	}
+}
+
+static struct megamod_pic * __init init_megamod_pic(struct device_node *np)
+{
+	struct megamod_pic *pic;
+	int i, irq;
+	int mapping[NR_MUX_OUTPUTS];
+
+	pr_info("Initializing C64x+ Megamodule PIC\n");
+
+	pic = kzalloc(sizeof(struct megamod_pic), GFP_KERNEL);
+	if (!pic) {
+		pr_err("%s: Could not alloc PIC structure.\n", np->full_name);
+		return NULL;
+	}
+
+	pic->irqhost = irq_alloc_host(np, IRQ_HOST_MAP_LINEAR,
+				      NR_COMBINERS * 32, &megamod_host_ops,
+				      IRQ_UNMAPPED);
+	if (!pic->irqhost) {
+		pr_err("%s: Could not alloc host.\n", np->full_name);
+		goto error_free;
+	}
+
+	pic->irqhost->host_data = pic;
+
+	raw_spin_lock_init(&pic->lock);
+
+	pic->regs = of_iomap(np, 0);
+	if (!pic->regs) {
+		pr_err("%s: Could not map registers.\n", np->full_name);
+		goto error_free;
+	}
+
+	/* Initialize MUX map */
+	for (i = 0; i < ARRAY_SIZE(mapping); i++)
+		mapping[i] = IRQ_UNMAPPED;
+
+	parse_priority_map(pic, mapping, ARRAY_SIZE(mapping));
+
+	/*
+	 * We can have up to 12 interrupts cascading to the core controller.
+	 * These cascades can be from the combined interrupt sources or for
+	 * individual interrupt sources. The "interrupts" property only
+	 * deals with the cascaded combined interrupts. The individual
+	 * interrupts muxed to the core controller use the core controller
+	 * as their interrupt parent.
+	 */
+	for (i = 0; i < NR_COMBINERS; i++) {
+
+		irq = irq_of_parse_and_map(np, i);
+		if (irq == NO_IRQ)
+			continue;
+
+		/*
+		 * We count on the core priority interrupts (4 - 15) being
+		 * direct mapped. Check that device tree provided something
+		 * in that range.
+		 */
+		if (irq < 4 || irq >= NR_PRIORITY_IRQS) {
+			pr_err("%s: combiner-%d virq %d out of range!\n",
+				 np->full_name, i, irq);
+			continue;
+		}
+
+		/* record the mapping */
+		mapping[irq - 4] = i;
+
+		pr_debug("%s: combiner-%d cascading to virq %d\n",
+			 np->full_name, i, irq);
+
+		cascade_data[i].pic = pic;
+		cascade_data[i].index = i;
+
+		/* mask and clear all events in combiner */
+		soc_writel(~0, &pic->regs->evtmask[i]);
+		soc_writel(~0, &pic->regs->evtclr[i]);
+
+		irq_set_handler_data(irq, &cascade_data[i]);
+		irq_set_chained_handler(irq, megamod_irq_cascade);
+	}
+
+	/* Finally, set up the MUX registers */
+	for (i = 0; i < NR_MUX_OUTPUTS; i++) {
+		if (mapping[i] != IRQ_UNMAPPED) {
+			pr_debug("%s: setting mux %d to priority %d\n",
+				 np->full_name, mapping[i], i + 4);
+			set_megamod_mux(pic, mapping[i], i);
+		}
+	}
+
+	return pic;
+
+error_free:
+	kfree(pic);
+
+	return NULL;
+}
+
+/*
+ * Return next active event after ACK'ing it.
+ * Return -1 if no events active.
+ */
+static int get_exception(void)
+{
+	int i, bit;
+	u32 mask;
+
+	for (i = 0; i < NR_COMBINERS; i++) {
+		mask = soc_readl(&mm_pic->regs->mexpflag[i]);
+		if (mask) {
+			bit = __ffs(mask);
+			soc_writel(1 << bit, &mm_pic->regs->evtclr[i]);
+			return (i * 32) + bit;
+		}
+	}
+	return -1;
+}
+
+static void assert_event(unsigned int val)
+{
+	soc_writel(val, &mm_pic->regs->evtasrt);
+}
+
+void __init megamod_pic_init(void)
+{
+	struct device_node *np;
+
+	np = of_find_compatible_node(NULL, NULL, "ti,c64x+megamod-pic");
+	if (!np)
+		return;
+
+	mm_pic = init_megamod_pic(np);
+	of_node_put(np);
+
+	soc_ops.get_exception = get_exception;
+	soc_ops.assert_event = assert_event;
+
+	return;
+}
diff --git a/arch/c6x/platforms/platform.c b/arch/c6x/platforms/platform.c
new file mode 100644
index 0000000..26c1a35
--- /dev/null
+++ b/arch/c6x/platforms/platform.c
@@ -0,0 +1,17 @@
+/*
+ * Copyright 2011 Texas Instruments Incorporated
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2. This program is licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ */
+
+#include <linux/init.h>
+#include <linux/of_platform.h>
+
+static int __init c6x_device_probe(void)
+{
+	of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL);
+	return 0;
+}
+core_initcall(c6x_device_probe);
diff --git a/arch/c6x/platforms/pll.c b/arch/c6x/platforms/pll.c
new file mode 100644
index 0000000..3aa898f
--- /dev/null
+++ b/arch/c6x/platforms/pll.c
@@ -0,0 +1,444 @@
+/*
+ * Clock and PLL control for C64x+ devices
+ *
+ * Copyright (C) 2010, 2011 Texas Instruments.
+ * Contributed by: Mark Salter <msalter@redhat.com>
+ *
+ * Copied heavily from arm/mach-davinci/clock.c, so:
+ *
+ * Copyright (C) 2006-2007 Texas Instruments.
+ * Copyright (C) 2008-2009 Deep Root Systems, LLC
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/clkdev.h>
+#include <linux/clk.h>
+#include <linux/io.h>
+#include <linux/err.h>
+
+#include <asm/clock.h>
+#include <asm/soc.h>
+
+static LIST_HEAD(clocks);
+static DEFINE_MUTEX(clocks_mutex);
+static DEFINE_SPINLOCK(clockfw_lock);
+
+static void __clk_enable(struct clk *clk)
+{
+	if (clk->parent)
+		__clk_enable(clk->parent);
+	clk->usecount++;
+}
+
+static void __clk_disable(struct clk *clk)
+{
+	if (WARN_ON(clk->usecount == 0))
+		return;
+	--clk->usecount;
+
+	if (clk->parent)
+		__clk_disable(clk->parent);
+}
+
+int clk_enable(struct clk *clk)
+{
+	unsigned long flags;
+
+	if (clk == NULL || IS_ERR(clk))
+		return -EINVAL;
+
+	spin_lock_irqsave(&clockfw_lock, flags);
+	__clk_enable(clk);
+	spin_unlock_irqrestore(&clockfw_lock, flags);
+
+	return 0;
+}
+EXPORT_SYMBOL(clk_enable);
+
+void clk_disable(struct clk *clk)
+{
+	unsigned long flags;
+
+	if (clk == NULL || IS_ERR(clk))
+		return;
+
+	spin_lock_irqsave(&clockfw_lock, flags);
+	__clk_disable(clk);
+	spin_unlock_irqrestore(&clockfw_lock, flags);
+}
+EXPORT_SYMBOL(clk_disable);
+
+unsigned long clk_get_rate(struct clk *clk)
+{
+	if (clk == NULL || IS_ERR(clk))
+		return -EINVAL;
+
+	return clk->rate;
+}
+EXPORT_SYMBOL(clk_get_rate);
+
+long clk_round_rate(struct clk *clk, unsigned long rate)
+{
+	if (clk == NULL || IS_ERR(clk))
+		return -EINVAL;
+
+	if (clk->round_rate)
+		return clk->round_rate(clk, rate);
+
+	return clk->rate;
+}
+EXPORT_SYMBOL(clk_round_rate);
+
+/* Propagate rate to children */
+static void propagate_rate(struct clk *root)
+{
+	struct clk *clk;
+
+	list_for_each_entry(clk, &root->children, childnode) {
+		if (clk->recalc)
+			clk->rate = clk->recalc(clk);
+		propagate_rate(clk);
+	}
+}
+
+int clk_set_rate(struct clk *clk, unsigned long rate)
+{
+	unsigned long flags;
+	int ret = -EINVAL;
+
+	if (clk == NULL || IS_ERR(clk))
+		return ret;
+
+	if (clk->set_rate)
+		ret = clk->set_rate(clk, rate);
+
+	spin_lock_irqsave(&clockfw_lock, flags);
+	if (ret == 0) {
+		if (clk->recalc)
+			clk->rate = clk->recalc(clk);
+		propagate_rate(clk);
+	}
+	spin_unlock_irqrestore(&clockfw_lock, flags);
+
+	return ret;
+}
+EXPORT_SYMBOL(clk_set_rate);
+
+int clk_set_parent(struct clk *clk, struct clk *parent)
+{
+	unsigned long flags;
+
+	if (clk == NULL || IS_ERR(clk))
+		return -EINVAL;
+
+	/* Cannot change parent on enabled clock */
+	if (WARN_ON(clk->usecount))
+		return -EINVAL;
+
+	mutex_lock(&clocks_mutex);
+	clk->parent = parent;
+	list_del_init(&clk->childnode);
+	list_add(&clk->childnode, &clk->parent->children);
+	mutex_unlock(&clocks_mutex);
+
+	spin_lock_irqsave(&clockfw_lock, flags);
+	if (clk->recalc)
+		clk->rate = clk->recalc(clk);
+	propagate_rate(clk);
+	spin_unlock_irqrestore(&clockfw_lock, flags);
+
+	return 0;
+}
+EXPORT_SYMBOL(clk_set_parent);
+
+int clk_register(struct clk *clk)
+{
+	if (clk == NULL || IS_ERR(clk))
+		return -EINVAL;
+
+	if (WARN(clk->parent && !clk->parent->rate,
+		 "CLK: %s parent %s has no rate!\n",
+		 clk->name, clk->parent->name))
+		return -EINVAL;
+
+	mutex_lock(&clocks_mutex);
+	list_add_tail(&clk->node, &clocks);
+	if (clk->parent)
+		list_add_tail(&clk->childnode, &clk->parent->children);
+	mutex_unlock(&clocks_mutex);
+
+	/* If rate is already set, use it */
+	if (clk->rate)
+		return 0;
+
+	/* Else, see if there is a way to calculate it */
+	if (clk->recalc)
+		clk->rate = clk->recalc(clk);
+
+	/* Otherwise, default to parent rate */
+	else if (clk->parent)
+		clk->rate = clk->parent->rate;
+
+	return 0;
+}
+EXPORT_SYMBOL(clk_register);
+
+void clk_unregister(struct clk *clk)
+{
+	if (clk == NULL || IS_ERR(clk))
+		return;
+
+	mutex_lock(&clocks_mutex);
+	list_del(&clk->node);
+	list_del(&clk->childnode);
+	mutex_unlock(&clocks_mutex);
+}
+EXPORT_SYMBOL(clk_unregister);
+
+
+static u32 pll_read(struct pll_data *pll, int reg)
+{
+	return soc_readl(pll->base + reg);
+}
+
+static unsigned long clk_sysclk_recalc(struct clk *clk)
+{
+	u32 v, plldiv = 0;
+	struct pll_data *pll;
+	unsigned long rate = clk->rate;
+
+	if (WARN_ON(!clk->parent))
+		return rate;
+
+	rate = clk->parent->rate;
+
+	/* the parent must be a PLL */
+	if (WARN_ON(!clk->parent->pll_data))
+		return rate;
+
+	pll = clk->parent->pll_data;
+
+	/* If pre-PLL, source clock is before the multiplier and divider(s) */
+	if (clk->flags & PRE_PLL)
+		rate = pll->input_rate;
+
+	if (!clk->div) {
+		pr_debug("%s: (no divider) rate = %lu KHz\n",
+			 clk->name, rate / 1000);
+		return rate;
+	}
+
+	if (clk->flags & FIXED_DIV_PLL) {
+		rate /= clk->div;
+		pr_debug("%s: (fixed divide by %d) rate = %lu KHz\n",
+			 clk->name, clk->div, rate / 1000);
+		return rate;
+	}
+
+	v = pll_read(pll, clk->div);
+	if (v & PLLDIV_EN)
+		plldiv = (v & PLLDIV_RATIO_MASK) + 1;
+
+	if (plldiv == 0)
+		plldiv = 1;
+
+	rate /= plldiv;
+
+	pr_debug("%s: (divide by %d) rate = %lu KHz\n",
+		 clk->name, plldiv, rate / 1000);
+
+	return rate;
+}
+
+static unsigned long clk_leafclk_recalc(struct clk *clk)
+{
+	if (WARN_ON(!clk->parent))
+		return clk->rate;
+
+	pr_debug("%s: (parent %s) rate = %lu KHz\n",
+		 clk->name, clk->parent->name,	clk->parent->rate / 1000);
+
+	return clk->parent->rate;
+}
+
+static unsigned long clk_pllclk_recalc(struct clk *clk)
+{
+	u32 ctrl, mult = 0, prediv = 0, postdiv = 0;
+	u8 bypass;
+	struct pll_data *pll = clk->pll_data;
+	unsigned long rate = clk->rate;
+
+	if (clk->flags & FIXED_RATE_PLL)
+		return rate;
+
+	ctrl = pll_read(pll, PLLCTL);
+	rate = pll->input_rate = clk->parent->rate;
+
+	if (ctrl & PLLCTL_PLLEN)
+		bypass = 0;
+	else
+		bypass = 1;
+
+	if (pll->flags & PLL_HAS_MUL) {
+		mult = pll_read(pll, PLLM);
+		mult = (mult & PLLM_PLLM_MASK) + 1;
+	}
+	if (pll->flags & PLL_HAS_PRE) {
+		prediv = pll_read(pll, PLLPRE);
+		if (prediv & PLLDIV_EN)
+			prediv = (prediv & PLLDIV_RATIO_MASK) + 1;
+		else
+			prediv = 0;
+	}
+	if (pll->flags & PLL_HAS_POST) {
+		postdiv = pll_read(pll, PLLPOST);
+		if (postdiv & PLLDIV_EN)
+			postdiv = (postdiv & PLLDIV_RATIO_MASK) + 1;
+		else
+			postdiv = 1;
+	}
+
+	if (!bypass) {
+		if (prediv)
+			rate /= prediv;
+		if (mult)
+			rate *= mult;
+		if (postdiv)
+			rate /= postdiv;
+
+		pr_debug("PLL%d: input = %luMHz, pre[%d] mul[%d] post[%d] "
+			 "--> %luMHz output.\n",
+			 pll->num, clk->parent->rate / 1000000,
+			 prediv, mult, postdiv, rate / 1000000);
+	} else
+		pr_debug("PLL%d: input = %luMHz, bypass mode.\n",
+			 pll->num, clk->parent->rate / 1000000);
+
+	return rate;
+}
+
+
+static void __init __init_clk(struct clk *clk)
+{
+	INIT_LIST_HEAD(&clk->node);
+	INIT_LIST_HEAD(&clk->children);
+	INIT_LIST_HEAD(&clk->childnode);
+
+	if (!clk->recalc) {
+
+		/* Check if clock is a PLL */
+		if (clk->pll_data)
+			clk->recalc = clk_pllclk_recalc;
+
+		/* Else, if it is a PLL-derived clock */
+		else if (clk->flags & CLK_PLL)
+			clk->recalc = clk_sysclk_recalc;
+
+		/* Otherwise, it is a leaf clock (PSC clock) */
+		else if (clk->parent)
+			clk->recalc = clk_leafclk_recalc;
+	}
+}
+
+void __init c6x_clks_init(struct clk_lookup *clocks)
+{
+	struct clk_lookup *c;
+	struct clk *clk;
+	size_t num_clocks = 0;
+
+	for (c = clocks; c->clk; c++) {
+		clk = c->clk;
+
+		__init_clk(clk);
+		clk_register(clk);
+		num_clocks++;
+
+		/* Turn on clocks that Linux doesn't otherwise manage */
+		if (clk->flags & ALWAYS_ENABLED)
+			clk_enable(clk);
+	}
+
+	clkdev_add_table(clocks, num_clocks);
+}
+
+#ifdef CONFIG_DEBUG_FS
+
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+#define CLKNAME_MAX	10		/* longest clock name */
+#define NEST_DELTA	2
+#define NEST_MAX	4
+
+static void
+dump_clock(struct seq_file *s, unsigned nest, struct clk *parent)
+{
+	char		*state;
+	char		buf[CLKNAME_MAX + NEST_DELTA * NEST_MAX];
+	struct clk	*clk;
+	unsigned	i;
+
+	if (parent->flags & CLK_PLL)
+		state = "pll";
+	else
+		state = "";
+
+	/* <nest spaces> name <pad to end> */
+	memset(buf, ' ', sizeof(buf) - 1);
+	buf[sizeof(buf) - 1] = 0;
+	i = strlen(parent->name);
+	memcpy(buf + nest, parent->name,
+	       min(i, (unsigned)(sizeof(buf) - 1 - nest)));
+
+	seq_printf(s, "%s users=%2d %-3s %9ld Hz\n",
+		   buf, parent->usecount, state, clk_get_rate(parent));
+	/* REVISIT show device associations too */
+
+	/* cost is now small, but not linear... */
+	list_for_each_entry(clk, &parent->children, childnode) {
+		dump_clock(s, nest + NEST_DELTA, clk);
+	}
+}
+
+static int c6x_ck_show(struct seq_file *m, void *v)
+{
+	struct clk *clk;
+
+	/*
+	 * Show clock tree; We trust nonzero usecounts equate to PSC enables...
+	 */
+	mutex_lock(&clocks_mutex);
+	list_for_each_entry(clk, &clocks, node)
+		if (!clk->parent)
+			dump_clock(m, 0, clk);
+	mutex_unlock(&clocks_mutex);
+
+	return 0;
+}
+
+static int c6x_ck_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, c6x_ck_show, NULL);
+}
+
+static const struct file_operations c6x_ck_operations = {
+	.open		= c6x_ck_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static int __init c6x_clk_debugfs_init(void)
+{
+	debugfs_create_file("c6x_clocks", S_IFREG | S_IRUGO, NULL, NULL,
+			    &c6x_ck_operations);
+
+	return 0;
+}
+device_initcall(c6x_clk_debugfs_init);
+#endif /* CONFIG_DEBUG_FS */
diff --git a/arch/c6x/platforms/plldata.c b/arch/c6x/platforms/plldata.c
new file mode 100644
index 0000000..2cfd6f4
--- /dev/null
+++ b/arch/c6x/platforms/plldata.c
@@ -0,0 +1,404 @@
+/*
+ *  Port on Texas Instruments TMS320C6x architecture
+ *
+ *  Copyright (C) 2011 Texas Instruments Incorporated
+ *  Author: Mark Salter <msalter@redhat.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ */
+#include <linux/kernel.h>
+#include <linux/delay.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/ioport.h>
+#include <linux/clkdev.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+
+#include <asm/clock.h>
+#include <asm/setup.h>
+#include <asm/irq.h>
+
+/*
+ * Common SoC clock support.
+ */
+
+/* Default input for PLL1 */
+struct clk clkin1 = {
+	.name = "clkin1",
+	.node = LIST_HEAD_INIT(clkin1.node),
+	.children = LIST_HEAD_INIT(clkin1.children),
+	.childnode = LIST_HEAD_INIT(clkin1.childnode),
+};
+
+struct pll_data c6x_soc_pll1 = {
+	.num	   = 1,
+	.sysclks   = {
+		{
+			.name = "pll1",
+			.parent = &clkin1,
+			.pll_data = &c6x_soc_pll1,
+			.flags = CLK_PLL,
+		},
+		{
+			.name = "pll1_sysclk1",
+			.parent = &c6x_soc_pll1.sysclks[0],
+			.flags = CLK_PLL,
+		},
+		{
+			.name = "pll1_sysclk2",
+			.parent = &c6x_soc_pll1.sysclks[0],
+			.flags = CLK_PLL,
+		},
+		{
+			.name = "pll1_sysclk3",
+			.parent = &c6x_soc_pll1.sysclks[0],
+			.flags = CLK_PLL,
+		},
+		{
+			.name = "pll1_sysclk4",
+			.parent = &c6x_soc_pll1.sysclks[0],
+			.flags = CLK_PLL,
+		},
+		{
+			.name = "pll1_sysclk5",
+			.parent = &c6x_soc_pll1.sysclks[0],
+			.flags = CLK_PLL,
+		},
+		{
+			.name = "pll1_sysclk6",
+			.parent = &c6x_soc_pll1.sysclks[0],
+			.flags = CLK_PLL,
+		},
+		{
+			.name = "pll1_sysclk7",
+			.parent = &c6x_soc_pll1.sysclks[0],
+			.flags = CLK_PLL,
+		},
+		{
+			.name = "pll1_sysclk8",
+			.parent = &c6x_soc_pll1.sysclks[0],
+			.flags = CLK_PLL,
+		},
+		{
+			.name = "pll1_sysclk9",
+			.parent = &c6x_soc_pll1.sysclks[0],
+			.flags = CLK_PLL,
+		},
+		{
+			.name = "pll1_sysclk10",
+			.parent = &c6x_soc_pll1.sysclks[0],
+			.flags = CLK_PLL,
+		},
+		{
+			.name = "pll1_sysclk11",
+			.parent = &c6x_soc_pll1.sysclks[0],
+			.flags = CLK_PLL,
+		},
+		{
+			.name = "pll1_sysclk12",
+			.parent = &c6x_soc_pll1.sysclks[0],
+			.flags = CLK_PLL,
+		},
+		{
+			.name = "pll1_sysclk13",
+			.parent = &c6x_soc_pll1.sysclks[0],
+			.flags = CLK_PLL,
+		},
+		{
+			.name = "pll1_sysclk14",
+			.parent = &c6x_soc_pll1.sysclks[0],
+			.flags = CLK_PLL,
+		},
+		{
+			.name = "pll1_sysclk15",
+			.parent = &c6x_soc_pll1.sysclks[0],
+			.flags = CLK_PLL,
+		},
+		{
+			.name = "pll1_sysclk16",
+			.parent = &c6x_soc_pll1.sysclks[0],
+			.flags = CLK_PLL,
+		},
+	},
+};
+
+/* CPU core clock */
+struct clk c6x_core_clk = {
+	.name = "core",
+};
+
+/* miscellaneous IO clocks */
+struct clk c6x_i2c_clk = {
+	.name = "i2c",
+};
+
+struct clk c6x_watchdog_clk = {
+	.name = "watchdog",
+};
+
+struct clk c6x_mcbsp1_clk = {
+	.name = "mcbsp1",
+};
+
+struct clk c6x_mcbsp2_clk = {
+	.name = "mcbsp2",
+};
+
+struct clk c6x_mdio_clk = {
+	.name = "mdio",
+};
+
+
+#ifdef CONFIG_SOC_TMS320C6455
+static struct clk_lookup c6455_clks[] = {
+	CLK(NULL, "pll1", &c6x_soc_pll1.sysclks[0]),
+	CLK(NULL, "pll1_sysclk2", &c6x_soc_pll1.sysclks[2]),
+	CLK(NULL, "pll1_sysclk3", &c6x_soc_pll1.sysclks[3]),
+	CLK(NULL, "pll1_sysclk4", &c6x_soc_pll1.sysclks[4]),
+	CLK(NULL, "pll1_sysclk5", &c6x_soc_pll1.sysclks[5]),
+	CLK(NULL, "core", &c6x_core_clk),
+	CLK("i2c_davinci.1", NULL, &c6x_i2c_clk),
+	CLK("watchdog", NULL, &c6x_watchdog_clk),
+	CLK("2c81800.mdio", NULL, &c6x_mdio_clk),
+	CLK("", NULL, NULL)
+};
+
+
+static void __init c6455_setup_clocks(struct device_node *node)
+{
+	struct pll_data *pll = &c6x_soc_pll1;
+	struct clk *sysclks = pll->sysclks;
+
+	pll->flags = PLL_HAS_PRE | PLL_HAS_MUL;
+
+	sysclks[2].flags |= FIXED_DIV_PLL;
+	sysclks[2].div = 3;
+	sysclks[3].flags |= FIXED_DIV_PLL;
+	sysclks[3].div = 6;
+	sysclks[4].div = PLLDIV4;
+	sysclks[5].div = PLLDIV5;
+
+	c6x_core_clk.parent = &sysclks[0];
+	c6x_i2c_clk.parent = &sysclks[3];
+	c6x_watchdog_clk.parent = &sysclks[3];
+	c6x_mdio_clk.parent = &sysclks[3];
+
+	c6x_clks_init(c6455_clks);
+}
+#endif /* CONFIG_SOC_TMS320C6455 */
+
+#ifdef CONFIG_SOC_TMS320C6457
+static struct clk_lookup c6457_clks[] = {
+	CLK(NULL, "pll1", &c6x_soc_pll1.sysclks[0]),
+	CLK(NULL, "pll1_sysclk1", &c6x_soc_pll1.sysclks[1]),
+	CLK(NULL, "pll1_sysclk2", &c6x_soc_pll1.sysclks[2]),
+	CLK(NULL, "pll1_sysclk3", &c6x_soc_pll1.sysclks[3]),
+	CLK(NULL, "pll1_sysclk4", &c6x_soc_pll1.sysclks[4]),
+	CLK(NULL, "pll1_sysclk5", &c6x_soc_pll1.sysclks[5]),
+	CLK(NULL, "core", &c6x_core_clk),
+	CLK("i2c_davinci.1", NULL, &c6x_i2c_clk),
+	CLK("watchdog", NULL, &c6x_watchdog_clk),
+	CLK("2c81800.mdio", NULL, &c6x_mdio_clk),
+	CLK("", NULL, NULL)
+};
+
+static void __init c6457_setup_clocks(struct device_node *node)
+{
+	struct pll_data *pll = &c6x_soc_pll1;
+	struct clk *sysclks = pll->sysclks;
+
+	pll->flags = PLL_HAS_MUL | PLL_HAS_POST;
+
+	sysclks[1].flags |= FIXED_DIV_PLL;
+	sysclks[1].div = 1;
+	sysclks[2].flags |= FIXED_DIV_PLL;
+	sysclks[2].div = 3;
+	sysclks[3].flags |= FIXED_DIV_PLL;
+	sysclks[3].div = 6;
+	sysclks[4].div = PLLDIV4;
+	sysclks[5].div = PLLDIV5;
+
+	c6x_core_clk.parent = &sysclks[1];
+	c6x_i2c_clk.parent = &sysclks[3];
+	c6x_watchdog_clk.parent = &sysclks[5];
+	c6x_mdio_clk.parent = &sysclks[5];
+
+	c6x_clks_init(c6457_clks);
+}
+#endif /* CONFIG_SOC_TMS320C6455 */
+
+#ifdef CONFIG_SOC_TMS320C6472
+static struct clk_lookup c6472_clks[] = {
+	CLK(NULL, "pll1", &c6x_soc_pll1.sysclks[0]),
+	CLK(NULL, "pll1_sysclk1", &c6x_soc_pll1.sysclks[1]),
+	CLK(NULL, "pll1_sysclk2", &c6x_soc_pll1.sysclks[2]),
+	CLK(NULL, "pll1_sysclk3", &c6x_soc_pll1.sysclks[3]),
+	CLK(NULL, "pll1_sysclk4", &c6x_soc_pll1.sysclks[4]),
+	CLK(NULL, "pll1_sysclk5", &c6x_soc_pll1.sysclks[5]),
+	CLK(NULL, "pll1_sysclk6", &c6x_soc_pll1.sysclks[6]),
+	CLK(NULL, "pll1_sysclk7", &c6x_soc_pll1.sysclks[7]),
+	CLK(NULL, "pll1_sysclk8", &c6x_soc_pll1.sysclks[8]),
+	CLK(NULL, "pll1_sysclk9", &c6x_soc_pll1.sysclks[9]),
+	CLK(NULL, "pll1_sysclk10", &c6x_soc_pll1.sysclks[10]),
+	CLK(NULL, "core", &c6x_core_clk),
+	CLK("i2c_davinci.1", NULL, &c6x_i2c_clk),
+	CLK("watchdog", NULL, &c6x_watchdog_clk),
+	CLK("2c81800.mdio", NULL, &c6x_mdio_clk),
+	CLK("", NULL, NULL)
+};
+
+/* assumptions used for delay loop calculations */
+#define MIN_CLKIN1_KHz 15625
+#define MAX_CORE_KHz   700000
+#define MIN_PLLOUT_KHz MIN_CLKIN1_KHz
+
+static void __init c6472_setup_clocks(struct device_node *node)
+{
+	struct pll_data *pll = &c6x_soc_pll1;
+	struct clk *sysclks = pll->sysclks;
+	int i;
+
+	pll->flags = PLL_HAS_MUL;
+
+	for (i = 1; i <= 6; i++) {
+		sysclks[i].flags |= FIXED_DIV_PLL;
+		sysclks[i].div = 1;
+	}
+
+	sysclks[7].flags |= FIXED_DIV_PLL;
+	sysclks[7].div = 3;
+	sysclks[8].flags |= FIXED_DIV_PLL;
+	sysclks[8].div = 6;
+	sysclks[9].flags |= FIXED_DIV_PLL;
+	sysclks[9].div = 2;
+	sysclks[10].div = PLLDIV10;
+
+	c6x_core_clk.parent = &sysclks[get_coreid() + 1];
+	c6x_i2c_clk.parent = &sysclks[8];
+	c6x_watchdog_clk.parent = &sysclks[8];
+	c6x_mdio_clk.parent = &sysclks[5];
+
+	c6x_clks_init(c6472_clks);
+}
+#endif /* CONFIG_SOC_TMS320C6472 */
+
+
+#ifdef CONFIG_SOC_TMS320C6474
+static struct clk_lookup c6474_clks[] = {
+	CLK(NULL, "pll1", &c6x_soc_pll1.sysclks[0]),
+	CLK(NULL, "pll1_sysclk7", &c6x_soc_pll1.sysclks[7]),
+	CLK(NULL, "pll1_sysclk9", &c6x_soc_pll1.sysclks[9]),
+	CLK(NULL, "pll1_sysclk10", &c6x_soc_pll1.sysclks[10]),
+	CLK(NULL, "pll1_sysclk11", &c6x_soc_pll1.sysclks[11]),
+	CLK(NULL, "pll1_sysclk12", &c6x_soc_pll1.sysclks[12]),
+	CLK(NULL, "pll1_sysclk13", &c6x_soc_pll1.sysclks[13]),
+	CLK(NULL, "core", &c6x_core_clk),
+	CLK("i2c_davinci.1", NULL, &c6x_i2c_clk),
+	CLK("mcbsp.1", NULL, &c6x_mcbsp1_clk),
+	CLK("mcbsp.2", NULL, &c6x_mcbsp2_clk),
+	CLK("watchdog", NULL, &c6x_watchdog_clk),
+	CLK("2c81800.mdio", NULL, &c6x_mdio_clk),
+	CLK("", NULL, NULL)
+};
+
+static void __init c6474_setup_clocks(struct device_node *node)
+{
+	struct pll_data *pll = &c6x_soc_pll1;
+	struct clk *sysclks = pll->sysclks;
+
+	pll->flags = PLL_HAS_MUL;
+
+	sysclks[7].flags |= FIXED_DIV_PLL;
+	sysclks[7].div = 1;
+	sysclks[9].flags |= FIXED_DIV_PLL;
+	sysclks[9].div = 3;
+	sysclks[10].flags |= FIXED_DIV_PLL;
+	sysclks[10].div = 6;
+
+	sysclks[11].div = PLLDIV11;
+
+	sysclks[12].flags |= FIXED_DIV_PLL;
+	sysclks[12].div = 2;
+
+	sysclks[13].div = PLLDIV13;
+
+	c6x_core_clk.parent = &sysclks[7];
+	c6x_i2c_clk.parent = &sysclks[10];
+	c6x_watchdog_clk.parent = &sysclks[10];
+	c6x_mcbsp1_clk.parent = &sysclks[10];
+	c6x_mcbsp2_clk.parent = &sysclks[10];
+
+	c6x_clks_init(c6474_clks);
+}
+#endif /* CONFIG_SOC_TMS320C6474 */
+
+static struct of_device_id c6x_clkc_match[] __initdata = {
+#ifdef CONFIG_SOC_TMS320C6455
+	{ .compatible = "ti,c6455-pll", .data = c6455_setup_clocks },
+#endif
+#ifdef CONFIG_SOC_TMS320C6457
+	{ .compatible = "ti,c6457-pll", .data = c6457_setup_clocks },
+#endif
+#ifdef CONFIG_SOC_TMS320C6472
+	{ .compatible = "ti,c6472-pll", .data = c6472_setup_clocks },
+#endif
+#ifdef CONFIG_SOC_TMS320C6474
+	{ .compatible = "ti,c6474-pll", .data = c6474_setup_clocks },
+#endif
+	{ .compatible = "ti,c64x+pll" },
+	{}
+};
+
+void __init c64x_setup_clocks(void)
+{
+	void (*__setup_clocks)(struct device_node *np);
+	struct pll_data *pll = &c6x_soc_pll1;
+	struct device_node *node;
+	const struct of_device_id *id;
+	int err;
+	u32 val;
+
+	node = of_find_matching_node(NULL, c6x_clkc_match);
+	if (!node)
+		return;
+
+	pll->base = of_iomap(node, 0);
+	if (!pll->base)
+		goto out;
+
+	err = of_property_read_u32(node, "clock-frequency", &val);
+	if (err || val == 0) {
+		pr_err("%s: no clock-frequency found! Using %dMHz\n",
+		       node->full_name, (int)val / 1000000);
+		val = 25000000;
+	}
+	clkin1.rate = val;
+
+	err = of_property_read_u32(node, "ti,c64x+pll-bypass-delay", &val);
+	if (err)
+		val = 5000;
+	pll->bypass_delay = val;
+
+	err = of_property_read_u32(node, "ti,c64x+pll-reset-delay", &val);
+	if (err)
+		val = 30000;
+	pll->reset_delay = val;
+
+	err = of_property_read_u32(node, "ti,c64x+pll-lock-delay", &val);
+	if (err)
+		val = 30000;
+	pll->lock_delay = val;
+
+	/* id->data is a pointer to SoC-specific setup */
+	id = of_match_node(c6x_clkc_match, node);
+	if (id && id->data) {
+		__setup_clocks = id->data;
+		__setup_clocks(node);
+	}
+
+out:
+	of_node_put(node);
+}
diff --git a/arch/c6x/platforms/timer64.c b/arch/c6x/platforms/timer64.c
new file mode 100644
index 0000000..03c03c2
--- /dev/null
+++ b/arch/c6x/platforms/timer64.c
@@ -0,0 +1,244 @@
+/*
+ *  Copyright (C) 2010, 2011 Texas Instruments Incorporated
+ *  Contributed by: Mark Salter (msalter@redhat.com)
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ */
+
+#include <linux/clockchips.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/of.h>
+#include <linux/of_irq.h>
+#include <linux/of_address.h>
+#include <asm/soc.h>
+#include <asm/dscr.h>
+#include <asm/timer64.h>
+
+struct timer_regs {
+	u32	reserved0;
+	u32	emumgt;
+	u32	reserved1;
+	u32	reserved2;
+	u32	cntlo;
+	u32	cnthi;
+	u32	prdlo;
+	u32	prdhi;
+	u32	tcr;
+	u32	tgcr;
+	u32	wdtcr;
+};
+
+static struct timer_regs __iomem *timer;
+
+#define TCR_TSTATLO	     0x001
+#define TCR_INVOUTPLO	     0x002
+#define TCR_INVINPLO	     0x004
+#define TCR_CPLO	     0x008
+#define TCR_ENAMODELO_ONCE   0x040
+#define TCR_ENAMODELO_CONT   0x080
+#define TCR_ENAMODELO_MASK   0x0c0
+#define TCR_PWIDLO_MASK      0x030
+#define TCR_CLKSRCLO	     0x100
+#define TCR_TIENLO	     0x200
+#define TCR_TSTATHI	     (0x001 << 16)
+#define TCR_INVOUTPHI	     (0x002 << 16)
+#define TCR_CPHI	     (0x008 << 16)
+#define TCR_PWIDHI_MASK      (0x030 << 16)
+#define TCR_ENAMODEHI_ONCE   (0x040 << 16)
+#define TCR_ENAMODEHI_CONT   (0x080 << 16)
+#define TCR_ENAMODEHI_MASK   (0x0c0 << 16)
+
+#define TGCR_TIMLORS	     0x001
+#define TGCR_TIMHIRS	     0x002
+#define TGCR_TIMMODE_UD32    0x004
+#define TGCR_TIMMODE_WDT64   0x008
+#define TGCR_TIMMODE_CD32    0x00c
+#define TGCR_TIMMODE_MASK    0x00c
+#define TGCR_PSCHI_MASK      (0x00f << 8)
+#define TGCR_TDDRHI_MASK     (0x00f << 12)
+
+/*
+ * Timer clocks are divided down from the CPU clock
+ * The divisor is in the EMUMGTCLKSPD register
+ */
+#define TIMER_DIVISOR \
+	((soc_readl(&timer->emumgt) & (0xf << 16)) >> 16)
+
+#define TIMER64_RATE (c6x_core_freq / TIMER_DIVISOR)
+
+#define TIMER64_MODE_DISABLED 0
+#define TIMER64_MODE_ONE_SHOT TCR_ENAMODELO_ONCE
+#define TIMER64_MODE_PERIODIC TCR_ENAMODELO_CONT
+
+static int timer64_mode;
+static int timer64_devstate_id = -1;
+
+static void timer64_config(unsigned long period)
+{
+	u32 tcr = soc_readl(&timer->tcr) & ~TCR_ENAMODELO_MASK;
+
+	soc_writel(tcr, &timer->tcr);
+	soc_writel(period - 1, &timer->prdlo);
+	soc_writel(0, &timer->cntlo);
+	tcr |= timer64_mode;
+	soc_writel(tcr, &timer->tcr);
+}
+
+static void timer64_enable(void)
+{
+	u32 val;
+
+	if (timer64_devstate_id >= 0)
+		dscr_set_devstate(timer64_devstate_id, DSCR_DEVSTATE_ENABLED);
+
+	/* disable timer, reset count */
+	soc_writel(soc_readl(&timer->tcr) & ~TCR_ENAMODELO_MASK, &timer->tcr);
+	soc_writel(0, &timer->prdlo);
+
+	/* use internal clock and 1 cycle pulse width */
+	val = soc_readl(&timer->tcr);
+	soc_writel(val & ~(TCR_CLKSRCLO | TCR_PWIDLO_MASK), &timer->tcr);
+
+	/* dual 32-bit unchained mode */
+	val = soc_readl(&timer->tgcr) & ~TGCR_TIMMODE_MASK;
+	soc_writel(val, &timer->tgcr);
+	soc_writel(val | (TGCR_TIMLORS | TGCR_TIMMODE_UD32), &timer->tgcr);
+}
+
+static void timer64_disable(void)
+{
+	/* disable timer, reset count */
+	soc_writel(soc_readl(&timer->tcr) & ~TCR_ENAMODELO_MASK, &timer->tcr);
+	soc_writel(0, &timer->prdlo);
+
+	if (timer64_devstate_id >= 0)
+		dscr_set_devstate(timer64_devstate_id, DSCR_DEVSTATE_DISABLED);
+}
+
+static int next_event(unsigned long delta,
+		      struct clock_event_device *evt)
+{
+	timer64_config(delta);
+	return 0;
+}
+
+static void set_clock_mode(enum clock_event_mode mode,
+			   struct clock_event_device *evt)
+{
+	switch (mode) {
+	case CLOCK_EVT_MODE_PERIODIC:
+		timer64_enable();
+		timer64_mode = TIMER64_MODE_PERIODIC;
+		timer64_config(TIMER64_RATE / HZ);
+		break;
+	case CLOCK_EVT_MODE_ONESHOT:
+		timer64_enable();
+		timer64_mode = TIMER64_MODE_ONE_SHOT;
+		break;
+	case CLOCK_EVT_MODE_UNUSED:
+	case CLOCK_EVT_MODE_SHUTDOWN:
+		timer64_mode = TIMER64_MODE_DISABLED;
+		timer64_disable();
+		break;
+	case CLOCK_EVT_MODE_RESUME:
+		break;
+	}
+}
+
+static struct clock_event_device t64_clockevent_device = {
+	.name		= "TIMER64_EVT32_TIMER",
+	.features	= CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_PERIODIC,
+	.rating		= 200,
+	.set_mode	= set_clock_mode,
+	.set_next_event	= next_event,
+};
+
+static irqreturn_t timer_interrupt(int irq, void *dev_id)
+{
+	struct clock_event_device *cd = &t64_clockevent_device;
+
+	cd->event_handler(cd);
+
+	return IRQ_HANDLED;
+}
+
+static struct irqaction timer_iact = {
+	.name		= "timer",
+	.flags		= IRQF_TIMER,
+	.handler	= timer_interrupt,
+	.dev_id		= &t64_clockevent_device,
+};
+
+void __init timer64_init(void)
+{
+	struct clock_event_device *cd = &t64_clockevent_device;
+	struct device_node *np, *first = NULL;
+	u32 val;
+	int err, found = 0;
+
+	for_each_compatible_node(np, NULL, "ti,c64x+timer64") {
+		err = of_property_read_u32(np, "ti,core-mask", &val);
+		if (!err) {
+			if (val & (1 << get_coreid())) {
+				found = 1;
+				break;
+			}
+		} else if (!first)
+			first = np;
+	}
+	if (!found) {
+		/* try first one with no core-mask */
+		if (first)
+			np = of_node_get(first);
+		else {
+			pr_debug("Cannot find ti,c64x+timer64 timer.\n");
+			return;
+		}
+	}
+
+	timer = of_iomap(np, 0);
+	if (!timer) {
+		pr_debug("%s: Cannot map timer registers.\n", np->full_name);
+		goto out;
+	}
+	pr_debug("%s: Timer registers=%p.\n", np->full_name, timer);
+
+	cd->irq	= irq_of_parse_and_map(np, 0);
+	if (cd->irq == NO_IRQ) {
+		pr_debug("%s: Cannot find interrupt.\n", np->full_name);
+		iounmap(timer);
+		goto out;
+	}
+
+	/* If there is a device state control, save the ID. */
+	err = of_property_read_u32(np, "ti,dscr-dev-enable", &val);
+	if (!err) {
+		timer64_devstate_id = val;
+
+		/*
+		 * It is necessary to enable the timer block here because
+		 * the TIMER_DIVISOR macro needs to read a timer register
+		 * to get the divisor.
+		 */
+		dscr_set_devstate(timer64_devstate_id, DSCR_DEVSTATE_ENABLED);
+	}
+
+	pr_debug("%s: Timer irq=%d.\n", np->full_name, cd->irq);
+
+	clockevents_calc_mult_shift(cd, c6x_core_freq / TIMER_DIVISOR, 5);
+
+	cd->max_delta_ns	= clockevent_delta2ns(0x7fffffff, cd);
+	cd->min_delta_ns	= clockevent_delta2ns(250, cd);
+
+	cd->cpumask		= cpumask_of(smp_processor_id());
+
+	clockevents_register_device(cd);
+	setup_irq(cd->irq, &timer_iact);
+
+out:
+	of_node_put(np);
+	return;
+}
diff --git a/arch/cris/Kconfig b/arch/cris/Kconfig
index 408b055..b3abfb0 100644
--- a/arch/cris/Kconfig
+++ b/arch/cris/Kconfig
@@ -19,10 +19,6 @@ config GENERIC_CMOS_UPDATE
 config ARCH_USES_GETTIMEOFFSET
 	def_bool n
 
-config GENERIC_IOMAP
-       bool
-       default y
-
 config ARCH_HAS_ILOG2_U32
 	bool
 	default n
@@ -52,6 +48,7 @@ config CRIS
 	select HAVE_IDE
 	select HAVE_GENERIC_HARDIRQS
 	select GENERIC_IRQ_SHOW
+	select GENERIC_IOMAP
 
 config HZ
 	int
diff --git a/arch/cris/arch-v32/drivers/axisflashmap.c b/arch/cris/arch-v32/drivers/axisflashmap.c
index a2bde37..b34438e 100644
--- a/arch/cris/arch-v32/drivers/axisflashmap.c
+++ b/arch/cris/arch-v32/drivers/axisflashmap.c
@@ -404,8 +404,7 @@ static int __init init_axis_flash(void)
 		 */
 		int blockstat;
 		do {
-			blockstat = main_mtd->block_isbad(main_mtd,
-				ptable_sector);
+			blockstat = mtd_block_isbad(main_mtd, ptable_sector);
 			if (blockstat < 0)
 				ptable_sector = 0; /* read error */
 			else if (blockstat)
@@ -413,8 +412,8 @@ static int __init init_axis_flash(void)
 		} while (blockstat && ptable_sector);
 #endif
 		if (ptable_sector) {
-			main_mtd->read(main_mtd, ptable_sector, PAGESIZE,
-				&len, page);
+			mtd_read(main_mtd, ptable_sector, PAGESIZE, &len,
+				 page);
 			ptable_head = &((struct partitiontable *) page)->head;
 		}
 
diff --git a/arch/frv/Kconfig b/arch/frv/Kconfig
index c5e69ab..bbbf7927 100644
--- a/arch/frv/Kconfig
+++ b/arch/frv/Kconfig
@@ -317,6 +317,7 @@ config PCI
 	bool "Use PCI"
 	depends on MB93090_MB00
 	default y
+	select GENERIC_PCI_IOMAP
 	help
 	  Some FR-V systems (such as the MB93090-MB00 VDK) have PCI
 	  onboard. If you have one of these boards and you wish to use the PCI
diff --git a/arch/frv/include/asm/io.h b/arch/frv/include/asm/io.h
index ca7475e..8cb50a2 100644
--- a/arch/frv/include/asm/io.h
+++ b/arch/frv/include/asm/io.h
@@ -21,6 +21,7 @@
 #include <asm/virtconvert.h>
 #include <asm/string.h>
 #include <asm/mb-regs.h>
+#include <asm-generic/pci_iomap.h>
 #include <linux/delay.h>
 
 /*
@@ -370,7 +371,6 @@ static inline void iowrite32_rep(void __iomem *p, const void *src, unsigned long
 
 /* Create a virtual mapping cookie for a PCI BAR (memory or IO) */
 struct pci_dev;
-extern void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long max);
 static inline void pci_iounmap(struct pci_dev *dev, void __iomem *p)
 {
 }
diff --git a/arch/frv/mb93090-mb00/Makefile b/arch/frv/mb93090-mb00/Makefile
index b73b542..21f1df1 100644
--- a/arch/frv/mb93090-mb00/Makefile
+++ b/arch/frv/mb93090-mb00/Makefile
@@ -3,7 +3,7 @@
 #
 
 ifeq "$(CONFIG_PCI)" "y"
-obj-y := pci-frv.o pci-irq.o pci-vdk.o pci-iomap.o
+obj-y := pci-frv.o pci-irq.o pci-vdk.o
 
 ifeq "$(CONFIG_MMU)" "y"
 obj-y += pci-dma.o
diff --git a/arch/frv/mb93090-mb00/pci-iomap.c b/arch/frv/mb93090-mb00/pci-iomap.c
deleted file mode 100644
index 35f6df2..0000000
--- a/arch/frv/mb93090-mb00/pci-iomap.c
+++ /dev/null
@@ -1,29 +0,0 @@
-/* pci-iomap.c: description
- *
- * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-#include <linux/pci.h>
-#include <linux/module.h>
-
-void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen)
-{
-	resource_size_t start = pci_resource_start(dev, bar);
-	resource_size_t len = pci_resource_len(dev, bar);
-	unsigned long flags = pci_resource_flags(dev, bar);
-
-	if (!len || !start)
-		return NULL;
-
-	if ((flags & IORESOURCE_IO) || (flags & IORESOURCE_MEM))
-		return (void __iomem *) start;
-
-	return NULL;
-}
-
-EXPORT_SYMBOL(pci_iomap);
diff --git a/arch/hexagon/Kconfig b/arch/hexagon/Kconfig
index 02513c2..9059e39 100644
--- a/arch/hexagon/Kconfig
+++ b/arch/hexagon/Kconfig
@@ -26,6 +26,7 @@ config HEXAGON
 	select HAVE_ARCH_KGDB
 	select HAVE_ARCH_TRACEHOOK
 	select NO_IOPORT
+	select GENERIC_IOMAP
 	# mostly generic routines, with some accelerated ones
 	---help---
 	  Qualcomm Hexagon is a processor architecture designed for high
@@ -73,9 +74,6 @@ config GENERIC_CSUM
 config GENERIC_IRQ_PROBE
 	def_bool y
 
-config GENERIC_IOMAP
-	def_bool y
-
 #config ZONE_DMA
 #	bool
 #	default y
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 3b7a7c4..bd72669 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -32,6 +32,7 @@ config IA64
 	select GENERIC_IRQ_SHOW
 	select ARCH_WANT_OPTIONAL_GPIOLIB
 	select ARCH_HAVE_NMI_SAFE_CMPXCHG
+	select GENERIC_IOMAP
 	default y
 	help
 	  The Itanium Processor Family is Intel's 64-bit successor to
@@ -105,10 +106,6 @@ config EFI
 	bool
 	default y
 
-config GENERIC_IOMAP
-	bool
-	default y
-
 config ARCH_CLOCKSOURCE_DATA
 	def_bool y
 
diff --git a/arch/ia64/include/asm/iommu.h b/arch/ia64/include/asm/iommu.h
index 105c93b..b6a809f 100644
--- a/arch/ia64/include/asm/iommu.h
+++ b/arch/ia64/include/asm/iommu.h
@@ -11,10 +11,12 @@ extern void no_iommu_init(void);
 extern int force_iommu, no_iommu;
 extern int iommu_pass_through;
 extern int iommu_detected;
+extern int iommu_group_mf;
 #else
 #define iommu_pass_through	(0)
 #define no_iommu		(1)
 #define iommu_detected		(0)
+#define iommu_group_mf		(0)
 #endif
 extern void iommu_dma_init(void);
 extern void machvec_init(const char *name);
diff --git a/arch/ia64/include/asm/xen/interface.h b/arch/ia64/include/asm/xen/interface.h
index 1d2427d..fbb5198 100644
--- a/arch/ia64/include/asm/xen/interface.h
+++ b/arch/ia64/include/asm/xen/interface.h
@@ -71,7 +71,7 @@
 __DEFINE_GUEST_HANDLE(uchar, unsigned char);
 __DEFINE_GUEST_HANDLE(uint, unsigned int);
 __DEFINE_GUEST_HANDLE(ulong, unsigned long);
-__DEFINE_GUEST_HANDLE(u64, unsigned long);
+
 DEFINE_GUEST_HANDLE(char);
 DEFINE_GUEST_HANDLE(int);
 DEFINE_GUEST_HANDLE(long);
diff --git a/arch/ia64/kernel/pci-dma.c b/arch/ia64/kernel/pci-dma.c
index c16162c..eb11757 100644
--- a/arch/ia64/kernel/pci-dma.c
+++ b/arch/ia64/kernel/pci-dma.c
@@ -33,6 +33,7 @@ int force_iommu __read_mostly;
 #endif
 
 int iommu_pass_through;
+int iommu_group_mf;
 
 /* Dummy device used for NULL arguments (normally ISA). Better would
    be probably a smaller DMA mask, but this is bug-to-bug compatible
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index 43f4c92..4050520 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -774,13 +774,13 @@ struct kvm *kvm_arch_alloc_vm(void)
 	return kvm;
 }
 
-struct kvm_io_range {
+struct kvm_ia64_io_range {
 	unsigned long start;
 	unsigned long size;
 	unsigned long type;
 };
 
-static const struct kvm_io_range io_ranges[] = {
+static const struct kvm_ia64_io_range io_ranges[] = {
 	{VGA_IO_START, VGA_IO_SIZE, GPFN_FRAME_BUFFER},
 	{MMIO_START, MMIO_SIZE, GPFN_LOW_MMIO},
 	{LEGACY_IO_START, LEGACY_IO_SIZE, GPFN_LEGACY_IO},
@@ -1366,14 +1366,12 @@ static void kvm_release_vm_pages(struct kvm *kvm)
 {
 	struct kvm_memslots *slots;
 	struct kvm_memory_slot *memslot;
-	int i, j;
+	int j;
 	unsigned long base_gfn;
 
 	slots = kvm_memslots(kvm);
-	for (i = 0; i < slots->nmemslots; i++) {
-		memslot = &slots->memslots[i];
+	kvm_for_each_memslot(memslot, slots) {
 		base_gfn = memslot->base_gfn;
-
 		for (j = 0; j < memslot->npages; j++) {
 			if (memslot->rmap[j])
 				put_page((struct page *)memslot->rmap[j]);
@@ -1820,7 +1818,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 	if (log->slot >= KVM_MEMORY_SLOTS)
 		goto out;
 
-	memslot = &kvm->memslots->memslots[log->slot];
+	memslot = id_to_memslot(kvm->memslots, log->slot);
 	r = -ENOENT;
 	if (!memslot->dirty_bitmap)
 		goto out;
diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig
index 81fdaa7..99c3636 100644
--- a/arch/m68k/Kconfig
+++ b/arch/m68k/Kconfig
@@ -37,9 +37,6 @@ config GENERIC_CALIBRATE_DELAY
 	bool
 	default y
 
-config GENERIC_IOMAP
-	def_bool MMU
-
 config GENERIC_CSUM
 	bool
 
@@ -81,6 +78,7 @@ source "kernel/Kconfig.freezer"
 config MMU
 	bool "MMU-based Paged Memory Management Support"
 	default y
+	select GENERIC_IOMAP
 	help
 	  Select if you want MMU-based virtualised addressing space
 	  support by paged memory management. If unsure, say 'Y'.
diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig
index e446bab..f0eead7 100644
--- a/arch/microblaze/Kconfig
+++ b/arch/microblaze/Kconfig
@@ -17,6 +17,7 @@ config MICROBLAZE
 	select HAVE_GENERIC_HARDIRQS
 	select GENERIC_IRQ_PROBE
 	select GENERIC_IRQ_SHOW
+	select GENERIC_PCI_IOMAP
 
 config SWAP
 	def_bool n
diff --git a/arch/microblaze/include/asm/irq.h b/arch/microblaze/include/asm/irq.h
index cc54187..a175132 100644
--- a/arch/microblaze/include/asm/irq.h
+++ b/arch/microblaze/include/asm/irq.h
@@ -9,7 +9,14 @@
 #ifndef _ASM_MICROBLAZE_IRQ_H
 #define _ASM_MICROBLAZE_IRQ_H
 
-#define NR_IRQS 32
+
+/*
+ * Linux IRQ# is currently offset by one to map to the hardware
+ * irq number. So hardware IRQ0 maps to Linux irq 1.
+ */
+#define NO_IRQ_OFFSET	1
+#define IRQ_OFFSET	NO_IRQ_OFFSET
+#define NR_IRQS		(32 + IRQ_OFFSET)
 #include <asm-generic/irq.h>
 
 /* This type is the placeholder for a hardware interrupt number. It has to
@@ -20,8 +27,6 @@ typedef unsigned long irq_hw_number_t;
 
 extern unsigned int nr_irq;
 
-#define NO_IRQ (-1)
-
 struct pt_regs;
 extern void do_IRQ(struct pt_regs *regs);
 
diff --git a/arch/microblaze/include/asm/page.h b/arch/microblaze/include/asm/page.h
index ed9d0f6..a25e6b5 100644
--- a/arch/microblaze/include/asm/page.h
+++ b/arch/microblaze/include/asm/page.h
@@ -174,15 +174,8 @@ extern int page_is_ram(unsigned long pfn);
 
 #define	virt_addr_valid(vaddr)	(pfn_valid(virt_to_pfn(vaddr)))
 
-
-#  ifndef CONFIG_MMU
-#  define __pa(vaddr)	((unsigned long) (vaddr))
-#  define __va(paddr)	((void *) (paddr))
-#  else /* CONFIG_MMU */
-#  define __pa(x)	__virt_to_phys((unsigned long)(x))
-#  define __va(x)	((void *)__phys_to_virt((unsigned long)(x)))
-#  endif /* CONFIG_MMU */
-
+# define __pa(x)	__virt_to_phys((unsigned long)(x))
+# define __va(x)	((void *)__phys_to_virt((unsigned long)(x)))
 
 /* Convert between virtual and physical address for MMU. */
 /* Handle MicroBlaze processor with virtual memory. */
diff --git a/arch/microblaze/include/asm/setup.h b/arch/microblaze/include/asm/setup.h
index 904e5ef..6c72ed7 100644
--- a/arch/microblaze/include/asm/setup.h
+++ b/arch/microblaze/include/asm/setup.h
@@ -26,12 +26,6 @@ int setup_early_printk(char *opt);
 void remap_early_printk(void);
 void disable_early_printk(void);
 
-#if defined(CONFIG_EARLY_PRINTK)
-#define eprintk early_printk
-#else
-#define eprintk printk
-#endif
-
 void heartbeat(void);
 void setup_heartbeat(void);
 
diff --git a/arch/microblaze/include/asm/unistd.h b/arch/microblaze/include/asm/unistd.h
index 7d7092b..d20ffbc 100644
--- a/arch/microblaze/include/asm/unistd.h
+++ b/arch/microblaze/include/asm/unistd.h
@@ -391,8 +391,11 @@
 #define __NR_clock_adjtime	373
 #define __NR_syncfs		374
 #define __NR_setns		375
+#define __NR_sendmmsg		376
+#define __NR_process_vm_readv	377
+#define __NR_process_vm_writev	378
 
-#define __NR_syscalls		376
+#define __NR_syscalls		379
 
 #ifdef __KERNEL__
 #ifndef __ASSEMBLY__
diff --git a/arch/microblaze/kernel/early_printk.c b/arch/microblaze/kernel/early_printk.c
index d26d92d..8356e47 100644
--- a/arch/microblaze/kernel/early_printk.c
+++ b/arch/microblaze/kernel/early_printk.c
@@ -50,9 +50,9 @@ static void early_printk_uartlite_write(struct console *unused,
 					const char *s, unsigned n)
 {
 	while (*s && n-- > 0) {
-		early_printk_uartlite_putc(*s);
 		if (*s == '\n')
 			early_printk_uartlite_putc('\r');
+		early_printk_uartlite_putc(*s);
 		s++;
 	}
 }
@@ -94,9 +94,9 @@ static void early_printk_uart16550_write(struct console *unused,
 					const char *s, unsigned n)
 {
 	while (*s && n-- > 0) {
-		early_printk_uart16550_putc(*s);
 		if (*s == '\n')
 			early_printk_uart16550_putc('\r');
+		early_printk_uart16550_putc(*s);
 		s++;
 	}
 }
diff --git a/arch/microblaze/kernel/entry.S b/arch/microblaze/kernel/entry.S
index ca15bc5..66e34a3 100644
--- a/arch/microblaze/kernel/entry.S
+++ b/arch/microblaze/kernel/entry.S
@@ -468,7 +468,7 @@ C_ENTRY(sys_fork_wrapper):
 	addi	r5, r0, SIGCHLD			/* Arg 0: flags */
 	lwi	r6, r1, PT_R1	/* Arg 1: child SP (use parent's) */
 	addik	r7, r1, 0			/* Arg 2: parent context */
-	add	r8. r0, r0			/* Arg 3: (unused) */
+	add	r8, r0, r0			/* Arg 3: (unused) */
 	add	r9, r0, r0;			/* Arg 4: (unused) */
 	brid	do_fork		/* Do real work (tail-call) */
 	add	r10, r0, r0;			/* Arg 5: (unused) */
diff --git a/arch/microblaze/kernel/intc.c b/arch/microblaze/kernel/intc.c
index eb41441..44b177e 100644
--- a/arch/microblaze/kernel/intc.c
+++ b/arch/microblaze/kernel/intc.c
@@ -42,8 +42,9 @@ unsigned int nr_irq;
 
 static void intc_enable_or_unmask(struct irq_data *d)
 {
-	unsigned long mask = 1 << d->irq;
-	pr_debug("enable_or_unmask: %d\n", d->irq);
+	unsigned long mask = 1 << d->hwirq;
+
+	pr_debug("enable_or_unmask: %ld\n", d->hwirq);
 	out_be32(INTC_BASE + SIE, mask);
 
 	/* ack level irqs because they can't be acked during
@@ -56,20 +57,21 @@ static void intc_enable_or_unmask(struct irq_data *d)
 
 static void intc_disable_or_mask(struct irq_data *d)
 {
-	pr_debug("disable: %d\n", d->irq);
-	out_be32(INTC_BASE + CIE, 1 << d->irq);
+	pr_debug("disable: %ld\n", d->hwirq);
+	out_be32(INTC_BASE + CIE, 1 << d->hwirq);
 }
 
 static void intc_ack(struct irq_data *d)
 {
-	pr_debug("ack: %d\n", d->irq);
-	out_be32(INTC_BASE + IAR, 1 << d->irq);
+	pr_debug("ack: %ld\n", d->hwirq);
+	out_be32(INTC_BASE + IAR, 1 << d->hwirq);
 }
 
 static void intc_mask_ack(struct irq_data *d)
 {
-	unsigned long mask = 1 << d->irq;
-	pr_debug("disable_and_ack: %d\n", d->irq);
+	unsigned long mask = 1 << d->hwirq;
+
+	pr_debug("disable_and_ack: %ld\n", d->hwirq);
 	out_be32(INTC_BASE + CIE, mask);
 	out_be32(INTC_BASE + IAR, mask);
 }
@@ -91,7 +93,7 @@ unsigned int get_irq(struct pt_regs *regs)
 	 * order to handle multiple interrupt controllers. It currently
 	 * is hardcoded to check for interrupts only on the first INTC.
 	 */
-	irq = in_be32(INTC_BASE + IVR);
+	irq = in_be32(INTC_BASE + IVR) + NO_IRQ_OFFSET;
 	pr_debug("get_irq: %d\n", irq);
 
 	return irq;
@@ -99,7 +101,7 @@ unsigned int get_irq(struct pt_regs *regs)
 
 void __init init_IRQ(void)
 {
-	u32 i, j, intr_type;
+	u32 i, intr_mask;
 	struct device_node *intc = NULL;
 #ifdef CONFIG_SELFMOD_INTC
 	unsigned int intc_baseaddr = 0;
@@ -113,35 +115,24 @@ void __init init_IRQ(void)
 				0
 			};
 #endif
-	const char * const intc_list[] = {
-				"xlnx,xps-intc-1.00.a",
-				NULL
-			};
-
-	for (j = 0; intc_list[j] != NULL; j++) {
-		intc = of_find_compatible_node(NULL, NULL, intc_list[j]);
-		if (intc)
-			break;
-	}
+	intc = of_find_compatible_node(NULL, NULL, "xlnx,xps-intc-1.00.a");
 	BUG_ON(!intc);
 
-	intc_baseaddr = be32_to_cpup(of_get_property(intc,
-								"reg", NULL));
+	intc_baseaddr = be32_to_cpup(of_get_property(intc, "reg", NULL));
 	intc_baseaddr = (unsigned long) ioremap(intc_baseaddr, PAGE_SIZE);
 	nr_irq = be32_to_cpup(of_get_property(intc,
 						"xlnx,num-intr-inputs", NULL));
 
-	intr_type =
-		be32_to_cpup(of_get_property(intc,
-						"xlnx,kind-of-intr", NULL));
-	if (intr_type > (u32)((1ULL << nr_irq) - 1))
+	intr_mask =
+		be32_to_cpup(of_get_property(intc, "xlnx,kind-of-intr", NULL));
+	if (intr_mask > (u32)((1ULL << nr_irq) - 1))
 		printk(KERN_INFO " ERROR: Mismatch in kind-of-intr param\n");
 
 #ifdef CONFIG_SELFMOD_INTC
 	selfmod_function((int *) arr_func, intc_baseaddr);
 #endif
-	printk(KERN_INFO "%s #0 at 0x%08x, num_irq=%d, edge=0x%x\n",
-		intc_list[j], intc_baseaddr, nr_irq, intr_type);
+	printk(KERN_INFO "XPS intc #0 at 0x%08x, num_irq=%d, edge=0x%x\n",
+		intc_baseaddr, nr_irq, intr_mask);
 
 	/*
 	 * Disable all external interrupts until they are
@@ -155,8 +146,8 @@ void __init init_IRQ(void)
 	/* Turn on the Master Enable. */
 	out_be32(intc_baseaddr + MER, MER_HIE | MER_ME);
 
-	for (i = 0; i < nr_irq; ++i) {
-		if (intr_type & (0x00000001 << i)) {
+	for (i = IRQ_OFFSET; i < (nr_irq + IRQ_OFFSET); ++i) {
+		if (intr_mask & (0x00000001 << (i - IRQ_OFFSET))) {
 			irq_set_chip_and_handler_name(i, &intc_dev,
 				handle_edge_irq, "edge");
 			irq_clear_status_flags(i, IRQ_LEVEL);
@@ -165,5 +156,6 @@ void __init init_IRQ(void)
 				handle_level_irq, "level");
 			irq_set_status_flags(i, IRQ_LEVEL);
 		}
+		irq_get_irq_data(i)->hwirq = i - IRQ_OFFSET;
 	}
 }
diff --git a/arch/microblaze/kernel/irq.c b/arch/microblaze/kernel/irq.c
index e5d63a8..bbebcae 100644
--- a/arch/microblaze/kernel/irq.c
+++ b/arch/microblaze/kernel/irq.c
@@ -33,11 +33,12 @@ void __irq_entry do_IRQ(struct pt_regs *regs)
 	irq_enter();
 	irq = get_irq(regs);
 next_irq:
-	BUG_ON(irq == -1U);
-	generic_handle_irq(irq);
+	BUG_ON(!irq);
+	/* Substract 1 because of get_irq */
+	generic_handle_irq(irq + IRQ_OFFSET - NO_IRQ_OFFSET);
 
 	irq = get_irq(regs);
-	if (irq != -1U) {
+	if (irq) {
 		pr_debug("next irq: %d\n", irq);
 		++concurrent_irq;
 		goto next_irq;
@@ -52,13 +53,13 @@ next_irq:
   intc without any cascades or any connection that's why mapping is 1:1 */
 unsigned int irq_create_mapping(struct irq_host *host, irq_hw_number_t hwirq)
 {
-	return hwirq;
+	return hwirq + IRQ_OFFSET;
 }
 EXPORT_SYMBOL_GPL(irq_create_mapping);
 
 unsigned int irq_create_of_mapping(struct device_node *controller,
 				   const u32 *intspec, unsigned int intsize)
 {
-	return intspec[0];
+	return intspec[0] + IRQ_OFFSET;
 }
 EXPORT_SYMBOL_GPL(irq_create_of_mapping);
diff --git a/arch/microblaze/kernel/module.c b/arch/microblaze/kernel/module.c
index 142426f..f39257a 100644
--- a/arch/microblaze/kernel/module.c
+++ b/arch/microblaze/kernel/module.c
@@ -100,7 +100,7 @@ int apply_relocate_add(Elf32_Shdr *sechdrs, const char *strtab,
 			break;
 
 		case R_MICROBLAZE_64_NONE:
-			pr_debug("R_MICROBLAZE_NONE\n");
+			pr_debug("R_MICROBLAZE_64_NONE\n");
 			break;
 
 		case R_MICROBLAZE_NONE:
diff --git a/arch/microblaze/kernel/setup.c b/arch/microblaze/kernel/setup.c
index 0e654a1..604cd9d 100644
--- a/arch/microblaze/kernel/setup.c
+++ b/arch/microblaze/kernel/setup.c
@@ -145,32 +145,32 @@ void __init machine_early_init(const char *cmdline, unsigned int ram,
 	setup_early_printk(NULL);
 #endif
 
-	eprintk("Ramdisk addr 0x%08x, ", ram);
+	printk("Ramdisk addr 0x%08x, ", ram);
 	if (fdt)
-		eprintk("FDT at 0x%08x\n", fdt);
+		printk("FDT at 0x%08x\n", fdt);
 	else
-		eprintk("Compiled-in FDT at 0x%08x\n",
+		printk("Compiled-in FDT at 0x%08x\n",
 					(unsigned int)_fdt_start);
 
 #ifdef CONFIG_MTD_UCLINUX
-	eprintk("Found romfs @ 0x%08x (0x%08x)\n",
+	printk("Found romfs @ 0x%08x (0x%08x)\n",
 			romfs_base, romfs_size);
-	eprintk("#### klimit %p ####\n", old_klimit);
+	printk("#### klimit %p ####\n", old_klimit);
 	BUG_ON(romfs_size < 0); /* What else can we do? */
 
-	eprintk("Moved 0x%08x bytes from 0x%08x to 0x%08x\n",
+	printk("Moved 0x%08x bytes from 0x%08x to 0x%08x\n",
 			romfs_size, romfs_base, (unsigned)&_ebss);
 
-	eprintk("New klimit: 0x%08x\n", (unsigned)klimit);
+	printk("New klimit: 0x%08x\n", (unsigned)klimit);
 #endif
 
 #if CONFIG_XILINX_MICROBLAZE0_USE_MSR_INSTR
 	if (msr)
-		eprintk("!!!Your kernel has setup MSR instruction but "
+		printk("!!!Your kernel has setup MSR instruction but "
 				"CPU don't have it %x\n", msr);
 #else
 	if (!msr)
-		eprintk("!!!Your kernel not setup MSR instruction but "
+		printk("!!!Your kernel not setup MSR instruction but "
 				"CPU have it %x\n", msr);
 #endif
 
diff --git a/arch/microblaze/kernel/syscall_table.S b/arch/microblaze/kernel/syscall_table.S
index 8789daa..6a2b294 100644
--- a/arch/microblaze/kernel/syscall_table.S
+++ b/arch/microblaze/kernel/syscall_table.S
@@ -380,3 +380,6 @@ ENTRY(sys_call_table)
 	.long sys_clock_adjtime
 	.long sys_syncfs
 	.long sys_setns			/* 375 */
+	.long sys_sendmmsg
+	.long sys_process_vm_readv
+	.long sys_process_vm_writev
diff --git a/arch/microblaze/kernel/timer.c b/arch/microblaze/kernel/timer.c
index af74b11..3cb0bf6 100644
--- a/arch/microblaze/kernel/timer.c
+++ b/arch/microblaze/kernel/timer.c
@@ -243,7 +243,7 @@ static int timer_initialized;
 
 void __init time_init(void)
 {
-	u32 irq, i = 0;
+	u32 irq;
 	u32 timer_num = 1;
 	struct device_node *timer = NULL;
 	const void *prop;
@@ -258,33 +258,24 @@ void __init time_init(void)
 				0
 			};
 #endif
-	const char * const timer_list[] = {
-		"xlnx,xps-timer-1.00.a",
-		NULL
-	};
-
-	for (i = 0; timer_list[i] != NULL; i++) {
-		timer = of_find_compatible_node(NULL, NULL, timer_list[i]);
-		if (timer)
-			break;
-	}
+	timer = of_find_compatible_node(NULL, NULL, "xlnx,xps-timer-1.00.a");
 	BUG_ON(!timer);
 
 	timer_baseaddr = be32_to_cpup(of_get_property(timer, "reg", NULL));
 	timer_baseaddr = (unsigned long) ioremap(timer_baseaddr, PAGE_SIZE);
-	irq = be32_to_cpup(of_get_property(timer, "interrupts", NULL));
+	irq = irq_of_parse_and_map(timer, 0);
 	timer_num = be32_to_cpup(of_get_property(timer,
 						"xlnx,one-timer-only", NULL));
 	if (timer_num) {
-		eprintk(KERN_EMERG "Please enable two timers in HW\n");
+		printk(KERN_EMERG "Please enable two timers in HW\n");
 		BUG();
 	}
 
 #ifdef CONFIG_SELFMOD_TIMER
 	selfmod_function((int *) arr_func, timer_baseaddr);
 #endif
-	printk(KERN_INFO "%s #0 at 0x%08x, irq=%d\n",
-		timer_list[i], timer_baseaddr, irq);
+	printk(KERN_INFO "XPS timer #0 at 0x%08x, irq=%d\n",
+		timer_baseaddr, irq);
 
 	/* If there is clock-frequency property than use it */
 	prop = of_get_property(timer, "clock-frequency", NULL);
diff --git a/arch/microblaze/lib/Makefile b/arch/microblaze/lib/Makefile
index c13067b..844960e 100644
--- a/arch/microblaze/lib/Makefile
+++ b/arch/microblaze/lib/Makefile
@@ -20,6 +20,7 @@ lib-y += uaccess_old.o
 
 lib-y += ashldi3.o
 lib-y += ashrdi3.o
+lib-y += cmpdi2.o
 lib-y += divsi3.o
 lib-y += lshrdi3.o
 lib-y += modsi3.o
diff --git a/arch/microblaze/lib/cmpdi2.c b/arch/microblaze/lib/cmpdi2.c
new file mode 100644
index 0000000..a708400
--- /dev/null
+++ b/arch/microblaze/lib/cmpdi2.c
@@ -0,0 +1,26 @@
+#include <linux/module.h>
+
+#include "libgcc.h"
+
+word_type __cmpdi2(long long a, long long b)
+{
+	const DWunion au = {
+		.ll = a
+	};
+	const DWunion bu = {
+		.ll = b
+	};
+
+	if (au.s.high < bu.s.high)
+		return 0;
+	else if (au.s.high > bu.s.high)
+		return 2;
+
+	if ((unsigned int) au.s.low < (unsigned int) bu.s.low)
+		return 0;
+	else if ((unsigned int) au.s.low > (unsigned int) bu.s.low)
+		return 2;
+
+	return 1;
+}
+EXPORT_SYMBOL(__cmpdi2);
diff --git a/arch/microblaze/pci/iomap.c b/arch/microblaze/pci/iomap.c
index 57acda8..b07abba 100644
--- a/arch/microblaze/pci/iomap.c
+++ b/arch/microblaze/pci/iomap.c
@@ -10,25 +10,6 @@
 #include <asm/io.h>
 #include <asm/pci-bridge.h>
 
-void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long max)
-{
-	resource_size_t start = pci_resource_start(dev, bar);
-	resource_size_t len = pci_resource_len(dev, bar);
-	unsigned long flags = pci_resource_flags(dev, bar);
-
-	if (!len)
-		return NULL;
-	if (max && len > max)
-		len = max;
-	if (flags & IORESOURCE_IO)
-		return ioport_map(start, len);
-	if (flags & IORESOURCE_MEM)
-		return ioremap(start, len);
-	/* What? */
-	return NULL;
-}
-EXPORT_SYMBOL(pci_iomap);
-
 void pci_iounmap(struct pci_dev *dev, void __iomem *addr)
 {
 	if (isa_vaddr_is_ioport(addr))
diff --git a/arch/microblaze/pci/pci-common.c b/arch/microblaze/pci/pci-common.c
index db841c7..0d71b2e 100644
--- a/arch/microblaze/pci/pci-common.c
+++ b/arch/microblaze/pci/pci-common.c
@@ -242,7 +242,7 @@ int pci_read_irq_line(struct pci_dev *pci_dev)
 			 line, pin);
 
 		virq = irq_create_mapping(NULL, line);
-		if (virq != NO_IRQ)
+		if (virq)
 			irq_set_irq_type(virq, IRQ_TYPE_LEVEL_LOW);
 	} else {
 		pr_debug(" Got one, spec %d cells (0x%08x 0x%08x...) on %s\n",
@@ -253,7 +253,7 @@ int pci_read_irq_line(struct pci_dev *pci_dev)
 		virq = irq_create_of_mapping(oirq.controller, oirq.specifier,
 					     oirq.size);
 	}
-	if (virq == NO_IRQ) {
+	if (!virq) {
 		pr_debug(" Failed to map !\n");
 		return -1;
 	}
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index a7636d3..29d9218 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -16,6 +16,7 @@ config MIPS
 	select HAVE_FUNCTION_GRAPH_TRACER
 	select HAVE_KPROBES
 	select HAVE_KRETPROBES
+	select ARCH_BINFMT_ELF_RANDOMIZE_PIE
 	select RTC_LIB if !MACH_LOONGSON
 	select GENERIC_ATOMIC64 if !64BIT
 	select HAVE_DMA_ATTRS
@@ -2316,6 +2317,7 @@ config PCI
 	bool "Support for PCI controller"
 	depends on HW_HAS_PCI
 	select PCI_DOMAINS
+	select GENERIC_PCI_IOMAP
 	help
 	  Find out whether you have a PCI motherboard. PCI is the name of a
 	  bus system, i.e. the way the CPU talks to the other stuff inside
diff --git a/arch/mips/bcm63xx/boards/board_bcm963xx.c b/arch/mips/bcm63xx/boards/board_bcm963xx.c
index 40b223b..c223854 100644
--- a/arch/mips/bcm63xx/boards/board_bcm963xx.c
+++ b/arch/mips/bcm63xx/boards/board_bcm963xx.c
@@ -834,10 +834,13 @@ static struct mtd_partition mtd_partitions[] = {
 	}
 };
 
+static const char *bcm63xx_part_types[] = { "bcm63xxpart", NULL };
+
 static struct physmap_flash_data flash_data = {
 	.width			= 2,
 	.nr_parts		= ARRAY_SIZE(mtd_partitions),
 	.parts			= mtd_partitions,
+	.part_probe_types	= bcm63xx_part_types,
 };
 
 static struct resource mtd_resources[] = {
diff --git a/arch/mips/include/asm/mach-bcm63xx/bcm963xx_tag.h b/arch/mips/include/asm/mach-bcm63xx/bcm963xx_tag.h
index ed72e6a..1e6b587 100644
--- a/arch/mips/include/asm/mach-bcm63xx/bcm963xx_tag.h
+++ b/arch/mips/include/asm/mach-bcm63xx/bcm963xx_tag.h
@@ -16,7 +16,6 @@
 #define TAGINFO1_LEN		30	/* Length of vendor information field1 in tag */
 #define FLASHLAYOUTVER_LEN	4	/* Length of Flash Layout Version String tag */
 #define TAGINFO2_LEN		16	/* Length of vendor information field2 in tag */
-#define CRC_LEN			4	/* Length of CRC in bytes */
 #define ALTTAGINFO_LEN		54	/* Alternate length for vendor information; Pirelli */
 
 #define NUM_PIRELLI		2
@@ -77,19 +76,19 @@ struct bcm_tag {
 	/* 192-195: Version flash layout */
 	char flash_layout_ver[FLASHLAYOUTVER_LEN];
 	/* 196-199: kernel+rootfs CRC32 */
-	char fskernel_crc[CRC_LEN];
+	__u32 fskernel_crc;
 	/* 200-215: Unused except on Alice Gate where is is information */
 	char information2[TAGINFO2_LEN];
 	/* 216-219: CRC32 of image less imagetag (kernel for Alice Gate) */
-	char image_crc[CRC_LEN];
+	__u32 image_crc;
 	/* 220-223: CRC32 of rootfs partition */
-	char rootfs_crc[CRC_LEN];
+	__u32 rootfs_crc;
 	/* 224-227: CRC32 of kernel partition */
-	char kernel_crc[CRC_LEN];
+	__u32 kernel_crc;
 	/* 228-235: Unused at present */
 	char reserved1[8];
 	/* 236-239: CRC32 of header excluding last 20 bytes */
-	char header_crc[CRC_LEN];
+	__u32 header_crc;
 	/* 240-255: Unused at present */
 	char reserved2[16];
 };
diff --git a/arch/mips/lib/iomap-pci.c b/arch/mips/lib/iomap-pci.c
index 2ab899c..2635b1a 100644
--- a/arch/mips/lib/iomap-pci.c
+++ b/arch/mips/lib/iomap-pci.c
@@ -40,32 +40,6 @@ static void __iomem *ioport_map_pci(struct pci_dev *dev,
 	return (void __iomem *) (ctrl->io_map_base + port);
 }
 
-/*
- * Create a virtual mapping cookie for a PCI BAR (memory or IO)
- */
-void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen)
-{
-	resource_size_t start = pci_resource_start(dev, bar);
-	resource_size_t len = pci_resource_len(dev, bar);
-	unsigned long flags = pci_resource_flags(dev, bar);
-
-	if (!len || !start)
-		return NULL;
-	if (maxlen && len > maxlen)
-		len = maxlen;
-	if (flags & IORESOURCE_IO)
-		return ioport_map_pci(dev, start, len);
-	if (flags & IORESOURCE_MEM) {
-		if (flags & IORESOURCE_CACHEABLE)
-			return ioremap(start, len);
-		return ioremap_nocache(start, len);
-	}
-	/* What? */
-	return NULL;
-}
-
-EXPORT_SYMBOL(pci_iomap);
-
 void pci_iounmap(struct pci_dev *dev, void __iomem * addr)
 {
 	iounmap(addr);
diff --git a/arch/mn10300/Kconfig b/arch/mn10300/Kconfig
index 438db84..8f1c40d 100644
--- a/arch/mn10300/Kconfig
+++ b/arch/mn10300/Kconfig
@@ -252,6 +252,7 @@ config PCI
 	bool "Use PCI"
 	depends on MN10300_UNIT_ASB2305
 	default y
+	select GENERIC_PCI_IOMAP
 	help
 	  Some systems (such as the ASB2305) have PCI onboard. If you have one
 	  of these boards and you wish to use the PCI facilities, say Y here.
diff --git a/arch/mn10300/include/asm/io.h b/arch/mn10300/include/asm/io.h
index 787255d..139df8c 100644
--- a/arch/mn10300/include/asm/io.h
+++ b/arch/mn10300/include/asm/io.h
@@ -229,7 +229,6 @@ static inline void outsl(unsigned long addr, const void *buffer, int count)
 
 /* Create a virtual mapping cookie for a PCI BAR (memory or IO) */
 struct pci_dev;
-extern void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long max);
 static inline void pci_iounmap(struct pci_dev *dev, void __iomem *p)
 {
 }
@@ -251,15 +250,15 @@ static inline void *phys_to_virt(unsigned long address)
 /*
  * Change "struct page" to physical address.
  */
-static inline void *__ioremap(unsigned long offset, unsigned long size,
-			      unsigned long flags)
+static inline void __iomem *__ioremap(unsigned long offset, unsigned long size,
+				      unsigned long flags)
 {
-	return (void *) offset;
+	return (void __iomem *) offset;
 }
 
-static inline void *ioremap(unsigned long offset, unsigned long size)
+static inline void __iomem *ioremap(unsigned long offset, unsigned long size)
 {
-	return (void *) offset;
+	return (void __iomem *) offset;
 }
 
 /*
@@ -267,14 +266,14 @@ static inline void *ioremap(unsigned long offset, unsigned long size)
  * area.  it's useful if some control registers are in such an area and write
  * combining or read caching is not desirable:
  */
-static inline void *ioremap_nocache(unsigned long offset, unsigned long size)
+static inline void __iomem *ioremap_nocache(unsigned long offset, unsigned long size)
 {
-	return (void *) (offset | 0x20000000);
+	return (void __iomem *) (offset | 0x20000000);
 }
 
 #define ioremap_wc ioremap_nocache
 
-static inline void iounmap(void *addr)
+static inline void iounmap(void __iomem *addr)
 {
 }
 
diff --git a/arch/mn10300/unit-asb2305/Makefile b/arch/mn10300/unit-asb2305/Makefile
index 0551022..cbc5aba 100644
--- a/arch/mn10300/unit-asb2305/Makefile
+++ b/arch/mn10300/unit-asb2305/Makefile
@@ -5,4 +5,4 @@
 ###############################################################################
 obj-y   := unit-init.o leds.o
 
-obj-$(CONFIG_PCI) += pci.o pci-asb2305.o pci-irq.o pci-iomap.o
+obj-$(CONFIG_PCI) += pci.o pci-asb2305.o pci-irq.o
diff --git a/arch/mn10300/unit-asb2305/pci-iomap.c b/arch/mn10300/unit-asb2305/pci-iomap.c
deleted file mode 100644
index c1a8d8f..0000000
--- a/arch/mn10300/unit-asb2305/pci-iomap.c
+++ /dev/null
@@ -1,31 +0,0 @@
-/* ASB2305 PCI I/O mapping handler
- *
- * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public Licence
- * as published by the Free Software Foundation; either version
- * 2 of the Licence, or (at your option) any later version.
- */
-#include <linux/pci.h>
-#include <linux/module.h>
-
-/*
- * Create a virtual mapping cookie for a PCI BAR (memory or IO)
- */
-void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen)
-{
-	resource_size_t start = pci_resource_start(dev, bar);
-	resource_size_t len = pci_resource_len(dev, bar);
-	unsigned long flags = pci_resource_flags(dev, bar);
-
-	if (!len || !start)
-		return NULL;
-
-	if ((flags & IORESOURCE_IO) || (flags & IORESOURCE_MEM))
-		return (void __iomem *) start;
-
-	return NULL;
-}
-EXPORT_SYMBOL(pci_iomap);
diff --git a/arch/openrisc/Kconfig b/arch/openrisc/Kconfig
index e518a5a..081a54f 100644
--- a/arch/openrisc/Kconfig
+++ b/arch/openrisc/Kconfig
@@ -38,9 +38,6 @@ config RWSEM_XCHGADD_ALGORITHM
 config GENERIC_HWEIGHT
 	def_bool y
 
-config GENERIC_IOMAP
-	def_bool y
-
 config NO_IOPORT
 	def_bool y
 
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index fdfd8be..242a1b7 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -14,6 +14,7 @@ config PARISC
 	select GENERIC_ATOMIC64 if !64BIT
 	select HAVE_GENERIC_HARDIRQS
 	select GENERIC_IRQ_PROBE
+	select GENERIC_PCI_IOMAP
 	select IRQ_PER_CPU
 	select ARCH_HAVE_NMI_SAFE_CMPXCHG
 
diff --git a/arch/parisc/lib/iomap.c b/arch/parisc/lib/iomap.c
index 8f470c9..fb8e10a 100644
--- a/arch/parisc/lib/iomap.c
+++ b/arch/parisc/lib/iomap.c
@@ -436,28 +436,6 @@ void ioport_unmap(void __iomem *addr)
 	}
 }
 
-/* Create a virtual mapping cookie for a PCI BAR (memory or IO) */
-void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen)
-{
-	resource_size_t start = pci_resource_start(dev, bar);
-	resource_size_t len = pci_resource_len(dev, bar);
-	unsigned long flags = pci_resource_flags(dev, bar);
-
-	if (!len || !start)
-		return NULL;
-	if (maxlen && len > maxlen)
-		len = maxlen;
-	if (flags & IORESOURCE_IO)
-		return ioport_map(start, len);
-	if (flags & IORESOURCE_MEM) {
-		if (flags & IORESOURCE_CACHEABLE)
-			return ioremap(start, len);
-		return ioremap_nocache(start, len);
-	}
-	/* What? */
-	return NULL;
-}
-
 void pci_iounmap(struct pci_dev *dev, void __iomem * addr)
 {
 	if (!INDIRECT_ADDR(addr)) {
@@ -483,5 +461,4 @@ EXPORT_SYMBOL(iowrite16_rep);
 EXPORT_SYMBOL(iowrite32_rep);
 EXPORT_SYMBOL(ioport_map);
 EXPORT_SYMBOL(ioport_unmap);
-EXPORT_SYMBOL(pci_iomap);
 EXPORT_SYMBOL(pci_iounmap);
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 692ac75..1919634 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -718,6 +718,7 @@ config PCI
 	default PCI_PERMEDIA if !4xx && !CPM2 && !8xx
 	default PCI_QSPAN if !4xx && !CPM2 && 8xx
 	select ARCH_SUPPORTS_MSI
+	select GENERIC_PCI_IOMAP
 	help
 	  Find out whether your system includes a PCI bus. PCI is the name of
 	  a bus system, i.e. the way the CPU talks to the other stuff inside
diff --git a/arch/powerpc/include/asm/kvm.h b/arch/powerpc/include/asm/kvm.h
index 0ad432b..f7727d9 100644
--- a/arch/powerpc/include/asm/kvm.h
+++ b/arch/powerpc/include/asm/kvm.h
@@ -170,8 +170,8 @@ struct kvm_sregs {
 			} ppc64;
 			struct {
 				__u32 sr[16];
-				__u64 ibat[8]; 
-				__u64 dbat[8]; 
+				__u64 ibat[8];
+				__u64 dbat[8];
 			} ppc32;
 		} s;
 		struct {
diff --git a/arch/powerpc/kernel/iomap.c b/arch/powerpc/kernel/iomap.c
index 2627918..97a3715 100644
--- a/arch/powerpc/kernel/iomap.c
+++ b/arch/powerpc/kernel/iomap.c
@@ -119,24 +119,6 @@ EXPORT_SYMBOL(ioport_map);
 EXPORT_SYMBOL(ioport_unmap);
 
 #ifdef CONFIG_PCI
-void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long max)
-{
-	resource_size_t start = pci_resource_start(dev, bar);
-	resource_size_t len = pci_resource_len(dev, bar);
-	unsigned long flags = pci_resource_flags(dev, bar);
-
-	if (!len)
-		return NULL;
-	if (max && len > max)
-		len = max;
-	if (flags & IORESOURCE_IO)
-		return ioport_map(start, len);
-	if (flags & IORESOURCE_MEM)
-		return ioremap(start, len);
-	/* What? */
-	return NULL;
-}
-
 void pci_iounmap(struct pci_dev *dev, void __iomem *addr)
 {
 	if (isa_vaddr_is_ioport(addr))
@@ -146,6 +128,5 @@ void pci_iounmap(struct pci_dev *dev, void __iomem *addr)
 	iounmap(addr);
 }
 
-EXPORT_SYMBOL(pci_iomap);
 EXPORT_SYMBOL(pci_iounmap);
 #endif /* CONFIG_PCI */
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index a459479..e41ac6f 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -498,7 +498,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 
 	/* If nothing is dirty, don't bother messing with page tables. */
 	if (is_dirty) {
-		memslot = &kvm->memslots->memslots[log->slot];
+		memslot = id_to_memslot(kvm->memslots, log->slot);
 
 		ga = memslot->base_gfn << PAGE_SHIFT;
 		ga_end = ga + (memslot->npages << PAGE_SHIFT);
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index 286f13d..a795a13 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -86,7 +86,7 @@ static inline int lpcr_rmls(unsigned long rma_size)
  * to allocate contiguous physical memory for the real memory
  * areas for guests.
  */
-void kvm_rma_init(void)
+void __init kvm_rma_init(void)
 {
 	unsigned long i;
 	unsigned long j, npages;
diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig
index 31e1ade..0cfb46d 100644
--- a/arch/powerpc/platforms/Kconfig
+++ b/arch/powerpc/platforms/Kconfig
@@ -175,9 +175,6 @@ config PPC_INDIRECT_MMIO
 config PPC_IO_WORKAROUNDS
 	bool
 
-config GENERIC_IOMAP
-	bool
-
 source "drivers/cpufreq/Kconfig"
 
 menu "CPU Frequency drivers"
diff --git a/arch/score/Kconfig b/arch/score/Kconfig
index 8b0c946..3df65d3 100644
--- a/arch/score/Kconfig
+++ b/arch/score/Kconfig
@@ -4,6 +4,7 @@ config SCORE
        def_bool y
        select HAVE_GENERIC_HARDIRQS
        select GENERIC_IRQ_SHOW
+       select GENERIC_IOMAP
        select HAVE_MEMBLOCK
        select HAVE_MEMBLOCK_NODE_MAP
        select ARCH_DISCARD_MEMBLOCK
@@ -36,9 +37,6 @@ endmenu
 config CPU_SCORE7
 	bool
 
-config GENERIC_IOMAP
-	def_bool y
-
 config NO_DMA
 	bool
 	default y
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index 47a2f1c..3c8db65 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -85,9 +85,6 @@ config GENERIC_GPIO
 config GENERIC_CALIBRATE_DELAY
 	bool
 
-config GENERIC_IOMAP
-	bool
-
 config GENERIC_CLOCKEVENTS
 	def_bool y
 
@@ -861,6 +858,7 @@ config PCI
 	bool "PCI support"
 	depends on SYS_SUPPORTS_PCI
 	select PCI_DOMAINS
+	select GENERIC_PCI_IOMAP
 	help
 	  Find out whether you have a PCI motherboard. PCI is the name of a
 	  bus system, i.e. the way the CPU talks to the other stuff inside
diff --git a/arch/sh/drivers/pci/pci.c b/arch/sh/drivers/pci/pci.c
index c2691af..11aaf2f 100644
--- a/arch/sh/drivers/pci/pci.c
+++ b/arch/sh/drivers/pci/pci.c
@@ -393,29 +393,6 @@ static void __iomem *ioport_map_pci(struct pci_dev *dev,
 	return (void __iomem *)(chan->io_map_base + port);
 }
 
-void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen)
-{
-	resource_size_t start = pci_resource_start(dev, bar);
-	resource_size_t len = pci_resource_len(dev, bar);
-	unsigned long flags = pci_resource_flags(dev, bar);
-
-	if (unlikely(!len || !start))
-		return NULL;
-	if (maxlen && len > maxlen)
-		len = maxlen;
-
-	if (flags & IORESOURCE_IO)
-		return ioport_map_pci(dev, start, len);
-	if (flags & IORESOURCE_MEM) {
-		if (flags & IORESOURCE_CACHEABLE)
-			return ioremap(start, len);
-		return ioremap_nocache(start, len);
-	}
-
-	return NULL;
-}
-EXPORT_SYMBOL(pci_iomap);
-
 void pci_iounmap(struct pci_dev *dev, void __iomem *addr)
 {
 	iounmap(addr);
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index 868ea08..9665799 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -28,6 +28,7 @@ config SPARC
 	select HAVE_GENERIC_HARDIRQS
 	select GENERIC_IRQ_SHOW
 	select USE_GENERIC_SMP_HELPERS if SMP
+	select GENERIC_PCI_IOMAP
 
 config SPARC32
 	def_bool !64BIT
diff --git a/arch/sparc/include/asm/io_32.h b/arch/sparc/include/asm/io_32.h
index c2ced21..2006e5d 100644
--- a/arch/sparc/include/asm/io_32.h
+++ b/arch/sparc/include/asm/io_32.h
@@ -7,6 +7,7 @@
 
 #include <asm/page.h>      /* IO address mapping routines need this */
 #include <asm/system.h>
+#include <asm-generic/pci_iomap.h>
 
 #define page_to_phys(page)	(page_to_pfn(page) << PAGE_SHIFT)
 
@@ -324,7 +325,6 @@ extern void ioport_unmap(void __iomem *);
 
 /* Create a virtual mapping cookie for a PCI BAR (memory or IO) */
 struct pci_dev;
-extern void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long max);
 extern void pci_iounmap(struct pci_dev *dev, void __iomem *);
 
 /*
diff --git a/arch/sparc/include/asm/io_64.h b/arch/sparc/include/asm/io_64.h
index 9c89654..9481e5a 100644
--- a/arch/sparc/include/asm/io_64.h
+++ b/arch/sparc/include/asm/io_64.h
@@ -8,6 +8,7 @@
 #include <asm/page.h>      /* IO address mapping routines need this */
 #include <asm/system.h>
 #include <asm/asi.h>
+#include <asm-generic/pci_iomap.h>
 
 /* PC crapola... */
 #define __SLOW_DOWN_IO	do { } while (0)
@@ -514,7 +515,6 @@ extern void ioport_unmap(void __iomem *);
 
 /* Create a virtual mapping cookie for a PCI BAR (memory or IO) */
 struct pci_dev;
-extern void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long max);
 extern void pci_iounmap(struct pci_dev *dev, void __iomem *);
 
 static inline int sbus_can_dma_64bit(void)
diff --git a/arch/sparc/include/asm/signal.h b/arch/sparc/include/asm/signal.h
index e49b828..aa42fe3 100644
--- a/arch/sparc/include/asm/signal.h
+++ b/arch/sparc/include/asm/signal.h
@@ -143,10 +143,11 @@ struct sigstack {
 #define SA_ONSTACK	_SV_SSTACK
 #define SA_RESTART	_SV_INTR
 #define SA_ONESHOT	_SV_RESET
-#define SA_NOMASK	0x20u
+#define SA_NODEFER	0x20u
 #define SA_NOCLDWAIT    0x100u
 #define SA_SIGINFO      0x200u
 
+#define SA_NOMASK	SA_NODEFER
 
 #define SIG_BLOCK          0x01	/* for blocking signals */
 #define SIG_UNBLOCK        0x02	/* for unblocking signals */
diff --git a/arch/sparc/lib/iomap.c b/arch/sparc/lib/iomap.c
index 9ef37e1..c4d42a5 100644
--- a/arch/sparc/lib/iomap.c
+++ b/arch/sparc/lib/iomap.c
@@ -18,31 +18,8 @@ void ioport_unmap(void __iomem *addr)
 EXPORT_SYMBOL(ioport_map);
 EXPORT_SYMBOL(ioport_unmap);
 
-/* Create a virtual mapping cookie for a PCI BAR (memory or IO) */
-void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen)
-{
-	resource_size_t start = pci_resource_start(dev, bar);
-	resource_size_t len = pci_resource_len(dev, bar);
-	unsigned long flags = pci_resource_flags(dev, bar);
-
-	if (!len || !start)
-		return NULL;
-	if (maxlen && len > maxlen)
-		len = maxlen;
-	if (flags & IORESOURCE_IO)
-		return ioport_map(start, len);
-	if (flags & IORESOURCE_MEM) {
-		if (flags & IORESOURCE_CACHEABLE)
-			return ioremap(start, len);
-		return ioremap_nocache(start, len);
-	}
-	/* What? */
-	return NULL;
-}
-
 void pci_iounmap(struct pci_dev *dev, void __iomem * addr)
 {
 	/* nothing to do */
 }
-EXPORT_SYMBOL(pci_iomap);
 EXPORT_SYMBOL(pci_iounmap);
diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig
index 70a0de4..11270ca 100644
--- a/arch/tile/Kconfig
+++ b/arch/tile/Kconfig
@@ -321,6 +321,7 @@ config PCI
 	bool "PCI support"
 	default y
 	select PCI_DOMAINS
+	select GENERIC_PCI_IOMAP
 	---help---
 	  Enable PCI root complex support, so PCIe endpoint devices can
 	  be attached to the Tile chip.  Many, but not all, PCI devices
diff --git a/arch/tile/include/asm/io.h b/arch/tile/include/asm/io.h
index c9ea165..d2152de 100644
--- a/arch/tile/include/asm/io.h
+++ b/arch/tile/include/asm/io.h
@@ -204,7 +204,8 @@ static inline long ioport_panic(void)
 
 static inline void __iomem *ioport_map(unsigned long port, unsigned int len)
 {
-	return (void __iomem *) ioport_panic();
+	pr_info("ioport_map: mapping IO resources is unsupported on tile.\n");
+	return NULL;
 }
 
 static inline void ioport_unmap(void __iomem *addr)
diff --git a/arch/tile/include/asm/pci.h b/arch/tile/include/asm/pci.h
index 7f03cef..1d25fea 100644
--- a/arch/tile/include/asm/pci.h
+++ b/arch/tile/include/asm/pci.h
@@ -16,6 +16,7 @@
 #define _ASM_TILE_PCI_H
 
 #include <linux/pci.h>
+#include <asm-generic/pci_iomap.h>
 
 /*
  * Structure of a PCI controller (host bridge)
@@ -49,7 +50,6 @@ struct pci_controller {
 int __devinit tile_pci_init(void);
 int __devinit pcibios_init(void);
 
-void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long max);
 static inline void pci_iounmap(struct pci_dev *dev, void __iomem *addr) {}
 
 void __devinit pcibios_fixup_bus(struct pci_bus *bus);
diff --git a/arch/tile/kernel/pci.c b/arch/tile/kernel/pci.c
index 9d610d3..2556793 100644
--- a/arch/tile/kernel/pci.c
+++ b/arch/tile/kernel/pci.c
@@ -466,27 +466,6 @@ int pcibios_enable_device(struct pci_dev *dev, int mask)
 	return 0;
 }
 
-void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long max)
-{
-	unsigned long start = pci_resource_start(dev, bar);
-	unsigned long len = pci_resource_len(dev, bar);
-	unsigned long flags = pci_resource_flags(dev, bar);
-
-	if (!len)
-		return NULL;
-	if (max && len > max)
-		len = max;
-
-	if (!(flags & IORESOURCE_MEM)) {
-		pr_info("PCI: Trying to map invalid resource %#lx\n", flags);
-		start = 0;
-	}
-
-	return (void __iomem *)start;
-}
-EXPORT_SYMBOL(pci_iomap);
-
-
 /****************************************************************
  *
  * Tile PCI config space read/write routines
diff --git a/arch/unicore32/Kconfig b/arch/unicore32/Kconfig
index 942ed61..eeb8054 100644
--- a/arch/unicore32/Kconfig
+++ b/arch/unicore32/Kconfig
@@ -12,6 +12,7 @@ config UNICORE32
 	select GENERIC_IRQ_PROBE
 	select GENERIC_IRQ_SHOW
 	select ARCH_WANT_FRAME_POINTERS
+	select GENERIC_IOMAP
 	help
 	  UniCore-32 is 32-bit Instruction Set Architecture,
 	  including a series of low-power-consumption RISC chip
@@ -30,9 +31,6 @@ config GENERIC_CLOCKEVENTS
 config GENERIC_CSUM
 	def_bool y
 
-config GENERIC_IOMAP
-	def_bool y
-
 config NO_IOPORT
 	bool
 
diff --git a/arch/unicore32/include/asm/io.h b/arch/unicore32/include/asm/io.h
index 1a5c5a5..adddf6d 100644
--- a/arch/unicore32/include/asm/io.h
+++ b/arch/unicore32/include/asm/io.h
@@ -37,15 +37,9 @@ extern void __uc32_iounmap(volatile void __iomem *addr);
  */
 #define ioremap(cookie, size)		__uc32_ioremap(cookie, size)
 #define ioremap_cached(cookie, size)	__uc32_ioremap_cached(cookie, size)
+#define ioremap_nocache(cookie, size)	__uc32_ioremap(cookie, size)
 #define iounmap(cookie)			__uc32_iounmap(cookie)
 
-/*
- * Convert a physical pointer to a virtual kernel pointer for /dev/mem
- * access
- */
-#undef xlate_dev_mem_ptr
-#define xlate_dev_mem_ptr(p)	__va(p)
-
 #define HAVE_ARCH_PIO_SIZE
 #define PIO_OFFSET		(unsigned int)(PCI_IOBASE)
 #define PIO_MASK		(unsigned int)(IO_SPACE_LIMIT)
diff --git a/arch/unicore32/kernel/puv3-nb0916.c b/arch/unicore32/kernel/puv3-nb0916.c
index 37b12a0..181108b 100644
--- a/arch/unicore32/kernel/puv3-nb0916.c
+++ b/arch/unicore32/kernel/puv3-nb0916.c
@@ -123,7 +123,7 @@ int __init mach_nb0916_init(void)
 
 	if (request_irq(gpio_to_irq(GPI_LCD_CASE_OFF),
 		&nb0916_lcdcaseoff_handler,
-		IRQF_DISABLED | IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING,
+		IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING,
 		"NB0916 lcd case off", NULL) < 0) {
 
 		printk(KERN_DEBUG "LCD-Case-OFF IRQ %d not available\n",
@@ -131,7 +131,7 @@ int __init mach_nb0916_init(void)
 	}
 
 	if (request_irq(gpio_to_irq(GPI_OTP_INT), &nb0916_overheat_handler,
-		IRQF_DISABLED | IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING,
+		IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING,
 		"NB0916 overheating protection", NULL) < 0) {
 
 		printk(KERN_DEBUG "Overheating Protection IRQ %d not available\n",
diff --git a/arch/unicore32/kernel/setup.c b/arch/unicore32/kernel/setup.c
index 673d7a8..87adbf5 100644
--- a/arch/unicore32/kernel/setup.c
+++ b/arch/unicore32/kernel/setup.c
@@ -65,7 +65,7 @@ static char default_command_line[COMMAND_LINE_SIZE] __initdata = CONFIG_CMDLINE;
  */
 static struct resource mem_res[] = {
 	{
-		.name = "Kernel text",
+		.name = "Kernel code",
 		.start = 0,
 		.end = 0,
 		.flags = IORESOURCE_MEM
diff --git a/arch/unicore32/kernel/signal.c b/arch/unicore32/kernel/signal.c
index b163fca..911b549 100644
--- a/arch/unicore32/kernel/signal.c
+++ b/arch/unicore32/kernel/signal.c
@@ -63,10 +63,7 @@ static int restore_sigframe(struct pt_regs *regs, struct sigframe __user *sf)
 	err = __copy_from_user(&set, &sf->uc.uc_sigmask, sizeof(set));
 	if (err == 0) {
 		sigdelsetmask(&set, ~_BLOCKABLE);
-		spin_lock_irq(&current->sighand->siglock);
-		current->blocked = set;
-		recalc_sigpending();
-		spin_unlock_irq(&current->sighand->siglock);
+		set_current_blocked(&set);
 	}
 
 	err |= __get_user(regs->UCreg_00, &sf->uc.uc_mcontext.regs.UCreg_00);
@@ -321,6 +318,7 @@ static int handle_signal(unsigned long sig, struct k_sigaction *ka,
 {
 	struct thread_info *thread = current_thread_info();
 	struct task_struct *tsk = current;
+	sigset_t blocked;
 	int usig = sig;
 	int ret;
 
@@ -372,13 +370,10 @@ static int handle_signal(unsigned long sig, struct k_sigaction *ka,
 	/*
 	 * Block the signal if we were successful.
 	 */
-	spin_lock_irq(&tsk->sighand->siglock);
-	sigorsets(&tsk->blocked, &tsk->blocked,
-		  &ka->sa.sa_mask);
+	sigorsets(&blocked, &tsk->blocked, &ka->sa.sa_mask);
 	if (!(ka->sa.sa_flags & SA_NODEFER))
-		sigaddset(&tsk->blocked, sig);
-	recalc_sigpending();
-	spin_unlock_irq(&tsk->sighand->siglock);
+		sigaddset(&blocked, sig);
+	set_current_blocked(&blocked);
 
 	return 0;
 }
diff --git a/arch/unicore32/kernel/time.c b/arch/unicore32/kernel/time.c
index 080710c..d3824b2 100644
--- a/arch/unicore32/kernel/time.c
+++ b/arch/unicore32/kernel/time.c
@@ -86,7 +86,7 @@ static struct clocksource cksrc_puv3_oscr = {
 
 static struct irqaction puv3_timer_irq = {
 	.name		= "ost0",
-	.flags		= IRQF_DISABLED | IRQF_TIMER | IRQF_IRQPOLL,
+	.flags		= IRQF_TIMER | IRQF_IRQPOLL,
 	.handler	= puv3_ost0_interrupt,
 	.dev_id		= &ckevt_puv3_osmr0,
 };
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 1d2a69d..1a31254 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -62,6 +62,7 @@ config X86
 	select ANON_INODES
 	select HAVE_ARCH_KMEMCHECK
 	select HAVE_USER_RETURN_NOTIFIER
+	select ARCH_BINFMT_ELF_RANDOMIZE_PIE
 	select HAVE_ARCH_JUMP_LABEL
 	select HAVE_TEXT_POKE_SMP
 	select HAVE_GENERIC_HARDIRQS
@@ -77,6 +78,7 @@ config X86
 	select HAVE_BPF_JIT if (X86_64 && NET)
 	select CLKEVT_I8253
 	select ARCH_HAVE_NMI_SAFE_CMPXCHG
+	select GENERIC_IOMAP
 
 config INSTRUCTION_DECODER
 	def_bool (KPROBES || PERF_EVENTS)
@@ -142,9 +144,6 @@ config NEED_SG_DMA_LENGTH
 config GENERIC_ISA_DMA
 	def_bool ISA_DMA_API
 
-config GENERIC_IOMAP
-	def_bool y
-
 config GENERIC_BUG
 	def_bool y
 	depends on BUG
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index f3444f7..17c5d4b 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -197,7 +197,10 @@
 
 /* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */
 #define X86_FEATURE_FSGSBASE	(9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/
+#define X86_FEATURE_BMI1	(9*32+ 3) /* 1st group bit manipulation extensions */
+#define X86_FEATURE_AVX2	(9*32+ 5) /* AVX2 instructions */
 #define X86_FEATURE_SMEP	(9*32+ 7) /* Supervisor Mode Execution Protection */
+#define X86_FEATURE_BMI2	(9*32+ 8) /* 2nd group bit manipulation extensions */
 #define X86_FEATURE_ERMS	(9*32+ 9) /* Enhanced REP MOVSB/STOSB */
 
 #if defined(__KERNEL__) && !defined(__ASSEMBLY__)
diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h
index 345c99c..dffc38e 100644
--- a/arch/x86/include/asm/iommu.h
+++ b/arch/x86/include/asm/iommu.h
@@ -5,6 +5,7 @@ extern struct dma_map_ops nommu_dma_ops;
 extern int force_iommu, no_iommu;
 extern int iommu_detected;
 extern int iommu_pass_through;
+extern int iommu_group_mf;
 
 /* 10 seconds */
 #define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000)
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index a026507..ab4092e 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -181,6 +181,7 @@ struct x86_emulate_ops {
 	int (*set_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong value);
 	int (*set_msr)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 data);
 	int (*get_msr)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 *pdata);
+	int (*read_pmc)(struct x86_emulate_ctxt *ctxt, u32 pmc, u64 *pdata);
 	void (*halt)(struct x86_emulate_ctxt *ctxt);
 	void (*wbinvd)(struct x86_emulate_ctxt *ctxt);
 	int (*fix_hypercall)(struct x86_emulate_ctxt *ctxt);
@@ -364,6 +365,7 @@ enum x86_intercept {
 #endif
 
 int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len);
+bool x86_page_table_writing_insn(struct x86_emulate_ctxt *ctxt);
 #define EMULATION_FAILED -1
 #define EMULATION_OK 0
 #define EMULATION_RESTART 1
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index b4973f4..52d6640 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -16,10 +16,12 @@
 #include <linux/mmu_notifier.h>
 #include <linux/tracepoint.h>
 #include <linux/cpumask.h>
+#include <linux/irq_work.h>
 
 #include <linux/kvm.h>
 #include <linux/kvm_para.h>
 #include <linux/kvm_types.h>
+#include <linux/perf_event.h>
 
 #include <asm/pvclock-abi.h>
 #include <asm/desc.h>
@@ -31,6 +33,8 @@
 #define KVM_MEMORY_SLOTS 32
 /* memory slots that does not exposed to userspace */
 #define KVM_PRIVATE_MEM_SLOTS 4
+#define KVM_MEM_SLOTS_NUM (KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS)
+
 #define KVM_MMIO_SIZE 16
 
 #define KVM_PIO_PAGE_OFFSET 1
@@ -228,7 +232,7 @@ struct kvm_mmu_page {
 	 * One bit set per slot which has memory
 	 * in this shadow page.
 	 */
-	DECLARE_BITMAP(slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS);
+	DECLARE_BITMAP(slot_bitmap, KVM_MEM_SLOTS_NUM);
 	bool unsync;
 	int root_count;          /* Currently serving as active root */
 	unsigned int unsync_children;
@@ -239,14 +243,9 @@ struct kvm_mmu_page {
 	int clear_spte_count;
 #endif
 
-	struct rcu_head rcu;
-};
+	int write_flooding_count;
 
-struct kvm_pv_mmu_op_buffer {
-	void *ptr;
-	unsigned len;
-	unsigned processed;
-	char buf[512] __aligned(sizeof(long));
+	struct rcu_head rcu;
 };
 
 struct kvm_pio_request {
@@ -294,6 +293,37 @@ struct kvm_mmu {
 	u64 pdptrs[4]; /* pae */
 };
 
+enum pmc_type {
+	KVM_PMC_GP = 0,
+	KVM_PMC_FIXED,
+};
+
+struct kvm_pmc {
+	enum pmc_type type;
+	u8 idx;
+	u64 counter;
+	u64 eventsel;
+	struct perf_event *perf_event;
+	struct kvm_vcpu *vcpu;
+};
+
+struct kvm_pmu {
+	unsigned nr_arch_gp_counters;
+	unsigned nr_arch_fixed_counters;
+	unsigned available_event_types;
+	u64 fixed_ctr_ctrl;
+	u64 global_ctrl;
+	u64 global_status;
+	u64 global_ovf_ctrl;
+	u64 counter_bitmask[2];
+	u64 global_ctrl_mask;
+	u8 version;
+	struct kvm_pmc gp_counters[X86_PMC_MAX_GENERIC];
+	struct kvm_pmc fixed_counters[X86_PMC_MAX_FIXED];
+	struct irq_work irq_work;
+	u64 reprogram_pmi;
+};
+
 struct kvm_vcpu_arch {
 	/*
 	 * rip and regs accesses must go through
@@ -345,19 +375,10 @@ struct kvm_vcpu_arch {
 	 */
 	struct kvm_mmu *walk_mmu;
 
-	/* only needed in kvm_pv_mmu_op() path, but it's hot so
-	 * put it here to avoid allocation */
-	struct kvm_pv_mmu_op_buffer mmu_op_buffer;
-
 	struct kvm_mmu_memory_cache mmu_pte_list_desc_cache;
 	struct kvm_mmu_memory_cache mmu_page_cache;
 	struct kvm_mmu_memory_cache mmu_page_header_cache;
 
-	gfn_t last_pt_write_gfn;
-	int   last_pt_write_count;
-	u64  *last_pte_updated;
-	gfn_t last_pte_gfn;
-
 	struct fpu guest_fpu;
 	u64 xcr0;
 
@@ -436,6 +457,8 @@ struct kvm_vcpu_arch {
 	unsigned access;
 	gfn_t mmio_gfn;
 
+	struct kvm_pmu pmu;
+
 	/* used for guest single stepping over the given code position */
 	unsigned long singlestep_rip;
 
@@ -444,6 +467,9 @@ struct kvm_vcpu_arch {
 
 	cpumask_var_t wbinvd_dirty_mask;
 
+	unsigned long last_retry_eip;
+	unsigned long last_retry_addr;
+
 	struct {
 		bool halted;
 		gfn_t gfns[roundup_pow_of_two(ASYNC_PF_PER_VCPU)];
@@ -459,7 +485,6 @@ struct kvm_arch {
 	unsigned int n_requested_mmu_pages;
 	unsigned int n_max_mmu_pages;
 	unsigned int indirect_shadow_pages;
-	atomic_t invlpg_counter;
 	struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
 	/*
 	 * Hash table of struct kvm_mmu_page.
@@ -660,6 +685,8 @@ void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
 
 int kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
 void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot);
+int kvm_mmu_rmap_write_protect(struct kvm *kvm, u64 gfn,
+			       struct kvm_memory_slot *slot);
 void kvm_mmu_zap_all(struct kvm *kvm);
 unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm);
 void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages);
@@ -668,8 +695,6 @@ int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3);
 
 int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
 			  const void *val, int bytes);
-int kvm_pv_mmu_op(struct kvm_vcpu *vcpu, unsigned long bytes,
-		  gpa_t addr, unsigned long *ret);
 u8 kvm_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn);
 
 extern bool tdp_enabled;
@@ -692,6 +717,7 @@ enum emulation_result {
 #define EMULTYPE_NO_DECODE	    (1 << 0)
 #define EMULTYPE_TRAP_UD	    (1 << 1)
 #define EMULTYPE_SKIP		    (1 << 2)
+#define EMULTYPE_RETRY		    (1 << 3)
 int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2,
 			    int emulation_type, void *insn, int insn_len);
 
@@ -734,6 +760,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data);
 
 unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu);
 void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags);
+bool kvm_rdpmc(struct kvm_vcpu *vcpu);
 
 void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr);
 void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code);
@@ -754,13 +781,14 @@ int fx_init(struct kvm_vcpu *vcpu);
 
 void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu);
 void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
-		       const u8 *new, int bytes,
-		       bool guest_initiated);
+		       const u8 *new, int bytes);
+int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn);
 int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva);
 void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu);
 int kvm_mmu_load(struct kvm_vcpu *vcpu);
 void kvm_mmu_unload(struct kvm_vcpu *vcpu);
 void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu);
+gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access);
 gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva,
 			      struct x86_exception *exception);
 gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva,
@@ -782,6 +810,11 @@ void kvm_disable_tdp(void);
 int complete_pio(struct kvm_vcpu *vcpu);
 bool kvm_check_iopl(struct kvm_vcpu *vcpu);
 
+static inline gpa_t translate_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access)
+{
+	return gpa;
+}
+
 static inline struct kvm_mmu_page *page_header(hpa_t shadow_page)
 {
 	struct page *page = pfn_to_page(shadow_page >> PAGE_SHIFT);
@@ -894,4 +927,17 @@ extern bool kvm_find_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn);
 
 void kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err);
 
+int kvm_is_in_guest(void);
+
+void kvm_pmu_init(struct kvm_vcpu *vcpu);
+void kvm_pmu_destroy(struct kvm_vcpu *vcpu);
+void kvm_pmu_reset(struct kvm_vcpu *vcpu);
+void kvm_pmu_cpuid_update(struct kvm_vcpu *vcpu);
+bool kvm_pmu_msr(struct kvm_vcpu *vcpu, u32 msr);
+int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data);
+int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data);
+int kvm_pmu_read_pmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data);
+void kvm_handle_pmu_event(struct kvm_vcpu *vcpu);
+void kvm_deliver_pmi(struct kvm_vcpu *vcpu);
+
 #endif /* _ASM_X86_KVM_HOST_H */
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index a9c2116..f0c6fd6 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -39,8 +39,6 @@
 #include <asm/desc.h>
 #include <asm/tlbflush.h>
 
-#define MMU_QUEUE_SIZE 1024
-
 static int kvmapf = 1;
 
 static int parse_no_kvmapf(char *arg)
@@ -60,21 +58,10 @@ static int parse_no_stealacc(char *arg)
 
 early_param("no-steal-acc", parse_no_stealacc);
 
-struct kvm_para_state {
-	u8 mmu_queue[MMU_QUEUE_SIZE];
-	int mmu_queue_len;
-};
-
-static DEFINE_PER_CPU(struct kvm_para_state, para_state);
 static DEFINE_PER_CPU(struct kvm_vcpu_pv_apf_data, apf_reason) __aligned(64);
 static DEFINE_PER_CPU(struct kvm_steal_time, steal_time) __aligned(64);
 static int has_steal_clock = 0;
 
-static struct kvm_para_state *kvm_para_state(void)
-{
-	return &per_cpu(para_state, raw_smp_processor_id());
-}
-
 /*
  * No need for any "IO delay" on KVM
  */
@@ -271,151 +258,6 @@ do_async_page_fault(struct pt_regs *regs, unsigned long error_code)
 	}
 }
 
-static void kvm_mmu_op(void *buffer, unsigned len)
-{
-	int r;
-	unsigned long a1, a2;
-
-	do {
-		a1 = __pa(buffer);
-		a2 = 0;   /* on i386 __pa() always returns <4G */
-		r = kvm_hypercall3(KVM_HC_MMU_OP, len, a1, a2);
-		buffer += r;
-		len -= r;
-	} while (len);
-}
-
-static void mmu_queue_flush(struct kvm_para_state *state)
-{
-	if (state->mmu_queue_len) {
-		kvm_mmu_op(state->mmu_queue, state->mmu_queue_len);
-		state->mmu_queue_len = 0;
-	}
-}
-
-static void kvm_deferred_mmu_op(void *buffer, int len)
-{
-	struct kvm_para_state *state = kvm_para_state();
-
-	if (paravirt_get_lazy_mode() != PARAVIRT_LAZY_MMU) {
-		kvm_mmu_op(buffer, len);
-		return;
-	}
-	if (state->mmu_queue_len + len > sizeof state->mmu_queue)
-		mmu_queue_flush(state);
-	memcpy(state->mmu_queue + state->mmu_queue_len, buffer, len);
-	state->mmu_queue_len += len;
-}
-
-static void kvm_mmu_write(void *dest, u64 val)
-{
-	__u64 pte_phys;
-	struct kvm_mmu_op_write_pte wpte;
-
-#ifdef CONFIG_HIGHPTE
-	struct page *page;
-	unsigned long dst = (unsigned long) dest;
-
-	page = kmap_atomic_to_page(dest);
-	pte_phys = page_to_pfn(page);
-	pte_phys <<= PAGE_SHIFT;
-	pte_phys += (dst & ~(PAGE_MASK));
-#else
-	pte_phys = (unsigned long)__pa(dest);
-#endif
-	wpte.header.op = KVM_MMU_OP_WRITE_PTE;
-	wpte.pte_val = val;
-	wpte.pte_phys = pte_phys;
-
-	kvm_deferred_mmu_op(&wpte, sizeof wpte);
-}
-
-/*
- * We only need to hook operations that are MMU writes.  We hook these so that
- * we can use lazy MMU mode to batch these operations.  We could probably
- * improve the performance of the host code if we used some of the information
- * here to simplify processing of batched writes.
- */
-static void kvm_set_pte(pte_t *ptep, pte_t pte)
-{
-	kvm_mmu_write(ptep, pte_val(pte));
-}
-
-static void kvm_set_pte_at(struct mm_struct *mm, unsigned long addr,
-			   pte_t *ptep, pte_t pte)
-{
-	kvm_mmu_write(ptep, pte_val(pte));
-}
-
-static void kvm_set_pmd(pmd_t *pmdp, pmd_t pmd)
-{
-	kvm_mmu_write(pmdp, pmd_val(pmd));
-}
-
-#if PAGETABLE_LEVELS >= 3
-#ifdef CONFIG_X86_PAE
-static void kvm_set_pte_atomic(pte_t *ptep, pte_t pte)
-{
-	kvm_mmu_write(ptep, pte_val(pte));
-}
-
-static void kvm_pte_clear(struct mm_struct *mm,
-			  unsigned long addr, pte_t *ptep)
-{
-	kvm_mmu_write(ptep, 0);
-}
-
-static void kvm_pmd_clear(pmd_t *pmdp)
-{
-	kvm_mmu_write(pmdp, 0);
-}
-#endif
-
-static void kvm_set_pud(pud_t *pudp, pud_t pud)
-{
-	kvm_mmu_write(pudp, pud_val(pud));
-}
-
-#if PAGETABLE_LEVELS == 4
-static void kvm_set_pgd(pgd_t *pgdp, pgd_t pgd)
-{
-	kvm_mmu_write(pgdp, pgd_val(pgd));
-}
-#endif
-#endif /* PAGETABLE_LEVELS >= 3 */
-
-static void kvm_flush_tlb(void)
-{
-	struct kvm_mmu_op_flush_tlb ftlb = {
-		.header.op = KVM_MMU_OP_FLUSH_TLB,
-	};
-
-	kvm_deferred_mmu_op(&ftlb, sizeof ftlb);
-}
-
-static void kvm_release_pt(unsigned long pfn)
-{
-	struct kvm_mmu_op_release_pt rpt = {
-		.header.op = KVM_MMU_OP_RELEASE_PT,
-		.pt_phys = (u64)pfn << PAGE_SHIFT,
-	};
-
-	kvm_mmu_op(&rpt, sizeof rpt);
-}
-
-static void kvm_enter_lazy_mmu(void)
-{
-	paravirt_enter_lazy_mmu();
-}
-
-static void kvm_leave_lazy_mmu(void)
-{
-	struct kvm_para_state *state = kvm_para_state();
-
-	mmu_queue_flush(state);
-	paravirt_leave_lazy_mmu();
-}
-
 static void __init paravirt_ops_setup(void)
 {
 	pv_info.name = "KVM";
@@ -424,29 +266,6 @@ static void __init paravirt_ops_setup(void)
 	if (kvm_para_has_feature(KVM_FEATURE_NOP_IO_DELAY))
 		pv_cpu_ops.io_delay = kvm_io_delay;
 
-	if (kvm_para_has_feature(KVM_FEATURE_MMU_OP)) {
-		pv_mmu_ops.set_pte = kvm_set_pte;
-		pv_mmu_ops.set_pte_at = kvm_set_pte_at;
-		pv_mmu_ops.set_pmd = kvm_set_pmd;
-#if PAGETABLE_LEVELS >= 3
-#ifdef CONFIG_X86_PAE
-		pv_mmu_ops.set_pte_atomic = kvm_set_pte_atomic;
-		pv_mmu_ops.pte_clear = kvm_pte_clear;
-		pv_mmu_ops.pmd_clear = kvm_pmd_clear;
-#endif
-		pv_mmu_ops.set_pud = kvm_set_pud;
-#if PAGETABLE_LEVELS == 4
-		pv_mmu_ops.set_pgd = kvm_set_pgd;
-#endif
-#endif
-		pv_mmu_ops.flush_tlb_user = kvm_flush_tlb;
-		pv_mmu_ops.release_pte = kvm_release_pt;
-		pv_mmu_ops.release_pmd = kvm_release_pt;
-		pv_mmu_ops.release_pud = kvm_release_pt;
-
-		pv_mmu_ops.lazy_mode.enter = kvm_enter_lazy_mmu;
-		pv_mmu_ops.lazy_mode.leave = kvm_leave_lazy_mmu;
-	}
 #ifdef CONFIG_X86_IO_APIC
 	no_timer_check = 1;
 #endif
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 80dc793..1c4d769 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -45,6 +45,15 @@ int iommu_detected __read_mostly = 0;
  */
 int iommu_pass_through __read_mostly;
 
+/*
+ * Group multi-function PCI devices into a single device-group for the
+ * iommu_device_group interface.  This tells the iommu driver to pretend
+ * it cannot distinguish between functions of a device, exposing only one
+ * group for the device.  Useful for disallowing use of individual PCI
+ * functions from userspace drivers.
+ */
+int iommu_group_mf __read_mostly;
+
 extern struct iommu_table_entry __iommu_table[], __iommu_table_end[];
 
 /* Dummy device used for NULL arguments (normally ISA). */
@@ -169,6 +178,8 @@ static __init int iommu_setup(char *p)
 #endif
 		if (!strncmp(p, "pt", 2))
 			iommu_pass_through = 1;
+		if (!strncmp(p, "group_mf", 8))
+			iommu_group_mf = 1;
 
 		gart_parse_options(p);
 
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 54ddaeb2..46a01bdc 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -682,7 +682,6 @@ static int
 handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
 		struct pt_regs *regs)
 {
-	sigset_t blocked;
 	int ret;
 
 	/* Are we from a system call? */
@@ -733,10 +732,7 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
 	 */
 	regs->flags &= ~X86_EFLAGS_TF;
 
-	sigorsets(&blocked, &current->blocked, &ka->sa.sa_mask);
-	if (!(ka->sa.sa_flags & SA_NODEFER))
-		sigaddset(&blocked, sig);
-	set_current_blocked(&blocked);
+	block_sigmask(ka, sig);
 
 	tracehook_signal_handler(sig, info, ka, regs,
 				 test_thread_flag(TIF_SINGLESTEP));
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index ff5790d..1a7fe86 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -35,6 +35,7 @@ config KVM
 	select KVM_MMIO
 	select TASKSTATS
 	select TASK_DELAY_ACCT
+	select PERF_EVENTS
 	---help---
 	  Support hosting fully virtualized guest machines using hardware
 	  virtualization extensions.  You will need a fairly recent
@@ -52,6 +53,8 @@ config KVM
 config KVM_INTEL
 	tristate "KVM for Intel processors support"
 	depends on KVM
+	# for perf_guest_get_msrs():
+	depends on CPU_SUP_INTEL
 	---help---
 	  Provides support for KVM on Intel processors equipped with the VT
 	  extensions.
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index f15501f43..4f579e8 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -12,7 +12,7 @@ kvm-$(CONFIG_IOMMU_API)	+= $(addprefix ../../../virt/kvm/, iommu.o)
 kvm-$(CONFIG_KVM_ASYNC_PF)	+= $(addprefix ../../../virt/kvm/, async_pf.o)
 
 kvm-y			+= x86.o mmu.o emulate.o i8259.o irq.o lapic.o \
-			   i8254.o timer.o
+			   i8254.o timer.o cpuid.o pmu.o
 kvm-intel-y		+= vmx.o
 kvm-amd-y		+= svm.o
 
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
new file mode 100644
index 0000000..89b02bf
--- /dev/null
+++ b/arch/x86/kvm/cpuid.c
@@ -0,0 +1,670 @@
+/*
+ * Kernel-based Virtual Machine driver for Linux
+ * cpuid support routines
+ *
+ * derived from arch/x86/kvm/x86.c
+ *
+ * Copyright 2011 Red Hat, Inc. and/or its affiliates.
+ * Copyright IBM Corporation, 2008
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/module.h>
+#include <linux/vmalloc.h>
+#include <linux/uaccess.h>
+#include <asm/user.h>
+#include <asm/xsave.h>
+#include "cpuid.h"
+#include "lapic.h"
+#include "mmu.h"
+#include "trace.h"
+
+void kvm_update_cpuid(struct kvm_vcpu *vcpu)
+{
+	struct kvm_cpuid_entry2 *best;
+	struct kvm_lapic *apic = vcpu->arch.apic;
+
+	best = kvm_find_cpuid_entry(vcpu, 1, 0);
+	if (!best)
+		return;
+
+	/* Update OSXSAVE bit */
+	if (cpu_has_xsave && best->function == 0x1) {
+		best->ecx &= ~(bit(X86_FEATURE_OSXSAVE));
+		if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE))
+			best->ecx |= bit(X86_FEATURE_OSXSAVE);
+	}
+
+	if (apic) {
+		if (best->ecx & bit(X86_FEATURE_TSC_DEADLINE_TIMER))
+			apic->lapic_timer.timer_mode_mask = 3 << 17;
+		else
+			apic->lapic_timer.timer_mode_mask = 1 << 17;
+	}
+
+	kvm_pmu_cpuid_update(vcpu);
+}
+
+static int is_efer_nx(void)
+{
+	unsigned long long efer = 0;
+
+	rdmsrl_safe(MSR_EFER, &efer);
+	return efer & EFER_NX;
+}
+
+static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu)
+{
+	int i;
+	struct kvm_cpuid_entry2 *e, *entry;
+
+	entry = NULL;
+	for (i = 0; i < vcpu->arch.cpuid_nent; ++i) {
+		e = &vcpu->arch.cpuid_entries[i];
+		if (e->function == 0x80000001) {
+			entry = e;
+			break;
+		}
+	}
+	if (entry && (entry->edx & (1 << 20)) && !is_efer_nx()) {
+		entry->edx &= ~(1 << 20);
+		printk(KERN_INFO "kvm: guest NX capability removed\n");
+	}
+}
+
+/* when an old userspace process fills a new kernel module */
+int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
+			     struct kvm_cpuid *cpuid,
+			     struct kvm_cpuid_entry __user *entries)
+{
+	int r, i;
+	struct kvm_cpuid_entry *cpuid_entries;
+
+	r = -E2BIG;
+	if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
+		goto out;
+	r = -ENOMEM;
+	cpuid_entries = vmalloc(sizeof(struct kvm_cpuid_entry) * cpuid->nent);
+	if (!cpuid_entries)
+		goto out;
+	r = -EFAULT;
+	if (copy_from_user(cpuid_entries, entries,
+			   cpuid->nent * sizeof(struct kvm_cpuid_entry)))
+		goto out_free;
+	for (i = 0; i < cpuid->nent; i++) {
+		vcpu->arch.cpuid_entries[i].function = cpuid_entries[i].function;
+		vcpu->arch.cpuid_entries[i].eax = cpuid_entries[i].eax;
+		vcpu->arch.cpuid_entries[i].ebx = cpuid_entries[i].ebx;
+		vcpu->arch.cpuid_entries[i].ecx = cpuid_entries[i].ecx;
+		vcpu->arch.cpuid_entries[i].edx = cpuid_entries[i].edx;
+		vcpu->arch.cpuid_entries[i].index = 0;
+		vcpu->arch.cpuid_entries[i].flags = 0;
+		vcpu->arch.cpuid_entries[i].padding[0] = 0;
+		vcpu->arch.cpuid_entries[i].padding[1] = 0;
+		vcpu->arch.cpuid_entries[i].padding[2] = 0;
+	}
+	vcpu->arch.cpuid_nent = cpuid->nent;
+	cpuid_fix_nx_cap(vcpu);
+	r = 0;
+	kvm_apic_set_version(vcpu);
+	kvm_x86_ops->cpuid_update(vcpu);
+	kvm_update_cpuid(vcpu);
+
+out_free:
+	vfree(cpuid_entries);
+out:
+	return r;
+}
+
+int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu,
+			      struct kvm_cpuid2 *cpuid,
+			      struct kvm_cpuid_entry2 __user *entries)
+{
+	int r;
+
+	r = -E2BIG;
+	if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
+		goto out;
+	r = -EFAULT;
+	if (copy_from_user(&vcpu->arch.cpuid_entries, entries,
+			   cpuid->nent * sizeof(struct kvm_cpuid_entry2)))
+		goto out;
+	vcpu->arch.cpuid_nent = cpuid->nent;
+	kvm_apic_set_version(vcpu);
+	kvm_x86_ops->cpuid_update(vcpu);
+	kvm_update_cpuid(vcpu);
+	return 0;
+
+out:
+	return r;
+}
+
+int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu,
+			      struct kvm_cpuid2 *cpuid,
+			      struct kvm_cpuid_entry2 __user *entries)
+{
+	int r;
+
+	r = -E2BIG;
+	if (cpuid->nent < vcpu->arch.cpuid_nent)
+		goto out;
+	r = -EFAULT;
+	if (copy_to_user(entries, &vcpu->arch.cpuid_entries,
+			 vcpu->arch.cpuid_nent * sizeof(struct kvm_cpuid_entry2)))
+		goto out;
+	return 0;
+
+out:
+	cpuid->nent = vcpu->arch.cpuid_nent;
+	return r;
+}
+
+static void cpuid_mask(u32 *word, int wordnum)
+{
+	*word &= boot_cpu_data.x86_capability[wordnum];
+}
+
+static void do_cpuid_1_ent(struct kvm_cpuid_entry2 *entry, u32 function,
+			   u32 index)
+{
+	entry->function = function;
+	entry->index = index;
+	cpuid_count(entry->function, entry->index,
+		    &entry->eax, &entry->ebx, &entry->ecx, &entry->edx);
+	entry->flags = 0;
+}
+
+static bool supported_xcr0_bit(unsigned bit)
+{
+	u64 mask = ((u64)1 << bit);
+
+	return mask & (XSTATE_FP | XSTATE_SSE | XSTATE_YMM) & host_xcr0;
+}
+
+#define F(x) bit(X86_FEATURE_##x)
+
+static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
+			 u32 index, int *nent, int maxnent)
+{
+	int r;
+	unsigned f_nx = is_efer_nx() ? F(NX) : 0;
+#ifdef CONFIG_X86_64
+	unsigned f_gbpages = (kvm_x86_ops->get_lpage_level() == PT_PDPE_LEVEL)
+				? F(GBPAGES) : 0;
+	unsigned f_lm = F(LM);
+#else
+	unsigned f_gbpages = 0;
+	unsigned f_lm = 0;
+#endif
+	unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? F(RDTSCP) : 0;
+
+	/* cpuid 1.edx */
+	const u32 kvm_supported_word0_x86_features =
+		F(FPU) | F(VME) | F(DE) | F(PSE) |
+		F(TSC) | F(MSR) | F(PAE) | F(MCE) |
+		F(CX8) | F(APIC) | 0 /* Reserved */ | F(SEP) |
+		F(MTRR) | F(PGE) | F(MCA) | F(CMOV) |
+		F(PAT) | F(PSE36) | 0 /* PSN */ | F(CLFLSH) |
+		0 /* Reserved, DS, ACPI */ | F(MMX) |
+		F(FXSR) | F(XMM) | F(XMM2) | F(SELFSNOOP) |
+		0 /* HTT, TM, Reserved, PBE */;
+	/* cpuid 0x80000001.edx */
+	const u32 kvm_supported_word1_x86_features =
+		F(FPU) | F(VME) | F(DE) | F(PSE) |
+		F(TSC) | F(MSR) | F(PAE) | F(MCE) |
+		F(CX8) | F(APIC) | 0 /* Reserved */ | F(SYSCALL) |
+		F(MTRR) | F(PGE) | F(MCA) | F(CMOV) |
+		F(PAT) | F(PSE36) | 0 /* Reserved */ |
+		f_nx | 0 /* Reserved */ | F(MMXEXT) | F(MMX) |
+		F(FXSR) | F(FXSR_OPT) | f_gbpages | f_rdtscp |
+		0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW);
+	/* cpuid 1.ecx */
+	const u32 kvm_supported_word4_x86_features =
+		F(XMM3) | F(PCLMULQDQ) | 0 /* DTES64, MONITOR */ |
+		0 /* DS-CPL, VMX, SMX, EST */ |
+		0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* Reserved */ |
+		F(FMA) | F(CX16) | 0 /* xTPR Update, PDCM */ |
+		0 /* Reserved, DCA */ | F(XMM4_1) |
+		F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) |
+		0 /* Reserved*/ | F(AES) | F(XSAVE) | 0 /* OSXSAVE */ | F(AVX) |
+		F(F16C) | F(RDRAND);
+	/* cpuid 0x80000001.ecx */
+	const u32 kvm_supported_word6_x86_features =
+		F(LAHF_LM) | F(CMP_LEGACY) | 0 /*SVM*/ | 0 /* ExtApicSpace */ |
+		F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) |
+		F(3DNOWPREFETCH) | 0 /* OSVW */ | 0 /* IBS */ | F(XOP) |
+		0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM);
+
+	/* cpuid 0xC0000001.edx */
+	const u32 kvm_supported_word5_x86_features =
+		F(XSTORE) | F(XSTORE_EN) | F(XCRYPT) | F(XCRYPT_EN) |
+		F(ACE2) | F(ACE2_EN) | F(PHE) | F(PHE_EN) |
+		F(PMM) | F(PMM_EN);
+
+	/* cpuid 7.0.ebx */
+	const u32 kvm_supported_word9_x86_features =
+		F(FSGSBASE) | F(BMI1) | F(AVX2) | F(SMEP) | F(BMI2) | F(ERMS);
+
+	/* all calls to cpuid_count() should be made on the same cpu */
+	get_cpu();
+
+	r = -E2BIG;
+
+	if (*nent >= maxnent)
+		goto out;
+
+	do_cpuid_1_ent(entry, function, index);
+	++*nent;
+
+	switch (function) {
+	case 0:
+		entry->eax = min(entry->eax, (u32)0xd);
+		break;
+	case 1:
+		entry->edx &= kvm_supported_word0_x86_features;
+		cpuid_mask(&entry->edx, 0);
+		entry->ecx &= kvm_supported_word4_x86_features;
+		cpuid_mask(&entry->ecx, 4);
+		/* we support x2apic emulation even if host does not support
+		 * it since we emulate x2apic in software */
+		entry->ecx |= F(X2APIC);
+		break;
+	/* function 2 entries are STATEFUL. That is, repeated cpuid commands
+	 * may return different values. This forces us to get_cpu() before
+	 * issuing the first command, and also to emulate this annoying behavior
+	 * in kvm_emulate_cpuid() using KVM_CPUID_FLAG_STATE_READ_NEXT */
+	case 2: {
+		int t, times = entry->eax & 0xff;
+
+		entry->flags |= KVM_CPUID_FLAG_STATEFUL_FUNC;
+		entry->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT;
+		for (t = 1; t < times; ++t) {
+			if (*nent >= maxnent)
+				goto out;
+
+			do_cpuid_1_ent(&entry[t], function, 0);
+			entry[t].flags |= KVM_CPUID_FLAG_STATEFUL_FUNC;
+			++*nent;
+		}
+		break;
+	}
+	/* function 4 has additional index. */
+	case 4: {
+		int i, cache_type;
+
+		entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
+		/* read more entries until cache_type is zero */
+		for (i = 1; ; ++i) {
+			if (*nent >= maxnent)
+				goto out;
+
+			cache_type = entry[i - 1].eax & 0x1f;
+			if (!cache_type)
+				break;
+			do_cpuid_1_ent(&entry[i], function, i);
+			entry[i].flags |=
+			       KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
+			++*nent;
+		}
+		break;
+	}
+	case 7: {
+		entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
+		/* Mask ebx against host capbability word 9 */
+		if (index == 0) {
+			entry->ebx &= kvm_supported_word9_x86_features;
+			cpuid_mask(&entry->ebx, 9);
+		} else
+			entry->ebx = 0;
+		entry->eax = 0;
+		entry->ecx = 0;
+		entry->edx = 0;
+		break;
+	}
+	case 9:
+		break;
+	case 0xa: { /* Architectural Performance Monitoring */
+		struct x86_pmu_capability cap;
+		union cpuid10_eax eax;
+		union cpuid10_edx edx;
+
+		perf_get_x86_pmu_capability(&cap);
+
+		/*
+		 * Only support guest architectural pmu on a host
+		 * with architectural pmu.
+		 */
+		if (!cap.version)
+			memset(&cap, 0, sizeof(cap));
+
+		eax.split.version_id = min(cap.version, 2);
+		eax.split.num_counters = cap.num_counters_gp;
+		eax.split.bit_width = cap.bit_width_gp;
+		eax.split.mask_length = cap.events_mask_len;
+
+		edx.split.num_counters_fixed = cap.num_counters_fixed;
+		edx.split.bit_width_fixed = cap.bit_width_fixed;
+		edx.split.reserved = 0;
+
+		entry->eax = eax.full;
+		entry->ebx = cap.events_mask;
+		entry->ecx = 0;
+		entry->edx = edx.full;
+		break;
+	}
+	/* function 0xb has additional index. */
+	case 0xb: {
+		int i, level_type;
+
+		entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
+		/* read more entries until level_type is zero */
+		for (i = 1; ; ++i) {
+			if (*nent >= maxnent)
+				goto out;
+
+			level_type = entry[i - 1].ecx & 0xff00;
+			if (!level_type)
+				break;
+			do_cpuid_1_ent(&entry[i], function, i);
+			entry[i].flags |=
+			       KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
+			++*nent;
+		}
+		break;
+	}
+	case 0xd: {
+		int idx, i;
+
+		entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
+		for (idx = 1, i = 1; idx < 64; ++idx) {
+			if (*nent >= maxnent)
+				goto out;
+
+			do_cpuid_1_ent(&entry[i], function, idx);
+			if (entry[i].eax == 0 || !supported_xcr0_bit(idx))
+				continue;
+			entry[i].flags |=
+			       KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
+			++*nent;
+			++i;
+		}
+		break;
+	}
+	case KVM_CPUID_SIGNATURE: {
+		char signature[12] = "KVMKVMKVM\0\0";
+		u32 *sigptr = (u32 *)signature;
+		entry->eax = 0;
+		entry->ebx = sigptr[0];
+		entry->ecx = sigptr[1];
+		entry->edx = sigptr[2];
+		break;
+	}
+	case KVM_CPUID_FEATURES:
+		entry->eax = (1 << KVM_FEATURE_CLOCKSOURCE) |
+			     (1 << KVM_FEATURE_NOP_IO_DELAY) |
+			     (1 << KVM_FEATURE_CLOCKSOURCE2) |
+			     (1 << KVM_FEATURE_ASYNC_PF) |
+			     (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT);
+
+		if (sched_info_on())
+			entry->eax |= (1 << KVM_FEATURE_STEAL_TIME);
+
+		entry->ebx = 0;
+		entry->ecx = 0;
+		entry->edx = 0;
+		break;
+	case 0x80000000:
+		entry->eax = min(entry->eax, 0x8000001a);
+		break;
+	case 0x80000001:
+		entry->edx &= kvm_supported_word1_x86_features;
+		cpuid_mask(&entry->edx, 1);
+		entry->ecx &= kvm_supported_word6_x86_features;
+		cpuid_mask(&entry->ecx, 6);
+		break;
+	case 0x80000008: {
+		unsigned g_phys_as = (entry->eax >> 16) & 0xff;
+		unsigned virt_as = max((entry->eax >> 8) & 0xff, 48U);
+		unsigned phys_as = entry->eax & 0xff;
+
+		if (!g_phys_as)
+			g_phys_as = phys_as;
+		entry->eax = g_phys_as | (virt_as << 8);
+		entry->ebx = entry->edx = 0;
+		break;
+	}
+	case 0x80000019:
+		entry->ecx = entry->edx = 0;
+		break;
+	case 0x8000001a:
+		break;
+	case 0x8000001d:
+		break;
+	/*Add support for Centaur's CPUID instruction*/
+	case 0xC0000000:
+		/*Just support up to 0xC0000004 now*/
+		entry->eax = min(entry->eax, 0xC0000004);
+		break;
+	case 0xC0000001:
+		entry->edx &= kvm_supported_word5_x86_features;
+		cpuid_mask(&entry->edx, 5);
+		break;
+	case 3: /* Processor serial number */
+	case 5: /* MONITOR/MWAIT */
+	case 6: /* Thermal management */
+	case 0x80000007: /* Advanced power management */
+	case 0xC0000002:
+	case 0xC0000003:
+	case 0xC0000004:
+	default:
+		entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
+		break;
+	}
+
+	kvm_x86_ops->set_supported_cpuid(function, entry);
+
+	r = 0;
+
+out:
+	put_cpu();
+
+	return r;
+}
+
+#undef F
+
+struct kvm_cpuid_param {
+	u32 func;
+	u32 idx;
+	bool has_leaf_count;
+	bool (*qualifier)(struct kvm_cpuid_param *param);
+};
+
+static bool is_centaur_cpu(struct kvm_cpuid_param *param)
+{
+	return boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR;
+}
+
+int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
+				      struct kvm_cpuid_entry2 __user *entries)
+{
+	struct kvm_cpuid_entry2 *cpuid_entries;
+	int limit, nent = 0, r = -E2BIG, i;
+	u32 func;
+	static struct kvm_cpuid_param param[] = {
+		{ .func = 0, .has_leaf_count = true },
+		{ .func = 0x80000000, .has_leaf_count = true },
+		{ .func = 0xC0000000, .qualifier = is_centaur_cpu, .has_leaf_count = true },
+		{ .func = KVM_CPUID_SIGNATURE },
+		{ .func = KVM_CPUID_FEATURES },
+	};
+
+	if (cpuid->nent < 1)
+		goto out;
+	if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
+		cpuid->nent = KVM_MAX_CPUID_ENTRIES;
+	r = -ENOMEM;
+	cpuid_entries = vmalloc(sizeof(struct kvm_cpuid_entry2) * cpuid->nent);
+	if (!cpuid_entries)
+		goto out;
+
+	r = 0;
+	for (i = 0; i < ARRAY_SIZE(param); i++) {
+		struct kvm_cpuid_param *ent = &param[i];
+
+		if (ent->qualifier && !ent->qualifier(ent))
+			continue;
+
+		r = do_cpuid_ent(&cpuid_entries[nent], ent->func, ent->idx,
+				&nent, cpuid->nent);
+
+		if (r)
+			goto out_free;
+
+		if (!ent->has_leaf_count)
+			continue;
+
+		limit = cpuid_entries[nent - 1].eax;
+		for (func = ent->func + 1; func <= limit && nent < cpuid->nent && r == 0; ++func)
+			r = do_cpuid_ent(&cpuid_entries[nent], func, ent->idx,
+				     &nent, cpuid->nent);
+
+		if (r)
+			goto out_free;
+	}
+
+	r = -EFAULT;
+	if (copy_to_user(entries, cpuid_entries,
+			 nent * sizeof(struct kvm_cpuid_entry2)))
+		goto out_free;
+	cpuid->nent = nent;
+	r = 0;
+
+out_free:
+	vfree(cpuid_entries);
+out:
+	return r;
+}
+
+static int move_to_next_stateful_cpuid_entry(struct kvm_vcpu *vcpu, int i)
+{
+	struct kvm_cpuid_entry2 *e = &vcpu->arch.cpuid_entries[i];
+	int j, nent = vcpu->arch.cpuid_nent;
+
+	e->flags &= ~KVM_CPUID_FLAG_STATE_READ_NEXT;
+	/* when no next entry is found, the current entry[i] is reselected */
+	for (j = i + 1; ; j = (j + 1) % nent) {
+		struct kvm_cpuid_entry2 *ej = &vcpu->arch.cpuid_entries[j];
+		if (ej->function == e->function) {
+			ej->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT;
+			return j;
+		}
+	}
+	return 0; /* silence gcc, even though control never reaches here */
+}
+
+/* find an entry with matching function, matching index (if needed), and that
+ * should be read next (if it's stateful) */
+static int is_matching_cpuid_entry(struct kvm_cpuid_entry2 *e,
+	u32 function, u32 index)
+{
+	if (e->function != function)
+		return 0;
+	if ((e->flags & KVM_CPUID_FLAG_SIGNIFCANT_INDEX) && e->index != index)
+		return 0;
+	if ((e->flags & KVM_CPUID_FLAG_STATEFUL_FUNC) &&
+	    !(e->flags & KVM_CPUID_FLAG_STATE_READ_NEXT))
+		return 0;
+	return 1;
+}
+
+struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
+					      u32 function, u32 index)
+{
+	int i;
+	struct kvm_cpuid_entry2 *best = NULL;
+
+	for (i = 0; i < vcpu->arch.cpuid_nent; ++i) {
+		struct kvm_cpuid_entry2 *e;
+
+		e = &vcpu->arch.cpuid_entries[i];
+		if (is_matching_cpuid_entry(e, function, index)) {
+			if (e->flags & KVM_CPUID_FLAG_STATEFUL_FUNC)
+				move_to_next_stateful_cpuid_entry(vcpu, i);
+			best = e;
+			break;
+		}
+	}
+	return best;
+}
+EXPORT_SYMBOL_GPL(kvm_find_cpuid_entry);
+
+int cpuid_maxphyaddr(struct kvm_vcpu *vcpu)
+{
+	struct kvm_cpuid_entry2 *best;
+
+	best = kvm_find_cpuid_entry(vcpu, 0x80000000, 0);
+	if (!best || best->eax < 0x80000008)
+		goto not_found;
+	best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0);
+	if (best)
+		return best->eax & 0xff;
+not_found:
+	return 36;
+}
+
+/*
+ * If no match is found, check whether we exceed the vCPU's limit
+ * and return the content of the highest valid _standard_ leaf instead.
+ * This is to satisfy the CPUID specification.
+ */
+static struct kvm_cpuid_entry2* check_cpuid_limit(struct kvm_vcpu *vcpu,
+                                                  u32 function, u32 index)
+{
+	struct kvm_cpuid_entry2 *maxlevel;
+
+	maxlevel = kvm_find_cpuid_entry(vcpu, function & 0x80000000, 0);
+	if (!maxlevel || maxlevel->eax >= function)
+		return NULL;
+	if (function & 0x80000000) {
+		maxlevel = kvm_find_cpuid_entry(vcpu, 0, 0);
+		if (!maxlevel)
+			return NULL;
+	}
+	return kvm_find_cpuid_entry(vcpu, maxlevel->eax, index);
+}
+
+void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
+{
+	u32 function, index;
+	struct kvm_cpuid_entry2 *best;
+
+	function = kvm_register_read(vcpu, VCPU_REGS_RAX);
+	index = kvm_register_read(vcpu, VCPU_REGS_RCX);
+	kvm_register_write(vcpu, VCPU_REGS_RAX, 0);
+	kvm_register_write(vcpu, VCPU_REGS_RBX, 0);
+	kvm_register_write(vcpu, VCPU_REGS_RCX, 0);
+	kvm_register_write(vcpu, VCPU_REGS_RDX, 0);
+	best = kvm_find_cpuid_entry(vcpu, function, index);
+
+	if (!best)
+		best = check_cpuid_limit(vcpu, function, index);
+
+	if (best) {
+		kvm_register_write(vcpu, VCPU_REGS_RAX, best->eax);
+		kvm_register_write(vcpu, VCPU_REGS_RBX, best->ebx);
+		kvm_register_write(vcpu, VCPU_REGS_RCX, best->ecx);
+		kvm_register_write(vcpu, VCPU_REGS_RDX, best->edx);
+	}
+	kvm_x86_ops->skip_emulated_instruction(vcpu);
+	trace_kvm_cpuid(function,
+			kvm_register_read(vcpu, VCPU_REGS_RAX),
+			kvm_register_read(vcpu, VCPU_REGS_RBX),
+			kvm_register_read(vcpu, VCPU_REGS_RCX),
+			kvm_register_read(vcpu, VCPU_REGS_RDX));
+}
+EXPORT_SYMBOL_GPL(kvm_emulate_cpuid);
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
new file mode 100644
index 0000000..5b97e17
--- /dev/null
+++ b/arch/x86/kvm/cpuid.h
@@ -0,0 +1,46 @@
+#ifndef ARCH_X86_KVM_CPUID_H
+#define ARCH_X86_KVM_CPUID_H
+
+#include "x86.h"
+
+void kvm_update_cpuid(struct kvm_vcpu *vcpu);
+struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
+					      u32 function, u32 index);
+int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
+				      struct kvm_cpuid_entry2 __user *entries);
+int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
+			     struct kvm_cpuid *cpuid,
+			     struct kvm_cpuid_entry __user *entries);
+int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu,
+			      struct kvm_cpuid2 *cpuid,
+			      struct kvm_cpuid_entry2 __user *entries);
+int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu,
+			      struct kvm_cpuid2 *cpuid,
+			      struct kvm_cpuid_entry2 __user *entries);
+
+
+static inline bool guest_cpuid_has_xsave(struct kvm_vcpu *vcpu)
+{
+	struct kvm_cpuid_entry2 *best;
+
+	best = kvm_find_cpuid_entry(vcpu, 1, 0);
+	return best && (best->ecx & bit(X86_FEATURE_XSAVE));
+}
+
+static inline bool guest_cpuid_has_smep(struct kvm_vcpu *vcpu)
+{
+	struct kvm_cpuid_entry2 *best;
+
+	best = kvm_find_cpuid_entry(vcpu, 7, 0);
+	return best && (best->ebx & bit(X86_FEATURE_SMEP));
+}
+
+static inline bool guest_cpuid_has_fsgsbase(struct kvm_vcpu *vcpu)
+{
+	struct kvm_cpuid_entry2 *best;
+
+	best = kvm_find_cpuid_entry(vcpu, 7, 0);
+	return best && (best->ebx & bit(X86_FEATURE_FSGSBASE));
+}
+
+#endif
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index f1e3be18..05a562b 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -125,8 +125,9 @@
 #define Lock        (1<<26) /* lock prefix is allowed for the instruction */
 #define Priv        (1<<27) /* instruction generates #GP if current CPL != 0 */
 #define No64	    (1<<28)
+#define PageTable   (1 << 29)   /* instruction used to write page table */
 /* Source 2 operand type */
-#define Src2Shift   (29)
+#define Src2Shift   (30)
 #define Src2None    (OpNone << Src2Shift)
 #define Src2CL      (OpCL << Src2Shift)
 #define Src2ImmByte (OpImmByte << Src2Shift)
@@ -1674,11 +1675,6 @@ static int em_jmp_far(struct x86_emulate_ctxt *ctxt)
 	return X86EMUL_CONTINUE;
 }
 
-static int em_grp1a(struct x86_emulate_ctxt *ctxt)
-{
-	return emulate_pop(ctxt, &ctxt->dst.val, ctxt->dst.bytes);
-}
-
 static int em_grp2(struct x86_emulate_ctxt *ctxt)
 {
 	switch (ctxt->modrm_reg) {
@@ -1788,7 +1784,7 @@ static int em_grp45(struct x86_emulate_ctxt *ctxt)
 	return rc;
 }
 
-static int em_grp9(struct x86_emulate_ctxt *ctxt)
+static int em_cmpxchg8b(struct x86_emulate_ctxt *ctxt)
 {
 	u64 old = ctxt->dst.orig_val64;
 
@@ -1831,6 +1827,24 @@ static int em_ret_far(struct x86_emulate_ctxt *ctxt)
 	return rc;
 }
 
+static int em_cmpxchg(struct x86_emulate_ctxt *ctxt)
+{
+	/* Save real source value, then compare EAX against destination. */
+	ctxt->src.orig_val = ctxt->src.val;
+	ctxt->src.val = ctxt->regs[VCPU_REGS_RAX];
+	emulate_2op_SrcV(ctxt, "cmp");
+
+	if (ctxt->eflags & EFLG_ZF) {
+		/* Success: write back to memory. */
+		ctxt->dst.val = ctxt->src.orig_val;
+	} else {
+		/* Failure: write the value we saw to EAX. */
+		ctxt->dst.type = OP_REG;
+		ctxt->dst.addr.reg = (unsigned long *)&ctxt->regs[VCPU_REGS_RAX];
+	}
+	return X86EMUL_CONTINUE;
+}
+
 static int em_lseg(struct x86_emulate_ctxt *ctxt)
 {
 	int seg = ctxt->src2.val;
@@ -2481,6 +2495,15 @@ static int em_das(struct x86_emulate_ctxt *ctxt)
 	return X86EMUL_CONTINUE;
 }
 
+static int em_call(struct x86_emulate_ctxt *ctxt)
+{
+	long rel = ctxt->src.val;
+
+	ctxt->src.val = (unsigned long)ctxt->_eip;
+	jmp_rel(ctxt, rel);
+	return em_push(ctxt);
+}
+
 static int em_call_far(struct x86_emulate_ctxt *ctxt)
 {
 	u16 sel, old_cs;
@@ -2622,12 +2645,75 @@ static int em_rdtsc(struct x86_emulate_ctxt *ctxt)
 	return X86EMUL_CONTINUE;
 }
 
+static int em_rdpmc(struct x86_emulate_ctxt *ctxt)
+{
+	u64 pmc;
+
+	if (ctxt->ops->read_pmc(ctxt, ctxt->regs[VCPU_REGS_RCX], &pmc))
+		return emulate_gp(ctxt, 0);
+	ctxt->regs[VCPU_REGS_RAX] = (u32)pmc;
+	ctxt->regs[VCPU_REGS_RDX] = pmc >> 32;
+	return X86EMUL_CONTINUE;
+}
+
 static int em_mov(struct x86_emulate_ctxt *ctxt)
 {
 	ctxt->dst.val = ctxt->src.val;
 	return X86EMUL_CONTINUE;
 }
 
+static int em_cr_write(struct x86_emulate_ctxt *ctxt)
+{
+	if (ctxt->ops->set_cr(ctxt, ctxt->modrm_reg, ctxt->src.val))
+		return emulate_gp(ctxt, 0);
+
+	/* Disable writeback. */
+	ctxt->dst.type = OP_NONE;
+	return X86EMUL_CONTINUE;
+}
+
+static int em_dr_write(struct x86_emulate_ctxt *ctxt)
+{
+	unsigned long val;
+
+	if (ctxt->mode == X86EMUL_MODE_PROT64)
+		val = ctxt->src.val & ~0ULL;
+	else
+		val = ctxt->src.val & ~0U;
+
+	/* #UD condition is already handled. */
+	if (ctxt->ops->set_dr(ctxt, ctxt->modrm_reg, val) < 0)
+		return emulate_gp(ctxt, 0);
+
+	/* Disable writeback. */
+	ctxt->dst.type = OP_NONE;
+	return X86EMUL_CONTINUE;
+}
+
+static int em_wrmsr(struct x86_emulate_ctxt *ctxt)
+{
+	u64 msr_data;
+
+	msr_data = (u32)ctxt->regs[VCPU_REGS_RAX]
+		| ((u64)ctxt->regs[VCPU_REGS_RDX] << 32);
+	if (ctxt->ops->set_msr(ctxt, ctxt->regs[VCPU_REGS_RCX], msr_data))
+		return emulate_gp(ctxt, 0);
+
+	return X86EMUL_CONTINUE;
+}
+
+static int em_rdmsr(struct x86_emulate_ctxt *ctxt)
+{
+	u64 msr_data;
+
+	if (ctxt->ops->get_msr(ctxt, ctxt->regs[VCPU_REGS_RCX], &msr_data))
+		return emulate_gp(ctxt, 0);
+
+	ctxt->regs[VCPU_REGS_RAX] = (u32)msr_data;
+	ctxt->regs[VCPU_REGS_RDX] = msr_data >> 32;
+	return X86EMUL_CONTINUE;
+}
+
 static int em_mov_rm_sreg(struct x86_emulate_ctxt *ctxt)
 {
 	if (ctxt->modrm_reg > VCPU_SREG_GS)
@@ -2775,6 +2861,24 @@ static int em_jcxz(struct x86_emulate_ctxt *ctxt)
 	return X86EMUL_CONTINUE;
 }
 
+static int em_in(struct x86_emulate_ctxt *ctxt)
+{
+	if (!pio_in_emulated(ctxt, ctxt->dst.bytes, ctxt->src.val,
+			     &ctxt->dst.val))
+		return X86EMUL_IO_NEEDED;
+
+	return X86EMUL_CONTINUE;
+}
+
+static int em_out(struct x86_emulate_ctxt *ctxt)
+{
+	ctxt->ops->pio_out_emulated(ctxt, ctxt->src.bytes, ctxt->dst.val,
+				    &ctxt->src.val, 1);
+	/* Disable writeback. */
+	ctxt->dst.type = OP_NONE;
+	return X86EMUL_CONTINUE;
+}
+
 static int em_cli(struct x86_emulate_ctxt *ctxt)
 {
 	if (emulator_bad_iopl(ctxt))
@@ -2794,6 +2898,69 @@ static int em_sti(struct x86_emulate_ctxt *ctxt)
 	return X86EMUL_CONTINUE;
 }
 
+static int em_bt(struct x86_emulate_ctxt *ctxt)
+{
+	/* Disable writeback. */
+	ctxt->dst.type = OP_NONE;
+	/* only subword offset */
+	ctxt->src.val &= (ctxt->dst.bytes << 3) - 1;
+
+	emulate_2op_SrcV_nobyte(ctxt, "bt");
+	return X86EMUL_CONTINUE;
+}
+
+static int em_bts(struct x86_emulate_ctxt *ctxt)
+{
+	emulate_2op_SrcV_nobyte(ctxt, "bts");
+	return X86EMUL_CONTINUE;
+}
+
+static int em_btr(struct x86_emulate_ctxt *ctxt)
+{
+	emulate_2op_SrcV_nobyte(ctxt, "btr");
+	return X86EMUL_CONTINUE;
+}
+
+static int em_btc(struct x86_emulate_ctxt *ctxt)
+{
+	emulate_2op_SrcV_nobyte(ctxt, "btc");
+	return X86EMUL_CONTINUE;
+}
+
+static int em_bsf(struct x86_emulate_ctxt *ctxt)
+{
+	u8 zf;
+
+	__asm__ ("bsf %2, %0; setz %1"
+		 : "=r"(ctxt->dst.val), "=q"(zf)
+		 : "r"(ctxt->src.val));
+
+	ctxt->eflags &= ~X86_EFLAGS_ZF;
+	if (zf) {
+		ctxt->eflags |= X86_EFLAGS_ZF;
+		/* Disable writeback. */
+		ctxt->dst.type = OP_NONE;
+	}
+	return X86EMUL_CONTINUE;
+}
+
+static int em_bsr(struct x86_emulate_ctxt *ctxt)
+{
+	u8 zf;
+
+	__asm__ ("bsr %2, %0; setz %1"
+		 : "=r"(ctxt->dst.val), "=q"(zf)
+		 : "r"(ctxt->src.val));
+
+	ctxt->eflags &= ~X86_EFLAGS_ZF;
+	if (zf) {
+		ctxt->eflags |= X86_EFLAGS_ZF;
+		/* Disable writeback. */
+		ctxt->dst.type = OP_NONE;
+	}
+	return X86EMUL_CONTINUE;
+}
+
 static bool valid_cr(int nr)
 {
 	switch (nr) {
@@ -2867,9 +3034,6 @@ static int check_cr_write(struct x86_emulate_ctxt *ctxt)
 		break;
 		}
 	case 4: {
-		u64 cr4;
-
-		cr4 = ctxt->ops->get_cr(ctxt, 4);
 		ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
 
 		if ((efer & EFER_LMA) && !(new_val & X86_CR4_PAE))
@@ -3003,6 +3167,8 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt)
 #define D2bv(_f)      D((_f) | ByteOp), D(_f)
 #define D2bvIP(_f, _i, _p) DIP((_f) | ByteOp, _i, _p), DIP(_f, _i, _p)
 #define I2bv(_f, _e)  I((_f) | ByteOp, _e), I(_f, _e)
+#define I2bvIP(_f, _e, _i, _p) \
+	IIP((_f) | ByteOp, _e, _i, _p), IIP(_f, _e, _i, _p)
 
 #define I6ALU(_f, _e) I2bv((_f) | DstMem | SrcReg | ModRM, _e),		\
 		I2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock, _e),	\
@@ -3033,17 +3199,17 @@ static struct opcode group7_rm7[] = {
 
 static struct opcode group1[] = {
 	I(Lock, em_add),
-	I(Lock, em_or),
+	I(Lock | PageTable, em_or),
 	I(Lock, em_adc),
 	I(Lock, em_sbb),
-	I(Lock, em_and),
+	I(Lock | PageTable, em_and),
 	I(Lock, em_sub),
 	I(Lock, em_xor),
 	I(0, em_cmp),
 };
 
 static struct opcode group1A[] = {
-	D(DstMem | SrcNone | ModRM | Mov | Stack), N, N, N, N, N, N, N,
+	I(DstMem | SrcNone | ModRM | Mov | Stack, em_pop), N, N, N, N, N, N, N,
 };
 
 static struct opcode group3[] = {
@@ -3058,16 +3224,19 @@ static struct opcode group3[] = {
 };
 
 static struct opcode group4[] = {
-	D(ByteOp | DstMem | SrcNone | ModRM | Lock), D(ByteOp | DstMem | SrcNone | ModRM | Lock),
+	I(ByteOp | DstMem | SrcNone | ModRM | Lock, em_grp45),
+	I(ByteOp | DstMem | SrcNone | ModRM | Lock, em_grp45),
 	N, N, N, N, N, N,
 };
 
 static struct opcode group5[] = {
-	D(DstMem | SrcNone | ModRM | Lock), D(DstMem | SrcNone | ModRM | Lock),
-	D(SrcMem | ModRM | Stack),
+	I(DstMem | SrcNone | ModRM | Lock, em_grp45),
+	I(DstMem | SrcNone | ModRM | Lock, em_grp45),
+	I(SrcMem | ModRM | Stack, em_grp45),
 	I(SrcMemFAddr | ModRM | ImplicitOps | Stack, em_call_far),
-	D(SrcMem | ModRM | Stack), D(SrcMemFAddr | ModRM | ImplicitOps),
-	D(SrcMem | ModRM | Stack), N,
+	I(SrcMem | ModRM | Stack, em_grp45),
+	I(SrcMemFAddr | ModRM | ImplicitOps, em_grp45),
+	I(SrcMem | ModRM | Stack, em_grp45), N,
 };
 
 static struct opcode group6[] = {
@@ -3096,18 +3265,21 @@ static struct group_dual group7 = { {
 
 static struct opcode group8[] = {
 	N, N, N, N,
-	D(DstMem | SrcImmByte | ModRM), D(DstMem | SrcImmByte | ModRM | Lock),
-	D(DstMem | SrcImmByte | ModRM | Lock), D(DstMem | SrcImmByte | ModRM | Lock),
+	I(DstMem | SrcImmByte | ModRM, em_bt),
+	I(DstMem | SrcImmByte | ModRM | Lock | PageTable, em_bts),
+	I(DstMem | SrcImmByte | ModRM | Lock, em_btr),
+	I(DstMem | SrcImmByte | ModRM | Lock | PageTable, em_btc),
 };
 
 static struct group_dual group9 = { {
-	N, D(DstMem64 | ModRM | Lock), N, N, N, N, N, N,
+	N, I(DstMem64 | ModRM | Lock | PageTable, em_cmpxchg8b), N, N, N, N, N, N,
 }, {
 	N, N, N, N, N, N, N, N,
 } };
 
 static struct opcode group11[] = {
-	I(DstMem | SrcImm | ModRM | Mov, em_mov), X7(D(Undefined)),
+	I(DstMem | SrcImm | ModRM | Mov | PageTable, em_mov),
+	X7(D(Undefined)),
 };
 
 static struct gprefix pfx_0f_6f_0f_7f = {
@@ -3120,7 +3292,7 @@ static struct opcode opcode_table[256] = {
 	I(ImplicitOps | Stack | No64 | Src2ES, em_push_sreg),
 	I(ImplicitOps | Stack | No64 | Src2ES, em_pop_sreg),
 	/* 0x08 - 0x0F */
-	I6ALU(Lock, em_or),
+	I6ALU(Lock | PageTable, em_or),
 	I(ImplicitOps | Stack | No64 | Src2CS, em_push_sreg),
 	N,
 	/* 0x10 - 0x17 */
@@ -3132,7 +3304,7 @@ static struct opcode opcode_table[256] = {
 	I(ImplicitOps | Stack | No64 | Src2DS, em_push_sreg),
 	I(ImplicitOps | Stack | No64 | Src2DS, em_pop_sreg),
 	/* 0x20 - 0x27 */
-	I6ALU(Lock, em_and), N, N,
+	I6ALU(Lock | PageTable, em_and), N, N,
 	/* 0x28 - 0x2F */
 	I6ALU(Lock, em_sub), N, I(ByteOp | DstAcc | No64, em_das),
 	/* 0x30 - 0x37 */
@@ -3155,8 +3327,8 @@ static struct opcode opcode_table[256] = {
 	I(DstReg | SrcMem | ModRM | Src2Imm, em_imul_3op),
 	I(SrcImmByte | Mov | Stack, em_push),
 	I(DstReg | SrcMem | ModRM | Src2ImmByte, em_imul_3op),
-	D2bvIP(DstDI | SrcDX | Mov | String, ins, check_perm_in), /* insb, insw/insd */
-	D2bvIP(SrcSI | DstDX | String, outs, check_perm_out), /* outsb, outsw/outsd */
+	I2bvIP(DstDI | SrcDX | Mov | String, em_in, ins, check_perm_in), /* insb, insw/insd */
+	I2bvIP(SrcSI | DstDX | String, em_out, outs, check_perm_out), /* outsb, outsw/outsd */
 	/* 0x70 - 0x7F */
 	X16(D(SrcImmByte)),
 	/* 0x80 - 0x87 */
@@ -3165,11 +3337,11 @@ static struct opcode opcode_table[256] = {
 	G(ByteOp | DstMem | SrcImm | ModRM | No64 | Group, group1),
 	G(DstMem | SrcImmByte | ModRM | Group, group1),
 	I2bv(DstMem | SrcReg | ModRM, em_test),
-	I2bv(DstMem | SrcReg | ModRM | Lock, em_xchg),
+	I2bv(DstMem | SrcReg | ModRM | Lock | PageTable, em_xchg),
 	/* 0x88 - 0x8F */
-	I2bv(DstMem | SrcReg | ModRM | Mov, em_mov),
+	I2bv(DstMem | SrcReg | ModRM | Mov | PageTable, em_mov),
 	I2bv(DstReg | SrcMem | ModRM | Mov, em_mov),
-	I(DstMem | SrcNone | ModRM | Mov, em_mov_rm_sreg),
+	I(DstMem | SrcNone | ModRM | Mov | PageTable, em_mov_rm_sreg),
 	D(ModRM | SrcMem | NoAccess | DstReg),
 	I(ImplicitOps | SrcMem16 | ModRM, em_mov_sreg_rm),
 	G(0, group1A),
@@ -3182,7 +3354,7 @@ static struct opcode opcode_table[256] = {
 	II(ImplicitOps | Stack, em_popf, popf), N, N,
 	/* 0xA0 - 0xA7 */
 	I2bv(DstAcc | SrcMem | Mov | MemAbs, em_mov),
-	I2bv(DstMem | SrcAcc | Mov | MemAbs, em_mov),
+	I2bv(DstMem | SrcAcc | Mov | MemAbs | PageTable, em_mov),
 	I2bv(SrcSI | DstDI | Mov | String, em_mov),
 	I2bv(SrcSI | DstDI | String, em_cmp),
 	/* 0xA8 - 0xAF */
@@ -3213,13 +3385,13 @@ static struct opcode opcode_table[256] = {
 	/* 0xE0 - 0xE7 */
 	X3(I(SrcImmByte, em_loop)),
 	I(SrcImmByte, em_jcxz),
-	D2bvIP(SrcImmUByte | DstAcc, in,  check_perm_in),
-	D2bvIP(SrcAcc | DstImmUByte, out, check_perm_out),
+	I2bvIP(SrcImmUByte | DstAcc, em_in,  in,  check_perm_in),
+	I2bvIP(SrcAcc | DstImmUByte, em_out, out, check_perm_out),
 	/* 0xE8 - 0xEF */
-	D(SrcImm | Stack), D(SrcImm | ImplicitOps),
+	I(SrcImm | Stack, em_call), D(SrcImm | ImplicitOps),
 	I(SrcImmFAddr | No64, em_jmp_far), D(SrcImmByte | ImplicitOps),
-	D2bvIP(SrcDX | DstAcc, in,  check_perm_in),
-	D2bvIP(SrcAcc | DstDX, out, check_perm_out),
+	I2bvIP(SrcDX | DstAcc, em_in,  in,  check_perm_in),
+	I2bvIP(SrcAcc | DstDX, em_out, out, check_perm_out),
 	/* 0xF0 - 0xF7 */
 	N, DI(ImplicitOps, icebp), N, N,
 	DI(ImplicitOps | Priv, hlt), D(ImplicitOps),
@@ -3242,15 +3414,15 @@ static struct opcode twobyte_table[256] = {
 	/* 0x20 - 0x2F */
 	DIP(ModRM | DstMem | Priv | Op3264, cr_read, check_cr_read),
 	DIP(ModRM | DstMem | Priv | Op3264, dr_read, check_dr_read),
-	DIP(ModRM | SrcMem | Priv | Op3264, cr_write, check_cr_write),
-	DIP(ModRM | SrcMem | Priv | Op3264, dr_write, check_dr_write),
+	IIP(ModRM | SrcMem | Priv | Op3264, em_cr_write, cr_write, check_cr_write),
+	IIP(ModRM | SrcMem | Priv | Op3264, em_dr_write, dr_write, check_dr_write),
 	N, N, N, N,
 	N, N, N, N, N, N, N, N,
 	/* 0x30 - 0x3F */
-	DI(ImplicitOps | Priv, wrmsr),
+	II(ImplicitOps | Priv, em_wrmsr, wrmsr),
 	IIP(ImplicitOps, em_rdtsc, rdtsc, check_rdtsc),
-	DI(ImplicitOps | Priv, rdmsr),
-	DIP(ImplicitOps | Priv, rdpmc, check_rdpmc),
+	II(ImplicitOps | Priv, em_rdmsr, rdmsr),
+	IIP(ImplicitOps, em_rdpmc, rdpmc, check_rdpmc),
 	I(ImplicitOps | VendorSpecific, em_sysenter),
 	I(ImplicitOps | Priv | VendorSpecific, em_sysexit),
 	N, N,
@@ -3275,26 +3447,28 @@ static struct opcode twobyte_table[256] = {
 	X16(D(ByteOp | DstMem | SrcNone | ModRM| Mov)),
 	/* 0xA0 - 0xA7 */
 	I(Stack | Src2FS, em_push_sreg), I(Stack | Src2FS, em_pop_sreg),
-	DI(ImplicitOps, cpuid), D(DstMem | SrcReg | ModRM | BitOp),
+	DI(ImplicitOps, cpuid), I(DstMem | SrcReg | ModRM | BitOp, em_bt),
 	D(DstMem | SrcReg | Src2ImmByte | ModRM),
 	D(DstMem | SrcReg | Src2CL | ModRM), N, N,
 	/* 0xA8 - 0xAF */
 	I(Stack | Src2GS, em_push_sreg), I(Stack | Src2GS, em_pop_sreg),
-	DI(ImplicitOps, rsm), D(DstMem | SrcReg | ModRM | BitOp | Lock),
+	DI(ImplicitOps, rsm),
+	I(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_bts),
 	D(DstMem | SrcReg | Src2ImmByte | ModRM),
 	D(DstMem | SrcReg | Src2CL | ModRM),
 	D(ModRM), I(DstReg | SrcMem | ModRM, em_imul),
 	/* 0xB0 - 0xB7 */
-	D2bv(DstMem | SrcReg | ModRM | Lock),
+	I2bv(DstMem | SrcReg | ModRM | Lock | PageTable, em_cmpxchg),
 	I(DstReg | SrcMemFAddr | ModRM | Src2SS, em_lseg),
-	D(DstMem | SrcReg | ModRM | BitOp | Lock),
+	I(DstMem | SrcReg | ModRM | BitOp | Lock, em_btr),
 	I(DstReg | SrcMemFAddr | ModRM | Src2FS, em_lseg),
 	I(DstReg | SrcMemFAddr | ModRM | Src2GS, em_lseg),
 	D(ByteOp | DstReg | SrcMem | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov),
 	/* 0xB8 - 0xBF */
 	N, N,
-	G(BitOp, group8), D(DstMem | SrcReg | ModRM | BitOp | Lock),
-	D(DstReg | SrcMem | ModRM), D(DstReg | SrcMem | ModRM),
+	G(BitOp, group8),
+	I(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_btc),
+	I(DstReg | SrcMem | ModRM, em_bsf), I(DstReg | SrcMem | ModRM, em_bsr),
 	D(ByteOp | DstReg | SrcMem | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov),
 	/* 0xC0 - 0xCF */
 	D2bv(DstMem | SrcReg | ModRM | Lock),
@@ -3320,6 +3494,7 @@ static struct opcode twobyte_table[256] = {
 #undef D2bv
 #undef D2bvIP
 #undef I2bv
+#undef I2bvIP
 #undef I6ALU
 
 static unsigned imm_size(struct x86_emulate_ctxt *ctxt)
@@ -3697,6 +3872,11 @@ done:
 	return (rc != X86EMUL_CONTINUE) ? EMULATION_FAILED : EMULATION_OK;
 }
 
+bool x86_page_table_writing_insn(struct x86_emulate_ctxt *ctxt)
+{
+	return ctxt->d & PageTable;
+}
+
 static bool string_insn_completed(struct x86_emulate_ctxt *ctxt)
 {
 	/* The second termination condition only applies for REPE
@@ -3720,7 +3900,6 @@ static bool string_insn_completed(struct x86_emulate_ctxt *ctxt)
 int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
 {
 	struct x86_emulate_ops *ops = ctxt->ops;
-	u64 msr_data;
 	int rc = X86EMUL_CONTINUE;
 	int saved_dst_type = ctxt->dst.type;
 
@@ -3854,15 +4033,6 @@ special_insn:
 			goto cannot_emulate;
 		ctxt->dst.val = (s32) ctxt->src.val;
 		break;
-	case 0x6c:		/* insb */
-	case 0x6d:		/* insw/insd */
-		ctxt->src.val = ctxt->regs[VCPU_REGS_RDX];
-		goto do_io_in;
-	case 0x6e:		/* outsb */
-	case 0x6f:		/* outsw/outsd */
-		ctxt->dst.val = ctxt->regs[VCPU_REGS_RDX];
-		goto do_io_out;
-		break;
 	case 0x70 ... 0x7f: /* jcc (short) */
 		if (test_cc(ctxt->b, ctxt->eflags))
 			jmp_rel(ctxt, ctxt->src.val);
@@ -3870,9 +4040,6 @@ special_insn:
 	case 0x8d: /* lea r16/r32, m */
 		ctxt->dst.val = ctxt->src.addr.mem.ea;
 		break;
-	case 0x8f:		/* pop (sole member of Grp1a) */
-		rc = em_grp1a(ctxt);
-		break;
 	case 0x90 ... 0x97: /* nop / xchg reg, rax */
 		if (ctxt->dst.addr.reg == &ctxt->regs[VCPU_REGS_RAX])
 			break;
@@ -3905,38 +4072,11 @@ special_insn:
 		ctxt->src.val = ctxt->regs[VCPU_REGS_RCX];
 		rc = em_grp2(ctxt);
 		break;
-	case 0xe4: 	/* inb */
-	case 0xe5: 	/* in */
-		goto do_io_in;
-	case 0xe6: /* outb */
-	case 0xe7: /* out */
-		goto do_io_out;
-	case 0xe8: /* call (near) */ {
-		long int rel = ctxt->src.val;
-		ctxt->src.val = (unsigned long) ctxt->_eip;
-		jmp_rel(ctxt, rel);
-		rc = em_push(ctxt);
-		break;
-	}
 	case 0xe9: /* jmp rel */
 	case 0xeb: /* jmp rel short */
 		jmp_rel(ctxt, ctxt->src.val);
 		ctxt->dst.type = OP_NONE; /* Disable writeback. */
 		break;
-	case 0xec: /* in al,dx */
-	case 0xed: /* in (e/r)ax,dx */
-	do_io_in:
-		if (!pio_in_emulated(ctxt, ctxt->dst.bytes, ctxt->src.val,
-				     &ctxt->dst.val))
-			goto done; /* IO is needed */
-		break;
-	case 0xee: /* out dx,al */
-	case 0xef: /* out dx,(e/r)ax */
-	do_io_out:
-		ops->pio_out_emulated(ctxt, ctxt->src.bytes, ctxt->dst.val,
-				      &ctxt->src.val, 1);
-		ctxt->dst.type = OP_NONE;	/* Disable writeback. */
-		break;
 	case 0xf4:              /* hlt */
 		ctxt->ops->halt(ctxt);
 		break;
@@ -3956,12 +4096,6 @@ special_insn:
 	case 0xfd: /* std */
 		ctxt->eflags |= EFLG_DF;
 		break;
-	case 0xfe: /* Grp4 */
-		rc = em_grp45(ctxt);
-		break;
-	case 0xff: /* Grp5 */
-		rc = em_grp45(ctxt);
-		break;
 	default:
 		goto cannot_emulate;
 	}
@@ -4036,49 +4170,6 @@ twobyte_insn:
 	case 0x21: /* mov from dr to reg */
 		ops->get_dr(ctxt, ctxt->modrm_reg, &ctxt->dst.val);
 		break;
-	case 0x22: /* mov reg, cr */
-		if (ops->set_cr(ctxt, ctxt->modrm_reg, ctxt->src.val)) {
-			emulate_gp(ctxt, 0);
-			rc = X86EMUL_PROPAGATE_FAULT;
-			goto done;
-		}
-		ctxt->dst.type = OP_NONE;
-		break;
-	case 0x23: /* mov from reg to dr */
-		if (ops->set_dr(ctxt, ctxt->modrm_reg, ctxt->src.val &
-				((ctxt->mode == X86EMUL_MODE_PROT64) ?
-				 ~0ULL : ~0U)) < 0) {
-			/* #UD condition is already handled by the code above */
-			emulate_gp(ctxt, 0);
-			rc = X86EMUL_PROPAGATE_FAULT;
-			goto done;
-		}
-
-		ctxt->dst.type = OP_NONE;	/* no writeback */
-		break;
-	case 0x30:
-		/* wrmsr */
-		msr_data = (u32)ctxt->regs[VCPU_REGS_RAX]
-			| ((u64)ctxt->regs[VCPU_REGS_RDX] << 32);
-		if (ops->set_msr(ctxt, ctxt->regs[VCPU_REGS_RCX], msr_data)) {
-			emulate_gp(ctxt, 0);
-			rc = X86EMUL_PROPAGATE_FAULT;
-			goto done;
-		}
-		rc = X86EMUL_CONTINUE;
-		break;
-	case 0x32:
-		/* rdmsr */
-		if (ops->get_msr(ctxt, ctxt->regs[VCPU_REGS_RCX], &msr_data)) {
-			emulate_gp(ctxt, 0);
-			rc = X86EMUL_PROPAGATE_FAULT;
-			goto done;
-		} else {
-			ctxt->regs[VCPU_REGS_RAX] = (u32)msr_data;
-			ctxt->regs[VCPU_REGS_RDX] = msr_data >> 32;
-		}
-		rc = X86EMUL_CONTINUE;
-		break;
 	case 0x40 ... 0x4f:	/* cmov */
 		ctxt->dst.val = ctxt->dst.orig_val = ctxt->src.val;
 		if (!test_cc(ctxt->b, ctxt->eflags))
@@ -4091,93 +4182,21 @@ twobyte_insn:
 	case 0x90 ... 0x9f:     /* setcc r/m8 */
 		ctxt->dst.val = test_cc(ctxt->b, ctxt->eflags);
 		break;
-	case 0xa3:
-	      bt:		/* bt */
-		ctxt->dst.type = OP_NONE;
-		/* only subword offset */
-		ctxt->src.val &= (ctxt->dst.bytes << 3) - 1;
-		emulate_2op_SrcV_nobyte(ctxt, "bt");
-		break;
 	case 0xa4: /* shld imm8, r, r/m */
 	case 0xa5: /* shld cl, r, r/m */
 		emulate_2op_cl(ctxt, "shld");
 		break;
-	case 0xab:
-	      bts:		/* bts */
-		emulate_2op_SrcV_nobyte(ctxt, "bts");
-		break;
 	case 0xac: /* shrd imm8, r, r/m */
 	case 0xad: /* shrd cl, r, r/m */
 		emulate_2op_cl(ctxt, "shrd");
 		break;
 	case 0xae:              /* clflush */
 		break;
-	case 0xb0 ... 0xb1:	/* cmpxchg */
-		/*
-		 * Save real source value, then compare EAX against
-		 * destination.
-		 */
-		ctxt->src.orig_val = ctxt->src.val;
-		ctxt->src.val = ctxt->regs[VCPU_REGS_RAX];
-		emulate_2op_SrcV(ctxt, "cmp");
-		if (ctxt->eflags & EFLG_ZF) {
-			/* Success: write back to memory. */
-			ctxt->dst.val = ctxt->src.orig_val;
-		} else {
-			/* Failure: write the value we saw to EAX. */
-			ctxt->dst.type = OP_REG;
-			ctxt->dst.addr.reg = (unsigned long *)&ctxt->regs[VCPU_REGS_RAX];
-		}
-		break;
-	case 0xb3:
-	      btr:		/* btr */
-		emulate_2op_SrcV_nobyte(ctxt, "btr");
-		break;
 	case 0xb6 ... 0xb7:	/* movzx */
 		ctxt->dst.bytes = ctxt->op_bytes;
 		ctxt->dst.val = (ctxt->d & ByteOp) ? (u8) ctxt->src.val
 						       : (u16) ctxt->src.val;
 		break;
-	case 0xba:		/* Grp8 */
-		switch (ctxt->modrm_reg & 3) {
-		case 0:
-			goto bt;
-		case 1:
-			goto bts;
-		case 2:
-			goto btr;
-		case 3:
-			goto btc;
-		}
-		break;
-	case 0xbb:
-	      btc:		/* btc */
-		emulate_2op_SrcV_nobyte(ctxt, "btc");
-		break;
-	case 0xbc: {		/* bsf */
-		u8 zf;
-		__asm__ ("bsf %2, %0; setz %1"
-			 : "=r"(ctxt->dst.val), "=q"(zf)
-			 : "r"(ctxt->src.val));
-		ctxt->eflags &= ~X86_EFLAGS_ZF;
-		if (zf) {
-			ctxt->eflags |= X86_EFLAGS_ZF;
-			ctxt->dst.type = OP_NONE;	/* Disable writeback. */
-		}
-		break;
-	}
-	case 0xbd: {		/* bsr */
-		u8 zf;
-		__asm__ ("bsr %2, %0; setz %1"
-			 : "=r"(ctxt->dst.val), "=q"(zf)
-			 : "r"(ctxt->src.val));
-		ctxt->eflags &= ~X86_EFLAGS_ZF;
-		if (zf) {
-			ctxt->eflags |= X86_EFLAGS_ZF;
-			ctxt->dst.type = OP_NONE;	/* Disable writeback. */
-		}
-		break;
-	}
 	case 0xbe ... 0xbf:	/* movsx */
 		ctxt->dst.bytes = ctxt->op_bytes;
 		ctxt->dst.val = (ctxt->d & ByteOp) ? (s8) ctxt->src.val :
@@ -4194,9 +4213,6 @@ twobyte_insn:
 		ctxt->dst.val = (ctxt->op_bytes == 4) ? (u32) ctxt->src.val :
 							(u64) ctxt->src.val;
 		break;
-	case 0xc7:		/* Grp9 (cmpxchg8b) */
-		rc = em_grp9(ctxt);
-		break;
 	default:
 		goto cannot_emulate;
 	}
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 405f262..d68f99d 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -344,7 +344,7 @@ static void create_pit_timer(struct kvm *kvm, u32 val, int is_period)
 	struct kvm_timer *pt = &ps->pit_timer;
 	s64 interval;
 
-	if (!irqchip_in_kernel(kvm))
+	if (!irqchip_in_kernel(kvm) || ps->flags & KVM_PIT_FLAGS_HPET_LEGACY)
 		return;
 
 	interval = muldiv64(val, NSEC_PER_SEC, KVM_PIT_FREQ);
@@ -397,15 +397,11 @@ static void pit_load_count(struct kvm *kvm, int channel, u32 val)
 	case 1:
         /* FIXME: enhance mode 4 precision */
 	case 4:
-		if (!(ps->flags & KVM_PIT_FLAGS_HPET_LEGACY)) {
-			create_pit_timer(kvm, val, 0);
-		}
+		create_pit_timer(kvm, val, 0);
 		break;
 	case 2:
 	case 3:
-		if (!(ps->flags & KVM_PIT_FLAGS_HPET_LEGACY)){
-			create_pit_timer(kvm, val, 1);
-		}
+		create_pit_timer(kvm, val, 1);
 		break;
 	default:
 		destroy_pit_timer(kvm->arch.vpit);
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
index cac4746..b6a7353 100644
--- a/arch/x86/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
@@ -262,9 +262,10 @@ int kvm_pic_read_irq(struct kvm *kvm)
 
 void kvm_pic_reset(struct kvm_kpic_state *s)
 {
-	int irq;
-	struct kvm_vcpu *vcpu0 = s->pics_state->kvm->bsp_vcpu;
+	int irq, i;
+	struct kvm_vcpu *vcpu;
 	u8 irr = s->irr, isr = s->imr;
+	bool found = false;
 
 	s->last_irr = 0;
 	s->irr = 0;
@@ -281,12 +282,19 @@ void kvm_pic_reset(struct kvm_kpic_state *s)
 	s->special_fully_nested_mode = 0;
 	s->init4 = 0;
 
-	for (irq = 0; irq < PIC_NUM_PINS/2; irq++) {
-		if (vcpu0 && kvm_apic_accept_pic_intr(vcpu0))
-			if (irr & (1 << irq) || isr & (1 << irq)) {
-				pic_clear_isr(s, irq);
-			}
-	}
+	kvm_for_each_vcpu(i, vcpu, s->pics_state->kvm)
+		if (kvm_apic_accept_pic_intr(vcpu)) {
+			found = true;
+			break;
+		}
+
+
+	if (!found)
+		return;
+
+	for (irq = 0; irq < PIC_NUM_PINS/2; irq++)
+		if (irr & (1 << irq) || isr & (1 << irq))
+			pic_clear_isr(s, irq);
 }
 
 static void pic_ioport_write(void *opaque, u32 addr, u32 val)
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 54abb40..cfdc6e0 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -38,6 +38,7 @@
 #include "irq.h"
 #include "trace.h"
 #include "x86.h"
+#include "cpuid.h"
 
 #ifndef CONFIG_X86_64
 #define mod_64(x, y) ((x) - (y) * div64_u64(x, y))
@@ -1120,7 +1121,7 @@ int apic_has_pending_timer(struct kvm_vcpu *vcpu)
 	return 0;
 }
 
-static int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type)
+int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type)
 {
 	u32 reg = apic_get_reg(apic, lvt_type);
 	int vector, mode, trig_mode;
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 138e8cc..6f4ce25 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -34,6 +34,7 @@ void kvm_apic_set_version(struct kvm_vcpu *vcpu);
 int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest);
 int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda);
 int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq);
+int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type);
 
 u64 kvm_get_apic_base(struct kvm_vcpu *vcpu);
 void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index f1b36cf..2a2a9b4 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -59,15 +59,6 @@ enum {
 	AUDIT_POST_SYNC
 };
 
-char *audit_point_name[] = {
-	"pre page fault",
-	"post page fault",
-	"pre pte write",
-	"post pte write",
-	"pre sync",
-	"post sync"
-};
-
 #undef MMU_DEBUG
 
 #ifdef MMU_DEBUG
@@ -87,9 +78,6 @@ static int dbg = 0;
 module_param(dbg, bool, 0644);
 #endif
 
-static int oos_shadow = 1;
-module_param(oos_shadow, bool, 0644);
-
 #ifndef MMU_DEBUG
 #define ASSERT(x) do { } while (0)
 #else
@@ -593,6 +581,11 @@ static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
 	return 0;
 }
 
+static int mmu_memory_cache_free_objects(struct kvm_mmu_memory_cache *cache)
+{
+	return cache->nobjs;
+}
+
 static void mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc,
 				  struct kmem_cache *cache)
 {
@@ -953,21 +946,35 @@ static void pte_list_walk(unsigned long *pte_list, pte_list_walk_fn fn)
 	}
 }
 
+static unsigned long *__gfn_to_rmap(struct kvm *kvm, gfn_t gfn, int level,
+				    struct kvm_memory_slot *slot)
+{
+	struct kvm_lpage_info *linfo;
+
+	if (likely(level == PT_PAGE_TABLE_LEVEL))
+		return &slot->rmap[gfn - slot->base_gfn];
+
+	linfo = lpage_info_slot(gfn, slot, level);
+	return &linfo->rmap_pde;
+}
+
 /*
  * Take gfn and return the reverse mapping to it.
  */
 static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn, int level)
 {
 	struct kvm_memory_slot *slot;
-	struct kvm_lpage_info *linfo;
 
 	slot = gfn_to_memslot(kvm, gfn);
-	if (likely(level == PT_PAGE_TABLE_LEVEL))
-		return &slot->rmap[gfn - slot->base_gfn];
+	return __gfn_to_rmap(kvm, gfn, level, slot);
+}
 
-	linfo = lpage_info_slot(gfn, slot, level);
+static bool rmap_can_add(struct kvm_vcpu *vcpu)
+{
+	struct kvm_mmu_memory_cache *cache;
 
-	return &linfo->rmap_pde;
+	cache = &vcpu->arch.mmu_pte_list_desc_cache;
+	return mmu_memory_cache_free_objects(cache);
 }
 
 static int rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
@@ -1004,17 +1011,16 @@ static void drop_spte(struct kvm *kvm, u64 *sptep)
 		rmap_remove(kvm, sptep);
 }
 
-static int rmap_write_protect(struct kvm *kvm, u64 gfn)
+int kvm_mmu_rmap_write_protect(struct kvm *kvm, u64 gfn,
+			       struct kvm_memory_slot *slot)
 {
 	unsigned long *rmapp;
 	u64 *spte;
 	int i, write_protected = 0;
 
-	rmapp = gfn_to_rmap(kvm, gfn, PT_PAGE_TABLE_LEVEL);
-
+	rmapp = __gfn_to_rmap(kvm, gfn, PT_PAGE_TABLE_LEVEL, slot);
 	spte = rmap_next(kvm, rmapp, NULL);
 	while (spte) {
-		BUG_ON(!spte);
 		BUG_ON(!(*spte & PT_PRESENT_MASK));
 		rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte);
 		if (is_writable_pte(*spte)) {
@@ -1027,12 +1033,11 @@ static int rmap_write_protect(struct kvm *kvm, u64 gfn)
 	/* check for huge page mappings */
 	for (i = PT_DIRECTORY_LEVEL;
 	     i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) {
-		rmapp = gfn_to_rmap(kvm, gfn, i);
+		rmapp = __gfn_to_rmap(kvm, gfn, i, slot);
 		spte = rmap_next(kvm, rmapp, NULL);
 		while (spte) {
-			BUG_ON(!spte);
 			BUG_ON(!(*spte & PT_PRESENT_MASK));
-			BUG_ON((*spte & (PT_PAGE_SIZE_MASK|PT_PRESENT_MASK)) != (PT_PAGE_SIZE_MASK|PT_PRESENT_MASK));
+			BUG_ON(!is_large_pte(*spte));
 			pgprintk("rmap_write_protect(large): spte %p %llx %lld\n", spte, *spte, gfn);
 			if (is_writable_pte(*spte)) {
 				drop_spte(kvm, spte);
@@ -1047,6 +1052,14 @@ static int rmap_write_protect(struct kvm *kvm, u64 gfn)
 	return write_protected;
 }
 
+static int rmap_write_protect(struct kvm *kvm, u64 gfn)
+{
+	struct kvm_memory_slot *slot;
+
+	slot = gfn_to_memslot(kvm, gfn);
+	return kvm_mmu_rmap_write_protect(kvm, gfn, slot);
+}
+
 static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
 			   unsigned long data)
 {
@@ -1103,15 +1116,15 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
 			  int (*handler)(struct kvm *kvm, unsigned long *rmapp,
 					 unsigned long data))
 {
-	int i, j;
+	int j;
 	int ret;
 	int retval = 0;
 	struct kvm_memslots *slots;
+	struct kvm_memory_slot *memslot;
 
 	slots = kvm_memslots(kvm);
 
-	for (i = 0; i < slots->nmemslots; i++) {
-		struct kvm_memory_slot *memslot = &slots->memslots[i];
+	kvm_for_each_memslot(memslot, slots) {
 		unsigned long start = memslot->userspace_addr;
 		unsigned long end;
 
@@ -1324,7 +1337,7 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu,
 						  PAGE_SIZE);
 	set_page_private(virt_to_page(sp->spt), (unsigned long)sp);
 	list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages);
-	bitmap_zero(sp->slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS);
+	bitmap_zero(sp->slot_bitmap, KVM_MEM_SLOTS_NUM);
 	sp->parent_ptes = 0;
 	mmu_page_add_parent_pte(vcpu, sp, parent_pte);
 	kvm_mod_used_mmu_pages(vcpu->kvm, +1);
@@ -1511,6 +1524,13 @@ static int kvm_sync_page_transient(struct kvm_vcpu *vcpu,
 	return ret;
 }
 
+#ifdef CONFIG_KVM_MMU_AUDIT
+#include "mmu_audit.c"
+#else
+static void kvm_mmu_audit(struct kvm_vcpu *vcpu, int point) { }
+static void mmu_audit_disable(void) { }
+#endif
+
 static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
 			 struct list_head *invalid_list)
 {
@@ -1640,6 +1660,18 @@ static void init_shadow_page_table(struct kvm_mmu_page *sp)
 		sp->spt[i] = 0ull;
 }
 
+static void __clear_sp_write_flooding_count(struct kvm_mmu_page *sp)
+{
+	sp->write_flooding_count = 0;
+}
+
+static void clear_sp_write_flooding_count(u64 *spte)
+{
+	struct kvm_mmu_page *sp =  page_header(__pa(spte));
+
+	__clear_sp_write_flooding_count(sp);
+}
+
 static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
 					     gfn_t gfn,
 					     gva_t gaddr,
@@ -1683,6 +1715,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
 		} else if (sp->unsync)
 			kvm_mmu_mark_parents_unsync(sp);
 
+		__clear_sp_write_flooding_count(sp);
 		trace_kvm_mmu_get_page(sp, false);
 		return sp;
 	}
@@ -1796,7 +1829,7 @@ static void validate_direct_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 	}
 }
 
-static void mmu_page_zap_pte(struct kvm *kvm, struct kvm_mmu_page *sp,
+static bool mmu_page_zap_pte(struct kvm *kvm, struct kvm_mmu_page *sp,
 			     u64 *spte)
 {
 	u64 pte;
@@ -1804,17 +1837,21 @@ static void mmu_page_zap_pte(struct kvm *kvm, struct kvm_mmu_page *sp,
 
 	pte = *spte;
 	if (is_shadow_present_pte(pte)) {
-		if (is_last_spte(pte, sp->role.level))
+		if (is_last_spte(pte, sp->role.level)) {
 			drop_spte(kvm, spte);
-		else {
+			if (is_large_pte(pte))
+				--kvm->stat.lpages;
+		} else {
 			child = page_header(pte & PT64_BASE_ADDR_MASK);
 			drop_parent_pte(child, spte);
 		}
-	} else if (is_mmio_spte(pte))
+		return true;
+	}
+
+	if (is_mmio_spte(pte))
 		mmu_spte_clear_no_track(spte);
 
-	if (is_large_pte(pte))
-		--kvm->stat.lpages;
+	return false;
 }
 
 static void kvm_mmu_page_unlink_children(struct kvm *kvm,
@@ -1831,15 +1868,6 @@ static void kvm_mmu_put_page(struct kvm_mmu_page *sp, u64 *parent_pte)
 	mmu_page_remove_parent_pte(sp, parent_pte);
 }
 
-static void kvm_mmu_reset_last_pte_updated(struct kvm *kvm)
-{
-	int i;
-	struct kvm_vcpu *vcpu;
-
-	kvm_for_each_vcpu(i, vcpu, kvm)
-		vcpu->arch.last_pte_updated = NULL;
-}
-
 static void kvm_mmu_unlink_parents(struct kvm *kvm, struct kvm_mmu_page *sp)
 {
 	u64 *parent_pte;
@@ -1899,7 +1927,6 @@ static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp,
 	}
 
 	sp->role.invalid = 1;
-	kvm_mmu_reset_last_pte_updated(kvm);
 	return ret;
 }
 
@@ -1985,7 +2012,7 @@ void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int goal_nr_mmu_pages)
 	kvm->arch.n_max_mmu_pages = goal_nr_mmu_pages;
 }
 
-static int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
+int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
 {
 	struct kvm_mmu_page *sp;
 	struct hlist_node *node;
@@ -1994,7 +2021,7 @@ static int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
 
 	pgprintk("%s: looking for gfn %llx\n", __func__, gfn);
 	r = 0;
-
+	spin_lock(&kvm->mmu_lock);
 	for_each_gfn_indirect_valid_sp(kvm, sp, gfn, node) {
 		pgprintk("%s: gfn %llx role %x\n", __func__, gfn,
 			 sp->role.word);
@@ -2002,22 +2029,11 @@ static int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
 		kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list);
 	}
 	kvm_mmu_commit_zap_page(kvm, &invalid_list);
-	return r;
-}
-
-static void mmu_unshadow(struct kvm *kvm, gfn_t gfn)
-{
-	struct kvm_mmu_page *sp;
-	struct hlist_node *node;
-	LIST_HEAD(invalid_list);
+	spin_unlock(&kvm->mmu_lock);
 
-	for_each_gfn_indirect_valid_sp(kvm, sp, gfn, node) {
-		pgprintk("%s: zap %llx %x\n",
-			 __func__, gfn, sp->role.word);
-		kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list);
-	}
-	kvm_mmu_commit_zap_page(kvm, &invalid_list);
+	return r;
 }
+EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page);
 
 static void page_header_update_slot(struct kvm *kvm, void *pte, gfn_t gfn)
 {
@@ -2169,8 +2185,6 @@ static int mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn,
 			return 1;
 
 		if (!need_unsync && !s->unsync) {
-			if (!oos_shadow)
-				return 1;
 			need_unsync = true;
 		}
 	}
@@ -2191,11 +2205,6 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 	if (set_mmio_spte(sptep, gfn, pfn, pte_access))
 		return 0;
 
-	/*
-	 * We don't set the accessed bit, since we sometimes want to see
-	 * whether the guest actually used the pte (in order to detect
-	 * demand paging).
-	 */
 	spte = PT_PRESENT_MASK;
 	if (!speculative)
 		spte |= shadow_accessed_mask;
@@ -2346,10 +2355,6 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 		}
 	}
 	kvm_release_pfn_clean(pfn);
-	if (speculative) {
-		vcpu->arch.last_pte_updated = sptep;
-		vcpu->arch.last_pte_gfn = gfn;
-	}
 }
 
 static void nonpaging_new_cr3(struct kvm_vcpu *vcpu)
@@ -2840,12 +2845,12 @@ static void mmu_sync_roots(struct kvm_vcpu *vcpu)
 		return;
 
 	vcpu_clear_mmio_info(vcpu, ~0ul);
-	trace_kvm_mmu_audit(vcpu, AUDIT_PRE_SYNC);
+	kvm_mmu_audit(vcpu, AUDIT_PRE_SYNC);
 	if (vcpu->arch.mmu.root_level == PT64_ROOT_LEVEL) {
 		hpa_t root = vcpu->arch.mmu.root_hpa;
 		sp = page_header(root);
 		mmu_sync_children(vcpu, sp);
-		trace_kvm_mmu_audit(vcpu, AUDIT_POST_SYNC);
+		kvm_mmu_audit(vcpu, AUDIT_POST_SYNC);
 		return;
 	}
 	for (i = 0; i < 4; ++i) {
@@ -2857,7 +2862,7 @@ static void mmu_sync_roots(struct kvm_vcpu *vcpu)
 			mmu_sync_children(vcpu, sp);
 		}
 	}
-	trace_kvm_mmu_audit(vcpu, AUDIT_POST_SYNC);
+	kvm_mmu_audit(vcpu, AUDIT_POST_SYNC);
 }
 
 void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu)
@@ -3510,28 +3515,119 @@ static void mmu_pte_write_flush_tlb(struct kvm_vcpu *vcpu, bool zap_page,
 		kvm_mmu_flush_tlb(vcpu);
 }
 
-static bool last_updated_pte_accessed(struct kvm_vcpu *vcpu)
+static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa,
+				    const u8 *new, int *bytes)
 {
-	u64 *spte = vcpu->arch.last_pte_updated;
+	u64 gentry;
+	int r;
+
+	/*
+	 * Assume that the pte write on a page table of the same type
+	 * as the current vcpu paging mode since we update the sptes only
+	 * when they have the same mode.
+	 */
+	if (is_pae(vcpu) && *bytes == 4) {
+		/* Handle a 32-bit guest writing two halves of a 64-bit gpte */
+		*gpa &= ~(gpa_t)7;
+		*bytes = 8;
+		r = kvm_read_guest(vcpu->kvm, *gpa, &gentry, min(*bytes, 8));
+		if (r)
+			gentry = 0;
+		new = (const u8 *)&gentry;
+	}
 
-	return !!(spte && (*spte & shadow_accessed_mask));
+	switch (*bytes) {
+	case 4:
+		gentry = *(const u32 *)new;
+		break;
+	case 8:
+		gentry = *(const u64 *)new;
+		break;
+	default:
+		gentry = 0;
+		break;
+	}
+
+	return gentry;
 }
 
-static void kvm_mmu_access_page(struct kvm_vcpu *vcpu, gfn_t gfn)
+/*
+ * If we're seeing too many writes to a page, it may no longer be a page table,
+ * or we may be forking, in which case it is better to unmap the page.
+ */
+static bool detect_write_flooding(struct kvm_mmu_page *sp, u64 *spte)
 {
-	u64 *spte = vcpu->arch.last_pte_updated;
+	/*
+	 * Skip write-flooding detected for the sp whose level is 1, because
+	 * it can become unsync, then the guest page is not write-protected.
+	 */
+	if (sp->role.level == 1)
+		return false;
 
-	if (spte
-	    && vcpu->arch.last_pte_gfn == gfn
-	    && shadow_accessed_mask
-	    && !(*spte & shadow_accessed_mask)
-	    && is_shadow_present_pte(*spte))
-		set_bit(PT_ACCESSED_SHIFT, (unsigned long *)spte);
+	return ++sp->write_flooding_count >= 3;
+}
+
+/*
+ * Misaligned accesses are too much trouble to fix up; also, they usually
+ * indicate a page is not used as a page table.
+ */
+static bool detect_write_misaligned(struct kvm_mmu_page *sp, gpa_t gpa,
+				    int bytes)
+{
+	unsigned offset, pte_size, misaligned;
+
+	pgprintk("misaligned: gpa %llx bytes %d role %x\n",
+		 gpa, bytes, sp->role.word);
+
+	offset = offset_in_page(gpa);
+	pte_size = sp->role.cr4_pae ? 8 : 4;
+
+	/*
+	 * Sometimes, the OS only writes the last one bytes to update status
+	 * bits, for example, in linux, andb instruction is used in clear_bit().
+	 */
+	if (!(offset & (pte_size - 1)) && bytes == 1)
+		return false;
+
+	misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1);
+	misaligned |= bytes < 4;
+
+	return misaligned;
+}
+
+static u64 *get_written_sptes(struct kvm_mmu_page *sp, gpa_t gpa, int *nspte)
+{
+	unsigned page_offset, quadrant;
+	u64 *spte;
+	int level;
+
+	page_offset = offset_in_page(gpa);
+	level = sp->role.level;
+	*nspte = 1;
+	if (!sp->role.cr4_pae) {
+		page_offset <<= 1;	/* 32->64 */
+		/*
+		 * A 32-bit pde maps 4MB while the shadow pdes map
+		 * only 2MB.  So we need to double the offset again
+		 * and zap two pdes instead of one.
+		 */
+		if (level == PT32_ROOT_LEVEL) {
+			page_offset &= ~7; /* kill rounding error */
+			page_offset <<= 1;
+			*nspte = 2;
+		}
+		quadrant = page_offset >> PAGE_SHIFT;
+		page_offset &= ~PAGE_MASK;
+		if (quadrant != sp->role.quadrant)
+			return NULL;
+	}
+
+	spte = &sp->spt[page_offset / sizeof(*spte)];
+	return spte;
 }
 
 void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
-		       const u8 *new, int bytes,
-		       bool guest_initiated)
+		       const u8 *new, int bytes)
 {
 	gfn_t gfn = gpa >> PAGE_SHIFT;
 	union kvm_mmu_page_role mask = { .word = 0 };
@@ -3539,8 +3635,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 	struct hlist_node *node;
 	LIST_HEAD(invalid_list);
 	u64 entry, gentry, *spte;
-	unsigned pte_size, page_offset, misaligned, quadrant, offset;
-	int level, npte, invlpg_counter, r, flooded = 0;
+	int npte;
 	bool remote_flush, local_flush, zap_page;
 
 	/*
@@ -3551,112 +3646,45 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 		return;
 
 	zap_page = remote_flush = local_flush = false;
-	offset = offset_in_page(gpa);
 
 	pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes);
 
-	invlpg_counter = atomic_read(&vcpu->kvm->arch.invlpg_counter);
+	gentry = mmu_pte_write_fetch_gpte(vcpu, &gpa, new, &bytes);
 
 	/*
-	 * Assume that the pte write on a page table of the same type
-	 * as the current vcpu paging mode since we update the sptes only
-	 * when they have the same mode.
+	 * No need to care whether allocation memory is successful
+	 * or not since pte prefetch is skiped if it does not have
+	 * enough objects in the cache.
 	 */
-	if ((is_pae(vcpu) && bytes == 4) || !new) {
-		/* Handle a 32-bit guest writing two halves of a 64-bit gpte */
-		if (is_pae(vcpu)) {
-			gpa &= ~(gpa_t)7;
-			bytes = 8;
-		}
-		r = kvm_read_guest(vcpu->kvm, gpa, &gentry, min(bytes, 8));
-		if (r)
-			gentry = 0;
-		new = (const u8 *)&gentry;
-	}
-
-	switch (bytes) {
-	case 4:
-		gentry = *(const u32 *)new;
-		break;
-	case 8:
-		gentry = *(const u64 *)new;
-		break;
-	default:
-		gentry = 0;
-		break;
-	}
+	mmu_topup_memory_caches(vcpu);
 
 	spin_lock(&vcpu->kvm->mmu_lock);
-	if (atomic_read(&vcpu->kvm->arch.invlpg_counter) != invlpg_counter)
-		gentry = 0;
-	kvm_mmu_free_some_pages(vcpu);
 	++vcpu->kvm->stat.mmu_pte_write;
-	trace_kvm_mmu_audit(vcpu, AUDIT_PRE_PTE_WRITE);
-	if (guest_initiated) {
-		kvm_mmu_access_page(vcpu, gfn);
-		if (gfn == vcpu->arch.last_pt_write_gfn
-		    && !last_updated_pte_accessed(vcpu)) {
-			++vcpu->arch.last_pt_write_count;
-			if (vcpu->arch.last_pt_write_count >= 3)
-				flooded = 1;
-		} else {
-			vcpu->arch.last_pt_write_gfn = gfn;
-			vcpu->arch.last_pt_write_count = 1;
-			vcpu->arch.last_pte_updated = NULL;
-		}
-	}
+	kvm_mmu_audit(vcpu, AUDIT_PRE_PTE_WRITE);
 
 	mask.cr0_wp = mask.cr4_pae = mask.nxe = 1;
 	for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn, node) {
-		pte_size = sp->role.cr4_pae ? 8 : 4;
-		misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1);
-		misaligned |= bytes < 4;
-		if (misaligned || flooded) {
-			/*
-			 * Misaligned accesses are too much trouble to fix
-			 * up; also, they usually indicate a page is not used
-			 * as a page table.
-			 *
-			 * If we're seeing too many writes to a page,
-			 * it may no longer be a page table, or we may be
-			 * forking, in which case it is better to unmap the
-			 * page.
-			 */
-			pgprintk("misaligned: gpa %llx bytes %d role %x\n",
-				 gpa, bytes, sp->role.word);
+		spte = get_written_sptes(sp, gpa, &npte);
+
+		if (detect_write_misaligned(sp, gpa, bytes) ||
+		      detect_write_flooding(sp, spte)) {
 			zap_page |= !!kvm_mmu_prepare_zap_page(vcpu->kvm, sp,
 						     &invalid_list);
 			++vcpu->kvm->stat.mmu_flooded;
 			continue;
 		}
-		page_offset = offset;
-		level = sp->role.level;
-		npte = 1;
-		if (!sp->role.cr4_pae) {
-			page_offset <<= 1;	/* 32->64 */
-			/*
-			 * A 32-bit pde maps 4MB while the shadow pdes map
-			 * only 2MB.  So we need to double the offset again
-			 * and zap two pdes instead of one.
-			 */
-			if (level == PT32_ROOT_LEVEL) {
-				page_offset &= ~7; /* kill rounding error */
-				page_offset <<= 1;
-				npte = 2;
-			}
-			quadrant = page_offset >> PAGE_SHIFT;
-			page_offset &= ~PAGE_MASK;
-			if (quadrant != sp->role.quadrant)
-				continue;
-		}
+
+		spte = get_written_sptes(sp, gpa, &npte);
+		if (!spte)
+			continue;
+
 		local_flush = true;
-		spte = &sp->spt[page_offset / sizeof(*spte)];
 		while (npte--) {
 			entry = *spte;
 			mmu_page_zap_pte(vcpu->kvm, sp, spte);
 			if (gentry &&
 			      !((sp->role.word ^ vcpu->arch.mmu.base_role.word)
-			      & mask.word))
+			      & mask.word) && rmap_can_add(vcpu))
 				mmu_pte_write_new_pte(vcpu, sp, spte, &gentry);
 			if (!remote_flush && need_remote_flush(entry, *spte))
 				remote_flush = true;
@@ -3665,7 +3693,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 	}
 	mmu_pte_write_flush_tlb(vcpu, zap_page, remote_flush, local_flush);
 	kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list);
-	trace_kvm_mmu_audit(vcpu, AUDIT_POST_PTE_WRITE);
+	kvm_mmu_audit(vcpu, AUDIT_POST_PTE_WRITE);
 	spin_unlock(&vcpu->kvm->mmu_lock);
 }
 
@@ -3679,9 +3707,8 @@ int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva)
 
 	gpa = kvm_mmu_gva_to_gpa_read(vcpu, gva, NULL);
 
-	spin_lock(&vcpu->kvm->mmu_lock);
 	r = kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT);
-	spin_unlock(&vcpu->kvm->mmu_lock);
+
 	return r;
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page_virt);
@@ -3702,10 +3729,18 @@ void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu)
 	kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list);
 }
 
+static bool is_mmio_page_fault(struct kvm_vcpu *vcpu, gva_t addr)
+{
+	if (vcpu->arch.mmu.direct_map || mmu_is_nested(vcpu))
+		return vcpu_match_mmio_gpa(vcpu, addr);
+
+	return vcpu_match_mmio_gva(vcpu, addr);
+}
+
 int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code,
 		       void *insn, int insn_len)
 {
-	int r;
+	int r, emulation_type = EMULTYPE_RETRY;
 	enum emulation_result er;
 
 	r = vcpu->arch.mmu.page_fault(vcpu, cr2, error_code, false);
@@ -3717,11 +3752,10 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code,
 		goto out;
 	}
 
-	r = mmu_topup_memory_caches(vcpu);
-	if (r)
-		goto out;
+	if (is_mmio_page_fault(vcpu, cr2))
+		emulation_type = 0;
 
-	er = x86_emulate_instruction(vcpu, cr2, 0, insn, insn_len);
+	er = x86_emulate_instruction(vcpu, cr2, emulation_type, insn, insn_len);
 
 	switch (er) {
 	case EMULATE_DONE:
@@ -3792,7 +3826,11 @@ static int alloc_mmu_pages(struct kvm_vcpu *vcpu)
 int kvm_mmu_create(struct kvm_vcpu *vcpu)
 {
 	ASSERT(vcpu);
-	ASSERT(!VALID_PAGE(vcpu->arch.mmu.root_hpa));
+
+	vcpu->arch.walk_mmu = &vcpu->arch.mmu;
+	vcpu->arch.mmu.root_hpa = INVALID_PAGE;
+	vcpu->arch.mmu.translate_gpa = translate_gpa;
+	vcpu->arch.nested_mmu.translate_gpa = translate_nested_gpa;
 
 	return alloc_mmu_pages(vcpu);
 }
@@ -3852,14 +3890,14 @@ restart:
 	spin_unlock(&kvm->mmu_lock);
 }
 
-static int kvm_mmu_remove_some_alloc_mmu_pages(struct kvm *kvm,
-					       struct list_head *invalid_list)
+static void kvm_mmu_remove_some_alloc_mmu_pages(struct kvm *kvm,
+						struct list_head *invalid_list)
 {
 	struct kvm_mmu_page *page;
 
 	page = container_of(kvm->arch.active_mmu_pages.prev,
 			    struct kvm_mmu_page, link);
-	return kvm_mmu_prepare_zap_page(kvm, page, invalid_list);
+	kvm_mmu_prepare_zap_page(kvm, page, invalid_list);
 }
 
 static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc)
@@ -3874,15 +3912,15 @@ static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc)
 	raw_spin_lock(&kvm_lock);
 
 	list_for_each_entry(kvm, &vm_list, vm_list) {
-		int idx, freed_pages;
+		int idx;
 		LIST_HEAD(invalid_list);
 
 		idx = srcu_read_lock(&kvm->srcu);
 		spin_lock(&kvm->mmu_lock);
 		if (!kvm_freed && nr_to_scan > 0 &&
 		    kvm->arch.n_used_mmu_pages > 0) {
-			freed_pages = kvm_mmu_remove_some_alloc_mmu_pages(kvm,
-							  &invalid_list);
+			kvm_mmu_remove_some_alloc_mmu_pages(kvm,
+							    &invalid_list);
 			kvm_freed = kvm;
 		}
 		nr_to_scan--;
@@ -3944,15 +3982,15 @@ nomem:
  */
 unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm)
 {
-	int i;
 	unsigned int nr_mmu_pages;
 	unsigned int  nr_pages = 0;
 	struct kvm_memslots *slots;
+	struct kvm_memory_slot *memslot;
 
 	slots = kvm_memslots(kvm);
 
-	for (i = 0; i < slots->nmemslots; i++)
-		nr_pages += slots->memslots[i].npages;
+	kvm_for_each_memslot(memslot, slots)
+		nr_pages += memslot->npages;
 
 	nr_mmu_pages = nr_pages * KVM_PERMILLE_MMU_PAGES / 1000;
 	nr_mmu_pages = max(nr_mmu_pages,
@@ -3961,127 +3999,6 @@ unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm)
 	return nr_mmu_pages;
 }
 
-static void *pv_mmu_peek_buffer(struct kvm_pv_mmu_op_buffer *buffer,
-				unsigned len)
-{
-	if (len > buffer->len)
-		return NULL;
-	return buffer->ptr;
-}
-
-static void *pv_mmu_read_buffer(struct kvm_pv_mmu_op_buffer *buffer,
-				unsigned len)
-{
-	void *ret;
-
-	ret = pv_mmu_peek_buffer(buffer, len);
-	if (!ret)
-		return ret;
-	buffer->ptr += len;
-	buffer->len -= len;
-	buffer->processed += len;
-	return ret;
-}
-
-static int kvm_pv_mmu_write(struct kvm_vcpu *vcpu,
-			     gpa_t addr, gpa_t value)
-{
-	int bytes = 8;
-	int r;
-
-	if (!is_long_mode(vcpu) && !is_pae(vcpu))
-		bytes = 4;
-
-	r = mmu_topup_memory_caches(vcpu);
-	if (r)
-		return r;
-
-	if (!emulator_write_phys(vcpu, addr, &value, bytes))
-		return -EFAULT;
-
-	return 1;
-}
-
-static int kvm_pv_mmu_flush_tlb(struct kvm_vcpu *vcpu)
-{
-	(void)kvm_set_cr3(vcpu, kvm_read_cr3(vcpu));
-	return 1;
-}
-
-static int kvm_pv_mmu_release_pt(struct kvm_vcpu *vcpu, gpa_t addr)
-{
-	spin_lock(&vcpu->kvm->mmu_lock);
-	mmu_unshadow(vcpu->kvm, addr >> PAGE_SHIFT);
-	spin_unlock(&vcpu->kvm->mmu_lock);
-	return 1;
-}
-
-static int kvm_pv_mmu_op_one(struct kvm_vcpu *vcpu,
-			     struct kvm_pv_mmu_op_buffer *buffer)
-{
-	struct kvm_mmu_op_header *header;
-
-	header = pv_mmu_peek_buffer(buffer, sizeof *header);
-	if (!header)
-		return 0;
-	switch (header->op) {
-	case KVM_MMU_OP_WRITE_PTE: {
-		struct kvm_mmu_op_write_pte *wpte;
-
-		wpte = pv_mmu_read_buffer(buffer, sizeof *wpte);
-		if (!wpte)
-			return 0;
-		return kvm_pv_mmu_write(vcpu, wpte->pte_phys,
-					wpte->pte_val);
-	}
-	case KVM_MMU_OP_FLUSH_TLB: {
-		struct kvm_mmu_op_flush_tlb *ftlb;
-
-		ftlb = pv_mmu_read_buffer(buffer, sizeof *ftlb);
-		if (!ftlb)
-			return 0;
-		return kvm_pv_mmu_flush_tlb(vcpu);
-	}
-	case KVM_MMU_OP_RELEASE_PT: {
-		struct kvm_mmu_op_release_pt *rpt;
-
-		rpt = pv_mmu_read_buffer(buffer, sizeof *rpt);
-		if (!rpt)
-			return 0;
-		return kvm_pv_mmu_release_pt(vcpu, rpt->pt_phys);
-	}
-	default: return 0;
-	}
-}
-
-int kvm_pv_mmu_op(struct kvm_vcpu *vcpu, unsigned long bytes,
-		  gpa_t addr, unsigned long *ret)
-{
-	int r;
-	struct kvm_pv_mmu_op_buffer *buffer = &vcpu->arch.mmu_op_buffer;
-
-	buffer->ptr = buffer->buf;
-	buffer->len = min_t(unsigned long, bytes, sizeof buffer->buf);
-	buffer->processed = 0;
-
-	r = kvm_read_guest(vcpu->kvm, addr, buffer->buf, buffer->len);
-	if (r)
-		goto out;
-
-	while (buffer->len) {
-		r = kvm_pv_mmu_op_one(vcpu, buffer);
-		if (r < 0)
-			goto out;
-		if (r == 0)
-			break;
-	}
-
-	r = 1;
-out:
-	*ret = buffer->processed;
-	return r;
-}
-
 int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4])
 {
 	struct kvm_shadow_walk_iterator iterator;
@@ -4110,12 +4027,6 @@ void kvm_mmu_destroy(struct kvm_vcpu *vcpu)
 	mmu_free_memory_caches(vcpu);
 }
 
-#ifdef CONFIG_KVM_MMU_AUDIT
-#include "mmu_audit.c"
-#else
-static void mmu_audit_disable(void) { }
-#endif
-
 void kvm_mmu_module_exit(void)
 {
 	mmu_destroy_caches();
diff --git a/arch/x86/kvm/mmu_audit.c b/arch/x86/kvm/mmu_audit.c
index 746ec25..fe15dcc 100644
--- a/arch/x86/kvm/mmu_audit.c
+++ b/arch/x86/kvm/mmu_audit.c
@@ -19,6 +19,15 @@
 
 #include <linux/ratelimit.h>
 
+char const *audit_point_name[] = {
+	"pre page fault",
+	"post page fault",
+	"pre pte write",
+	"post pte write",
+	"pre sync",
+	"post sync"
+};
+
 #define audit_printk(kvm, fmt, args...)		\
 	printk(KERN_ERR "audit: (%s) error: "	\
 		fmt, audit_point_name[kvm->arch.audit_point], ##args)
@@ -224,7 +233,10 @@ static void audit_vcpu_spte(struct kvm_vcpu *vcpu)
 	mmu_spte_walk(vcpu, audit_spte);
 }
 
-static void kvm_mmu_audit(void *ignore, struct kvm_vcpu *vcpu, int point)
+static bool mmu_audit;
+static struct jump_label_key mmu_audit_key;
+
+static void __kvm_mmu_audit(struct kvm_vcpu *vcpu, int point)
 {
 	static DEFINE_RATELIMIT_STATE(ratelimit_state, 5 * HZ, 10);
 
@@ -236,18 +248,18 @@ static void kvm_mmu_audit(void *ignore, struct kvm_vcpu *vcpu, int point)
 	audit_vcpu_spte(vcpu);
 }
 
-static bool mmu_audit;
+static inline void kvm_mmu_audit(struct kvm_vcpu *vcpu, int point)
+{
+	if (static_branch((&mmu_audit_key)))
+		__kvm_mmu_audit(vcpu, point);
+}
 
 static void mmu_audit_enable(void)
 {
-	int ret;
-
 	if (mmu_audit)
 		return;
 
-	ret = register_trace_kvm_mmu_audit(kvm_mmu_audit, NULL);
-	WARN_ON(ret);
-
+	jump_label_inc(&mmu_audit_key);
 	mmu_audit = true;
 }
 
@@ -256,8 +268,7 @@ static void mmu_audit_disable(void)
 	if (!mmu_audit)
 		return;
 
-	unregister_trace_kvm_mmu_audit(kvm_mmu_audit, NULL);
-	tracepoint_synchronize_unregister();
+	jump_label_dec(&mmu_audit_key);
 	mmu_audit = false;
 }
 
diff --git a/arch/x86/kvm/mmutrace.h b/arch/x86/kvm/mmutrace.h
index eed67f3..89fb0e8 100644
--- a/arch/x86/kvm/mmutrace.h
+++ b/arch/x86/kvm/mmutrace.h
@@ -243,25 +243,6 @@ TRACE_EVENT(
 	TP_printk("addr:%llx gfn %llx access %x", __entry->addr, __entry->gfn,
 		  __entry->access)
 );
-
-TRACE_EVENT(
-	kvm_mmu_audit,
-	TP_PROTO(struct kvm_vcpu *vcpu, int audit_point),
-	TP_ARGS(vcpu, audit_point),
-
-	TP_STRUCT__entry(
-		__field(struct kvm_vcpu *, vcpu)
-		__field(int, audit_point)
-	),
-
-	TP_fast_assign(
-		__entry->vcpu = vcpu;
-		__entry->audit_point = audit_point;
-	),
-
-	TP_printk("vcpu:%d %s", __entry->vcpu->cpu,
-		  audit_point_name[__entry->audit_point])
-);
 #endif /* _TRACE_KVMMMU_H */
 
 #undef TRACE_INCLUDE_PATH
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 9299410..1561028 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -497,6 +497,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
 	     shadow_walk_next(&it)) {
 		gfn_t table_gfn;
 
+		clear_sp_write_flooding_count(it.sptep);
 		drop_large_spte(vcpu, it.sptep);
 
 		sp = NULL;
@@ -522,6 +523,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
 	     shadow_walk_next(&it)) {
 		gfn_t direct_gfn;
 
+		clear_sp_write_flooding_count(it.sptep);
 		validate_direct_spte(vcpu, it.sptep, direct_access);
 
 		drop_large_spte(vcpu, it.sptep);
@@ -536,6 +538,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
 		link_shadow_page(it.sptep, sp);
 	}
 
+	clear_sp_write_flooding_count(it.sptep);
 	mmu_set_spte(vcpu, it.sptep, access, gw->pte_access,
 		     user_fault, write_fault, emulate, it.level,
 		     gw->gfn, pfn, prefault, map_writable);
@@ -599,11 +602,9 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
 	 */
 	if (!r) {
 		pgprintk("%s: guest page fault\n", __func__);
-		if (!prefault) {
+		if (!prefault)
 			inject_page_fault(vcpu, &walker.fault);
-			/* reset fork detector */
-			vcpu->arch.last_pt_write_count = 0;
-		}
+
 		return 0;
 	}
 
@@ -631,7 +632,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
 	if (mmu_notifier_retry(vcpu, mmu_seq))
 		goto out_unlock;
 
-	trace_kvm_mmu_audit(vcpu, AUDIT_PRE_PAGE_FAULT);
+	kvm_mmu_audit(vcpu, AUDIT_PRE_PAGE_FAULT);
 	kvm_mmu_free_some_pages(vcpu);
 	if (!force_pt_level)
 		transparent_hugepage_adjust(vcpu, &walker.gfn, &pfn, &level);
@@ -641,11 +642,8 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
 	pgprintk("%s: shadow pte %p %llx emulate %d\n", __func__,
 		 sptep, *sptep, emulate);
 
-	if (!emulate)
-		vcpu->arch.last_pt_write_count = 0; /* reset fork detector */
-
 	++vcpu->stat.pf_fixed;
-	trace_kvm_mmu_audit(vcpu, AUDIT_POST_PAGE_FAULT);
+	kvm_mmu_audit(vcpu, AUDIT_POST_PAGE_FAULT);
 	spin_unlock(&vcpu->kvm->mmu_lock);
 
 	return emulate;
@@ -656,65 +654,66 @@ out_unlock:
 	return 0;
 }
 
+static gpa_t FNAME(get_level1_sp_gpa)(struct kvm_mmu_page *sp)
+{
+	int offset = 0;
+
+	WARN_ON(sp->role.level != 1);
+
+	if (PTTYPE == 32)
+		offset = sp->role.quadrant << PT64_LEVEL_BITS;
+
+	return gfn_to_gpa(sp->gfn) + offset * sizeof(pt_element_t);
+}
+
 static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
 {
 	struct kvm_shadow_walk_iterator iterator;
 	struct kvm_mmu_page *sp;
-	gpa_t pte_gpa = -1;
 	int level;
 	u64 *sptep;
-	int need_flush = 0;
 
 	vcpu_clear_mmio_info(vcpu, gva);
 
-	spin_lock(&vcpu->kvm->mmu_lock);
+	/*
+	 * No need to check return value here, rmap_can_add() can
+	 * help us to skip pte prefetch later.
+	 */
+	mmu_topup_memory_caches(vcpu);
 
+	spin_lock(&vcpu->kvm->mmu_lock);
 	for_each_shadow_entry(vcpu, gva, iterator) {
 		level = iterator.level;
 		sptep = iterator.sptep;
 
 		sp = page_header(__pa(sptep));
 		if (is_last_spte(*sptep, level)) {
-			int offset, shift;
+			pt_element_t gpte;
+			gpa_t pte_gpa;
 
 			if (!sp->unsync)
 				break;
 
-			shift = PAGE_SHIFT -
-				  (PT_LEVEL_BITS - PT64_LEVEL_BITS) * level;
-			offset = sp->role.quadrant << shift;
-
-			pte_gpa = (sp->gfn << PAGE_SHIFT) + offset;
+			pte_gpa = FNAME(get_level1_sp_gpa)(sp);
 			pte_gpa += (sptep - sp->spt) * sizeof(pt_element_t);
 
-			if (is_shadow_present_pte(*sptep)) {
-				if (is_large_pte(*sptep))
-					--vcpu->kvm->stat.lpages;
-				drop_spte(vcpu->kvm, sptep);
-				need_flush = 1;
-			} else if (is_mmio_spte(*sptep))
-				mmu_spte_clear_no_track(sptep);
+			if (mmu_page_zap_pte(vcpu->kvm, sp, sptep))
+				kvm_flush_remote_tlbs(vcpu->kvm);
 
-			break;
+			if (!rmap_can_add(vcpu))
+				break;
+
+			if (kvm_read_guest_atomic(vcpu->kvm, pte_gpa, &gpte,
+						  sizeof(pt_element_t)))
+				break;
+
+			FNAME(update_pte)(vcpu, sp, sptep, &gpte);
 		}
 
 		if (!is_shadow_present_pte(*sptep) || !sp->unsync_children)
 			break;
 	}
-
-	if (need_flush)
-		kvm_flush_remote_tlbs(vcpu->kvm);
-
-	atomic_inc(&vcpu->kvm->arch.invlpg_counter);
-
 	spin_unlock(&vcpu->kvm->mmu_lock);
-
-	if (pte_gpa == -1)
-		return;
-
-	if (mmu_topup_memory_caches(vcpu))
-		return;
-	kvm_mmu_pte_write(vcpu, pte_gpa, NULL, sizeof(pt_element_t), 0);
 }
 
 static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr, u32 access,
@@ -769,19 +768,14 @@ static gpa_t FNAME(gva_to_gpa_nested)(struct kvm_vcpu *vcpu, gva_t vaddr,
  */
 static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
 {
-	int i, offset, nr_present;
+	int i, nr_present = 0;
 	bool host_writable;
 	gpa_t first_pte_gpa;
 
-	offset = nr_present = 0;
-
 	/* direct kvm_mmu_page can not be unsync. */
 	BUG_ON(sp->role.direct);
 
-	if (PTTYPE == 32)
-		offset = sp->role.quadrant << PT64_LEVEL_BITS;
-
-	first_pte_gpa = gfn_to_gpa(sp->gfn) + offset * sizeof(pt_element_t);
+	first_pte_gpa = FNAME(get_level1_sp_gpa)(sp);
 
 	for (i = 0; i < PT64_ENT_PER_PAGE; i++) {
 		unsigned pte_access;
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
new file mode 100644
index 0000000..7aad544
--- /dev/null
+++ b/arch/x86/kvm/pmu.c
@@ -0,0 +1,533 @@
+/*
+ * Kernel-based Virtual Machine -- Performane Monitoring Unit support
+ *
+ * Copyright 2011 Red Hat, Inc. and/or its affiliates.
+ *
+ * Authors:
+ *   Avi Kivity   <avi@redhat.com>
+ *   Gleb Natapov <gleb@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#include <linux/types.h>
+#include <linux/kvm_host.h>
+#include <linux/perf_event.h>
+#include "x86.h"
+#include "cpuid.h"
+#include "lapic.h"
+
+static struct kvm_arch_event_perf_mapping {
+	u8 eventsel;
+	u8 unit_mask;
+	unsigned event_type;
+	bool inexact;
+} arch_events[] = {
+	/* Index must match CPUID 0x0A.EBX bit vector */
+	[0] = { 0x3c, 0x00, PERF_COUNT_HW_CPU_CYCLES },
+	[1] = { 0xc0, 0x00, PERF_COUNT_HW_INSTRUCTIONS },
+	[2] = { 0x3c, 0x01, PERF_COUNT_HW_BUS_CYCLES  },
+	[3] = { 0x2e, 0x4f, PERF_COUNT_HW_CACHE_REFERENCES },
+	[4] = { 0x2e, 0x41, PERF_COUNT_HW_CACHE_MISSES },
+	[5] = { 0xc4, 0x00, PERF_COUNT_HW_BRANCH_INSTRUCTIONS },
+	[6] = { 0xc5, 0x00, PERF_COUNT_HW_BRANCH_MISSES },
+};
+
+/* mapping between fixed pmc index and arch_events array */
+int fixed_pmc_events[] = {1, 0, 2};
+
+static bool pmc_is_gp(struct kvm_pmc *pmc)
+{
+	return pmc->type == KVM_PMC_GP;
+}
+
+static inline u64 pmc_bitmask(struct kvm_pmc *pmc)
+{
+	struct kvm_pmu *pmu = &pmc->vcpu->arch.pmu;
+
+	return pmu->counter_bitmask[pmc->type];
+}
+
+static inline bool pmc_enabled(struct kvm_pmc *pmc)
+{
+	struct kvm_pmu *pmu = &pmc->vcpu->arch.pmu;
+	return test_bit(pmc->idx, (unsigned long *)&pmu->global_ctrl);
+}
+
+static inline struct kvm_pmc *get_gp_pmc(struct kvm_pmu *pmu, u32 msr,
+					 u32 base)
+{
+	if (msr >= base && msr < base + pmu->nr_arch_gp_counters)
+		return &pmu->gp_counters[msr - base];
+	return NULL;
+}
+
+static inline struct kvm_pmc *get_fixed_pmc(struct kvm_pmu *pmu, u32 msr)
+{
+	int base = MSR_CORE_PERF_FIXED_CTR0;
+	if (msr >= base && msr < base + pmu->nr_arch_fixed_counters)
+		return &pmu->fixed_counters[msr - base];
+	return NULL;
+}
+
+static inline struct kvm_pmc *get_fixed_pmc_idx(struct kvm_pmu *pmu, int idx)
+{
+	return get_fixed_pmc(pmu, MSR_CORE_PERF_FIXED_CTR0 + idx);
+}
+
+static struct kvm_pmc *global_idx_to_pmc(struct kvm_pmu *pmu, int idx)
+{
+	if (idx < X86_PMC_IDX_FIXED)
+		return get_gp_pmc(pmu, MSR_P6_EVNTSEL0 + idx, MSR_P6_EVNTSEL0);
+	else
+		return get_fixed_pmc_idx(pmu, idx - X86_PMC_IDX_FIXED);
+}
+
+void kvm_deliver_pmi(struct kvm_vcpu *vcpu)
+{
+	if (vcpu->arch.apic)
+		kvm_apic_local_deliver(vcpu->arch.apic, APIC_LVTPC);
+}
+
+static void trigger_pmi(struct irq_work *irq_work)
+{
+	struct kvm_pmu *pmu = container_of(irq_work, struct kvm_pmu,
+			irq_work);
+	struct kvm_vcpu *vcpu = container_of(pmu, struct kvm_vcpu,
+			arch.pmu);
+
+	kvm_deliver_pmi(vcpu);
+}
+
+static void kvm_perf_overflow(struct perf_event *perf_event,
+			      struct perf_sample_data *data,
+			      struct pt_regs *regs)
+{
+	struct kvm_pmc *pmc = perf_event->overflow_handler_context;
+	struct kvm_pmu *pmu = &pmc->vcpu->arch.pmu;
+	__set_bit(pmc->idx, (unsigned long *)&pmu->global_status);
+}
+
+static void kvm_perf_overflow_intr(struct perf_event *perf_event,
+		struct perf_sample_data *data, struct pt_regs *regs)
+{
+	struct kvm_pmc *pmc = perf_event->overflow_handler_context;
+	struct kvm_pmu *pmu = &pmc->vcpu->arch.pmu;
+	if (!test_and_set_bit(pmc->idx, (unsigned long *)&pmu->reprogram_pmi)) {
+		kvm_perf_overflow(perf_event, data, regs);
+		kvm_make_request(KVM_REQ_PMU, pmc->vcpu);
+		/*
+		 * Inject PMI. If vcpu was in a guest mode during NMI PMI
+		 * can be ejected on a guest mode re-entry. Otherwise we can't
+		 * be sure that vcpu wasn't executing hlt instruction at the
+		 * time of vmexit and is not going to re-enter guest mode until,
+		 * woken up. So we should wake it, but this is impossible from
+		 * NMI context. Do it from irq work instead.
+		 */
+		if (!kvm_is_in_guest())
+			irq_work_queue(&pmc->vcpu->arch.pmu.irq_work);
+		else
+			kvm_make_request(KVM_REQ_PMI, pmc->vcpu);
+	}
+}
+
+static u64 read_pmc(struct kvm_pmc *pmc)
+{
+	u64 counter, enabled, running;
+
+	counter = pmc->counter;
+
+	if (pmc->perf_event)
+		counter += perf_event_read_value(pmc->perf_event,
+						 &enabled, &running);
+
+	/* FIXME: Scaling needed? */
+
+	return counter & pmc_bitmask(pmc);
+}
+
+static void stop_counter(struct kvm_pmc *pmc)
+{
+	if (pmc->perf_event) {
+		pmc->counter = read_pmc(pmc);
+		perf_event_release_kernel(pmc->perf_event);
+		pmc->perf_event = NULL;
+	}
+}
+
+static void reprogram_counter(struct kvm_pmc *pmc, u32 type,
+		unsigned config, bool exclude_user, bool exclude_kernel,
+		bool intr)
+{
+	struct perf_event *event;
+	struct perf_event_attr attr = {
+		.type = type,
+		.size = sizeof(attr),
+		.pinned = true,
+		.exclude_idle = true,
+		.exclude_host = 1,
+		.exclude_user = exclude_user,
+		.exclude_kernel = exclude_kernel,
+		.config = config,
+	};
+
+	attr.sample_period = (-pmc->counter) & pmc_bitmask(pmc);
+
+	event = perf_event_create_kernel_counter(&attr, -1, current,
+						 intr ? kvm_perf_overflow_intr :
+						 kvm_perf_overflow, pmc);
+	if (IS_ERR(event)) {
+		printk_once("kvm: pmu event creation failed %ld\n",
+				PTR_ERR(event));
+		return;
+	}
+
+	pmc->perf_event = event;
+	clear_bit(pmc->idx, (unsigned long*)&pmc->vcpu->arch.pmu.reprogram_pmi);
+}
+
+static unsigned find_arch_event(struct kvm_pmu *pmu, u8 event_select,
+		u8 unit_mask)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(arch_events); i++)
+		if (arch_events[i].eventsel == event_select
+				&& arch_events[i].unit_mask == unit_mask
+				&& (pmu->available_event_types & (1 << i)))
+			break;
+
+	if (i == ARRAY_SIZE(arch_events))
+		return PERF_COUNT_HW_MAX;
+
+	return arch_events[i].event_type;
+}
+
+static void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
+{
+	unsigned config, type = PERF_TYPE_RAW;
+	u8 event_select, unit_mask;
+
+	pmc->eventsel = eventsel;
+
+	stop_counter(pmc);
+
+	if (!(eventsel & ARCH_PERFMON_EVENTSEL_ENABLE) || !pmc_enabled(pmc))
+		return;
+
+	event_select = eventsel & ARCH_PERFMON_EVENTSEL_EVENT;
+	unit_mask = (eventsel & ARCH_PERFMON_EVENTSEL_UMASK) >> 8;
+
+	if (!(event_select & (ARCH_PERFMON_EVENTSEL_EDGE |
+				ARCH_PERFMON_EVENTSEL_INV |
+				ARCH_PERFMON_EVENTSEL_CMASK))) {
+		config = find_arch_event(&pmc->vcpu->arch.pmu, event_select,
+				unit_mask);
+		if (config != PERF_COUNT_HW_MAX)
+			type = PERF_TYPE_HARDWARE;
+	}
+
+	if (type == PERF_TYPE_RAW)
+		config = eventsel & X86_RAW_EVENT_MASK;
+
+	reprogram_counter(pmc, type, config,
+			!(eventsel & ARCH_PERFMON_EVENTSEL_USR),
+			!(eventsel & ARCH_PERFMON_EVENTSEL_OS),
+			eventsel & ARCH_PERFMON_EVENTSEL_INT);
+}
+
+static void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 en_pmi, int idx)
+{
+	unsigned en = en_pmi & 0x3;
+	bool pmi = en_pmi & 0x8;
+
+	stop_counter(pmc);
+
+	if (!en || !pmc_enabled(pmc))
+		return;
+
+	reprogram_counter(pmc, PERF_TYPE_HARDWARE,
+			arch_events[fixed_pmc_events[idx]].event_type,
+			!(en & 0x2), /* exclude user */
+			!(en & 0x1), /* exclude kernel */
+			pmi);
+}
+
+static inline u8 fixed_en_pmi(u64 ctrl, int idx)
+{
+	return (ctrl >> (idx * 4)) & 0xf;
+}
+
+static void reprogram_fixed_counters(struct kvm_pmu *pmu, u64 data)
+{
+	int i;
+
+	for (i = 0; i < pmu->nr_arch_fixed_counters; i++) {
+		u8 en_pmi = fixed_en_pmi(data, i);
+		struct kvm_pmc *pmc = get_fixed_pmc_idx(pmu, i);
+
+		if (fixed_en_pmi(pmu->fixed_ctr_ctrl, i) == en_pmi)
+			continue;
+
+		reprogram_fixed_counter(pmc, en_pmi, i);
+	}
+
+	pmu->fixed_ctr_ctrl = data;
+}
+
+static void reprogram_idx(struct kvm_pmu *pmu, int idx)
+{
+	struct kvm_pmc *pmc = global_idx_to_pmc(pmu, idx);
+
+	if (!pmc)
+		return;
+
+	if (pmc_is_gp(pmc))
+		reprogram_gp_counter(pmc, pmc->eventsel);
+	else {
+		int fidx = idx - X86_PMC_IDX_FIXED;
+		reprogram_fixed_counter(pmc,
+				fixed_en_pmi(pmu->fixed_ctr_ctrl, fidx), fidx);
+	}
+}
+
+static void global_ctrl_changed(struct kvm_pmu *pmu, u64 data)
+{
+	int bit;
+	u64 diff = pmu->global_ctrl ^ data;
+
+	pmu->global_ctrl = data;
+
+	for_each_set_bit(bit, (unsigned long *)&diff, X86_PMC_IDX_MAX)
+		reprogram_idx(pmu, bit);
+}
+
+bool kvm_pmu_msr(struct kvm_vcpu *vcpu, u32 msr)
+{
+	struct kvm_pmu *pmu = &vcpu->arch.pmu;
+	int ret;
+
+	switch (msr) {
+	case MSR_CORE_PERF_FIXED_CTR_CTRL:
+	case MSR_CORE_PERF_GLOBAL_STATUS:
+	case MSR_CORE_PERF_GLOBAL_CTRL:
+	case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
+		ret = pmu->version > 1;
+		break;
+	default:
+		ret = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)
+			|| get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0)
+			|| get_fixed_pmc(pmu, msr);
+		break;
+	}
+	return ret;
+}
+
+int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data)
+{
+	struct kvm_pmu *pmu = &vcpu->arch.pmu;
+	struct kvm_pmc *pmc;
+
+	switch (index) {
+	case MSR_CORE_PERF_FIXED_CTR_CTRL:
+		*data = pmu->fixed_ctr_ctrl;
+		return 0;
+	case MSR_CORE_PERF_GLOBAL_STATUS:
+		*data = pmu->global_status;
+		return 0;
+	case MSR_CORE_PERF_GLOBAL_CTRL:
+		*data = pmu->global_ctrl;
+		return 0;
+	case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
+		*data = pmu->global_ovf_ctrl;
+		return 0;
+	default:
+		if ((pmc = get_gp_pmc(pmu, index, MSR_IA32_PERFCTR0)) ||
+				(pmc = get_fixed_pmc(pmu, index))) {
+			*data = read_pmc(pmc);
+			return 0;
+		} else if ((pmc = get_gp_pmc(pmu, index, MSR_P6_EVNTSEL0))) {
+			*data = pmc->eventsel;
+			return 0;
+		}
+	}
+	return 1;
+}
+
+int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data)
+{
+	struct kvm_pmu *pmu = &vcpu->arch.pmu;
+	struct kvm_pmc *pmc;
+
+	switch (index) {
+	case MSR_CORE_PERF_FIXED_CTR_CTRL:
+		if (pmu->fixed_ctr_ctrl == data)
+			return 0;
+		if (!(data & 0xfffffffffffff444)) {
+			reprogram_fixed_counters(pmu, data);
+			return 0;
+		}
+		break;
+	case MSR_CORE_PERF_GLOBAL_STATUS:
+		break; /* RO MSR */
+	case MSR_CORE_PERF_GLOBAL_CTRL:
+		if (pmu->global_ctrl == data)
+			return 0;
+		if (!(data & pmu->global_ctrl_mask)) {
+			global_ctrl_changed(pmu, data);
+			return 0;
+		}
+		break;
+	case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
+		if (!(data & (pmu->global_ctrl_mask & ~(3ull<<62)))) {
+			pmu->global_status &= ~data;
+			pmu->global_ovf_ctrl = data;
+			return 0;
+		}
+		break;
+	default:
+		if ((pmc = get_gp_pmc(pmu, index, MSR_IA32_PERFCTR0)) ||
+				(pmc = get_fixed_pmc(pmu, index))) {
+			data = (s64)(s32)data;
+			pmc->counter += data - read_pmc(pmc);
+			return 0;
+		} else if ((pmc = get_gp_pmc(pmu, index, MSR_P6_EVNTSEL0))) {
+			if (data == pmc->eventsel)
+				return 0;
+			if (!(data & 0xffffffff00200000ull)) {
+				reprogram_gp_counter(pmc, data);
+				return 0;
+			}
+		}
+	}
+	return 1;
+}
+
+int kvm_pmu_read_pmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data)
+{
+	struct kvm_pmu *pmu = &vcpu->arch.pmu;
+	bool fast_mode = pmc & (1u << 31);
+	bool fixed = pmc & (1u << 30);
+	struct kvm_pmc *counters;
+	u64 ctr;
+
+	pmc &= (3u << 30) - 1;
+	if (!fixed && pmc >= pmu->nr_arch_gp_counters)
+		return 1;
+	if (fixed && pmc >= pmu->nr_arch_fixed_counters)
+		return 1;
+	counters = fixed ? pmu->fixed_counters : pmu->gp_counters;
+	ctr = read_pmc(&counters[pmc]);
+	if (fast_mode)
+		ctr = (u32)ctr;
+	*data = ctr;
+
+	return 0;
+}
+
+void kvm_pmu_cpuid_update(struct kvm_vcpu *vcpu)
+{
+	struct kvm_pmu *pmu = &vcpu->arch.pmu;
+	struct kvm_cpuid_entry2 *entry;
+	unsigned bitmap_len;
+
+	pmu->nr_arch_gp_counters = 0;
+	pmu->nr_arch_fixed_counters = 0;
+	pmu->counter_bitmask[KVM_PMC_GP] = 0;
+	pmu->counter_bitmask[KVM_PMC_FIXED] = 0;
+	pmu->version = 0;
+
+	entry = kvm_find_cpuid_entry(vcpu, 0xa, 0);
+	if (!entry)
+		return;
+
+	pmu->version = entry->eax & 0xff;
+	if (!pmu->version)
+		return;
+
+	pmu->nr_arch_gp_counters = min((int)(entry->eax >> 8) & 0xff,
+			X86_PMC_MAX_GENERIC);
+	pmu->counter_bitmask[KVM_PMC_GP] =
+		((u64)1 << ((entry->eax >> 16) & 0xff)) - 1;
+	bitmap_len = (entry->eax >> 24) & 0xff;
+	pmu->available_event_types = ~entry->ebx & ((1ull << bitmap_len) - 1);
+
+	if (pmu->version == 1) {
+		pmu->global_ctrl = (1 << pmu->nr_arch_gp_counters) - 1;
+		return;
+	}
+
+	pmu->nr_arch_fixed_counters = min((int)(entry->edx & 0x1f),
+			X86_PMC_MAX_FIXED);
+	pmu->counter_bitmask[KVM_PMC_FIXED] =
+		((u64)1 << ((entry->edx >> 5) & 0xff)) - 1;
+	pmu->global_ctrl_mask = ~(((1 << pmu->nr_arch_gp_counters) - 1)
+			| (((1ull << pmu->nr_arch_fixed_counters) - 1)
+				<< X86_PMC_IDX_FIXED));
+}
+
+void kvm_pmu_init(struct kvm_vcpu *vcpu)
+{
+	int i;
+	struct kvm_pmu *pmu = &vcpu->arch.pmu;
+
+	memset(pmu, 0, sizeof(*pmu));
+	for (i = 0; i < X86_PMC_MAX_GENERIC; i++) {
+		pmu->gp_counters[i].type = KVM_PMC_GP;
+		pmu->gp_counters[i].vcpu = vcpu;
+		pmu->gp_counters[i].idx = i;
+	}
+	for (i = 0; i < X86_PMC_MAX_FIXED; i++) {
+		pmu->fixed_counters[i].type = KVM_PMC_FIXED;
+		pmu->fixed_counters[i].vcpu = vcpu;
+		pmu->fixed_counters[i].idx = i + X86_PMC_IDX_FIXED;
+	}
+	init_irq_work(&pmu->irq_work, trigger_pmi);
+	kvm_pmu_cpuid_update(vcpu);
+}
+
+void kvm_pmu_reset(struct kvm_vcpu *vcpu)
+{
+	struct kvm_pmu *pmu = &vcpu->arch.pmu;
+	int i;
+
+	irq_work_sync(&pmu->irq_work);
+	for (i = 0; i < X86_PMC_MAX_GENERIC; i++) {
+		struct kvm_pmc *pmc = &pmu->gp_counters[i];
+		stop_counter(pmc);
+		pmc->counter = pmc->eventsel = 0;
+	}
+
+	for (i = 0; i < X86_PMC_MAX_FIXED; i++)
+		stop_counter(&pmu->fixed_counters[i]);
+
+	pmu->fixed_ctr_ctrl = pmu->global_ctrl = pmu->global_status =
+		pmu->global_ovf_ctrl = 0;
+}
+
+void kvm_pmu_destroy(struct kvm_vcpu *vcpu)
+{
+	kvm_pmu_reset(vcpu);
+}
+
+void kvm_handle_pmu_event(struct kvm_vcpu *vcpu)
+{
+	struct kvm_pmu *pmu = &vcpu->arch.pmu;
+	u64 bitmask;
+	int bit;
+
+	bitmask = pmu->reprogram_pmi;
+
+	for_each_set_bit(bit, (unsigned long *)&bitmask, X86_PMC_IDX_MAX) {
+		struct kvm_pmc *pmc = global_idx_to_pmc(pmu, bit);
+
+		if (unlikely(!pmc || !pmc->perf_event)) {
+			clear_bit(bit, (unsigned long *)&pmu->reprogram_pmi);
+			continue;
+		}
+
+		reprogram_idx(pmu, bit);
+	}
+}
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index e32243e..5fa553b 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1014,6 +1014,7 @@ static void init_vmcb(struct vcpu_svm *svm)
 	set_intercept(svm, INTERCEPT_NMI);
 	set_intercept(svm, INTERCEPT_SMI);
 	set_intercept(svm, INTERCEPT_SELECTIVE_CR0);
+	set_intercept(svm, INTERCEPT_RDPMC);
 	set_intercept(svm, INTERCEPT_CPUID);
 	set_intercept(svm, INTERCEPT_INVD);
 	set_intercept(svm, INTERCEPT_HLT);
@@ -2770,6 +2771,19 @@ static int emulate_on_interception(struct vcpu_svm *svm)
 	return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE;
 }
 
+static int rdpmc_interception(struct vcpu_svm *svm)
+{
+	int err;
+
+	if (!static_cpu_has(X86_FEATURE_NRIPS))
+		return emulate_on_interception(svm);
+
+	err = kvm_rdpmc(&svm->vcpu);
+	kvm_complete_insn_gp(&svm->vcpu, err);
+
+	return 1;
+}
+
 bool check_selective_cr0_intercepted(struct vcpu_svm *svm, unsigned long val)
 {
 	unsigned long cr0 = svm->vcpu.arch.cr0;
@@ -3190,6 +3204,7 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = {
 	[SVM_EXIT_SMI]				= nop_on_interception,
 	[SVM_EXIT_INIT]				= nop_on_interception,
 	[SVM_EXIT_VINTR]			= interrupt_window_interception,
+	[SVM_EXIT_RDPMC]			= rdpmc_interception,
 	[SVM_EXIT_CPUID]			= cpuid_interception,
 	[SVM_EXIT_IRET]                         = iret_interception,
 	[SVM_EXIT_INVD]                         = emulate_on_interception,
diff --git a/arch/x86/kvm/timer.c b/arch/x86/kvm/timer.c
index ae432ea..6b85cc6 100644
--- a/arch/x86/kvm/timer.c
+++ b/arch/x86/kvm/timer.c
@@ -18,9 +18,10 @@
 #include <linux/atomic.h>
 #include "kvm_timer.h"
 
-static int __kvm_timer_fn(struct kvm_vcpu *vcpu, struct kvm_timer *ktimer)
+enum hrtimer_restart kvm_timer_fn(struct hrtimer *data)
 {
-	int restart_timer = 0;
+	struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer);
+	struct kvm_vcpu *vcpu = ktimer->vcpu;
 	wait_queue_head_t *q = &vcpu->wq;
 
 	/*
@@ -40,26 +41,7 @@ static int __kvm_timer_fn(struct kvm_vcpu *vcpu, struct kvm_timer *ktimer)
 
 	if (ktimer->t_ops->is_periodic(ktimer)) {
 		hrtimer_add_expires_ns(&ktimer->timer, ktimer->period);
-		restart_timer = 1;
-	}
-
-	return restart_timer;
-}
-
-enum hrtimer_restart kvm_timer_fn(struct hrtimer *data)
-{
-	int restart_timer;
-	struct kvm_vcpu *vcpu;
-	struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer);
-
-	vcpu = ktimer->vcpu;
-	if (!vcpu)
-		return HRTIMER_NORESTART;
-
-	restart_timer = __kvm_timer_fn(vcpu, ktimer);
-	if (restart_timer)
 		return HRTIMER_RESTART;
-	else
+	} else
 		return HRTIMER_NORESTART;
 }
-
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 579a0b5..906a7e8 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -18,6 +18,7 @@
 
 #include "irq.h"
 #include "mmu.h"
+#include "cpuid.h"
 
 #include <linux/kvm_host.h>
 #include <linux/module.h>
@@ -1747,7 +1748,6 @@ static void setup_msrs(struct vcpu_vmx *vmx)
 	int save_nmsrs, index;
 	unsigned long *msr_bitmap;
 
-	vmx_load_host_state(vmx);
 	save_nmsrs = 0;
 #ifdef CONFIG_X86_64
 	if (is_long_mode(&vmx->vcpu)) {
@@ -1956,6 +1956,7 @@ static __init void nested_vmx_setup_ctls_msrs(void)
 #endif
 		CPU_BASED_MOV_DR_EXITING | CPU_BASED_UNCOND_IO_EXITING |
 		CPU_BASED_USE_IO_BITMAPS | CPU_BASED_MONITOR_EXITING |
+		CPU_BASED_RDPMC_EXITING |
 		CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
 	/*
 	 * We can allow some features even when not supported by the
@@ -2142,12 +2143,10 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
 			return 1;
 		/* Otherwise falls through */
 	default:
-		vmx_load_host_state(to_vmx(vcpu));
 		if (vmx_get_vmx_msr(vcpu, msr_index, pdata))
 			return 0;
 		msr = find_msr_entry(to_vmx(vcpu), msr_index);
 		if (msr) {
-			vmx_load_host_state(to_vmx(vcpu));
 			data = msr->data;
 			break;
 		}
@@ -2171,7 +2170,6 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
 
 	switch (msr_index) {
 	case MSR_EFER:
-		vmx_load_host_state(vmx);
 		ret = kvm_set_msr_common(vcpu, msr_index, data);
 		break;
 #ifdef CONFIG_X86_64
@@ -2220,7 +2218,6 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
 			break;
 		msr = find_msr_entry(vmx, msr_index);
 		if (msr) {
-			vmx_load_host_state(vmx);
 			msr->data = data;
 			break;
 		}
@@ -2414,7 +2411,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
 	      CPU_BASED_USE_TSC_OFFSETING |
 	      CPU_BASED_MWAIT_EXITING |
 	      CPU_BASED_MONITOR_EXITING |
-	      CPU_BASED_INVLPG_EXITING;
+	      CPU_BASED_INVLPG_EXITING |
+	      CPU_BASED_RDPMC_EXITING;
 
 	if (yield_on_hlt)
 		min |= CPU_BASED_HLT_EXITING;
@@ -2716,11 +2714,13 @@ static gva_t rmode_tss_base(struct kvm *kvm)
 {
 	if (!kvm->arch.tss_addr) {
 		struct kvm_memslots *slots;
+		struct kvm_memory_slot *slot;
 		gfn_t base_gfn;
 
 		slots = kvm_memslots(kvm);
-		base_gfn = slots->memslots[0].base_gfn +
-				 kvm->memslots->memslots[0].npages - 3;
+		slot = id_to_memslot(slots, 0);
+		base_gfn = slot->base_gfn + slot->npages - 3;
+
 		return base_gfn << PAGE_SHIFT;
 	}
 	return kvm->arch.tss_addr;
@@ -3945,12 +3945,15 @@ static bool nested_exit_on_intr(struct kvm_vcpu *vcpu)
 static void enable_irq_window(struct kvm_vcpu *vcpu)
 {
 	u32 cpu_based_vm_exec_control;
-	if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu))
-		/* We can get here when nested_run_pending caused
-		 * vmx_interrupt_allowed() to return false. In this case, do
-		 * nothing - the interrupt will be injected later.
+	if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu)) {
+		/*
+		 * We get here if vmx_interrupt_allowed() said we can't
+		 * inject to L1 now because L2 must run. Ask L2 to exit
+		 * right after entry, so we can inject to L1 more promptly.
 		 */
+		kvm_make_request(KVM_REQ_IMMEDIATE_EXIT, vcpu);
 		return;
+	}
 
 	cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
 	cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING;
@@ -4077,11 +4080,12 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
 static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu)
 {
 	if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu)) {
-		struct vmcs12 *vmcs12;
-		if (to_vmx(vcpu)->nested.nested_run_pending)
+		struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+		if (to_vmx(vcpu)->nested.nested_run_pending ||
+		    (vmcs12->idt_vectoring_info_field &
+		     VECTORING_INFO_VALID_MASK))
 			return 0;
 		nested_vmx_vmexit(vcpu);
-		vmcs12 = get_vmcs12(vcpu);
 		vmcs12->vm_exit_reason = EXIT_REASON_EXTERNAL_INTERRUPT;
 		vmcs12->vm_exit_intr_info = 0;
 		/* fall through to normal code, but now in L1, not L2 */
@@ -4611,6 +4615,16 @@ static int handle_invlpg(struct kvm_vcpu *vcpu)
 	return 1;
 }
 
+static int handle_rdpmc(struct kvm_vcpu *vcpu)
+{
+	int err;
+
+	err = kvm_rdpmc(vcpu);
+	kvm_complete_insn_gp(vcpu, err);
+
+	return 1;
+}
+
 static int handle_wbinvd(struct kvm_vcpu *vcpu)
 {
 	skip_emulated_instruction(vcpu);
@@ -5561,6 +5575,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
 	[EXIT_REASON_HLT]                     = handle_halt,
 	[EXIT_REASON_INVD]		      = handle_invd,
 	[EXIT_REASON_INVLPG]		      = handle_invlpg,
+	[EXIT_REASON_RDPMC]                   = handle_rdpmc,
 	[EXIT_REASON_VMCALL]                  = handle_vmcall,
 	[EXIT_REASON_VMCLEAR]	              = handle_vmclear,
 	[EXIT_REASON_VMLAUNCH]                = handle_vmlaunch,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 4c938da..1171def 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -26,6 +26,7 @@
 #include "tss.h"
 #include "kvm_cache_regs.h"
 #include "x86.h"
+#include "cpuid.h"
 
 #include <linux/clocksource.h>
 #include <linux/interrupt.h>
@@ -82,8 +83,6 @@ static u64 __read_mostly efer_reserved_bits = ~((u64)EFER_SCE);
 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
 
 static void update_cr8_intercept(struct kvm_vcpu *vcpu);
-static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
-				    struct kvm_cpuid_entry2 __user *entries);
 static void process_nmi(struct kvm_vcpu *vcpu);
 
 struct kvm_x86_ops *kvm_x86_ops;
@@ -574,54 +573,6 @@ int kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
 }
 EXPORT_SYMBOL_GPL(kvm_set_xcr);
 
-static bool guest_cpuid_has_xsave(struct kvm_vcpu *vcpu)
-{
-	struct kvm_cpuid_entry2 *best;
-
-	best = kvm_find_cpuid_entry(vcpu, 1, 0);
-	return best && (best->ecx & bit(X86_FEATURE_XSAVE));
-}
-
-static bool guest_cpuid_has_smep(struct kvm_vcpu *vcpu)
-{
-	struct kvm_cpuid_entry2 *best;
-
-	best = kvm_find_cpuid_entry(vcpu, 7, 0);
-	return best && (best->ebx & bit(X86_FEATURE_SMEP));
-}
-
-static bool guest_cpuid_has_fsgsbase(struct kvm_vcpu *vcpu)
-{
-	struct kvm_cpuid_entry2 *best;
-
-	best = kvm_find_cpuid_entry(vcpu, 7, 0);
-	return best && (best->ebx & bit(X86_FEATURE_FSGSBASE));
-}
-
-static void update_cpuid(struct kvm_vcpu *vcpu)
-{
-	struct kvm_cpuid_entry2 *best;
-	struct kvm_lapic *apic = vcpu->arch.apic;
-
-	best = kvm_find_cpuid_entry(vcpu, 1, 0);
-	if (!best)
-		return;
-
-	/* Update OSXSAVE bit */
-	if (cpu_has_xsave && best->function == 0x1) {
-		best->ecx &= ~(bit(X86_FEATURE_OSXSAVE));
-		if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE))
-			best->ecx |= bit(X86_FEATURE_OSXSAVE);
-	}
-
-	if (apic) {
-		if (best->ecx & bit(X86_FEATURE_TSC_DEADLINE_TIMER))
-			apic->lapic_timer.timer_mode_mask = 3 << 17;
-		else
-			apic->lapic_timer.timer_mode_mask = 1 << 17;
-	}
-}
-
 int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 {
 	unsigned long old_cr4 = kvm_read_cr4(vcpu);
@@ -655,7 +606,7 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 		kvm_mmu_reset_context(vcpu);
 
 	if ((cr4 ^ old_cr4) & X86_CR4_OSXSAVE)
-		update_cpuid(vcpu);
+		kvm_update_cpuid(vcpu);
 
 	return 0;
 }
@@ -809,6 +760,21 @@ int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val)
 }
 EXPORT_SYMBOL_GPL(kvm_get_dr);
 
+bool kvm_rdpmc(struct kvm_vcpu *vcpu)
+{
+	u32 ecx = kvm_register_read(vcpu, VCPU_REGS_RCX);
+	u64 data;
+	int err;
+
+	err = kvm_pmu_read_pmc(vcpu, ecx, &data);
+	if (err)
+		return err;
+	kvm_register_write(vcpu, VCPU_REGS_RAX, (u32)data);
+	kvm_register_write(vcpu, VCPU_REGS_RDX, data >> 32);
+	return err;
+}
+EXPORT_SYMBOL_GPL(kvm_rdpmc);
+
 /*
  * List of msr numbers which we expose to userspace through KVM_GET_MSRS
  * and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST.
@@ -1358,12 +1324,11 @@ static int xen_hvm_config(struct kvm_vcpu *vcpu, u64 data)
 	if (page_num >= blob_size)
 		goto out;
 	r = -ENOMEM;
-	page = kzalloc(PAGE_SIZE, GFP_KERNEL);
-	if (!page)
+	page = memdup_user(blob_addr + (page_num * PAGE_SIZE), PAGE_SIZE);
+	if (IS_ERR(page)) {
+		r = PTR_ERR(page);
 		goto out;
-	r = -EFAULT;
-	if (copy_from_user(page, blob_addr + (page_num * PAGE_SIZE), PAGE_SIZE))
-		goto out_free;
+	}
 	if (kvm_write_guest(kvm, page_addr, page, PAGE_SIZE))
 		goto out_free;
 	r = 0;
@@ -1652,8 +1617,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 	 * which we perfectly emulate ;-). Any other value should be at least
 	 * reported, some guests depend on them.
 	 */
-	case MSR_P6_EVNTSEL0:
-	case MSR_P6_EVNTSEL1:
 	case MSR_K7_EVNTSEL0:
 	case MSR_K7_EVNTSEL1:
 	case MSR_K7_EVNTSEL2:
@@ -1665,8 +1628,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 	/* at least RHEL 4 unconditionally writes to the perfctr registers,
 	 * so we ignore writes to make it happy.
 	 */
-	case MSR_P6_PERFCTR0:
-	case MSR_P6_PERFCTR1:
 	case MSR_K7_PERFCTR0:
 	case MSR_K7_PERFCTR1:
 	case MSR_K7_PERFCTR2:
@@ -1703,6 +1664,8 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 	default:
 		if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr))
 			return xen_hvm_config(vcpu, data);
+		if (kvm_pmu_msr(vcpu, msr))
+			return kvm_pmu_set_msr(vcpu, msr, data);
 		if (!ignore_msrs) {
 			pr_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n",
 				msr, data);
@@ -1865,10 +1828,6 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
 	case MSR_K8_SYSCFG:
 	case MSR_K7_HWCR:
 	case MSR_VM_HSAVE_PA:
-	case MSR_P6_PERFCTR0:
-	case MSR_P6_PERFCTR1:
-	case MSR_P6_EVNTSEL0:
-	case MSR_P6_EVNTSEL1:
 	case MSR_K7_EVNTSEL0:
 	case MSR_K7_PERFCTR0:
 	case MSR_K8_INT_PENDING_MSG:
@@ -1979,6 +1938,8 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
 		data = 0xbe702111;
 		break;
 	default:
+		if (kvm_pmu_msr(vcpu, msr))
+			return kvm_pmu_get_msr(vcpu, msr, pdata);
 		if (!ignore_msrs) {
 			pr_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr);
 			return 1;
@@ -2037,15 +1998,12 @@ static int msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs __user *user_msrs,
 	if (msrs.nmsrs >= MAX_IO_MSRS)
 		goto out;
 
-	r = -ENOMEM;
 	size = sizeof(struct kvm_msr_entry) * msrs.nmsrs;
-	entries = kmalloc(size, GFP_KERNEL);
-	if (!entries)
+	entries = memdup_user(user_msrs->entries, size);
+	if (IS_ERR(entries)) {
+		r = PTR_ERR(entries);
 		goto out;
-
-	r = -EFAULT;
-	if (copy_from_user(entries, user_msrs->entries, size))
-		goto out_free;
+	}
 
 	r = n = __msr_io(vcpu, &msrs, entries, do_msr);
 	if (r < 0)
@@ -2265,466 +2223,6 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 	vcpu->arch.last_guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu);
 }
 
-static int is_efer_nx(void)
-{
-	unsigned long long efer = 0;
-
-	rdmsrl_safe(MSR_EFER, &efer);
-	return efer & EFER_NX;
-}
-
-static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu)
-{
-	int i;
-	struct kvm_cpuid_entry2 *e, *entry;
-
-	entry = NULL;
-	for (i = 0; i < vcpu->arch.cpuid_nent; ++i) {
-		e = &vcpu->arch.cpuid_entries[i];
-		if (e->function == 0x80000001) {
-			entry = e;
-			break;
-		}
-	}
-	if (entry && (entry->edx & (1 << 20)) && !is_efer_nx()) {
-		entry->edx &= ~(1 << 20);
-		printk(KERN_INFO "kvm: guest NX capability removed\n");
-	}
-}
-
-/* when an old userspace process fills a new kernel module */
-static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
-				    struct kvm_cpuid *cpuid,
-				    struct kvm_cpuid_entry __user *entries)
-{
-	int r, i;
-	struct kvm_cpuid_entry *cpuid_entries;
-
-	r = -E2BIG;
-	if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
-		goto out;
-	r = -ENOMEM;
-	cpuid_entries = vmalloc(sizeof(struct kvm_cpuid_entry) * cpuid->nent);
-	if (!cpuid_entries)
-		goto out;
-	r = -EFAULT;
-	if (copy_from_user(cpuid_entries, entries,
-			   cpuid->nent * sizeof(struct kvm_cpuid_entry)))
-		goto out_free;
-	for (i = 0; i < cpuid->nent; i++) {
-		vcpu->arch.cpuid_entries[i].function = cpuid_entries[i].function;
-		vcpu->arch.cpuid_entries[i].eax = cpuid_entries[i].eax;
-		vcpu->arch.cpuid_entries[i].ebx = cpuid_entries[i].ebx;
-		vcpu->arch.cpuid_entries[i].ecx = cpuid_entries[i].ecx;
-		vcpu->arch.cpuid_entries[i].edx = cpuid_entries[i].edx;
-		vcpu->arch.cpuid_entries[i].index = 0;
-		vcpu->arch.cpuid_entries[i].flags = 0;
-		vcpu->arch.cpuid_entries[i].padding[0] = 0;
-		vcpu->arch.cpuid_entries[i].padding[1] = 0;
-		vcpu->arch.cpuid_entries[i].padding[2] = 0;
-	}
-	vcpu->arch.cpuid_nent = cpuid->nent;
-	cpuid_fix_nx_cap(vcpu);
-	r = 0;
-	kvm_apic_set_version(vcpu);
-	kvm_x86_ops->cpuid_update(vcpu);
-	update_cpuid(vcpu);
-
-out_free:
-	vfree(cpuid_entries);
-out:
-	return r;
-}
-
-static int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu,
-				     struct kvm_cpuid2 *cpuid,
-				     struct kvm_cpuid_entry2 __user *entries)
-{
-	int r;
-
-	r = -E2BIG;
-	if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
-		goto out;
-	r = -EFAULT;
-	if (copy_from_user(&vcpu->arch.cpuid_entries, entries,
-			   cpuid->nent * sizeof(struct kvm_cpuid_entry2)))
-		goto out;
-	vcpu->arch.cpuid_nent = cpuid->nent;
-	kvm_apic_set_version(vcpu);
-	kvm_x86_ops->cpuid_update(vcpu);
-	update_cpuid(vcpu);
-	return 0;
-
-out:
-	return r;
-}
-
-static int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu,
-				     struct kvm_cpuid2 *cpuid,
-				     struct kvm_cpuid_entry2 __user *entries)
-{
-	int r;
-
-	r = -E2BIG;
-	if (cpuid->nent < vcpu->arch.cpuid_nent)
-		goto out;
-	r = -EFAULT;
-	if (copy_to_user(entries, &vcpu->arch.cpuid_entries,
-			 vcpu->arch.cpuid_nent * sizeof(struct kvm_cpuid_entry2)))
-		goto out;
-	return 0;
-
-out:
-	cpuid->nent = vcpu->arch.cpuid_nent;
-	return r;
-}
-
-static void cpuid_mask(u32 *word, int wordnum)
-{
-	*word &= boot_cpu_data.x86_capability[wordnum];
-}
-
-static void do_cpuid_1_ent(struct kvm_cpuid_entry2 *entry, u32 function,
-			   u32 index)
-{
-	entry->function = function;
-	entry->index = index;
-	cpuid_count(entry->function, entry->index,
-		    &entry->eax, &entry->ebx, &entry->ecx, &entry->edx);
-	entry->flags = 0;
-}
-
-static bool supported_xcr0_bit(unsigned bit)
-{
-	u64 mask = ((u64)1 << bit);
-
-	return mask & (XSTATE_FP | XSTATE_SSE | XSTATE_YMM) & host_xcr0;
-}
-
-#define F(x) bit(X86_FEATURE_##x)
-
-static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
-			 u32 index, int *nent, int maxnent)
-{
-	unsigned f_nx = is_efer_nx() ? F(NX) : 0;
-#ifdef CONFIG_X86_64
-	unsigned f_gbpages = (kvm_x86_ops->get_lpage_level() == PT_PDPE_LEVEL)
-				? F(GBPAGES) : 0;
-	unsigned f_lm = F(LM);
-#else
-	unsigned f_gbpages = 0;
-	unsigned f_lm = 0;
-#endif
-	unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? F(RDTSCP) : 0;
-
-	/* cpuid 1.edx */
-	const u32 kvm_supported_word0_x86_features =
-		F(FPU) | F(VME) | F(DE) | F(PSE) |
-		F(TSC) | F(MSR) | F(PAE) | F(MCE) |
-		F(CX8) | F(APIC) | 0 /* Reserved */ | F(SEP) |
-		F(MTRR) | F(PGE) | F(MCA) | F(CMOV) |
-		F(PAT) | F(PSE36) | 0 /* PSN */ | F(CLFLSH) |
-		0 /* Reserved, DS, ACPI */ | F(MMX) |
-		F(FXSR) | F(XMM) | F(XMM2) | F(SELFSNOOP) |
-		0 /* HTT, TM, Reserved, PBE */;
-	/* cpuid 0x80000001.edx */
-	const u32 kvm_supported_word1_x86_features =
-		F(FPU) | F(VME) | F(DE) | F(PSE) |
-		F(TSC) | F(MSR) | F(PAE) | F(MCE) |
-		F(CX8) | F(APIC) | 0 /* Reserved */ | F(SYSCALL) |
-		F(MTRR) | F(PGE) | F(MCA) | F(CMOV) |
-		F(PAT) | F(PSE36) | 0 /* Reserved */ |
-		f_nx | 0 /* Reserved */ | F(MMXEXT) | F(MMX) |
-		F(FXSR) | F(FXSR_OPT) | f_gbpages | f_rdtscp |
-		0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW);
-	/* cpuid 1.ecx */
-	const u32 kvm_supported_word4_x86_features =
-		F(XMM3) | F(PCLMULQDQ) | 0 /* DTES64, MONITOR */ |
-		0 /* DS-CPL, VMX, SMX, EST */ |
-		0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* Reserved */ |
-		0 /* Reserved */ | F(CX16) | 0 /* xTPR Update, PDCM */ |
-		0 /* Reserved, DCA */ | F(XMM4_1) |
-		F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) |
-		0 /* Reserved*/ | F(AES) | F(XSAVE) | 0 /* OSXSAVE */ | F(AVX) |
-		F(F16C) | F(RDRAND);
-	/* cpuid 0x80000001.ecx */
-	const u32 kvm_supported_word6_x86_features =
-		F(LAHF_LM) | F(CMP_LEGACY) | 0 /*SVM*/ | 0 /* ExtApicSpace */ |
-		F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) |
-		F(3DNOWPREFETCH) | 0 /* OSVW */ | 0 /* IBS */ | F(XOP) |
-		0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM);
-
-	/* cpuid 0xC0000001.edx */
-	const u32 kvm_supported_word5_x86_features =
-		F(XSTORE) | F(XSTORE_EN) | F(XCRYPT) | F(XCRYPT_EN) |
-		F(ACE2) | F(ACE2_EN) | F(PHE) | F(PHE_EN) |
-		F(PMM) | F(PMM_EN);
-
-	/* cpuid 7.0.ebx */
-	const u32 kvm_supported_word9_x86_features =
-		F(SMEP) | F(FSGSBASE) | F(ERMS);
-
-	/* all calls to cpuid_count() should be made on the same cpu */
-	get_cpu();
-	do_cpuid_1_ent(entry, function, index);
-	++*nent;
-
-	switch (function) {
-	case 0:
-		entry->eax = min(entry->eax, (u32)0xd);
-		break;
-	case 1:
-		entry->edx &= kvm_supported_word0_x86_features;
-		cpuid_mask(&entry->edx, 0);
-		entry->ecx &= kvm_supported_word4_x86_features;
-		cpuid_mask(&entry->ecx, 4);
-		/* we support x2apic emulation even if host does not support
-		 * it since we emulate x2apic in software */
-		entry->ecx |= F(X2APIC);
-		break;
-	/* function 2 entries are STATEFUL. That is, repeated cpuid commands
-	 * may return different values. This forces us to get_cpu() before
-	 * issuing the first command, and also to emulate this annoying behavior
-	 * in kvm_emulate_cpuid() using KVM_CPUID_FLAG_STATE_READ_NEXT */
-	case 2: {
-		int t, times = entry->eax & 0xff;
-
-		entry->flags |= KVM_CPUID_FLAG_STATEFUL_FUNC;
-		entry->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT;
-		for (t = 1; t < times && *nent < maxnent; ++t) {
-			do_cpuid_1_ent(&entry[t], function, 0);
-			entry[t].flags |= KVM_CPUID_FLAG_STATEFUL_FUNC;
-			++*nent;
-		}
-		break;
-	}
-	/* function 4 has additional index. */
-	case 4: {
-		int i, cache_type;
-
-		entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
-		/* read more entries until cache_type is zero */
-		for (i = 1; *nent < maxnent; ++i) {
-			cache_type = entry[i - 1].eax & 0x1f;
-			if (!cache_type)
-				break;
-			do_cpuid_1_ent(&entry[i], function, i);
-			entry[i].flags |=
-			       KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
-			++*nent;
-		}
-		break;
-	}
-	case 7: {
-		entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
-		/* Mask ebx against host capbability word 9 */
-		if (index == 0) {
-			entry->ebx &= kvm_supported_word9_x86_features;
-			cpuid_mask(&entry->ebx, 9);
-		} else
-			entry->ebx = 0;
-		entry->eax = 0;
-		entry->ecx = 0;
-		entry->edx = 0;
-		break;
-	}
-	case 9:
-		break;
-	/* function 0xb has additional index. */
-	case 0xb: {
-		int i, level_type;
-
-		entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
-		/* read more entries until level_type is zero */
-		for (i = 1; *nent < maxnent; ++i) {
-			level_type = entry[i - 1].ecx & 0xff00;
-			if (!level_type)
-				break;
-			do_cpuid_1_ent(&entry[i], function, i);
-			entry[i].flags |=
-			       KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
-			++*nent;
-		}
-		break;
-	}
-	case 0xd: {
-		int idx, i;
-
-		entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
-		for (idx = 1, i = 1; *nent < maxnent && idx < 64; ++idx) {
-			do_cpuid_1_ent(&entry[i], function, idx);
-			if (entry[i].eax == 0 || !supported_xcr0_bit(idx))
-				continue;
-			entry[i].flags |=
-			       KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
-			++*nent;
-			++i;
-		}
-		break;
-	}
-	case KVM_CPUID_SIGNATURE: {
-		char signature[12] = "KVMKVMKVM\0\0";
-		u32 *sigptr = (u32 *)signature;
-		entry->eax = 0;
-		entry->ebx = sigptr[0];
-		entry->ecx = sigptr[1];
-		entry->edx = sigptr[2];
-		break;
-	}
-	case KVM_CPUID_FEATURES:
-		entry->eax = (1 << KVM_FEATURE_CLOCKSOURCE) |
-			     (1 << KVM_FEATURE_NOP_IO_DELAY) |
-			     (1 << KVM_FEATURE_CLOCKSOURCE2) |
-			     (1 << KVM_FEATURE_ASYNC_PF) |
-			     (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT);
-
-		if (sched_info_on())
-			entry->eax |= (1 << KVM_FEATURE_STEAL_TIME);
-
-		entry->ebx = 0;
-		entry->ecx = 0;
-		entry->edx = 0;
-		break;
-	case 0x80000000:
-		entry->eax = min(entry->eax, 0x8000001a);
-		break;
-	case 0x80000001:
-		entry->edx &= kvm_supported_word1_x86_features;
-		cpuid_mask(&entry->edx, 1);
-		entry->ecx &= kvm_supported_word6_x86_features;
-		cpuid_mask(&entry->ecx, 6);
-		break;
-	case 0x80000008: {
-		unsigned g_phys_as = (entry->eax >> 16) & 0xff;
-		unsigned virt_as = max((entry->eax >> 8) & 0xff, 48U);
-		unsigned phys_as = entry->eax & 0xff;
-
-		if (!g_phys_as)
-			g_phys_as = phys_as;
-		entry->eax = g_phys_as | (virt_as << 8);
-		entry->ebx = entry->edx = 0;
-		break;
-	}
-	case 0x80000019:
-		entry->ecx = entry->edx = 0;
-		break;
-	case 0x8000001a:
-		break;
-	case 0x8000001d:
-		break;
-	/*Add support for Centaur's CPUID instruction*/
-	case 0xC0000000:
-		/*Just support up to 0xC0000004 now*/
-		entry->eax = min(entry->eax, 0xC0000004);
-		break;
-	case 0xC0000001:
-		entry->edx &= kvm_supported_word5_x86_features;
-		cpuid_mask(&entry->edx, 5);
-		break;
-	case 3: /* Processor serial number */
-	case 5: /* MONITOR/MWAIT */
-	case 6: /* Thermal management */
-	case 0xA: /* Architectural Performance Monitoring */
-	case 0x80000007: /* Advanced power management */
-	case 0xC0000002:
-	case 0xC0000003:
-	case 0xC0000004:
-	default:
-		entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
-		break;
-	}
-
-	kvm_x86_ops->set_supported_cpuid(function, entry);
-
-	put_cpu();
-}
-
-#undef F
-
-static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
-				     struct kvm_cpuid_entry2 __user *entries)
-{
-	struct kvm_cpuid_entry2 *cpuid_entries;
-	int limit, nent = 0, r = -E2BIG;
-	u32 func;
-
-	if (cpuid->nent < 1)
-		goto out;
-	if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
-		cpuid->nent = KVM_MAX_CPUID_ENTRIES;
-	r = -ENOMEM;
-	cpuid_entries = vmalloc(sizeof(struct kvm_cpuid_entry2) * cpuid->nent);
-	if (!cpuid_entries)
-		goto out;
-
-	do_cpuid_ent(&cpuid_entries[0], 0, 0, &nent, cpuid->nent);
-	limit = cpuid_entries[0].eax;
-	for (func = 1; func <= limit && nent < cpuid->nent; ++func)
-		do_cpuid_ent(&cpuid_entries[nent], func, 0,
-			     &nent, cpuid->nent);
-	r = -E2BIG;
-	if (nent >= cpuid->nent)
-		goto out_free;
-
-	do_cpuid_ent(&cpuid_entries[nent], 0x80000000, 0, &nent, cpuid->nent);
-	limit = cpuid_entries[nent - 1].eax;
-	for (func = 0x80000001; func <= limit && nent < cpuid->nent; ++func)
-		do_cpuid_ent(&cpuid_entries[nent], func, 0,
-			     &nent, cpuid->nent);
-
-
-
-	r = -E2BIG;
-	if (nent >= cpuid->nent)
-		goto out_free;
-
-	/* Add support for Centaur's CPUID instruction. */
-	if (boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR) {
-		do_cpuid_ent(&cpuid_entries[nent], 0xC0000000, 0,
-				&nent, cpuid->nent);
-
-		r = -E2BIG;
-		if (nent >= cpuid->nent)
-			goto out_free;
-
-		limit = cpuid_entries[nent - 1].eax;
-		for (func = 0xC0000001;
-			func <= limit && nent < cpuid->nent; ++func)
-			do_cpuid_ent(&cpuid_entries[nent], func, 0,
-					&nent, cpuid->nent);
-
-		r = -E2BIG;
-		if (nent >= cpuid->nent)
-			goto out_free;
-	}
-
-	do_cpuid_ent(&cpuid_entries[nent], KVM_CPUID_SIGNATURE, 0, &nent,
-		     cpuid->nent);
-
-	r = -E2BIG;
-	if (nent >= cpuid->nent)
-		goto out_free;
-
-	do_cpuid_ent(&cpuid_entries[nent], KVM_CPUID_FEATURES, 0, &nent,
-		     cpuid->nent);
-
-	r = -E2BIG;
-	if (nent >= cpuid->nent)
-		goto out_free;
-
-	r = -EFAULT;
-	if (copy_to_user(entries, cpuid_entries,
-			 nent * sizeof(struct kvm_cpuid_entry2)))
-		goto out_free;
-	cpuid->nent = nent;
-	r = 0;
-
-out_free:
-	vfree(cpuid_entries);
-out:
-	return r;
-}
-
 static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
 				    struct kvm_lapic_state *s)
 {
@@ -3042,13 +2540,12 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 		r = -EINVAL;
 		if (!vcpu->arch.apic)
 			goto out;
-		u.lapic = kmalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL);
-		r = -ENOMEM;
-		if (!u.lapic)
-			goto out;
-		r = -EFAULT;
-		if (copy_from_user(u.lapic, argp, sizeof(struct kvm_lapic_state)))
+		u.lapic = memdup_user(argp, sizeof(*u.lapic));
+		if (IS_ERR(u.lapic)) {
+			r = PTR_ERR(u.lapic);
 			goto out;
+		}
+
 		r = kvm_vcpu_ioctl_set_lapic(vcpu, u.lapic);
 		if (r)
 			goto out;
@@ -3227,14 +2724,11 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 		break;
 	}
 	case KVM_SET_XSAVE: {
-		u.xsave = kzalloc(sizeof(struct kvm_xsave), GFP_KERNEL);
-		r = -ENOMEM;
-		if (!u.xsave)
-			break;
-
-		r = -EFAULT;
-		if (copy_from_user(u.xsave, argp, sizeof(struct kvm_xsave)))
-			break;
+		u.xsave = memdup_user(argp, sizeof(*u.xsave));
+		if (IS_ERR(u.xsave)) {
+			r = PTR_ERR(u.xsave);
+			goto out;
+		}
 
 		r = kvm_vcpu_ioctl_x86_set_xsave(vcpu, u.xsave);
 		break;
@@ -3255,15 +2749,11 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 		break;
 	}
 	case KVM_SET_XCRS: {
-		u.xcrs = kzalloc(sizeof(struct kvm_xcrs), GFP_KERNEL);
-		r = -ENOMEM;
-		if (!u.xcrs)
-			break;
-
-		r = -EFAULT;
-		if (copy_from_user(u.xcrs, argp,
-				   sizeof(struct kvm_xcrs)))
-			break;
+		u.xcrs = memdup_user(argp, sizeof(*u.xcrs));
+		if (IS_ERR(u.xcrs)) {
+			r = PTR_ERR(u.xcrs);
+			goto out;
+		}
 
 		r = kvm_vcpu_ioctl_x86_set_xcrs(vcpu, u.xcrs);
 		break;
@@ -3460,16 +2950,59 @@ static int kvm_vm_ioctl_reinject(struct kvm *kvm,
 	return 0;
 }
 
+/**
+ * write_protect_slot - write protect a slot for dirty logging
+ * @kvm: the kvm instance
+ * @memslot: the slot we protect
+ * @dirty_bitmap: the bitmap indicating which pages are dirty
+ * @nr_dirty_pages: the number of dirty pages
+ *
+ * We have two ways to find all sptes to protect:
+ * 1. Use kvm_mmu_slot_remove_write_access() which walks all shadow pages and
+ *    checks ones that have a spte mapping a page in the slot.
+ * 2. Use kvm_mmu_rmap_write_protect() for each gfn found in the bitmap.
+ *
+ * Generally speaking, if there are not so many dirty pages compared to the
+ * number of shadow pages, we should use the latter.
+ *
+ * Note that letting others write into a page marked dirty in the old bitmap
+ * by using the remaining tlb entry is not a problem.  That page will become
+ * write protected again when we flush the tlb and then be reported dirty to
+ * the user space by copying the old bitmap.
+ */
+static void write_protect_slot(struct kvm *kvm,
+			       struct kvm_memory_slot *memslot,
+			       unsigned long *dirty_bitmap,
+			       unsigned long nr_dirty_pages)
+{
+	/* Not many dirty pages compared to # of shadow pages. */
+	if (nr_dirty_pages < kvm->arch.n_used_mmu_pages) {
+		unsigned long gfn_offset;
+
+		for_each_set_bit(gfn_offset, dirty_bitmap, memslot->npages) {
+			unsigned long gfn = memslot->base_gfn + gfn_offset;
+
+			spin_lock(&kvm->mmu_lock);
+			kvm_mmu_rmap_write_protect(kvm, gfn, memslot);
+			spin_unlock(&kvm->mmu_lock);
+		}
+		kvm_flush_remote_tlbs(kvm);
+	} else {
+		spin_lock(&kvm->mmu_lock);
+		kvm_mmu_slot_remove_write_access(kvm, memslot->id);
+		spin_unlock(&kvm->mmu_lock);
+	}
+}
+
 /*
  * Get (and clear) the dirty memory log for a memory slot.
  */
 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 				      struct kvm_dirty_log *log)
 {
-	int r, i;
+	int r;
 	struct kvm_memory_slot *memslot;
-	unsigned long n;
-	unsigned long is_dirty = 0;
+	unsigned long n, nr_dirty_pages;
 
 	mutex_lock(&kvm->slots_lock);
 
@@ -3477,43 +3010,41 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 	if (log->slot >= KVM_MEMORY_SLOTS)
 		goto out;
 
-	memslot = &kvm->memslots->memslots[log->slot];
+	memslot = id_to_memslot(kvm->memslots, log->slot);
 	r = -ENOENT;
 	if (!memslot->dirty_bitmap)
 		goto out;
 
 	n = kvm_dirty_bitmap_bytes(memslot);
-
-	for (i = 0; !is_dirty && i < n/sizeof(long); i++)
-		is_dirty = memslot->dirty_bitmap[i];
+	nr_dirty_pages = memslot->nr_dirty_pages;
 
 	/* If nothing is dirty, don't bother messing with page tables. */
-	if (is_dirty) {
+	if (nr_dirty_pages) {
 		struct kvm_memslots *slots, *old_slots;
-		unsigned long *dirty_bitmap;
+		unsigned long *dirty_bitmap, *dirty_bitmap_head;
 
-		dirty_bitmap = memslot->dirty_bitmap_head;
-		if (memslot->dirty_bitmap == dirty_bitmap)
-			dirty_bitmap += n / sizeof(long);
-		memset(dirty_bitmap, 0, n);
+		dirty_bitmap = memslot->dirty_bitmap;
+		dirty_bitmap_head = memslot->dirty_bitmap_head;
+		if (dirty_bitmap == dirty_bitmap_head)
+			dirty_bitmap_head += n / sizeof(long);
+		memset(dirty_bitmap_head, 0, n);
 
 		r = -ENOMEM;
-		slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
+		slots = kmemdup(kvm->memslots, sizeof(*kvm->memslots), GFP_KERNEL);
 		if (!slots)
 			goto out;
-		memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));
-		slots->memslots[log->slot].dirty_bitmap = dirty_bitmap;
-		slots->generation++;
+
+		memslot = id_to_memslot(slots, log->slot);
+		memslot->nr_dirty_pages = 0;
+		memslot->dirty_bitmap = dirty_bitmap_head;
+		update_memslots(slots, NULL);
 
 		old_slots = kvm->memslots;
 		rcu_assign_pointer(kvm->memslots, slots);
 		synchronize_srcu_expedited(&kvm->srcu);
-		dirty_bitmap = old_slots->memslots[log->slot].dirty_bitmap;
 		kfree(old_slots);
 
-		spin_lock(&kvm->mmu_lock);
-		kvm_mmu_slot_remove_write_access(kvm, log->slot);
-		spin_unlock(&kvm->mmu_lock);
+		write_protect_slot(kvm, memslot, dirty_bitmap, nr_dirty_pages);
 
 		r = -EFAULT;
 		if (copy_to_user(log->dirty_bitmap, dirty_bitmap, n))
@@ -3658,14 +3189,14 @@ long kvm_arch_vm_ioctl(struct file *filp,
 	}
 	case KVM_GET_IRQCHIP: {
 		/* 0: PIC master, 1: PIC slave, 2: IOAPIC */
-		struct kvm_irqchip *chip = kmalloc(sizeof(*chip), GFP_KERNEL);
+		struct kvm_irqchip *chip;
 
-		r = -ENOMEM;
-		if (!chip)
+		chip = memdup_user(argp, sizeof(*chip));
+		if (IS_ERR(chip)) {
+			r = PTR_ERR(chip);
 			goto out;
-		r = -EFAULT;
-		if (copy_from_user(chip, argp, sizeof *chip))
-			goto get_irqchip_out;
+		}
+
 		r = -ENXIO;
 		if (!irqchip_in_kernel(kvm))
 			goto get_irqchip_out;
@@ -3684,14 +3215,14 @@ long kvm_arch_vm_ioctl(struct file *filp,
 	}
 	case KVM_SET_IRQCHIP: {
 		/* 0: PIC master, 1: PIC slave, 2: IOAPIC */
-		struct kvm_irqchip *chip = kmalloc(sizeof(*chip), GFP_KERNEL);
+		struct kvm_irqchip *chip;
 
-		r = -ENOMEM;
-		if (!chip)
+		chip = memdup_user(argp, sizeof(*chip));
+		if (IS_ERR(chip)) {
+			r = PTR_ERR(chip);
 			goto out;
-		r = -EFAULT;
-		if (copy_from_user(chip, argp, sizeof *chip))
-			goto set_irqchip_out;
+		}
+
 		r = -ENXIO;
 		if (!irqchip_in_kernel(kvm))
 			goto set_irqchip_out;
@@ -3898,12 +3429,7 @@ void kvm_get_segment(struct kvm_vcpu *vcpu,
 	kvm_x86_ops->get_segment(vcpu, var, seg);
 }
 
-static gpa_t translate_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access)
-{
-	return gpa;
-}
-
-static gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access)
+gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access)
 {
 	gpa_t t_gpa;
 	struct x86_exception exception;
@@ -4087,7 +3613,7 @@ int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
 	ret = kvm_write_guest(vcpu->kvm, gpa, val, bytes);
 	if (ret < 0)
 		return 0;
-	kvm_mmu_pte_write(vcpu, gpa, val, bytes, 1);
+	kvm_mmu_pte_write(vcpu, gpa, val, bytes);
 	return 1;
 }
 
@@ -4324,7 +3850,7 @@ static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt,
 	if (!exchanged)
 		return X86EMUL_CMPXCHG_FAILED;
 
-	kvm_mmu_pte_write(vcpu, gpa, new, bytes, 1);
+	kvm_mmu_pte_write(vcpu, gpa, new, bytes);
 
 	return X86EMUL_CONTINUE;
 
@@ -4349,32 +3875,24 @@ static int kernel_pio(struct kvm_vcpu *vcpu, void *pd)
 	return r;
 }
 
-
-static int emulator_pio_in_emulated(struct x86_emulate_ctxt *ctxt,
-				    int size, unsigned short port, void *val,
-				    unsigned int count)
+static int emulator_pio_in_out(struct kvm_vcpu *vcpu, int size,
+			       unsigned short port, void *val,
+			       unsigned int count, bool in)
 {
-	struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
-
-	if (vcpu->arch.pio.count)
-		goto data_avail;
-
-	trace_kvm_pio(0, port, size, count);
+	trace_kvm_pio(!in, port, size, count);
 
 	vcpu->arch.pio.port = port;
-	vcpu->arch.pio.in = 1;
+	vcpu->arch.pio.in = in;
 	vcpu->arch.pio.count  = count;
 	vcpu->arch.pio.size = size;
 
 	if (!kernel_pio(vcpu, vcpu->arch.pio_data)) {
-	data_avail:
-		memcpy(val, vcpu->arch.pio_data, size * count);
 		vcpu->arch.pio.count = 0;
 		return 1;
 	}
 
 	vcpu->run->exit_reason = KVM_EXIT_IO;
-	vcpu->run->io.direction = KVM_EXIT_IO_IN;
+	vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT;
 	vcpu->run->io.size = size;
 	vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE;
 	vcpu->run->io.count = count;
@@ -4383,36 +3901,37 @@ static int emulator_pio_in_emulated(struct x86_emulate_ctxt *ctxt,
 	return 0;
 }
 
-static int emulator_pio_out_emulated(struct x86_emulate_ctxt *ctxt,
-				     int size, unsigned short port,
-				     const void *val, unsigned int count)
+static int emulator_pio_in_emulated(struct x86_emulate_ctxt *ctxt,
+				    int size, unsigned short port, void *val,
+				    unsigned int count)
 {
 	struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
+	int ret;
 
-	trace_kvm_pio(1, port, size, count);
-
-	vcpu->arch.pio.port = port;
-	vcpu->arch.pio.in = 0;
-	vcpu->arch.pio.count = count;
-	vcpu->arch.pio.size = size;
-
-	memcpy(vcpu->arch.pio_data, val, size * count);
+	if (vcpu->arch.pio.count)
+		goto data_avail;
 
-	if (!kernel_pio(vcpu, vcpu->arch.pio_data)) {
+	ret = emulator_pio_in_out(vcpu, size, port, val, count, true);
+	if (ret) {
+data_avail:
+		memcpy(val, vcpu->arch.pio_data, size * count);
 		vcpu->arch.pio.count = 0;
 		return 1;
 	}
 
-	vcpu->run->exit_reason = KVM_EXIT_IO;
-	vcpu->run->io.direction = KVM_EXIT_IO_OUT;
-	vcpu->run->io.size = size;
-	vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE;
-	vcpu->run->io.count = count;
-	vcpu->run->io.port = port;
-
 	return 0;
 }
 
+static int emulator_pio_out_emulated(struct x86_emulate_ctxt *ctxt,
+				     int size, unsigned short port,
+				     const void *val, unsigned int count)
+{
+	struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
+
+	memcpy(vcpu->arch.pio_data, val, size * count);
+	return emulator_pio_in_out(vcpu, size, port, (void *)val, count, false);
+}
+
 static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg)
 {
 	return kvm_x86_ops->get_segment_base(vcpu, seg);
@@ -4627,6 +4146,12 @@ static int emulator_set_msr(struct x86_emulate_ctxt *ctxt,
 	return kvm_set_msr(emul_to_vcpu(ctxt), msr_index, data);
 }
 
+static int emulator_read_pmc(struct x86_emulate_ctxt *ctxt,
+			     u32 pmc, u64 *pdata)
+{
+	return kvm_pmu_read_pmc(emul_to_vcpu(ctxt), pmc, pdata);
+}
+
 static void emulator_halt(struct x86_emulate_ctxt *ctxt)
 {
 	emul_to_vcpu(ctxt)->arch.halt_request = 1;
@@ -4679,6 +4204,7 @@ static struct x86_emulate_ops emulate_ops = {
 	.set_dr              = emulator_set_dr,
 	.set_msr             = emulator_set_msr,
 	.get_msr             = emulator_get_msr,
+	.read_pmc            = emulator_read_pmc,
 	.halt                = emulator_halt,
 	.wbinvd              = emulator_wbinvd,
 	.fix_hypercall       = emulator_fix_hypercall,
@@ -4836,6 +4362,50 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva)
 	return false;
 }
 
+static bool retry_instruction(struct x86_emulate_ctxt *ctxt,
+			      unsigned long cr2,  int emulation_type)
+{
+	struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
+	unsigned long last_retry_eip, last_retry_addr, gpa = cr2;
+
+	last_retry_eip = vcpu->arch.last_retry_eip;
+	last_retry_addr = vcpu->arch.last_retry_addr;
+
+	/*
+	 * If the emulation is caused by #PF and it is non-page_table
+	 * writing instruction, it means the VM-EXIT is caused by shadow
+	 * page protected, we can zap the shadow page and retry this
+	 * instruction directly.
+	 *
+	 * Note: if the guest uses a non-page-table modifying instruction
+	 * on the PDE that points to the instruction, then we will unmap
+	 * the instruction and go to an infinite loop. So, we cache the
+	 * last retried eip and the last fault address, if we meet the eip
+	 * and the address again, we can break out of the potential infinite
+	 * loop.
+	 */
+	vcpu->arch.last_retry_eip = vcpu->arch.last_retry_addr = 0;
+
+	if (!(emulation_type & EMULTYPE_RETRY))
+		return false;
+
+	if (x86_page_table_writing_insn(ctxt))
+		return false;
+
+	if (ctxt->eip == last_retry_eip && last_retry_addr == cr2)
+		return false;
+
+	vcpu->arch.last_retry_eip = ctxt->eip;
+	vcpu->arch.last_retry_addr = cr2;
+
+	if (!vcpu->arch.mmu.direct_map)
+		gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2, NULL);
+
+	kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT);
+
+	return true;
+}
+
 int x86_emulate_instruction(struct kvm_vcpu *vcpu,
 			    unsigned long cr2,
 			    int emulation_type,
@@ -4877,6 +4447,9 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
 		return EMULATE_DONE;
 	}
 
+	if (retry_instruction(ctxt, cr2, emulation_type))
+		return EMULATE_DONE;
+
 	/* this is needed for vmware backdoor interface to work since it
 	   changes registers values  during IO operation */
 	if (vcpu->arch.emulate_regs_need_sync_from_vcpu) {
@@ -5095,17 +4668,17 @@ static void kvm_timer_init(void)
 
 static DEFINE_PER_CPU(struct kvm_vcpu *, current_vcpu);
 
-static int kvm_is_in_guest(void)
+int kvm_is_in_guest(void)
 {
-	return percpu_read(current_vcpu) != NULL;
+	return __this_cpu_read(current_vcpu) != NULL;
 }
 
 static int kvm_is_user_mode(void)
 {
 	int user_mode = 3;
 
-	if (percpu_read(current_vcpu))
-		user_mode = kvm_x86_ops->get_cpl(percpu_read(current_vcpu));
+	if (__this_cpu_read(current_vcpu))
+		user_mode = kvm_x86_ops->get_cpl(__this_cpu_read(current_vcpu));
 
 	return user_mode != 0;
 }
@@ -5114,8 +4687,8 @@ static unsigned long kvm_get_guest_ip(void)
 {
 	unsigned long ip = 0;
 
-	if (percpu_read(current_vcpu))
-		ip = kvm_rip_read(percpu_read(current_vcpu));
+	if (__this_cpu_read(current_vcpu))
+		ip = kvm_rip_read(__this_cpu_read(current_vcpu));
 
 	return ip;
 }
@@ -5128,13 +4701,13 @@ static struct perf_guest_info_callbacks kvm_guest_cbs = {
 
 void kvm_before_handle_nmi(struct kvm_vcpu *vcpu)
 {
-	percpu_write(current_vcpu, vcpu);
+	__this_cpu_write(current_vcpu, vcpu);
 }
 EXPORT_SYMBOL_GPL(kvm_before_handle_nmi);
 
 void kvm_after_handle_nmi(struct kvm_vcpu *vcpu)
 {
-	percpu_write(current_vcpu, NULL);
+	__this_cpu_write(current_vcpu, NULL);
 }
 EXPORT_SYMBOL_GPL(kvm_after_handle_nmi);
 
@@ -5233,15 +4806,6 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL_GPL(kvm_emulate_halt);
 
-static inline gpa_t hc_gpa(struct kvm_vcpu *vcpu, unsigned long a0,
-			   unsigned long a1)
-{
-	if (is_long_mode(vcpu))
-		return a0;
-	else
-		return a0 | ((gpa_t)a1 << 32);
-}
-
 int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
 {
 	u64 param, ingpa, outgpa, ret;
@@ -5337,9 +4901,6 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
 	case KVM_HC_VAPIC_POLL_IRQ:
 		ret = 0;
 		break;
-	case KVM_HC_MMU_OP:
-		r = kvm_pv_mmu_op(vcpu, a0, hc_gpa(vcpu, a1, a2), &ret);
-		break;
 	default:
 		ret = -KVM_ENOSYS;
 		break;
@@ -5369,125 +4930,6 @@ int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt)
 	return emulator_write_emulated(ctxt, rip, instruction, 3, NULL);
 }
 
-static int move_to_next_stateful_cpuid_entry(struct kvm_vcpu *vcpu, int i)
-{
-	struct kvm_cpuid_entry2 *e = &vcpu->arch.cpuid_entries[i];
-	int j, nent = vcpu->arch.cpuid_nent;
-
-	e->flags &= ~KVM_CPUID_FLAG_STATE_READ_NEXT;
-	/* when no next entry is found, the current entry[i] is reselected */
-	for (j = i + 1; ; j = (j + 1) % nent) {
-		struct kvm_cpuid_entry2 *ej = &vcpu->arch.cpuid_entries[j];
-		if (ej->function == e->function) {
-			ej->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT;
-			return j;
-		}
-	}
-	return 0; /* silence gcc, even though control never reaches here */
-}
-
-/* find an entry with matching function, matching index (if needed), and that
- * should be read next (if it's stateful) */
-static int is_matching_cpuid_entry(struct kvm_cpuid_entry2 *e,
-	u32 function, u32 index)
-{
-	if (e->function != function)
-		return 0;
-	if ((e->flags & KVM_CPUID_FLAG_SIGNIFCANT_INDEX) && e->index != index)
-		return 0;
-	if ((e->flags & KVM_CPUID_FLAG_STATEFUL_FUNC) &&
-	    !(e->flags & KVM_CPUID_FLAG_STATE_READ_NEXT))
-		return 0;
-	return 1;
-}
-
-struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
-					      u32 function, u32 index)
-{
-	int i;
-	struct kvm_cpuid_entry2 *best = NULL;
-
-	for (i = 0; i < vcpu->arch.cpuid_nent; ++i) {
-		struct kvm_cpuid_entry2 *e;
-
-		e = &vcpu->arch.cpuid_entries[i];
-		if (is_matching_cpuid_entry(e, function, index)) {
-			if (e->flags & KVM_CPUID_FLAG_STATEFUL_FUNC)
-				move_to_next_stateful_cpuid_entry(vcpu, i);
-			best = e;
-			break;
-		}
-	}
-	return best;
-}
-EXPORT_SYMBOL_GPL(kvm_find_cpuid_entry);
-
-int cpuid_maxphyaddr(struct kvm_vcpu *vcpu)
-{
-	struct kvm_cpuid_entry2 *best;
-
-	best = kvm_find_cpuid_entry(vcpu, 0x80000000, 0);
-	if (!best || best->eax < 0x80000008)
-		goto not_found;
-	best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0);
-	if (best)
-		return best->eax & 0xff;
-not_found:
-	return 36;
-}
-
-/*
- * If no match is found, check whether we exceed the vCPU's limit
- * and return the content of the highest valid _standard_ leaf instead.
- * This is to satisfy the CPUID specification.
- */
-static struct kvm_cpuid_entry2* check_cpuid_limit(struct kvm_vcpu *vcpu,
-                                                  u32 function, u32 index)
-{
-	struct kvm_cpuid_entry2 *maxlevel;
-
-	maxlevel = kvm_find_cpuid_entry(vcpu, function & 0x80000000, 0);
-	if (!maxlevel || maxlevel->eax >= function)
-		return NULL;
-	if (function & 0x80000000) {
-		maxlevel = kvm_find_cpuid_entry(vcpu, 0, 0);
-		if (!maxlevel)
-			return NULL;
-	}
-	return kvm_find_cpuid_entry(vcpu, maxlevel->eax, index);
-}
-
-void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
-{
-	u32 function, index;
-	struct kvm_cpuid_entry2 *best;
-
-	function = kvm_register_read(vcpu, VCPU_REGS_RAX);
-	index = kvm_register_read(vcpu, VCPU_REGS_RCX);
-	kvm_register_write(vcpu, VCPU_REGS_RAX, 0);
-	kvm_register_write(vcpu, VCPU_REGS_RBX, 0);
-	kvm_register_write(vcpu, VCPU_REGS_RCX, 0);
-	kvm_register_write(vcpu, VCPU_REGS_RDX, 0);
-	best = kvm_find_cpuid_entry(vcpu, function, index);
-
-	if (!best)
-		best = check_cpuid_limit(vcpu, function, index);
-
-	if (best) {
-		kvm_register_write(vcpu, VCPU_REGS_RAX, best->eax);
-		kvm_register_write(vcpu, VCPU_REGS_RBX, best->ebx);
-		kvm_register_write(vcpu, VCPU_REGS_RCX, best->ecx);
-		kvm_register_write(vcpu, VCPU_REGS_RDX, best->edx);
-	}
-	kvm_x86_ops->skip_emulated_instruction(vcpu);
-	trace_kvm_cpuid(function,
-			kvm_register_read(vcpu, VCPU_REGS_RAX),
-			kvm_register_read(vcpu, VCPU_REGS_RBX),
-			kvm_register_read(vcpu, VCPU_REGS_RCX),
-			kvm_register_read(vcpu, VCPU_REGS_RDX));
-}
-EXPORT_SYMBOL_GPL(kvm_emulate_cpuid);
-
 /*
  * Check if userspace requested an interrupt window, and that the
  * interrupt window is open.
@@ -5648,6 +5090,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 	int r;
 	bool req_int_win = !irqchip_in_kernel(vcpu->kvm) &&
 		vcpu->run->request_interrupt_window;
+	bool req_immediate_exit = 0;
 
 	if (vcpu->requests) {
 		if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu))
@@ -5687,7 +5130,12 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 			record_steal_time(vcpu);
 		if (kvm_check_request(KVM_REQ_NMI, vcpu))
 			process_nmi(vcpu);
-
+		req_immediate_exit =
+			kvm_check_request(KVM_REQ_IMMEDIATE_EXIT, vcpu);
+		if (kvm_check_request(KVM_REQ_PMU, vcpu))
+			kvm_handle_pmu_event(vcpu);
+		if (kvm_check_request(KVM_REQ_PMI, vcpu))
+			kvm_deliver_pmi(vcpu);
 	}
 
 	r = kvm_mmu_reload(vcpu);
@@ -5738,6 +5186,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 
 	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
 
+	if (req_immediate_exit)
+		smp_send_reschedule(vcpu->cpu);
+
 	kvm_guest_enter();
 
 	if (unlikely(vcpu->arch.switch_db_regs)) {
@@ -5943,10 +5394,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	if (r <= 0)
 		goto out;
 
-	if (kvm_run->exit_reason == KVM_EXIT_HYPERCALL)
-		kvm_register_write(vcpu, VCPU_REGS_RAX,
-				     kvm_run->hypercall.ret);
-
 	r = __vcpu_run(vcpu);
 
 out:
@@ -6148,7 +5595,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
 	mmu_reset_needed |= kvm_read_cr4(vcpu) != sregs->cr4;
 	kvm_x86_ops->set_cr4(vcpu, sregs->cr4);
 	if (sregs->cr4 & X86_CR4_OSXSAVE)
-		update_cpuid(vcpu);
+		kvm_update_cpuid(vcpu);
 
 	idx = srcu_read_lock(&vcpu->kvm->srcu);
 	if (!is_long_mode(vcpu) && is_pae(vcpu)) {
@@ -6425,6 +5872,8 @@ int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
 	kvm_async_pf_hash_reset(vcpu);
 	vcpu->arch.apf.halted = false;
 
+	kvm_pmu_reset(vcpu);
+
 	return kvm_x86_ops->vcpu_reset(vcpu);
 }
 
@@ -6473,10 +5922,6 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 	kvm = vcpu->kvm;
 
 	vcpu->arch.emulate_ctxt.ops = &emulate_ops;
-	vcpu->arch.walk_mmu = &vcpu->arch.mmu;
-	vcpu->arch.mmu.root_hpa = INVALID_PAGE;
-	vcpu->arch.mmu.translate_gpa = translate_gpa;
-	vcpu->arch.nested_mmu.translate_gpa = translate_nested_gpa;
 	if (!irqchip_in_kernel(kvm) || kvm_vcpu_is_bsp(vcpu))
 		vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
 	else
@@ -6513,6 +5958,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 		goto fail_free_mce_banks;
 
 	kvm_async_pf_hash_reset(vcpu);
+	kvm_pmu_init(vcpu);
 
 	return 0;
 fail_free_mce_banks:
@@ -6531,6 +5977,7 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
 {
 	int idx;
 
+	kvm_pmu_destroy(vcpu);
 	kfree(vcpu->arch.mce_banks);
 	kvm_free_lapic(vcpu);
 	idx = srcu_read_lock(&vcpu->kvm->srcu);
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index d36fe23..cb80c29 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -33,9 +33,6 @@ static inline bool kvm_exception_is_soft(unsigned int nr)
 	return (nr == BP_VECTOR) || (nr == OF_VECTOR);
 }
 
-struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
-                                             u32 function, u32 index);
-
 static inline bool is_protmode(struct kvm_vcpu *vcpu)
 {
 	return kvm_read_cr0_bits(vcpu, X86_CR0_PE);
@@ -125,4 +122,6 @@ int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt,
 	gva_t addr, void *val, unsigned int bytes,
 	struct x86_exception *exception);
 
+extern u64 host_xcr0;
+
 #endif
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
index 26c731a..fdce49c 100644
--- a/arch/x86/xen/Kconfig
+++ b/arch/x86/xen/Kconfig
@@ -29,7 +29,8 @@ config XEN_PVHVM
 
 config XEN_MAX_DOMAIN_MEMORY
        int
-       default 128
+       default 500 if X86_64
+       default 64 if X86_32
        depends on XEN
        help
          This only affects the sizing of some bss arrays, the unused
@@ -48,3 +49,4 @@ config XEN_DEBUG_FS
 	help
 	  Enable statistics output and various tuning options in debugfs.
 	  Enabling this option may incur a significant performance overhead.
+
diff --git a/arch/x86/xen/grant-table.c b/arch/x86/xen/grant-table.c
index 5a40d24..3a5f55d 100644
--- a/arch/x86/xen/grant-table.c
+++ b/arch/x86/xen/grant-table.c
@@ -54,6 +54,20 @@ static int map_pte_fn(pte_t *pte, struct page *pmd_page,
 	return 0;
 }
 
+/*
+ * This function is used to map shared frames to store grant status. It is
+ * different from map_pte_fn above, the frames type here is uint64_t.
+ */
+static int map_pte_fn_status(pte_t *pte, struct page *pmd_page,
+			     unsigned long addr, void *data)
+{
+	uint64_t **frames = (uint64_t **)data;
+
+	set_pte_at(&init_mm, addr, pte, mfn_pte((*frames)[0], PAGE_KERNEL));
+	(*frames)++;
+	return 0;
+}
+
 static int unmap_pte_fn(pte_t *pte, struct page *pmd_page,
 			unsigned long addr, void *data)
 {
@@ -64,10 +78,10 @@ static int unmap_pte_fn(pte_t *pte, struct page *pmd_page,
 
 int arch_gnttab_map_shared(unsigned long *frames, unsigned long nr_gframes,
 			   unsigned long max_nr_gframes,
-			   struct grant_entry **__shared)
+			   void **__shared)
 {
 	int rc;
-	struct grant_entry *shared = *__shared;
+	void *shared = *__shared;
 
 	if (shared == NULL) {
 		struct vm_struct *area =
@@ -83,8 +97,30 @@ int arch_gnttab_map_shared(unsigned long *frames, unsigned long nr_gframes,
 	return rc;
 }
 
-void arch_gnttab_unmap_shared(struct grant_entry *shared,
-			      unsigned long nr_gframes)
+int arch_gnttab_map_status(uint64_t *frames, unsigned long nr_gframes,
+			   unsigned long max_nr_gframes,
+			   grant_status_t **__shared)
+{
+	int rc;
+	grant_status_t *shared = *__shared;
+
+	if (shared == NULL) {
+		/* No need to pass in PTE as we are going to do it
+		 * in apply_to_page_range anyhow. */
+		struct vm_struct *area =
+			alloc_vm_area(PAGE_SIZE * max_nr_gframes, NULL);
+		BUG_ON(area == NULL);
+		shared = area->addr;
+		*__shared = shared;
+	}
+
+	rc = apply_to_page_range(&init_mm, (unsigned long)shared,
+				 PAGE_SIZE * nr_gframes,
+				 map_pte_fn_status, &frames);
+	return rc;
+}
+
+void arch_gnttab_unmap(void *shared, unsigned long nr_gframes)
 {
 	apply_to_page_range(&init_mm, (unsigned long)shared,
 			    PAGE_SIZE * nr_gframes, unmap_pte_fn, NULL);
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index f4bf8aa..58a0e46 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1852,7 +1852,7 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
 	xen_write_cr3(__pa(initial_page_table));
 
 	memblock_reserve(__pa(xen_start_info->pt_base),
-			 xen_start_info->nr_pt_frames * PAGE_SIZE));
+			 xen_start_info->nr_pt_frames * PAGE_SIZE);
 
 	return initial_page_table;
 }