From ded467374a34eb80020c2213456b1d9ca946b88c Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 6 Apr 2011 10:53:48 +0200 Subject: x86/amd-iommu: Move compl-wait command building to own function This patch introduces a separate function for building completion-wait commands. Signed-off-by: Joerg Roedel diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 57ca777..eebd504 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -383,6 +383,13 @@ irqreturn_t amd_iommu_int_handler(int irq, void *data) * ****************************************************************************/ +static void build_completion_wait(struct iommu_cmd *cmd) +{ + memset(cmd, 0, sizeof(*cmd)); + cmd->data[0] = CMD_COMPL_WAIT_INT_MASK; + CMD_SET_TYPE(cmd, CMD_COMPL_WAIT); +} + /* * Writes the command to the IOMMUs command buffer and informs the * hardware about the new command. Must be called with iommu->lock held. @@ -458,9 +465,7 @@ static int __iommu_completion_wait(struct amd_iommu *iommu) { struct iommu_cmd cmd; - memset(&cmd, 0, sizeof(cmd)); - cmd.data[0] = CMD_COMPL_WAIT_INT_MASK; - CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT); + build_completion_wait(&cmd); return __iommu_queue_command(iommu, &cmd); } -- cgit v0.10.2 From 94fe79e2f100bfcd8e7689cbf8838634779b80a2 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 6 Apr 2011 11:07:21 +0200 Subject: x86/amd-iommu: Move inv-dte command building to own function This patch moves command building for the invalidate-dte command into its own function. Signed-off-by: Joerg Roedel diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index eebd504..4e5631a 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -390,6 +390,13 @@ static void build_completion_wait(struct iommu_cmd *cmd) CMD_SET_TYPE(cmd, CMD_COMPL_WAIT); } +static void build_inv_dte(struct iommu_cmd *cmd, u16 devid) +{ + memset(cmd, 0, sizeof(*cmd)); + cmd->data[0] = devid; + CMD_SET_TYPE(cmd, CMD_INV_DEV_ENTRY); +} + /* * Writes the command to the IOMMUs command buffer and informs the * hardware about the new command. Must be called with iommu->lock held. @@ -533,10 +540,7 @@ static int iommu_flush_device(struct device *dev) devid = get_device_id(dev); iommu = amd_iommu_rlookup_table[devid]; - /* Build command */ - memset(&cmd, 0, sizeof(cmd)); - CMD_SET_TYPE(&cmd, CMD_INV_DEV_ENTRY); - cmd.data[0] = devid; + build_inv_dte(&cmd, devid); return iommu_queue_command(iommu, &cmd); } -- cgit v0.10.2 From 11b6402c6673b530fac9920c5640c75e99fee956 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 6 Apr 2011 11:49:28 +0200 Subject: x86/amd-iommu: Cleanup inv_pages command handling This patch reworks the processing of invalidate-pages commands sent to the IOMMU. The function building the command is extended so we can get rid of another function. It was also renamed to match the other function names.
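The pattern these three patches converge on is simple: a build_*() helper fills the four 32-bit words of a struct iommu_cmd, and the caller hands the finished command to the queuing path. Below is a minimal, self-contained userspace sketch of that pattern; the structure layout, the CMD_SET_TYPE() shift and the two constants are taken from the patches, while queue_command() is only a placeholder standing in for iommu_queue_command().

	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>

	/* Same layout as the driver's struct iommu_cmd: four 32-bit words. */
	struct iommu_cmd {
		uint32_t data[4];
	};

	/* The command type lives in bits 28-31 of data[1], as in the driver. */
	#define CMD_SET_TYPE(cmd, t)		((cmd)->data[1] |= ((t) << 28))
	#define CMD_COMPL_WAIT			0x01
	#define CMD_COMPL_WAIT_INT_MASK		0x02

	/* Build helper in the style introduced by these patches. */
	static void build_completion_wait(struct iommu_cmd *cmd)
	{
		memset(cmd, 0, sizeof(*cmd));
		cmd->data[0] = CMD_COMPL_WAIT_INT_MASK;
		CMD_SET_TYPE(cmd, CMD_COMPL_WAIT);
	}

	/* Placeholder for iommu_queue_command(): just dump the four words. */
	static int queue_command(const struct iommu_cmd *cmd)
	{
		printf("cmd: %08x %08x %08x %08x\n",
		       cmd->data[0], cmd->data[1], cmd->data[2], cmd->data[3]);
		return 0;
	}

	int main(void)
	{
		struct iommu_cmd cmd;

		build_completion_wait(&cmd);	/* build the command ... */
		return queue_command(&cmd);	/* ... then queue it */
	}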
Signed-off-by: Joerg Roedel diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 4e5631a..f8ec28e 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -397,6 +397,37 @@ static void build_inv_dte(struct iommu_cmd *cmd, u16 devid) CMD_SET_TYPE(cmd, CMD_INV_DEV_ENTRY); } +static void build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address, + size_t size, u16 domid, int pde) +{ + u64 pages; + int s; + + pages = iommu_num_pages(address, size, PAGE_SIZE); + s = 0; + + if (pages > 1) { + /* + * If we have to flush more than one page, flush all + * TLB entries for this domain + */ + address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS; + s = 1; + } + + address &= PAGE_MASK; + + memset(cmd, 0, sizeof(*cmd)); + cmd->data[1] |= domid; + cmd->data[2] = lower_32_bits(address); + cmd->data[3] = upper_32_bits(address); + CMD_SET_TYPE(cmd, CMD_INV_IOMMU_PAGES); + if (s) /* size bit - we flush more than one 4kb page */ + cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK; + if (pde) /* PDE bit - we wan't flush everything not only the PTEs */ + cmd->data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK; +} + /* * Writes the command to the IOMMUs command buffer and informs the * hardware about the new command. Must be called with iommu->lock held. @@ -545,37 +576,6 @@ static int iommu_flush_device(struct device *dev) return iommu_queue_command(iommu, &cmd); } -static void __iommu_build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address, - u16 domid, int pde, int s) -{ - memset(cmd, 0, sizeof(*cmd)); - address &= PAGE_MASK; - CMD_SET_TYPE(cmd, CMD_INV_IOMMU_PAGES); - cmd->data[1] |= domid; - cmd->data[2] = lower_32_bits(address); - cmd->data[3] = upper_32_bits(address); - if (s) /* size bit - we flush more than one 4kb page */ - cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK; - if (pde) /* PDE bit - we wan't flush everything not only the PTEs */ - cmd->data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK; -} - -/* - * Generic command send function for invalidaing TLB entries - */ -static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu, - u64 address, u16 domid, int pde, int s) -{ - struct iommu_cmd cmd; - int ret; - - __iommu_build_inv_iommu_pages(&cmd, address, domid, pde, s); - - ret = iommu_queue_command(iommu, &cmd); - - return ret; -} - /* * TLB invalidation function which is called from the mapping functions. 
* It invalidates a single PTE if the range to flush is within a single @@ -584,20 +584,10 @@ static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu, static void __iommu_flush_pages(struct protection_domain *domain, u64 address, size_t size, int pde) { - int s = 0, i; - unsigned long pages = iommu_num_pages(address, size, PAGE_SIZE); - - address &= PAGE_MASK; - - if (pages > 1) { - /* - * If we have to flush more than one page, flush all - * TLB entries for this domain - */ - address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS; - s = 1; - } + struct iommu_cmd cmd; + int ret = 0, i; + build_inv_iommu_pages(&cmd, address, size, domain->id, pde); for (i = 0; i < amd_iommus_present; ++i) { if (!domain->dev_iommu[i]) @@ -607,11 +597,10 @@ static void __iommu_flush_pages(struct protection_domain *domain, * Devices of this domain are behind this IOMMU * We need a TLB flush */ - iommu_queue_inv_iommu_pages(amd_iommus[i], address, - domain->id, pde, s); + ret |= iommu_queue_command(amd_iommus[i], &cmd); } - return; + WARN_ON(ret); } static void iommu_flush_pages(struct protection_domain *domain, -- cgit v0.10.2 From 815b33fdc279d34ab40a8bfe1866623a4cc5669b Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 6 Apr 2011 17:26:49 +0200 Subject: x86/amd-iommu: Cleanup completion-wait handling This patch cleans up the implementation of completion-wait command sending. It also switches the completion indicator from the MMIO bit to a memory store which can be checked without IOMMU locking. As a side effect this patch makes the __iommu_queue_command function obsolete and so it is removed too. Signed-off-by: Joerg Roedel diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index f8ec28e..073c64b 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -34,7 +35,7 @@ #define CMD_SET_TYPE(cmd, t) ((cmd)->data[1] |= ((t) << 28)) -#define EXIT_LOOP_COUNT 10000000 +#define LOOP_TIMEOUT 100000 static DEFINE_RWLOCK(amd_iommu_devtable_lock); @@ -383,10 +384,14 @@ irqreturn_t amd_iommu_int_handler(int irq, void *data) * ****************************************************************************/ -static void build_completion_wait(struct iommu_cmd *cmd) +static void build_completion_wait(struct iommu_cmd *cmd, u64 address) { + WARN_ON(address & 0x7ULL); + memset(cmd, 0, sizeof(*cmd)); - cmd->data[0] = CMD_COMPL_WAIT_INT_MASK; + cmd->data[0] = lower_32_bits(__pa(address)) | CMD_COMPL_WAIT_STORE_MASK; + cmd->data[1] = upper_32_bits(__pa(address)); + cmd->data[2] = 1; CMD_SET_TYPE(cmd, CMD_COMPL_WAIT); } @@ -432,12 +437,14 @@ static void build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address, * Writes the command to the IOMMUs command buffer and informs the * hardware about the new command. Must be called with iommu->lock held. 
*/ -static int __iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd) +static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd) { + unsigned long flags; u32 tail, head; u8 *target; WARN_ON(iommu->cmd_buf_size & CMD_BUFFER_UNINITIALIZED); + spin_lock_irqsave(&iommu->lock, flags); tail = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); target = iommu->cmd_buf + tail; memcpy_toio(target, cmd, sizeof(*cmd)); @@ -446,99 +453,41 @@ static int __iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd) if (tail == head) return -ENOMEM; writel(tail, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); - - return 0; -} - -/* - * General queuing function for commands. Takes iommu->lock and calls - * __iommu_queue_command(). - */ -static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd) -{ - unsigned long flags; - int ret; - - spin_lock_irqsave(&iommu->lock, flags); - ret = __iommu_queue_command(iommu, cmd); - if (!ret) - iommu->need_sync = true; + iommu->need_sync = true; spin_unlock_irqrestore(&iommu->lock, flags); - return ret; -} - -/* - * This function waits until an IOMMU has completed a completion - * wait command - */ -static void __iommu_wait_for_completion(struct amd_iommu *iommu) -{ - int ready = 0; - unsigned status = 0; - unsigned long i = 0; - - INC_STATS_COUNTER(compl_wait); - - while (!ready && (i < EXIT_LOOP_COUNT)) { - ++i; - /* wait for the bit to become one */ - status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET); - ready = status & MMIO_STATUS_COM_WAIT_INT_MASK; - } - - /* set bit back to zero */ - status &= ~MMIO_STATUS_COM_WAIT_INT_MASK; - writel(status, iommu->mmio_base + MMIO_STATUS_OFFSET); - - if (unlikely(i == EXIT_LOOP_COUNT)) - iommu->reset_in_progress = true; + return 0; } /* * This function queues a completion wait command into the command * buffer of an IOMMU */ -static int __iommu_completion_wait(struct amd_iommu *iommu) -{ - struct iommu_cmd cmd; - - build_completion_wait(&cmd); - - return __iommu_queue_command(iommu, &cmd); -} - -/* - * This function is called whenever we need to ensure that the IOMMU has - * completed execution of all commands we sent. It sends a - * COMPLETION_WAIT command and waits for it to finish. The IOMMU informs - * us about that by writing a value to a physical address we pass with - * the command. - */ static int iommu_completion_wait(struct amd_iommu *iommu) { - int ret = 0; - unsigned long flags; - - spin_lock_irqsave(&iommu->lock, flags); + struct iommu_cmd cmd; + volatile u64 sem = 0; + int ret, i = 0; if (!iommu->need_sync) - goto out; - - ret = __iommu_completion_wait(iommu); + return 0; - iommu->need_sync = false; + build_completion_wait(&cmd, (u64)&sem); + ret = iommu_queue_command(iommu, &cmd); if (ret) - goto out; - - __iommu_wait_for_completion(iommu); + return ret; -out: - spin_unlock_irqrestore(&iommu->lock, flags); + while (sem == 0 && i < LOOP_TIMEOUT) { + udelay(1); + i += 1; + } - if (iommu->reset_in_progress) + if (i == LOOP_TIMEOUT) { + pr_alert("AMD-Vi: Completion-Wait loop timed out\n"); + iommu->reset_in_progress = true; reset_iommu_command_buffer(iommu); + } return 0; } -- cgit v0.10.2 From 61985a040f17c03b09a2772508ee02729571365b Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 6 Apr 2011 17:46:49 +0200 Subject: x86/amd-iommu: Remove command buffer resetting logic The logic to reset the command buffer caused more problems than it actually helped. 
The logic kicked in when the IOMMU hardware stopped executing commands, but the underlying reasons for that are usually not fixed by just resetting the command buffer. So the code can be removed to reduce complexity. Signed-off-by: Joerg Roedel diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h index e3509fc..878ae00 100644 --- a/arch/x86/include/asm/amd_iommu_types.h +++ b/arch/x86/include/asm/amd_iommu_types.h @@ -409,9 +409,6 @@ struct amd_iommu { /* if one, we need to send a completion wait command */ bool need_sync; - /* becomes true if a command buffer reset is running */ - bool reset_in_progress; - /* default dma_ops domain for that IOMMU */ struct dma_ops_domain *default_dom; diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 073c64b..0147c5c 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -58,7 +58,6 @@ struct iommu_cmd { u32 data[4]; }; -static void reset_iommu_command_buffer(struct amd_iommu *iommu); static void update_domain(struct protection_domain *domain); /**************************************************************************** @@ -323,8 +322,6 @@ static void iommu_print_event(struct amd_iommu *iommu, void *__evt) break; case EVENT_TYPE_ILL_CMD: printk("ILLEGAL_COMMAND_ERROR address=0x%016llx]\n", address); - iommu->reset_in_progress = true; - reset_iommu_command_buffer(iommu); dump_command(address); break; case EVENT_TYPE_CMD_HARD_ERR: @@ -485,8 +482,7 @@ static int iommu_completion_wait(struct amd_iommu *iommu) if (i == LOOP_TIMEOUT) { pr_alert("AMD-Vi: Completion-Wait loop timed out\n"); - iommu->reset_in_progress = true; - reset_iommu_command_buffer(iommu); + ret = -EIO; } return 0; @@ -628,20 +624,6 @@ void amd_iommu_flush_all_domains(void) spin_unlock_irqrestore(&amd_iommu_pd_lock, flags); } -static void reset_iommu_command_buffer(struct amd_iommu *iommu) -{ - pr_err("AMD-Vi: Resetting IOMMU command buffer\n"); - - if (iommu->reset_in_progress) - panic("AMD-Vi: ILLEGAL_COMMAND_ERROR while resetting command buffer\n"); - - amd_iommu_reset_cmd_buffer(iommu); - amd_iommu_flush_all_devices(); - amd_iommu_flush_all_domains(); - - iommu->reset_in_progress = false; -} - /**************************************************************************** * * The functions below are used the create the page table mappings for -- cgit v0.10.2 From 17b124bf1463582005d662d4dd95f037ad863c57 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 6 Apr 2011 18:01:35 +0200 Subject: x86/amd-iommu: Rename iommu_flush* to domain_flush* These functions all operate on protection domains and not on single IOMMUs. Represent that in their names. Signed-off-by: Joerg Roedel diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 0147c5c..9d66b20 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -488,22 +488,6 @@ static int iommu_completion_wait(struct amd_iommu *iommu) return 0; } -static void iommu_flush_complete(struct protection_domain *domain) -{ - int i; - - for (i = 0; i < amd_iommus_present; ++i) { - if (!domain->dev_iommu[i]) - continue; - - /* - * Devices of this domain are behind this IOMMU - * We need to wait for completion of all commands. - */ - iommu_completion_wait(amd_iommus[i]); - } -} - /* * Command send function for invalidating a device table entry */ @@ -526,8 +510,8 @@ static int iommu_flush_device(struct device *dev) * It invalidates a single PTE if the range to flush is within a single * page.
Otherwise it flushes the whole TLB of the IOMMU. */ -static void __iommu_flush_pages(struct protection_domain *domain, - u64 address, size_t size, int pde) +static void __domain_flush_pages(struct protection_domain *domain, + u64 address, size_t size, int pde) { struct iommu_cmd cmd; int ret = 0, i; @@ -548,29 +532,45 @@ static void __iommu_flush_pages(struct protection_domain *domain, WARN_ON(ret); } -static void iommu_flush_pages(struct protection_domain *domain, - u64 address, size_t size) +static void domain_flush_pages(struct protection_domain *domain, + u64 address, size_t size) { - __iommu_flush_pages(domain, address, size, 0); + __domain_flush_pages(domain, address, size, 0); } /* Flush the whole IO/TLB for a given protection domain */ -static void iommu_flush_tlb(struct protection_domain *domain) +static void domain_flush_tlb(struct protection_domain *domain) { - __iommu_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 0); + __domain_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 0); } /* Flush the whole IO/TLB for a given protection domain - including PDE */ -static void iommu_flush_tlb_pde(struct protection_domain *domain) +static void domain_flush_tlb_pde(struct protection_domain *domain) { - __iommu_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 1); + __domain_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 1); +} + +static void domain_flush_complete(struct protection_domain *domain) +{ + int i; + + for (i = 0; i < amd_iommus_present; ++i) { + if (!domain->dev_iommu[i]) + continue; + + /* + * Devices of this domain are behind this IOMMU + * We need to wait for completion of all commands. + */ + iommu_completion_wait(amd_iommus[i]); + } } /* * This function flushes the DTEs for all devices in domain */ -static void iommu_flush_domain_devices(struct protection_domain *domain) +static void domain_flush_devices(struct protection_domain *domain) { struct iommu_dev_data *dev_data; unsigned long flags; @@ -591,8 +591,8 @@ static void iommu_flush_all_domain_devices(void) spin_lock_irqsave(&amd_iommu_pd_lock, flags); list_for_each_entry(domain, &amd_iommu_pd_list, list) { - iommu_flush_domain_devices(domain); - iommu_flush_complete(domain); + domain_flush_devices(domain); + domain_flush_complete(domain); } spin_unlock_irqrestore(&amd_iommu_pd_lock, flags); @@ -616,8 +616,8 @@ void amd_iommu_flush_all_domains(void) list_for_each_entry(domain, &amd_iommu_pd_list, list) { spin_lock(&domain->lock); - iommu_flush_tlb_pde(domain); - iommu_flush_complete(domain); + domain_flush_tlb_pde(domain); + domain_flush_complete(domain); spin_unlock(&domain->lock); } @@ -1480,7 +1480,7 @@ static int attach_device(struct device *dev, * left the caches in the IOMMU dirty. So we have to flush * here to evict all dirty stuff. 
*/ - iommu_flush_tlb_pde(domain); + domain_flush_tlb_pde(domain); return ret; } @@ -1693,8 +1693,9 @@ static void update_domain(struct protection_domain *domain) return; update_device_table(domain); - iommu_flush_domain_devices(domain); - iommu_flush_tlb_pde(domain); + + domain_flush_devices(domain); + domain_flush_tlb_pde(domain); domain->updated = false; } @@ -1853,10 +1854,10 @@ retry: ADD_STATS_COUNTER(alloced_io_mem, size); if (unlikely(dma_dom->need_flush && !amd_iommu_unmap_flush)) { - iommu_flush_tlb(&dma_dom->domain); + domain_flush_tlb(&dma_dom->domain); dma_dom->need_flush = false; } else if (unlikely(amd_iommu_np_cache)) - iommu_flush_pages(&dma_dom->domain, address, size); + domain_flush_pages(&dma_dom->domain, address, size); out: return address; @@ -1905,7 +1906,7 @@ static void __unmap_single(struct dma_ops_domain *dma_dom, dma_ops_free_addresses(dma_dom, dma_addr, pages); if (amd_iommu_unmap_flush || dma_dom->need_flush) { - iommu_flush_pages(&dma_dom->domain, flush_addr, size); + domain_flush_pages(&dma_dom->domain, flush_addr, size); dma_dom->need_flush = false; } } @@ -1941,7 +1942,7 @@ static dma_addr_t map_page(struct device *dev, struct page *page, if (addr == DMA_ERROR_CODE) goto out; - iommu_flush_complete(domain); + domain_flush_complete(domain); out: spin_unlock_irqrestore(&domain->lock, flags); @@ -1968,7 +1969,7 @@ static void unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size, __unmap_single(domain->priv, dma_addr, size, dir); - iommu_flush_complete(domain); + domain_flush_complete(domain); spin_unlock_irqrestore(&domain->lock, flags); } @@ -2033,7 +2034,7 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, goto unmap; } - iommu_flush_complete(domain); + domain_flush_complete(domain); out: spin_unlock_irqrestore(&domain->lock, flags); @@ -2079,7 +2080,7 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist, s->dma_address = s->dma_length = 0; } - iommu_flush_complete(domain); + domain_flush_complete(domain); spin_unlock_irqrestore(&domain->lock, flags); } @@ -2129,7 +2130,7 @@ static void *alloc_coherent(struct device *dev, size_t size, goto out_free; } - iommu_flush_complete(domain); + domain_flush_complete(domain); spin_unlock_irqrestore(&domain->lock, flags); @@ -2161,7 +2162,7 @@ static void free_coherent(struct device *dev, size_t size, __unmap_single(domain->priv, dma_addr, size, DMA_BIDIRECTIONAL); - iommu_flush_complete(domain); + domain_flush_complete(domain); spin_unlock_irqrestore(&domain->lock, flags); @@ -2471,7 +2472,7 @@ static int amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova, unmap_size = iommu_unmap_page(domain, iova, page_size); mutex_unlock(&domain->api_lock); - iommu_flush_tlb_pde(domain); + domain_flush_tlb_pde(domain); return get_order(unmap_size); } -- cgit v0.10.2 From ac0ea6e92b2227c86fe4f7f9eb429071d617a25d Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 6 Apr 2011 18:38:20 +0200 Subject: x86/amd-iommu: Improve handling of full command buffer This patch improves the handling of commands when the IOMMU command buffer is nearly full. In this case it issues a completion-wait command and waits until the IOMMU has processed it before it continues queuing new commands.
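The key computation is how much room is left in the circular command buffer before the hardware's read pointer (head). A small userspace sketch of that arithmetic follows, with hypothetical buffer and command sizes; the real driver uses the same modulo trick and treats a result of two or less as "nearly full", which triggers the completion-wait-and-retry path described above.

	#include <stdint.h>
	#include <stdio.h>

	/*
	 * Free space in a circular command buffer: distance from the slot that
	 * would follow the new command back to the head, modulo the buffer size.
	 * Assumes the buffer size is a power of two, as in the driver, so the
	 * unsigned wrap-around stays consistent with the modulo.
	 */
	static uint32_t ring_space_left(uint32_t head, uint32_t tail,
					uint32_t cmd_size, uint32_t buf_size)
	{
		uint32_t next_tail = (tail + cmd_size) % buf_size;

		return (head - next_tail) % buf_size;
	}

	int main(void)
	{
		/* Hypothetical 512-byte buffer holding 16-byte commands. */
		uint32_t buf_size = 512, cmd_size = 16;
		uint32_t head = 32, tail = 480;

		/* Prints 48 bytes here: room for three more commands. */
		printf("space left: %u bytes\n",
		       ring_space_left(head, tail, cmd_size, buf_size));

		return 0;
	}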
Signed-off-by: Joerg Roedel diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 9d66b20..75c7f8c 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -381,6 +381,39 @@ irqreturn_t amd_iommu_int_handler(int irq, void *data) * ****************************************************************************/ +static int wait_on_sem(volatile u64 *sem) +{ + int i = 0; + + while (*sem == 0 && i < LOOP_TIMEOUT) { + udelay(1); + i += 1; + } + + if (i == LOOP_TIMEOUT) { + pr_alert("AMD-Vi: Completion-Wait loop timed out\n"); + return -EIO; + } + + return 0; +} + +static void copy_cmd_to_buffer(struct amd_iommu *iommu, + struct iommu_cmd *cmd, + u32 tail) +{ + u8 *target; + + target = iommu->cmd_buf + tail; + tail = (tail + sizeof(*cmd)) % iommu->cmd_buf_size; + + /* Copy command to buffer */ + memcpy(target, cmd, sizeof(*cmd)); + + /* Tell the IOMMU about it */ + writel(tail, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); +} + static void build_completion_wait(struct iommu_cmd *cmd, u64 address) { WARN_ON(address & 0x7ULL); @@ -432,25 +465,44 @@ static void build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address, /* * Writes the command to the IOMMUs command buffer and informs the - * hardware about the new command. Must be called with iommu->lock held. + * hardware about the new command. */ static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd) { + u32 left, tail, head, next_tail; unsigned long flags; - u32 tail, head; - u8 *target; WARN_ON(iommu->cmd_buf_size & CMD_BUFFER_UNINITIALIZED); + +again: spin_lock_irqsave(&iommu->lock, flags); - tail = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); - target = iommu->cmd_buf + tail; - memcpy_toio(target, cmd, sizeof(*cmd)); - tail = (tail + sizeof(*cmd)) % iommu->cmd_buf_size; - head = readl(iommu->mmio_base + MMIO_CMD_HEAD_OFFSET); - if (tail == head) - return -ENOMEM; - writel(tail, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); + + head = readl(iommu->mmio_base + MMIO_CMD_HEAD_OFFSET); + tail = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); + next_tail = (tail + sizeof(*cmd)) % iommu->cmd_buf_size; + left = (head - next_tail) % iommu->cmd_buf_size; + + if (left <= 2) { + struct iommu_cmd sync_cmd; + volatile u64 sem = 0; + int ret; + + build_completion_wait(&sync_cmd, (u64)&sem); + copy_cmd_to_buffer(iommu, &sync_cmd, tail); + + spin_unlock_irqrestore(&iommu->lock, flags); + + if ((ret = wait_on_sem(&sem)) != 0) + return ret; + + goto again; + } + + copy_cmd_to_buffer(iommu, cmd, tail); + + /* We need to sync now to make sure all commands are processed */ iommu->need_sync = true; + spin_unlock_irqrestore(&iommu->lock, flags); return 0; @@ -464,7 +516,7 @@ static int iommu_completion_wait(struct amd_iommu *iommu) { struct iommu_cmd cmd; volatile u64 sem = 0; - int ret, i = 0; + int ret; if (!iommu->need_sync) return 0; @@ -475,17 +527,7 @@ static int iommu_completion_wait(struct amd_iommu *iommu) if (ret) return ret; - while (sem == 0 && i < LOOP_TIMEOUT) { - udelay(1); - i += 1; - } - - if (i == LOOP_TIMEOUT) { - pr_alert("AMD-Vi: Completion-Wait loop timed out\n"); - ret = -EIO; - } - - return 0; + return wait_on_sem(&sem); } /* -- cgit v0.10.2 From d8c13085775c72e2d46edc54ed0c803c3a944ddb Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 6 Apr 2011 18:51:26 +0200 Subject: x86/amd-iommu: Rename iommu_flush_device This function operates on a struct device, so give it a name that represents that. As a side effect a new function is introduced which operates on an IOMMU and a device ID.
It will be used again in a later patch. Signed-off-by: Joerg Roedel diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 75c7f8c..3557f22 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -530,21 +530,27 @@ static int iommu_completion_wait(struct amd_iommu *iommu) return wait_on_sem(&sem); } +static int iommu_flush_dte(struct amd_iommu *iommu, u16 devid) +{ + struct iommu_cmd cmd; + + build_inv_dte(&cmd, devid); + + return iommu_queue_command(iommu, &cmd); +} + /* * Command send function for invalidating a device table entry */ -static int iommu_flush_device(struct device *dev) +static int device_flush_dte(struct device *dev) { struct amd_iommu *iommu; - struct iommu_cmd cmd; u16 devid; devid = get_device_id(dev); iommu = amd_iommu_rlookup_table[devid]; - build_inv_dte(&cmd, devid); - - return iommu_queue_command(iommu, &cmd); + return iommu_flush_dte(iommu, devid); } /* @@ -620,7 +626,7 @@ static void domain_flush_devices(struct protection_domain *domain) spin_lock_irqsave(&domain->lock, flags); list_for_each_entry(dev_data, &domain->dev_list, list) - iommu_flush_device(dev_data->dev); + device_flush_dte(dev_data->dev); spin_unlock_irqrestore(&domain->lock, flags); } @@ -1424,7 +1430,7 @@ static void do_attach(struct device *dev, struct protection_domain *domain) domain->dev_cnt += 1; /* Flush the DTE entry */ - iommu_flush_device(dev); + device_flush_dte(dev); } static void do_detach(struct device *dev) @@ -1447,7 +1453,7 @@ static void do_detach(struct device *dev) clear_dte_entry(devid); /* Flush the DTE entry */ - iommu_flush_device(dev); + device_flush_dte(dev); } /* @@ -1663,7 +1669,7 @@ static int device_change_notifier(struct notifier_block *nb, goto out; } - iommu_flush_device(dev); + device_flush_dte(dev); iommu_completion_wait(iommu); out: @@ -2448,7 +2454,7 @@ static void amd_iommu_detach_device(struct iommu_domain *dom, if (!iommu) return; - iommu_flush_device(dev); + device_flush_dte(dev); iommu_completion_wait(iommu); } -- cgit v0.10.2 From 7d0c5cc5be73f7ce26fdcca7b8ec2203f661eb93 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 7 Apr 2011 08:16:10 +0200 Subject: x86/amd-iommu: Flush all internal TLBs when IOMMUs are enabled The old code only flushed a DTE or a domain TLB before it is actually used by the IOMMU driver. While this is efficient and works when done right it is more likely to introduce new bugs when changing code (which happened in the past). This patch adds code to flush all DTEs and all domain TLBs in each IOMMU right after it is enabled (at boot and after resume). This reduces the complexity of the driver and makes it less likely to introduce stale-TLB bugs in the future. 
Signed-off-by: Joerg Roedel diff --git a/arch/x86/include/asm/amd_iommu_proto.h b/arch/x86/include/asm/amd_iommu_proto.h index 916bc81..1223c0f 100644 --- a/arch/x86/include/asm/amd_iommu_proto.h +++ b/arch/x86/include/asm/amd_iommu_proto.h @@ -24,8 +24,6 @@ struct amd_iommu; extern int amd_iommu_init_dma_ops(void); extern int amd_iommu_init_passthrough(void); extern irqreturn_t amd_iommu_int_handler(int irq, void *data); -extern void amd_iommu_flush_all_domains(void); -extern void amd_iommu_flush_all_devices(void); extern void amd_iommu_apply_erratum_63(u16 devid); extern void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu); extern int amd_iommu_init_devices(void); diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 3557f22..bcf58ea 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -539,6 +539,40 @@ static int iommu_flush_dte(struct amd_iommu *iommu, u16 devid) return iommu_queue_command(iommu, &cmd); } +static void iommu_flush_dte_all(struct amd_iommu *iommu) +{ + u32 devid; + + for (devid = 0; devid <= 0xffff; ++devid) + iommu_flush_dte(iommu, devid); + + iommu_completion_wait(iommu); +} + +/* + * This function uses heavy locking and may disable irqs for some time. But + * this is no issue because it is only called during resume. + */ +static void iommu_flush_tlb_all(struct amd_iommu *iommu) +{ + u32 dom_id; + + for (dom_id = 0; dom_id <= 0xffff; ++dom_id) { + struct iommu_cmd cmd; + build_inv_iommu_pages(&cmd, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, + dom_id, 1); + iommu_queue_command(iommu, &cmd); + } + + iommu_completion_wait(iommu); +} + +void iommu_flush_all_caches(struct amd_iommu *iommu) +{ + iommu_flush_dte_all(iommu); + iommu_flush_tlb_all(iommu); +} + /* * Command send function for invalidating a device table entry */ @@ -631,47 +665,6 @@ static void domain_flush_devices(struct protection_domain *domain) spin_unlock_irqrestore(&domain->lock, flags); } -static void iommu_flush_all_domain_devices(void) -{ - struct protection_domain *domain; - unsigned long flags; - - spin_lock_irqsave(&amd_iommu_pd_lock, flags); - - list_for_each_entry(domain, &amd_iommu_pd_list, list) { - domain_flush_devices(domain); - domain_flush_complete(domain); - } - - spin_unlock_irqrestore(&amd_iommu_pd_lock, flags); -} - -void amd_iommu_flush_all_devices(void) -{ - iommu_flush_all_domain_devices(); -} - -/* - * This function uses heavy locking and may disable irqs for some time. But - * this is no issue because it is only called during resume. 
- */ -void amd_iommu_flush_all_domains(void) -{ - struct protection_domain *domain; - unsigned long flags; - - spin_lock_irqsave(&amd_iommu_pd_lock, flags); - - list_for_each_entry(domain, &amd_iommu_pd_list, list) { - spin_lock(&domain->lock); - domain_flush_tlb_pde(domain); - domain_flush_complete(domain); - spin_unlock(&domain->lock); - } - - spin_unlock_irqrestore(&amd_iommu_pd_lock, flags); -} - /**************************************************************************** * * The functions below are used the create the page table mappings for diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 246d727..8848dda 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -180,6 +180,12 @@ static u32 dev_table_size; /* size of the device table */ static u32 alias_table_size; /* size of the alias table */ static u32 rlookup_table_size; /* size if the rlookup table */ +/* + * This function flushes all internal caches of + * the IOMMU used by this driver. + */ +extern void iommu_flush_all_caches(struct amd_iommu *iommu); + static inline void update_last_devid(u16 devid) { if (devid > amd_iommu_last_bdf) @@ -1244,6 +1250,7 @@ static void enable_iommus(void) iommu_set_exclusion_range(iommu); iommu_init_msi(iommu); iommu_enable(iommu); + iommu_flush_all_caches(iommu); } } @@ -1274,8 +1281,8 @@ static void amd_iommu_resume(void) * we have to flush after the IOMMUs are enabled because a * disabled IOMMU will never execute the commands we send */ - amd_iommu_flush_all_devices(); - amd_iommu_flush_all_domains(); + for_each_iommu(iommu) + iommu_flush_all_caches(iommu); } static int amd_iommu_suspend(void) -- cgit v0.10.2 From ba4b87ad5497cba555954885db99c99627f93748 Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Thu, 31 Mar 2011 08:08:09 -0400 Subject: dma-debug: print information about leaked entry When a driver leaks a DMA mapping, print additional information about one of the leaked entries to help investigate the problem. The patch should be useful for debugging drivers which map many different classes of buffers.
Signed-off-by: Stanislaw Gruszka Signed-off-by: Joerg Roedel diff --git a/lib/dma-debug.c b/lib/dma-debug.c index 4bfb047..db07bfd 100644 --- a/lib/dma-debug.c +++ b/lib/dma-debug.c @@ -649,7 +649,7 @@ out_err: return -ENOMEM; } -static int device_dma_allocations(struct device *dev) +static int device_dma_allocations(struct device *dev, struct dma_debug_entry **out_entry) { struct dma_debug_entry *entry; unsigned long flags; @@ -660,8 +660,10 @@ static int device_dma_allocations(struct device *dev) for (i = 0; i < HASH_SIZE; ++i) { spin_lock(&dma_entry_hash[i].lock); list_for_each_entry(entry, &dma_entry_hash[i].list, list) { - if (entry->dev == dev) + if (entry->dev == dev) { count += 1; + *out_entry = entry; + } } spin_unlock(&dma_entry_hash[i].lock); } @@ -674,6 +676,7 @@ static int device_dma_allocations(struct device *dev) static int dma_debug_device_change(struct notifier_block *nb, unsigned long action, void *data) { struct device *dev = data; + struct dma_debug_entry *uninitialized_var(entry); int count; if (global_disable) @@ -681,12 +684,17 @@ static int dma_debug_device_change(struct notifier_block *nb, unsigned long acti switch (action) { case BUS_NOTIFY_UNBOUND_DRIVER: - count = device_dma_allocations(dev); + count = device_dma_allocations(dev, &entry); if (count == 0) break; - err_printk(dev, NULL, "DMA-API: device driver has pending " + err_printk(dev, entry, "DMA-API: device driver has pending " "DMA allocations while released from device " - "[count=%d]\n", count); + "[count=%d]\n" + "One of leaked entries details: " + "[device address=0x%016llx] [size=%llu bytes] " + "[mapped with %s] [mapped as %s]\n", + count, entry->dev_addr, entry->size, + dir2name[entry->direction], type2name[entry->type]); break; default: break; -- cgit v0.10.2 From 5cdede2408e80f190c5595e592c24e77c1bf44b2 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 4 Apr 2011 15:55:18 +0200 Subject: PCI: Move ATS declarations into separate header file This patch moves the relevant declarations from the local header file in drivers/pci to a more accessible location so that they can be used by the AMD IOMMU driver too. The file is named pci-ats.h because support for the PCI PRI capability will also be added there in a later patch-set.
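For reference, here is a minimal sketch of how a driver could use the helpers that this patch exposes through <linux/pci-ats.h>. It is an illustration only, not taken from an in-tree user, and error handling is reduced to the bare minimum.

	#include <linux/pci.h>
	#include <linux/pci-ats.h>

	/* Illustrative only: enable ATS on a device if the kernel supports it. */
	static int example_setup_ats(struct pci_dev *pdev)
	{
		int ret;

		/* The second argument is the smallest translation unit (page shift). */
		ret = pci_enable_ats(pdev, PAGE_SHIFT);
		if (ret)
			return ret;	/* no ATS capability, or CONFIG_PCI_IOV disabled */

		dev_info(&pdev->dev, "ATS enabled, invalidate queue depth %d\n",
			 pci_ats_queue_depth(pdev));

		return 0;
	}

	static void example_teardown_ats(struct pci_dev *pdev)
	{
		if (pci_ats_enabled(pdev))
			pci_disable_ats(pdev);
	}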
Signed-off-by: Joerg Roedel Acked-by: Jesse Barnes diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 7da3bef..fdb2cef 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include #include "pci.h" diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c index 553d8ee..42fae47 100644 --- a/drivers/pci/iov.c +++ b/drivers/pci/iov.c @@ -13,6 +13,7 @@ #include #include #include +#include #include "pci.h" #define VIRTFN_ID_LEN 16 diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index a6ec200..4020025 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -250,15 +250,6 @@ struct pci_sriov { u8 __iomem *mstate; /* VF Migration State Array */ }; -/* Address Translation Service */ -struct pci_ats { - int pos; /* capability position */ - int stu; /* Smallest Translation Unit */ - int qdep; /* Invalidate Queue Depth */ - int ref_cnt; /* Physical Function reference count */ - unsigned int is_enabled:1; /* Enable bit is set */ -}; - #ifdef CONFIG_PCI_IOV extern int pci_iov_init(struct pci_dev *dev); extern void pci_iov_release(struct pci_dev *dev); @@ -269,19 +260,6 @@ extern resource_size_t pci_sriov_resource_alignment(struct pci_dev *dev, extern void pci_restore_iov_state(struct pci_dev *dev); extern int pci_iov_bus_range(struct pci_bus *bus); -extern int pci_enable_ats(struct pci_dev *dev, int ps); -extern void pci_disable_ats(struct pci_dev *dev); -extern int pci_ats_queue_depth(struct pci_dev *dev); -/** - * pci_ats_enabled - query the ATS status - * @dev: the PCI device - * - * Returns 1 if ATS capability is enabled, or 0 if not. - */ -static inline int pci_ats_enabled(struct pci_dev *dev) -{ - return dev->ats && dev->ats->is_enabled; -} #else static inline int pci_iov_init(struct pci_dev *dev) { @@ -304,21 +282,6 @@ static inline int pci_iov_bus_range(struct pci_bus *bus) return 0; } -static inline int pci_enable_ats(struct pci_dev *dev, int ps) -{ - return -ENODEV; -} -static inline void pci_disable_ats(struct pci_dev *dev) -{ -} -static inline int pci_ats_queue_depth(struct pci_dev *dev) -{ - return -ENODEV; -} -static inline int pci_ats_enabled(struct pci_dev *dev) -{ - return 0; -} #endif /* CONFIG_PCI_IOV */ static inline resource_size_t pci_resource_alignment(struct pci_dev *dev, diff --git a/include/linux/pci-ats.h b/include/linux/pci-ats.h new file mode 100644 index 0000000..655824f --- /dev/null +++ b/include/linux/pci-ats.h @@ -0,0 +1,52 @@ +#ifndef LINUX_PCI_ATS_H +#define LINUX_PCI_ATS_H + +/* Address Translation Service */ +struct pci_ats { + int pos; /* capability position */ + int stu; /* Smallest Translation Unit */ + int qdep; /* Invalidate Queue Depth */ + int ref_cnt; /* Physical Function reference count */ + unsigned int is_enabled:1; /* Enable bit is set */ +}; + +#ifdef CONFIG_PCI_IOV + +extern int pci_enable_ats(struct pci_dev *dev, int ps); +extern void pci_disable_ats(struct pci_dev *dev); +extern int pci_ats_queue_depth(struct pci_dev *dev); +/** + * pci_ats_enabled - query the ATS status + * @dev: the PCI device + * + * Returns 1 if ATS capability is enabled, or 0 if not. 
+ */ +static inline int pci_ats_enabled(struct pci_dev *dev) +{ + return dev->ats && dev->ats->is_enabled; +} + +#else /* CONFIG_PCI_IOV */ + +static inline int pci_enable_ats(struct pci_dev *dev, int ps) +{ + return -ENODEV; +} + +static inline void pci_disable_ats(struct pci_dev *dev) +{ +} + +static inline int pci_ats_queue_depth(struct pci_dev *dev) +{ + return -ENODEV; +} + +static inline int pci_ats_enabled(struct pci_dev *dev) +{ + return 0; +} + +#endif /* CONFIG_PCI_IOV */ + +#endif /* LINUX_PCI_ATS_H*/ -- cgit v0.10.2 From 9844b4e5dd1932e175a23d84ce09702bdf4b5689 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 5 Apr 2011 09:22:56 +0200 Subject: x86/amd-iommu: Select PCI_IOV with AMD IOMMU driver In order to support ATS in the AMD IOMMU driver this patch makes sure that the generic support for ATS is compiled in. Signed-off-by: Joerg Roedel diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index cc6c53a..8cc29da 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -690,6 +690,7 @@ config AMD_IOMMU bool "AMD IOMMU support" select SWIOTLB select PCI_MSI + select PCI_IOV depends on X86_64 && PCI && ACPI ---help--- With this option you can enable support for AMD IOMMU hardware in -- cgit v0.10.2 From cb41ed85efa01e633388314c03a4f3004c6b783b Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 5 Apr 2011 11:00:53 +0200 Subject: x86/amd-iommu: Flush device IOTLB if ATS is enabled This patch implements a function to flush the IOTLB on devices supporting ATS and makes sure that this TLB is also flushed if necessary. Signed-off-by: Joerg Roedel diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h index 878ae00..f5d184e7 100644 --- a/arch/x86/include/asm/amd_iommu_types.h +++ b/arch/x86/include/asm/amd_iommu_types.h @@ -113,7 +113,8 @@ /* command specific defines */ #define CMD_COMPL_WAIT 0x01 #define CMD_INV_DEV_ENTRY 0x02 -#define CMD_INV_IOMMU_PAGES 0x03 +#define CMD_INV_IOMMU_PAGES 0x03 +#define CMD_INV_IOTLB_PAGES 0x04 #define CMD_COMPL_WAIT_STORE_MASK 0x01 #define CMD_COMPL_WAIT_INT_MASK 0x02 diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index bcf58ea..f3ce433 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -18,6 +18,7 @@ */ #include +#include #include #include #include @@ -463,6 +464,37 @@ static void build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address, cmd->data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK; } +static void build_inv_iotlb_pages(struct iommu_cmd *cmd, u16 devid, int qdep, + u64 address, size_t size) +{ + u64 pages; + int s; + + pages = iommu_num_pages(address, size, PAGE_SIZE); + s = 0; + + if (pages > 1) { + /* + * If we have to flush more than one page, flush all + * TLB entries for this domain + */ + address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS; + s = 1; + } + + address &= PAGE_MASK; + + memset(cmd, 0, sizeof(*cmd)); + cmd->data[0] = devid; + cmd->data[0] |= (qdep & 0xff) << 24; + cmd->data[1] = devid; + cmd->data[2] = lower_32_bits(address); + cmd->data[3] = upper_32_bits(address); + CMD_SET_TYPE(cmd, CMD_INV_IOTLB_PAGES); + if (s) + cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK; +} + /* * Writes the command to the IOMMUs command buffer and informs the * hardware about the new command. 
@@ -574,17 +606,47 @@ void iommu_flush_all_caches(struct amd_iommu *iommu) } /* + * Command send function for flushing on-device TLB + */ +static int device_flush_iotlb(struct device *dev, u64 address, size_t size) +{ + struct pci_dev *pdev = to_pci_dev(dev); + struct amd_iommu *iommu; + struct iommu_cmd cmd; + u16 devid; + int qdep; + + qdep = pci_ats_queue_depth(pdev); + devid = get_device_id(dev); + iommu = amd_iommu_rlookup_table[devid]; + + build_inv_iotlb_pages(&cmd, devid, qdep, address, size); + + return iommu_queue_command(iommu, &cmd); +} + +/* * Command send function for invalidating a device table entry */ static int device_flush_dte(struct device *dev) { struct amd_iommu *iommu; + struct pci_dev *pdev; u16 devid; + int ret; + pdev = to_pci_dev(dev); devid = get_device_id(dev); iommu = amd_iommu_rlookup_table[devid]; - return iommu_flush_dte(iommu, devid); + ret = iommu_flush_dte(iommu, devid); + if (ret) + return ret; + + if (pci_ats_enabled(pdev)) + ret = device_flush_iotlb(dev, 0, ~0UL); + + return ret; } /* @@ -595,6 +657,7 @@ static int device_flush_dte(struct device *dev) static void __domain_flush_pages(struct protection_domain *domain, u64 address, size_t size, int pde) { + struct iommu_dev_data *dev_data; struct iommu_cmd cmd; int ret = 0, i; @@ -611,6 +674,15 @@ static void __domain_flush_pages(struct protection_domain *domain, ret |= iommu_queue_command(amd_iommus[i], &cmd); } + list_for_each_entry(dev_data, &domain->dev_list, list) { + struct pci_dev *pdev = to_pci_dev(dev_data->dev); + + if (!pci_ats_enabled(pdev)) + continue; + + ret |= device_flush_iotlb(dev_data->dev, address, size); + } + WARN_ON(ret); } -- cgit v0.10.2 From 60f723b4117507c05c8b0b5c8b98ecc12a76878e Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 5 Apr 2011 12:50:24 +0200 Subject: x86/amd-iommu: Add flag to indicate IOTLB support This patch adds a flag to the AMD IOMMU driver to indicate that all IOMMUs present in the system support device IOTLBs. Signed-off-by: Joerg Roedel diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h index f5d184e7..cb811c9 100644 --- a/arch/x86/include/asm/amd_iommu_types.h +++ b/arch/x86/include/asm/amd_iommu_types.h @@ -250,6 +250,8 @@ extern bool amd_iommu_dump; /* global flag if IOMMUs cache non-present entries */ extern bool amd_iommu_np_cache; +/* Only true if all IOMMUs support device IOTLBs */ +extern bool amd_iommu_iotlb_sup; /* * Make iterating over all IOMMUs easier diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 8848dda..b6c634f 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -137,6 +137,7 @@ int amd_iommus_present; /* IOMMUs have a non-present cache? */ bool amd_iommu_np_cache __read_mostly; +bool amd_iommu_iotlb_sup __read_mostly = true; /* * The ACPI table parsing functions set this variable on an error @@ -673,6 +674,9 @@ static void __init init_iommu_from_pci(struct amd_iommu *iommu) MMIO_GET_LD(range)); iommu->evt_msi_num = MMIO_MSI_NUM(misc); + if (!(iommu->cap & (1 << IOMMU_CAP_IOTLB))) + amd_iommu_iotlb_sup = false; + if (!is_rd890_iommu(iommu->dev)) return; -- cgit v0.10.2 From fd7b5535e10ce820f030842da3f289f80ec0d4f3 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 5 Apr 2011 15:31:08 +0200 Subject: x86/amd-iommu: Add ATS enable/disable code This patch adds the necessary code to the AMD IOMMU driver for enabling and disabling the ATS capability on a device and to setup the IOMMU data structures correctly. 
Signed-off-by: Joerg Roedel diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h index cb811c9..7434377 100644 --- a/arch/x86/include/asm/amd_iommu_types.h +++ b/arch/x86/include/asm/amd_iommu_types.h @@ -216,6 +216,8 @@ #define IOMMU_PTE_IR (1ULL << 61) #define IOMMU_PTE_IW (1ULL << 62) +#define DTE_FLAG_IOTLB 0x01 + #define IOMMU_PAGE_MASK (((1ULL << 52) - 1) & ~0xfffULL) #define IOMMU_PTE_PRESENT(pte) ((pte) & IOMMU_PTE_P) #define IOMMU_PTE_PAGE(pte) (phys_to_virt((pte) & IOMMU_PAGE_MASK)) diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index f3ce433..e4791f6 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -1452,17 +1452,22 @@ static bool dma_ops_domain(struct protection_domain *domain) return domain->flags & PD_DMA_OPS_MASK; } -static void set_dte_entry(u16 devid, struct protection_domain *domain) +static void set_dte_entry(u16 devid, struct protection_domain *domain, bool ats) { u64 pte_root = virt_to_phys(domain->pt_root); + u32 flags = 0; pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK) << DEV_ENTRY_MODE_SHIFT; pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | IOMMU_PTE_TV; - amd_iommu_dev_table[devid].data[2] = domain->id; - amd_iommu_dev_table[devid].data[1] = upper_32_bits(pte_root); - amd_iommu_dev_table[devid].data[0] = lower_32_bits(pte_root); + if (ats) + flags |= DTE_FLAG_IOTLB; + + amd_iommu_dev_table[devid].data[3] |= flags; + amd_iommu_dev_table[devid].data[2] = domain->id; + amd_iommu_dev_table[devid].data[1] = upper_32_bits(pte_root); + amd_iommu_dev_table[devid].data[0] = lower_32_bits(pte_root); } static void clear_dte_entry(u16 devid) @@ -1479,16 +1484,22 @@ static void do_attach(struct device *dev, struct protection_domain *domain) { struct iommu_dev_data *dev_data; struct amd_iommu *iommu; + struct pci_dev *pdev; + bool ats = false; u16 devid; devid = get_device_id(dev); iommu = amd_iommu_rlookup_table[devid]; dev_data = get_dev_data(dev); + pdev = to_pci_dev(dev); + + if (amd_iommu_iotlb_sup) + ats = pci_ats_enabled(pdev); /* Update data structures */ dev_data->domain = domain; list_add(&dev_data->list, &domain->dev_list); - set_dte_entry(devid, domain); + set_dte_entry(devid, domain, ats); /* Do reference counting */ domain->dev_iommu[iommu->index] += 1; @@ -1502,11 +1513,13 @@ static void do_detach(struct device *dev) { struct iommu_dev_data *dev_data; struct amd_iommu *iommu; + struct pci_dev *pdev; u16 devid; devid = get_device_id(dev); iommu = amd_iommu_rlookup_table[devid]; dev_data = get_dev_data(dev); + pdev = to_pci_dev(dev); /* decrease reference counters */ dev_data->domain->dev_iommu[iommu->index] -= 1; @@ -1581,9 +1594,13 @@ out_unlock: static int attach_device(struct device *dev, struct protection_domain *domain) { + struct pci_dev *pdev = to_pci_dev(dev); unsigned long flags; int ret; + if (amd_iommu_iotlb_sup) + pci_enable_ats(pdev, PAGE_SHIFT); + write_lock_irqsave(&amd_iommu_devtable_lock, flags); ret = __attach_device(dev, domain); write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); @@ -1640,12 +1657,16 @@ static void __detach_device(struct device *dev) */ static void detach_device(struct device *dev) { + struct pci_dev *pdev = to_pci_dev(dev); unsigned long flags; /* lock device table */ write_lock_irqsave(&amd_iommu_devtable_lock, flags); __detach_device(dev); write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); + + if (amd_iommu_iotlb_sup && pci_ats_enabled(pdev)) + pci_disable_ats(pdev); } /* @@ -1795,8 +1816,9 @@ static void 
update_device_table(struct protection_domain *domain) struct iommu_dev_data *dev_data; list_for_each_entry(dev_data, &domain->dev_list, list) { + struct pci_dev *pdev = to_pci_dev(dev_data->dev); u16 devid = get_device_id(dev_data->dev); - set_dte_entry(devid, domain); + set_dte_entry(devid, domain, pci_ats_enabled(pdev)); } } -- cgit v0.10.2 From d99ddec3eee0be8a43b2c1ff624b9dfaaa26b959 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 11 Apr 2011 11:03:18 +0200 Subject: x86/amd-iommu: Add extended feature detection This patch adds detection of the extended features of an AMD IOMMU. The available features are printed to dmesg on boot. Signed-off-by: Joerg Roedel diff --git a/arch/x86/include/asm/amd_iommu_proto.h b/arch/x86/include/asm/amd_iommu_proto.h index 1223c0f..a4ae6c3 100644 --- a/arch/x86/include/asm/amd_iommu_proto.h +++ b/arch/x86/include/asm/amd_iommu_proto.h @@ -19,7 +19,7 @@ #ifndef _ASM_X86_AMD_IOMMU_PROTO_H #define _ASM_X86_AMD_IOMMU_PROTO_H -struct amd_iommu; +#include extern int amd_iommu_init_dma_ops(void); extern int amd_iommu_init_passthrough(void); @@ -42,4 +42,12 @@ static inline bool is_rd890_iommu(struct pci_dev *pdev) (pdev->device == PCI_DEVICE_ID_RD890_IOMMU); } +static inline bool iommu_feature(struct amd_iommu *iommu, u64 f) +{ + if (!(iommu->cap & (1 << IOMMU_CAP_EFR))) + return false; + + return !!(iommu->features & f); +} + #endif /* _ASM_X86_AMD_IOMMU_PROTO_H */ diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h index 878ae00..5c24e46 100644 --- a/arch/x86/include/asm/amd_iommu_types.h +++ b/arch/x86/include/asm/amd_iommu_types.h @@ -68,12 +68,25 @@ #define MMIO_CONTROL_OFFSET 0x0018 #define MMIO_EXCL_BASE_OFFSET 0x0020 #define MMIO_EXCL_LIMIT_OFFSET 0x0028 +#define MMIO_EXT_FEATURES 0x0030 #define MMIO_CMD_HEAD_OFFSET 0x2000 #define MMIO_CMD_TAIL_OFFSET 0x2008 #define MMIO_EVT_HEAD_OFFSET 0x2010 #define MMIO_EVT_TAIL_OFFSET 0x2018 #define MMIO_STATUS_OFFSET 0x2020 + +/* Extended Feature Bits */ +#define FEATURE_PREFETCH (1ULL<<0) +#define FEATURE_PPR (1ULL<<1) +#define FEATURE_X2APIC (1ULL<<2) +#define FEATURE_NX (1ULL<<3) +#define FEATURE_GT (1ULL<<4) +#define FEATURE_IA (1ULL<<6) +#define FEATURE_GA (1ULL<<7) +#define FEATURE_HE (1ULL<<8) +#define FEATURE_PC (1ULL<<9) + /* MMIO status bits */ #define MMIO_STATUS_COM_WAIT_INT_MASK 0x04 @@ -227,6 +240,7 @@ /* IOMMU capabilities */ #define IOMMU_CAP_IOTLB 24 #define IOMMU_CAP_NPCACHE 26 +#define IOMMU_CAP_EFR 27 #define MAX_DOMAIN_ID 65536 @@ -371,6 +385,9 @@ struct amd_iommu { /* flags read from acpi table */ u8 acpi_flags; + /* Extended features */ + u64 features; + /* * Capability pointer. 
There could be more than one IOMMU per PCI * device function if there are more than one AMD IOMMU capability diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 8848dda..047905d 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -299,9 +299,23 @@ static void iommu_feature_disable(struct amd_iommu *iommu, u8 bit) /* Function to enable the hardware */ static void iommu_enable(struct amd_iommu *iommu) { - printk(KERN_INFO "AMD-Vi: Enabling IOMMU at %s cap 0x%hx\n", + static const char * const feat_str[] = { + "PreF", "PPR", "X2APIC", "NX", "GT", "[5]", + "IA", "GA", "HE", "PC", NULL + }; + int i; + + printk(KERN_INFO "AMD-Vi: Enabling IOMMU at %s cap 0x%hx", dev_name(&iommu->dev->dev), iommu->cap_ptr); + if (iommu->cap & (1 << IOMMU_CAP_EFR)) { + printk(KERN_CONT " extended features: "); + for (i = 0; feat_str[i]; ++i) + if (iommu_feature(iommu, (1ULL << i))) + printk(KERN_CONT " %s", feat_str[i]); + } + printk(KERN_CONT "\n"); + iommu_feature_enable(iommu, CONTROL_IOMMU_EN); } @@ -657,7 +671,7 @@ static void __init set_device_exclusion_range(u16 devid, struct ivmd_header *m) static void __init init_iommu_from_pci(struct amd_iommu *iommu) { int cap_ptr = iommu->cap_ptr; - u32 range, misc; + u32 range, misc, low, high; int i, j; pci_read_config_dword(iommu->dev, cap_ptr + MMIO_CAP_HDR_OFFSET, @@ -673,6 +687,12 @@ static void __init init_iommu_from_pci(struct amd_iommu *iommu) MMIO_GET_LD(range)); iommu->evt_msi_num = MMIO_MSI_NUM(misc); + /* read extended feature bits */ + low = readl(iommu->mmio_base + MMIO_EXT_FEATURES); + high = readl(iommu->mmio_base + MMIO_EXT_FEATURES + 4); + + iommu->features = ((u64)high << 32) | low; + if (!is_rd890_iommu(iommu->dev)) return; -- cgit v0.10.2 From 58fc7f1419560efa9c426b829c195050e0147d7f Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 11 Apr 2011 11:13:24 +0200 Subject: x86/amd-iommu: Add support for invalidate_all command This patch adds support for the invalidate_all command present in new versions of the AMD IOMMU. Signed-off-by: Joerg Roedel diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h index 5c24e46..df62d26 100644 --- a/arch/x86/include/asm/amd_iommu_types.h +++ b/arch/x86/include/asm/amd_iommu_types.h @@ -127,6 +127,7 @@ #define CMD_COMPL_WAIT 0x01 #define CMD_INV_DEV_ENTRY 0x02 #define CMD_INV_IOMMU_PAGES 0x03 +#define CMD_INV_ALL 0x08 #define CMD_COMPL_WAIT_STORE_MASK 0x01 #define CMD_COMPL_WAIT_INT_MASK 0x02 diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index bcf58ea..d6192bc 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -463,6 +463,12 @@ static void build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address, cmd->data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK; } +static void build_inv_all(struct iommu_cmd *cmd) +{ + memset(cmd, 0, sizeof(*cmd)); + CMD_SET_TYPE(cmd, CMD_INV_ALL); +} + /* * Writes the command to the IOMMUs command buffer and informs the * hardware about the new command. 
@@ -567,10 +573,24 @@ static void iommu_flush_tlb_all(struct amd_iommu *iommu) iommu_completion_wait(iommu); } +static void iommu_flush_all(struct amd_iommu *iommu) +{ + struct iommu_cmd cmd; + + build_inv_all(&cmd); + + iommu_queue_command(iommu, &cmd); + iommu_completion_wait(iommu); +} + void iommu_flush_all_caches(struct amd_iommu *iommu) { - iommu_flush_dte_all(iommu); - iommu_flush_tlb_all(iommu); + if (iommu_feature(iommu, FEATURE_IA)) { + iommu_flush_all(iommu); + } else { + iommu_flush_dte_all(iommu); + iommu_flush_tlb_all(iommu); + } } /* -- cgit v0.10.2 From e969687595c27e02e02be0c9363261826123ba77 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Fri, 5 Nov 2010 16:12:35 -0700 Subject: arch/x86/kernel/pci-iommu_table.c: Convert sprintf_symbol to %pS Coalesce format as well. Signed-off-by: Joe Perches Signed-off-by: Joerg Roedel diff --git a/arch/x86/kernel/pci-iommu_table.c b/arch/x86/kernel/pci-iommu_table.c index 55d745e..35ccf75 100644 --- a/arch/x86/kernel/pci-iommu_table.c +++ b/arch/x86/kernel/pci-iommu_table.c @@ -50,20 +50,14 @@ void __init check_iommu_entries(struct iommu_table_entry *start, struct iommu_table_entry *finish) { struct iommu_table_entry *p, *q, *x; - char sym_p[KSYM_SYMBOL_LEN]; - char sym_q[KSYM_SYMBOL_LEN]; /* Simple cyclic dependency checker. */ for (p = start; p < finish; p++) { q = find_dependents_of(start, finish, p); x = find_dependents_of(start, finish, q); if (p == x) { - sprint_symbol(sym_p, (unsigned long)p->detect); - sprint_symbol(sym_q, (unsigned long)q->detect); - - printk(KERN_ERR "CYCLIC DEPENDENCY FOUND! %s depends" \ - " on %s and vice-versa. BREAKING IT.\n", - sym_p, sym_q); + printk(KERN_ERR "CYCLIC DEPENDENCY FOUND! %pS depends on %pS and vice-versa. BREAKING IT.\n", + p->detect, q->detect); /* Heavy handed way..*/ x->depend = 0; } @@ -72,12 +66,8 @@ void __init check_iommu_entries(struct iommu_table_entry *start, for (p = start; p < finish; p++) { q = find_dependents_of(p, finish, p); if (q && q > p) { - sprint_symbol(sym_p, (unsigned long)p->detect); - sprint_symbol(sym_q, (unsigned long)q->detect); - - printk(KERN_ERR "EXECUTION ORDER INVALID! %s "\ - "should be called before %s!\n", - sym_p, sym_q); + printk(KERN_ERR "EXECUTION ORDER INVALID! %pS should be called before %pS!\n", + p->detect, q->detect); } } } -- cgit v0.10.2 From 72fe00f01f9a3240a1073be27aeaf4fc476cc662 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 10 May 2011 10:50:42 +0200 Subject: x86/amd-iommu: Use threaded interrupt handler Move the interrupt handling for the IOMMU into the interrupt thread to reduce latencies and to prepare the interrupt handling for PRI handling.
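The conversion follows the generic threaded-interrupt pattern: a minimal hard handler that merely wakes the IRQ thread, and a threaded handler that does the actual event processing in a context that may sleep. A generic sketch of that pattern is shown below; the device structure and the worker function are placeholders, not code from this driver.

	#include <linux/interrupt.h>

	struct example_device {		/* placeholder device state */
		int irq;
	};

	static void example_poll_events(struct example_device *edev)
	{
		/* placeholder for the real event processing */
	}

	/* Hard IRQ context: keep it minimal and just kick the thread. */
	static irqreturn_t example_hardirq(int irq, void *data)
	{
		return IRQ_WAKE_THREAD;
	}

	/* Thread context: may sleep, so heavier work is allowed here. */
	static irqreturn_t example_thread_fn(int irq, void *data)
	{
		struct example_device *edev = data;

		example_poll_events(edev);

		return IRQ_HANDLED;
	}

	static int example_setup_irq(struct example_device *edev)
	{
		return request_threaded_irq(edev->irq, example_hardirq,
					    example_thread_fn, 0, "example", edev);
	}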
Signed-off-by: Joerg Roedel diff --git a/arch/x86/include/asm/amd_iommu_proto.h b/arch/x86/include/asm/amd_iommu_proto.h index a4ae6c3..55d95eb 100644 --- a/arch/x86/include/asm/amd_iommu_proto.h +++ b/arch/x86/include/asm/amd_iommu_proto.h @@ -23,6 +23,7 @@ extern int amd_iommu_init_dma_ops(void); extern int amd_iommu_init_passthrough(void); +extern irqreturn_t amd_iommu_int_thread(int irq, void *data); extern irqreturn_t amd_iommu_int_handler(int irq, void *data); extern void amd_iommu_apply_erratum_63(u16 devid); extern void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu); diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index dc5ddda..873e7e1 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -366,7 +366,7 @@ static void iommu_poll_events(struct amd_iommu *iommu) spin_unlock_irqrestore(&iommu->lock, flags); } -irqreturn_t amd_iommu_int_handler(int irq, void *data) +irqreturn_t amd_iommu_int_thread(int irq, void *data) { struct amd_iommu *iommu; @@ -376,6 +376,11 @@ irqreturn_t amd_iommu_int_handler(int irq, void *data) return IRQ_HANDLED; } +irqreturn_t amd_iommu_int_handler(int irq, void *data) +{ + return IRQ_WAKE_THREAD; +} + /**************************************************************************** * * IOMMU command queuing functions diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 28b0781..9179c21 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -1034,10 +1034,11 @@ static int iommu_setup_msi(struct amd_iommu *iommu) if (pci_enable_msi(iommu->dev)) return 1; - r = request_irq(iommu->dev->irq, amd_iommu_int_handler, - IRQF_SAMPLE_RANDOM, - "AMD-Vi", - NULL); + r = request_threaded_irq(iommu->dev->irq, + amd_iommu_int_handler, + amd_iommu_int_thread, + 0, "AMD-Vi", + iommu->dev); if (r) { pci_disable_msi(iommu->dev); -- cgit v0.10.2 From fffcda1183e93df84ad73ba7eb7782a5c354e2b3 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 10 May 2011 17:22:06 +0200 Subject: x86, gart: Rename pci-gart_64.c to amd_gart_64.c This file only contains code relevant for the northbridge gart in AMD processors. This patch renames the file to represent this fact in the filename. Signed-off-by: Joerg Roedel diff --git a/Documentation/x86/x86_64/boot-options.txt b/Documentation/x86/x86_64/boot-options.txt index 092e596..c54b4f5 100644 --- a/Documentation/x86/x86_64/boot-options.txt +++ b/Documentation/x86/x86_64/boot-options.txt @@ -206,7 +206,7 @@ IOMMU (input/output memory management unit) (e.g. because you have < 3 GB memory). Kernel boot message: "PCI-DMA: Disabling IOMMU" - 2. : AMD GART based hardware IOMMU. + 2. : AMD GART based hardware IOMMU. Kernel boot message: "PCI-DMA: using GART IOMMU" 3. : Software IOMMU implementation. Used diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 7338ef2..97ebf82e 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -117,7 +117,7 @@ obj-$(CONFIG_OF) += devicetree.o ifeq ($(CONFIG_X86_64),y) obj-$(CONFIG_AUDIT) += audit_64.o - obj-$(CONFIG_GART_IOMMU) += pci-gart_64.o aperture_64.o + obj-$(CONFIG_GART_IOMMU) += amd_gart_64.o aperture_64.o obj-$(CONFIG_CALGARY_IOMMU) += pci-calgary_64.o tce_64.o obj-$(CONFIG_AMD_IOMMU) += amd_iommu_init.o amd_iommu.o diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c new file mode 100644 index 0000000..b117efd --- /dev/null +++ b/arch/x86/kernel/amd_gart_64.c @@ -0,0 +1,898 @@ +/* + * Dynamic DMA mapping support for AMD Hammer. 
+ * + * Use the integrated AGP GART in the Hammer northbridge as an IOMMU for PCI. + * This allows to use PCI devices that only support 32bit addresses on systems + * with more than 4GB. + * + * See Documentation/PCI/PCI-DMA-mapping.txt for the interface specification. + * + * Copyright 2002 Andi Kleen, SuSE Labs. + * Subject to the GNU General Public License v2 only. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static unsigned long iommu_bus_base; /* GART remapping area (physical) */ +static unsigned long iommu_size; /* size of remapping area bytes */ +static unsigned long iommu_pages; /* .. and in pages */ + +static u32 *iommu_gatt_base; /* Remapping table */ + +static dma_addr_t bad_dma_addr; + +/* + * If this is disabled the IOMMU will use an optimized flushing strategy + * of only flushing when an mapping is reused. With it true the GART is + * flushed for every mapping. Problem is that doing the lazy flush seems + * to trigger bugs with some popular PCI cards, in particular 3ware (but + * has been also also seen with Qlogic at least). + */ +static int iommu_fullflush = 1; + +/* Allocation bitmap for the remapping area: */ +static DEFINE_SPINLOCK(iommu_bitmap_lock); +/* Guarded by iommu_bitmap_lock: */ +static unsigned long *iommu_gart_bitmap; + +static u32 gart_unmapped_entry; + +#define GPTE_VALID 1 +#define GPTE_COHERENT 2 +#define GPTE_ENCODE(x) \ + (((x) & 0xfffff000) | (((x) >> 32) << 4) | GPTE_VALID | GPTE_COHERENT) +#define GPTE_DECODE(x) (((x) & 0xfffff000) | (((u64)(x) & 0xff0) << 28)) + +#define EMERGENCY_PAGES 32 /* = 128KB */ + +#ifdef CONFIG_AGP +#define AGPEXTERN extern +#else +#define AGPEXTERN +#endif + +/* GART can only remap to physical addresses < 1TB */ +#define GART_MAX_PHYS_ADDR (1ULL << 40) + +/* backdoor interface to AGP driver */ +AGPEXTERN int agp_memory_reserved; +AGPEXTERN __u32 *agp_gatt_table; + +static unsigned long next_bit; /* protected by iommu_bitmap_lock */ +static bool need_flush; /* global flush state. 
set for each gart wrap */ + +static unsigned long alloc_iommu(struct device *dev, int size, + unsigned long align_mask) +{ + unsigned long offset, flags; + unsigned long boundary_size; + unsigned long base_index; + + base_index = ALIGN(iommu_bus_base & dma_get_seg_boundary(dev), + PAGE_SIZE) >> PAGE_SHIFT; + boundary_size = ALIGN((u64)dma_get_seg_boundary(dev) + 1, + PAGE_SIZE) >> PAGE_SHIFT; + + spin_lock_irqsave(&iommu_bitmap_lock, flags); + offset = iommu_area_alloc(iommu_gart_bitmap, iommu_pages, next_bit, + size, base_index, boundary_size, align_mask); + if (offset == -1) { + need_flush = true; + offset = iommu_area_alloc(iommu_gart_bitmap, iommu_pages, 0, + size, base_index, boundary_size, + align_mask); + } + if (offset != -1) { + next_bit = offset+size; + if (next_bit >= iommu_pages) { + next_bit = 0; + need_flush = true; + } + } + if (iommu_fullflush) + need_flush = true; + spin_unlock_irqrestore(&iommu_bitmap_lock, flags); + + return offset; +} + +static void free_iommu(unsigned long offset, int size) +{ + unsigned long flags; + + spin_lock_irqsave(&iommu_bitmap_lock, flags); + bitmap_clear(iommu_gart_bitmap, offset, size); + if (offset >= next_bit) + next_bit = offset + size; + spin_unlock_irqrestore(&iommu_bitmap_lock, flags); +} + +/* + * Use global flush state to avoid races with multiple flushers. + */ +static void flush_gart(void) +{ + unsigned long flags; + + spin_lock_irqsave(&iommu_bitmap_lock, flags); + if (need_flush) { + amd_flush_garts(); + need_flush = false; + } + spin_unlock_irqrestore(&iommu_bitmap_lock, flags); +} + +#ifdef CONFIG_IOMMU_LEAK +/* Debugging aid for drivers that don't free their IOMMU tables */ +static int leak_trace; +static int iommu_leak_pages = 20; + +static void dump_leak(void) +{ + static int dump; + + if (dump) + return; + dump = 1; + + show_stack(NULL, NULL); + debug_dma_dump_mappings(NULL); +} +#endif + +static void iommu_full(struct device *dev, size_t size, int dir) +{ + /* + * Ran out of IOMMU space for this operation. This is very bad. + * Unfortunately the drivers cannot handle this operation properly. + * Return some non mapped prereserved space in the aperture and + * let the Northbridge deal with it. This will result in garbage + * in the IO operation. When the size exceeds the prereserved space + * memory corruption will occur or random memory will be DMAed + * out. Hopefully no network devices use single mappings that big. + */ + + dev_err(dev, "PCI-DMA: Out of IOMMU space for %lu bytes\n", size); + + if (size > PAGE_SIZE*EMERGENCY_PAGES) { + if (dir == PCI_DMA_FROMDEVICE || dir == PCI_DMA_BIDIRECTIONAL) + panic("PCI-DMA: Memory would be corrupted\n"); + if (dir == PCI_DMA_TODEVICE || dir == PCI_DMA_BIDIRECTIONAL) + panic(KERN_ERR + "PCI-DMA: Random memory would be DMAed\n"); + } +#ifdef CONFIG_IOMMU_LEAK + dump_leak(); +#endif +} + +static inline int +need_iommu(struct device *dev, unsigned long addr, size_t size) +{ + return force_iommu || !dma_capable(dev, addr, size); +} + +static inline int +nonforced_iommu(struct device *dev, unsigned long addr, size_t size) +{ + return !dma_capable(dev, addr, size); +} + +/* Map a single continuous physical area into the IOMMU. + * Caller needs to check if the iommu is needed and flush. 
+ */ +static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem, + size_t size, int dir, unsigned long align_mask) +{ + unsigned long npages = iommu_num_pages(phys_mem, size, PAGE_SIZE); + unsigned long iommu_page; + int i; + + if (unlikely(phys_mem + size > GART_MAX_PHYS_ADDR)) + return bad_dma_addr; + + iommu_page = alloc_iommu(dev, npages, align_mask); + if (iommu_page == -1) { + if (!nonforced_iommu(dev, phys_mem, size)) + return phys_mem; + if (panic_on_overflow) + panic("dma_map_area overflow %lu bytes\n", size); + iommu_full(dev, size, dir); + return bad_dma_addr; + } + + for (i = 0; i < npages; i++) { + iommu_gatt_base[iommu_page + i] = GPTE_ENCODE(phys_mem); + phys_mem += PAGE_SIZE; + } + return iommu_bus_base + iommu_page*PAGE_SIZE + (phys_mem & ~PAGE_MASK); +} + +/* Map a single area into the IOMMU */ +static dma_addr_t gart_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, + enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + unsigned long bus; + phys_addr_t paddr = page_to_phys(page) + offset; + + if (!dev) + dev = &x86_dma_fallback_dev; + + if (!need_iommu(dev, paddr, size)) + return paddr; + + bus = dma_map_area(dev, paddr, size, dir, 0); + flush_gart(); + + return bus; +} + +/* + * Free a DMA mapping. + */ +static void gart_unmap_page(struct device *dev, dma_addr_t dma_addr, + size_t size, enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + unsigned long iommu_page; + int npages; + int i; + + if (dma_addr < iommu_bus_base + EMERGENCY_PAGES*PAGE_SIZE || + dma_addr >= iommu_bus_base + iommu_size) + return; + + iommu_page = (dma_addr - iommu_bus_base)>>PAGE_SHIFT; + npages = iommu_num_pages(dma_addr, size, PAGE_SIZE); + for (i = 0; i < npages; i++) { + iommu_gatt_base[iommu_page + i] = gart_unmapped_entry; + } + free_iommu(iommu_page, npages); +} + +/* + * Wrapper for pci_unmap_single working with scatterlists. + */ +static void gart_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction dir, struct dma_attrs *attrs) +{ + struct scatterlist *s; + int i; + + for_each_sg(sg, s, nents, i) { + if (!s->dma_length || !s->length) + break; + gart_unmap_page(dev, s->dma_address, s->dma_length, dir, NULL); + } +} + +/* Fallback for dma_map_sg in case of overflow */ +static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg, + int nents, int dir) +{ + struct scatterlist *s; + int i; + +#ifdef CONFIG_IOMMU_DEBUG + pr_debug("dma_map_sg overflow\n"); +#endif + + for_each_sg(sg, s, nents, i) { + unsigned long addr = sg_phys(s); + + if (nonforced_iommu(dev, addr, s->length)) { + addr = dma_map_area(dev, addr, s->length, dir, 0); + if (addr == bad_dma_addr) { + if (i > 0) + gart_unmap_sg(dev, sg, i, dir, NULL); + nents = 0; + sg[0].dma_length = 0; + break; + } + } + s->dma_address = addr; + s->dma_length = s->length; + } + flush_gart(); + + return nents; +} + +/* Map multiple scatterlist entries continuous into the first. 
*/ +static int __dma_map_cont(struct device *dev, struct scatterlist *start, + int nelems, struct scatterlist *sout, + unsigned long pages) +{ + unsigned long iommu_start = alloc_iommu(dev, pages, 0); + unsigned long iommu_page = iommu_start; + struct scatterlist *s; + int i; + + if (iommu_start == -1) + return -1; + + for_each_sg(start, s, nelems, i) { + unsigned long pages, addr; + unsigned long phys_addr = s->dma_address; + + BUG_ON(s != start && s->offset); + if (s == start) { + sout->dma_address = iommu_bus_base; + sout->dma_address += iommu_page*PAGE_SIZE + s->offset; + sout->dma_length = s->length; + } else { + sout->dma_length += s->length; + } + + addr = phys_addr; + pages = iommu_num_pages(s->offset, s->length, PAGE_SIZE); + while (pages--) { + iommu_gatt_base[iommu_page] = GPTE_ENCODE(addr); + addr += PAGE_SIZE; + iommu_page++; + } + } + BUG_ON(iommu_page - iommu_start != pages); + + return 0; +} + +static inline int +dma_map_cont(struct device *dev, struct scatterlist *start, int nelems, + struct scatterlist *sout, unsigned long pages, int need) +{ + if (!need) { + BUG_ON(nelems != 1); + sout->dma_address = start->dma_address; + sout->dma_length = start->length; + return 0; + } + return __dma_map_cont(dev, start, nelems, sout, pages); +} + +/* + * DMA map all entries in a scatterlist. + * Merge chunks that have page aligned sizes into a continuous mapping. + */ +static int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction dir, struct dma_attrs *attrs) +{ + struct scatterlist *s, *ps, *start_sg, *sgmap; + int need = 0, nextneed, i, out, start; + unsigned long pages = 0; + unsigned int seg_size; + unsigned int max_seg_size; + + if (nents == 0) + return 0; + + if (!dev) + dev = &x86_dma_fallback_dev; + + out = 0; + start = 0; + start_sg = sg; + sgmap = sg; + seg_size = 0; + max_seg_size = dma_get_max_seg_size(dev); + ps = NULL; /* shut up gcc */ + + for_each_sg(sg, s, nents, i) { + dma_addr_t addr = sg_phys(s); + + s->dma_address = addr; + BUG_ON(s->length == 0); + + nextneed = need_iommu(dev, addr, s->length); + + /* Handle the previous not yet processed entries */ + if (i > start) { + /* + * Can only merge when the last chunk ends on a + * page boundary and the new one doesn't have an + * offset. 
+ */ + if (!iommu_merge || !nextneed || !need || s->offset || + (s->length + seg_size > max_seg_size) || + (ps->offset + ps->length) % PAGE_SIZE) { + if (dma_map_cont(dev, start_sg, i - start, + sgmap, pages, need) < 0) + goto error; + out++; + + seg_size = 0; + sgmap = sg_next(sgmap); + pages = 0; + start = i; + start_sg = s; + } + } + + seg_size += s->length; + need = nextneed; + pages += iommu_num_pages(s->offset, s->length, PAGE_SIZE); + ps = s; + } + if (dma_map_cont(dev, start_sg, i - start, sgmap, pages, need) < 0) + goto error; + out++; + flush_gart(); + if (out < nents) { + sgmap = sg_next(sgmap); + sgmap->dma_length = 0; + } + return out; + +error: + flush_gart(); + gart_unmap_sg(dev, sg, out, dir, NULL); + + /* When it was forced or merged try again in a dumb way */ + if (force_iommu || iommu_merge) { + out = dma_map_sg_nonforce(dev, sg, nents, dir); + if (out > 0) + return out; + } + if (panic_on_overflow) + panic("dma_map_sg: overflow on %lu pages\n", pages); + + iommu_full(dev, pages << PAGE_SHIFT, dir); + for_each_sg(sg, s, nents, i) + s->dma_address = bad_dma_addr; + return 0; +} + +/* allocate and map a coherent mapping */ +static void * +gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr, + gfp_t flag) +{ + dma_addr_t paddr; + unsigned long align_mask; + struct page *page; + + if (force_iommu && !(flag & GFP_DMA)) { + flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); + page = alloc_pages(flag | __GFP_ZERO, get_order(size)); + if (!page) + return NULL; + + align_mask = (1UL << get_order(size)) - 1; + paddr = dma_map_area(dev, page_to_phys(page), size, + DMA_BIDIRECTIONAL, align_mask); + + flush_gart(); + if (paddr != bad_dma_addr) { + *dma_addr = paddr; + return page_address(page); + } + __free_pages(page, get_order(size)); + } else + return dma_generic_alloc_coherent(dev, size, dma_addr, flag); + + return NULL; +} + +/* free a coherent mapping */ +static void +gart_free_coherent(struct device *dev, size_t size, void *vaddr, + dma_addr_t dma_addr) +{ + gart_unmap_page(dev, dma_addr, size, DMA_BIDIRECTIONAL, NULL); + free_pages((unsigned long)vaddr, get_order(size)); +} + +static int gart_mapping_error(struct device *dev, dma_addr_t dma_addr) +{ + return (dma_addr == bad_dma_addr); +} + +static int no_agp; + +static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size) +{ + unsigned long a; + + if (!iommu_size) { + iommu_size = aper_size; + if (!no_agp) + iommu_size /= 2; + } + + a = aper + iommu_size; + iommu_size -= round_up(a, PMD_PAGE_SIZE) - a; + + if (iommu_size < 64*1024*1024) { + pr_warning( + "PCI-DMA: Warning: Small IOMMU %luMB." 
+ " Consider increasing the AGP aperture in BIOS\n", + iommu_size >> 20); + } + + return iommu_size; +} + +static __init unsigned read_aperture(struct pci_dev *dev, u32 *size) +{ + unsigned aper_size = 0, aper_base_32, aper_order; + u64 aper_base; + + pci_read_config_dword(dev, AMD64_GARTAPERTUREBASE, &aper_base_32); + pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &aper_order); + aper_order = (aper_order >> 1) & 7; + + aper_base = aper_base_32 & 0x7fff; + aper_base <<= 25; + + aper_size = (32 * 1024 * 1024) << aper_order; + if (aper_base + aper_size > 0x100000000UL || !aper_size) + aper_base = 0; + + *size = aper_size; + return aper_base; +} + +static void enable_gart_translations(void) +{ + int i; + + if (!amd_nb_has_feature(AMD_NB_GART)) + return; + + for (i = 0; i < amd_nb_num(); i++) { + struct pci_dev *dev = node_to_amd_nb(i)->misc; + + enable_gart_translation(dev, __pa(agp_gatt_table)); + } + + /* Flush the GART-TLB to remove stale entries */ + amd_flush_garts(); +} + +/* + * If fix_up_north_bridges is set, the north bridges have to be fixed up on + * resume in the same way as they are handled in gart_iommu_hole_init(). + */ +static bool fix_up_north_bridges; +static u32 aperture_order; +static u32 aperture_alloc; + +void set_up_gart_resume(u32 aper_order, u32 aper_alloc) +{ + fix_up_north_bridges = true; + aperture_order = aper_order; + aperture_alloc = aper_alloc; +} + +static void gart_fixup_northbridges(void) +{ + int i; + + if (!fix_up_north_bridges) + return; + + if (!amd_nb_has_feature(AMD_NB_GART)) + return; + + pr_info("PCI-DMA: Restoring GART aperture settings\n"); + + for (i = 0; i < amd_nb_num(); i++) { + struct pci_dev *dev = node_to_amd_nb(i)->misc; + + /* + * Don't enable translations just yet. That is the next + * step. Restore the pre-suspend aperture settings. + */ + gart_set_size_and_enable(dev, aperture_order); + pci_write_config_dword(dev, AMD64_GARTAPERTUREBASE, aperture_alloc >> 25); + } +} + +static void gart_resume(void) +{ + pr_info("PCI-DMA: Resuming GART IOMMU\n"); + + gart_fixup_northbridges(); + + enable_gart_translations(); +} + +static struct syscore_ops gart_syscore_ops = { + .resume = gart_resume, + +}; + +/* + * Private Northbridge GATT initialization in case we cannot use the + * AGP driver for some reason. 
+ */ +static __init int init_amd_gatt(struct agp_kern_info *info) +{ + unsigned aper_size, gatt_size, new_aper_size; + unsigned aper_base, new_aper_base; + struct pci_dev *dev; + void *gatt; + int i; + + pr_info("PCI-DMA: Disabling AGP.\n"); + + aper_size = aper_base = info->aper_size = 0; + dev = NULL; + for (i = 0; i < amd_nb_num(); i++) { + dev = node_to_amd_nb(i)->misc; + new_aper_base = read_aperture(dev, &new_aper_size); + if (!new_aper_base) + goto nommu; + + if (!aper_base) { + aper_size = new_aper_size; + aper_base = new_aper_base; + } + if (aper_size != new_aper_size || aper_base != new_aper_base) + goto nommu; + } + if (!aper_base) + goto nommu; + + info->aper_base = aper_base; + info->aper_size = aper_size >> 20; + + gatt_size = (aper_size >> PAGE_SHIFT) * sizeof(u32); + gatt = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, + get_order(gatt_size)); + if (!gatt) + panic("Cannot allocate GATT table"); + if (set_memory_uc((unsigned long)gatt, gatt_size >> PAGE_SHIFT)) + panic("Could not set GART PTEs to uncacheable pages"); + + agp_gatt_table = gatt; + + register_syscore_ops(&gart_syscore_ops); + + flush_gart(); + + pr_info("PCI-DMA: aperture base @ %x size %u KB\n", + aper_base, aper_size>>10); + + return 0; + + nommu: + /* Should not happen anymore */ + pr_warning("PCI-DMA: More than 4GB of RAM and no IOMMU\n" + "falling back to iommu=soft.\n"); + return -1; +} + +static struct dma_map_ops gart_dma_ops = { + .map_sg = gart_map_sg, + .unmap_sg = gart_unmap_sg, + .map_page = gart_map_page, + .unmap_page = gart_unmap_page, + .alloc_coherent = gart_alloc_coherent, + .free_coherent = gart_free_coherent, + .mapping_error = gart_mapping_error, +}; + +static void gart_iommu_shutdown(void) +{ + struct pci_dev *dev; + int i; + + /* don't shutdown it if there is AGP installed */ + if (!no_agp) + return; + + if (!amd_nb_has_feature(AMD_NB_GART)) + return; + + for (i = 0; i < amd_nb_num(); i++) { + u32 ctl; + + dev = node_to_amd_nb(i)->misc; + pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &ctl); + + ctl &= ~GARTEN; + + pci_write_config_dword(dev, AMD64_GARTAPERTURECTL, ctl); + } +} + +int __init gart_iommu_init(void) +{ + struct agp_kern_info info; + unsigned long iommu_start; + unsigned long aper_base, aper_size; + unsigned long start_pfn, end_pfn; + unsigned long scratch; + long i; + + if (!amd_nb_has_feature(AMD_NB_GART)) + return 0; + +#ifndef CONFIG_AGP_AMD64 + no_agp = 1; +#else + /* Makefile puts PCI initialization via subsys_initcall first. 
*/ + /* Add other AMD AGP bridge drivers here */ + no_agp = no_agp || + (agp_amd64_init() < 0) || + (agp_copy_info(agp_bridge, &info) < 0); +#endif + + if (no_iommu || + (!force_iommu && max_pfn <= MAX_DMA32_PFN) || + !gart_iommu_aperture || + (no_agp && init_amd_gatt(&info) < 0)) { + if (max_pfn > MAX_DMA32_PFN) { + pr_warning("More than 4GB of memory but GART IOMMU not available.\n"); + pr_warning("falling back to iommu=soft.\n"); + } + return 0; + } + + /* need to map that range */ + aper_size = info.aper_size << 20; + aper_base = info.aper_base; + end_pfn = (aper_base>>PAGE_SHIFT) + (aper_size>>PAGE_SHIFT); + + if (end_pfn > max_low_pfn_mapped) { + start_pfn = (aper_base>>PAGE_SHIFT); + init_memory_mapping(start_pfn<> PAGE_SHIFT; + + iommu_gart_bitmap = (void *) __get_free_pages(GFP_KERNEL | __GFP_ZERO, + get_order(iommu_pages/8)); + if (!iommu_gart_bitmap) + panic("Cannot allocate iommu bitmap\n"); + +#ifdef CONFIG_IOMMU_LEAK + if (leak_trace) { + int ret; + + ret = dma_debug_resize_entries(iommu_pages); + if (ret) + pr_debug("PCI-DMA: Cannot trace all the entries\n"); + } +#endif + + /* + * Out of IOMMU space handling. + * Reserve some invalid pages at the beginning of the GART. + */ + bitmap_set(iommu_gart_bitmap, 0, EMERGENCY_PAGES); + + pr_info("PCI-DMA: Reserving %luMB of IOMMU area in the AGP aperture\n", + iommu_size >> 20); + + agp_memory_reserved = iommu_size; + iommu_start = aper_size - iommu_size; + iommu_bus_base = info.aper_base + iommu_start; + bad_dma_addr = iommu_bus_base; + iommu_gatt_base = agp_gatt_table + (iommu_start>>PAGE_SHIFT); + + /* + * Unmap the IOMMU part of the GART. The alias of the page is + * always mapped with cache enabled and there is no full cache + * coherency across the GART remapping. The unmapping avoids + * automatic prefetches from the CPU allocating cache lines in + * there. All CPU accesses are done via the direct mapping to + * the backing memory. The GART address is only used by PCI + * devices. + */ + set_memory_np((unsigned long)__va(iommu_bus_base), + iommu_size >> PAGE_SHIFT); + /* + * Tricky. The GART table remaps the physical memory range, + * so the CPU wont notice potential aliases and if the memory + * is remapped to UC later on, we might surprise the PCI devices + * with a stray writeout of a cacheline. So play it sure and + * do an explicit, full-scale wbinvd() _after_ having marked all + * the pages as Not-Present: + */ + wbinvd(); + + /* + * Now all caches are flushed and we can safely enable + * GART hardware. Doing it early leaves the possibility + * of stale cache entries that can lead to GART PTE + * errors. + */ + enable_gart_translations(); + + /* + * Try to workaround a bug (thanks to BenH): + * Set unmapped entries to a scratch page instead of 0. + * Any prefetches that hit unmapped entries won't get an bus abort + * then. (P2P bridge may be prefetching on DMA reads). 
+ */ + scratch = get_zeroed_page(GFP_KERNEL); + if (!scratch) + panic("Cannot allocate iommu scratch page"); + gart_unmapped_entry = GPTE_ENCODE(__pa(scratch)); + for (i = EMERGENCY_PAGES; i < iommu_pages; i++) + iommu_gatt_base[i] = gart_unmapped_entry; + + flush_gart(); + dma_ops = &gart_dma_ops; + x86_platform.iommu_shutdown = gart_iommu_shutdown; + swiotlb = 0; + + return 0; +} + +void __init gart_parse_options(char *p) +{ + int arg; + +#ifdef CONFIG_IOMMU_LEAK + if (!strncmp(p, "leak", 4)) { + leak_trace = 1; + p += 4; + if (*p == '=') + ++p; + if (isdigit(*p) && get_option(&p, &arg)) + iommu_leak_pages = arg; + } +#endif + if (isdigit(*p) && get_option(&p, &arg)) + iommu_size = arg; + if (!strncmp(p, "fullflush", 9)) + iommu_fullflush = 1; + if (!strncmp(p, "nofullflush", 11)) + iommu_fullflush = 0; + if (!strncmp(p, "noagp", 5)) + no_agp = 1; + if (!strncmp(p, "noaperture", 10)) + fix_aperture = 0; + /* duplicated from pci-dma.c */ + if (!strncmp(p, "force", 5)) + gart_iommu_aperture_allowed = 1; + if (!strncmp(p, "allowed", 7)) + gart_iommu_aperture_allowed = 1; + if (!strncmp(p, "memaper", 7)) { + fallback_aper_force = 1; + p += 7; + if (*p == '=') { + ++p; + if (get_option(&p, &arg)) + fallback_aper_order = arg; + } + } +} +IOMMU_INIT_POST(gart_iommu_hole_init); diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c deleted file mode 100644 index b117efd..0000000 --- a/arch/x86/kernel/pci-gart_64.c +++ /dev/null @@ -1,898 +0,0 @@ -/* - * Dynamic DMA mapping support for AMD Hammer. - * - * Use the integrated AGP GART in the Hammer northbridge as an IOMMU for PCI. - * This allows to use PCI devices that only support 32bit addresses on systems - * with more than 4GB. - * - * See Documentation/PCI/PCI-DMA-mapping.txt for the interface specification. - * - * Copyright 2002 Andi Kleen, SuSE Labs. - * Subject to the GNU General Public License v2 only. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -static unsigned long iommu_bus_base; /* GART remapping area (physical) */ -static unsigned long iommu_size; /* size of remapping area bytes */ -static unsigned long iommu_pages; /* .. and in pages */ - -static u32 *iommu_gatt_base; /* Remapping table */ - -static dma_addr_t bad_dma_addr; - -/* - * If this is disabled the IOMMU will use an optimized flushing strategy - * of only flushing when an mapping is reused. With it true the GART is - * flushed for every mapping. Problem is that doing the lazy flush seems - * to trigger bugs with some popular PCI cards, in particular 3ware (but - * has been also also seen with Qlogic at least). 
- */ -static int iommu_fullflush = 1; - -/* Allocation bitmap for the remapping area: */ -static DEFINE_SPINLOCK(iommu_bitmap_lock); -/* Guarded by iommu_bitmap_lock: */ -static unsigned long *iommu_gart_bitmap; - -static u32 gart_unmapped_entry; - -#define GPTE_VALID 1 -#define GPTE_COHERENT 2 -#define GPTE_ENCODE(x) \ - (((x) & 0xfffff000) | (((x) >> 32) << 4) | GPTE_VALID | GPTE_COHERENT) -#define GPTE_DECODE(x) (((x) & 0xfffff000) | (((u64)(x) & 0xff0) << 28)) - -#define EMERGENCY_PAGES 32 /* = 128KB */ - -#ifdef CONFIG_AGP -#define AGPEXTERN extern -#else -#define AGPEXTERN -#endif - -/* GART can only remap to physical addresses < 1TB */ -#define GART_MAX_PHYS_ADDR (1ULL << 40) - -/* backdoor interface to AGP driver */ -AGPEXTERN int agp_memory_reserved; -AGPEXTERN __u32 *agp_gatt_table; - -static unsigned long next_bit; /* protected by iommu_bitmap_lock */ -static bool need_flush; /* global flush state. set for each gart wrap */ - -static unsigned long alloc_iommu(struct device *dev, int size, - unsigned long align_mask) -{ - unsigned long offset, flags; - unsigned long boundary_size; - unsigned long base_index; - - base_index = ALIGN(iommu_bus_base & dma_get_seg_boundary(dev), - PAGE_SIZE) >> PAGE_SHIFT; - boundary_size = ALIGN((u64)dma_get_seg_boundary(dev) + 1, - PAGE_SIZE) >> PAGE_SHIFT; - - spin_lock_irqsave(&iommu_bitmap_lock, flags); - offset = iommu_area_alloc(iommu_gart_bitmap, iommu_pages, next_bit, - size, base_index, boundary_size, align_mask); - if (offset == -1) { - need_flush = true; - offset = iommu_area_alloc(iommu_gart_bitmap, iommu_pages, 0, - size, base_index, boundary_size, - align_mask); - } - if (offset != -1) { - next_bit = offset+size; - if (next_bit >= iommu_pages) { - next_bit = 0; - need_flush = true; - } - } - if (iommu_fullflush) - need_flush = true; - spin_unlock_irqrestore(&iommu_bitmap_lock, flags); - - return offset; -} - -static void free_iommu(unsigned long offset, int size) -{ - unsigned long flags; - - spin_lock_irqsave(&iommu_bitmap_lock, flags); - bitmap_clear(iommu_gart_bitmap, offset, size); - if (offset >= next_bit) - next_bit = offset + size; - spin_unlock_irqrestore(&iommu_bitmap_lock, flags); -} - -/* - * Use global flush state to avoid races with multiple flushers. - */ -static void flush_gart(void) -{ - unsigned long flags; - - spin_lock_irqsave(&iommu_bitmap_lock, flags); - if (need_flush) { - amd_flush_garts(); - need_flush = false; - } - spin_unlock_irqrestore(&iommu_bitmap_lock, flags); -} - -#ifdef CONFIG_IOMMU_LEAK -/* Debugging aid for drivers that don't free their IOMMU tables */ -static int leak_trace; -static int iommu_leak_pages = 20; - -static void dump_leak(void) -{ - static int dump; - - if (dump) - return; - dump = 1; - - show_stack(NULL, NULL); - debug_dma_dump_mappings(NULL); -} -#endif - -static void iommu_full(struct device *dev, size_t size, int dir) -{ - /* - * Ran out of IOMMU space for this operation. This is very bad. - * Unfortunately the drivers cannot handle this operation properly. - * Return some non mapped prereserved space in the aperture and - * let the Northbridge deal with it. This will result in garbage - * in the IO operation. When the size exceeds the prereserved space - * memory corruption will occur or random memory will be DMAed - * out. Hopefully no network devices use single mappings that big. 
- */ - - dev_err(dev, "PCI-DMA: Out of IOMMU space for %lu bytes\n", size); - - if (size > PAGE_SIZE*EMERGENCY_PAGES) { - if (dir == PCI_DMA_FROMDEVICE || dir == PCI_DMA_BIDIRECTIONAL) - panic("PCI-DMA: Memory would be corrupted\n"); - if (dir == PCI_DMA_TODEVICE || dir == PCI_DMA_BIDIRECTIONAL) - panic(KERN_ERR - "PCI-DMA: Random memory would be DMAed\n"); - } -#ifdef CONFIG_IOMMU_LEAK - dump_leak(); -#endif -} - -static inline int -need_iommu(struct device *dev, unsigned long addr, size_t size) -{ - return force_iommu || !dma_capable(dev, addr, size); -} - -static inline int -nonforced_iommu(struct device *dev, unsigned long addr, size_t size) -{ - return !dma_capable(dev, addr, size); -} - -/* Map a single continuous physical area into the IOMMU. - * Caller needs to check if the iommu is needed and flush. - */ -static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem, - size_t size, int dir, unsigned long align_mask) -{ - unsigned long npages = iommu_num_pages(phys_mem, size, PAGE_SIZE); - unsigned long iommu_page; - int i; - - if (unlikely(phys_mem + size > GART_MAX_PHYS_ADDR)) - return bad_dma_addr; - - iommu_page = alloc_iommu(dev, npages, align_mask); - if (iommu_page == -1) { - if (!nonforced_iommu(dev, phys_mem, size)) - return phys_mem; - if (panic_on_overflow) - panic("dma_map_area overflow %lu bytes\n", size); - iommu_full(dev, size, dir); - return bad_dma_addr; - } - - for (i = 0; i < npages; i++) { - iommu_gatt_base[iommu_page + i] = GPTE_ENCODE(phys_mem); - phys_mem += PAGE_SIZE; - } - return iommu_bus_base + iommu_page*PAGE_SIZE + (phys_mem & ~PAGE_MASK); -} - -/* Map a single area into the IOMMU */ -static dma_addr_t gart_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, - enum dma_data_direction dir, - struct dma_attrs *attrs) -{ - unsigned long bus; - phys_addr_t paddr = page_to_phys(page) + offset; - - if (!dev) - dev = &x86_dma_fallback_dev; - - if (!need_iommu(dev, paddr, size)) - return paddr; - - bus = dma_map_area(dev, paddr, size, dir, 0); - flush_gart(); - - return bus; -} - -/* - * Free a DMA mapping. - */ -static void gart_unmap_page(struct device *dev, dma_addr_t dma_addr, - size_t size, enum dma_data_direction dir, - struct dma_attrs *attrs) -{ - unsigned long iommu_page; - int npages; - int i; - - if (dma_addr < iommu_bus_base + EMERGENCY_PAGES*PAGE_SIZE || - dma_addr >= iommu_bus_base + iommu_size) - return; - - iommu_page = (dma_addr - iommu_bus_base)>>PAGE_SHIFT; - npages = iommu_num_pages(dma_addr, size, PAGE_SIZE); - for (i = 0; i < npages; i++) { - iommu_gatt_base[iommu_page + i] = gart_unmapped_entry; - } - free_iommu(iommu_page, npages); -} - -/* - * Wrapper for pci_unmap_single working with scatterlists. 
- */ -static void gart_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, - enum dma_data_direction dir, struct dma_attrs *attrs) -{ - struct scatterlist *s; - int i; - - for_each_sg(sg, s, nents, i) { - if (!s->dma_length || !s->length) - break; - gart_unmap_page(dev, s->dma_address, s->dma_length, dir, NULL); - } -} - -/* Fallback for dma_map_sg in case of overflow */ -static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg, - int nents, int dir) -{ - struct scatterlist *s; - int i; - -#ifdef CONFIG_IOMMU_DEBUG - pr_debug("dma_map_sg overflow\n"); -#endif - - for_each_sg(sg, s, nents, i) { - unsigned long addr = sg_phys(s); - - if (nonforced_iommu(dev, addr, s->length)) { - addr = dma_map_area(dev, addr, s->length, dir, 0); - if (addr == bad_dma_addr) { - if (i > 0) - gart_unmap_sg(dev, sg, i, dir, NULL); - nents = 0; - sg[0].dma_length = 0; - break; - } - } - s->dma_address = addr; - s->dma_length = s->length; - } - flush_gart(); - - return nents; -} - -/* Map multiple scatterlist entries continuous into the first. */ -static int __dma_map_cont(struct device *dev, struct scatterlist *start, - int nelems, struct scatterlist *sout, - unsigned long pages) -{ - unsigned long iommu_start = alloc_iommu(dev, pages, 0); - unsigned long iommu_page = iommu_start; - struct scatterlist *s; - int i; - - if (iommu_start == -1) - return -1; - - for_each_sg(start, s, nelems, i) { - unsigned long pages, addr; - unsigned long phys_addr = s->dma_address; - - BUG_ON(s != start && s->offset); - if (s == start) { - sout->dma_address = iommu_bus_base; - sout->dma_address += iommu_page*PAGE_SIZE + s->offset; - sout->dma_length = s->length; - } else { - sout->dma_length += s->length; - } - - addr = phys_addr; - pages = iommu_num_pages(s->offset, s->length, PAGE_SIZE); - while (pages--) { - iommu_gatt_base[iommu_page] = GPTE_ENCODE(addr); - addr += PAGE_SIZE; - iommu_page++; - } - } - BUG_ON(iommu_page - iommu_start != pages); - - return 0; -} - -static inline int -dma_map_cont(struct device *dev, struct scatterlist *start, int nelems, - struct scatterlist *sout, unsigned long pages, int need) -{ - if (!need) { - BUG_ON(nelems != 1); - sout->dma_address = start->dma_address; - sout->dma_length = start->length; - return 0; - } - return __dma_map_cont(dev, start, nelems, sout, pages); -} - -/* - * DMA map all entries in a scatterlist. - * Merge chunks that have page aligned sizes into a continuous mapping. - */ -static int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, - enum dma_data_direction dir, struct dma_attrs *attrs) -{ - struct scatterlist *s, *ps, *start_sg, *sgmap; - int need = 0, nextneed, i, out, start; - unsigned long pages = 0; - unsigned int seg_size; - unsigned int max_seg_size; - - if (nents == 0) - return 0; - - if (!dev) - dev = &x86_dma_fallback_dev; - - out = 0; - start = 0; - start_sg = sg; - sgmap = sg; - seg_size = 0; - max_seg_size = dma_get_max_seg_size(dev); - ps = NULL; /* shut up gcc */ - - for_each_sg(sg, s, nents, i) { - dma_addr_t addr = sg_phys(s); - - s->dma_address = addr; - BUG_ON(s->length == 0); - - nextneed = need_iommu(dev, addr, s->length); - - /* Handle the previous not yet processed entries */ - if (i > start) { - /* - * Can only merge when the last chunk ends on a - * page boundary and the new one doesn't have an - * offset. 
- */ - if (!iommu_merge || !nextneed || !need || s->offset || - (s->length + seg_size > max_seg_size) || - (ps->offset + ps->length) % PAGE_SIZE) { - if (dma_map_cont(dev, start_sg, i - start, - sgmap, pages, need) < 0) - goto error; - out++; - - seg_size = 0; - sgmap = sg_next(sgmap); - pages = 0; - start = i; - start_sg = s; - } - } - - seg_size += s->length; - need = nextneed; - pages += iommu_num_pages(s->offset, s->length, PAGE_SIZE); - ps = s; - } - if (dma_map_cont(dev, start_sg, i - start, sgmap, pages, need) < 0) - goto error; - out++; - flush_gart(); - if (out < nents) { - sgmap = sg_next(sgmap); - sgmap->dma_length = 0; - } - return out; - -error: - flush_gart(); - gart_unmap_sg(dev, sg, out, dir, NULL); - - /* When it was forced or merged try again in a dumb way */ - if (force_iommu || iommu_merge) { - out = dma_map_sg_nonforce(dev, sg, nents, dir); - if (out > 0) - return out; - } - if (panic_on_overflow) - panic("dma_map_sg: overflow on %lu pages\n", pages); - - iommu_full(dev, pages << PAGE_SHIFT, dir); - for_each_sg(sg, s, nents, i) - s->dma_address = bad_dma_addr; - return 0; -} - -/* allocate and map a coherent mapping */ -static void * -gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr, - gfp_t flag) -{ - dma_addr_t paddr; - unsigned long align_mask; - struct page *page; - - if (force_iommu && !(flag & GFP_DMA)) { - flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); - page = alloc_pages(flag | __GFP_ZERO, get_order(size)); - if (!page) - return NULL; - - align_mask = (1UL << get_order(size)) - 1; - paddr = dma_map_area(dev, page_to_phys(page), size, - DMA_BIDIRECTIONAL, align_mask); - - flush_gart(); - if (paddr != bad_dma_addr) { - *dma_addr = paddr; - return page_address(page); - } - __free_pages(page, get_order(size)); - } else - return dma_generic_alloc_coherent(dev, size, dma_addr, flag); - - return NULL; -} - -/* free a coherent mapping */ -static void -gart_free_coherent(struct device *dev, size_t size, void *vaddr, - dma_addr_t dma_addr) -{ - gart_unmap_page(dev, dma_addr, size, DMA_BIDIRECTIONAL, NULL); - free_pages((unsigned long)vaddr, get_order(size)); -} - -static int gart_mapping_error(struct device *dev, dma_addr_t dma_addr) -{ - return (dma_addr == bad_dma_addr); -} - -static int no_agp; - -static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size) -{ - unsigned long a; - - if (!iommu_size) { - iommu_size = aper_size; - if (!no_agp) - iommu_size /= 2; - } - - a = aper + iommu_size; - iommu_size -= round_up(a, PMD_PAGE_SIZE) - a; - - if (iommu_size < 64*1024*1024) { - pr_warning( - "PCI-DMA: Warning: Small IOMMU %luMB." 
- " Consider increasing the AGP aperture in BIOS\n", - iommu_size >> 20); - } - - return iommu_size; -} - -static __init unsigned read_aperture(struct pci_dev *dev, u32 *size) -{ - unsigned aper_size = 0, aper_base_32, aper_order; - u64 aper_base; - - pci_read_config_dword(dev, AMD64_GARTAPERTUREBASE, &aper_base_32); - pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &aper_order); - aper_order = (aper_order >> 1) & 7; - - aper_base = aper_base_32 & 0x7fff; - aper_base <<= 25; - - aper_size = (32 * 1024 * 1024) << aper_order; - if (aper_base + aper_size > 0x100000000UL || !aper_size) - aper_base = 0; - - *size = aper_size; - return aper_base; -} - -static void enable_gart_translations(void) -{ - int i; - - if (!amd_nb_has_feature(AMD_NB_GART)) - return; - - for (i = 0; i < amd_nb_num(); i++) { - struct pci_dev *dev = node_to_amd_nb(i)->misc; - - enable_gart_translation(dev, __pa(agp_gatt_table)); - } - - /* Flush the GART-TLB to remove stale entries */ - amd_flush_garts(); -} - -/* - * If fix_up_north_bridges is set, the north bridges have to be fixed up on - * resume in the same way as they are handled in gart_iommu_hole_init(). - */ -static bool fix_up_north_bridges; -static u32 aperture_order; -static u32 aperture_alloc; - -void set_up_gart_resume(u32 aper_order, u32 aper_alloc) -{ - fix_up_north_bridges = true; - aperture_order = aper_order; - aperture_alloc = aper_alloc; -} - -static void gart_fixup_northbridges(void) -{ - int i; - - if (!fix_up_north_bridges) - return; - - if (!amd_nb_has_feature(AMD_NB_GART)) - return; - - pr_info("PCI-DMA: Restoring GART aperture settings\n"); - - for (i = 0; i < amd_nb_num(); i++) { - struct pci_dev *dev = node_to_amd_nb(i)->misc; - - /* - * Don't enable translations just yet. That is the next - * step. Restore the pre-suspend aperture settings. - */ - gart_set_size_and_enable(dev, aperture_order); - pci_write_config_dword(dev, AMD64_GARTAPERTUREBASE, aperture_alloc >> 25); - } -} - -static void gart_resume(void) -{ - pr_info("PCI-DMA: Resuming GART IOMMU\n"); - - gart_fixup_northbridges(); - - enable_gart_translations(); -} - -static struct syscore_ops gart_syscore_ops = { - .resume = gart_resume, - -}; - -/* - * Private Northbridge GATT initialization in case we cannot use the - * AGP driver for some reason. 
- */ -static __init int init_amd_gatt(struct agp_kern_info *info) -{ - unsigned aper_size, gatt_size, new_aper_size; - unsigned aper_base, new_aper_base; - struct pci_dev *dev; - void *gatt; - int i; - - pr_info("PCI-DMA: Disabling AGP.\n"); - - aper_size = aper_base = info->aper_size = 0; - dev = NULL; - for (i = 0; i < amd_nb_num(); i++) { - dev = node_to_amd_nb(i)->misc; - new_aper_base = read_aperture(dev, &new_aper_size); - if (!new_aper_base) - goto nommu; - - if (!aper_base) { - aper_size = new_aper_size; - aper_base = new_aper_base; - } - if (aper_size != new_aper_size || aper_base != new_aper_base) - goto nommu; - } - if (!aper_base) - goto nommu; - - info->aper_base = aper_base; - info->aper_size = aper_size >> 20; - - gatt_size = (aper_size >> PAGE_SHIFT) * sizeof(u32); - gatt = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, - get_order(gatt_size)); - if (!gatt) - panic("Cannot allocate GATT table"); - if (set_memory_uc((unsigned long)gatt, gatt_size >> PAGE_SHIFT)) - panic("Could not set GART PTEs to uncacheable pages"); - - agp_gatt_table = gatt; - - register_syscore_ops(&gart_syscore_ops); - - flush_gart(); - - pr_info("PCI-DMA: aperture base @ %x size %u KB\n", - aper_base, aper_size>>10); - - return 0; - - nommu: - /* Should not happen anymore */ - pr_warning("PCI-DMA: More than 4GB of RAM and no IOMMU\n" - "falling back to iommu=soft.\n"); - return -1; -} - -static struct dma_map_ops gart_dma_ops = { - .map_sg = gart_map_sg, - .unmap_sg = gart_unmap_sg, - .map_page = gart_map_page, - .unmap_page = gart_unmap_page, - .alloc_coherent = gart_alloc_coherent, - .free_coherent = gart_free_coherent, - .mapping_error = gart_mapping_error, -}; - -static void gart_iommu_shutdown(void) -{ - struct pci_dev *dev; - int i; - - /* don't shutdown it if there is AGP installed */ - if (!no_agp) - return; - - if (!amd_nb_has_feature(AMD_NB_GART)) - return; - - for (i = 0; i < amd_nb_num(); i++) { - u32 ctl; - - dev = node_to_amd_nb(i)->misc; - pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &ctl); - - ctl &= ~GARTEN; - - pci_write_config_dword(dev, AMD64_GARTAPERTURECTL, ctl); - } -} - -int __init gart_iommu_init(void) -{ - struct agp_kern_info info; - unsigned long iommu_start; - unsigned long aper_base, aper_size; - unsigned long start_pfn, end_pfn; - unsigned long scratch; - long i; - - if (!amd_nb_has_feature(AMD_NB_GART)) - return 0; - -#ifndef CONFIG_AGP_AMD64 - no_agp = 1; -#else - /* Makefile puts PCI initialization via subsys_initcall first. 
*/ - /* Add other AMD AGP bridge drivers here */ - no_agp = no_agp || - (agp_amd64_init() < 0) || - (agp_copy_info(agp_bridge, &info) < 0); -#endif - - if (no_iommu || - (!force_iommu && max_pfn <= MAX_DMA32_PFN) || - !gart_iommu_aperture || - (no_agp && init_amd_gatt(&info) < 0)) { - if (max_pfn > MAX_DMA32_PFN) { - pr_warning("More than 4GB of memory but GART IOMMU not available.\n"); - pr_warning("falling back to iommu=soft.\n"); - } - return 0; - } - - /* need to map that range */ - aper_size = info.aper_size << 20; - aper_base = info.aper_base; - end_pfn = (aper_base>>PAGE_SHIFT) + (aper_size>>PAGE_SHIFT); - - if (end_pfn > max_low_pfn_mapped) { - start_pfn = (aper_base>>PAGE_SHIFT); - init_memory_mapping(start_pfn<> PAGE_SHIFT; - - iommu_gart_bitmap = (void *) __get_free_pages(GFP_KERNEL | __GFP_ZERO, - get_order(iommu_pages/8)); - if (!iommu_gart_bitmap) - panic("Cannot allocate iommu bitmap\n"); - -#ifdef CONFIG_IOMMU_LEAK - if (leak_trace) { - int ret; - - ret = dma_debug_resize_entries(iommu_pages); - if (ret) - pr_debug("PCI-DMA: Cannot trace all the entries\n"); - } -#endif - - /* - * Out of IOMMU space handling. - * Reserve some invalid pages at the beginning of the GART. - */ - bitmap_set(iommu_gart_bitmap, 0, EMERGENCY_PAGES); - - pr_info("PCI-DMA: Reserving %luMB of IOMMU area in the AGP aperture\n", - iommu_size >> 20); - - agp_memory_reserved = iommu_size; - iommu_start = aper_size - iommu_size; - iommu_bus_base = info.aper_base + iommu_start; - bad_dma_addr = iommu_bus_base; - iommu_gatt_base = agp_gatt_table + (iommu_start>>PAGE_SHIFT); - - /* - * Unmap the IOMMU part of the GART. The alias of the page is - * always mapped with cache enabled and there is no full cache - * coherency across the GART remapping. The unmapping avoids - * automatic prefetches from the CPU allocating cache lines in - * there. All CPU accesses are done via the direct mapping to - * the backing memory. The GART address is only used by PCI - * devices. - */ - set_memory_np((unsigned long)__va(iommu_bus_base), - iommu_size >> PAGE_SHIFT); - /* - * Tricky. The GART table remaps the physical memory range, - * so the CPU wont notice potential aliases and if the memory - * is remapped to UC later on, we might surprise the PCI devices - * with a stray writeout of a cacheline. So play it sure and - * do an explicit, full-scale wbinvd() _after_ having marked all - * the pages as Not-Present: - */ - wbinvd(); - - /* - * Now all caches are flushed and we can safely enable - * GART hardware. Doing it early leaves the possibility - * of stale cache entries that can lead to GART PTE - * errors. - */ - enable_gart_translations(); - - /* - * Try to workaround a bug (thanks to BenH): - * Set unmapped entries to a scratch page instead of 0. - * Any prefetches that hit unmapped entries won't get an bus abort - * then. (P2P bridge may be prefetching on DMA reads). 
- */ - scratch = get_zeroed_page(GFP_KERNEL); - if (!scratch) - panic("Cannot allocate iommu scratch page"); - gart_unmapped_entry = GPTE_ENCODE(__pa(scratch)); - for (i = EMERGENCY_PAGES; i < iommu_pages; i++) - iommu_gatt_base[i] = gart_unmapped_entry; - - flush_gart(); - dma_ops = &gart_dma_ops; - x86_platform.iommu_shutdown = gart_iommu_shutdown; - swiotlb = 0; - - return 0; -} - -void __init gart_parse_options(char *p) -{ - int arg; - -#ifdef CONFIG_IOMMU_LEAK - if (!strncmp(p, "leak", 4)) { - leak_trace = 1; - p += 4; - if (*p == '=') - ++p; - if (isdigit(*p) && get_option(&p, &arg)) - iommu_leak_pages = arg; - } -#endif - if (isdigit(*p) && get_option(&p, &arg)) - iommu_size = arg; - if (!strncmp(p, "fullflush", 9)) - iommu_fullflush = 1; - if (!strncmp(p, "nofullflush", 11)) - iommu_fullflush = 0; - if (!strncmp(p, "noagp", 5)) - no_agp = 1; - if (!strncmp(p, "noaperture", 10)) - fix_aperture = 0; - /* duplicated from pci-dma.c */ - if (!strncmp(p, "force", 5)) - gart_iommu_aperture_allowed = 1; - if (!strncmp(p, "allowed", 7)) - gart_iommu_aperture_allowed = 1; - if (!strncmp(p, "memaper", 7)) { - fallback_aper_force = 1; - p += 7; - if (*p == '=') { - ++p; - if (get_option(&p, &arg)) - fallback_aper_order = arg; - } - } -} -IOMMU_INIT_POST(gart_iommu_hole_init); -- cgit v0.10.2
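For reference, the aperture geometry that amd_gart_64.c works with comes from two northbridge config registers, decoded in read_aperture(). A standalone illustration of that bit arithmetic (plain userspace C; decode_aperture() and the register values are hypothetical, only the shifts and masks mirror the kernel code):

#include <stdint.h>
#include <stdio.h>

/*
 * Bits 3:1 of AMD64_GARTAPERTURECTL select the aperture order
 * (size = 32MB << order); the low 15 bits of AMD64_GARTAPERTUREBASE
 * hold physical address bits 39:25 of the aperture base.
 */
static void decode_aperture(uint32_t aper_ctl, uint32_t aper_base_reg)
{
	unsigned int order = (aper_ctl >> 1) & 7;
	uint64_t size = (32ULL * 1024 * 1024) << order;
	uint64_t base = (uint64_t)(aper_base_reg & 0x7fff) << 25;

	printf("aperture base 0x%llx, size %llu MB\n",
	       (unsigned long long)base, (unsigned long long)(size >> 20));
}

int main(void)
{
	/* hypothetical register values: order 3 (256MB) aperture at 2GB */
	decode_aperture(0x6, 0x40);
	return 0;
}

With those example inputs the sketch prints "aperture base 0x80000000, size 256 MB", which is what the kernel code would compute before applying the base+size > 4GB sanity check in read_aperture().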