From 941b77a3b6946dd6223a029007f695aa841b6d34 Mon Sep 17 00:00:00 2001 From: Per Friden Date: Sun, 20 Jun 2010 21:24:45 +0000 Subject: DMAENGINE: ste_dma40: fixed lli_max=1 issue Fixed the lli_max=1 issue in the case of a full lcla; previously this case was not properly handled. Signed-off-by: Jonas Aaberg Signed-off-by: Linus Walleij Signed-off-by: Dan Williams diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c index c426829..4618d6c 100644 --- a/drivers/dma/ste_dma40.c +++ b/drivers/dma/ste_dma40.c @@ -81,9 +81,10 @@ struct d40_lli_pool { * lli_len equals one. * @lli_log: Same as above but for logical channels. * @lli_pool: The pool with two entries pre-allocated. - * @lli_len: Number of LLI's in lli_pool - * @lli_tcount: Number of LLIs processed in the transfer. When equals lli_len - * then this transfer job is done. + * @lli_len: Number of llis of current descriptor. + * @lli_count: Number of transfered llis. + * @lli_tx_len: Max number of LLIs per transfer, there can be + * many transfer for one descriptor. * @txd: DMA engine struct. Used for among other things for communication * during a transfer. * @node: List entry. @@ -100,8 +101,9 @@ struct d40_desc { struct d40_log_lli_bidir lli_log; struct d40_lli_pool lli_pool; - u32 lli_len; - u32 lli_tcount; + int lli_len; + int lli_count; + u32 lli_tx_len; struct dma_async_tx_descriptor txd; struct list_head node; @@ -365,11 +367,6 @@ static dma_cookie_t d40_assign_cookie(struct d40_chan *d40c, return cookie; } -static void d40_desc_reset(struct d40_desc *d40d) -{ - d40d->lli_tcount = 0; -} - static void d40_desc_remove(struct d40_desc *d40d) { list_del(&d40d->node); @@ -738,25 +735,18 @@ static void d40_desc_load(struct d40_chan *d40c, struct d40_desc *d40d) d40c->phy_chan->num, d40d->lli_phy.dst, d40d->lli_phy.src); - d40d->lli_tcount = d40d->lli_len; } else if (d40d->lli_log.dst && d40d->lli_log.src) { - u32 lli_len; struct d40_log_lli *src = d40d->lli_log.src; struct d40_log_lli *dst = d40d->lli_log.dst; - src += d40d->lli_tcount; - dst += d40d->lli_tcount; - - if (d40d->lli_len <= d40c->base->plat_data->llis_per_log) - lli_len = d40d->lli_len; - else - lli_len = d40c->base->plat_data->llis_per_log; - d40d->lli_tcount += lli_len; + src += d40d->lli_count; + dst += d40d->lli_count; d40_log_lli_write(d40c->lcpa, d40c->lcla.src, d40c->lcla.dst, dst, src, d40c->base->plat_data->llis_per_log); } + d40d->lli_count += d40d->lli_tx_len; } static dma_cookie_t d40_tx_submit(struct dma_async_tx_descriptor *tx) @@ -838,7 +828,7 @@ static void dma_tc_handle(struct d40_chan *d40c) if (d40d == NULL) return; - if (d40d->lli_tcount < d40d->lli_len) { + if (d40d->lli_count < d40d->lli_len) { d40_desc_load(d40c, d40d); /* Start dma job */ @@ -891,7 +881,6 @@ static void dma_tasklet(unsigned long data) /* Return desc to free-list */ d40_desc_free(d40c, d40d_fin); } else { - d40_desc_reset(d40d_fin); if (!d40d_fin->is_in_client_list) { d40_desc_remove(d40d_fin); list_add_tail(&d40d_fin->node, &d40c->client); @@ -1573,7 +1562,6 @@ struct dma_async_tx_descriptor *stedma40_memcpy_sg(struct dma_chan *chan, struct d40_chan *d40c = container_of(chan, struct d40_chan, chan); unsigned long flg; - int lli_max = d40c->base->plat_data->llis_per_log; spin_lock_irqsave(&d40c->lock, flg); @@ -1584,10 +1572,13 @@ struct dma_async_tx_descriptor *stedma40_memcpy_sg(struct dma_chan *chan, memset(d40d, 0, sizeof(struct d40_desc)); d40d->lli_len = sgl_len; - + d40d->lli_tx_len = d40d->lli_len; d40d->txd.flags = flags; if (d40c->log_num != D40_PHY_CHAN) { + if (d40d->lli_len > 
d40c->base->plat_data->llis_per_log) + d40d->lli_tx_len = d40c->base->plat_data->llis_per_log; + if (sgl_len > 1) /* * Check if there is space available in lcla. If not, @@ -1596,7 +1587,7 @@ struct dma_async_tx_descriptor *stedma40_memcpy_sg(struct dma_chan *chan, */ if (d40_lcla_id_get(d40c, &d40c->base->lcla_pool) != 0) - lli_max = 1; + d40d->lli_tx_len = 1; if (d40_pool_lli_alloc(d40d, sgl_len, true) < 0) { dev_err(&d40c->chan.dev->device, @@ -1610,7 +1601,8 @@ struct dma_async_tx_descriptor *stedma40_memcpy_sg(struct dma_chan *chan, d40d->lli_log.src, d40c->log_def.lcsp1, d40c->dma_cfg.src_info.data_width, - flags & DMA_PREP_INTERRUPT, lli_max, + flags & DMA_PREP_INTERRUPT, + d40d->lli_tx_len, d40c->base->plat_data->llis_per_log); (void) d40_log_sg_to_lli(d40c->lcla.dst_id, @@ -1619,7 +1611,8 @@ struct dma_async_tx_descriptor *stedma40_memcpy_sg(struct dma_chan *chan, d40d->lli_log.dst, d40c->log_def.lcsp3, d40c->dma_cfg.dst_info.data_width, - flags & DMA_PREP_INTERRUPT, lli_max, + flags & DMA_PREP_INTERRUPT, + d40d->lli_tx_len, d40c->base->plat_data->llis_per_log); @@ -1794,6 +1787,7 @@ static struct dma_async_tx_descriptor *d40_prep_memcpy(struct dma_chan *chan, goto err; } d40d->lli_len = 1; + d40d->lli_tx_len = 1; d40_log_fill_lli(d40d->lli_log.src, src, @@ -1869,7 +1863,6 @@ static int d40_prep_slave_sg_log(struct d40_desc *d40d, { dma_addr_t dev_addr = 0; int total_size; - int lli_max = d40c->base->plat_data->llis_per_log; if (d40_pool_lli_alloc(d40d, sg_len, true) < 0) { dev_err(&d40c->chan.dev->device, @@ -1878,7 +1871,10 @@ static int d40_prep_slave_sg_log(struct d40_desc *d40d, } d40d->lli_len = sg_len; - d40d->lli_tcount = 0; + if (d40d->lli_len <= d40c->base->plat_data->llis_per_log) + d40d->lli_tx_len = d40d->lli_len; + else + d40d->lli_tx_len = d40c->base->plat_data->llis_per_log; if (sg_len > 1) /* @@ -1887,7 +1883,7 @@ static int d40_prep_slave_sg_log(struct d40_desc *d40d, * in lcpa space. */ if (d40_lcla_id_get(d40c, &d40c->base->lcla_pool) != 0) - lli_max = 1; + d40d->lli_tx_len = 1; if (direction == DMA_FROM_DEVICE) { dev_addr = d40c->base->plat_data->dev_rx[d40c->dma_cfg.src_dev_type]; @@ -1899,7 +1895,7 @@ static int d40_prep_slave_sg_log(struct d40_desc *d40d, d40c->dma_cfg.dst_info.data_width, direction, flags & DMA_PREP_INTERRUPT, - dev_addr, lli_max, + dev_addr, d40d->lli_tx_len, d40c->base->plat_data->llis_per_log); } else if (direction == DMA_TO_DEVICE) { dev_addr = d40c->base->plat_data->dev_tx[d40c->dma_cfg.dst_dev_type]; @@ -1911,7 +1907,7 @@ static int d40_prep_slave_sg_log(struct d40_desc *d40d, d40c->dma_cfg.dst_info.data_width, direction, flags & DMA_PREP_INTERRUPT, - dev_addr, lli_max, + dev_addr, d40d->lli_tx_len, d40c->base->plat_data->llis_per_log); } else return -EINVAL; @@ -1939,7 +1935,7 @@ static int d40_prep_slave_sg_phy(struct d40_desc *d40d, } d40d->lli_len = sgl_len; - d40d->lli_tcount = 0; + d40d->lli_tx_len = sgl_len; if (direction == DMA_FROM_DEVICE) { dst_dev_addr = 0; -- cgit v0.10.2 From ef1872ec652b3bc472d6c0995d0b64d5058878ea Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sun, 20 Jun 2010 21:24:52 +0000 Subject: DMAENGINE: ste_dma40: only write phy channel config first time We only need to write the configuration to a physical channel if it is free, else it is already written. 
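In outline, the reworked allocation path in d40_alloc_chan_resources() now does the following (a condensed sketch of the hunk below, using the driver's own names; error paths elided, not a drop-in replacement):

	bool is_free_phy = (d40c->phy_chan == NULL);
	int err = d40_allocate_channel(d40c);

	/*
	 * Several logical channels may share one physical channel, so
	 * only the first user of the physical resource has to write
	 * the channel configuration to the hardware.
	 */
	if (!err && is_free_phy)
		err = d40_config_write(d40c);
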
Signed-off-by: Jonas Aaberg Signed-off-by: Linus Walleij Signed-off-by: Dan Williams diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c index 4618d6c..9655452 100644 --- a/drivers/dma/ste_dma40.c +++ b/drivers/dma/ste_dma40.c @@ -1210,30 +1210,6 @@ out: } -static int d40_config_chan(struct d40_chan *d40c, - struct stedma40_chan_cfg *info) -{ - - /* Fill in basic CFG register values */ - d40_phy_cfg(&d40c->dma_cfg, &d40c->src_def_cfg, - &d40c->dst_def_cfg, d40c->log_num != D40_PHY_CHAN); - - if (d40c->log_num != D40_PHY_CHAN) { - d40_log_cfg(&d40c->dma_cfg, - &d40c->log_def.lcsp1, &d40c->log_def.lcsp3); - - if (d40c->dma_cfg.dir == STEDMA40_PERIPH_TO_MEM) - d40c->lcpa = d40c->base->lcpa_base + - d40c->dma_cfg.src_dev_type * 32; - else - d40c->lcpa = d40c->base->lcpa_base + - d40c->dma_cfg.dst_dev_type * 32 + 16; - } - - /* Write channel configuration to the DMA */ - return d40_config_write(d40c); -} - static int d40_config_memcpy(struct d40_chan *d40c) { dma_cap_mask_t cap = d40c->chan.device->cap_mask; @@ -1691,20 +1667,21 @@ static int d40_alloc_chan_resources(struct dma_chan *chan) unsigned long flags; struct d40_chan *d40c = container_of(chan, struct d40_chan, chan); - + bool is_free_phy; spin_lock_irqsave(&d40c->lock, flags); d40c->completed = chan->cookie = 1; /* * If no dma configuration is set (channel_type == 0) - * use default configuration + * use default configuration (memcpy) */ if (d40c->dma_cfg.channel_type == 0) { err = d40_config_memcpy(d40c); if (err) goto err_alloc; } + is_free_phy = (d40c->phy_chan == NULL); err = d40_allocate_channel(d40c); if (err) { @@ -1713,12 +1690,35 @@ static int d40_alloc_chan_resources(struct dma_chan *chan) goto err_alloc; } - err = d40_config_chan(d40c, &d40c->dma_cfg); - if (err) { - dev_err(&d40c->chan.dev->device, - "[%s] Failed to configure channel\n", - __func__); - goto err_config; + /* Fill in basic CFG register values */ + d40_phy_cfg(&d40c->dma_cfg, &d40c->src_def_cfg, + &d40c->dst_def_cfg, d40c->log_num != D40_PHY_CHAN); + + if (d40c->log_num != D40_PHY_CHAN) { + d40_log_cfg(&d40c->dma_cfg, + &d40c->log_def.lcsp1, &d40c->log_def.lcsp3); + + if (d40c->dma_cfg.dir == STEDMA40_PERIPH_TO_MEM) + d40c->lcpa = d40c->base->lcpa_base + + d40c->dma_cfg.src_dev_type * D40_LCPA_CHAN_SIZE; + else + d40c->lcpa = d40c->base->lcpa_base + + d40c->dma_cfg.dst_dev_type * + D40_LCPA_CHAN_SIZE + D40_LCPA_CHAN_DST_DELTA; + } + + /* + * Only write channel configuration to the DMA if the physical + * resource is free. In case of multiple logical channels + * on the same physical resource, only the first write is necessary. + */ + if (is_free_phy) { + err = d40_config_write(d40c); + if (err) { + dev_err(&d40c->chan.dev->device, + "[%s] Failed to configure channel\n", + __func__); + } } spin_unlock_irqrestore(&d40c->lock, flags); diff --git a/drivers/dma/ste_dma40_ll.h b/drivers/dma/ste_dma40_ll.h index 2029280..c081f28 100644 --- a/drivers/dma/ste_dma40_ll.h +++ b/drivers/dma/ste_dma40_ll.h @@ -13,6 +13,9 @@ #define D40_DREG_PCDELTA (8 * 4) #define D40_LLI_ALIGN 16 /* LLI alignment must be 16 bytes. */ +#define D40_LCPA_CHAN_SIZE 32 +#define D40_LCPA_CHAN_DST_DELTA 16 + #define D40_TYPE_TO_GROUP(type) (type / 16) #define D40_TYPE_TO_EVENT(type) (type % 16) -- cgit v0.10.2 From a8be8627a3a00859367d219848f2cd771d6aa925 Mon Sep 17 00:00:00 2001 From: Per Friden Date: Sun, 20 Jun 2010 21:24:59 +0000 Subject: DMAENGINE: ste_dma40: moved freeing of client owned desc Fixed a typo and moved the freeing of client-owned descriptors to free_dma. 
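The relocated loop, condensed from the hunk below. The _safe iterator is required because d40_desc_remove() unlinks the node being walked, and the old copy in d40_term_all() freed d40d instead of the iterated d, which is the typo mentioned above:

	struct d40_desc *d;
	struct d40_desc *_d;

	/* Release client owned descriptors */
	if (!list_empty(&d40c->client))
		list_for_each_entry_safe(d, _d, &d40c->client, node) {
			d40_pool_lli_free(d);
			d40_desc_remove(d);
			/* Return desc to free-list */
			d40_desc_free(d40c, d);
		}
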
Signed-off-by: Jonas Aaberg Signed-off-by: Linus Walleij Signed-off-by: Dan Williams diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c index 9655452..d72eff6 100644 --- a/drivers/dma/ste_dma40.c +++ b/drivers/dma/ste_dma40.c @@ -374,7 +374,6 @@ static void d40_desc_remove(struct d40_desc *d40d) static struct d40_desc *d40_desc_get(struct d40_chan *d40c) { - struct d40_desc *desc; struct d40_desc *d; struct d40_desc *_d; @@ -583,8 +582,6 @@ done: static void d40_term_all(struct d40_chan *d40c) { struct d40_desc *d40d; - struct d40_desc *d; - struct d40_desc *_d; /* Release active descriptors */ while ((d40d = d40_first_active_get(d40c))) { @@ -602,15 +599,6 @@ static void d40_term_all(struct d40_chan *d40c) d40_desc_free(d40c, d40d); } - /* Release client owned descriptors */ - if (!list_empty(&d40c->client)) - list_for_each_entry_safe(d, _d, &d40c->client, node) { - d40_pool_lli_free(d); - d40_desc_remove(d); - /* Return desc to free-list */ - d40_desc_free(d40c, d40d); - } - d40_lcla_id_put(d40c, &d40c->base->lcla_pool, d40c->lcla.src_id); d40_lcla_id_put(d40c, &d40c->base->lcla_pool, @@ -1240,10 +1228,22 @@ static int d40_free_dma(struct d40_chan *d40c) u32 event, dir; struct d40_phy_res *phy = d40c->phy_chan; bool is_src; + struct d40_desc *d; + struct d40_desc *_d; + /* Terminate all queued and active transfers */ d40_term_all(d40c); + /* Release client owned descriptors */ + if (!list_empty(&d40c->client)) + list_for_each_entry_safe(d, _d, &d40c->client, node) { + d40_pool_lli_free(d); + d40_desc_remove(d); + /* Return desc to free-list */ + d40_desc_free(d40c, d); + } + if (phy == NULL) { dev_err(&d40c->chan.dev->device, "[%s] phy == null\n", __func__); -- cgit v0.10.2 From c675b1b424446b52de00cad84ca3d823d29a3d0e Mon Sep 17 00:00:00 2001 From: Jonas Aaberg Date: Sun, 20 Jun 2010 21:25:08 +0000 Subject: DMAENGINE: ste_dma40: use kmem cache Use kmem cache instead of own cache, reducing code duplication and bug sources. Signed-off-by: Jonas Aaberg Signed-off-by: Linus Walleij Signed-off-by: Dan Williams diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c index d72eff6..b60a6db 100644 --- a/drivers/dma/ste_dma40.c +++ b/drivers/dma/ste_dma40.c @@ -34,10 +34,6 @@ #define D40_ALLOC_PHY (1 << 30) #define D40_ALLOC_LOG_FREE 0 -/* The number of free d40_desc to keep in memory before starting - * to kfree() them */ -#define D40_DESC_CACHE_SIZE 50 - /* Hardware designer of the block */ #define D40_PERIPHID2_DESIGNER 0x8 @@ -172,8 +168,6 @@ struct d40_base; * @client: Cliented owned descriptor list. * @active: Active descriptor. * @queue: Queued jobs. - * @free: List of free descripts, ready to be reused. - * @free_len: Number of descriptors in the free list. * @dma_cfg: The client configuration of this dma channel. * @base: Pointer to the device instance struct. * @src_def_cfg: Default cfg register setting for src. @@ -197,8 +191,6 @@ struct d40_chan { struct list_head client; struct list_head active; struct list_head queue; - struct list_head free; - int free_len; struct stedma40_chan_cfg dma_cfg; struct d40_base *base; /* Default register configurations */ @@ -242,6 +234,7 @@ struct d40_chan { * @lcpa_base: The virtual mapped address of LCPA. * @phy_lcpa: The physical address of the LCPA. * @lcpa_size: The size of the LCPA area. + * @desc_slab: cache for descriptors. 
*/ struct d40_base { spinlock_t interrupt_lock; @@ -268,6 +261,7 @@ struct d40_base { void *lcpa_base; dma_addr_t phy_lcpa; resource_size_t lcpa_size; + struct kmem_cache *desc_slab; }; /** @@ -382,36 +376,21 @@ static struct d40_desc *d40_desc_get(struct d40_chan *d40c) if (async_tx_test_ack(&d->txd)) { d40_pool_lli_free(d); d40_desc_remove(d); - desc = d; - goto out; + break; } - } - - if (list_empty(&d40c->free)) { - /* Alloc new desc because we're out of used ones */ - desc = kzalloc(sizeof(struct d40_desc), GFP_NOWAIT); - if (desc == NULL) - goto out; - INIT_LIST_HEAD(&desc->node); } else { - /* Reuse an old desc. */ - desc = list_first_entry(&d40c->free, - struct d40_desc, - node); - list_del(&desc->node); - d40c->free_len--; + d = kmem_cache_alloc(d40c->base->desc_slab, GFP_NOWAIT); + if (d != NULL) { + memset(d, 0, sizeof(struct d40_desc)); + INIT_LIST_HEAD(&d->node); + } } -out: - return desc; + return d; } static void d40_desc_free(struct d40_chan *d40c, struct d40_desc *d40d) { - if (d40c->free_len < D40_DESC_CACHE_SIZE) { - list_add_tail(&d40d->node, &d40c->free); - d40c->free_len++; - } else - kfree(d40d); + kmem_cache_free(d40c->base->desc_slab, d40d); } static void d40_desc_submit(struct d40_chan *d40c, struct d40_desc *desc) @@ -2107,13 +2086,10 @@ static void __init d40_chan_init(struct d40_base *base, struct dma_device *dma, d40c->log_num = D40_PHY_CHAN; - INIT_LIST_HEAD(&d40c->free); INIT_LIST_HEAD(&d40c->active); INIT_LIST_HEAD(&d40c->queue); INIT_LIST_HEAD(&d40c->client); - d40c->free_len = 0; - tasklet_init(&d40c->tasklet, dma_tasklet, (unsigned long) d40c); @@ -2398,6 +2374,12 @@ static struct d40_base * __init d40_hw_detect_init(struct platform_device *pdev) if (!base->lcla_pool.alloc_map) goto failure; + base->desc_slab = kmem_cache_create(D40_NAME, sizeof(struct d40_desc), + 0, SLAB_HWCACHE_ALIGN, + NULL); + if (base->desc_slab == NULL) + goto failure; + return base; failure: @@ -2612,6 +2594,8 @@ static int __init d40_probe(struct platform_device *pdev) failure: if (base) { + if (base->desc_slab) + kmem_cache_destroy(base->desc_slab); if (base->virtbase) iounmap(base->virtbase); if (base->lcla_pool.phy) -- cgit v0.10.2 From ff0b12baa50390ba6a963cb6f6162a94ed4fc333 Mon Sep 17 00:00:00 2001 From: Jonas Aaberg Date: Sun, 20 Jun 2010 21:25:15 +0000 Subject: DMAENGINE: ste_dma40: improve allocation error handling Better error handling when channel allocation fails. 
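The reworked d40_alloc_chan_resources() funnels every failure through a single exit label, so the lock is always released and the real error code is returned instead of a blanket -EINVAL. A condensed sketch of the resulting shape (intermediate steps elided):

	spin_lock_irqsave(&d40c->lock, flags);

	err = d40_allocate_channel(d40c);
	if (err) {
		dev_err(&d40c->chan.dev->device,
			"[%s] Failed to allocate channel\n", __func__);
		goto fail;
	}
	/* further configuration steps, each jumping to fail on error */
fail:
	spin_unlock_irqrestore(&d40c->lock, flags);
	return err;
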
Signed-off-by: Jonas Aaberg Signed-off-by: Linus Walleij Signed-off-by: Dan Williams diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c index b60a6db..8ed1547 100644 --- a/drivers/dma/ste_dma40.c +++ b/drivers/dma/ste_dma40.c @@ -1239,7 +1239,7 @@ static int d40_free_dma(struct d40_chan *d40c) res = d40_channel_execute_command(d40c, D40_DMA_SUSPEND_REQ); if (res) { - dev_err(&d40c->chan.dev->device, "[%s] suspend\n", + dev_err(&d40c->chan.dev->device, "[%s] suspend failed\n", __func__); return res; } @@ -1657,8 +1657,12 @@ static int d40_alloc_chan_resources(struct dma_chan *chan) */ if (d40c->dma_cfg.channel_type == 0) { err = d40_config_memcpy(d40c); - if (err) - goto err_alloc; + if (err) { + dev_err(&d40c->chan.dev->device, + "[%s] Failed to configure memcpy channel\n", + __func__); + goto fail; + } } is_free_phy = (d40c->phy_chan == NULL); @@ -1666,7 +1670,7 @@ static int d40_alloc_chan_resources(struct dma_chan *chan) if (err) { dev_err(&d40c->chan.dev->device, "[%s] Failed to allocate channel\n", __func__); - goto err_alloc; + goto fail; } /* Fill in basic CFG register values */ @@ -1699,17 +1703,9 @@ static int d40_alloc_chan_resources(struct dma_chan *chan) __func__); } } - +fail: spin_unlock_irqrestore(&d40c->lock, flags); - return 0; - - err_config: - (void) d40_free_dma(d40c); - err_alloc: - spin_unlock_irqrestore(&d40c->lock, flags); - dev_err(&d40c->chan.dev->device, - "[%s] Channel allocation failed\n", __func__); - return -EINVAL; + return err; } static void d40_free_chan_resources(struct dma_chan *chan) -- cgit v0.10.2 From 2a6143407d9114a0c5d16a7eed1a0892a4ce9f19 Mon Sep 17 00:00:00 2001 From: Jonas Aaberg Date: Sun, 20 Jun 2010 21:25:24 +0000 Subject: DMAENGINE: ste_dma40: various cosmetic clean-ups This cleans up some extra newlines, removes some code duplication and moves the code to comply better with checkpatch. Signed-off-by: Jonas Aaberg Signed-off-by: Linus Walleij Signed-off-by: Dan Williams diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c index 8ed1547..1d17664 100644 --- a/drivers/dma/ste_dma40.c +++ b/drivers/dma/ste_dma40.c @@ -161,7 +161,8 @@ struct d40_base; * @pending_tx: The number of pending transfers. Used between interrupt handler * and tasklet. * @busy: Set to true when transfer is ongoing on this channel. - * @phy_chan: Pointer to physical channel which this instance runs on. + * @phy_chan: Pointer to physical channel which this instance runs on. If this + * point is NULL, then the channel is not allocated. * @chan: DMA engine handle. * @tasklet: Tasklet that gets scheduled from interrupt context to complete a * transfer and call client callback. 
@@ -1236,7 +1237,6 @@ static int d40_free_dma(struct d40_chan *d40c) return -EINVAL; } - res = d40_channel_execute_command(d40c, D40_DMA_SUSPEND_REQ); if (res) { dev_err(&d40c->chan.dev->device, "[%s] suspend failed\n", @@ -1305,8 +1305,6 @@ static int d40_free_dma(struct d40_chan *d40c) d40c->base->lookup_phy_chans[phy->num] = NULL; return 0; - - } static int d40_pause(struct dma_chan *chan) @@ -1314,7 +1312,6 @@ static int d40_pause(struct dma_chan *chan) struct d40_chan *d40c = container_of(chan, struct d40_chan, chan); int res; - unsigned long flags; spin_lock_irqsave(&d40c->lock, flags); @@ -1510,25 +1507,23 @@ struct dma_async_tx_descriptor *stedma40_memcpy_sg(struct dma_chan *chan, struct scatterlist *sgl_dst, struct scatterlist *sgl_src, unsigned int sgl_len, - unsigned long flags) + unsigned long dma_flags) { int res; struct d40_desc *d40d; struct d40_chan *d40c = container_of(chan, struct d40_chan, chan); - unsigned long flg; - + unsigned long flags; - spin_lock_irqsave(&d40c->lock, flg); + spin_lock_irqsave(&d40c->lock, flags); d40d = d40_desc_get(d40c); if (d40d == NULL) goto err; - memset(d40d, 0, sizeof(struct d40_desc)); d40d->lli_len = sgl_len; d40d->lli_tx_len = d40d->lli_len; - d40d->txd.flags = flags; + d40d->txd.flags = dma_flags; if (d40c->log_num != D40_PHY_CHAN) { if (d40d->lli_len > d40c->base->plat_data->llis_per_log) @@ -1556,7 +1551,7 @@ struct dma_async_tx_descriptor *stedma40_memcpy_sg(struct dma_chan *chan, d40d->lli_log.src, d40c->log_def.lcsp1, d40c->dma_cfg.src_info.data_width, - flags & DMA_PREP_INTERRUPT, + dma_flags & DMA_PREP_INTERRUPT, d40d->lli_tx_len, d40c->base->plat_data->llis_per_log); @@ -1566,7 +1561,7 @@ struct dma_async_tx_descriptor *stedma40_memcpy_sg(struct dma_chan *chan, d40d->lli_log.dst, d40c->log_def.lcsp3, d40c->dma_cfg.dst_info.data_width, - flags & DMA_PREP_INTERRUPT, + dma_flags & DMA_PREP_INTERRUPT, d40d->lli_tx_len, d40c->base->plat_data->llis_per_log); @@ -1612,11 +1607,11 @@ struct dma_async_tx_descriptor *stedma40_memcpy_sg(struct dma_chan *chan, d40d->txd.tx_submit = d40_tx_submit; - spin_unlock_irqrestore(&d40c->lock, flg); + spin_unlock_irqrestore(&d40c->lock, flags); return &d40d->txd; err: - spin_unlock_irqrestore(&d40c->lock, flg); + spin_unlock_irqrestore(&d40c->lock, flags); return NULL; } EXPORT_SYMBOL(stedma40_memcpy_sg); @@ -1729,15 +1724,15 @@ static struct dma_async_tx_descriptor *d40_prep_memcpy(struct dma_chan *chan, dma_addr_t dst, dma_addr_t src, size_t size, - unsigned long flags) + unsigned long dma_flags) { struct d40_desc *d40d; struct d40_chan *d40c = container_of(chan, struct d40_chan, chan); - unsigned long flg; + unsigned long flags; int err = 0; - spin_lock_irqsave(&d40c->lock, flg); + spin_lock_irqsave(&d40c->lock, flags); d40d = d40_desc_get(d40c); if (d40d == NULL) { @@ -1746,9 +1741,7 @@ static struct dma_async_tx_descriptor *d40_prep_memcpy(struct dma_chan *chan, goto err; } - memset(d40d, 0, sizeof(struct d40_desc)); - - d40d->txd.flags = flags; + d40d->txd.flags = dma_flags; dma_async_tx_descriptor_init(&d40d->txd, chan); @@ -1817,7 +1810,7 @@ static struct dma_async_tx_descriptor *d40_prep_memcpy(struct dma_chan *chan, d40d->lli_pool.size, DMA_TO_DEVICE); } - spin_unlock_irqrestore(&d40c->lock, flg); + spin_unlock_irqrestore(&d40c->lock, flags); return &d40d->txd; err_fill_lli: @@ -1825,7 +1818,7 @@ err_fill_lli: "[%s] Failed filling in PHY LLI\n", __func__); d40_pool_lli_free(d40d); err: - spin_unlock_irqrestore(&d40c->lock, flg); + spin_unlock_irqrestore(&d40c->lock, flags); return NULL; } 
@@ -1834,7 +1827,7 @@ static int d40_prep_slave_sg_log(struct d40_desc *d40d, struct scatterlist *sgl, unsigned int sg_len, enum dma_data_direction direction, - unsigned long flags) + unsigned long dma_flags) { dma_addr_t dev_addr = 0; int total_size; @@ -1860,32 +1853,24 @@ static int d40_prep_slave_sg_log(struct d40_desc *d40d, if (d40_lcla_id_get(d40c, &d40c->base->lcla_pool) != 0) d40d->lli_tx_len = 1; - if (direction == DMA_FROM_DEVICE) { + if (direction == DMA_FROM_DEVICE) dev_addr = d40c->base->plat_data->dev_rx[d40c->dma_cfg.src_dev_type]; - total_size = d40_log_sg_to_dev(&d40c->lcla, - sgl, sg_len, - &d40d->lli_log, - &d40c->log_def, - d40c->dma_cfg.src_info.data_width, - d40c->dma_cfg.dst_info.data_width, - direction, - flags & DMA_PREP_INTERRUPT, - dev_addr, d40d->lli_tx_len, - d40c->base->plat_data->llis_per_log); - } else if (direction == DMA_TO_DEVICE) { + else if (direction == DMA_TO_DEVICE) dev_addr = d40c->base->plat_data->dev_tx[d40c->dma_cfg.dst_dev_type]; - total_size = d40_log_sg_to_dev(&d40c->lcla, - sgl, sg_len, - &d40d->lli_log, - &d40c->log_def, - d40c->dma_cfg.src_info.data_width, - d40c->dma_cfg.dst_info.data_width, - direction, - flags & DMA_PREP_INTERRUPT, - dev_addr, d40d->lli_tx_len, - d40c->base->plat_data->llis_per_log); - } else + else return -EINVAL; + + total_size = d40_log_sg_to_dev(&d40c->lcla, + sgl, sg_len, + &d40d->lli_log, + &d40c->log_def, + d40c->dma_cfg.src_info.data_width, + d40c->dma_cfg.dst_info.data_width, + direction, + dma_flags & DMA_PREP_INTERRUPT, + dev_addr, d40d->lli_tx_len, + d40c->base->plat_data->llis_per_log); + if (total_size < 0) return -EINVAL; @@ -1897,7 +1882,7 @@ static int d40_prep_slave_sg_phy(struct d40_desc *d40d, struct scatterlist *sgl, unsigned int sgl_len, enum dma_data_direction direction, - unsigned long flags) + unsigned long dma_flags) { dma_addr_t src_dev_addr; dma_addr_t dst_dev_addr; @@ -1954,12 +1939,12 @@ static struct dma_async_tx_descriptor *d40_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, unsigned int sg_len, enum dma_data_direction direction, - unsigned long flags) + unsigned long dma_flags) { struct d40_desc *d40d; struct d40_chan *d40c = container_of(chan, struct d40_chan, chan); - unsigned long flg; + unsigned long flags; int err; if (d40c->dma_cfg.pre_transfer) @@ -1967,9 +1952,9 @@ static struct dma_async_tx_descriptor *d40_prep_slave_sg(struct dma_chan *chan, d40c->dma_cfg.pre_transfer_data, sg_dma_len(sgl)); - spin_lock_irqsave(&d40c->lock, flg); + spin_lock_irqsave(&d40c->lock, flags); d40d = d40_desc_get(d40c); - spin_unlock_irqrestore(&d40c->lock, flg); + spin_unlock_irqrestore(&d40c->lock, flags); if (d40d == NULL) return NULL; @@ -1978,10 +1963,10 @@ static struct dma_async_tx_descriptor *d40_prep_slave_sg(struct dma_chan *chan, if (d40c->log_num != D40_PHY_CHAN) err = d40_prep_slave_sg_log(d40d, d40c, sgl, sg_len, - direction, flags); + direction, dma_flags); else err = d40_prep_slave_sg_phy(d40d, d40c, sgl, sg_len, - direction, flags); + direction, dma_flags); if (err) { dev_err(&d40c->chan.dev->device, "[%s] Failed to prepare %s slave sg job: %d\n", @@ -1990,7 +1975,7 @@ static struct dma_async_tx_descriptor *d40_prep_slave_sg(struct dma_chan *chan, return NULL; } - d40d->txd.flags = flags; + d40d->txd.flags = dma_flags; dma_async_tx_descriptor_init(&d40d->txd, chan); diff --git a/drivers/dma/ste_dma40_ll.c b/drivers/dma/ste_dma40_ll.c index 561fdd8..e0194e4 100644 --- a/drivers/dma/ste_dma40_ll.c +++ b/drivers/dma/ste_dma40_ll.c @@ -430,25 +430,25 @@ void 
d40_log_lli_write(struct d40_log_lli_full *lcpa, struct d40_log_lli *lli_src, int llis_per_log) { - u32 slos = 0; - u32 dlos = 0; + u32 slos; + u32 dlos; int i; - lcpa->lcsp0 = lli_src->lcsp02; - lcpa->lcsp1 = lli_src->lcsp13; - lcpa->lcsp2 = lli_dst->lcsp02; - lcpa->lcsp3 = lli_dst->lcsp13; + writel(lli_src->lcsp02, &lcpa->lcsp0); + writel(lli_src->lcsp13, &lcpa->lcsp1); + writel(lli_dst->lcsp02, &lcpa->lcsp2); + writel(lli_dst->lcsp13, &lcpa->lcsp3); slos = lli_src->lcsp13 & D40_MEM_LCSP1_SLOS_MASK; dlos = lli_dst->lcsp13 & D40_MEM_LCSP3_DLOS_MASK; for (i = 0; (i < llis_per_log) && slos && dlos; i++) { - writel(lli_src[i+1].lcsp02, &lcla_src[i].lcsp02); - writel(lli_src[i+1].lcsp13, &lcla_src[i].lcsp13); - writel(lli_dst[i+1].lcsp02, &lcla_dst[i].lcsp02); - writel(lli_dst[i+1].lcsp13, &lcla_dst[i].lcsp13); + writel(lli_src[i + 1].lcsp02, &lcla_src[i].lcsp02); + writel(lli_src[i + 1].lcsp13, &lcla_src[i].lcsp13); + writel(lli_dst[i + 1].lcsp02, &lcla_dst[i].lcsp02); + writel(lli_dst[i + 1].lcsp13, &lcla_dst[i].lcsp13); - slos = lli_src[i+1].lcsp13 & D40_MEM_LCSP1_SLOS_MASK; - dlos = lli_dst[i+1].lcsp13 & D40_MEM_LCSP3_DLOS_MASK; + slos = lli_src[i + 1].lcsp13 & D40_MEM_LCSP1_SLOS_MASK; + dlos = lli_dst[i + 1].lcsp13 & D40_MEM_LCSP3_DLOS_MASK; } } -- cgit v0.10.2 From 0d0f6b8bbb13e1287f4f2a271e4f4306789511a2 Mon Sep 17 00:00:00 2001 From: Jonas Aaberg Date: Sun, 20 Jun 2010 21:25:31 +0000 Subject: DMAENGINE: ste_dma40: deny ops on non-alloc channels Added checks to deny operating on non-allocated channels. Signed-off-by: Jonas Aaberg Signed-off-by: Linus Walleij Signed-off-by: Dan Williams diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c index 1d17664..4d56d21 100644 --- a/drivers/dma/ste_dma40.c +++ b/drivers/dma/ste_dma40.c @@ -1515,6 +1515,12 @@ struct dma_async_tx_descriptor *stedma40_memcpy_sg(struct dma_chan *chan, chan); unsigned long flags; + if (d40c->phy_chan == NULL) { + dev_err(&d40c->chan.dev->device, + "[%s] Unallocated channel.\n", __func__); + return ERR_PTR(-EINVAL); + } + spin_lock_irqsave(&d40c->lock, flags); d40d = d40_desc_get(d40c); @@ -1710,6 +1716,13 @@ static void d40_free_chan_resources(struct dma_chan *chan) int err; unsigned long flags; + if (d40c->phy_chan == NULL) { + dev_err(&d40c->chan.dev->device, + "[%s] Cannot free unallocated channel\n", __func__); + return; + } + + spin_lock_irqsave(&d40c->lock, flags); err = d40_free_dma(d40c); @@ -1732,6 +1745,12 @@ static struct dma_async_tx_descriptor *d40_prep_memcpy(struct dma_chan *chan, unsigned long flags; int err = 0; + if (d40c->phy_chan == NULL) { + dev_err(&d40c->chan.dev->device, + "[%s] Channel is not allocated.\n", __func__); + return ERR_PTR(-EINVAL); + } + spin_lock_irqsave(&d40c->lock, flags); d40d = d40_desc_get(d40c); @@ -1947,6 +1966,12 @@ static struct dma_async_tx_descriptor *d40_prep_slave_sg(struct dma_chan *chan, unsigned long flags; int err; + if (d40c->phy_chan == NULL) { + dev_err(&d40c->chan.dev->device, + "[%s] Cannot prepare unallocated channel\n", __func__); + return ERR_PTR(-EINVAL); + } + if (d40c->dma_cfg.pre_transfer) d40c->dma_cfg.pre_transfer(chan, d40c->dma_cfg.pre_transfer_data, sg_dma_len(sgl)); @@ -1993,6 +2018,13 @@ static enum dma_status d40_tx_status(struct dma_chan *chan, dma_cookie_t last_complete; int ret; + if (d40c->phy_chan == NULL) { + dev_err(&d40c->chan.dev->device, + "[%s] Cannot read status of unallocated channel\n", + __func__); + return -EINVAL; + } + last_complete = d40c->completed; last_used = chan->cookie; @@ -2012,6 +2044,12 @@ static void 
d40_issue_pending(struct dma_chan *chan) struct d40_chan *d40c = container_of(chan, struct d40_chan, chan); unsigned long flags; + if (d40c->phy_chan == NULL) { + dev_err(&d40c->chan.dev->device, + "[%s] Channel is not allocated!\n", __func__); + return; + } + spin_lock_irqsave(&d40c->lock, flags); /* Busy means that pending jobs are already being processed */ @@ -2027,6 +2065,12 @@ static int d40_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd, unsigned long flags; struct d40_chan *d40c = container_of(chan, struct d40_chan, chan); + if (d40c->phy_chan == NULL) { + dev_err(&d40c->chan.dev->device, + "[%s] Channel is not allocated!\n", __func__); + return -EINVAL; + } + switch (cmd) { case DMA_TERMINATE_ALL: spin_lock_irqsave(&d40c->lock, flags); -- cgit v0.10.2 From 2292b880e411276d127a020a1c35ca149fcca992 Mon Sep 17 00:00:00 2001 From: Jonas Aaberg Date: Sun, 20 Jun 2010 21:25:39 +0000 Subject: DMAENGINE: ste_dma40: lock fix Fix up some locking issues found by enabling lock debugging. Signed-off-by: Jonas Aaberg Signed-off-by: Linus Walleij Signed-off-by: Dan Williams diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c index 4d56d21..ebffb53 100644 --- a/drivers/dma/ste_dma40.c +++ b/drivers/dma/ste_dma40.c @@ -441,6 +441,7 @@ static int d40_lcla_id_get(struct d40_chan *d40c, pool->base + d40c->phy_chan->num * 1024; int i; int lli_per_log = d40c->base->plat_data->llis_per_log; + unsigned long flags; if (d40c->lcla.src_id >= 0 && d40c->lcla.dst_id >= 0) return 0; @@ -448,7 +449,7 @@ static int d40_lcla_id_get(struct d40_chan *d40c, if (pool->num_blocks > 32) return -EINVAL; - spin_lock(&pool->lock); + spin_lock_irqsave(&pool->lock, flags); for (i = 0; i < pool->num_blocks; i++) { if (!(pool->alloc_map[d40c->phy_chan->num] & (0x1 << i))) { @@ -477,10 +478,10 @@ static int d40_lcla_id_get(struct d40_chan *d40c, d40c->lcla.src = lcla_lidx_base + src_id * lli_per_log + 1; - spin_unlock(&pool->lock); + spin_unlock_irqrestore(&pool->lock, flags); return 0; err: - spin_unlock(&pool->lock); + spin_unlock_irqrestore(&pool->lock, flags); return -EINVAL; } @@ -488,15 +489,16 @@ static void d40_lcla_id_put(struct d40_chan *d40c, struct d40_lcla_pool *pool, int id) { + unsigned long flags; if (id < 0) return; d40c->lcla.src_id = -1; d40c->lcla.dst_id = -1; - spin_lock(&pool->lock); + spin_lock_irqsave(&pool->lock, flags); pool->alloc_map[d40c->phy_chan->num] &= (~(0x1 << id)); - spin_unlock(&pool->lock); + spin_unlock_irqrestore(&pool->lock, flags); } static int d40_channel_execute_command(struct d40_chan *d40c, @@ -1984,8 +1986,6 @@ static struct dma_async_tx_descriptor *d40_prep_slave_sg(struct dma_chan *chan, if (d40d == NULL) return NULL; - memset(d40d, 0, sizeof(struct d40_desc)); - if (d40c->log_num != D40_PHY_CHAN) err = d40_prep_slave_sg_log(d40d, d40c, sgl, sg_len, direction, dma_flags); -- cgit v0.10.2 From 0c32269d813c148194524fc8272f7ec1f7c90e6a Mon Sep 17 00:00:00 2001 From: Jonas Aaberg Date: Sun, 20 Jun 2010 21:25:46 +0000 Subject: DMAENGINE: ste_dma40: avoid doing unnecessary suspend Avoid doing an unnecessary suspend when modifying logical channels. 
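After this change d40_start() reduces to the following (the complete function as it reads after the hunk below); activating the event line of a logical channel no longer requires the SUSPEND_REQ round trip first:

	static int d40_start(struct d40_chan *d40c)
	{
		if (d40c->log_num != D40_PHY_CHAN)
			d40_config_set_event(d40c, true);

		return d40_channel_execute_command(d40c, D40_DMA_RUN);
	}
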
Signed-off-by: Jonas Aaberg Signed-off-by: Linus Walleij Signed-off-by: Dan Williams diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c index ebffb53..abbc1b6 100644 --- a/drivers/dma/ste_dma40.c +++ b/drivers/dma/ste_dma40.c @@ -595,6 +595,7 @@ static void d40_config_set_event(struct d40_chan *d40c, bool do_enable) u32 val; unsigned long flags; + /* Notice, that disable requires the physical channel to be stopped */ if (do_enable) val = D40_ACTIVATE_EVENTLINE; else @@ -740,18 +741,10 @@ static dma_cookie_t d40_tx_submit(struct dma_async_tx_descriptor *tx) static int d40_start(struct d40_chan *d40c) { - int err; - - if (d40c->log_num != D40_PHY_CHAN) { - err = d40_channel_execute_command(d40c, D40_DMA_SUSPEND_REQ); - if (err) - return err; + if (d40c->log_num != D40_PHY_CHAN) d40_config_set_event(d40c, true); - } - - err = d40_channel_execute_command(d40c, D40_DMA_RUN); - return err; + return d40_channel_execute_command(d40c, D40_DMA_RUN); } static struct d40_desc *d40_queue_start(struct d40_chan *d40c) @@ -1340,7 +1333,6 @@ static bool d40_is_paused(struct d40_chan *d40c) void __iomem *active_reg; u32 status; u32 event; - int res; spin_lock_irqsave(&d40c->lock, flags); @@ -1359,10 +1351,6 @@ static bool d40_is_paused(struct d40_chan *d40c) goto _exit; } - res = d40_channel_execute_command(d40c, D40_DMA_SUSPEND_REQ); - if (res != 0) - goto _exit; - if (d40c->dma_cfg.dir == STEDMA40_MEM_TO_PERIPH || d40c->dma_cfg.dir == STEDMA40_MEM_TO_MEM) event = D40_TYPE_TO_EVENT(d40c->dma_cfg.dst_dev_type); @@ -1379,12 +1367,6 @@ static bool d40_is_paused(struct d40_chan *d40c) if (status != D40_DMA_RUN) is_paused = true; - - /* Resume the other logical channels if any */ - if (d40_chan_has_events(d40c)) - res = d40_channel_execute_command(d40c, - D40_DMA_RUN); - _exit: spin_unlock_irqrestore(&d40c->lock, flags); return is_paused; @@ -1430,20 +1412,13 @@ static int d40_resume(struct dma_chan *chan) spin_lock_irqsave(&d40c->lock, flags); - if (d40c->log_num != D40_PHY_CHAN) { - res = d40_channel_execute_command(d40c, D40_DMA_SUSPEND_REQ); - if (res) - goto out; - - /* If bytes left to transfer or linked tx resume job */ - if (d40_residue(d40c) || d40_tx_is_linked(d40c)) { + /* If bytes left to transfer or linked tx resume job */ + if (d40_residue(d40c) || d40_tx_is_linked(d40c)) { + if (d40c->log_num != D40_PHY_CHAN) d40_config_set_event(d40c, true); - res = d40_channel_execute_command(d40c, D40_DMA_RUN); - } - } else if (d40_residue(d40c) || d40_tx_is_linked(d40c)) res = d40_channel_execute_command(d40c, D40_DMA_RUN); + } -out: spin_unlock_irqrestore(&d40c->lock, flags); return res; } -- cgit v0.10.2 From 2123a61e174074b849fce2277412836b1b897942 Mon Sep 17 00:00:00 2001 From: Jonas Aaberg Date: Sun, 20 Jun 2010 21:25:54 +0000 Subject: DMAENGINE: ste_dma40: interrupts only on dst We don't want interrupts when the source is done, only when the destination is done and everything is complete at the receiving end of a transfer. 
Signed-off-by: Jonas Aaberg Signed-off-by: Linus Walleij Signed-off-by: Dan Williams diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c index abbc1b6..aa098a6 100644 --- a/drivers/dma/ste_dma40.c +++ b/drivers/dma/ste_dma40.c @@ -1759,7 +1759,7 @@ static struct dma_async_tx_descriptor *d40_prep_memcpy(struct dma_chan *chan, 0, d40c->log_def.lcsp1, d40c->dma_cfg.src_info.data_width, - true, true); + false, true); d40_log_fill_lli(d40d->lli_log.dst, dst, diff --git a/drivers/dma/ste_dma40_ll.c b/drivers/dma/ste_dma40_ll.c index e0194e4..772636b 100644 --- a/drivers/dma/ste_dma40_ll.c +++ b/drivers/dma/ste_dma40_ll.c @@ -315,11 +315,8 @@ int d40_log_sg_to_dev(struct d40_lcla_elem *lcla, int total_size = 0; struct scatterlist *current_sg = sg; int i; - u32 next_lli_off_dst; - u32 next_lli_off_src; - - next_lli_off_src = 0; - next_lli_off_dst = 0; + u32 next_lli_off_dst = 0; + u32 next_lli_off_src = 0; for_each_sg(sg, current_sg, sg_len, i) { total_size += sg_dma_len(current_sg); @@ -351,7 +348,7 @@ int d40_log_sg_to_dev(struct d40_lcla_elem *lcla, sg_dma_len(current_sg), next_lli_off_src, lcsp->lcsp1, src_data_width, - term_int && !next_lli_off_src, + false, true); d40_log_fill_lli(&lli->dst[i], dev_addr, @@ -375,7 +372,7 @@ int d40_log_sg_to_dev(struct d40_lcla_elem *lcla, sg_dma_len(current_sg), next_lli_off_src, lcsp->lcsp1, src_data_width, - term_int && !next_lli_off_src, + false, false); } } -- cgit v0.10.2 From 1d392a7ba43300b0bde877de15121b261d7a6ce2 Mon Sep 17 00:00:00 2001 From: Jonas Aaberg Date: Sun, 20 Jun 2010 21:26:01 +0000 Subject: DMAENGINE: ste_dma40: no premature stop Correct bug that could cause paused channels to stop. Signed-off-by: Jonas Aaberg Signed-off-by: Linus Walleij Signed-off-by: Dan Williams diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c index aa098a6..8c46bb8 100644 --- a/drivers/dma/ste_dma40.c +++ b/drivers/dma/ste_dma40.c @@ -508,6 +508,7 @@ static int d40_channel_execute_command(struct d40_chan *d40c, void __iomem *active_reg; int ret = 0; unsigned long flags; + u32 wmask; spin_lock_irqsave(&d40c->base->execmd_lock, flags); @@ -525,7 +526,9 @@ static int d40_channel_execute_command(struct d40_chan *d40c, goto done; } - writel(command << D40_CHAN_POS(d40c->phy_chan->num), active_reg); + wmask = 0xffffffff & ~(D40_CHAN_POS_MASK(d40c->phy_chan->num)); + writel(wmask | (command << D40_CHAN_POS(d40c->phy_chan->num)), + active_reg); if (command == D40_DMA_SUSPEND_REQ) { -- cgit v0.10.2 From 508849ade23c1167bfbdf557259398adfe7044b9 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sun, 20 Jun 2010 21:26:07 +0000 Subject: DMAENGINE: ste_dma40: allocate LCLA dynamically Switch to allocating LCLA in memory instead of having a fixed address. 
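The core of the new strategy, condensed from d40_lcla_allocate() below: keep grabbing page blocks until one happens to start on the 256 KiB (18-bit) boundary the hardware requires, then release the unaligned attempts; if no attempt lines up, fall back to an oversized kmalloc and align a pointer into it with PTR_ALIGN. Error handling is elided here:

	for (i = 0; i < MAX_LCLA_ALLOC_ATTEMPTS; i++) {
		page_list[i] = __get_free_pages(GFP_KERNEL,
						base->lcla_pool.pages);
		if ((virt_to_phys((void *)page_list[i]) &
		     (LCLA_ALIGNMENT - 1)) == 0)
			break;	/* aligned: keep this block */
	}

	/* free the unaligned attempts again */
	for (j = 0; j < i; j++)
		free_pages(page_list[j], base->lcla_pool.pages);
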
Signed-off-by: Jonas Aaberg Signed-off-by: Linus Walleij Signed-off-by: Dan Williams diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c index 8c46bb8..5748e96 100644 --- a/drivers/dma/ste_dma40.c +++ b/drivers/dma/ste_dma40.c @@ -30,6 +30,12 @@ /* Maximum iterations taken before giving up suspending a channel */ #define D40_SUSPEND_MAX_IT 500 +/* Hardware requirement on LCLA alignment */ +#define LCLA_ALIGNMENT 0x40000 +/* Attempts before giving up to trying to get pages that are aligned */ +#define MAX_LCLA_ALLOC_ATTEMPTS 256 + +/* Bit markings for allocation map */ #define D40_ALLOC_FREE (1 << 31) #define D40_ALLOC_PHY (1 << 30) #define D40_ALLOC_LOG_FREE 0 @@ -64,9 +70,9 @@ enum d40_command { */ struct d40_lli_pool { void *base; - int size; + int size; /* Space for dst and src, plus an extra for padding */ - u8 pre_alloc_lli[3 * sizeof(struct d40_phy_lli)]; + u8 pre_alloc_lli[3 * sizeof(struct d40_phy_lli)]; }; /** @@ -111,18 +117,20 @@ struct d40_desc { /** * struct d40_lcla_pool - LCLA pool settings and data. * - * @base: The virtual address of LCLA. - * @phy: Physical base address of LCLA. - * @base_size: size of lcla. + * @base: The virtual address of LCLA. 18 bit aligned. + * @base_unaligned: The orignal kmalloc pointer, if kmalloc is used. + * This pointer is only there for clean-up on error. + * @pages: The number of pages needed for all physical channels. + * Only used later for clean-up on error * @lock: Lock to protect the content in this struct. - * @alloc_map: Mapping between physical channel and LCLA entries. + * @alloc_map: Bitmap mapping between physical channel and LCLA entries. * @num_blocks: The number of entries of alloc_map. Equals to the * number of physical channels. */ struct d40_lcla_pool { void *base; - dma_addr_t phy; - resource_size_t base_size; + void *base_unaligned; + int pages; spinlock_t lock; u32 *alloc_map; int num_blocks; @@ -432,13 +440,12 @@ static struct d40_desc *d40_first_queued(struct d40_chan *d40c) /* Support functions for logical channels */ -static int d40_lcla_id_get(struct d40_chan *d40c, - struct d40_lcla_pool *pool) +static int d40_lcla_id_get(struct d40_chan *d40c) { int src_id = 0; int dst_id = 0; struct d40_log_lli *lcla_lidx_base = - pool->base + d40c->phy_chan->num * 1024; + d40c->base->lcla_pool.base + d40c->phy_chan->num * 1024; int i; int lli_per_log = d40c->base->plat_data->llis_per_log; unsigned long flags; @@ -446,24 +453,28 @@ static int d40_lcla_id_get(struct d40_chan *d40c, if (d40c->lcla.src_id >= 0 && d40c->lcla.dst_id >= 0) return 0; - if (pool->num_blocks > 32) + if (d40c->base->lcla_pool.num_blocks > 32) return -EINVAL; - spin_lock_irqsave(&pool->lock, flags); + spin_lock_irqsave(&d40c->base->lcla_pool.lock, flags); - for (i = 0; i < pool->num_blocks; i++) { - if (!(pool->alloc_map[d40c->phy_chan->num] & (0x1 << i))) { - pool->alloc_map[d40c->phy_chan->num] |= (0x1 << i); + for (i = 0; i < d40c->base->lcla_pool.num_blocks; i++) { + if (!(d40c->base->lcla_pool.alloc_map[d40c->phy_chan->num] & + (0x1 << i))) { + d40c->base->lcla_pool.alloc_map[d40c->phy_chan->num] |= + (0x1 << i); break; } } src_id = i; - if (src_id >= pool->num_blocks) + if (src_id >= d40c->base->lcla_pool.num_blocks) goto err; - for (; i < pool->num_blocks; i++) { - if (!(pool->alloc_map[d40c->phy_chan->num] & (0x1 << i))) { - pool->alloc_map[d40c->phy_chan->num] |= (0x1 << i); + for (; i < d40c->base->lcla_pool.num_blocks; i++) { + if (!(d40c->base->lcla_pool.alloc_map[d40c->phy_chan->num] & + (0x1 << i))) { + 
d40c->base->lcla_pool.alloc_map[d40c->phy_chan->num] |= + (0x1 << i); break; } } @@ -477,29 +488,13 @@ static int d40_lcla_id_get(struct d40_chan *d40c, d40c->lcla.dst = lcla_lidx_base + dst_id * lli_per_log + 1; d40c->lcla.src = lcla_lidx_base + src_id * lli_per_log + 1; - - spin_unlock_irqrestore(&pool->lock, flags); + spin_unlock_irqrestore(&d40c->base->lcla_pool.lock, flags); return 0; err: - spin_unlock_irqrestore(&pool->lock, flags); + spin_unlock_irqrestore(&d40c->base->lcla_pool.lock, flags); return -EINVAL; } -static void d40_lcla_id_put(struct d40_chan *d40c, - struct d40_lcla_pool *pool, - int id) -{ - unsigned long flags; - if (id < 0) - return; - - d40c->lcla.src_id = -1; - d40c->lcla.dst_id = -1; - - spin_lock_irqsave(&pool->lock, flags); - pool->alloc_map[d40c->phy_chan->num] &= (~(0x1 << id)); - spin_unlock_irqrestore(&pool->lock, flags); -} static int d40_channel_execute_command(struct d40_chan *d40c, enum d40_command command) @@ -567,6 +562,7 @@ done: static void d40_term_all(struct d40_chan *d40c) { struct d40_desc *d40d; + unsigned long flags; /* Release active descriptors */ while ((d40d = d40_first_active_get(d40c))) { @@ -584,10 +580,17 @@ static void d40_term_all(struct d40_chan *d40c) d40_desc_free(d40c, d40d); } - d40_lcla_id_put(d40c, &d40c->base->lcla_pool, - d40c->lcla.src_id); - d40_lcla_id_put(d40c, &d40c->base->lcla_pool, - d40c->lcla.dst_id); + spin_lock_irqsave(&d40c->base->lcla_pool.lock, flags); + + d40c->base->lcla_pool.alloc_map[d40c->phy_chan->num] &= + (~(0x1 << d40c->lcla.dst_id)); + d40c->base->lcla_pool.alloc_map[d40c->phy_chan->num] &= + (~(0x1 << d40c->lcla.src_id)); + + d40c->lcla.src_id = -1; + d40c->lcla.dst_id = -1; + + spin_unlock_irqrestore(&d40c->base->lcla_pool.lock, flags); d40c->pending_tx = 0; d40c->busy = false; @@ -703,7 +706,6 @@ static int d40_config_write(struct d40_chan *d40c) static void d40_desc_load(struct d40_chan *d40c, struct d40_desc *d40d) { - if (d40d->lli_phy.dst && d40d->lli_phy.src) { d40_phy_lli_write(d40c->base->virtbase, d40c->phy_chan->num, @@ -712,13 +714,24 @@ static void d40_desc_load(struct d40_chan *d40c, struct d40_desc *d40d) } else if (d40d->lli_log.dst && d40d->lli_log.src) { struct d40_log_lli *src = d40d->lli_log.src; struct d40_log_lli *dst = d40d->lli_log.dst; + int s; src += d40d->lli_count; dst += d40d->lli_count; - d40_log_lli_write(d40c->lcpa, d40c->lcla.src, - d40c->lcla.dst, - dst, src, - d40c->base->plat_data->llis_per_log); + s = d40_log_lli_write(d40c->lcpa, + d40c->lcla.src, d40c->lcla.dst, + dst, src, + d40c->base->plat_data->llis_per_log); + + /* If s equals to zero, the job is not linked */ + if (s > 0) { + (void) dma_map_single(d40c->base->dev, d40c->lcla.src, + s * sizeof(struct d40_log_lli), + DMA_TO_DEVICE); + (void) dma_map_single(d40c->base->dev, d40c->lcla.dst, + s * sizeof(struct d40_log_lli), + DMA_TO_DEVICE); + } } d40d->lli_count += d40d->lli_tx_len; } @@ -930,7 +943,8 @@ static irqreturn_t d40_handle_interrupt(int irq, void *data) if (!il[row].is_error) dma_tc_handle(d40c); else - dev_err(base->dev, "[%s] IRQ chan: %ld offset %d idx %d\n", + dev_err(base->dev, + "[%s] IRQ chan: %ld offset %d idx %d\n", __func__, chan, il[row].offset, idx); spin_unlock(&d40c->lock); @@ -1089,7 +1103,8 @@ static int d40_allocate_channel(struct d40_chan *d40c) int j; int log_num; bool is_src; - bool is_log = (d40c->dma_cfg.channel_type & STEDMA40_CHANNEL_IN_OPER_MODE) + bool is_log = (d40c->dma_cfg.channel_type & + STEDMA40_CHANNEL_IN_OPER_MODE) == STEDMA40_CHANNEL_IN_LOG_MODE; @@ -1124,8 
+1139,10 @@ static int d40_allocate_channel(struct d40_chan *d40c) for (j = 0; j < d40c->base->num_phy_chans; j += 8) { int phy_num = j + event_group * 2; for (i = phy_num; i < phy_num + 2; i++) { - if (d40_alloc_mask_set(&phys[i], is_src, - 0, is_log)) + if (d40_alloc_mask_set(&phys[i], + is_src, + 0, + is_log)) goto found_phy; } } @@ -1396,13 +1413,14 @@ static u32 d40_residue(struct d40_chan *d40c) u32 num_elt; if (d40c->log_num != D40_PHY_CHAN) - num_elt = (readl(&d40c->lcpa->lcsp2) & D40_MEM_LCSP2_ECNT_MASK) + num_elt = (readl(&d40c->lcpa->lcsp2) & D40_MEM_LCSP2_ECNT_MASK) >> D40_MEM_LCSP2_ECNT_POS; else num_elt = (readl(d40c->base->virtbase + D40_DREG_PCBASE + d40c->phy_chan->num * D40_DREG_PCDELTA + D40_CHAN_REG_SDELT) & - D40_SREG_ELEM_PHY_ECNT_MASK) >> D40_SREG_ELEM_PHY_ECNT_POS; + D40_SREG_ELEM_PHY_ECNT_MASK) >> + D40_SREG_ELEM_PHY_ECNT_POS; return num_elt * (1 << d40c->dma_cfg.dst_info.data_width); } @@ -1455,8 +1473,10 @@ int stedma40_set_psize(struct dma_chan *chan, if (d40c->log_num != D40_PHY_CHAN) { d40c->log_def.lcsp1 &= ~D40_MEM_LCSP1_SCFG_PSIZE_MASK; d40c->log_def.lcsp3 &= ~D40_MEM_LCSP1_SCFG_PSIZE_MASK; - d40c->log_def.lcsp1 |= src_psize << D40_MEM_LCSP1_SCFG_PSIZE_POS; - d40c->log_def.lcsp3 |= dst_psize << D40_MEM_LCSP1_SCFG_PSIZE_POS; + d40c->log_def.lcsp1 |= src_psize << + D40_MEM_LCSP1_SCFG_PSIZE_POS; + d40c->log_def.lcsp3 |= dst_psize << + D40_MEM_LCSP1_SCFG_PSIZE_POS; goto out; } @@ -1521,8 +1541,7 @@ struct dma_async_tx_descriptor *stedma40_memcpy_sg(struct dma_chan *chan, * split list into 1-length and run only in lcpa * space. */ - if (d40_lcla_id_get(d40c, - &d40c->base->lcla_pool) != 0) + if (d40_lcla_id_get(d40c) != 0) d40d->lli_tx_len = 1; if (d40_pool_lli_alloc(d40d, sgl_len, true) < 0) { @@ -1849,7 +1868,7 @@ static int d40_prep_slave_sg_log(struct d40_desc *d40d, * If not, split list into 1-length and run only * in lcpa space. */ - if (d40_lcla_id_get(d40c, &d40c->base->lcla_pool) != 0) + if (d40_lcla_id_get(d40c) != 0) d40d->lli_tx_len = 1; if (direction == DMA_FROM_DEVICE) @@ -2476,6 +2495,78 @@ static void __init d40_hw_init(struct d40_base *base) } +static int __init d40_lcla_allocate(struct d40_base *base) +{ + unsigned long *page_list; + int i, j; + int ret = 0; + + /* + * This is somewhat ugly. We need 8192 bytes that are 18 bit aligned, + * To full fill this hardware requirement without wasting 256 kb + * we allocate pages until we get an aligned one. 
+ */ + page_list = kmalloc(sizeof(unsigned long) * MAX_LCLA_ALLOC_ATTEMPTS, + GFP_KERNEL); + + if (!page_list) { + ret = -ENOMEM; + goto failure; + } + + /* Calculating how many pages that are required */ + base->lcla_pool.pages = SZ_1K * base->num_phy_chans / PAGE_SIZE; + + for (i = 0; i < MAX_LCLA_ALLOC_ATTEMPTS; i++) { + page_list[i] = __get_free_pages(GFP_KERNEL, + base->lcla_pool.pages); + if (!page_list[i]) { + + dev_err(base->dev, + "[%s] Failed to allocate %d pages.\n", + __func__, base->lcla_pool.pages); + + for (j = 0; j < i; j++) + free_pages(page_list[j], base->lcla_pool.pages); + goto failure; + } + + if ((virt_to_phys((void *)page_list[i]) & + (LCLA_ALIGNMENT - 1)) == 0) + break; + } + + for (j = 0; j < i; j++) + free_pages(page_list[j], base->lcla_pool.pages); + + if (i < MAX_LCLA_ALLOC_ATTEMPTS) { + base->lcla_pool.base = (void *)page_list[i]; + } else { + /* After many attempts, no succees with finding the correct + * alignment try with allocating a big buffer */ + dev_warn(base->dev, + "[%s] Failed to get %d pages @ 18 bit align.\n", + __func__, base->lcla_pool.pages); + base->lcla_pool.base_unaligned = kmalloc(SZ_1K * + base->num_phy_chans + + LCLA_ALIGNMENT, + GFP_KERNEL); + if (!base->lcla_pool.base_unaligned) { + ret = -ENOMEM; + goto failure; + } + + base->lcla_pool.base = PTR_ALIGN(base->lcla_pool.base_unaligned, + LCLA_ALIGNMENT); + } + + writel(virt_to_phys(base->lcla_pool.base), + base->virtbase + D40_DREG_LCLA); +failure: + kfree(page_list); + return ret; +} + static int __init d40_probe(struct platform_device *pdev) { int err; @@ -2535,41 +2626,11 @@ static int __init d40_probe(struct platform_device *pdev) __func__); goto failure; } - /* Get IO for logical channel link address */ - res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "lcla"); - if (!res) { - ret = -ENOENT; - dev_err(&pdev->dev, - "[%s] No \"lcla\" resource defined\n", - __func__); - goto failure; - } - base->lcla_pool.base_size = resource_size(res); - base->lcla_pool.phy = res->start; - - if (request_mem_region(res->start, resource_size(res), - D40_NAME " I/O lcla") == NULL) { - ret = -EBUSY; - dev_err(&pdev->dev, - "[%s] Failed to request LCLA region 0x%x-0x%x\n", - __func__, res->start, res->end); - goto failure; - } - val = readl(base->virtbase + D40_DREG_LCLA); - if (res->start != val && val != 0) { - dev_warn(&pdev->dev, - "[%s] Mismatch LCLA dma 0x%x, def 0x%x\n", - __func__, val, res->start); - } else - writel(res->start, base->virtbase + D40_DREG_LCLA); - - base->lcla_pool.base = ioremap(res->start, resource_size(res)); - if (!base->lcla_pool.base) { - ret = -ENOMEM; - dev_err(&pdev->dev, - "[%s] Failed to ioremap LCLA 0x%x-0x%x\n", - __func__, res->start, res->end); + ret = d40_lcla_allocate(base); + if (ret) { + dev_err(&pdev->dev, "[%s] Failed to allocate LCLA area\n", + __func__); goto failure; } @@ -2601,9 +2662,11 @@ failure: kmem_cache_destroy(base->desc_slab); if (base->virtbase) iounmap(base->virtbase); - if (base->lcla_pool.phy) - release_mem_region(base->lcla_pool.phy, - base->lcla_pool.base_size); + if (!base->lcla_pool.base_unaligned && base->lcla_pool.base) + free_pages((unsigned long)base->lcla_pool.base, + base->lcla_pool.pages); + if (base->lcla_pool.base_unaligned) + kfree(base->lcla_pool.base_unaligned); if (base->phy_lcpa) release_mem_region(base->phy_lcpa, base->lcpa_size); diff --git a/drivers/dma/ste_dma40_ll.c b/drivers/dma/ste_dma40_ll.c index 772636b..d937f76 100644 --- a/drivers/dma/ste_dma40_ll.c +++ b/drivers/dma/ste_dma40_ll.c @@ -420,7 +420,7 @@ int 
d40_log_sg_to_lli(int lcla_id, return total_size; } -void d40_log_lli_write(struct d40_log_lli_full *lcpa, +int d40_log_lli_write(struct d40_log_lli_full *lcpa, struct d40_log_lli *lcla_src, struct d40_log_lli *lcla_dst, struct d40_log_lli *lli_dst, @@ -448,4 +448,7 @@ void d40_log_lli_write(struct d40_log_lli_full *lcpa, slos = lli_src[i + 1].lcsp13 & D40_MEM_LCSP1_SLOS_MASK; dlos = lli_dst[i + 1].lcsp13 & D40_MEM_LCSP3_DLOS_MASK; } + + return i; + } diff --git a/drivers/dma/ste_dma40_ll.h b/drivers/dma/ste_dma40_ll.h index c081f28..9c0fa2f 100644 --- a/drivers/dma/ste_dma40_ll.h +++ b/drivers/dma/ste_dma40_ll.h @@ -339,12 +339,12 @@ int d40_log_sg_to_dev(struct d40_lcla_elem *lcla, bool term_int, dma_addr_t dev_addr, int max_len, int llis_per_log); -void d40_log_lli_write(struct d40_log_lli_full *lcpa, - struct d40_log_lli *lcla_src, - struct d40_log_lli *lcla_dst, - struct d40_log_lli *lli_dst, - struct d40_log_lli *lli_src, - int llis_per_log); +int d40_log_lli_write(struct d40_log_lli_full *lcpa, + struct d40_log_lli *lcla_src, + struct d40_log_lli *lcla_dst, + struct d40_log_lli *lli_dst, + struct d40_log_lli *lli_src, + int llis_per_log); int d40_log_sg_to_lli(int lcla_id, struct scatterlist *sg, -- cgit v0.10.2 From 5aa12e8c9c57741606e52f43e62ab1b9dc8e9dcc Mon Sep 17 00:00:00 2001 From: Jonas Aaberg Date: Sun, 20 Jun 2010 21:26:14 +0000 Subject: DMAENGINE: ste_dma40: arch updates for LCLA and LCPA This follows on the patch to allocate LCLA dynamically: the on-chip memory is needed for other things so now that we're using it dynamically we can remove the LCLA resource altogether and free up some ESRAM memory. Signed-off-by: Jonas Aaberg Signed-off-by: Linus Walleij Signed-off-by: Dan Williams diff --git a/arch/arm/mach-ux500/devices-db8500.c b/arch/arm/mach-ux500/devices-db8500.c index 8229034..638964e 100644 --- a/arch/arm/mach-ux500/devices-db8500.c +++ b/arch/arm/mach-ux500/devices-db8500.c @@ -113,26 +113,21 @@ struct platform_device u8500_i2c4_device = { static struct resource dma40_resources[] = { [0] = { .start = U8500_DMA_BASE, - .end = U8500_DMA_BASE + SZ_4K - 1, + .end = U8500_DMA_BASE + SZ_4K - 1, .flags = IORESOURCE_MEM, - .name = "base", + .name = "base", }, [1] = { .start = U8500_DMA_LCPA_BASE, - .end = U8500_DMA_LCPA_BASE + SZ_4K - 1, + .end = U8500_DMA_LCPA_BASE + 2 * SZ_1K - 1, .flags = IORESOURCE_MEM, - .name = "lcpa", + .name = "lcpa", }, [2] = { - .start = U8500_DMA_LCLA_BASE, - .end = U8500_DMA_LCLA_BASE + 16 * 1024 - 1, - .flags = IORESOURCE_MEM, - .name = "lcla", - }, - [3] = { .start = IRQ_DMA, - .end = IRQ_DMA, - .flags = IORESOURCE_IRQ} + .end = IRQ_DMA, + .flags = IORESOURCE_IRQ, + } }; /* Default configuration for physcial memcpy */ diff --git a/arch/arm/mach-ux500/include/mach/db8500-regs.h b/arch/arm/mach-ux500/include/mach/db8500-regs.h index 85fc6a8..897fa35 100644 --- a/arch/arm/mach-ux500/include/mach/db8500-regs.h +++ b/arch/arm/mach-ux500/include/mach/db8500-regs.h @@ -15,9 +15,8 @@ #define U8500_ESRAM_BANK2 (U8500_ESRAM_BANK1 + U8500_ESRAM_BANK_SIZE) #define U8500_ESRAM_BANK3 (U8500_ESRAM_BANK2 + U8500_ESRAM_BANK_SIZE) #define U8500_ESRAM_BANK4 (U8500_ESRAM_BANK3 + U8500_ESRAM_BANK_SIZE) -/* Use bank 4 for DMA LCLA and LCPA */ -#define U8500_DMA_LCLA_BASE U8500_ESRAM_BANK4 -#define U8500_DMA_LCPA_BASE (U8500_ESRAM_BANK4 + 0x4000) +/* Use bank 4 for DMA LCPA */ +#define U8500_DMA_LCPA_BASE U8500_ESRAM_BANK4 #define U8500_PER3_BASE 0x80000000 #define U8500_STM_BASE 0x80100000 -- cgit v0.10.2 From ef934aea861323820cb39051e94a52319f456fbe Mon 
Sep 17 00:00:00 2001 From: Jonas Aaberg Date: Sun, 20 Jun 2010 21:26:22 +0000 Subject: DMAENGINE: ste_dma40: no flow control on memcpy On memcpy DMA operations we don't need flow control. Signed-off-by: Jonas Aaberg Signed-off-by: Linus Walleij Signed-off-by: Dan Williams diff --git a/arch/arm/mach-ux500/devices-db8500.c b/arch/arm/mach-ux500/devices-db8500.c index 638964e..8d94bc6 100644 --- a/arch/arm/mach-ux500/devices-db8500.c +++ b/arch/arm/mach-ux500/devices-db8500.c @@ -140,11 +140,12 @@ struct stedma40_chan_cfg dma40_memcpy_conf_phy = { .src_info.endianess = STEDMA40_LITTLE_ENDIAN, .src_info.data_width = STEDMA40_BYTE_WIDTH, .src_info.psize = STEDMA40_PSIZE_PHY_1, + .src_info.flow_ctrl = STEDMA40_NO_FLOW_CTRL, .dst_info.endianess = STEDMA40_LITTLE_ENDIAN, .dst_info.data_width = STEDMA40_BYTE_WIDTH, .dst_info.psize = STEDMA40_PSIZE_PHY_1, - + .dst_info.flow_ctrl = STEDMA40_NO_FLOW_CTRL, }; /* Default configuration for logical memcpy */ struct stedma40_chan_cfg dma40_memcpy_conf_log = { @@ -157,11 +158,12 @@ struct stedma40_chan_cfg dma40_memcpy_conf_log = { .src_info.endianess = STEDMA40_LITTLE_ENDIAN, .src_info.data_width = STEDMA40_BYTE_WIDTH, .src_info.psize = STEDMA40_PSIZE_LOG_1, + .src_info.flow_ctrl = STEDMA40_NO_FLOW_CTRL, .dst_info.endianess = STEDMA40_LITTLE_ENDIAN, .dst_info.data_width = STEDMA40_BYTE_WIDTH, .dst_info.psize = STEDMA40_PSIZE_LOG_1, - + .dst_info.flow_ctrl = STEDMA40_NO_FLOW_CTRL, }; /* -- cgit v0.10.2 From 8bc68fa51d7ed520cd4c69f74fb37add60b6ea47 Mon Sep 17 00:00:00 2001 From: Jonas Aaberg Date: Sun, 20 Jun 2010 21:26:30 +0000 Subject: DMAENGINE: ste_dma40: add DB8500 memcpy channels This adds all available memcpy channels to the DB8500 in the ux500 platform (and fixes a typo). Signed-off-by: Jonas Aaberg Signed-off-by: Linus Walleij Signed-off-by: Dan Williams diff --git a/arch/arm/mach-ux500/devices-db8500.c b/arch/arm/mach-ux500/devices-db8500.c index 8d94bc6..a001e4c0 100644 --- a/arch/arm/mach-ux500/devices-db8500.c +++ b/arch/arm/mach-ux500/devices-db8500.c @@ -177,10 +177,12 @@ static const dma_addr_t dma40_rx_map[STEDMA40_NR_DEV]; /* Reserved event lines for memcpy only */ static int dma40_memcpy_event[] = { + STEDMA40_MEMCPY_TX_0, STEDMA40_MEMCPY_TX_1, STEDMA40_MEMCPY_TX_2, STEDMA40_MEMCPY_TX_3, STEDMA40_MEMCPY_TX_4, + STEDMA40_MEMCPY_TX_5, }; static struct stedma40_platform_data dma40_plat_data = { diff --git a/arch/arm/mach-ux500/ste-dma40-db8500.h b/arch/arm/mach-ux500/ste-dma40-db8500.h index e701627..9d9d379 100644 --- a/arch/arm/mach-ux500/ste-dma40-db8500.h +++ b/arch/arm/mach-ux500/ste-dma40-db8500.h @@ -136,7 +136,7 @@ enum dma_dest_dev_type { STEDMA40_DEV_CAC1_TX = 48, STEDMA40_DEV_CAC1_TX_HAC1_TX = 49, STEDMA40_DEV_HAC1_TX = 50, - STEDMA40_MEMXCPY_TX_0 = 51, + STEDMA40_MEMCPY_TX_0 = 51, STEDMA40_DEV_SLIM1_CH0_TX_HSI_TX_CH4 = 52, STEDMA40_DEV_SLIM1_CH1_TX_HSI_TX_CH5 = 53, STEDMA40_DEV_SLIM1_CH2_TX_HSI_TX_CH6 = 54, -- cgit v0.10.2 From d181b3a8cb2fc1732ad1826a5e6fdccab03e6a51 Mon Sep 17 00:00:00 2001 From: Jonas Aaberg Date: Sun, 20 Jun 2010 21:26:38 +0000 Subject: DMAENGINE: ste_dma40: fix suspend bug This fixes a bug when suspending channels: first make the checks, then suspend the channel, not the other way around. 
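Condensed from the hunk below, the corrected ordering in d40_free_dma(): the direction check now runs before the suspend request, so an -EINVAL exit can no longer leave the channel needlessly suspended:

	if (d40c->dma_cfg.dir == STEDMA40_MEM_TO_PERIPH ||
	    d40c->dma_cfg.dir == STEDMA40_MEM_TO_MEM) {
		event = D40_TYPE_TO_EVENT(d40c->dma_cfg.dst_dev_type);
		is_src = false;
	} else if (d40c->dma_cfg.dir == STEDMA40_PERIPH_TO_MEM) {
		event = D40_TYPE_TO_EVENT(d40c->dma_cfg.src_dev_type);
		is_src = true;
	} else {
		return -EINVAL;	/* bail out before touching the hardware */
	}

	res = d40_channel_execute_command(d40c, D40_DMA_SUSPEND_REQ);
	if (res)
		return res;
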
Signed-off-by: Jonas Aaberg Signed-off-by: Linus Walleij Signed-off-by: Dan Williams diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c index 5748e96..e14c395 100644 --- a/drivers/dma/ste_dma40.c +++ b/drivers/dma/ste_dma40.c @@ -1220,7 +1220,7 @@ static int d40_free_dma(struct d40_chan *d40c) { int res = 0; - u32 event, dir; + u32 event; struct d40_phy_res *phy = d40c->phy_chan; bool is_src; struct d40_desc *d; @@ -1252,21 +1252,12 @@ static int d40_free_dma(struct d40_chan *d40c) return -EINVAL; } - res = d40_channel_execute_command(d40c, D40_DMA_SUSPEND_REQ); - if (res) { - dev_err(&d40c->chan.dev->device, "[%s] suspend failed\n", - __func__); - return res; - } - if (d40c->dma_cfg.dir == STEDMA40_MEM_TO_PERIPH || d40c->dma_cfg.dir == STEDMA40_MEM_TO_MEM) { event = D40_TYPE_TO_EVENT(d40c->dma_cfg.dst_dev_type); - dir = D40_CHAN_REG_SDLNK; is_src = false; } else if (d40c->dma_cfg.dir == STEDMA40_PERIPH_TO_MEM) { event = D40_TYPE_TO_EVENT(d40c->dma_cfg.src_dev_type); - dir = D40_CHAN_REG_SSLNK; is_src = true; } else { dev_err(&d40c->chan.dev->device, @@ -1274,16 +1265,17 @@ static int d40_free_dma(struct d40_chan *d40c) return -EINVAL; } + res = d40_channel_execute_command(d40c, D40_DMA_SUSPEND_REQ); + if (res) { + dev_err(&d40c->chan.dev->device, "[%s] suspend failed\n", + __func__); + return res; + } + if (d40c->log_num != D40_PHY_CHAN) { - /* - * Release logical channel, deactivate the event line during - * the time physical res is suspended. - */ - writel((D40_DEACTIVATE_EVENTLINE << D40_EVENTLINE_POS(event)) & - D40_EVENTLINE_MASK(event), - d40c->base->virtbase + D40_DREG_PCBASE + - phy->num * D40_DREG_PCDELTA + dir); + /* Release logical channel, deactivate the event line */ + d40_config_set_event(d40c, false); d40c->base->lookup_log_chans[d40c->log_num] = NULL; /* @@ -1304,8 +1296,9 @@ static int d40_free_dma(struct d40_chan *d40c) } return 0; } - } else - d40_alloc_mask_free(phy, is_src, 0); + } else { + (void) d40_alloc_mask_free(phy, is_src, 0); + } /* Release physical channel */ res = d40_channel_execute_command(d40c, D40_DMA_STOP); -- cgit v0.10.2 From 595167253a20167efae704ff2a8f6e2ee66cf25f Mon Sep 17 00:00:00 2001 From: Jonas Aaberg Date: Sun, 20 Jun 2010 21:26:45 +0000 Subject: DMAENGINE: ste_dma40: no disabled phy channels on ux500 Make sure we have no disabled physical channels when we start on the ux500 machines. 
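The convention that this patch and the next one introduce is a plain int vector in the platform data, terminated by -1, so an initializer of {-1} means "no channels disabled". A stand-alone sketch of consuming such a vector, assuming each entry names a physical channel to reserve (types and names are illustrative, not the driver's):

#include <stdio.h>

#define NUM_PHY_CHANS 8

struct phy_res {
	int allocated_src;
	int allocated_dst;
};

int main(void)
{
	/* Sentinel-terminated list: channels 2 and 5 are off limits. */
	static const int disabled_channels[] = { 2, 5, -1 };
	struct phy_res phy[NUM_PHY_CHANS] = { { 0, 0 } };
	int avail = NUM_PHY_CHANS;
	int i;

	for (i = 0; disabled_channels[i] != -1; i++) {
		/* Mark the channel occupied so allocation skips it. */
		phy[disabled_channels[i]].allocated_src = 1;
		phy[disabled_channels[i]].allocated_dst = 1;
		avail--;
	}

	printf("%d of %d physical DMA channels available\n",
	       avail, NUM_PHY_CHANS);
	return 0;
}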
Signed-off-by: Jonas Aaberg Signed-off-by: Linus Walleij Signed-off-by: Dan Williams diff --git a/arch/arm/mach-ux500/devices-db8500.c b/arch/arm/mach-ux500/devices-db8500.c index a001e4c0..e2b7c31 100644 --- a/arch/arm/mach-ux500/devices-db8500.c +++ b/arch/arm/mach-ux500/devices-db8500.c @@ -194,6 +194,7 @@ static struct stedma40_platform_data dma40_plat_data = { .memcpy_conf_phy = &dma40_memcpy_conf_phy, .memcpy_conf_log = &dma40_memcpy_conf_log, .llis_per_log = 8, + .disabled_channels = {-1}, }; struct platform_device u8500_dma40_device = { diff --git a/arch/arm/plat-nomadik/include/plat/ste_dma40.h b/arch/arm/plat-nomadik/include/plat/ste_dma40.h index 4d12ea4..5fbde4b 100644 --- a/arch/arm/plat-nomadik/include/plat/ste_dma40.h +++ b/arch/arm/plat-nomadik/include/plat/ste_dma40.h @@ -148,7 +148,8 @@ struct stedma40_chan_cfg { * @memcpy_conf_phy: default configuration of physical channel memcpy * @memcpy_conf_log: default configuration of logical channel memcpy * @llis_per_log: number of max linked list items per logical channel - * + * @disabled_channels: A vector, ending with -1, that marks physical channels + * that are for different reasons not available for the driver. */ struct stedma40_platform_data { u32 dev_len; const dma_addr_t *dev_tx; const dma_addr_t *dev_rx; struct stedma40_chan_cfg *memcpy_conf_phy; struct stedma40_chan_cfg *memcpy_conf_log; unsigned int llis_per_log; + int disabled_channels[8]; }; /** -- cgit v0.10.2 From 6b7acd84426235c63a3c0f0b230a95064f97b0d4 Mon Sep 17 00:00:00 2001 From: Jonas Aaberg Date: Sun, 20 Jun 2010 21:26:59 +0000 Subject: DMAENGINE: ste_dma40: support disabling physical channels This makes it possible to disable physical channels. Signed-off-by: Jonas Aaberg Signed-off-by: Linus Walleij Signed-off-by: Dan Williams diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c index e14c395..1d21fbd 100644 --- a/drivers/dma/ste_dma40.c +++ b/drivers/dma/ste_dma40.c @@ -2230,6 +2230,14 @@ static int __init d40_phy_res_init(struct d40_base *base) } spin_lock_init(&base->phy_res[i].lock); } + + /* Mark disabled channels as occupied */ + for (i = 0; base->plat_data->disabled_channels[i] != -1; i++) { + base->phy_res[i].allocated_src = D40_ALLOC_PHY; + base->phy_res[i].allocated_dst = D40_ALLOC_PHY; + num_phy_chans_avail--; + } + dev_info(base->dev, "%d of %d physical DMA channels available\n", num_phy_chans_avail, base->num_phy_chans); -- cgit v0.10.2 From f41855929c9fdc3b4f2863ada9df3e0cf4231b5b Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 22 Jun 2010 18:06:42 -0700 Subject: DMAENGINE: ste_dma40: support older silicon This makes sure the DMA40 driver will also work on the oldest silicon revisions that have the on-chip memory at another location in the DB8500 and also require explicit suspend before starting or resuming a logical channel.
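Old silicon is recognized from the PERIPHID2 register that the driver already reads for its designer check: the low nibble holds the designer ID and the next nibble the hardware revision, and revision 0 is the one that needs the extra suspend. A stand-alone sketch of that decode (the designer value here is assumed for illustration):

#include <stdio.h>
#include <stdint.h>

/* Designer nibble the driver expects; the value is assumed here. */
#define PERIPHID2_DESIGNER 0x8

static int decode_periphid2(uint32_t val, unsigned int *rev)
{
	if ((val & 0xf) != PERIPHID2_DESIGNER)
		return -1; /* unknown designer */

	*rev = (val >> 4) & 0xf; /* bits [7:4]: silicon revision */
	return 0;
}

int main(void)
{
	uint32_t val = PERIPHID2_DESIGNER | (0 << 4); /* a revision 0 part */
	unsigned int rev;

	if (decode_periphid2(val, &rev) == 0 && rev == 0)
		printf("rev %u: suspend required before run/resume\n", rev);
	return 0;
}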
Signed-off-by: Linus Walleij [added parenthesis to the definition of U8500_DMA_LCPA_BASE_ED] Signed-off-by: Dan Williams diff --git a/arch/arm/mach-ux500/devices-db8500.c b/arch/arm/mach-ux500/devices-db8500.c index e2b7c31..c62cdbe 100644 --- a/arch/arm/mach-ux500/devices-db8500.c +++ b/arch/arm/mach-ux500/devices-db8500.c @@ -213,4 +213,6 @@ void dma40_u8500ed_fixup(void) dma40_plat_data.memcpy_len = 0; dma40_resources[0].start = U8500_DMA_BASE_ED; dma40_resources[0].end = U8500_DMA_BASE_ED + SZ_4K - 1; + dma40_resources[1].start = U8500_DMA_LCPA_BASE_ED; + dma40_resources[1].end = U8500_DMA_LCPA_BASE_ED + 2 * SZ_1K - 1; } diff --git a/arch/arm/mach-ux500/include/mach/db8500-regs.h b/arch/arm/mach-ux500/include/mach/db8500-regs.h index 897fa35..f000218 100644 --- a/arch/arm/mach-ux500/include/mach/db8500-regs.h +++ b/arch/arm/mach-ux500/include/mach/db8500-regs.h @@ -17,6 +17,7 @@ #define U8500_ESRAM_BANK4 (U8500_ESRAM_BANK3 + U8500_ESRAM_BANK_SIZE) /* Use bank 4 for DMA LCPA */ #define U8500_DMA_LCPA_BASE U8500_ESRAM_BANK4 +#define U8500_DMA_LCPA_BASE_ED (U8500_ESRAM_BANK4 + 0x4000) #define U8500_PER3_BASE 0x80000000 #define U8500_STM_BASE 0x80100000 diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c index 1d21fbd..21a7597 100644 --- a/drivers/dma/ste_dma40.c +++ b/drivers/dma/ste_dma40.c @@ -218,6 +218,7 @@ struct d40_chan { * the same physical register. * @dev: The device structure. * @virtbase: The virtual base address of the DMA's register. + * @rev: silicon revision detected. * @clk: Pointer to the DMA clock structure. * @phy_start: Physical memory start of the DMA registers. * @phy_size: Size of the DMA register map. @@ -250,6 +251,7 @@ struct d40_base { spinlock_t execmd_lock; struct device *dev; void __iomem *virtbase; + u8 rev:4; struct clk *clk; phys_addr_t phy_start; resource_size_t phy_size; @@ -757,6 +759,17 @@ static dma_cookie_t d40_tx_submit(struct dma_async_tx_descriptor *tx) static int d40_start(struct d40_chan *d40c) { + if (d40c->base->rev == 0) { + int err; + + if (d40c->log_num != D40_PHY_CHAN) { + err = d40_channel_execute_command(d40c, + D40_DMA_SUSPEND_REQ); + if (err) + return err; + } + } + if (d40c->log_num != D40_PHY_CHAN) d40_config_set_event(d40c, true); @@ -1426,6 +1439,13 @@ static int d40_resume(struct dma_chan *chan) spin_lock_irqsave(&d40c->lock, flags); + if (d40c->base->rev == 0) + if (d40c->log_num != D40_PHY_CHAN) { + res = d40_channel_execute_command(d40c, + D40_DMA_SUSPEND_REQ); + goto no_suspend; + } + /* If bytes left to transfer or linked tx resume job */ if (d40_residue(d40c) || d40_tx_is_linked(d40c)) { if (d40c->log_num != D40_PHY_CHAN) @@ -1433,6 +1453,7 @@ static int d40_resume(struct dma_chan *chan) res = d40_channel_execute_command(d40c, D40_DMA_RUN); } +no_suspend: spin_unlock_irqrestore(&d40c->lock, flags); return res; } @@ -2286,6 +2307,7 @@ static struct d40_base * __init d40_hw_detect_init(struct platform_device *pdev) int num_log_chans = 0; int num_phy_chans; int i; + u32 val; clk = clk_get(&pdev->dev, NULL); @@ -2324,12 +2346,13 @@ static struct d40_base * __init d40_hw_detect_init(struct platform_device *pdev) } } - i = readl(virtbase + D40_DREG_PERIPHID2); + /* Get silicon revision */ + val = readl(virtbase + D40_DREG_PERIPHID2); - if ((i & 0xf) != D40_PERIPHID2_DESIGNER) { + if ((val & 0xf) != D40_PERIPHID2_DESIGNER) { dev_err(&pdev->dev, "[%s] Unknown designer! 
Got %x wanted %x\n", - __func__, i & 0xf, D40_PERIPHID2_DESIGNER); + __func__, val & 0xf, D40_PERIPHID2_DESIGNER); goto failure; } @@ -2337,7 +2360,7 @@ static struct d40_base * __init d40_hw_detect_init(struct platform_device *pdev) num_phy_chans = 4 * (readl(virtbase + D40_DREG_ICFG) & 0x7) + 4; dev_info(&pdev->dev, "hardware revision: %d @ 0x%x\n", - (i >> 4) & 0xf, res->start); + (val >> 4) & 0xf, res->start); plat_data = pdev->dev.platform_data; @@ -2359,6 +2382,7 @@ static struct d40_base * __init d40_hw_detect_init(struct platform_device *pdev) goto failure; } + base->rev = (val >> 4) & 0xf; base->clk = clk; base->num_phy_chans = num_phy_chans; base->num_log_chans = num_log_chans; -- cgit v0.10.2 From efcc28981745bc6aca88acb2d4d37d87f090a80a Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 25 May 2010 11:55:06 +0200 Subject: dma/timb_dma: compile warning on 32 bit This silences a compile warning on 32 bit systems: drivers/dma/timb_dma.c:203: warning: cast to pointer from integer of different size Signed-off-by: Dan Carpenter Signed-off-by: Dan Williams diff --git a/drivers/dma/timb_dma.c b/drivers/dma/timb_dma.c index a1bf77c..464fad5 100644 --- a/drivers/dma/timb_dma.c +++ b/drivers/dma/timb_dma.c @@ -200,8 +200,8 @@ static int td_fill_desc(struct timb_dma_chan *td_chan, u8 *dma_desc, return -EINVAL; } - dev_dbg(chan2dev(&td_chan->chan), "desc: %p, addr: %p\n", - dma_desc, (void *)sg_dma_address(sg)); + dev_dbg(chan2dev(&td_chan->chan), "desc: %p, addr: 0x%llx\n", + dma_desc, (unsigned long long)sg_dma_address(sg)); dma_desc[7] = (sg_dma_address(sg) >> 24) & 0xff; dma_desc[6] = (sg_dma_address(sg) >> 16) & 0xff; -- cgit v0.10.2 From 485680050166dc8c6ac976346430ab1f453c228b Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Thu, 27 May 2010 14:33:17 +0200 Subject: drivers/dma: Eliminate a NULL pointer dereference If td_desc is NULL, just skip both kfrees. A simplified version of the semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // @r exists@ expression E,E1; identifier f; statement S1,S2,S3; @@ if ((E == NULL && ...) || ...) { ... when != if (...) S1 else S2 when != E = E1 * E->f ... when any return ...; } else S3 // Signed-off-by: Julia Lawall Signed-off-by: Dan Williams diff --git a/drivers/dma/timb_dma.c b/drivers/dma/timb_dma.c index 464fad5..2ec1ed5 100644 --- a/drivers/dma/timb_dma.c +++ b/drivers/dma/timb_dma.c @@ -382,7 +382,7 @@ static struct timb_dma_desc *td_alloc_init_desc(struct timb_dma_chan *td_chan) td_desc = kzalloc(sizeof(struct timb_dma_desc), GFP_KERNEL); if (!td_desc) { dev_err(chan2dev(chan), "Failed to alloc descriptor\n"); - goto err; + goto out; } td_desc->desc_list_len = td_chan->desc_elems * TIMB_DMA_DESC_SIZE; @@ -410,7 +410,7 @@ static struct timb_dma_desc *td_alloc_init_desc(struct timb_dma_chan *td_chan) err: kfree(td_desc->desc_list); kfree(td_desc); - +out: return NULL; } -- cgit v0.10.2 From b3c567e474b5ba4447b6e16063a3b0cffc22d205 Mon Sep 17 00:00:00 2001 From: Vinod Koul Date: Wed, 21 Jul 2010 13:28:10 +0530 Subject: intel_mid: Add Mrst & Mfld DMA Drivers This patch adds DMA drivers for the DMA controllers in the Langwell chipset of the Intel(R) Moorestown platform and the DMA controllers in Penwell of the Intel(R) Medfield platform. This patch adds support for Moorestown DMAC1 and DMAC2 controllers. It also adds support for Medfield GP DMA and DMAC1 controllers. These controllers support memory to peripheral and peripheral to memory transfers. They support only single block transfers.
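Since the hardware moves at most one block per programmed transfer, a byte length has to be converted into data items of the configured bus width and clamped at the controller's maximum block size; the driver's get_block_ts() further down does exactly this. A stand-alone sketch of the calculation with illustrative numbers:

#include <stdio.h>

/* len in bytes, byte_width per data item, max_block in items */
static int block_ts(int len, int byte_width, int max_block)
{
	int items = len / byte_width;

	/* Saturate, as the driver does, when the request is too long. */
	return items > max_block ? 0xFFFF : items;
}

int main(void)
{
	/* 32-bit (4-byte) items against a 4095-item block limit */
	printf("%d\n", block_ts(1024, 4, 4095));  /* 256 items, fits  */
	printf("%d\n", block_ts(32768, 4, 4095)); /* saturates: 65535 */
	return 0;
}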
This driver is based on the kernel DMA engine. Anyone who wishes to use this controller should use the DMA engine APIs. This controller exposes DMA_SLAVE capabilities and notifies the client drivers of DMA transaction completion. Config option required to be enabled: CONFIG_INTEL_MID_DMAC=y Signed-off-by: Vinod Koul Signed-off-by: Alan Cox Signed-off-by: Dan Williams diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig index 9e01e96..6a3e5bf 100644 --- a/drivers/dma/Kconfig +++ b/drivers/dma/Kconfig @@ -33,6 +33,19 @@ if DMADEVICES comment "DMA Devices" +config INTEL_MID_DMAC + tristate "Intel MID DMA support for Peripheral DMA controllers" + depends on PCI && X86 + select DMA_ENGINE + default n + help + Enable support for the Intel(R) MID DMA engine present + in Intel MID chipsets. + + Say Y here if you have such a chipset. + + If unsure, say N. + config ASYNC_TX_DISABLE_CHANNEL_SWITCH bool diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile index 0fe5ebb..27b6c69 100644 --- a/drivers/dma/Makefile +++ b/drivers/dma/Makefile @@ -7,6 +7,7 @@ endif obj-$(CONFIG_DMA_ENGINE) += dmaengine.o obj-$(CONFIG_NET_DMA) += iovlock.o +obj-$(CONFIG_INTEL_MID_DMAC) += intel_mid_dma.o obj-$(CONFIG_DMATEST) += dmatest.o obj-$(CONFIG_INTEL_IOATDMA) += ioat/ obj-$(CONFIG_INTEL_IOP_ADMA) += iop-adma.o diff --git a/drivers/dma/intel_mid_dma.c b/drivers/dma/intel_mid_dma.c new file mode 100644 index 0000000..c2591e8 --- /dev/null +++ b/drivers/dma/intel_mid_dma.c @@ -0,0 +1,1143 @@ +/* + * intel_mid_dma.c - Intel Langwell DMA Drivers + * + * Copyright (C) 2008-10 Intel Corp + * Author: Vinod Koul + * The driver design is based on dw_dmac driver + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+ * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * + */ +#include +#include +#include + +#define MAX_CHAN 4 /*max ch across controllers*/ +#include "intel_mid_dma_regs.h" + +#define INTEL_MID_DMAC1_ID 0x0814 +#define INTEL_MID_DMAC2_ID 0x0813 +#define INTEL_MID_GP_DMAC2_ID 0x0827 +#define INTEL_MFLD_DMAC1_ID 0x0830 +#define LNW_PERIPHRAL_MASK_BASE 0xFFAE8008 +#define LNW_PERIPHRAL_MASK_SIZE 0x10 +#define LNW_PERIPHRAL_STATUS 0x0 +#define LNW_PERIPHRAL_MASK 0x8 + +struct intel_mid_dma_probe_info { + u8 max_chan; + u8 ch_base; + u16 block_size; + u32 pimr_mask; +}; + +#define INFO(_max_chan, _ch_base, _block_size, _pimr_mask) \ + ((kernel_ulong_t)&(struct intel_mid_dma_probe_info) { \ + .max_chan = (_max_chan), \ + .ch_base = (_ch_base), \ + .block_size = (_block_size), \ + .pimr_mask = (_pimr_mask), \ + }) + +/***************************************************************************** +Utility Functions*/ +/** + * get_ch_index - convert status to channel + * @status: status mask + * @base: dma ch base value + * + * Modify the status mask and return the channel index needing + * attention (or -1 if neither) + */ +static int get_ch_index(int *status, unsigned int base) +{ + int i; + for (i = 0; i < MAX_CHAN; i++) { + if (*status & (1 << (i + base))) { + *status = *status & ~(1 << (i + base)); + pr_debug("MDMA: index %d New status %x\n", i, *status); + return i; + } + } + return -1; +} + +/** + * get_block_ts - calculates dma transaction length + * @len: dma transfer length + * @tx_width: dma transfer src width + * @block_size: dma controller max block size + * + * Based on src width calculate the DMA trsaction length in data items + * return data items or FFFF if exceeds max length for block + */ +static int get_block_ts(int len, int tx_width, int block_size) +{ + int byte_width = 0, block_ts = 0; + + switch (tx_width) { + case LNW_DMA_WIDTH_8BIT: + byte_width = 1; + break; + case LNW_DMA_WIDTH_16BIT: + byte_width = 2; + break; + case LNW_DMA_WIDTH_32BIT: + default: + byte_width = 4; + break; + } + + block_ts = len/byte_width; + if (block_ts > block_size) + block_ts = 0xFFFF; + return block_ts; +} + +/***************************************************************************** +DMAC1 interrupt Functions*/ + +/** + * dmac1_mask_periphral_intr - mask the periphral interrupt + * @midc: dma channel for which masking is required + * + * Masks the DMA periphral interrupt + * this is valid for DMAC1 family controllers only + * This controller should have periphral mask registers already mapped + */ +static void dmac1_mask_periphral_intr(struct intel_mid_dma_chan *midc) +{ + u32 pimr; + struct middma_device *mid = to_middma_device(midc->chan.device); + + if (mid->pimr_mask) { + pimr = readl(mid->mask_reg + LNW_PERIPHRAL_MASK); + pimr |= mid->pimr_mask; + writel(pimr, mid->mask_reg + LNW_PERIPHRAL_MASK); + } + return; +} + +/** + * dmac1_unmask_periphral_intr - unmask the periphral interrupt + * @midc: dma channel for which masking is required + * + * UnMasks the DMA periphral interrupt, + * this is valid for DMAC1 family controllers only + * This controller should have periphral mask registers already mapped + */ +static void dmac1_unmask_periphral_intr(struct intel_mid_dma_chan *midc) +{ + u32 pimr; + struct middma_device *mid = to_middma_device(midc->chan.device); + + if (mid->pimr_mask) { + pimr = readl(mid->mask_reg + LNW_PERIPHRAL_MASK); + pimr &= ~mid->pimr_mask; + writel(pimr, mid->mask_reg + LNW_PERIPHRAL_MASK); + } + return; +} + +/** + * 
enable_dma_interrupt - enable the periphral interrupt + * @midc: dma channel for which enable interrupt is required + * + * Enable the DMA periphral interrupt, + * this is valid for DMAC1 family controllers only + * This controller should have periphral mask registers already mapped + */ +static void enable_dma_interrupt(struct intel_mid_dma_chan *midc) +{ + dmac1_unmask_periphral_intr(midc); + + /*en ch interrupts*/ + iowrite32(UNMASK_INTR_REG(midc->ch_id), midc->dma_base + MASK_TFR); + iowrite32(UNMASK_INTR_REG(midc->ch_id), midc->dma_base + MASK_ERR); + return; +} + +/** + * disable_dma_interrupt - disable the periphral interrupt + * @midc: dma channel for which disable interrupt is required + * + * Disable the DMA periphral interrupt, + * this is valid for DMAC1 family controllers only + * This controller should have periphral mask registers already mapped + */ +static void disable_dma_interrupt(struct intel_mid_dma_chan *midc) +{ + /*Check LPE PISR, make sure fwd is disabled*/ + dmac1_mask_periphral_intr(midc); + iowrite32(MASK_INTR_REG(midc->ch_id), midc->dma_base + MASK_BLOCK); + iowrite32(MASK_INTR_REG(midc->ch_id), midc->dma_base + MASK_TFR); + iowrite32(MASK_INTR_REG(midc->ch_id), midc->dma_base + MASK_ERR); + return; +} + +/***************************************************************************** +DMA channel helper Functions*/ +/** + * mid_desc_get - get a descriptor + * @midc: dma channel for which descriptor is required + * + * Obtain a descriptor for the channel. Returns NULL if none are free. + * Once the descriptor is returned it is private until put on another + * list or freed + */ +static struct intel_mid_dma_desc *midc_desc_get(struct intel_mid_dma_chan *midc) +{ + struct intel_mid_dma_desc *desc, *_desc; + struct intel_mid_dma_desc *ret = NULL; + + spin_lock_bh(&midc->lock); + list_for_each_entry_safe(desc, _desc, &midc->free_list, desc_node) { + if (async_tx_test_ack(&desc->txd)) { + list_del(&desc->desc_node); + ret = desc; + break; + } + } + spin_unlock_bh(&midc->lock); + return ret; +} + +/** + * mid_desc_put - put a descriptor + * @midc: dma channel for which descriptor is required + * @desc: descriptor to put + * + * Return a descriptor from lwn_desc_get back to the free pool + */ +static void midc_desc_put(struct intel_mid_dma_chan *midc, + struct intel_mid_dma_desc *desc) +{ + if (desc) { + spin_lock_bh(&midc->lock); + list_add_tail(&desc->desc_node, &midc->free_list); + spin_unlock_bh(&midc->lock); + } +} +/** + * midc_dostart - begin a DMA transaction + * @midc: channel for which txn is to be started + * @first: first descriptor of series + * + * Load a transaction into the engine. This must be called with midc->lock + * held and bh disabled. + */ +static void midc_dostart(struct intel_mid_dma_chan *midc, + struct intel_mid_dma_desc *first) +{ + struct middma_device *mid = to_middma_device(midc->chan.device); + + /* channel is idle */ + if (midc->in_use && test_ch_en(midc->dma_base, midc->ch_id)) { + /*error*/ + pr_err("ERR_MDMA: channel is busy in start\n"); + /* The tasklet will hopefully advance the queue... 
*/ + return; + } + + /*write registers and en*/ + iowrite32(first->sar, midc->ch_regs + SAR); + iowrite32(first->dar, midc->ch_regs + DAR); + iowrite32(first->cfg_hi, midc->ch_regs + CFG_HIGH); + iowrite32(first->cfg_lo, midc->ch_regs + CFG_LOW); + iowrite32(first->ctl_lo, midc->ch_regs + CTL_LOW); + iowrite32(first->ctl_hi, midc->ch_regs + CTL_HIGH); + pr_debug("MDMA:TX SAR %x,DAR %x,CFGL %x,CFGH %x,CTLH %x, CTLL %x\n", + (int)first->sar, (int)first->dar, first->cfg_hi, + first->cfg_lo, first->ctl_hi, first->ctl_lo); + + iowrite32(ENABLE_CHANNEL(midc->ch_id), mid->dma_base + DMA_CHAN_EN); + first->status = DMA_IN_PROGRESS; +} + +/** + * midc_descriptor_complete - process completed descriptor + * @midc: channel owning the descriptor + * @desc: the descriptor itself + * + * Process a completed descriptor and perform any callbacks upon + * the completion. The completion handling drops the lock during the + * callbacks but must be called with the lock held. + */ +static void midc_descriptor_complete(struct intel_mid_dma_chan *midc, + struct intel_mid_dma_desc *desc) +{ + struct dma_async_tx_descriptor *txd = &desc->txd; + dma_async_tx_callback callback_txd = NULL; + void *param_txd = NULL; + + midc->completed = txd->cookie; + callback_txd = txd->callback; + param_txd = txd->callback_param; + + list_move(&desc->desc_node, &midc->free_list); + + spin_unlock_bh(&midc->lock); + if (callback_txd) { + pr_debug("MDMA: TXD callback set ... calling\n"); + callback_txd(param_txd); + spin_lock_bh(&midc->lock); + return; + } + spin_lock_bh(&midc->lock); + +} +/** + * midc_scan_descriptors - check the descriptors in channel + * mark completed when tx is completete + * @mid: device + * @midc: channel to scan + * + * Walk the descriptor chain for the device and process any entries + * that are complete. 
+ */ +static void midc_scan_descriptors(struct middma_device *mid, + struct intel_mid_dma_chan *midc) +{ + struct intel_mid_dma_desc *desc = NULL, *_desc = NULL; + + /*tx is complete*/ + list_for_each_entry_safe(desc, _desc, &midc->active_list, desc_node) { + if (desc->status == DMA_IN_PROGRESS) { + desc->status = DMA_SUCCESS; + midc_descriptor_complete(midc, desc); + } + } + return; +} + +/***************************************************************************** +DMA engine callback Functions*/ +/** + * intel_mid_dma_tx_submit - callback to submit DMA transaction + * @tx: dma engine descriptor + * + * Submit the DMA trasaction for this descriptor, start if ch idle + */ +static dma_cookie_t intel_mid_dma_tx_submit(struct dma_async_tx_descriptor *tx) +{ + struct intel_mid_dma_desc *desc = to_intel_mid_dma_desc(tx); + struct intel_mid_dma_chan *midc = to_intel_mid_dma_chan(tx->chan); + dma_cookie_t cookie; + + spin_lock_bh(&midc->lock); + cookie = midc->chan.cookie; + + if (++cookie < 0) + cookie = 1; + + midc->chan.cookie = cookie; + desc->txd.cookie = cookie; + + + if (list_empty(&midc->active_list)) { + midc_dostart(midc, desc); + list_add_tail(&desc->desc_node, &midc->active_list); + } else { + list_add_tail(&desc->desc_node, &midc->queue); + } + spin_unlock_bh(&midc->lock); + + return cookie; +} + +/** + * intel_mid_dma_issue_pending - callback to issue pending txn + * @chan: chan where pending trascation needs to be checked and submitted + * + * Call for scan to issue pending descriptors + */ +static void intel_mid_dma_issue_pending(struct dma_chan *chan) +{ + struct intel_mid_dma_chan *midc = to_intel_mid_dma_chan(chan); + + spin_lock_bh(&midc->lock); + if (!list_empty(&midc->queue)) + midc_scan_descriptors(to_middma_device(chan->device), midc); + spin_unlock_bh(&midc->lock); +} + +/** + * intel_mid_dma_tx_status - Return status of txn + * @chan: chan for where status needs to be checked + * @cookie: cookie for txn + * @txstate: DMA txn state + * + * Return status of DMA txn + */ +static enum dma_status intel_mid_dma_tx_status(struct dma_chan *chan, + dma_cookie_t cookie, + struct dma_tx_state *txstate) +{ + struct intel_mid_dma_chan *midc = to_intel_mid_dma_chan(chan); + dma_cookie_t last_used; + dma_cookie_t last_complete; + int ret; + + last_complete = midc->completed; + last_used = chan->cookie; + + ret = dma_async_is_complete(cookie, last_complete, last_used); + if (ret != DMA_SUCCESS) { + midc_scan_descriptors(to_middma_device(chan->device), midc); + + last_complete = midc->completed; + last_used = chan->cookie; + + ret = dma_async_is_complete(cookie, last_complete, last_used); + } + + if (txstate) { + txstate->last = last_complete; + txstate->used = last_used; + txstate->residue = 0; + } + return ret; +} + +/** + * intel_mid_dma_device_control - DMA device control + * @chan: chan for DMA control + * @cmd: control cmd + * @arg: cmd arg value + * + * Perform DMA control command + */ +static int intel_mid_dma_device_control(struct dma_chan *chan, + enum dma_ctrl_cmd cmd, unsigned long arg) +{ + struct intel_mid_dma_chan *midc = to_intel_mid_dma_chan(chan); + struct middma_device *mid = to_middma_device(chan->device); + struct intel_mid_dma_desc *desc, *_desc; + LIST_HEAD(list); + + if (cmd != DMA_TERMINATE_ALL) + return -ENXIO; + + spin_lock_bh(&midc->lock); + if (midc->in_use == false) { + spin_unlock_bh(&midc->lock); + return 0; + } + list_splice_init(&midc->free_list, &list); + midc->descs_allocated = 0; + midc->slave = NULL; + + /* Disable interrupts */ + 
disable_dma_interrupt(midc); + + spin_unlock_bh(&midc->lock); + list_for_each_entry_safe(desc, _desc, &list, desc_node) { + pr_debug("MDMA: freeing descriptor %p\n", desc); + pci_pool_free(mid->dma_pool, desc, desc->txd.phys); + } + return 0; +} + +/** + * intel_mid_dma_prep_slave_sg - Prep slave sg txn + * @chan: chan for DMA transfer + * @sgl: scatter gather list + * @sg_len: length of sg txn + * @direction: DMA transfer dirtn + * @flags: DMA flags + * + * Do DMA sg txn: NOT supported now + */ +static struct dma_async_tx_descriptor *intel_mid_dma_prep_slave_sg( + struct dma_chan *chan, struct scatterlist *sgl, + unsigned int sg_len, enum dma_data_direction direction, + unsigned long flags) +{ + /*not supported now*/ + return NULL; +} + +/** + * intel_mid_dma_prep_memcpy - Prep memcpy txn + * @chan: chan for DMA transfer + * @dest: destn address + * @src: src address + * @len: DMA transfer len + * @flags: DMA flags + * + * Perform a DMA memcpy. Note we support slave periphral DMA transfers only + * The periphral txn details should be filled in slave structure properly + * Returns the descriptor for this txn + */ +static struct dma_async_tx_descriptor *intel_mid_dma_prep_memcpy( + struct dma_chan *chan, dma_addr_t dest, + dma_addr_t src, size_t len, unsigned long flags) +{ + struct intel_mid_dma_chan *midc; + struct intel_mid_dma_desc *desc = NULL; + struct intel_mid_dma_slave *mids; + union intel_mid_dma_ctl_lo ctl_lo; + union intel_mid_dma_ctl_hi ctl_hi; + union intel_mid_dma_cfg_lo cfg_lo; + union intel_mid_dma_cfg_hi cfg_hi; + enum intel_mid_dma_width width = 0; + + pr_debug("MDMA: Prep for memcpy\n"); + WARN_ON(!chan); + if (!len) + return NULL; + + mids = chan->private; + WARN_ON(!mids); + + midc = to_intel_mid_dma_chan(chan); + WARN_ON(!midc); + + pr_debug("MDMA:called for DMA %x CH %d Length %zu\n", + midc->dma->pci_id, midc->ch_id, len); + pr_debug("MDMA:Cfg passed Mode %x, Dirn %x, HS %x, Width %x\n", + mids->cfg_mode, mids->dirn, mids->hs_mode, mids->src_width); + + /*calculate CFG_LO*/ + if (mids->hs_mode == LNW_DMA_SW_HS) { + cfg_lo.cfg_lo = 0; + cfg_lo.cfgx.hs_sel_dst = 1; + cfg_lo.cfgx.hs_sel_src = 1; + } else if (mids->hs_mode == LNW_DMA_HW_HS) + cfg_lo.cfg_lo = 0x00000; + + /*calculate CFG_HI*/ + if (mids->cfg_mode == LNW_DMA_MEM_TO_MEM) { + /*SW HS only*/ + cfg_hi.cfg_hi = 0; + } else { + cfg_hi.cfg_hi = 0; + if (midc->dma->pimr_mask) { + cfg_hi.cfgx.protctl = 0x0; /*default value*/ + cfg_hi.cfgx.fifo_mode = 1; + if (mids->dirn == DMA_TO_DEVICE) { + cfg_hi.cfgx.src_per = 0; + if (mids->device_instance == 0) + cfg_hi.cfgx.dst_per = 3; + if (mids->device_instance == 1) + cfg_hi.cfgx.dst_per = 1; + } else if (mids->dirn == DMA_FROM_DEVICE) { + if (mids->device_instance == 0) + cfg_hi.cfgx.src_per = 2; + if (mids->device_instance == 1) + cfg_hi.cfgx.src_per = 0; + cfg_hi.cfgx.dst_per = 0; + } + } else { + cfg_hi.cfgx.protctl = 0x1; /*default value*/ + cfg_hi.cfgx.src_per = cfg_hi.cfgx.dst_per = + midc->ch_id - midc->dma->chan_base; + } + } + + /*calculate CTL_HI*/ + ctl_hi.ctlx.reser = 0; + width = mids->src_width; + + ctl_hi.ctlx.block_ts = get_block_ts(len, width, midc->dma->block_size); + pr_debug("MDMA:calc len %d for block size %d\n", + ctl_hi.ctlx.block_ts, midc->dma->block_size); + /*calculate CTL_LO*/ + ctl_lo.ctl_lo = 0; + ctl_lo.ctlx.int_en = 1; + ctl_lo.ctlx.dst_tr_width = mids->dst_width; + ctl_lo.ctlx.src_tr_width = mids->src_width; + ctl_lo.ctlx.dst_msize = mids->src_msize; + ctl_lo.ctlx.src_msize = mids->dst_msize; + + if (mids->cfg_mode == LNW_DMA_MEM_TO_MEM) 
{ + ctl_lo.ctlx.tt_fc = 0; + ctl_lo.ctlx.sinc = 0; + ctl_lo.ctlx.dinc = 0; + } else { + if (mids->dirn == DMA_TO_DEVICE) { + ctl_lo.ctlx.sinc = 0; + ctl_lo.ctlx.dinc = 2; + ctl_lo.ctlx.tt_fc = 1; + } else if (mids->dirn == DMA_FROM_DEVICE) { + ctl_lo.ctlx.sinc = 2; + ctl_lo.ctlx.dinc = 0; + ctl_lo.ctlx.tt_fc = 2; + } + } + + pr_debug("MDMA:Calc CTL LO %x, CTL HI %x, CFG LO %x, CFG HI %x\n", + ctl_lo.ctl_lo, ctl_hi.ctl_hi, cfg_lo.cfg_lo, cfg_hi.cfg_hi); + + enable_dma_interrupt(midc); + + desc = midc_desc_get(midc); + if (desc == NULL) + goto err_desc_get; + desc->sar = src; + desc->dar = dest ; + desc->len = len; + desc->cfg_hi = cfg_hi.cfg_hi; + desc->cfg_lo = cfg_lo.cfg_lo; + desc->ctl_lo = ctl_lo.ctl_lo; + desc->ctl_hi = ctl_hi.ctl_hi; + desc->width = width; + desc->dirn = mids->dirn; + return &desc->txd; + +err_desc_get: + pr_err("ERR_MDMA: Failed to get desc\n"); + midc_desc_put(midc, desc); + return NULL; +} + +/** + * intel_mid_dma_free_chan_resources - Frees dma resources + * @chan: chan requiring attention + * + * Frees the allocated resources on this DMA chan + */ +static void intel_mid_dma_free_chan_resources(struct dma_chan *chan) +{ + struct intel_mid_dma_chan *midc = to_intel_mid_dma_chan(chan); + struct middma_device *mid = to_middma_device(chan->device); + struct intel_mid_dma_desc *desc, *_desc; + + if (true == midc->in_use) { + /*trying to free ch in use!!!!!*/ + pr_err("ERR_MDMA: trying to free ch in use\n"); + } + + spin_lock_bh(&midc->lock); + midc->descs_allocated = 0; + list_for_each_entry_safe(desc, _desc, &midc->active_list, desc_node) { + list_del(&desc->desc_node); + pci_pool_free(mid->dma_pool, desc, desc->txd.phys); + } + list_for_each_entry_safe(desc, _desc, &midc->free_list, desc_node) { + list_del(&desc->desc_node); + pci_pool_free(mid->dma_pool, desc, desc->txd.phys); + } + list_for_each_entry_safe(desc, _desc, &midc->queue, desc_node) { + list_del(&desc->desc_node); + pci_pool_free(mid->dma_pool, desc, desc->txd.phys); + } + spin_unlock_bh(&midc->lock); + midc->in_use = false; + /* Disable CH interrupts */ + iowrite32(MASK_INTR_REG(midc->ch_id), mid->dma_base + MASK_BLOCK); + iowrite32(MASK_INTR_REG(midc->ch_id), mid->dma_base + MASK_ERR); +} + +/** + * intel_mid_dma_alloc_chan_resources - Allocate dma resources + * @chan: chan requiring attention + * + * Allocates DMA resources on this chan + * Return the descriptors allocated + */ +static int intel_mid_dma_alloc_chan_resources(struct dma_chan *chan) +{ + struct intel_mid_dma_chan *midc = to_intel_mid_dma_chan(chan); + struct middma_device *mid = to_middma_device(chan->device); + struct intel_mid_dma_desc *desc; + dma_addr_t phys; + int i = 0; + + + /* ASSERT: channel is idle */ + if (test_ch_en(mid->dma_base, midc->ch_id)) { + /*ch is not idle*/ + pr_err("ERR_MDMA: ch not idle\n"); + return -EIO; + } + midc->completed = chan->cookie = 1; + + spin_lock_bh(&midc->lock); + while (midc->descs_allocated < DESCS_PER_CHANNEL) { + spin_unlock_bh(&midc->lock); + desc = pci_pool_alloc(mid->dma_pool, GFP_KERNEL, &phys); + if (!desc) { + pr_err("ERR_MDMA: desc failed\n"); + return -ENOMEM; + /*check*/ + } + dma_async_tx_descriptor_init(&desc->txd, chan); + desc->txd.tx_submit = intel_mid_dma_tx_submit; + desc->txd.flags = DMA_CTRL_ACK; + desc->txd.phys = phys; + spin_lock_bh(&midc->lock); + i = ++midc->descs_allocated; + list_add_tail(&desc->desc_node, &midc->free_list); + } + spin_unlock_bh(&midc->lock); + midc->in_use = false; + pr_debug("MID_DMA: Desc alloc done ret: %d desc\n", i); + return i; +} + +/** + * 
midc_handle_error - Handle DMA txn error + * @mid: controller where error occured + * @midc: chan where error occured + * + * Scan the descriptor for error + */ +static void midc_handle_error(struct middma_device *mid, + struct intel_mid_dma_chan *midc) +{ + midc_scan_descriptors(mid, midc); +} + +/** + * dma_tasklet - DMA interrupt tasklet + * @data: tasklet arg (the controller structure) + * + * Scan the controller for interrupts for completion/error + * Clear the interrupt and call for handling completion/error + */ +static void dma_tasklet(unsigned long data) +{ + struct middma_device *mid = NULL; + struct intel_mid_dma_chan *midc = NULL; + u32 status; + int i; + + mid = (struct middma_device *)data; + if (mid == NULL) { + pr_err("ERR_MDMA: tasklet Null param\n"); + return; + } + pr_debug("MDMA: in tasklet for device %x\n", mid->pci_id); + status = ioread32(mid->dma_base + RAW_TFR); + pr_debug("MDMA:RAW_TFR %x\n", status); + status &= mid->intr_mask; + while (status) { + /*txn interrupt*/ + i = get_ch_index(&status, mid->chan_base); + if (i < 0) { + pr_err("ERR_MDMA:Invalid ch index %x\n", i); + return; + } + midc = &mid->ch[i]; + if (midc == NULL) { + pr_err("ERR_MDMA:Null param midc\n"); + return; + } + pr_debug("MDMA:Tx complete interrupt %x, Ch No %d Index %d\n", + status, midc->ch_id, i); + /*clearing this interrupts first*/ + iowrite32((1 << midc->ch_id), mid->dma_base + CLEAR_TFR); + iowrite32((1 << midc->ch_id), mid->dma_base + CLEAR_BLOCK); + + spin_lock_bh(&midc->lock); + midc_scan_descriptors(mid, midc); + pr_debug("MDMA:Scan of desc... complete, unmasking\n"); + iowrite32(UNMASK_INTR_REG(midc->ch_id), + mid->dma_base + MASK_TFR); + spin_unlock_bh(&midc->lock); + } + + status = ioread32(mid->dma_base + RAW_ERR); + status &= mid->intr_mask; + while (status) { + /*err interrupt*/ + i = get_ch_index(&status, mid->chan_base); + if (i < 0) { + pr_err("ERR_MDMA:Invalid ch index %x\n", i); + return; + } + midc = &mid->ch[i]; + if (midc == NULL) { + pr_err("ERR_MDMA:Null param midc\n"); + return; + } + pr_debug("MDMA:Tx complete interrupt %x, Ch No %d Index %d\n", + status, midc->ch_id, i); + + iowrite32((1 << midc->ch_id), mid->dma_base + CLEAR_ERR); + spin_lock_bh(&midc->lock); + midc_handle_error(mid, midc); + iowrite32(UNMASK_INTR_REG(midc->ch_id), + mid->dma_base + MASK_ERR); + spin_unlock_bh(&midc->lock); + } + pr_debug("MDMA:Exiting takslet...\n"); + return; +} + +static void dma_tasklet1(unsigned long data) +{ + pr_debug("MDMA:in takslet1...\n"); + return dma_tasklet(data); +} + +static void dma_tasklet2(unsigned long data) +{ + pr_debug("MDMA:in takslet2...\n"); + return dma_tasklet(data); +} + +/** + * intel_mid_dma_interrupt - DMA ISR + * @irq: IRQ where interrupt occurred + * @data: ISR cllback data (the controller structure) + * + * See if this is our interrupt if so then schedule the tasklet + * otherwise ignore + */ +static irqreturn_t intel_mid_dma_interrupt(int irq, void *data) +{ + struct middma_device *mid = data; + u32 status; + int call_tasklet = 0; + + /*DMA Interrupt*/ + pr_debug("MDMA:Got an interrupt on irq %d\n", irq); + if (!mid) { + pr_err("ERR_MDMA:null pointer mid\n"); + return -EINVAL; + } + + status = ioread32(mid->dma_base + RAW_TFR); + pr_debug("MDMA: Status %x, Mask %x\n", status, mid->intr_mask); + status &= mid->intr_mask; + if (status) { + /*need to disable intr*/ + iowrite32((status << 8), mid->dma_base + MASK_TFR); + pr_debug("MDMA: Calling tasklet %x\n", status); + call_tasklet = 1; + } + status = ioread32(mid->dma_base + RAW_ERR); + status 
&= mid->intr_mask; + if (status) { + iowrite32(MASK_INTR_REG(status), mid->dma_base + MASK_ERR); + call_tasklet = 1; + } + if (call_tasklet) + tasklet_schedule(&mid->tasklet); + + return IRQ_HANDLED; +} + +static irqreturn_t intel_mid_dma_interrupt1(int irq, void *data) +{ + return intel_mid_dma_interrupt(irq, data); +} + +static irqreturn_t intel_mid_dma_interrupt2(int irq, void *data) +{ + return intel_mid_dma_interrupt(irq, data); +} + +/** + * mid_setup_dma - Setup the DMA controller + * @pdev: Controller PCI device structure + * + * Initilize the DMA controller, channels, registers with DMA engine, + * ISR. Initilize DMA controller channels. + */ +static int mid_setup_dma(struct pci_dev *pdev) +{ + struct middma_device *dma = pci_get_drvdata(pdev); + int err, i; + unsigned int irq_level; + + /* DMA coherent memory pool for DMA descriptor allocations */ + dma->dma_pool = pci_pool_create("intel_mid_dma_desc_pool", pdev, + sizeof(struct intel_mid_dma_desc), + 32, 0); + if (NULL == dma->dma_pool) { + pr_err("ERR_MDMA:pci_pool_create failed\n"); + err = -ENOMEM; + kfree(dma); + goto err_dma_pool; + } + + INIT_LIST_HEAD(&dma->common.channels); + dma->pci_id = pdev->device; + if (dma->pimr_mask) { + dma->mask_reg = ioremap(LNW_PERIPHRAL_MASK_BASE, + LNW_PERIPHRAL_MASK_SIZE); + if (dma->mask_reg == NULL) { + pr_err("ERR_MDMA:Cant map periphral intr space !!\n"); + return -ENOMEM; + } + } else + dma->mask_reg = NULL; + + pr_debug("MDMA:Adding %d channel for this controller\n", dma->max_chan); + /*init CH structures*/ + dma->intr_mask = 0; + for (i = 0; i < dma->max_chan; i++) { + struct intel_mid_dma_chan *midch = &dma->ch[i]; + + midch->chan.device = &dma->common; + midch->chan.cookie = 1; + midch->chan.chan_id = i; + midch->ch_id = dma->chan_base + i; + pr_debug("MDMA:Init CH %d, ID %d\n", i, midch->ch_id); + + midch->dma_base = dma->dma_base; + midch->ch_regs = dma->dma_base + DMA_CH_SIZE * midch->ch_id; + midch->dma = dma; + dma->intr_mask |= 1 << (dma->chan_base + i); + spin_lock_init(&midch->lock); + + INIT_LIST_HEAD(&midch->active_list); + INIT_LIST_HEAD(&midch->queue); + INIT_LIST_HEAD(&midch->free_list); + /*mask interrupts*/ + iowrite32(MASK_INTR_REG(midch->ch_id), + dma->dma_base + MASK_BLOCK); + iowrite32(MASK_INTR_REG(midch->ch_id), + dma->dma_base + MASK_SRC_TRAN); + iowrite32(MASK_INTR_REG(midch->ch_id), + dma->dma_base + MASK_DST_TRAN); + iowrite32(MASK_INTR_REG(midch->ch_id), + dma->dma_base + MASK_ERR); + iowrite32(MASK_INTR_REG(midch->ch_id), + dma->dma_base + MASK_TFR); + + disable_dma_interrupt(midch); + list_add_tail(&midch->chan.device_node, &dma->common.channels); + } + pr_debug("MDMA: Calc Mask as %x for this controller\n", dma->intr_mask); + + /*init dma structure*/ + dma_cap_zero(dma->common.cap_mask); + dma_cap_set(DMA_MEMCPY, dma->common.cap_mask); + dma_cap_set(DMA_SLAVE, dma->common.cap_mask); + dma_cap_set(DMA_PRIVATE, dma->common.cap_mask); + dma->common.dev = &pdev->dev; + dma->common.chancnt = dma->max_chan; + + dma->common.device_alloc_chan_resources = + intel_mid_dma_alloc_chan_resources; + dma->common.device_free_chan_resources = + intel_mid_dma_free_chan_resources; + + dma->common.device_tx_status = intel_mid_dma_tx_status; + dma->common.device_prep_dma_memcpy = intel_mid_dma_prep_memcpy; + dma->common.device_issue_pending = intel_mid_dma_issue_pending; + dma->common.device_prep_slave_sg = intel_mid_dma_prep_slave_sg; + dma->common.device_control = intel_mid_dma_device_control; + + /*enable dma cntrl*/ + iowrite32(REG_BIT0, dma->dma_base + DMA_CFG); + + 
/*register irq */ + if (dma->pimr_mask) { + irq_level = IRQF_SHARED; + pr_debug("MDMA:Requesting irq shared for DMAC1\n"); + err = request_irq(pdev->irq, intel_mid_dma_interrupt1, + IRQF_SHARED, "INTEL_MID_DMAC1", dma); + if (0 != err) + goto err_irq; + } else { + dma->intr_mask = 0x03; + irq_level = 0; + pr_debug("MDMA:Requesting irq for DMAC2\n"); + err = request_irq(pdev->irq, intel_mid_dma_interrupt2, + 0, "INTEL_MID_DMAC2", dma); + if (0 != err) + goto err_irq; + } + /*register device w/ engine*/ + err = dma_async_device_register(&dma->common); + if (0 != err) { + pr_err("ERR_MDMA:device_register failed: %d\n", err); + goto err_engine; + } + if (dma->pimr_mask) { + pr_debug("setting up tasklet1 for DMAC1\n"); + tasklet_init(&dma->tasklet, dma_tasklet1, (unsigned long)dma); + } else { + pr_debug("setting up tasklet2 for DMAC2\n"); + tasklet_init(&dma->tasklet, dma_tasklet2, (unsigned long)dma); + } + return 0; + +err_engine: + free_irq(pdev->irq, dma); +err_irq: + pci_pool_destroy(dma->dma_pool); + kfree(dma); +err_dma_pool: + pr_err("ERR_MDMA:setup_dma failed: %d\n", err); + return err; + +} + +/** + * middma_shutdown - Shutdown the DMA controller + * @pdev: Controller PCI device structure + * + * Called by remove + * Unregister DMa controller, clear all structures and free interrupt + */ +static void middma_shutdown(struct pci_dev *pdev) +{ + struct middma_device *device = pci_get_drvdata(pdev); + + dma_async_device_unregister(&device->common); + pci_pool_destroy(device->dma_pool); + if (device->mask_reg) + iounmap(device->mask_reg); + if (device->dma_base) + iounmap(device->dma_base); + free_irq(pdev->irq, device); + return; +} + +/** + * intel_mid_dma_probe - PCI Probe + * @pdev: Controller PCI device structure + * @id: pci device id structure + * + * Initilize the PCI device, map BARs, query driver data. 
+ * Call setup_dma to complete contoller and chan initilzation + */ +static int __devinit intel_mid_dma_probe(struct pci_dev *pdev, + const struct pci_device_id *id) +{ + struct middma_device *device; + u32 base_addr, bar_size; + struct intel_mid_dma_probe_info *info; + int err; + + pr_debug("MDMA: probe for %x\n", pdev->device); + info = (void *)id->driver_data; + pr_debug("MDMA: CH %d, base %d, block len %d, Periphral mask %x\n", + info->max_chan, info->ch_base, + info->block_size, info->pimr_mask); + + err = pci_enable_device(pdev); + if (err) + goto err_enable_device; + + err = pci_request_regions(pdev, "intel_mid_dmac"); + if (err) + goto err_request_regions; + + err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); + if (err) + goto err_set_dma_mask; + + err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); + if (err) + goto err_set_dma_mask; + + device = kzalloc(sizeof(*device), GFP_KERNEL); + if (!device) { + pr_err("ERR_MDMA:kzalloc failed probe\n"); + err = -ENOMEM; + goto err_kzalloc; + } + device->pdev = pci_dev_get(pdev); + + base_addr = pci_resource_start(pdev, 0); + bar_size = pci_resource_len(pdev, 0); + device->dma_base = ioremap_nocache(base_addr, DMA_REG_SIZE); + if (!device->dma_base) { + pr_err("ERR_MDMA:ioremap failed\n"); + err = -ENOMEM; + goto err_ioremap; + } + pci_set_drvdata(pdev, device); + pci_set_master(pdev); + device->max_chan = info->max_chan; + device->chan_base = info->ch_base; + device->block_size = info->block_size; + device->pimr_mask = info->pimr_mask; + + err = mid_setup_dma(pdev); + if (err) + goto err_dma; + + return 0; + +err_dma: + iounmap(device->dma_base); +err_ioremap: + pci_dev_put(pdev); + kfree(device); +err_kzalloc: +err_set_dma_mask: + pci_release_regions(pdev); + pci_disable_device(pdev); +err_request_regions: +err_enable_device: + pr_err("ERR_MDMA:Probe failed %d\n", err); + return err; +} + +/** + * intel_mid_dma_remove - PCI remove + * @pdev: Controller PCI device structure + * + * Free up all resources and data + * Call shutdown_dma to complete contoller and chan cleanup + */ +static void __devexit intel_mid_dma_remove(struct pci_dev *pdev) +{ + struct middma_device *device = pci_get_drvdata(pdev); + middma_shutdown(pdev); + pci_dev_put(pdev); + kfree(device); + pci_release_regions(pdev); + pci_disable_device(pdev); +} + +/****************************************************************************** +* PCI stuff +*/ +static struct pci_device_id intel_mid_dma_ids[] = { + { PCI_VDEVICE(INTEL, INTEL_MID_DMAC1_ID), INFO(2, 6, 4095, 0x200020)}, + { PCI_VDEVICE(INTEL, INTEL_MID_DMAC2_ID), INFO(2, 0, 2047, 0)}, + { PCI_VDEVICE(INTEL, INTEL_MID_GP_DMAC2_ID), INFO(2, 0, 2047, 0)}, + { PCI_VDEVICE(INTEL, INTEL_MFLD_DMAC1_ID), INFO(4, 0, 4095, 0x400040)}, + { 0, } +}; +MODULE_DEVICE_TABLE(pci, intel_mid_dma_ids); + +static struct pci_driver intel_mid_dma_pci = { + .name = "Intel MID DMA", + .id_table = intel_mid_dma_ids, + .probe = intel_mid_dma_probe, + .remove = __devexit_p(intel_mid_dma_remove), +}; + +static int __init intel_mid_dma_init(void) +{ + pr_debug("INFO_MDMA: LNW DMA Driver Version %s\n", + INTEL_MID_DMA_DRIVER_VERSION); + return pci_register_driver(&intel_mid_dma_pci); +} +fs_initcall(intel_mid_dma_init); + +static void __exit intel_mid_dma_exit(void) +{ + pci_unregister_driver(&intel_mid_dma_pci); +} +module_exit(intel_mid_dma_exit); + +MODULE_AUTHOR("Vinod Koul "); +MODULE_DESCRIPTION("Intel (R) MID DMAC Driver"); +MODULE_LICENSE("GPL v2"); +MODULE_VERSION(INTEL_MID_DMA_DRIVER_VERSION); diff --git 
a/drivers/dma/intel_mid_dma_regs.h b/drivers/dma/intel_mid_dma_regs.h new file mode 100644 index 0000000..d81aa65 --- /dev/null +++ b/drivers/dma/intel_mid_dma_regs.h @@ -0,0 +1,260 @@ +/* + * intel_mid_dma_regs.h - Intel MID DMA Drivers + * + * Copyright (C) 2008-10 Intel Corp + * Author: Vinod Koul + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * + */ +#ifndef __INTEL_MID_DMAC_REGS_H__ +#define __INTEL_MID_DMAC_REGS_H__ + +#include +#include +#include + +#define INTEL_MID_DMA_DRIVER_VERSION "1.0.5" + +#define REG_BIT0 0x00000001 +#define REG_BIT8 0x00000100 + +#define UNMASK_INTR_REG(chan_num) \ + ((REG_BIT0 << chan_num) | (REG_BIT8 << chan_num)) +#define MASK_INTR_REG(chan_num) (REG_BIT8 << chan_num) + +#define ENABLE_CHANNEL(chan_num) \ + ((REG_BIT0 << chan_num) | (REG_BIT8 << chan_num)) + +#define DESCS_PER_CHANNEL 16 +/*DMA Registers*/ +/*registers associated with channel programming*/ +#define DMA_REG_SIZE 0x400 +#define DMA_CH_SIZE 0x58 + +/*CH X REG = (DMA_CH_SIZE)*CH_NO + REG*/ +#define SAR 0x00 /* Source Address Register*/ +#define DAR 0x08 /* Destination Address Register*/ +#define CTL_LOW 0x18 /* Control Register*/ +#define CTL_HIGH 0x1C /* Control Register*/ +#define CFG_LOW 0x40 /* Configuration Register Low*/ +#define CFG_HIGH 0x44 /* Configuration Register high*/ + +#define STATUS_TFR 0x2E8 +#define STATUS_BLOCK 0x2F0 +#define STATUS_ERR 0x308 + +#define RAW_TFR 0x2C0 +#define RAW_BLOCK 0x2C8 +#define RAW_ERR 0x2E0 + +#define MASK_TFR 0x310 +#define MASK_BLOCK 0x318 +#define MASK_SRC_TRAN 0x320 +#define MASK_DST_TRAN 0x328 +#define MASK_ERR 0x330 + +#define CLEAR_TFR 0x338 +#define CLEAR_BLOCK 0x340 +#define CLEAR_SRC_TRAN 0x348 +#define CLEAR_DST_TRAN 0x350 +#define CLEAR_ERR 0x358 + +#define INTR_STATUS 0x360 +#define DMA_CFG 0x398 +#define DMA_CHAN_EN 0x3A0 + +/*DMA channel control registers*/ +union intel_mid_dma_ctl_lo { + struct { + u32 int_en:1; /*enable or disable interrupts*/ + /*should be 0*/ + u32 dst_tr_width:3; /*destination transfer width*/ + /*usually 32 bits = 010*/ + u32 src_tr_width:3; /*source transfer width*/ + /*usually 32 bits = 010*/ + u32 dinc:2; /*destination address inc/dec*/ + /*For mem:INC=00, Periphral NoINC=11*/ + u32 sinc:2; /*source address inc or dec, as above*/ + u32 dst_msize:3; /*destination burst transaction length*/ + /*always = 16 ie 011*/ + u32 src_msize:3; /*source burst transaction length*/ + /*always = 16 ie 011*/ + u32 reser1:3; + u32 tt_fc:3; /*transfer type and flow controller*/ + /*M-M = 000 + P-M = 010 + M-P = 001*/ + u32 dms:2; /*destination master select = 0*/ + u32 sms:2; /*source master select = 0*/ + u32 llp_dst_en:1; /*enable/disable destination LLP = 0*/ + u32 llp_src_en:1; /*enable/disable source LLP = 0*/ + u32 reser2:3; + } ctlx; + 
u32 ctl_lo; +}; + +union intel_mid_dma_ctl_hi { + struct { + u32 block_ts:12; /*block transfer size*/ + /*configured by DMAC*/ + u32 reser:20; + } ctlx; + u32 ctl_hi; + +}; + +/*DMA channel configuration registers*/ +union intel_mid_dma_cfg_lo { + struct { + u32 reser1:5; + u32 ch_prior:3; /*channel priority = 0*/ + u32 ch_susp:1; /*channel suspend = 0*/ + u32 fifo_empty:1; /*FIFO empty or not R bit = 0*/ + u32 hs_sel_dst:1; /*select HW/SW destn handshaking*/ + /*HW = 0, SW = 1*/ + u32 hs_sel_src:1; /*select HW/SW src handshaking*/ + u32 reser2:6; + u32 dst_hs_pol:1; /*dest HS interface polarity*/ + u32 src_hs_pol:1; /*src HS interface polarity*/ + u32 max_abrst:10; /*max AMBA burst len = 0 (no sw limit*/ + u32 reload_src:1; /*auto reload src addr =1 if src is P*/ + u32 reload_dst:1; /*AR destn addr =1 if dstn is P*/ + } cfgx; + u32 cfg_lo; +}; + +union intel_mid_dma_cfg_hi { + struct { + u32 fcmode:1; /*flow control mode = 1*/ + u32 fifo_mode:1; /*FIFO mode select = 1*/ + u32 protctl:3; /*protection control = 0*/ + u32 rsvd:2; + u32 src_per:4; /*src hw HS interface*/ + u32 dst_per:4; /*dstn hw HS interface*/ + u32 reser2:17; + } cfgx; + u32 cfg_hi; +}; + +/** + * struct intel_mid_dma_chan - internal mid representation of a DMA channel + * @chan: dma_chan strcture represetation for mid chan + * @ch_regs: MMIO register space pointer to channel register + * @dma_base: MMIO register space DMA engine base pointer + * @ch_id: DMA channel id + * @lock: channel spinlock + * @completed: DMA cookie + * @active_list: current active descriptors + * @queue: current queued up descriptors + * @free_list: current free descriptors + * @slave: dma slave struture + * @descs_allocated: total number of decsiptors allocated + * @dma: dma device struture pointer + * @in_use: bool representing if ch is in use or not + */ +struct intel_mid_dma_chan { + struct dma_chan chan; + void __iomem *ch_regs; + void __iomem *dma_base; + int ch_id; + spinlock_t lock; + dma_cookie_t completed; + struct list_head active_list; + struct list_head queue; + struct list_head free_list; + struct intel_mid_dma_slave *slave; + unsigned int descs_allocated; + struct middma_device *dma; + bool in_use; +}; + +static inline struct intel_mid_dma_chan *to_intel_mid_dma_chan( + struct dma_chan *chan) +{ + return container_of(chan, struct intel_mid_dma_chan, chan); +} + +/** + * struct middma_device - internal representation of a DMA device + * @pdev: PCI device + * @dma_base: MMIO register space pointer of DMA + * @dma_pool: for allocating DMA descriptors + * @common: embedded struct dma_device + * @tasklet: dma tasklet for processing interrupts + * @ch: per channel data + * @pci_id: DMA device PCI ID + * @intr_mask: Interrupt mask to be used + * @mask_reg: MMIO register for periphral mask + * @chan_base: Base ch index (read from driver data) + * @max_chan: max number of chs supported (from drv_data) + * @block_size: Block size of DMA transfer supported (from drv_data) + * @pimr_mask: MMIO register addr for periphral interrupt (from drv_data) + */ +struct middma_device { + struct pci_dev *pdev; + void __iomem *dma_base; + struct pci_pool *dma_pool; + struct dma_device common; + struct tasklet_struct tasklet; + struct intel_mid_dma_chan ch[MAX_CHAN]; + unsigned int pci_id; + unsigned int intr_mask; + void __iomem *mask_reg; + int chan_base; + int max_chan; + int block_size; + unsigned int pimr_mask; +}; + +static inline struct middma_device *to_middma_device(struct dma_device *common) +{ + return container_of(common, struct middma_device, 
common); +} + +struct intel_mid_dma_desc { + void __iomem *block; /*ch ptr*/ + struct list_head desc_node; + struct dma_async_tx_descriptor txd; + size_t len; + dma_addr_t sar; + dma_addr_t dar; + u32 cfg_hi; + u32 cfg_lo; + u32 ctl_lo; + u32 ctl_hi; + dma_addr_t next; + enum dma_data_direction dirn; + enum dma_status status; + enum intel_mid_dma_width width; /*width of DMA txn*/ + enum intel_mid_dma_mode cfg_mode; /*mode configuration*/ + +}; + +static inline int test_ch_en(void __iomem *dma, u32 ch_no) +{ + u32 en_reg = ioread32(dma + DMA_CHAN_EN); + return (en_reg >> ch_no) & 0x1; +} + +static inline struct intel_mid_dma_desc *to_intel_mid_dma_desc + (struct dma_async_tx_descriptor *txd) +{ + return container_of(txd, struct intel_mid_dma_desc, txd); +} +#endif /*__INTEL_MID_DMAC_REGS_H__*/ diff --git a/include/linux/intel_mid_dma.h b/include/linux/intel_mid_dma.h new file mode 100644 index 0000000..d9d08b6 --- /dev/null +++ b/include/linux/intel_mid_dma.h @@ -0,0 +1,86 @@ +/* + * intel_mid_dma.h - Intel MID DMA Drivers + * + * Copyright (C) 2008-10 Intel Corp + * Author: Vinod Koul + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. 
+ * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * + */ +#ifndef __INTEL_MID_DMA_H__ +#define __INTEL_MID_DMA_H__ + +#include + +/*DMA transaction width, src and dstn width would be same +The DMA length must be width aligned, +for 32 bit width the length must be 32 bit (4bytes) aligned only*/ +enum intel_mid_dma_width { + LNW_DMA_WIDTH_8BIT = 0x0, + LNW_DMA_WIDTH_16BIT = 0x1, + LNW_DMA_WIDTH_32BIT = 0x2, +}; + +/*DMA mode configurations*/ +enum intel_mid_dma_mode { + LNW_DMA_PER_TO_MEM = 0, /*periphral to memory configuration*/ + LNW_DMA_MEM_TO_PER, /*memory to periphral configuration*/ + LNW_DMA_MEM_TO_MEM, /*mem to mem confg (testing only)*/ +}; + +/*DMA handshaking*/ +enum intel_mid_dma_hs_mode { + LNW_DMA_HW_HS = 0, /*HW Handshaking only*/ + LNW_DMA_SW_HS = 1, /*SW Handshaking not recommended*/ +}; + +/*Burst size configuration*/ +enum intel_mid_dma_msize { + LNW_DMA_MSIZE_1 = 0x0, + LNW_DMA_MSIZE_4 = 0x1, + LNW_DMA_MSIZE_8 = 0x2, + LNW_DMA_MSIZE_16 = 0x3, + LNW_DMA_MSIZE_32 = 0x4, + LNW_DMA_MSIZE_64 = 0x5, +}; + +/** + * struct intel_mid_dma_slave - DMA slave structure + * + * @dirn: DMA trf direction + * @src_width: tx register width + * @dst_width: rx register width + * @hs_mode: HW/SW handshaking mode + * @cfg_mode: DMA data transfer mode (per-per/mem-per/mem-mem) + * @src_msize: Source DMA burst size + * @dst_msize: Dst DMA burst size + * @device_instance: DMA peripheral device instance, we can have multiple + * peripheral device connected to single DMAC + */ +struct intel_mid_dma_slave { + enum dma_data_direction dirn; + enum intel_mid_dma_width src_width; /*width of DMA src txn*/ + enum intel_mid_dma_width dst_width; /*width of DMA dst txn*/ + enum intel_mid_dma_hs_mode hs_mode; /*handshaking*/ + enum intel_mid_dma_mode cfg_mode; /*mode configuration*/ + enum intel_mid_dma_msize src_msize; /*size if src burst*/ + enum intel_mid_dma_msize dst_msize; /*size of dst burst*/ + unsigned int device_instance; /*0, 1 for periphral instance*/ +}; + +#endif /*__INTEL_MID_DMA_H__*/ -- cgit v0.10.2 From 0c42bd0e425e9c8ddb7019fc446f7d915e36c5f6 Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Fri, 30 Jul 2010 16:23:03 +0800 Subject: dmaengine: Driver for Topcliff PCH DMA controller Topcliff PCH is the platform controller hub that is going to be used in Intel's upcoming general embedded platforms. This adds the driver for Topcliff PCH DMA controller. The DMA channels are strictly for device to host or host to device transfers and cannot be used for generic memcpy. Signed-off-by: Yong Wang [kill GFP_ATOMIC, kill __raw_{read|write}l, locking fixlet] Signed-off-by: Dan Williams diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig index 6a3e5bf..fed5763 100644 --- a/drivers/dma/Kconfig +++ b/drivers/dma/Kconfig @@ -188,6 +188,13 @@ config PL330_DMA You need to provide platform specific settings via platform_data for a dma-pl330 device. +config PCH_DMA + tristate "Topcliff PCH DMA support" + depends on PCI && X86 + select DMA_ENGINE + help + Enable support for the Topcliff PCH DMA engine. 
+ config DMA_ENGINE bool diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile index 27b6c69..72bd703 100644 --- a/drivers/dma/Makefile +++ b/drivers/dma/Makefile @@ -24,3 +24,4 @@ obj-$(CONFIG_AMCC_PPC440SPE_ADMA) += ppc4xx/ obj-$(CONFIG_TIMB_DMA) += timb_dma.o obj-$(CONFIG_STE_DMA40) += ste_dma40.o ste_dma40_ll.o obj-$(CONFIG_PL330_DMA) += pl330.o +obj-$(CONFIG_PCH_DMA) += pch_dma.o diff --git a/drivers/dma/pch_dma.c b/drivers/dma/pch_dma.c new file mode 100644 index 0000000..37139ca --- /dev/null +++ b/drivers/dma/pch_dma.c @@ -0,0 +1,957 @@ +/* + * Topcliff PCH DMA controller driver + * Copyright (c) 2010 Intel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include +#include +#include +#include +#include +#include +#include + +#define DRV_NAME "pch-dma" + +#define DMA_CTL0_DISABLE 0x0 +#define DMA_CTL0_SG 0x1 +#define DMA_CTL0_ONESHOT 0x2 +#define DMA_CTL0_MODE_MASK_BITS 0x3 +#define DMA_CTL0_DIR_SHIFT_BITS 2 +#define DMA_CTL0_BITS_PER_CH 4 + +#define DMA_CTL2_START_SHIFT_BITS 8 +#define DMA_CTL2_IRQ_ENABLE_MASK ((1UL << DMA_CTL2_START_SHIFT_BITS) - 1) + +#define DMA_STATUS_IDLE 0x0 +#define DMA_STATUS_DESC_READ 0x1 +#define DMA_STATUS_WAIT 0x2 +#define DMA_STATUS_ACCESS 0x3 +#define DMA_STATUS_BITS_PER_CH 2 +#define DMA_STATUS_MASK_BITS 0x3 +#define DMA_STATUS_SHIFT_BITS 16 +#define DMA_STATUS_IRQ(x) (0x1 << (x)) +#define DMA_STATUS_ERR(x) (0x1 << ((x) + 8)) + +#define DMA_DESC_WIDTH_SHIFT_BITS 12 +#define DMA_DESC_WIDTH_1_BYTE (0x3 << DMA_DESC_WIDTH_SHIFT_BITS) +#define DMA_DESC_WIDTH_2_BYTES (0x2 << DMA_DESC_WIDTH_SHIFT_BITS) +#define DMA_DESC_WIDTH_4_BYTES (0x0 << DMA_DESC_WIDTH_SHIFT_BITS) +#define DMA_DESC_MAX_COUNT_1_BYTE 0x3FF +#define DMA_DESC_MAX_COUNT_2_BYTES 0x3FF +#define DMA_DESC_MAX_COUNT_4_BYTES 0x7FF +#define DMA_DESC_END_WITHOUT_IRQ 0x0 +#define DMA_DESC_END_WITH_IRQ 0x1 +#define DMA_DESC_FOLLOW_WITHOUT_IRQ 0x2 +#define DMA_DESC_FOLLOW_WITH_IRQ 0x3 + +#define MAX_CHAN_NR 8 + +static unsigned int init_nr_desc_per_channel = 64; +module_param(init_nr_desc_per_channel, uint, 0644); +MODULE_PARM_DESC(init_nr_desc_per_channel, + "initial descriptors per channel (default: 64)"); + +struct pch_dma_desc_regs { + u32 dev_addr; + u32 mem_addr; + u32 size; + u32 next; +}; + +struct pch_dma_regs { + u32 dma_ctl0; + u32 dma_ctl1; + u32 dma_ctl2; + u32 reserved1; + u32 dma_sts0; + u32 dma_sts1; + u32 reserved2; + u32 reserved3; + struct pch_dma_desc_regs desc[0]; +}; + +struct pch_dma_desc { + struct pch_dma_desc_regs regs; + struct dma_async_tx_descriptor txd; + struct list_head desc_node; + struct list_head tx_list; +}; + +struct pch_dma_chan { + struct dma_chan chan; + void __iomem *membase; + enum dma_data_direction dir; + struct tasklet_struct tasklet; + unsigned long err_status; + + spinlock_t lock; + + dma_cookie_t completed_cookie; + struct list_head active_list; + struct list_head queue; + struct list_head free_list; + unsigned int descs_allocated; +}; + 
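For orientation, the DMA_CTL0_* constants above imply that CTL0 packs one four-bit field per channel: two mode bits plus a direction bit at DMA_CTL0_DIR_SHIFT_BITS. A helper along these lines (a sketch for illustration, not part of the driver) would extract the mode field that pdc_set_mode() below writes:

static inline u32 ctl0_get_mode(u32 ctl0, int chan_id)
{
	/* each channel owns DMA_CTL0_BITS_PER_CH (4) bits of CTL0 */
	return (ctl0 >> (DMA_CTL0_BITS_PER_CH * chan_id)) &
		DMA_CTL0_MODE_MASK_BITS;
}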
+#define PDC_DEV_ADDR 0x00 +#define PDC_MEM_ADDR 0x04 +#define PDC_SIZE 0x08 +#define PDC_NEXT 0x0C + +#define channel_readl(pdc, name) \ + readl((pdc)->membase + PDC_##name) +#define channel_writel(pdc, name, val) \ + writel((val), (pdc)->membase + PDC_##name) + +struct pch_dma { + struct dma_device dma; + void __iomem *membase; + struct pci_pool *pool; + struct pch_dma_regs regs; + struct pch_dma_desc_regs ch_regs[MAX_CHAN_NR]; + struct pch_dma_chan channels[0]; +}; + +#define PCH_DMA_CTL0 0x00 +#define PCH_DMA_CTL1 0x04 +#define PCH_DMA_CTL2 0x08 +#define PCH_DMA_STS0 0x10 +#define PCH_DMA_STS1 0x14 + +#define dma_readl(pd, name) \ + __raw_readl((pd)->membase + PCH_DMA_##name) +#define dma_writel(pd, name, val) \ + __raw_writel((val), (pd)->membase + PCH_DMA_##name) + +static inline struct pch_dma_desc *to_pd_desc(struct dma_async_tx_descriptor *txd) +{ + return container_of(txd, struct pch_dma_desc, txd); +} + +static inline struct pch_dma_chan *to_pd_chan(struct dma_chan *chan) +{ + return container_of(chan, struct pch_dma_chan, chan); +} + +static inline struct pch_dma *to_pd(struct dma_device *ddev) +{ + return container_of(ddev, struct pch_dma, dma); +} + +static inline struct device *chan2dev(struct dma_chan *chan) +{ + return &chan->dev->device; +} + +static inline struct device *chan2parent(struct dma_chan *chan) +{ + return chan->dev->device.parent; +} + +static inline struct pch_dma_desc *pdc_first_active(struct pch_dma_chan *pd_chan) +{ + return list_first_entry(&pd_chan->active_list, + struct pch_dma_desc, desc_node); +} + +static inline struct pch_dma_desc *pdc_first_queued(struct pch_dma_chan *pd_chan) +{ + return list_first_entry(&pd_chan->queue, + struct pch_dma_desc, desc_node); +} + +static void pdc_enable_irq(struct dma_chan *chan, int enable) +{ + struct pch_dma *pd = to_pd(chan->device); + u32 val; + + val = dma_readl(pd, CTL2); + + if (enable) + val |= 0x1 << chan->chan_id; + else + val &= ~(0x1 << chan->chan_id); + + dma_writel(pd, CTL2, val); + + dev_dbg(chan2dev(chan), "pdc_enable_irq: chan %d -> %x\n", + chan->chan_id, val); +} + +static void pdc_set_dir(struct dma_chan *chan) +{ + struct pch_dma_chan *pd_chan = to_pd_chan(chan); + struct pch_dma *pd = to_pd(chan->device); + u32 val; + + val = dma_readl(pd, CTL0); + + if (pd_chan->dir == DMA_TO_DEVICE) + val |= 0x1 << (DMA_CTL0_BITS_PER_CH * chan->chan_id + + DMA_CTL0_DIR_SHIFT_BITS); + else + val &= ~(0x1 << (DMA_CTL0_BITS_PER_CH * chan->chan_id + + DMA_CTL0_DIR_SHIFT_BITS)); + + dma_writel(pd, CTL0, val); + + dev_dbg(chan2dev(chan), "pdc_set_dir: chan %d -> %x\n", + chan->chan_id, val); +} + +static void pdc_set_mode(struct dma_chan *chan, u32 mode) +{ + struct pch_dma *pd = to_pd(chan->device); + u32 val; + + val = dma_readl(pd, CTL0); + + val &= ~(DMA_CTL0_MODE_MASK_BITS << + (DMA_CTL0_BITS_PER_CH * chan->chan_id)); + val |= mode << (DMA_CTL0_BITS_PER_CH * chan->chan_id); + + dma_writel(pd, CTL0, val); + + dev_dbg(chan2dev(chan), "pdc_set_mode: chan %d -> %x\n", + chan->chan_id, val); +} + +static u32 pdc_get_status(struct pch_dma_chan *pd_chan) +{ + struct pch_dma *pd = to_pd(pd_chan->chan.device); + u32 val; + + val = dma_readl(pd, STS0); + return DMA_STATUS_MASK_BITS & (val >> (DMA_STATUS_SHIFT_BITS + + DMA_STATUS_BITS_PER_CH * pd_chan->chan.chan_id)); +} + +static bool pdc_is_idle(struct pch_dma_chan *pd_chan) +{ + if (pdc_get_status(pd_chan) == DMA_STATUS_IDLE) + return true; + else + return false; +} + +static void pdc_dostart(struct pch_dma_chan *pd_chan, struct pch_dma_desc* desc) +{ + struct 
pch_dma *pd = to_pd(pd_chan->chan.device); + u32 val; + + if (!pdc_is_idle(pd_chan)) { + dev_err(chan2dev(&pd_chan->chan), + "BUG: Attempt to start non-idle channel\n"); + return; + } + + channel_writel(pd_chan, DEV_ADDR, desc->regs.dev_addr); + channel_writel(pd_chan, MEM_ADDR, desc->regs.mem_addr); + channel_writel(pd_chan, SIZE, desc->regs.size); + channel_writel(pd_chan, NEXT, desc->regs.next); + + dev_dbg(chan2dev(&pd_chan->chan), "chan %d -> dev_addr: %x\n", + pd_chan->chan.chan_id, desc->regs.dev_addr); + dev_dbg(chan2dev(&pd_chan->chan), "chan %d -> mem_addr: %x\n", + pd_chan->chan.chan_id, desc->regs.mem_addr); + dev_dbg(chan2dev(&pd_chan->chan), "chan %d -> size: %x\n", + pd_chan->chan.chan_id, desc->regs.size); + dev_dbg(chan2dev(&pd_chan->chan), "chan %d -> next: %x\n", + pd_chan->chan.chan_id, desc->regs.next); + + if (list_empty(&desc->tx_list)) + pdc_set_mode(&pd_chan->chan, DMA_CTL0_ONESHOT); + else + pdc_set_mode(&pd_chan->chan, DMA_CTL0_SG); + + val = dma_readl(pd, CTL2); + val |= 1 << (DMA_CTL2_START_SHIFT_BITS + pd_chan->chan.chan_id); + dma_writel(pd, CTL2, val); +} + +static void pdc_chain_complete(struct pch_dma_chan *pd_chan, + struct pch_dma_desc *desc) +{ + struct dma_async_tx_descriptor *txd = &desc->txd; + dma_async_tx_callback callback = txd->callback; + void *param = txd->callback_param; + + list_splice_init(&desc->tx_list, &pd_chan->free_list); + list_move(&desc->desc_node, &pd_chan->free_list); + + if (callback) + callback(param); +} + +static void pdc_complete_all(struct pch_dma_chan *pd_chan) +{ + struct pch_dma_desc *desc, *_d; + LIST_HEAD(list); + + BUG_ON(!pdc_is_idle(pd_chan)); + + if (!list_empty(&pd_chan->queue)) + pdc_dostart(pd_chan, pdc_first_queued(pd_chan)); + + list_splice_init(&pd_chan->active_list, &list); + list_splice_init(&pd_chan->queue, &pd_chan->active_list); + + list_for_each_entry_safe(desc, _d, &list, desc_node) + pdc_chain_complete(pd_chan, desc); +} + +static void pdc_handle_error(struct pch_dma_chan *pd_chan) +{ + struct pch_dma_desc *bad_desc; + + bad_desc = pdc_first_active(pd_chan); + list_del(&bad_desc->desc_node); + + list_splice_init(&pd_chan->queue, pd_chan->active_list.prev); + + if (!list_empty(&pd_chan->active_list)) + pdc_dostart(pd_chan, pdc_first_active(pd_chan)); + + dev_crit(chan2dev(&pd_chan->chan), "Bad descriptor submitted\n"); + dev_crit(chan2dev(&pd_chan->chan), "descriptor cookie: %d\n", + bad_desc->txd.cookie); + + pdc_chain_complete(pd_chan, bad_desc); +} + +static void pdc_advance_work(struct pch_dma_chan *pd_chan) +{ + if (list_empty(&pd_chan->active_list) || + list_is_singular(&pd_chan->active_list)) { + pdc_complete_all(pd_chan); + } else { + pdc_chain_complete(pd_chan, pdc_first_active(pd_chan)); + pdc_dostart(pd_chan, pdc_first_active(pd_chan)); + } +} + +static dma_cookie_t pdc_assign_cookie(struct pch_dma_chan *pd_chan, + struct pch_dma_desc *desc) +{ + dma_cookie_t cookie = pd_chan->chan.cookie; + + if (++cookie < 0) + cookie = 1; + + pd_chan->chan.cookie = cookie; + desc->txd.cookie = cookie; + + return cookie; +} + +static dma_cookie_t pd_tx_submit(struct dma_async_tx_descriptor *txd) +{ + struct pch_dma_desc *desc = to_pd_desc(txd); + struct pch_dma_chan *pd_chan = to_pd_chan(txd->chan); + dma_cookie_t cookie; + + spin_lock_bh(&pd_chan->lock); + cookie = pdc_assign_cookie(pd_chan, desc); + + if (list_empty(&pd_chan->active_list)) { + list_add_tail(&desc->desc_node, &pd_chan->active_list); + pdc_dostart(pd_chan, desc); + } else { + list_add_tail(&desc->desc_node, &pd_chan->queue); + } + + 
spin_unlock_bh(&pd_chan->lock); + return 0; +} + +static struct pch_dma_desc *pdc_alloc_desc(struct dma_chan *chan, gfp_t flags) +{ + struct pch_dma_desc *desc = NULL; + struct pch_dma *pd = to_pd(chan->device); + dma_addr_t addr; + + desc = pci_pool_alloc(pd->pool, flags, &addr); + if (desc) { + memset(desc, 0, sizeof(struct pch_dma_desc)); + INIT_LIST_HEAD(&desc->tx_list); + dma_async_tx_descriptor_init(&desc->txd, chan); + desc->txd.tx_submit = pd_tx_submit; + desc->txd.flags = DMA_CTRL_ACK; + desc->txd.phys = addr; + } + + return desc; +} + +static struct pch_dma_desc *pdc_desc_get(struct pch_dma_chan *pd_chan) +{ + struct pch_dma_desc *desc, *_d; + struct pch_dma_desc *ret = NULL; + int i = 0; + + spin_lock_bh(&pd_chan->lock); + list_for_each_entry_safe(desc, _d, &pd_chan->free_list, desc_node) { + i++; + if (async_tx_test_ack(&desc->txd)) { + list_del(&desc->desc_node); + ret = desc; + break; + } + dev_dbg(chan2dev(&pd_chan->chan), "desc %p not ACKed\n", desc); + } + spin_unlock_bh(&pd_chan->lock); + dev_dbg(chan2dev(&pd_chan->chan), "scanned %d descriptors\n", i); + + if (!ret) { + ret = pdc_alloc_desc(&pd_chan->chan, GFP_NOIO); + if (ret) { + spin_lock_bh(&pd_chan->lock); + pd_chan->descs_allocated++; + spin_unlock_bh(&pd_chan->lock); + } else { + dev_err(chan2dev(&pd_chan->chan), + "failed to alloc desc\n"); + } + } + + return ret; +} + +static void pdc_desc_put(struct pch_dma_chan *pd_chan, + struct pch_dma_desc *desc) +{ + if (desc) { + spin_lock_bh(&pd_chan->lock); + list_splice_init(&desc->tx_list, &pd_chan->free_list); + list_add(&desc->desc_node, &pd_chan->free_list); + spin_unlock_bh(&pd_chan->lock); + } +} + +static int pd_alloc_chan_resources(struct dma_chan *chan) +{ + struct pch_dma_chan *pd_chan = to_pd_chan(chan); + struct pch_dma_desc *desc; + LIST_HEAD(tmp_list); + int i; + + if (!pdc_is_idle(pd_chan)) { + dev_dbg(chan2dev(chan), "DMA channel not idle ?\n"); + return -EIO; + } + + if (!list_empty(&pd_chan->free_list)) + return pd_chan->descs_allocated; + + for (i = 0; i < init_nr_desc_per_channel; i++) { + desc = pdc_alloc_desc(chan, GFP_KERNEL); + + if (!desc) { + dev_warn(chan2dev(chan), + "Only allocated %d initial descriptors\n", i); + break; + } + + list_add_tail(&desc->desc_node, &tmp_list); + } + + spin_lock_bh(&pd_chan->lock); + list_splice(&tmp_list, &pd_chan->free_list); + pd_chan->descs_allocated = i; + pd_chan->completed_cookie = chan->cookie = 1; + spin_unlock_bh(&pd_chan->lock); + + pdc_enable_irq(chan, 1); + pdc_set_dir(chan); + + return pd_chan->descs_allocated; +} + +static void pd_free_chan_resources(struct dma_chan *chan) +{ + struct pch_dma_chan *pd_chan = to_pd_chan(chan); + struct pch_dma *pd = to_pd(chan->device); + struct pch_dma_desc *desc, *_d; + LIST_HEAD(tmp_list); + + BUG_ON(!pdc_is_idle(pd_chan)); + BUG_ON(!list_empty(&pd_chan->active_list)); + BUG_ON(!list_empty(&pd_chan->queue)); + + spin_lock_bh(&pd_chan->lock); + list_splice_init(&pd_chan->free_list, &tmp_list); + pd_chan->descs_allocated = 0; + spin_unlock_bh(&pd_chan->lock); + + list_for_each_entry_safe(desc, _d, &tmp_list, desc_node) + pci_pool_free(pd->pool, desc, desc->txd.phys); + + pdc_enable_irq(chan, 0); +} + +static enum dma_status pd_tx_status(struct dma_chan *chan, dma_cookie_t cookie, + struct dma_tx_state *txstate) +{ + struct pch_dma_chan *pd_chan = to_pd_chan(chan); + dma_cookie_t last_used; + dma_cookie_t last_completed; + int ret; + + spin_lock_bh(&pd_chan->lock); + last_completed = pd_chan->completed_cookie; + last_used = chan->cookie; +
spin_unlock_bh(&pd_chan->lock); + + ret = dma_async_is_complete(cookie, last_completed, last_used); + + dma_set_tx_state(txstate, last_completed, last_used, 0); + + return ret; +} + +static void pd_issue_pending(struct dma_chan *chan) +{ + struct pch_dma_chan *pd_chan = to_pd_chan(chan); + + if (pdc_is_idle(pd_chan)) { + spin_lock_bh(&pd_chan->lock); + pdc_advance_work(pd_chan); + spin_unlock_bh(&pd_chan->lock); + } +} + +static struct dma_async_tx_descriptor *pd_prep_slave_sg(struct dma_chan *chan, + struct scatterlist *sgl, unsigned int sg_len, + enum dma_data_direction direction, unsigned long flags) +{ + struct pch_dma_chan *pd_chan = to_pd_chan(chan); + struct pch_dma_slave *pd_slave = chan->private; + struct pch_dma_desc *first = NULL; + struct pch_dma_desc *prev = NULL; + struct pch_dma_desc *desc = NULL; + struct scatterlist *sg; + dma_addr_t reg; + int i; + + if (unlikely(!sg_len)) { + dev_info(chan2dev(chan), "prep_slave_sg: length is zero!\n"); + return NULL; + } + + if (direction == DMA_FROM_DEVICE) + reg = pd_slave->rx_reg; + else if (direction == DMA_TO_DEVICE) + reg = pd_slave->tx_reg; + else + return NULL; + + for_each_sg(sgl, sg, sg_len, i) { + desc = pdc_desc_get(pd_chan); + + if (!desc) + goto err_desc_get; + + desc->regs.dev_addr = reg; + desc->regs.mem_addr = sg_phys(sg); + desc->regs.size = sg_dma_len(sg); + desc->regs.next = DMA_DESC_FOLLOW_WITHOUT_IRQ; + + switch (pd_slave->width) { + case PCH_DMA_WIDTH_1_BYTE: + if (desc->regs.size > DMA_DESC_MAX_COUNT_1_BYTE) + goto err_desc_get; + desc->regs.size |= DMA_DESC_WIDTH_1_BYTE; + break; + case PCH_DMA_WIDTH_2_BYTES: + if (desc->regs.size > DMA_DESC_MAX_COUNT_2_BYTES) + goto err_desc_get; + desc->regs.size |= DMA_DESC_WIDTH_2_BYTES; + break; + case PCH_DMA_WIDTH_4_BYTES: + if (desc->regs.size > DMA_DESC_MAX_COUNT_4_BYTES) + goto err_desc_get; + desc->regs.size |= DMA_DESC_WIDTH_4_BYTES; + break; + default: + goto err_desc_get; + } + + + if (!first) { + first = desc; + } else { + prev->regs.next |= desc->txd.phys; + list_add_tail(&desc->desc_node, &first->tx_list); + } + + prev = desc; + } + + if (flags & DMA_PREP_INTERRUPT) + desc->regs.next = DMA_DESC_END_WITH_IRQ; + else + desc->regs.next = DMA_DESC_END_WITHOUT_IRQ; + + first->txd.cookie = -EBUSY; + desc->txd.flags = flags; + + return &first->txd; + +err_desc_get: + dev_err(chan2dev(chan), "failed to get desc or wrong parameters\n"); + pdc_desc_put(pd_chan, first); + return NULL; +} + +static int pd_device_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd, + unsigned long arg) +{ + struct pch_dma_chan *pd_chan = to_pd_chan(chan); + struct pch_dma_desc *desc, *_d; + LIST_HEAD(list); + + if (cmd != DMA_TERMINATE_ALL) + return -ENXIO; + + spin_lock_bh(&pd_chan->lock); + + pdc_set_mode(&pd_chan->chan, DMA_CTL0_DISABLE); + + list_splice_init(&pd_chan->active_list, &list); + list_splice_init(&pd_chan->queue, &list); + + list_for_each_entry_safe(desc, _d, &list, desc_node) + pdc_chain_complete(pd_chan, desc); + + spin_unlock_bh(&pd_chan->lock); + + + return 0; +} + +static void pdc_tasklet(unsigned long data) +{ + struct pch_dma_chan *pd_chan = (struct pch_dma_chan *)data; + + if (!pdc_is_idle(pd_chan)) { + dev_err(chan2dev(&pd_chan->chan), + "BUG: handle non-idle channel in tasklet\n"); + return; + } + + spin_lock_bh(&pd_chan->lock); + if (test_and_clear_bit(0, &pd_chan->err_status)) + pdc_handle_error(pd_chan); + else + pdc_advance_work(pd_chan); + spin_unlock_bh(&pd_chan->lock); +} + +static irqreturn_t pd_irq(int irq, void *devid) +{ + struct pch_dma *pd = (struct 
pch_dma *)devid; + struct pch_dma_chan *pd_chan; + u32 sts0; + int i; + int ret = IRQ_NONE; + + sts0 = dma_readl(pd, STS0); + + dev_dbg(pd->dma.dev, "pd_irq sts0: %x\n", sts0); + + for (i = 0; i < pd->dma.chancnt; i++) { + pd_chan = &pd->channels[i]; + + if (sts0 & DMA_STATUS_IRQ(i)) { + if (sts0 & DMA_STATUS_ERR(i)) + set_bit(0, &pd_chan->err_status); + + tasklet_schedule(&pd_chan->tasklet); + ret = IRQ_HANDLED; + } + + } + + /* clear interrupt bits in status register */ + dma_writel(pd, STS0, sts0); + + return ret; +} + +static void pch_dma_save_regs(struct pch_dma *pd) +{ + struct pch_dma_chan *pd_chan; + struct dma_chan *chan, *_c; + int i = 0; + + pd->regs.dma_ctl0 = dma_readl(pd, CTL0); + pd->regs.dma_ctl1 = dma_readl(pd, CTL1); + pd->regs.dma_ctl2 = dma_readl(pd, CTL2); + + list_for_each_entry_safe(chan, _c, &pd->dma.channels, device_node) { + pd_chan = to_pd_chan(chan); + + pd->ch_regs[i].dev_addr = channel_readl(pd_chan, DEV_ADDR); + pd->ch_regs[i].mem_addr = channel_readl(pd_chan, MEM_ADDR); + pd->ch_regs[i].size = channel_readl(pd_chan, SIZE); + pd->ch_regs[i].next = channel_readl(pd_chan, NEXT); + + i++; + } +} + +static void pch_dma_restore_regs(struct pch_dma *pd) +{ + struct pch_dma_chan *pd_chan; + struct dma_chan *chan, *_c; + int i = 0; + + dma_writel(pd, CTL0, pd->regs.dma_ctl0); + dma_writel(pd, CTL1, pd->regs.dma_ctl1); + dma_writel(pd, CTL2, pd->regs.dma_ctl2); + + list_for_each_entry_safe(chan, _c, &pd->dma.channels, device_node) { + pd_chan = to_pd_chan(chan); + + channel_writel(pd_chan, DEV_ADDR, pd->ch_regs[i].dev_addr); + channel_writel(pd_chan, MEM_ADDR, pd->ch_regs[i].mem_addr); + channel_writel(pd_chan, SIZE, pd->ch_regs[i].size); + channel_writel(pd_chan, NEXT, pd->ch_regs[i].next); + + i++; + } +} + +static int pch_dma_suspend(struct pci_dev *pdev, pm_message_t state) +{ + struct pch_dma *pd = pci_get_drvdata(pdev); + + if (pd) + pch_dma_save_regs(pd); + + pci_save_state(pdev); + pci_disable_device(pdev); + pci_set_power_state(pdev, pci_choose_state(pdev, state)); + + return 0; +} + +static int pch_dma_resume(struct pci_dev *pdev) +{ + struct pch_dma *pd = pci_get_drvdata(pdev); + int err; + + pci_set_power_state(pdev, PCI_D0); + pci_restore_state(pdev); + + err = pci_enable_device(pdev); + if (err) { + dev_dbg(&pdev->dev, "failed to enable device\n"); + return err; + } + + if (pd) + pch_dma_restore_regs(pd); + + return 0; +} + +static int __devinit pch_dma_probe(struct pci_dev *pdev, + const struct pci_device_id *id) +{ + struct pch_dma *pd; + struct pch_dma_regs *regs; + unsigned int nr_channels; + int err; + int i; + + nr_channels = id->driver_data; + pd = kzalloc(sizeof(struct pch_dma)+ + sizeof(struct pch_dma_chan) * nr_channels, GFP_KERNEL); + if (!pd) + return -ENOMEM; + + pci_set_drvdata(pdev, pd); + + err = pci_enable_device(pdev); + if (err) { + dev_err(&pdev->dev, "Cannot enable PCI device\n"); + goto err_free_mem; + } + + if (!(pci_resource_flags(pdev, 1) & IORESOURCE_MEM)) { + dev_err(&pdev->dev, "Cannot find proper base address\n"); + goto err_disable_pdev; + } + + err = pci_request_regions(pdev, DRV_NAME); + if (err) { + dev_err(&pdev->dev, "Cannot obtain PCI resources\n"); + goto err_disable_pdev; + } + + err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); + if (err) { + dev_err(&pdev->dev, "Cannot set proper DMA config\n"); + goto err_free_res; + } + + regs = pd->membase = pci_iomap(pdev, 1, 0); + if (!pd->membase) { + dev_err(&pdev->dev, "Cannot map MMIO registers\n"); + err = -ENOMEM; + goto err_free_res; + } + + pci_set_master(pdev); + + err 
= request_irq(pdev->irq, pd_irq, IRQF_SHARED, DRV_NAME, pd); + if (err) { + dev_err(&pdev->dev, "Failed to request IRQ\n"); + goto err_iounmap; + } + + pd->pool = pci_pool_create("pch_dma_desc_pool", pdev, + sizeof(struct pch_dma_desc), 4, 0); + if (!pd->pool) { + dev_err(&pdev->dev, "Failed to alloc DMA descriptors\n"); + err = -ENOMEM; + goto err_free_irq; + } + + pd->dma.dev = &pdev->dev; + pd->dma.chancnt = nr_channels; + + INIT_LIST_HEAD(&pd->dma.channels); + + for (i = 0; i < nr_channels; i++) { + struct pch_dma_chan *pd_chan = &pd->channels[i]; + + pd_chan->chan.device = &pd->dma; + pd_chan->chan.cookie = 1; + pd_chan->chan.chan_id = i; + + pd_chan->membase = ®s->desc[i]; + + pd_chan->dir = (i % 2) ? DMA_FROM_DEVICE : DMA_TO_DEVICE; + + spin_lock_init(&pd_chan->lock); + + INIT_LIST_HEAD(&pd_chan->active_list); + INIT_LIST_HEAD(&pd_chan->queue); + INIT_LIST_HEAD(&pd_chan->free_list); + + tasklet_init(&pd_chan->tasklet, pdc_tasklet, + (unsigned long)pd_chan); + list_add_tail(&pd_chan->chan.device_node, &pd->dma.channels); + } + + dma_cap_zero(pd->dma.cap_mask); + dma_cap_set(DMA_PRIVATE, pd->dma.cap_mask); + dma_cap_set(DMA_SLAVE, pd->dma.cap_mask); + + pd->dma.device_alloc_chan_resources = pd_alloc_chan_resources; + pd->dma.device_free_chan_resources = pd_free_chan_resources; + pd->dma.device_tx_status = pd_tx_status; + pd->dma.device_issue_pending = pd_issue_pending; + pd->dma.device_prep_slave_sg = pd_prep_slave_sg; + pd->dma.device_control = pd_device_control; + + err = dma_async_device_register(&pd->dma); + if (err) { + dev_err(&pdev->dev, "Failed to register DMA device\n"); + goto err_free_pool; + } + + return 0; + +err_free_pool: + pci_pool_destroy(pd->pool); +err_free_irq: + free_irq(pdev->irq, pd); +err_iounmap: + pci_iounmap(pdev, pd->membase); +err_free_res: + pci_release_regions(pdev); +err_disable_pdev: + pci_disable_device(pdev); +err_free_mem: + return err; +} + +static void __devexit pch_dma_remove(struct pci_dev *pdev) +{ + struct pch_dma *pd = pci_get_drvdata(pdev); + struct pch_dma_chan *pd_chan; + struct dma_chan *chan, *_c; + + if (pd) { + dma_async_device_unregister(&pd->dma); + + list_for_each_entry_safe(chan, _c, &pd->dma.channels, + device_node) { + pd_chan = to_pd_chan(chan); + + tasklet_disable(&pd_chan->tasklet); + tasklet_kill(&pd_chan->tasklet); + } + + pci_pool_destroy(pd->pool); + free_irq(pdev->irq, pd); + pci_iounmap(pdev, pd->membase); + pci_release_regions(pdev); + pci_disable_device(pdev); + kfree(pd); + } +} + +/* PCI Device ID of DMA device */ +#define PCI_DEVICE_ID_PCH_DMA_8CH 0x8810 +#define PCI_DEVICE_ID_PCH_DMA_4CH 0x8815 + +static const struct pci_device_id pch_dma_id_table[] = { + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_PCH_DMA_8CH), 8 }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_PCH_DMA_4CH), 4 }, +}; + +static struct pci_driver pch_dma_driver = { + .name = DRV_NAME, + .id_table = pch_dma_id_table, + .probe = pch_dma_probe, + .remove = __devexit_p(pch_dma_remove), +#ifdef CONFIG_PM + .suspend = pch_dma_suspend, + .resume = pch_dma_resume, +#endif +}; + +static int __init pch_dma_init(void) +{ + return pci_register_driver(&pch_dma_driver); +} + +static void __exit pch_dma_exit(void) +{ + pci_unregister_driver(&pch_dma_driver); +} + +module_init(pch_dma_init); +module_exit(pch_dma_exit); + +MODULE_DESCRIPTION("Topcliff PCH DMA controller driver"); +MODULE_AUTHOR("Yong Wang "); +MODULE_LICENSE("GPL v2"); diff --git a/include/linux/pch_dma.h b/include/linux/pch_dma.h new file mode 100644 index 0000000..fdafe52 --- /dev/null +++ b/include/linux/pch_dma.h 
@@ -0,0 +1,37 @@ +/* + * Copyright (c) 2010 Intel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifndef PCH_DMA_H +#define PCH_DMA_H + +#include + +enum pch_dma_width { + PCH_DMA_WIDTH_1_BYTE, + PCH_DMA_WIDTH_2_BYTES, + PCH_DMA_WIDTH_4_BYTES, +}; + +struct pch_dma_slave { + struct device *dma_dev; + unsigned int chan_id; + dma_addr_t tx_reg; + dma_addr_t rx_reg; + enum pch_dma_width width; +}; + +#endif -- cgit v0.10.2 From c156d0a5b0c667999e06d0bb52e3d1376faec8bf Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Wed, 4 Aug 2010 13:37:33 +0200 Subject: DMAENGINE: generic slave channel control v3 This adds an interface to the DMAengine to make it possible to reconfigure a slave channel at runtime. We add a few foreseen config parameters to the passed struct, with a void * pointer for custom per-device or per-platform runtime slave data. Signed-off-by: Linus Walleij Signed-off-by: Dan Williams diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 5204f01..c61d4ca 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -114,11 +114,17 @@ enum dma_ctrl_flags { * @DMA_TERMINATE_ALL: terminate all ongoing transfers * @DMA_PAUSE: pause ongoing transfers * @DMA_RESUME: resume paused transfer + * @DMA_SLAVE_CONFIG: this command is only implemented by DMA controllers + * that need to reconfigure the slave channels at runtime (as opposed to passing + * configuration data in statically from the platform). An additional + * argument of struct dma_slave_config must be passed in with this + * command. */ enum dma_ctrl_cmd { DMA_TERMINATE_ALL, DMA_PAUSE, DMA_RESUME, + DMA_SLAVE_CONFIG, }; /** @@ -199,6 +205,71 @@ struct dma_chan_dev { atomic_t *idr_ref; }; +/** + * enum dma_slave_buswidth - defines bus width of the DMA slave + * device, source or target buses + */ +enum dma_slave_buswidth { + DMA_SLAVE_BUSWIDTH_UNDEFINED = 0, + DMA_SLAVE_BUSWIDTH_1_BYTE = 1, + DMA_SLAVE_BUSWIDTH_2_BYTES = 2, + DMA_SLAVE_BUSWIDTH_4_BYTES = 4, + DMA_SLAVE_BUSWIDTH_8_BYTES = 8, +}; + +/** + * struct dma_slave_config - dma slave channel runtime config + * @direction: whether the data shall go in or out on this slave + * channel, right now. DMA_TO_DEVICE and DMA_FROM_DEVICE are + * legal values, DMA_BIDIRECTIONAL is not acceptable since we + * need to differentiate source and target addresses. + * @src_addr: this is the physical address where DMA slave data + * should be read (RX), if the source is memory this argument is + * ignored. + * @dst_addr: this is the physical address where DMA slave data + * should be written (TX), if the source is memory this argument + * is ignored. + * @src_addr_width: this is the width in bytes of the source (RX) + * register where DMA data shall be read. If the source + * is memory this may be ignored depending on architecture. + * Legal values: 1, 2, 4, 8.
+ * @dst_addr_width: same as src_addr_width but for destination + * target (TX) mutatis mutandis. + * @src_maxburst: the maximum number of words (note: words, as in + * units of the src_addr_width member, not bytes) that can be sent + * in one burst to the device. Typically something like half the + * FIFO depth on I/O peripherals so you don't overflow it. This + * may or may not be applicable on memory sources. + * @dst_maxburst: same as src_maxburst but for destination target + * mutatis mutandis. + * + * This struct is passed in as configuration data to a DMA engine + * in order to set up a certain channel for DMA transport at runtime. + * The DMA device/engine has to provide support for an additional + * command in the channel config interface, DMA_SLAVE_CONFIG, + * and this struct will then be passed in as an argument to the + * DMA engine device_control() function. + * + * The rationale for adding configuration information to this struct + * is as follows: if it is likely that most DMA slave controllers in + * the world will support the configuration option, then make it + * generic. If not: if it is fixed so that it can be passed in statically + * from the platform data, then prefer to do that. Else, if it is neither + * fixed at runtime, nor generic enough (such as bus mastership on + * some CPU family and whatnot), then create a custom slave config + * struct and pass that, then make this config a member of that + * struct, if applicable. + */ +struct dma_slave_config { + enum dma_data_direction direction; + dma_addr_t src_addr; + dma_addr_t dst_addr; + enum dma_slave_buswidth src_addr_width; + enum dma_slave_buswidth dst_addr_width; + u32 src_maxburst; + u32 dst_maxburst; +}; + static inline const char *dma_chan_name(struct dma_chan *chan) { return dev_name(&chan->dev->device); -- cgit v0.10.2 From 95e1400fa1317e566b316ff4af947bd5341332c8 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Wed, 4 Aug 2010 13:37:45 +0200 Subject: DMAENGINE: add runtime slave config to DMA40 v3 This extends the DMA engine driver for the DMA40 used in the U8500 platform with the generic runtime slave configuration interface.
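To make the new command concrete: a client driver that owns a slave channel fills in a struct dma_slave_config and hands it to the engine through device_control(). The sketch below is illustrative only (my_setup_rx() and the FIFO address parameter are invented), not code from these patches:

#include <linux/dmaengine.h>

static int my_setup_rx(struct dma_chan *chan, dma_addr_t fifo_addr)
{
	struct dma_slave_config config = {
		.direction	= DMA_FROM_DEVICE,
		.src_addr	= fifo_addr,	/* peripheral RX FIFO */
		.src_addr_width	= DMA_SLAVE_BUSWIDTH_4_BYTES,
		.src_maxburst	= 8,	/* eight 32-bit words per burst */
	};

	/* engines without runtime reconfiguration reject the command */
	return chan->device->device_control(chan, DMA_SLAVE_CONFIG,
					    (unsigned long)&config);
}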
Signed-off-by: Linus Walleij Signed-off-by: Dan Williams diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c index 21a7597..17e2600 100644 --- a/drivers/dma/ste_dma40.c +++ b/drivers/dma/ste_dma40.c @@ -208,6 +208,9 @@ struct d40_chan { struct d40_def_lcsp log_def; struct d40_lcla_elem lcla; struct d40_log_lli_full *lcpa; + /* Runtime reconfiguration */ + dma_addr_t runtime_addr; + enum dma_data_direction runtime_direction; }; /** @@ -1886,9 +1889,16 @@ static int d40_prep_slave_sg_log(struct d40_desc *d40d, d40d->lli_tx_len = 1; if (direction == DMA_FROM_DEVICE) - dev_addr = d40c->base->plat_data->dev_rx[d40c->dma_cfg.src_dev_type]; + if (d40c->runtime_addr) + dev_addr = d40c->runtime_addr; + else + dev_addr = d40c->base->plat_data->dev_rx[d40c->dma_cfg.src_dev_type]; else if (direction == DMA_TO_DEVICE) - dev_addr = d40c->base->plat_data->dev_tx[d40c->dma_cfg.dst_dev_type]; + if (d40c->runtime_addr) + dev_addr = d40c->runtime_addr; + else + dev_addr = d40c->base->plat_data->dev_tx[d40c->dma_cfg.dst_dev_type]; + else return -EINVAL; @@ -1931,9 +1941,15 @@ static int d40_prep_slave_sg_phy(struct d40_desc *d40d, if (direction == DMA_FROM_DEVICE) { dst_dev_addr = 0; - src_dev_addr = d40c->base->plat_data->dev_rx[d40c->dma_cfg.src_dev_type]; + if (d40c->runtime_addr) + src_dev_addr = d40c->runtime_addr; + else + src_dev_addr = d40c->base->plat_data->dev_rx[d40c->dma_cfg.src_dev_type]; } else if (direction == DMA_TO_DEVICE) { - dst_dev_addr = d40c->base->plat_data->dev_tx[d40c->dma_cfg.dst_dev_type]; + if (d40c->runtime_addr) + dst_dev_addr = d40c->runtime_addr; + else + dst_dev_addr = d40c->base->plat_data->dev_tx[d40c->dma_cfg.dst_dev_type]; src_dev_addr = 0; } else return -EINVAL; @@ -2070,6 +2086,117 @@ static void d40_issue_pending(struct dma_chan *chan) spin_unlock_irqrestore(&d40c->lock, flags); } +/* Runtime reconfiguration extension */ +static void d40_set_runtime_config(struct dma_chan *chan, + struct dma_slave_config *config) +{ + struct d40_chan *d40c = container_of(chan, struct d40_chan, chan); + struct stedma40_chan_cfg *cfg = &d40c->dma_cfg; + enum dma_slave_buswidth config_addr_width; + dma_addr_t config_addr; + u32 config_maxburst; + enum stedma40_periph_data_width addr_width; + int psize; + + if (config->direction == DMA_FROM_DEVICE) { + dma_addr_t dev_addr_rx = + d40c->base->plat_data->dev_rx[cfg->src_dev_type]; + + config_addr = config->src_addr; + if (dev_addr_rx) + dev_dbg(d40c->base->dev, + "channel has a pre-wired RX address %08x " + "overriding with %08x\n", + dev_addr_rx, config_addr); + if (cfg->dir != STEDMA40_PERIPH_TO_MEM) + dev_dbg(d40c->base->dev, + "channel was not configured for peripheral " + "to memory transfer (%d) overriding\n", + cfg->dir); + cfg->dir = STEDMA40_PERIPH_TO_MEM; + + config_addr_width = config->src_addr_width; + config_maxburst = config->src_maxburst; + + } else if (config->direction == DMA_TO_DEVICE) { + dma_addr_t dev_addr_tx = + d40c->base->plat_data->dev_tx[cfg->dst_dev_type]; + + config_addr = config->dst_addr; + if (dev_addr_tx) + dev_dbg(d40c->base->dev, + "channel has a pre-wired TX address %08x " + "overriding with %08x\n", + dev_addr_tx, config_addr); + if (cfg->dir != STEDMA40_MEM_TO_PERIPH) + dev_dbg(d40c->base->dev, + "channel was not configured for memory " + "to peripheral transfer (%d) overriding\n", + cfg->dir); + cfg->dir = STEDMA40_MEM_TO_PERIPH; + + config_addr_width = config->dst_addr_width; + config_maxburst = config->dst_maxburst; + + } else { + dev_err(d40c->base->dev, + "unrecognized channel direction 
%d\n", + config->direction); + return; + } + + switch (config_addr_width) { + case DMA_SLAVE_BUSWIDTH_1_BYTE: + addr_width = STEDMA40_BYTE_WIDTH; + break; + case DMA_SLAVE_BUSWIDTH_2_BYTES: + addr_width = STEDMA40_HALFWORD_WIDTH; + break; + case DMA_SLAVE_BUSWIDTH_4_BYTES: + addr_width = STEDMA40_WORD_WIDTH; + break; + case DMA_SLAVE_BUSWIDTH_8_BYTES: + addr_width = STEDMA40_DOUBLEWORD_WIDTH; + break; + default: + dev_err(d40c->base->dev, + "illegal peripheral address width " + "requested (%d)\n", + config_addr_width); + return; + } + + if (config_maxburst >= 16) + psize = STEDMA40_PSIZE_LOG_16; + else if (config_maxburst >= 8) + psize = STEDMA40_PSIZE_LOG_8; + else if (config_maxburst >= 4) + psize = STEDMA40_PSIZE_LOG_4; + else + psize = STEDMA40_PSIZE_LOG_1; + + /* Set up all the endpoint configs */ + cfg->src_info.data_width = addr_width; + cfg->src_info.psize = psize; + cfg->src_info.endianess = STEDMA40_LITTLE_ENDIAN; + cfg->src_info.flow_ctrl = STEDMA40_NO_FLOW_CTRL; + cfg->dst_info.data_width = addr_width; + cfg->dst_info.psize = psize; + cfg->dst_info.endianess = STEDMA40_LITTLE_ENDIAN; + cfg->dst_info.flow_ctrl = STEDMA40_NO_FLOW_CTRL; + + /* These settings will take precedence later */ + d40c->runtime_addr = config_addr; + d40c->runtime_direction = config->direction; + dev_dbg(d40c->base->dev, + "configured channel %s for %s, data width %d, " + "maxburst %d bytes, LE, no flow control\n", + dma_chan_name(chan), + (config->direction == DMA_FROM_DEVICE) ? "RX" : "TX", + config_addr_width, + config_maxburst); +} + static int d40_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd, unsigned long arg) { @@ -2092,6 +2219,12 @@ static int d40_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd, return d40_pause(chan); case DMA_RESUME: return d40_resume(chan); + case DMA_SLAVE_CONFIG: + d40_set_runtime_config(chan, + (struct dma_slave_config *) arg); + return 0; + default: + break; } /* Other commands are unimplemented */ -- cgit v0.10.2 From 128f904ac87cb6e63921e80f378fdf9ba532c0f6 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Wed, 4 Aug 2010 13:37:53 +0200 Subject: DMAENGINE: add runtime slave control to COH 901 318 v3 This extends the DMA engine driver for the COH 901 318 used in the U300 platform with the generic runtime slave control command. Signed-off-by: Linus Walleij Signed-off-by: Dan Williams diff --git a/drivers/dma/coh901318.c b/drivers/dma/coh901318.c index a724e6b..557e227 100644 --- a/drivers/dma/coh901318.c +++ b/drivers/dma/coh901318.c @@ -72,6 +72,9 @@ struct coh901318_chan { unsigned long nbr_active_done; unsigned long busy; + u32 runtime_addr; + u32 runtime_ctrl; + struct coh901318_base *base; }; @@ -190,6 +193,9 @@ static inline struct coh901318_chan *to_coh901318_chan(struct dma_chan *chan) static inline dma_addr_t cohc_dev_addr(struct coh901318_chan *cohc) { + /* Runtime supplied address will take precedence */ + if (cohc->runtime_addr) + return cohc->runtime_addr; return cohc->base->platform->chan_conf[cohc->id].dev_addr; } @@ -1055,6 +1061,14 @@ coh901318_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, params = cohc_chan_param(cohc); config = params->config; + /* + * Add runtime-specific control on top, making + * sure the bits you set per peripheral channel are + * cleared in the default config from the platform.
+ */ + ctrl_chained |= cohc->runtime_ctrl; + ctrl_last |= cohc->runtime_ctrl; + ctrl |= cohc->runtime_ctrl; if (direction == DMA_TO_DEVICE) { u32 tx_flags = COH901318_CX_CTRL_PRDD_SOURCE | @@ -1113,6 +1127,12 @@ coh901318_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, if (ret) goto err_lli_fill; + /* + * Set the default ctrl for the channel to the one from the lli, + * things may have changed due to odd buffer alignment etc. + */ + coh901318_set_ctrl(cohc, lli->control); + COH_DBG(coh901318_list_print(cohc, lli)); /* Pick a descriptor to handle this transfer */ @@ -1175,6 +1195,146 @@ coh901318_issue_pending(struct dma_chan *chan) spin_unlock_irqrestore(&cohc->lock, flags); } +/* + * Here we wrap in the runtime dma control interface + */ +struct burst_table { + int burst_8bit; + int burst_16bit; + int burst_32bit; + u32 reg; +}; + +static const struct burst_table burst_sizes[] = { + { + .burst_8bit = 64, + .burst_16bit = 32, + .burst_32bit = 16, + .reg = COH901318_CX_CTRL_BURST_COUNT_64_BYTES, + }, + { + .burst_8bit = 48, + .burst_16bit = 24, + .burst_32bit = 12, + .reg = COH901318_CX_CTRL_BURST_COUNT_48_BYTES, + }, + { + .burst_8bit = 32, + .burst_16bit = 16, + .burst_32bit = 8, + .reg = COH901318_CX_CTRL_BURST_COUNT_32_BYTES, + }, + { + .burst_8bit = 16, + .burst_16bit = 8, + .burst_32bit = 4, + .reg = COH901318_CX_CTRL_BURST_COUNT_16_BYTES, + }, + { + .burst_8bit = 8, + .burst_16bit = 4, + .burst_32bit = 2, + .reg = COH901318_CX_CTRL_BURST_COUNT_8_BYTES, + }, + { + .burst_8bit = 4, + .burst_16bit = 2, + .burst_32bit = 1, + .reg = COH901318_CX_CTRL_BURST_COUNT_4_BYTES, + }, + { + .burst_8bit = 2, + .burst_16bit = 1, + .burst_32bit = 0, + .reg = COH901318_CX_CTRL_BURST_COUNT_2_BYTES, + }, + { + .burst_8bit = 1, + .burst_16bit = 0, + .burst_32bit = 0, + .reg = COH901318_CX_CTRL_BURST_COUNT_1_BYTE, + }, +}; + +static void coh901318_dma_set_runtimeconfig(struct dma_chan *chan, + struct dma_slave_config *config) +{ + struct coh901318_chan *cohc = to_coh901318_chan(chan); + dma_addr_t addr; + enum dma_slave_buswidth addr_width; + u32 maxburst; + u32 runtime_ctrl = 0; + int i = 0; + + /* We only support mem to per or per to mem transfers */ + if (config->direction == DMA_FROM_DEVICE) { + addr = config->src_addr; + addr_width = config->src_addr_width; + maxburst = config->src_maxburst; + } else if (config->direction == DMA_TO_DEVICE) { + addr = config->dst_addr; + addr_width = config->dst_addr_width; + maxburst = config->dst_maxburst; + } else { + dev_err(COHC_2_DEV(cohc), "illegal channel mode\n"); + return; + } + + dev_dbg(COHC_2_DEV(cohc), "configure channel for %d byte transfers\n", + addr_width); + switch (addr_width) { + case DMA_SLAVE_BUSWIDTH_1_BYTE: + runtime_ctrl |= + COH901318_CX_CTRL_SRC_BUS_SIZE_8_BITS | + COH901318_CX_CTRL_DST_BUS_SIZE_8_BITS; + + while (i < ARRAY_SIZE(burst_sizes)) { + if (burst_sizes[i].burst_8bit <= maxburst) + break; + i++; + } + + break; + case DMA_SLAVE_BUSWIDTH_2_BYTES: + runtime_ctrl |= + COH901318_CX_CTRL_SRC_BUS_SIZE_16_BITS | + COH901318_CX_CTRL_DST_BUS_SIZE_16_BITS; + + while (i < ARRAY_SIZE(burst_sizes)) { + if (burst_sizes[i].burst_16bit <= maxburst) + break; + i++; + } + + break; + case DMA_SLAVE_BUSWIDTH_4_BYTES: + /* Direction doesn't matter here, it's 32/32 bits */ + runtime_ctrl |= + COH901318_CX_CTRL_SRC_BUS_SIZE_32_BITS | + COH901318_CX_CTRL_DST_BUS_SIZE_32_BITS; + + while (i < ARRAY_SIZE(burst_sizes)) { + if (burst_sizes[i].burst_32bit <= maxburst) + break; + i++; + } + + break; + default: + dev_err(COHC_2_DEV(cohc), + "bad 
runtimeconfig: alien address width\n"); + return; + } + + runtime_ctrl |= burst_sizes[i].reg; + dev_dbg(COHC_2_DEV(cohc), + "selected burst size %d bytes for address width %d bytes, maxburst %d\n", + burst_sizes[i].burst_8bit, addr_width, maxburst); + + cohc->runtime_addr = addr; + cohc->runtime_ctrl = runtime_ctrl; +} + static int coh901318_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd, unsigned long arg) @@ -1184,6 +1344,14 @@ coh901318_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd, struct coh901318_desc *cohd; void __iomem *virtbase = cohc->base->virtbase; + if (cmd == DMA_SLAVE_CONFIG) { + struct dma_slave_config *config = + (struct dma_slave_config *) arg; + + coh901318_dma_set_runtimeconfig(chan, config); + return 0; + } + if (cmd == DMA_PAUSE) { coh901318_pause(chan); return 0; @@ -1240,6 +1408,7 @@ coh901318_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd, return 0; } + void coh901318_base_init(struct dma_device *dma, const int *pick_chans, struct coh901318_base *base) { -- cgit v0.10.2 From 556ab45f9a775bfa4762bacc0a4afb5b44b067bc Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 23 Jul 2010 15:47:56 -0700 Subject: ioat2: catch and recover from broken vtd configurations v6 On some platforms (MacPro3,1) the BIOS assigns the ioatdma device to the incorrect iommu, causing faults when the driver initializes. Add a quirk to catch this misconfiguration and try falling back to untranslated operation (which works in the MacPro3,1 case). Assuming there are other platforms with misconfigured iommus, teach the ioatdma driver to treat initialization failures as non-fatal (just fail the driver load and emit a warning instead of triggering a BUG_ON). This can be classified as a boot regression since 2.6.32 on affected platforms, since the ioatdma module did not autoload prior to that kernel. Cc: Acked-by: David Woodhouse Reported-by: Chris Li Tested-by: Chris Li Signed-off-by: Dan Williams diff --git a/drivers/dma/ioat/dma.h b/drivers/dma/ioat/dma.h index 6d3a73b..5216c8a 100644 --- a/drivers/dma/ioat/dma.h +++ b/drivers/dma/ioat/dma.h @@ -97,6 +97,7 @@ struct ioat_chan_common { #define IOAT_RESET_PENDING 2 #define IOAT_KOBJ_INIT_FAIL 3 #define IOAT_RESHAPE_PENDING 4 + #define IOAT_RUN 5 struct timer_list timer; #define COMPLETION_TIMEOUT msecs_to_jiffies(100) #define IDLE_TIMEOUT msecs_to_jiffies(2000) diff --git a/drivers/dma/ioat/dma_v2.c b/drivers/dma/ioat/dma_v2.c index 3c8b32a..216f9d3 100644 --- a/drivers/dma/ioat/dma_v2.c +++ b/drivers/dma/ioat/dma_v2.c @@ -287,7 +287,10 @@ void ioat2_timer_event(unsigned long data) chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); dev_err(to_dev(chan), "%s: Channel halted (%x)\n", __func__, chanerr); - BUG_ON(is_ioat_bug(chanerr)); + if (test_bit(IOAT_RUN, &chan->state)) + BUG_ON(is_ioat_bug(chanerr)); + else /* we never got off the ground */ + return; } /* if we haven't made progress and we have already @@ -492,6 +495,8 @@ static struct ioat_ring_ent **ioat2_alloc_ring(struct dma_chan *c, int order, gf return ring; } +void ioat2_free_chan_resources(struct dma_chan *c); + /* ioat2_alloc_chan_resources - allocate/initialize ioat2 descriptor ring * @chan: channel to be initialized */ @@ -500,6 +505,7 @@ int ioat2_alloc_chan_resources(struct dma_chan *c) { struct ioat2_dma_chan *ioat = to_ioat2_chan(c); struct ioat_chan_common *chan = &ioat->base; struct ioat_ring_ent **ring; + u64 status; int order; /* have we already been set up?
*/ @@ -540,7 +546,20 @@ int ioat2_alloc_chan_resources(struct dma_chan *c) tasklet_enable(&chan->cleanup_task); ioat2_start_null_desc(ioat); - return 1 << ioat->alloc_order; + /* check that we got off the ground */ + udelay(5); + status = ioat_chansts(chan); + if (is_ioat_active(status) || is_ioat_idle(status)) { + set_bit(IOAT_RUN, &chan->state); + return 1 << ioat->alloc_order; + } else { + u32 chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); + + dev_WARN(to_dev(chan), + "failed to start channel chanerr: %#x\n", chanerr); + ioat2_free_chan_resources(c); + return -EFAULT; + } } bool reshape_ring(struct ioat2_dma_chan *ioat, int order) @@ -778,6 +797,7 @@ void ioat2_free_chan_resources(struct dma_chan *c) del_timer_sync(&chan->timer); device->cleanup_fn((unsigned long) c); device->reset_hw(chan); + clear_bit(IOAT_RUN, &chan->state); spin_lock_bh(&chan->cleanup_lock); spin_lock_bh(&ioat->prep_lock); diff --git a/drivers/dma/ioat/dma_v3.c b/drivers/dma/ioat/dma_v3.c index 1cdd22e..d0f4990 100644 --- a/drivers/dma/ioat/dma_v3.c +++ b/drivers/dma/ioat/dma_v3.c @@ -361,7 +361,10 @@ static void ioat3_timer_event(unsigned long data) chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); dev_err(to_dev(chan), "%s: Channel halted (%x)\n", __func__, chanerr); - BUG_ON(is_ioat_bug(chanerr)); + if (test_bit(IOAT_RUN, &chan->state)) + BUG_ON(is_ioat_bug(chanerr)); + else /* we never got off the ground */ + return; } /* if we haven't made progress and we have already diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 796828f..90938c7 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -3029,6 +3029,34 @@ static void __init iommu_exit_mempool(void) } +static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev) +{ + struct dmar_drhd_unit *drhd; + u32 vtbar; + int rc; + + /* We know that this device on this chipset has its own IOMMU. + * If we find it under a different IOMMU, then the BIOS is lying + * to us. Hope that the IOMMU for this device is actually + * disabled, and it needs no translation... + */ + rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar); + if (rc) { + /* "can't" happen */ + dev_info(&pdev->dev, "failed to run vt-d quirk\n"); + return; + } + vtbar &= 0xffff0000; + + /* we know that this iommu should be at offset 0xa000 from vtbar */ + drhd = dmar_find_matched_drhd_unit(pdev); + if (WARN_TAINT_ONCE(!drhd || drhd->reg_base_addr - vtbar != 0xa000, + TAINT_FIRMWARE_WORKAROUND, + "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n")) + pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO; +} +DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu); + static void __init init_no_remapping_devices(void) { struct dmar_drhd_unit *drhd; -- cgit v0.10.2 From b9033e682e86f3c6a66763f9b6a3935c5c64e145 Mon Sep 17 00:00:00 2001 From: Kulikov Vasiliy Date: Sat, 17 Jul 2010 19:19:48 +0400 Subject: dma: dmatest: fix potential sign bug 'cnt' is unsigned, so this code may become wrong in the future, as dmatest_add_threads() can return an error code: cnt = dmatest_add_threads(dtc, DMA_MEMCPY); thread_count += cnt > 0 ? cnt : 0; ^^^^^^^ Currently it can return -EINVAL only if the second argument of dmatest_add_threads() is not one of DMA_MEMCPY, DMA_XOR, DMA_PQ. So, for now it is not wrong, but it may become wrong in the future. The semantic patch that finds this problem (many false-positive results): (http://coccinelle.lip6.fr/) // @ r1 @ identifier f; @@ int f(...) { ...
} @@ identifier r1.f; type T; unsigned T x; @@ *x = f(...) ... *x > 0 Signed-off-by: Kulikov Vasiliy Signed-off-by: Dan Williams diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c index 68d58c4..5589358 100644 --- a/drivers/dma/dmatest.c +++ b/drivers/dma/dmatest.c @@ -540,7 +540,7 @@ static int dmatest_add_channel(struct dma_chan *chan) struct dmatest_chan *dtc; struct dma_device *dma_dev = chan->device; unsigned int thread_count = 0; - unsigned int cnt; + int cnt; dtc = kmalloc(sizeof(struct dmatest_chan), GFP_KERNEL); if (!dtc) { -- cgit v0.10.2 From 61cd2203769a2bf35d41f8682f6ef865fe2d23ff Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Thu, 5 Aug 2010 10:38:43 +0800 Subject: DMAENGINE: pch_dma: kill another usage of __raw_{read|write}l Use {read|write}l instead of __raw_{read|write}l since the PCH DMA controller is a PCI device. Signed-off-by: Yong Wang Signed-off-by: Dan Williams diff --git a/drivers/dma/pch_dma.c b/drivers/dma/pch_dma.c index 37139ca..3533948 100644 --- a/drivers/dma/pch_dma.c +++ b/drivers/dma/pch_dma.c @@ -133,9 +133,9 @@ struct pch_dma { #define PCH_DMA_STS1 0x14 #define dma_readl(pd, name) \ - __raw_readl((pd)->membase + PCH_DMA_##name) + readl((pd)->membase + PCH_DMA_##name) #define dma_writel(pd, name, val) \ - __raw_writel((val), (pd)->membase + PCH_DMA_##name) + writel((val), (pd)->membase + PCH_DMA_##name) static inline struct pch_dma_desc *to_pd_desc(struct dma_async_tx_descriptor *txd) { -- cgit v0.10.2 From b0ebeb9c09cb5b84bb2f33927c84c7648845fcec Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Thu, 5 Aug 2010 10:40:08 +0800 Subject: DMAENGINE: at_hdmac: locking fixlet atc_chain_complete shall be called with atchan->lock held and bh disabled. Signed-off-by: Yong Wang Signed-off-by: Dan Williams diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index bd5250e..646f6d6 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -790,12 +790,12 @@ static int atc_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd, list_splice_init(&atchan->queue, &list); list_splice_init(&atchan->active_list, &list); - spin_unlock_bh(&atchan->lock); - /* Flush all pending and queued descriptors */ list_for_each_entry_safe(desc, _desc, &list, desc_node) atc_chain_complete(atchan, desc); + spin_unlock_bh(&atchan->lock); + return 0; } -- cgit v0.10.2
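The dmatest sign fix above is worth a stand-alone illustration. A minimal user-space C program (invented values; -22 plays the role of -EINVAL) shows why an unsigned 'cnt' defeats the 'cnt > 0' guard while the signed version behaves:

#include <stdio.h>

/* stand-in for dmatest_add_threads(): negative on unsupported type */
static int add_threads(int type)
{
	return (type == 0) ? 4 : -22;
}

int main(void)
{
	unsigned int cnt = add_threads(1);	/* -22 wraps to 4294967274 */
	int fixed = add_threads(1);

	if (cnt > 0)	/* always true: the bug */
		printf("buggy: counted %u threads\n", cnt);
	if (fixed > 0)	/* correctly skipped */
		printf("fixed: counted %d threads\n", fixed);

	return 0;
}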