30 files changed, 2541 insertions, 1011 deletions
diff --git a/Documentation/dmaengine.txt b/Documentation/dmaengine.txt index 5a0cb1ef6164..94b7e0f96b38 100644 --- a/Documentation/dmaengine.txt +++ b/Documentation/dmaengine.txt @@ -10,87 +10,181 @@ NOTE: For DMA Engine usage in async_tx please see: Below is a guide to device driver writers on how to use the Slave-DMA API of the DMA Engine. This is applicable only for slave DMA usage only. -The slave DMA usage consists of following steps +The slave DMA usage consists of following steps: 1. Allocate a DMA slave channel 2. Set slave and controller specific parameters 3. Get a descriptor for transaction -4. Submit the transaction and wait for callback notification +4. Submit the transaction +5. Issue pending requests and wait for callback notification 1. Allocate a DMA slave channel -Channel allocation is slightly different in the slave DMA context, client -drivers typically need a channel from a particular DMA controller only and even -in some cases a specific channel is desired. To request a channel -dma_request_channel() API is used. - -Interface: -struct dma_chan *dma_request_channel(dma_cap_mask_t mask, - dma_filter_fn filter_fn, - void *filter_param); -where dma_filter_fn is defined as: -typedef bool (*dma_filter_fn)(struct dma_chan *chan, void *filter_param); - -When the optional 'filter_fn' parameter is set to NULL dma_request_channel -simply returns the first channel that satisfies the capability mask. Otherwise, -when the mask parameter is insufficient for specifying the necessary channel, -the filter_fn routine can be used to disposition the available channels in the -system. The filter_fn routine is called once for each free channel in the -system. Upon seeing a suitable channel filter_fn returns DMA_ACK which flags -that channel to be the return value from dma_request_channel. A channel -allocated via this interface is exclusive to the caller, until -dma_release_channel() is called. + + Channel allocation is slightly different in the slave DMA context, + client drivers typically need a channel from a particular DMA + controller only and even in some cases a specific channel is desired. + To request a channel dma_request_channel() API is used. + + Interface: + struct dma_chan *dma_request_channel(dma_cap_mask_t mask, + dma_filter_fn filter_fn, + void *filter_param); + where dma_filter_fn is defined as: + typedef bool (*dma_filter_fn)(struct dma_chan *chan, void *filter_param); + + The 'filter_fn' parameter is optional, but highly recommended for + slave and cyclic channels as they typically need to obtain a specific + DMA channel. + + When the optional 'filter_fn' parameter is NULL, dma_request_channel() + simply returns the first channel that satisfies the capability mask. + + Otherwise, the 'filter_fn' routine will be called once for each free + channel which has a capability in 'mask'. 'filter_fn' is expected to + return 'true' when the desired DMA channel is found. + + A channel allocated via this interface is exclusive to the caller, + until dma_release_channel() is called. 2. Set slave and controller specific parameters -Next step is always to pass some specific information to the DMA driver. Most of -the generic information which a slave DMA can use is in struct dma_slave_config. -It allows the clients to specify DMA direction, DMA addresses, bus widths, DMA -burst lengths etc. If some DMA controllers have more parameters to be sent then -they should try to embed struct dma_slave_config in their controller specific -structure. 
That gives flexibility to client to pass more parameters, if -required. - -Interface: -int dmaengine_slave_config(struct dma_chan *chan, - struct dma_slave_config *config) + + Next step is always to pass some specific information to the DMA + driver. Most of the generic information which a slave DMA can use + is in struct dma_slave_config. This allows the clients to specify + DMA direction, DMA addresses, bus widths, DMA burst lengths etc + for the peripheral. + + If some DMA controllers have more parameters to be sent then they + should try to embed struct dma_slave_config in their controller + specific structure. That gives flexibility to client to pass more + parameters, if required. + + Interface: + int dmaengine_slave_config(struct dma_chan *chan, + struct dma_slave_config *config) + + Please see the dma_slave_config structure definition in dmaengine.h + for a detailed explaination of the struct members. Please note + that the 'direction' member will be going away as it duplicates the + direction given in the prepare call. 3. Get a descriptor for transaction -For slave usage the various modes of slave transfers supported by the -DMA-engine are: -slave_sg - DMA a list of scatter gather buffers from/to a peripheral -dma_cyclic - Perform a cyclic DMA operation from/to a peripheral till the + + For slave usage the various modes of slave transfers supported by the + DMA-engine are: + + slave_sg - DMA a list of scatter gather buffers from/to a peripheral + dma_cyclic - Perform a cyclic DMA operation from/to a peripheral till the operation is explicitly stopped. -The non NULL return of this transfer API represents a "descriptor" for the given -transaction. - -Interface: -struct dma_async_tx_descriptor *(*chan->device->device_prep_dma_sg)( - struct dma_chan *chan, - struct scatterlist *dst_sg, unsigned int dst_nents, - struct scatterlist *src_sg, unsigned int src_nents, + + A non-NULL return of this transfer API represents a "descriptor" for + the given transaction. + + Interface: + struct dma_async_tx_descriptor *(*chan->device->device_prep_slave_sg)( + struct dma_chan *chan, struct scatterlist *sgl, + unsigned int sg_len, enum dma_data_direction direction, unsigned long flags); -struct dma_async_tx_descriptor *(*chan->device->device_prep_dma_cyclic)( + + struct dma_async_tx_descriptor *(*chan->device->device_prep_dma_cyclic)( struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len, size_t period_len, enum dma_data_direction direction); -4. Submit the transaction and wait for callback notification -To schedule the transaction to be scheduled by dma device, the "descriptor" -returned in above (3) needs to be submitted. -To tell the dma driver that a transaction is ready to be serviced, the -descriptor->submit() callback needs to be invoked. This chains the descriptor to -the pending queue. -The transactions in the pending queue can be activated by calling the -issue_pending API. If channel is idle then the first transaction in queue is -started and subsequent ones queued up. -On completion of the DMA operation the next in queue is submitted and a tasklet -triggered. The tasklet would then call the client driver completion callback -routine for notification, if set. 
-Interface: -void dma_async_issue_pending(struct dma_chan *chan); - -============================================================================== - -Additional usage notes for dma driver writers -1/ Although DMA engine specifies that completion callback routines cannot submit -any new operations, but typically for slave DMA subsequent transaction may not -be available for submit prior to callback routine being called. This requirement -is not a requirement for DMA-slave devices. But they should take care to drop -the spin-lock they might be holding before calling the callback routine + The peripheral driver is expected to have mapped the scatterlist for + the DMA operation prior to calling device_prep_slave_sg, and must + keep the scatterlist mapped until the DMA operation has completed. + The scatterlist must be mapped using the DMA struct device. So, + normal setup should look like this: + + nr_sg = dma_map_sg(chan->device->dev, sgl, sg_len); + if (nr_sg == 0) + /* error */ + + desc = chan->device->device_prep_slave_sg(chan, sgl, nr_sg, + direction, flags); + + Once a descriptor has been obtained, the callback information can be + added and the descriptor must then be submitted. Some DMA engine + drivers may hold a spinlock between a successful preparation and + submission so it is important that these two operations are closely + paired. + + Note: + Although the async_tx API specifies that completion callback + routines cannot submit any new operations, this is not the + case for slave/cyclic DMA. + + For slave DMA, the subsequent transaction may not be available + for submission prior to callback function being invoked, so + slave DMA callbacks are permitted to prepare and submit a new + transaction. + + For cyclic DMA, a callback function may wish to terminate the + DMA via dmaengine_terminate_all(). + + Therefore, it is important that DMA engine drivers drop any + locks before calling the callback function which may cause a + deadlock. + + Note that callbacks will always be invoked from the DMA + engines tasklet, never from interrupt context. + +4. Submit the transaction + + Once the descriptor has been prepared and the callback information + added, it must be placed on the DMA engine drivers pending queue. + + Interface: + dma_cookie_t dmaengine_submit(struct dma_async_tx_descriptor *desc) + + This returns a cookie can be used to check the progress of DMA engine + activity via other DMA engine calls not covered in this document. + + dmaengine_submit() will not start the DMA operation, it merely adds + it to the pending queue. For this, see step 5, dma_async_issue_pending. + +5. Issue pending DMA requests and wait for callback notification + + The transactions in the pending queue can be activated by calling the + issue_pending API. If channel is idle then the first transaction in + queue is started and subsequent ones queued up. + + On completion of each DMA operation, the next in queue is started and + a tasklet triggered. The tasklet will then call the client driver + completion callback routine for notification, if set. + + Interface: + void dma_async_issue_pending(struct dma_chan *chan); + +Further APIs: + +1. int dmaengine_terminate_all(struct dma_chan *chan) + + This causes all activity for the DMA channel to be stopped, and may + discard data in the DMA FIFO which hasn't been fully transferred. + No callback functions will be called for any incomplete transfers. + +2. int dmaengine_pause(struct dma_chan *chan) + + This pauses activity on the DMA channel without data loss. 
+ +3. int dmaengine_resume(struct dma_chan *chan) + + Resume a previously paused DMA channel. It is invalid to resume a + channel which is not currently paused. + +4. enum dma_status dma_async_is_tx_complete(struct dma_chan *chan, + dma_cookie_t cookie, dma_cookie_t *last, dma_cookie_t *used) + + This can be used to check the status of the channel. Please see + the documentation in include/linux/dmaengine.h for a more complete + description of this API. + + This can be used in conjunction with dma_async_is_complete() and + the cookie returned from 'descriptor->submit()' to check for + completion of a specific DMA transaction. + + Note: + Not all DMA engine drivers can return reliable information for + a running DMA channel. It is recommended that DMA engine users + pause or stop (via dmaengine_terminate_all) the channel before + using this API. diff --git a/Documentation/spi/ep93xx_spi b/Documentation/spi/ep93xx_spi index 6325f5b48635..d8eb01c15db1 100644 --- a/Documentation/spi/ep93xx_spi +++ b/Documentation/spi/ep93xx_spi @@ -88,6 +88,16 @@ static void __init ts72xx_init_machine(void) ARRAY_SIZE(ts72xx_spi_devices)); } +The driver can use DMA for the transfers also. In this case ts72xx_spi_info +becomes: + +static struct ep93xx_spi_info ts72xx_spi_info = { + .num_chipselect = ARRAY_SIZE(ts72xx_spi_devices), + .use_dma = true; +}; + +Note that CONFIG_EP93XX_DMA should be enabled as well. + Thanks to ========= Martin Guy, H. Hartley Sweeten and others who helped me during development of diff --git a/arch/arm/mach-ep93xx/Makefile b/arch/arm/mach-ep93xx/Makefile index 33ee2c863d18..21e721ab7378 100644 --- a/arch/arm/mach-ep93xx/Makefile +++ b/arch/arm/mach-ep93xx/Makefile @@ -1,11 +1,13 @@ # # Makefile for the linux kernel. # -obj-y := core.o clock.o dma-m2p.o gpio.o +obj-y := core.o clock.o gpio.o obj-m := obj-n := obj- := +obj-$(CONFIG_EP93XX_DMA) += dma.o + obj-$(CONFIG_MACH_ADSSPHERE) += adssphere.o obj-$(CONFIG_MACH_EDB93XX) += edb93xx.o obj-$(CONFIG_MACH_GESBC9312) += gesbc9312.o diff --git a/arch/arm/mach-ep93xx/core.c b/arch/arm/mach-ep93xx/core.c index 6659a0d137a3..dd87a8272237 100644 --- a/arch/arm/mach-ep93xx/core.c +++ b/arch/arm/mach-ep93xx/core.c @@ -492,11 +492,15 @@ static struct resource ep93xx_spi_resources[] = { }, }; +static u64 ep93xx_spi_dma_mask = DMA_BIT_MASK(32); + static struct platform_device ep93xx_spi_device = { .name = "ep93xx-spi", .id = 0, .dev = { - .platform_data = &ep93xx_spi_master_data, + .platform_data = &ep93xx_spi_master_data, + .coherent_dma_mask = DMA_BIT_MASK(32), + .dma_mask = &ep93xx_spi_dma_mask, }, .num_resources = ARRAY_SIZE(ep93xx_spi_resources), .resource = ep93xx_spi_resources, diff --git a/arch/arm/mach-ep93xx/dma-m2p.c b/arch/arm/mach-ep93xx/dma-m2p.c deleted file mode 100644 index a696d354b1f8..000000000000 --- a/arch/arm/mach-ep93xx/dma-m2p.c +++ /dev/null @@ -1,411 +0,0 @@ -/* - * arch/arm/mach-ep93xx/dma-m2p.c - * M2P DMA handling for Cirrus EP93xx chips. - * - * Copyright (C) 2006 Lennert Buytenhek <buytenh@wantstofly.org> - * Copyright (C) 2006 Applied Data Systems - * - * Copyright (C) 2009 Ryan Mallon <ryan@bluewatersys.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or (at - * your option) any later version. 
- */ - -/* - * On the EP93xx chip the following peripherals my be allocated to the 10 - * Memory to Internal Peripheral (M2P) channels (5 transmit + 5 receive). - * - * I2S contains 3 Tx and 3 Rx DMA Channels - * AAC contains 3 Tx and 3 Rx DMA Channels - * UART1 contains 1 Tx and 1 Rx DMA Channels - * UART2 contains 1 Tx and 1 Rx DMA Channels - * UART3 contains 1 Tx and 1 Rx DMA Channels - * IrDA contains 1 Tx and 1 Rx DMA Channels - * - * SSP and IDE use the Memory to Memory (M2M) channels and are not covered - * with this implementation. - */ - -#define pr_fmt(fmt) "ep93xx " KBUILD_MODNAME ": " fmt - -#include <linux/kernel.h> -#include <linux/clk.h> -#include <linux/err.h> -#include <linux/interrupt.h> -#include <linux/module.h> -#include <linux/io.h> - -#include <mach/dma.h> -#include <mach/hardware.h> - -#define M2P_CONTROL 0x00 -#define M2P_CONTROL_STALL_IRQ_EN (1 << 0) -#define M2P_CONTROL_NFB_IRQ_EN (1 << 1) -#define M2P_CONTROL_ERROR_IRQ_EN (1 << 3) -#define M2P_CONTROL_ENABLE (1 << 4) -#define M2P_INTERRUPT 0x04 -#define M2P_INTERRUPT_STALL (1 << 0) -#define M2P_INTERRUPT_NFB (1 << 1) -#define M2P_INTERRUPT_ERROR (1 << 3) -#define M2P_PPALLOC 0x08 -#define M2P_STATUS 0x0c -#define M2P_REMAIN 0x14 -#define M2P_MAXCNT0 0x20 -#define M2P_BASE0 0x24 -#define M2P_MAXCNT1 0x30 -#define M2P_BASE1 0x34 - -#define STATE_IDLE 0 /* Channel is inactive. */ -#define STATE_STALL 1 /* Channel is active, no buffers pending. */ -#define STATE_ON 2 /* Channel is active, one buffer pending. */ -#define STATE_NEXT 3 /* Channel is active, two buffers pending. */ - -struct m2p_channel { - char *name; - void __iomem *base; - int irq; - - struct clk *clk; - spinlock_t lock; - - void *client; - unsigned next_slot:1; - struct ep93xx_dma_buffer *buffer_xfer; - struct ep93xx_dma_buffer *buffer_next; - struct list_head buffers_pending; -}; - -static struct m2p_channel m2p_rx[] = { - {"m2p1", EP93XX_DMA_BASE + 0x0040, IRQ_EP93XX_DMAM2P1}, - {"m2p3", EP93XX_DMA_BASE + 0x00c0, IRQ_EP93XX_DMAM2P3}, - {"m2p5", EP93XX_DMA_BASE + 0x0200, IRQ_EP93XX_DMAM2P5}, - {"m2p7", EP93XX_DMA_BASE + 0x0280, IRQ_EP93XX_DMAM2P7}, - {"m2p9", EP93XX_DMA_BASE + 0x0300, IRQ_EP93XX_DMAM2P9}, - {NULL}, -}; - -static struct m2p_channel m2p_tx[] = { - {"m2p0", EP93XX_DMA_BASE + 0x0000, IRQ_EP93XX_DMAM2P0}, - {"m2p2", EP93XX_DMA_BASE + 0x0080, IRQ_EP93XX_DMAM2P2}, - {"m2p4", EP93XX_DMA_BASE + 0x0240, IRQ_EP93XX_DMAM2P4}, - {"m2p6", EP93XX_DMA_BASE + 0x02c0, IRQ_EP93XX_DMAM2P6}, - {"m2p8", EP93XX_DMA_BASE + 0x0340, IRQ_EP93XX_DMAM2P8}, - {NULL}, -}; - -static void feed_buf(struct m2p_channel *ch, struct ep93xx_dma_buffer *buf) -{ - if (ch->next_slot == 0) { - writel(buf->size, ch->base + M2P_MAXCNT0); - writel(buf->bus_addr, ch->base + M2P_BASE0); - } else { - writel(buf->size, ch->base + M2P_MAXCNT1); - writel(buf->bus_addr, ch->base + M2P_BASE1); - } - ch->next_slot ^= 1; -} - -static void choose_buffer_xfer(struct m2p_channel *ch) -{ - struct ep93xx_dma_buffer *buf; - - ch->buffer_xfer = NULL; - if (!list_empty(&ch->buffers_pending)) { - buf = list_entry(ch->buffers_pending.next, - struct ep93xx_dma_buffer, list); - list_del(&buf->list); - feed_buf(ch, buf); - ch->buffer_xfer = buf; - } -} - -static void choose_buffer_next(struct m2p_channel *ch) -{ - struct ep93xx_dma_buffer *buf; - - ch->buffer_next = NULL; - if (!list_empty(&ch->buffers_pending)) { - buf = list_entry(ch->buffers_pending.next, - struct ep93xx_dma_buffer, list); - list_del(&buf->list); - feed_buf(ch, buf); - ch->buffer_next = buf; - } -} - -static inline void 
m2p_set_control(struct m2p_channel *ch, u32 v) -{ - /* - * The control register must be read immediately after being written so - * that the internal state machine is correctly updated. See the ep93xx - * users' guide for details. - */ - writel(v, ch->base + M2P_CONTROL); - readl(ch->base + M2P_CONTROL); -} - -static inline int m2p_channel_state(struct m2p_channel *ch) -{ - return (readl(ch->base + M2P_STATUS) >> 4) & 0x3; -} - -static irqreturn_t m2p_irq(int irq, void *dev_id) -{ - struct m2p_channel *ch = dev_id; - struct ep93xx_dma_m2p_client *cl; - u32 irq_status, v; - int error = 0; - - cl = ch->client; - - spin_lock(&ch->lock); - irq_status = readl(ch->base + M2P_INTERRUPT); - - if (irq_status & M2P_INTERRUPT_ERROR) { - writel(M2P_INTERRUPT_ERROR, ch->base + M2P_INTERRUPT); - error = 1; - } - - if ((irq_status & (M2P_INTERRUPT_STALL | M2P_INTERRUPT_NFB)) == 0) { - spin_unlock(&ch->lock); - return IRQ_NONE; - } - - switch (m2p_channel_state(ch)) { - case STATE_IDLE: - pr_crit("dma interrupt without a dma buffer\n"); - BUG(); - break; - - case STATE_STALL: - cl->buffer_finished(cl->cookie, ch->buffer_xfer, 0, error); - if (ch->buffer_next != NULL) { - cl->buffer_finished(cl->cookie, ch->buffer_next, - 0, error); - } - choose_buffer_xfer(ch); - choose_buffer_next(ch); - if (ch->buffer_xfer != NULL) - cl->buffer_started(cl->cookie, ch->buffer_xfer); - break; - - case STATE_ON: - cl->buffer_finished(cl->cookie, ch->buffer_xfer, 0, error); - ch->buffer_xfer = ch->buffer_next; - choose_buffer_next(ch); - cl->buffer_started(cl->cookie, ch->buffer_xfer); - break; - - case STATE_NEXT: - pr_crit("dma interrupt while next\n"); - BUG(); - break; - } - - v = readl(ch->base + M2P_CONTROL) & ~(M2P_CONTROL_STALL_IRQ_EN | - M2P_CONTROL_NFB_IRQ_EN); - if (ch->buffer_xfer != NULL) - v |= M2P_CONTROL_STALL_IRQ_EN; - if (ch->buffer_next != NULL) - v |= M2P_CONTROL_NFB_IRQ_EN; - m2p_set_control(ch, v); - - spin_unlock(&ch->lock); - return IRQ_HANDLED; -} - -static struct m2p_channel *find_free_channel(struct ep93xx_dma_m2p_client *cl) -{ - struct m2p_channel *ch; - int i; - - if (cl->flags & EP93XX_DMA_M2P_RX) - ch = m2p_rx; - else - ch = m2p_tx; - - for (i = 0; ch[i].base; i++) { - struct ep93xx_dma_m2p_client *client; - - client = ch[i].client; - if (client != NULL) { - int port; - - port = cl->flags & EP93XX_DMA_M2P_PORT_MASK; - if (port == (client->flags & - EP93XX_DMA_M2P_PORT_MASK)) { - pr_warning("DMA channel already used by %s\n", - cl->name ? : "unknown client"); - return ERR_PTR(-EBUSY); - } - } - } - - for (i = 0; ch[i].base; i++) { - if (ch[i].client == NULL) - return ch + i; - } - - pr_warning("No free DMA channel for %s\n", - cl->name ? 
: "unknown client"); - return ERR_PTR(-ENODEV); -} - -static void channel_enable(struct m2p_channel *ch) -{ - struct ep93xx_dma_m2p_client *cl = ch->client; - u32 v; - - clk_enable(ch->clk); - - v = cl->flags & EP93XX_DMA_M2P_PORT_MASK; - writel(v, ch->base + M2P_PPALLOC); - - v = cl->flags & EP93XX_DMA_M2P_ERROR_MASK; - v |= M2P_CONTROL_ENABLE | M2P_CONTROL_ERROR_IRQ_EN; - m2p_set_control(ch, v); -} - -static void channel_disable(struct m2p_channel *ch) -{ - u32 v; - - v = readl(ch->base + M2P_CONTROL); - v &= ~(M2P_CONTROL_STALL_IRQ_EN | M2P_CONTROL_NFB_IRQ_EN); - m2p_set_control(ch, v); - - while (m2p_channel_state(ch) >= STATE_ON) - cpu_relax(); - - m2p_set_control(ch, 0x0); - - while (m2p_channel_state(ch) == STATE_STALL) - cpu_relax(); - - clk_disable(ch->clk); -} - -int ep93xx_dma_m2p_client_register(struct ep93xx_dma_m2p_client *cl) -{ - struct m2p_channel *ch; - int err; - - ch = find_free_channel(cl); - if (IS_ERR(ch)) - return PTR_ERR(ch); - - err = request_irq(ch->irq, m2p_irq, 0, cl->name ? : "dma-m2p", ch); - if (err) - return err; - - ch->client = cl; - ch->next_slot = 0; - ch->buffer_xfer = NULL; - ch->buffer_next = NULL; - INIT_LIST_HEAD(&ch->buffers_pending); - - cl->channel = ch; - - channel_enable(ch); - - return 0; -} -EXPORT_SYMBOL_GPL(ep93xx_dma_m2p_client_register); - -void ep93xx_dma_m2p_client_unregister(struct ep93xx_dma_m2p_client *cl) -{ - struct m2p_channel *ch = cl->channel; - - channel_disable(ch); - free_irq(ch->irq, ch); - ch->client = NULL; -} -EXPORT_SYMBOL_GPL(ep93xx_dma_m2p_client_unregister); - -void ep93xx_dma_m2p_submit(struct ep93xx_dma_m2p_client *cl, - struct ep93xx_dma_buffer *buf) -{ - struct m2p_channel *ch = cl->channel; - unsigned long flags; - u32 v; - - spin_lock_irqsave(&ch->lock, flags); - v = readl(ch->base + M2P_CONTROL); - if (ch->buffer_xfer == NULL) { - ch->buffer_xfer = buf; - feed_buf(ch, buf); - cl->buffer_started(cl->cookie, buf); - - v |= M2P_CONTROL_STALL_IRQ_EN; - m2p_set_control(ch, v); - - } else if (ch->buffer_next == NULL) { - ch->buffer_next = buf; - feed_buf(ch, buf); - - v |= M2P_CONTROL_NFB_IRQ_EN; - m2p_set_control(ch, v); - } else { - list_add_tail(&buf->list, &ch->buffers_pending); - } - spin_unlock_irqrestore(&ch->lock, flags); -} -EXPORT_SYMBOL_GPL(ep93xx_dma_m2p_submit); - -void ep93xx_dma_m2p_submit_recursive(struct ep93xx_dma_m2p_client *cl, - struct ep93xx_dma_buffer *buf) -{ - struct m2p_channel *ch = cl->channel; - - list_add_tail(&buf->list, &ch->buffers_pending); -} -EXPORT_SYMBOL_GPL(ep93xx_dma_m2p_submit_recursive); - -void ep93xx_dma_m2p_flush(struct ep93xx_dma_m2p_client *cl) -{ - struct m2p_channel *ch = cl->channel; - - channel_disable(ch); - ch->next_slot = 0; - ch->buffer_xfer = NULL; - ch->buffer_next = NULL; - INIT_LIST_HEAD(&ch->buffers_pending); - channel_enable(ch); -} -EXPORT_SYMBOL_GPL(ep93xx_dma_m2p_flush); - -static int init_channel(struct m2p_channel *ch) -{ - ch->clk = clk_get(NULL, ch->name); - if (IS_ERR(ch->clk)) - return PTR_ERR(ch->clk); - - spin_lock_init(&ch->lock); - ch->client = NULL; - - return 0; -} - -static int __init ep93xx_dma_m2p_init(void) -{ - int i; - int ret; - - for (i = 0; m2p_rx[i].base; i++) { - ret = init_channel(m2p_rx + i); - if (ret) - return ret; - } - - for (i = 0; m2p_tx[i].base; i++) { - ret = init_channel(m2p_tx + i); - if (ret) - return ret; - } - - pr_info("M2P DMA subsystem initialized\n"); - return 0; -} -arch_initcall(ep93xx_dma_m2p_init); diff --git a/arch/arm/mach-ep93xx/dma.c b/arch/arm/mach-ep93xx/dma.c new file mode 100644 index 
000000000000..5a2570881255 --- /dev/null +++ b/arch/arm/mach-ep93xx/dma.c @@ -0,0 +1,108 @@ +/* + * arch/arm/mach-ep93xx/dma.c + * + * Platform support code for the EP93xx dmaengine driver. + * + * Copyright (C) 2011 Mika Westerberg + * + * This work is based on the original dma-m2p implementation with + * following copyrights: + * + * Copyright (C) 2006 Lennert Buytenhek <buytenh@wantstofly.org> + * Copyright (C) 2006 Applied Data Systems + * Copyright (C) 2009 Ryan Mallon <rmallon@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or (at + * your option) any later version. + */ + +#include <linux/dmaengine.h> +#include <linux/dma-mapping.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/kernel.h> +#include <linux/platform_device.h> + +#include <mach/dma.h> +#include <mach/hardware.h> + +#define DMA_CHANNEL(_name, _base, _irq) \ + { .name = (_name), .base = (_base), .irq = (_irq) } + +/* + * DMA M2P channels. + * + * On the EP93xx chip the following peripherals my be allocated to the 10 + * Memory to Internal Peripheral (M2P) channels (5 transmit + 5 receive). + * + * I2S contains 3 Tx and 3 Rx DMA Channels + * AAC contains 3 Tx and 3 Rx DMA Channels + * UART1 contains 1 Tx and 1 Rx DMA Channels + * UART2 contains 1 Tx and 1 Rx DMA Channels + * UART3 contains 1 Tx and 1 Rx DMA Channels + * IrDA contains 1 Tx and 1 Rx DMA Channels + * + * Registers are mapped statically in ep93xx_map_io(). + */ +static struct ep93xx_dma_chan_data ep93xx_dma_m2p_channels[] = { + DMA_CHANNEL("m2p0", EP93XX_DMA_BASE + 0x0000, IRQ_EP93XX_DMAM2P0), + DMA_CHANNEL("m2p1", EP93XX_DMA_BASE + 0x0040, IRQ_EP93XX_DMAM2P1), + DMA_CHANNEL("m2p2", EP93XX_DMA_BASE + 0x0080, IRQ_EP93XX_DMAM2P2), + DMA_CHANNEL("m2p3", EP93XX_DMA_BASE + 0x00c0, IRQ_EP93XX_DMAM2P3), + DMA_CHANNEL("m2p4", EP93XX_DMA_BASE + 0x0240, IRQ_EP93XX_DMAM2P4), + DMA_CHANNEL("m2p5", EP93XX_DMA_BASE + 0x0200, IRQ_EP93XX_DMAM2P5), + DMA_CHANNEL("m2p6", EP93XX_DMA_BASE + 0x02c0, IRQ_EP93XX_DMAM2P6), + DMA_CHANNEL("m2p7", EP93XX_DMA_BASE + 0x0280, IRQ_EP93XX_DMAM2P7), + DMA_CHANNEL("m2p8", EP93XX_DMA_BASE + 0x0340, IRQ_EP93XX_DMAM2P8), + DMA_CHANNEL("m2p9", EP93XX_DMA_BASE + 0x0300, IRQ_EP93XX_DMAM2P9), +}; + +static struct ep93xx_dma_platform_data ep93xx_dma_m2p_data = { + .channels = ep93xx_dma_m2p_channels, + .num_channels = ARRAY_SIZE(ep93xx_dma_m2p_channels), +}; + +static struct platform_device ep93xx_dma_m2p_device = { + .name = "ep93xx-dma-m2p", + .id = -1, + .dev = { + .platform_data = &ep93xx_dma_m2p_data, + }, +}; + +/* + * DMA M2M channels. + * + * There are 2 M2M channels which support memcpy/memset and in addition simple + * hardware requests from/to SSP and IDE. We do not implement an external + * hardware requests. + * + * Registers are mapped statically in ep93xx_map_io(). 
+ */ +static struct ep93xx_dma_chan_data ep93xx_dma_m2m_channels[] = { + DMA_CHANNEL("m2m0", EP93XX_DMA_BASE + 0x0100, IRQ_EP93XX_DMAM2M0), + DMA_CHANNEL("m2m1", EP93XX_DMA_BASE + 0x0140, IRQ_EP93XX_DMAM2M1), +}; + +static struct ep93xx_dma_platform_data ep93xx_dma_m2m_data = { + .channels = ep93xx_dma_m2m_channels, + .num_channels = ARRAY_SIZE(ep93xx_dma_m2m_channels), +}; + +static struct platform_device ep93xx_dma_m2m_device = { + .name = "ep93xx-dma-m2m", + .id = -1, + .dev = { + .platform_data = &ep93xx_dma_m2m_data, + }, +}; + +static int __init ep93xx_dma_init(void) +{ + platform_device_register(&ep93xx_dma_m2p_device); + platform_device_register(&ep93xx_dma_m2m_device); + return 0; +} +arch_initcall(ep93xx_dma_init); diff --git a/arch/arm/mach-ep93xx/include/mach/dma.h b/arch/arm/mach-ep93xx/include/mach/dma.h index 5e31b2b25da9..46d4d876e6fb 100644 --- a/arch/arm/mach-ep93xx/include/mach/dma.h +++ b/arch/arm/mach-ep93xx/include/mach/dma.h @@ -1,149 +1,93 @@ -/** - * DOC: EP93xx DMA M2P memory to peripheral and peripheral to memory engine - * - * The EP93xx DMA M2P subsystem handles DMA transfers between memory and - * peripherals. DMA M2P channels are available for audio, UARTs and IrDA. - * See chapter 10 of the EP93xx users guide for full details on the DMA M2P - * engine. - * - * See sound/soc/ep93xx/ep93xx-pcm.c for an example use of the DMA M2P code. - * - */ - #ifndef __ASM_ARCH_DMA_H #define __ASM_ARCH_DMA_H -#include <linux/list.h> #include <linux/types.h> +#include <linux/dmaengine.h> +#include <linux/dma-mapping.h> -/** - * struct ep93xx_dma_buffer - Information about a buffer to be transferred - * using the DMA M2P engine +/* + * M2P channels. * - * @list: Entry in DMA buffer list - * @bus_addr: Physical address of the buffer - * @size: Size of the buffer in bytes + * Note that these values are also directly used for setting the PPALLOC + * register. */ -struct ep93xx_dma_buffer { - struct list_head list; - u32 bus_addr; - u16 size; -}; +#define EP93XX_DMA_I2S1 0 +#define EP93XX_DMA_I2S2 1 +#define EP93XX_DMA_AAC1 2 +#define EP93XX_DMA_AAC2 3 +#define EP93XX_DMA_AAC3 4 +#define EP93XX_DMA_I2S3 5 +#define EP93XX_DMA_UART1 6 +#define EP93XX_DMA_UART2 7 +#define EP93XX_DMA_UART3 8 +#define EP93XX_DMA_IRDA 9 +/* M2M channels */ +#define EP93XX_DMA_SSP 10 +#define EP93XX_DMA_IDE 11 /** - * struct ep93xx_dma_m2p_client - Information about a DMA M2P client - * - * @name: Unique name for this client - * @flags: Client flags - * @cookie: User data to pass to callback functions - * @buffer_started: Non NULL function to call when a transfer is started. - * The arguments are the user data cookie and the DMA - * buffer which is starting. - * @buffer_finished: Non NULL function to call when a transfer is completed. - * The arguments are the user data cookie, the DMA buffer - * which has completed, and a boolean flag indicating if - * the transfer had an error. + * struct ep93xx_dma_data - configuration data for the EP93xx dmaengine + * @port: peripheral which is requesting the channel + * @direction: TX/RX channel + * @name: optional name for the channel, this is displayed in /proc/interrupts + * + * This information is passed as private channel parameter in a filter + * function. Note that this is only needed for slave/cyclic channels. For + * memcpy channels %NULL data should be passed. 
*/ -struct ep93xx_dma_m2p_client { - char *name; - u8 flags; - void *cookie; - void (*buffer_started)(void *cookie, - struct ep93xx_dma_buffer *buf); - void (*buffer_finished)(void *cookie, - struct ep93xx_dma_buffer *buf, - int bytes, int error); - - /* private: Internal use only */ - void *channel; +struct ep93xx_dma_data { + int port; + enum dma_data_direction direction; + const char *name; }; -/* DMA M2P ports */ -#define EP93XX_DMA_M2P_PORT_I2S1 0x00 -#define EP93XX_DMA_M2P_PORT_I2S2 0x01 -#define EP93XX_DMA_M2P_PORT_AAC1 0x02 -#define EP93XX_DMA_M2P_PORT_AAC2 0x03 -#define EP93XX_DMA_M2P_PORT_AAC3 0x04 -#define EP93XX_DMA_M2P_PORT_I2S3 0x05 -#define EP93XX_DMA_M2P_PORT_UART1 0x06 -#define EP93XX_DMA_M2P_PORT_UART2 0x07 -#define EP93XX_DMA_M2P_PORT_UART3 0x08 -#define EP93XX_DMA_M2P_PORT_IRDA 0x09 -#define EP93XX_DMA_M2P_PORT_MASK 0x0f - -/* DMA M2P client flags */ -#define EP93XX_DMA_M2P_TX 0x00 /* Memory to peripheral */ -#define EP93XX_DMA_M2P_RX 0x10 /* Peripheral to memory */ - -/* - * DMA M2P client error handling flags. See the EP93xx users guide - * documentation on the DMA M2P CONTROL register for more details - */ -#define EP93XX_DMA_M2P_ABORT_ON_ERROR 0x20 /* Abort on peripheral error */ -#define EP93XX_DMA_M2P_IGNORE_ERROR 0x40 /* Ignore peripheral errors */ -#define EP93XX_DMA_M2P_ERROR_MASK 0x60 /* Mask of error bits */ - /** - * ep93xx_dma_m2p_client_register - Register a client with the DMA M2P - * subsystem - * - * @m2p: Client information to register - * returns 0 on success - * - * The DMA M2P subsystem allocates a channel and an interrupt line for the DMA - * client + * struct ep93xx_dma_chan_data - platform specific data for a DMA channel + * @name: name of the channel, used for getting the right clock for the channel + * @base: mapped registers + * @irq: interrupt number used by this channel */ -int ep93xx_dma_m2p_client_register(struct ep93xx_dma_m2p_client *m2p); +struct ep93xx_dma_chan_data { + const char *name; + void __iomem *base; + int irq; +}; /** - * ep93xx_dma_m2p_client_unregister - Unregister a client from the DMA M2P - * subsystem - * - * @m2p: Client to unregister + * struct ep93xx_dma_platform_data - platform data for the dmaengine driver + * @channels: array of channels which are passed to the driver + * @num_channels: number of channels in the array * - * Any transfers currently in progress will be completed in hardware, but - * ignored in software. + * This structure is passed to the DMA engine driver via platform data. For + * M2P channels, contract is that even channels are for TX and odd for RX. + * There is no requirement for the M2M channels. */ -void ep93xx_dma_m2p_client_unregister(struct ep93xx_dma_m2p_client *m2p); +struct ep93xx_dma_platform_data { + struct ep93xx_dma_chan_data *channels; + size_t num_channels; +}; -/** - * ep93xx_dma_m2p_submit - Submit a DMA M2P transfer - * - * @m2p: DMA Client to submit the transfer on - * @buf: DMA Buffer to submit - * - * If the current or next transfer positions are free on the M2P client then - * the transfer is started immediately. If not, the transfer is added to the - * list of pending transfers. This function must not be called from the - * buffer_finished callback for an M2P channel. 
- * - */ -void ep93xx_dma_m2p_submit(struct ep93xx_dma_m2p_client *m2p, - struct ep93xx_dma_buffer *buf); +static inline bool ep93xx_dma_chan_is_m2p(struct dma_chan *chan) +{ + return !strcmp(dev_name(chan->device->dev), "ep93xx-dma-m2p"); +} /** - * ep93xx_dma_m2p_submit_recursive - Put a DMA transfer on the pending list - * for an M2P channel + * ep93xx_dma_chan_direction - returns direction the channel can be used + * @chan: channel * - * @m2p: DMA Client to submit the transfer on - * @buf: DMA Buffer to submit - * - * This function must only be called from the buffer_finished callback for an - * M2P channel. It is commonly used to add the next transfer in a chained list - * of DMA transfers. + * This function can be used in filter functions to find out whether the + * channel supports given DMA direction. Only M2P channels have such + * limitation, for M2M channels the direction is configurable. */ -void ep93xx_dma_m2p_submit_recursive(struct ep93xx_dma_m2p_client *m2p, - struct ep93xx_dma_buffer *buf); +static inline enum dma_data_direction +ep93xx_dma_chan_direction(struct dma_chan *chan) +{ + if (!ep93xx_dma_chan_is_m2p(chan)) + return DMA_NONE; -/** - * ep93xx_dma_m2p_flush - Flush all pending transfers on a DMA M2P client - * - * @m2p: DMA client to flush transfers on - * - * Any transfers currently in progress will be completed in hardware, but - * ignored in software. - * - */ -void ep93xx_dma_m2p_flush(struct ep93xx_dma_m2p_client *m2p); + /* even channels are for TX, odd for RX */ + return (chan->chan_id % 2 == 0) ? DMA_TO_DEVICE : DMA_FROM_DEVICE; +} #endif /* __ASM_ARCH_DMA_H */ diff --git a/arch/arm/mach-ep93xx/include/mach/ep93xx_spi.h b/arch/arm/mach-ep93xx/include/mach/ep93xx_spi.h index 0a37961b3453..9bb63ac13f04 100644 --- a/arch/arm/mach-ep93xx/include/mach/ep93xx_spi.h +++ b/arch/arm/mach-ep93xx/include/mach/ep93xx_spi.h @@ -7,9 +7,11 @@ struct spi_device; * struct ep93xx_spi_info - EP93xx specific SPI descriptor * @num_chipselect: number of chip selects on this board, must be * at least one + * @use_dma: use DMA for the transfers */ struct ep93xx_spi_info { int num_chipselect; + bool use_dma; }; /** diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig index 25cf327cd1cb..2e3b3d38c465 100644 --- a/drivers/dma/Kconfig +++ b/drivers/dma/Kconfig @@ -237,6 +237,13 @@ config MXS_DMA Support the MXS DMA engine. This engine including APBH-DMA and APBX-DMA is integrated into Freescale i.MX23/28 chips. +config EP93XX_DMA + bool "Cirrus Logic EP93xx DMA support" + depends on ARCH_EP93XX + select DMA_ENGINE + help + Enable support for the Cirrus Logic EP93xx M2P/M2M DMA controller. + config DMA_ENGINE bool diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile index 836095ab3c5c..30cf3b1f0c5c 100644 --- a/drivers/dma/Makefile +++ b/drivers/dma/Makefile @@ -25,3 +25,4 @@ obj-$(CONFIG_STE_DMA40) += ste_dma40.o ste_dma40_ll.o obj-$(CONFIG_PL330_DMA) += pl330.o obj-$(CONFIG_PCH_DMA) += pch_dma.o obj-$(CONFIG_AMBA_PL08X) += amba-pl08x.o +obj-$(CONFIG_EP93XX_DMA) += ep93xx_dma.o diff --git a/drivers/dma/TODO b/drivers/dma/TODO index a4af8589330c..734ed0206cd5 100644 --- a/drivers/dma/TODO +++ b/drivers/dma/TODO @@ -9,6 +9,5 @@ TODO for slave dma - mxs-dma.c - dw_dmac - intel_mid_dma - - ste_dma40 4. Check other subsystems for dma drivers and merge/move to dmaengine 5. Remove dma_slave_config's dma direction. 
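The dmaengine.txt changes above walk the reader through five steps but never string them together. The sketch below is a minimal illustration of that flow for a memory-to-peripheral transfer; my_dma_filter(), my_dma_done(), my_start_tx() and the "dma0chan0" channel name are hypothetical placeholders invented for this example, while the calls themselves are the interfaces the updated documentation names.

#include <linux/dmaengine.h>
#include <linux/dma-mapping.h>
#include <linux/scatterlist.h>
#include <linux/string.h>

/* Hypothetical completion hook; per the documentation it runs from the
 * DMA engine's tasklet, never from hard interrupt context. */
static void my_dma_done(void *param)
{
}

/* Hypothetical filter: accept only the channel named by the caller. */
static bool my_dma_filter(struct dma_chan *chan, void *filter_param)
{
	return !strcmp(dma_chan_name(chan), filter_param);
}

static int my_start_tx(struct device *dev, struct scatterlist *sgl,
		       unsigned int sg_len, dma_addr_t fifo_addr)
{
	struct dma_async_tx_descriptor *desc;
	struct dma_slave_config config = { 0 };
	dma_cap_mask_t mask;
	struct dma_chan *chan;
	int nr_sg;

	/* 1. Allocate a DMA slave channel */
	dma_cap_zero(mask);
	dma_cap_set(DMA_SLAVE, mask);
	chan = dma_request_channel(mask, my_dma_filter, "dma0chan0");
	if (!chan)
		return -ENODEV;

	/* 2. Set slave and controller specific parameters */
	config.direction = DMA_TO_DEVICE;
	config.dst_addr = fifo_addr;
	config.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
	config.dst_maxburst = 4;
	dmaengine_slave_config(chan, &config);

	/* 3. Map the scatterlist with the DMA device, then get a descriptor */
	nr_sg = dma_map_sg(chan->device->dev, sgl, sg_len, DMA_TO_DEVICE);
	if (!nr_sg)
		goto err_release;
	desc = chan->device->device_prep_slave_sg(chan, sgl, nr_sg,
					DMA_TO_DEVICE,
					DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
	if (!desc)
		goto err_unmap;

	/* Attach callback information before submitting */
	desc->callback = my_dma_done;
	desc->callback_param = dev;

	/* 4. Submit: this only places the descriptor on the pending queue */
	dmaengine_submit(desc);

	/* 5. Issue pending requests; my_dma_done() runs on completion */
	dma_async_issue_pending(chan);
	return 0;

err_unmap:
	dma_unmap_sg(chan->device->dev, sgl, sg_len, DMA_TO_DEVICE);
err_release:
	dma_release_channel(chan);
	return -EIO;
}

Note that, as the documentation requires, the scatterlist is mapped against chan->device->dev and must stay mapped until my_dma_done() has run; the unmap in the error path only covers the case where no descriptor was obtained.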
diff --git a/drivers/dma/amba-pl08x.c b/drivers/dma/amba-pl08x.c index e6d7228b1479..196a7378d332 100644 --- a/drivers/dma/amba-pl08x.c +++ b/drivers/dma/amba-pl08x.c @@ -156,14 +156,10 @@ struct pl08x_driver_data { #define PL08X_BOUNDARY_SHIFT (10) /* 1KB 0x400 */ #define PL08X_BOUNDARY_SIZE (1 << PL08X_BOUNDARY_SHIFT) -/* Minimum period between work queue runs */ -#define PL08X_WQ_PERIODMIN 20 - /* Size (bytes) of each LLI buffer allocated for one transfer */ # define PL08X_LLI_TSFR_SIZE 0x2000 /* Maximum times we call dma_pool_alloc on this pool without freeing */ -#define PL08X_MAX_ALLOCS 0x40 #define MAX_NUM_TSFR_LLIS (PL08X_LLI_TSFR_SIZE/sizeof(struct pl08x_lli)) #define PL08X_ALIGN 8 @@ -495,10 +491,10 @@ static inline u32 pl08x_cctl_bits(u32 cctl, u8 srcwidth, u8 dstwidth, struct pl08x_lli_build_data { struct pl08x_txd *txd; - struct pl08x_driver_data *pl08x; struct pl08x_bus_data srcbus; struct pl08x_bus_data dstbus; size_t remainder; + u32 lli_bus; }; /* @@ -551,8 +547,7 @@ static void pl08x_fill_lli_for_desc(struct pl08x_lli_build_data *bd, llis_va[num_llis].src = bd->srcbus.addr; llis_va[num_llis].dst = bd->dstbus.addr; llis_va[num_llis].lli = llis_bus + (num_llis + 1) * sizeof(struct pl08x_lli); - if (bd->pl08x->lli_buses & PL08X_AHB2) - llis_va[num_llis].lli |= PL080_LLI_LM_AHB2; + llis_va[num_llis].lli |= bd->lli_bus; if (cctl & PL080_CONTROL_SRC_INCR) bd->srcbus.addr += len; @@ -605,9 +600,9 @@ static int pl08x_fill_llis_for_desc(struct pl08x_driver_data *pl08x, cctl = txd->cctl; bd.txd = txd; - bd.pl08x = pl08x; bd.srcbus.addr = txd->src_addr; bd.dstbus.addr = txd->dst_addr; + bd.lli_bus = (pl08x->lli_buses & PL08X_AHB2) ? PL080_LLI_LM_AHB2 : 0; /* Find maximum width of the source bus */ bd.srcbus.maxwidth = @@ -622,25 +617,15 @@ static int pl08x_fill_llis_for_desc(struct pl08x_driver_data *pl08x, /* Set up the bus widths to the maximum */ bd.srcbus.buswidth = bd.srcbus.maxwidth; bd.dstbus.buswidth = bd.dstbus.maxwidth; - dev_vdbg(&pl08x->adev->dev, - "%s source bus is %d bytes wide, dest bus is %d bytes wide\n", - __func__, bd.srcbus.buswidth, bd.dstbus.buswidth); - /* * Bytes transferred == tsize * MIN(buswidths), not max(buswidths) */ max_bytes_per_lli = min(bd.srcbus.buswidth, bd.dstbus.buswidth) * PL080_CONTROL_TRANSFER_SIZE_MASK; - dev_vdbg(&pl08x->adev->dev, - "%s max bytes per lli = %zu\n", - __func__, max_bytes_per_lli); /* We need to count this down to zero */ bd.remainder = txd->len; - dev_vdbg(&pl08x->adev->dev, - "%s remainder = %zu\n", - __func__, bd.remainder); /* * Choose bus to align to @@ -649,6 +634,16 @@ static int pl08x_fill_llis_for_desc(struct pl08x_driver_data *pl08x, */ pl08x_choose_master_bus(&bd, &mbus, &sbus, cctl); + dev_vdbg(&pl08x->adev->dev, "src=0x%08x%s/%u dst=0x%08x%s/%u len=%zu llimax=%zu\n", + bd.srcbus.addr, cctl & PL080_CONTROL_SRC_INCR ? "+" : "", + bd.srcbus.buswidth, + bd.dstbus.addr, cctl & PL080_CONTROL_DST_INCR ? "+" : "", + bd.dstbus.buswidth, + bd.remainder, max_bytes_per_lli); + dev_vdbg(&pl08x->adev->dev, "mbus=%s sbus=%s\n", + mbus == &bd.srcbus ? "src" : "dst", + sbus == &bd.srcbus ? 
"src" : "dst"); + if (txd->len < mbus->buswidth) { /* Less than a bus width available - send as single bytes */ while (bd.remainder) { @@ -840,15 +835,14 @@ static int pl08x_fill_llis_for_desc(struct pl08x_driver_data *pl08x, { int i; + dev_vdbg(&pl08x->adev->dev, + "%-3s %-9s %-10s %-10s %-10s %s\n", + "lli", "", "csrc", "cdst", "clli", "cctl"); for (i = 0; i < num_llis; i++) { dev_vdbg(&pl08x->adev->dev, - "lli %d @%p: csrc=0x%08x, cdst=0x%08x, cctl=0x%08x, clli=0x%08x\n", - i, - &llis_va[i], - llis_va[i].src, - llis_va[i].dst, - llis_va[i].cctl, - llis_va[i].lli + "%3d @%p: 0x%08x 0x%08x 0x%08x 0x%08x\n", + i, &llis_va[i], llis_va[i].src, + llis_va[i].dst, llis_va[i].lli, llis_va[i].cctl ); } } @@ -1054,64 +1048,105 @@ pl08x_dma_tx_status(struct dma_chan *chan, /* PrimeCell DMA extension */ struct burst_table { - int burstwords; + u32 burstwords; u32 reg; }; static const struct burst_table burst_sizes[] = { { .burstwords = 256, - .reg = (PL080_BSIZE_256 << PL080_CONTROL_SB_SIZE_SHIFT) | - (PL080_BSIZE_256 << PL080_CONTROL_DB_SIZE_SHIFT), + .reg = PL080_BSIZE_256, }, { .burstwords = 128, - .reg = (PL080_BSIZE_128 << PL080_CONTROL_SB_SIZE_SHIFT) | - (PL080_BSIZE_128 << PL080_CONTROL_DB_SIZE_SHIFT), + .reg = PL080_BSIZE_128, }, { .burstwords = 64, - .reg = (PL080_BSIZE_64 << PL080_CONTROL_SB_SIZE_SHIFT) | - (PL080_BSIZE_64 << PL080_CONTROL_DB_SIZE_SHIFT), + .reg = PL080_BSIZE_64, }, { .burstwords = 32, - .reg = (PL080_BSIZE_32 << PL080_CONTROL_SB_SIZE_SHIFT) | - (PL080_BSIZE_32 << PL080_CONTROL_DB_SIZE_SHIFT), + .reg = PL080_BSIZE_32, }, { .burstwords = 16, - .reg = (PL080_BSIZE_16 << PL080_CONTROL_SB_SIZE_SHIFT) | - (PL080_BSIZE_16 << PL080_CONTROL_DB_SIZE_SHIFT), + .reg = PL080_BSIZE_16, }, { .burstwords = 8, - .reg = (PL080_BSIZE_8 << PL080_CONTROL_SB_SIZE_SHIFT) | - (PL080_BSIZE_8 << PL080_CONTROL_DB_SIZE_SHIFT), + .reg = PL080_BSIZE_8, }, { .burstwords = 4, - .reg = (PL080_BSIZE_4 << PL080_CONTROL_SB_SIZE_SHIFT) | - (PL080_BSIZE_4 << PL080_CONTROL_DB_SIZE_SHIFT), + .reg = PL080_BSIZE_4, }, { - .burstwords = 1, - .reg = (PL080_BSIZE_1 << PL080_CONTROL_SB_SIZE_SHIFT) | - (PL080_BSIZE_1 << PL080_CONTROL_DB_SIZE_SHIFT), + .burstwords = 0, + .reg = PL080_BSIZE_1, }, }; +/* + * Given the source and destination available bus masks, select which + * will be routed to each port. We try to have source and destination + * on separate ports, but always respect the allowable settings. 
+ */ +static u32 pl08x_select_bus(u8 src, u8 dst) +{ + u32 cctl = 0; + + if (!(dst & PL08X_AHB1) || ((dst & PL08X_AHB2) && (src & PL08X_AHB1))) + cctl |= PL080_CONTROL_DST_AHB2; + if (!(src & PL08X_AHB1) || ((src & PL08X_AHB2) && !(dst & PL08X_AHB2))) + cctl |= PL080_CONTROL_SRC_AHB2; + + return cctl; +} + +static u32 pl08x_cctl(u32 cctl) +{ + cctl &= ~(PL080_CONTROL_SRC_AHB2 | PL080_CONTROL_DST_AHB2 | + PL080_CONTROL_SRC_INCR | PL080_CONTROL_DST_INCR | + PL080_CONTROL_PROT_MASK); + + /* Access the cell in privileged mode, non-bufferable, non-cacheable */ + return cctl | PL080_CONTROL_PROT_SYS; +} + +static u32 pl08x_width(enum dma_slave_buswidth width) +{ + switch (width) { + case DMA_SLAVE_BUSWIDTH_1_BYTE: + return PL080_WIDTH_8BIT; + case DMA_SLAVE_BUSWIDTH_2_BYTES: + return PL080_WIDTH_16BIT; + case DMA_SLAVE_BUSWIDTH_4_BYTES: + return PL080_WIDTH_32BIT; + default: + return ~0; + } +} + +static u32 pl08x_burst(u32 maxburst) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(burst_sizes); i++) + if (burst_sizes[i].burstwords <= maxburst) + break; + + return burst_sizes[i].reg; +} + static int dma_set_runtime_config(struct dma_chan *chan, struct dma_slave_config *config) { struct pl08x_dma_chan *plchan = to_pl08x_chan(chan); struct pl08x_driver_data *pl08x = plchan->host; - struct pl08x_channel_data *cd = plchan->cd; enum dma_slave_buswidth addr_width; - dma_addr_t addr; - u32 maxburst; + u32 width, burst, maxburst; u32 cctl = 0; - int i; if (!plchan->slave) return -EINVAL; @@ -1119,11 +1154,9 @@ static int dma_set_runtime_config(struct dma_chan *chan, /* Transfer direction */ plchan->runtime_direction = config->direction; if (config->direction == DMA_TO_DEVICE) { - addr = config->dst_addr; addr_width = config->dst_addr_width; maxburst = config->dst_maxburst; } else if (config->direction == DMA_FROM_DEVICE) { - addr = config->src_addr; addr_width = config->src_addr_width; maxburst = config->src_maxburst; } else { @@ -1132,46 +1165,40 @@ static int dma_set_runtime_config(struct dma_chan *chan, return -EINVAL; } - switch (addr_width) { - case DMA_SLAVE_BUSWIDTH_1_BYTE: - cctl |= (PL080_WIDTH_8BIT << PL080_CONTROL_SWIDTH_SHIFT) | - (PL080_WIDTH_8BIT << PL080_CONTROL_DWIDTH_SHIFT); - break; - case DMA_SLAVE_BUSWIDTH_2_BYTES: - cctl |= (PL080_WIDTH_16BIT << PL080_CONTROL_SWIDTH_SHIFT) | - (PL080_WIDTH_16BIT << PL080_CONTROL_DWIDTH_SHIFT); - break; - case DMA_SLAVE_BUSWIDTH_4_BYTES: - cctl |= (PL080_WIDTH_32BIT << PL080_CONTROL_SWIDTH_SHIFT) | - (PL080_WIDTH_32BIT << PL080_CONTROL_DWIDTH_SHIFT); - break; - default: + width = pl08x_width(addr_width); + if (width == ~0) { dev_err(&pl08x->adev->dev, "bad runtime_config: alien address width\n"); return -EINVAL; } + cctl |= width << PL080_CONTROL_SWIDTH_SHIFT; + cctl |= width << PL080_CONTROL_DWIDTH_SHIFT; + /* - * Now decide on a maxburst: * If this channel will only request single transfers, set this * down to ONE element. Also select one element if no maxburst * is specified. 
*/ - if (plchan->cd->single || maxburst == 0) { - cctl |= (PL080_BSIZE_1 << PL080_CONTROL_SB_SIZE_SHIFT) | - (PL080_BSIZE_1 << PL080_CONTROL_DB_SIZE_SHIFT); + if (plchan->cd->single) + maxburst = 1; + + burst = pl08x_burst(maxburst); + cctl |= burst << PL080_CONTROL_SB_SIZE_SHIFT; + cctl |= burst << PL080_CONTROL_DB_SIZE_SHIFT; + + if (plchan->runtime_direction == DMA_FROM_DEVICE) { + plchan->src_addr = config->src_addr; + plchan->src_cctl = pl08x_cctl(cctl) | PL080_CONTROL_DST_INCR | + pl08x_select_bus(plchan->cd->periph_buses, + pl08x->mem_buses); } else { - for (i = 0; i < ARRAY_SIZE(burst_sizes); i++) - if (burst_sizes[i].burstwords <= maxburst) - break; - cctl |= burst_sizes[i].reg; + plchan->dst_addr = config->dst_addr; + plchan->dst_cctl = pl08x_cctl(cctl) | PL080_CONTROL_SRC_INCR | + pl08x_select_bus(pl08x->mem_buses, + plchan->cd->periph_buses); } - plchan->runtime_addr = addr; - - /* Modify the default channel data to fit PrimeCell request */ - cd->cctl = cctl; - dev_dbg(&pl08x->adev->dev, "configured channel %s (%s) for %s, data width %d, " "maxburst %d words, LE, CCTL=0x%08x\n", @@ -1270,23 +1297,6 @@ static int pl08x_prep_channel_resources(struct pl08x_dma_chan *plchan, return 0; } -/* - * Given the source and destination available bus masks, select which - * will be routed to each port. We try to have source and destination - * on separate ports, but always respect the allowable settings. - */ -static u32 pl08x_select_bus(struct pl08x_driver_data *pl08x, u8 src, u8 dst) -{ - u32 cctl = 0; - - if (!(dst & PL08X_AHB1) || ((dst & PL08X_AHB2) && (src & PL08X_AHB1))) - cctl |= PL080_CONTROL_DST_AHB2; - if (!(src & PL08X_AHB1) || ((src & PL08X_AHB2) && !(dst & PL08X_AHB2))) - cctl |= PL080_CONTROL_SRC_AHB2; - - return cctl; -} - static struct pl08x_txd *pl08x_get_txd(struct pl08x_dma_chan *plchan, unsigned long flags) { @@ -1338,8 +1348,8 @@ static struct dma_async_tx_descriptor *pl08x_prep_dma_memcpy( txd->cctl |= PL080_CONTROL_SRC_INCR | PL080_CONTROL_DST_INCR; if (pl08x->vd->dualmaster) - txd->cctl |= pl08x_select_bus(pl08x, - pl08x->mem_buses, pl08x->mem_buses); + txd->cctl |= pl08x_select_bus(pl08x->mem_buses, + pl08x->mem_buses); ret = pl08x_prep_channel_resources(plchan, txd); if (ret) @@ -1356,7 +1366,6 @@ static struct dma_async_tx_descriptor *pl08x_prep_slave_sg( struct pl08x_dma_chan *plchan = to_pl08x_chan(chan); struct pl08x_driver_data *pl08x = plchan->host; struct pl08x_txd *txd; - u8 src_buses, dst_buses; int ret; /* @@ -1390,42 +1399,22 @@ static struct dma_async_tx_descriptor *pl08x_prep_slave_sg( txd->direction = direction; txd->len = sgl->length; - txd->cctl = plchan->cd->cctl & - ~(PL080_CONTROL_SRC_AHB2 | PL080_CONTROL_DST_AHB2 | - PL080_CONTROL_SRC_INCR | PL080_CONTROL_DST_INCR | - PL080_CONTROL_PROT_MASK); - - /* Access the cell in privileged mode, non-bufferable, non-cacheable */ - txd->cctl |= PL080_CONTROL_PROT_SYS; - if (direction == DMA_TO_DEVICE) { txd->ccfg |= PL080_FLOW_MEM2PER << PL080_CONFIG_FLOW_CONTROL_SHIFT; - txd->cctl |= PL080_CONTROL_SRC_INCR; + txd->cctl = plchan->dst_cctl; txd->src_addr = sgl->dma_address; - if (plchan->runtime_addr) - txd->dst_addr = plchan->runtime_addr; - else - txd->dst_addr = plchan->cd->addr; - src_buses = pl08x->mem_buses; - dst_buses = plchan->cd->periph_buses; + txd->dst_addr = plchan->dst_addr; } else if (direction == DMA_FROM_DEVICE) { txd->ccfg |= PL080_FLOW_PER2MEM << PL080_CONFIG_FLOW_CONTROL_SHIFT; - txd->cctl |= PL080_CONTROL_DST_INCR; - if (plchan->runtime_addr) - txd->src_addr = plchan->runtime_addr; - 
else - txd->src_addr = plchan->cd->addr; + txd->cctl = plchan->src_cctl; + txd->src_addr = plchan->src_addr; txd->dst_addr = sgl->dma_address; - src_buses = plchan->cd->periph_buses; - dst_buses = pl08x->mem_buses; } else { dev_err(&pl08x->adev->dev, "%s direction unsupported\n", __func__); return NULL; } - txd->cctl |= pl08x_select_bus(pl08x, src_buses, dst_buses); - ret = pl08x_prep_channel_resources(plchan, txd); if (ret) return NULL; @@ -1676,6 +1665,20 @@ static irqreturn_t pl08x_irq(int irq, void *dev) return mask ? IRQ_HANDLED : IRQ_NONE; } +static void pl08x_dma_slave_init(struct pl08x_dma_chan *chan) +{ + u32 cctl = pl08x_cctl(chan->cd->cctl); + + chan->slave = true; + chan->name = chan->cd->bus_id; + chan->src_addr = chan->cd->addr; + chan->dst_addr = chan->cd->addr; + chan->src_cctl = cctl | PL080_CONTROL_DST_INCR | + pl08x_select_bus(chan->cd->periph_buses, chan->host->mem_buses); + chan->dst_cctl = cctl | PL080_CONTROL_SRC_INCR | + pl08x_select_bus(chan->host->mem_buses, chan->cd->periph_buses); +} + /* * Initialise the DMAC memcpy/slave channels. * Make a local wrapper to hold required data @@ -1707,9 +1710,8 @@ static int pl08x_dma_init_virtual_channels(struct pl08x_driver_data *pl08x, chan->state = PL08X_CHAN_IDLE; if (slave) { - chan->slave = true; - chan->name = pl08x->pd->slave_channels[i].bus_id; chan->cd = &pl08x->pd->slave_channels[i]; + pl08x_dma_slave_init(chan); } else { chan->cd = &pl08x->pd->memcpy_channel; chan->name = kasprintf(GFP_KERNEL, "memcpy%d", i); diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index 36144f88d718..6a483eac7b3f 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -1216,7 +1216,7 @@ static int __init at_dma_probe(struct platform_device *pdev) atdma->dma_common.cap_mask = pdata->cap_mask; atdma->all_chan_mask = (1 << pdata->nr_channels) - 1; - size = io->end - io->start + 1; + size = resource_size(io); if (!request_mem_region(io->start, size, pdev->dev.driver->name)) { err = -EBUSY; goto err_kfree; @@ -1362,7 +1362,7 @@ static int __exit at_dma_remove(struct platform_device *pdev) atdma->regs = NULL; io = platform_get_resource(pdev, IORESOURCE_MEM, 0); - release_mem_region(io->start, io->end - io->start + 1); + release_mem_region(io->start, resource_size(io)); kfree(atdma); diff --git a/drivers/dma/coh901318.c b/drivers/dma/coh901318.c index af8c0b5ed70f..a7fca1653933 100644 --- a/drivers/dma/coh901318.c +++ b/drivers/dma/coh901318.c @@ -40,6 +40,8 @@ struct coh901318_desc { struct coh901318_lli *lli; enum dma_data_direction dir; unsigned long flags; + u32 head_config; + u32 head_ctrl; }; struct coh901318_base { @@ -660,6 +662,9 @@ static struct coh901318_desc *coh901318_queue_start(struct coh901318_chan *cohc) coh901318_desc_submit(cohc, cohd); + /* Program the transaction head */ + coh901318_set_conf(cohc, cohd->head_config); + coh901318_set_ctrl(cohc, cohd->head_ctrl); coh901318_prep_linked_list(cohc, cohd->lli); /* start dma job on this channel */ @@ -1090,8 +1095,6 @@ coh901318_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, } else goto err_direction; - coh901318_set_conf(cohc, config); - /* The dma only supports transmitting packages up to * MAX_DMA_PACKET_SIZE. Calculate to total number of * dma elemts required to send the entire sg list @@ -1128,16 +1131,18 @@ coh901318_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, if (ret) goto err_lli_fill; - /* - * Set the default ctrl for the channel to the one from the lli, - * things may have changed due to odd buffer alignment etc. 
- */ - coh901318_set_ctrl(cohc, lli->control); COH_DBG(coh901318_list_print(cohc, lli)); /* Pick a descriptor to handle this transfer */ cohd = coh901318_desc_get(cohc); + cohd->head_config = config; + /* + * Set the default head ctrl for the channel to the one from the + * lli, things may have changed due to odd buffer alignment + * etc. + */ + cohd->head_ctrl = lli->control; cohd->dir = direction; cohd->flags = flags; cohd->desc.tx_submit = coh901318_tx_submit; diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c index 8bcb15fb959d..f7f21a5de3e1 100644 --- a/drivers/dma/dmaengine.c +++ b/drivers/dma/dmaengine.c @@ -509,8 +509,8 @@ struct dma_chan *__dma_request_channel(dma_cap_mask_t *mask, dma_filter_fn fn, v dma_chan_name(chan)); list_del_rcu(&device->global_node); } else if (err) - pr_err("dmaengine: failed to get %s: (%d)\n", - dma_chan_name(chan), err); + pr_debug("dmaengine: failed to get %s: (%d)\n", + dma_chan_name(chan), err); else break; if (--device->privatecnt == 0) diff --git a/drivers/dma/ep93xx_dma.c b/drivers/dma/ep93xx_dma.c new file mode 100644 index 000000000000..5d7a49bd7c26 --- /dev/null +++ b/drivers/dma/ep93xx_dma.c @@ -0,0 +1,1355 @@ +/* + * Driver for the Cirrus Logic EP93xx DMA Controller + * + * Copyright (C) 2011 Mika Westerberg + * + * DMA M2P implementation is based on the original + * arch/arm/mach-ep93xx/dma-m2p.c which has following copyrights: + * + * Copyright (C) 2006 Lennert Buytenhek <buytenh@wantstofly.org> + * Copyright (C) 2006 Applied Data Systems + * Copyright (C) 2009 Ryan Mallon <rmallon@gmail.com> + * + * This driver is based on dw_dmac and amba-pl08x drivers. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ */ + +#include <linux/clk.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/dmaengine.h> +#include <linux/platform_device.h> +#include <linux/slab.h> + +#include <mach/dma.h> + +/* M2P registers */ +#define M2P_CONTROL 0x0000 +#define M2P_CONTROL_STALLINT BIT(0) +#define M2P_CONTROL_NFBINT BIT(1) +#define M2P_CONTROL_CH_ERROR_INT BIT(3) +#define M2P_CONTROL_ENABLE BIT(4) +#define M2P_CONTROL_ICE BIT(6) + +#define M2P_INTERRUPT 0x0004 +#define M2P_INTERRUPT_STALL BIT(0) +#define M2P_INTERRUPT_NFB BIT(1) +#define M2P_INTERRUPT_ERROR BIT(3) + +#define M2P_PPALLOC 0x0008 +#define M2P_STATUS 0x000c + +#define M2P_MAXCNT0 0x0020 +#define M2P_BASE0 0x0024 +#define M2P_MAXCNT1 0x0030 +#define M2P_BASE1 0x0034 + +#define M2P_STATE_IDLE 0 +#define M2P_STATE_STALL 1 +#define M2P_STATE_ON 2 +#define M2P_STATE_NEXT 3 + +/* M2M registers */ +#define M2M_CONTROL 0x0000 +#define M2M_CONTROL_DONEINT BIT(2) +#define M2M_CONTROL_ENABLE BIT(3) +#define M2M_CONTROL_START BIT(4) +#define M2M_CONTROL_DAH BIT(11) +#define M2M_CONTROL_SAH BIT(12) +#define M2M_CONTROL_PW_SHIFT 9 +#define M2M_CONTROL_PW_8 (0 << M2M_CONTROL_PW_SHIFT) +#define M2M_CONTROL_PW_16 (1 << M2M_CONTROL_PW_SHIFT) +#define M2M_CONTROL_PW_32 (2 << M2M_CONTROL_PW_SHIFT) +#define M2M_CONTROL_PW_MASK (3 << M2M_CONTROL_PW_SHIFT) +#define M2M_CONTROL_TM_SHIFT 13 +#define M2M_CONTROL_TM_TX (1 << M2M_CONTROL_TM_SHIFT) +#define M2M_CONTROL_TM_RX (2 << M2M_CONTROL_TM_SHIFT) +#define M2M_CONTROL_RSS_SHIFT 22 +#define M2M_CONTROL_RSS_SSPRX (1 << M2M_CONTROL_RSS_SHIFT) +#define M2M_CONTROL_RSS_SSPTX (2 << M2M_CONTROL_RSS_SHIFT) +#define M2M_CONTROL_RSS_IDE (3 << M2M_CONTROL_RSS_SHIFT) +#define M2M_CONTROL_NO_HDSK BIT(24) +#define M2M_CONTROL_PWSC_SHIFT 25 + +#define M2M_INTERRUPT 0x0004 +#define M2M_INTERRUPT_DONEINT BIT(1) + +#define M2M_BCR0 0x0010 +#define M2M_BCR1 0x0014 +#define M2M_SAR_BASE0 0x0018 +#define M2M_SAR_BASE1 0x001c +#define M2M_DAR_BASE0 0x002c +#define M2M_DAR_BASE1 0x0030 + +#define DMA_MAX_CHAN_BYTES 0xffff +#define DMA_MAX_CHAN_DESCRIPTORS 32 + +struct ep93xx_dma_engine; + +/** + * struct ep93xx_dma_desc - EP93xx specific transaction descriptor + * @src_addr: source address of the transaction + * @dst_addr: destination address of the transaction + * @size: size of the transaction (in bytes) + * @complete: this descriptor is completed + * @txd: dmaengine API descriptor + * @tx_list: list of linked descriptors + * @node: link used for putting this into a channel queue + */ +struct ep93xx_dma_desc { + u32 src_addr; + u32 dst_addr; + size_t size; + bool complete; + struct dma_async_tx_descriptor txd; + struct list_head tx_list; + struct list_head node; +}; + +/** + * struct ep93xx_dma_chan - an EP93xx DMA M2P/M2M channel + * @chan: dmaengine API channel + * @edma: pointer to the engine device + * @regs: memory mapped registers + * @irq: interrupt number of the channel + * @clk: clock used by this channel + * @tasklet: channel specific tasklet used for callbacks + * @lock: lock protecting the fields following + * @flags: flags for the channel + * @buffer: which buffer to use next (0/1) + * @last_completed: last completed cookie value + * @active: flattened chain of descriptors currently being processed + * @queue: pending descriptors which are handled next + * @free_list: list of free descriptors which can be used + * @runtime_addr: physical address currently used as dest/src (M2M only).
This + * is set via %DMA_SLAVE_CONFIG before the slave operation is + * prepared + * @runtime_ctrl: M2M runtime values for the control register. + * + * As the EP93xx DMA controller doesn't support real chained DMA descriptors, we + * use a slightly different scheme here: @active points to the head of a + * flattened DMA descriptor chain. + * + * @queue holds pending transactions. These are linked through the first + * descriptor in the chain. When a descriptor is moved to the @active queue, + * the first and chained descriptors are flattened into a single list. + * + * @chan.private holds a pointer to &struct ep93xx_dma_data which contains + * necessary channel configuration information. For memcpy channels this must + * be %NULL. + */ +struct ep93xx_dma_chan { + struct dma_chan chan; + const struct ep93xx_dma_engine *edma; + void __iomem *regs; + int irq; + struct clk *clk; + struct tasklet_struct tasklet; + /* protects the fields following */ + spinlock_t lock; + unsigned long flags; +/* Channel is configured for cyclic transfers */ +#define EP93XX_DMA_IS_CYCLIC 0 + + int buffer; + dma_cookie_t last_completed; + struct list_head active; + struct list_head queue; + struct list_head free_list; + u32 runtime_addr; + u32 runtime_ctrl; +}; + +/** + * struct ep93xx_dma_engine - the EP93xx DMA engine instance + * @dma_dev: holds the dmaengine device + * @m2m: is this an M2M or M2P device + * @hw_setup: method which sets the channel up for operation + * @hw_shutdown: shuts the channel down and flushes whatever is left + * @hw_submit: pushes active descriptor(s) to the hardware + * @hw_interrupt: handle the interrupt + * @num_channels: number of channels for this instance + * @channels: array of channels + * + * There is one instance of this struct for the M2P channels and one for the + * M2M channels. hw_xxx() methods are used to perform operations which are + * different on M2M and M2P channels. These methods are called with channel + * lock held and interrupts disabled so they cannot sleep. + */ +struct ep93xx_dma_engine { + struct dma_device dma_dev; + bool m2m; + int (*hw_setup)(struct ep93xx_dma_chan *); + void (*hw_shutdown)(struct ep93xx_dma_chan *); + void (*hw_submit)(struct ep93xx_dma_chan *); + int (*hw_interrupt)(struct ep93xx_dma_chan *); +#define INTERRUPT_UNKNOWN 0 +#define INTERRUPT_DONE 1 +#define INTERRUPT_NEXT_BUFFER 2 + + size_t num_channels; + struct ep93xx_dma_chan channels[]; +}; + +static inline struct device *chan2dev(struct ep93xx_dma_chan *edmac) +{ + return &edmac->chan.dev->device; +} + +static struct ep93xx_dma_chan *to_ep93xx_dma_chan(struct dma_chan *chan) +{ + return container_of(chan, struct ep93xx_dma_chan, chan); +} + +/** + * ep93xx_dma_set_active - set new active descriptor chain + * @edmac: channel + * @desc: head of the new active descriptor chain + * + * Sets @desc to be the head of the new active descriptor chain. This is the + * chain which is processed next. The active list must be empty before calling + * this function. + * + * Called with @edmac->lock held and interrupts disabled.
+ */ +static void ep93xx_dma_set_active(struct ep93xx_dma_chan *edmac, + struct ep93xx_dma_desc *desc) +{ + BUG_ON(!list_empty(&edmac->active)); + + list_add_tail(&desc->node, &edmac->active); + + /* Flatten the @desc->tx_list chain into @edmac->active list */ + while (!list_empty(&desc->tx_list)) { + struct ep93xx_dma_desc *d = list_first_entry(&desc->tx_list, + struct ep93xx_dma_desc, node); + + /* + * We copy the callback parameters from the first descriptor + * to all the chained descriptors. This way we can call the + * callback without having to find out the first descriptor in + * the chain. Useful for cyclic transfers. + */ + d->txd.callback = desc->txd.callback; + d->txd.callback_param = desc->txd.callback_param; + + list_move_tail(&d->node, &edmac->active); + } +} + +/* Called with @edmac->lock held and interrupts disabled */ +static struct ep93xx_dma_desc * +ep93xx_dma_get_active(struct ep93xx_dma_chan *edmac) +{ + return list_first_entry(&edmac->active, struct ep93xx_dma_desc, node); +} + +/** + * ep93xx_dma_advance_active - advances to the next active descriptor + * @edmac: channel + * + * Function advances active descriptor to the next in the @edmac->active and + * returns %true if we still have descriptors in the chain to process. + * Otherwise returns %false. + * + * When the channel is in cyclic mode always returns %true. + * + * Called with @edmac->lock held and interrupts disabled. + */ +static bool ep93xx_dma_advance_active(struct ep93xx_dma_chan *edmac) +{ + list_rotate_left(&edmac->active); + + if (test_bit(EP93XX_DMA_IS_CYCLIC, &edmac->flags)) + return true; + + /* + * If txd.cookie is set it means that we are back in the first + * descriptor in the chain and hence done with it. + */ + return !ep93xx_dma_get_active(edmac)->txd.cookie; +} + +/* + * M2P DMA implementation + */ + +static void m2p_set_control(struct ep93xx_dma_chan *edmac, u32 control) +{ + writel(control, edmac->regs + M2P_CONTROL); + /* + * EP93xx User's Guide states that we must perform a dummy read after + * write to the control register. 
+ */ + readl(edmac->regs + M2P_CONTROL); +} + +static int m2p_hw_setup(struct ep93xx_dma_chan *edmac) +{ + struct ep93xx_dma_data *data = edmac->chan.private; + u32 control; + + writel(data->port & 0xf, edmac->regs + M2P_PPALLOC); + + control = M2P_CONTROL_CH_ERROR_INT | M2P_CONTROL_ICE + | M2P_CONTROL_ENABLE; + m2p_set_control(edmac, control); + + return 0; +} + +static inline u32 m2p_channel_state(struct ep93xx_dma_chan *edmac) +{ + return (readl(edmac->regs + M2P_STATUS) >> 4) & 0x3; +} + +static void m2p_hw_shutdown(struct ep93xx_dma_chan *edmac) +{ + u32 control; + + control = readl(edmac->regs + M2P_CONTROL); + control &= ~(M2P_CONTROL_STALLINT | M2P_CONTROL_NFBINT); + m2p_set_control(edmac, control); + + while (m2p_channel_state(edmac) >= M2P_STATE_ON) + cpu_relax(); + + m2p_set_control(edmac, 0); + + while (m2p_channel_state(edmac) == M2P_STATE_STALL) + cpu_relax(); +} + +static void m2p_fill_desc(struct ep93xx_dma_chan *edmac) +{ + struct ep93xx_dma_desc *desc = ep93xx_dma_get_active(edmac); + u32 bus_addr; + + if (ep93xx_dma_chan_direction(&edmac->chan) == DMA_TO_DEVICE) + bus_addr = desc->src_addr; + else + bus_addr = desc->dst_addr; + + if (edmac->buffer == 0) { + writel(desc->size, edmac->regs + M2P_MAXCNT0); + writel(bus_addr, edmac->regs + M2P_BASE0); + } else { + writel(desc->size, edmac->regs + M2P_MAXCNT1); + writel(bus_addr, edmac->regs + M2P_BASE1); + } + + edmac->buffer ^= 1; +} + +static void m2p_hw_submit(struct ep93xx_dma_chan *edmac) +{ + u32 control = readl(edmac->regs + M2P_CONTROL); + + m2p_fill_desc(edmac); + control |= M2P_CONTROL_STALLINT; + + if (ep93xx_dma_advance_active(edmac)) { + m2p_fill_desc(edmac); + control |= M2P_CONTROL_NFBINT; + } + + m2p_set_control(edmac, control); +} + +static int m2p_hw_interrupt(struct ep93xx_dma_chan *edmac) +{ + u32 irq_status = readl(edmac->regs + M2P_INTERRUPT); + u32 control; + + if (irq_status & M2P_INTERRUPT_ERROR) { + struct ep93xx_dma_desc *desc = ep93xx_dma_get_active(edmac); + + /* Clear the error interrupt */ + writel(1, edmac->regs + M2P_INTERRUPT); + + /* + * It seems that there is no easy way of reporting errors back + * to client so we just report the error here and continue as + * usual. + * + * Revisit this when there is a mechanism to report back the + * errors. + */ + dev_err(chan2dev(edmac), + "DMA transfer failed! Details:\n" + "\tcookie : %d\n" + "\tsrc_addr : 0x%08x\n" + "\tdst_addr : 0x%08x\n" + "\tsize : %zu\n", + desc->txd.cookie, desc->src_addr, desc->dst_addr, + desc->size); + } + + switch (irq_status & (M2P_INTERRUPT_STALL | M2P_INTERRUPT_NFB)) { + case M2P_INTERRUPT_STALL: + /* Disable interrupts */ + control = readl(edmac->regs + M2P_CONTROL); + control &= ~(M2P_CONTROL_STALLINT | M2P_CONTROL_NFBINT); + m2p_set_control(edmac, control); + + return INTERRUPT_DONE; + + case M2P_INTERRUPT_NFB: + if (ep93xx_dma_advance_active(edmac)) + m2p_fill_desc(edmac); + + return INTERRUPT_NEXT_BUFFER; + } + + return INTERRUPT_UNKNOWN; +} + +/* + * M2M DMA implementation + * + * For the M2M transfers we don't use NFB at all. This is because it simply + * doesn't work well with memcpy transfers. When you submit both buffers it is + * extremely unlikely that you get an NFB interrupt, but it instead reports + * DONE interrupt and both buffers are already transferred which means that we + * weren't able to update the next buffer. + * + * So for now we "simulate" NFB by just submitting buffer after buffer + * without double buffering. 
+ */ + +static int m2m_hw_setup(struct ep93xx_dma_chan *edmac) +{ + const struct ep93xx_dma_data *data = edmac->chan.private; + u32 control = 0; + + if (!data) { + /* This is a memcpy channel, nothing to configure */ + writel(control, edmac->regs + M2M_CONTROL); + return 0; + } + + switch (data->port) { + case EP93XX_DMA_SSP: + /* + * This was found via experimenting - anything less than 5 + * causes the channel to perform only a partial transfer which + * leads to problems since we don't get DONE interrupt then. + */ + control = (5 << M2M_CONTROL_PWSC_SHIFT); + control |= M2M_CONTROL_NO_HDSK; + + if (data->direction == DMA_TO_DEVICE) { + control |= M2M_CONTROL_DAH; + control |= M2M_CONTROL_TM_TX; + control |= M2M_CONTROL_RSS_SSPTX; + } else { + control |= M2M_CONTROL_SAH; + control |= M2M_CONTROL_TM_RX; + control |= M2M_CONTROL_RSS_SSPRX; + } + break; + + case EP93XX_DMA_IDE: + /* + * This IDE part is totally untested. Values below are taken + * from the EP93xx User's Guide and might not be correct. + */ + control |= M2M_CONTROL_NO_HDSK; + control |= M2M_CONTROL_RSS_IDE; + control |= M2M_CONTROL_PW_16; + + if (data->direction == DMA_TO_DEVICE) { + /* Worst case from the UG */ + control = (3 << M2M_CONTROL_PWSC_SHIFT); + control |= M2M_CONTROL_DAH; + control |= M2M_CONTROL_TM_TX; + } else { + control = (2 << M2M_CONTROL_PWSC_SHIFT); + control |= M2M_CONTROL_SAH; + control |= M2M_CONTROL_TM_RX; + } + break; + + default: + return -EINVAL; + } + + writel(control, edmac->regs + M2M_CONTROL); + return 0; +} + +static void m2m_hw_shutdown(struct ep93xx_dma_chan *edmac) +{ + /* Just disable the channel */ + writel(0, edmac->regs + M2M_CONTROL); +} + +static void m2m_fill_desc(struct ep93xx_dma_chan *edmac) +{ + struct ep93xx_dma_desc *desc = ep93xx_dma_get_active(edmac); + + if (edmac->buffer == 0) { + writel(desc->src_addr, edmac->regs + M2M_SAR_BASE0); + writel(desc->dst_addr, edmac->regs + M2M_DAR_BASE0); + writel(desc->size, edmac->regs + M2M_BCR0); + } else { + writel(desc->src_addr, edmac->regs + M2M_SAR_BASE1); + writel(desc->dst_addr, edmac->regs + M2M_DAR_BASE1); + writel(desc->size, edmac->regs + M2M_BCR1); + } + + edmac->buffer ^= 1; +} + +static void m2m_hw_submit(struct ep93xx_dma_chan *edmac) +{ + struct ep93xx_dma_data *data = edmac->chan.private; + u32 control = readl(edmac->regs + M2M_CONTROL); + + /* + * Since we allow clients to configure PW (peripheral width) we always + * clear PW bits here and then set them according to what is given in + * the runtime configuration. + */ + control &= ~M2M_CONTROL_PW_MASK; + control |= edmac->runtime_ctrl; + + m2m_fill_desc(edmac); + control |= M2M_CONTROL_DONEINT; + + /* + * Now we can finally enable the channel. For an M2M channel this must + * be done _after_ the BCRx registers are programmed. + */ + control |= M2M_CONTROL_ENABLE; + writel(control, edmac->regs + M2M_CONTROL); + + if (!data) { + /* + * For memcpy channels the software trigger must be asserted + * in order to start the memcpy operation.
+ */ + control |= M2M_CONTROL_START; + writel(control, edmac->regs + M2M_CONTROL); + } +} + +static int m2m_hw_interrupt(struct ep93xx_dma_chan *edmac) +{ + u32 control; + + if (!(readl(edmac->regs + M2M_INTERRUPT) & M2M_INTERRUPT_DONEINT)) + return INTERRUPT_UNKNOWN; + + /* Clear the DONE bit */ + writel(0, edmac->regs + M2M_INTERRUPT); + + /* Disable interrupts and the channel */ + control = readl(edmac->regs + M2M_CONTROL); + control &= ~(M2M_CONTROL_DONEINT | M2M_CONTROL_ENABLE); + writel(control, edmac->regs + M2M_CONTROL); + + /* + * Since we only get the DONE interrupt we have to find out ourselves + * whether there still is something to process. So we try to advance + * the chain and see whether it succeeds. + */ + if (ep93xx_dma_advance_active(edmac)) { + edmac->edma->hw_submit(edmac); + return INTERRUPT_NEXT_BUFFER; + } + + return INTERRUPT_DONE; +} + +/* + * DMA engine API implementation + */ + +static struct ep93xx_dma_desc * +ep93xx_dma_desc_get(struct ep93xx_dma_chan *edmac) +{ + struct ep93xx_dma_desc *desc, *_desc; + struct ep93xx_dma_desc *ret = NULL; + unsigned long flags; + + spin_lock_irqsave(&edmac->lock, flags); + list_for_each_entry_safe(desc, _desc, &edmac->free_list, node) { + if (async_tx_test_ack(&desc->txd)) { + list_del_init(&desc->node); + + /* Re-initialize the descriptor */ + desc->src_addr = 0; + desc->dst_addr = 0; + desc->size = 0; + desc->complete = false; + desc->txd.cookie = 0; + desc->txd.callback = NULL; + desc->txd.callback_param = NULL; + + ret = desc; + break; + } + } + spin_unlock_irqrestore(&edmac->lock, flags); + return ret; +} + +static void ep93xx_dma_desc_put(struct ep93xx_dma_chan *edmac, + struct ep93xx_dma_desc *desc) +{ + if (desc) { + unsigned long flags; + + spin_lock_irqsave(&edmac->lock, flags); + list_splice_init(&desc->tx_list, &edmac->free_list); + list_add(&desc->node, &edmac->free_list); + spin_unlock_irqrestore(&edmac->lock, flags); + } +} + +/** + * ep93xx_dma_advance_work - start processing the next pending transaction + * @edmac: channel + * + * If we have pending transactions queued and we are currently idling, this + * function takes the next queued transaction from the @edmac->queue and + * pushes it to the hardware for execution.
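+ * + * This is called both from the completion tasklet and from + * ep93xx_dma_issue_pending(), so queued transactions are started either when + * the previous chain completes or when the client issues its pending requests.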
+ */ +static void ep93xx_dma_advance_work(struct ep93xx_dma_chan *edmac) +{ + struct ep93xx_dma_desc *new; + unsigned long flags; + + spin_lock_irqsave(&edmac->lock, flags); + if (!list_empty(&edmac->active) || list_empty(&edmac->queue)) { + spin_unlock_irqrestore(&edmac->lock, flags); + return; + } + + /* Take the next descriptor from the pending queue */ + new = list_first_entry(&edmac->queue, struct ep93xx_dma_desc, node); + list_del_init(&new->node); + + ep93xx_dma_set_active(edmac, new); + + /* Push it to the hardware */ + edmac->edma->hw_submit(edmac); + spin_unlock_irqrestore(&edmac->lock, flags); +} + +static void ep93xx_dma_unmap_buffers(struct ep93xx_dma_desc *desc) +{ + struct device *dev = desc->txd.chan->device->dev; + + if (!(desc->txd.flags & DMA_COMPL_SKIP_SRC_UNMAP)) { + if (desc->txd.flags & DMA_COMPL_SRC_UNMAP_SINGLE) + dma_unmap_single(dev, desc->src_addr, desc->size, + DMA_TO_DEVICE); + else + dma_unmap_page(dev, desc->src_addr, desc->size, + DMA_TO_DEVICE); + } + if (!(desc->txd.flags & DMA_COMPL_SKIP_DEST_UNMAP)) { + if (desc->txd.flags & DMA_COMPL_DEST_UNMAP_SINGLE) + dma_unmap_single(dev, desc->dst_addr, desc->size, + DMA_FROM_DEVICE); + else + dma_unmap_page(dev, desc->dst_addr, desc->size, + DMA_FROM_DEVICE); + } +} + +static void ep93xx_dma_tasklet(unsigned long data) +{ + struct ep93xx_dma_chan *edmac = (struct ep93xx_dma_chan *)data; + struct ep93xx_dma_desc *desc, *d; + dma_async_tx_callback callback; + void *callback_param; + LIST_HEAD(list); + + spin_lock_irq(&edmac->lock); + desc = ep93xx_dma_get_active(edmac); + if (desc->complete) { + edmac->last_completed = desc->txd.cookie; + list_splice_init(&edmac->active, &list); + } + spin_unlock_irq(&edmac->lock); + + /* Pick up the next descriptor from the queue */ + ep93xx_dma_advance_work(edmac); + + callback = desc->txd.callback; + callback_param = desc->txd.callback_param; + + /* Now we can release all the chained descriptors */ + list_for_each_entry_safe(desc, d, &list, node) { + /* + * For the memcpy channels the API requires us to unmap the + * buffers unless requested otherwise. + */ + if (!edmac->chan.private) + ep93xx_dma_unmap_buffers(desc); + + ep93xx_dma_desc_put(edmac, desc); + } + + if (callback) + callback(callback_param); +} + +static irqreturn_t ep93xx_dma_interrupt(int irq, void *dev_id) +{ + struct ep93xx_dma_chan *edmac = dev_id; + irqreturn_t ret = IRQ_HANDLED; + + spin_lock(&edmac->lock); + + switch (edmac->edma->hw_interrupt(edmac)) { + case INTERRUPT_DONE: + ep93xx_dma_get_active(edmac)->complete = true; + tasklet_schedule(&edmac->tasklet); + break; + + case INTERRUPT_NEXT_BUFFER: + if (test_bit(EP93XX_DMA_IS_CYCLIC, &edmac->flags)) + tasklet_schedule(&edmac->tasklet); + break; + + default: + dev_warn(chan2dev(edmac), "unknown interrupt!\n"); + ret = IRQ_NONE; + break; + } + + spin_unlock(&edmac->lock); + return ret; +} + +/** + * ep93xx_dma_tx_submit - set the prepared descriptor(s) to be executed + * @tx: descriptor to be executed + * + * Function will execute given descriptor on the hardware or if the hardware + * is busy, queue the descriptor to be executed later on. Returns cookie which + * can be used to poll the status of the descriptor. 
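+ * + * Clients do not call this directly: a descriptor obtained from one of the + * device_prep_* methods below is handed back through dmaengine_submit(), + * which ends up invoking this callback.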
+ */ +static dma_cookie_t ep93xx_dma_tx_submit(struct dma_async_tx_descriptor *tx) +{ + struct ep93xx_dma_chan *edmac = to_ep93xx_dma_chan(tx->chan); + struct ep93xx_dma_desc *desc; + dma_cookie_t cookie; + unsigned long flags; + + spin_lock_irqsave(&edmac->lock, flags); + + cookie = edmac->chan.cookie; + + if (++cookie < 0) + cookie = 1; + + desc = container_of(tx, struct ep93xx_dma_desc, txd); + + edmac->chan.cookie = cookie; + desc->txd.cookie = cookie; + + /* + * If nothing is currently being processed, we push this descriptor + * directly to the hardware. Otherwise we put the descriptor + * into the pending queue. + */ + if (list_empty(&edmac->active)) { + ep93xx_dma_set_active(edmac, desc); + edmac->edma->hw_submit(edmac); + } else { + list_add_tail(&desc->node, &edmac->queue); + } + + spin_unlock_irqrestore(&edmac->lock, flags); + return cookie; +} + +/** + * ep93xx_dma_alloc_chan_resources - allocate resources for the channel + * @chan: channel to allocate resources + * + * Function allocates necessary resources for the given DMA channel and + * returns number of allocated descriptors for the channel. Negative errno + * is returned in case of failure. + */ +static int ep93xx_dma_alloc_chan_resources(struct dma_chan *chan) +{ + struct ep93xx_dma_chan *edmac = to_ep93xx_dma_chan(chan); + struct ep93xx_dma_data *data = chan->private; + const char *name = dma_chan_name(chan); + int ret, i; + + /* Sanity check the channel parameters */ + if (!edmac->edma->m2m) { + if (!data) + return -EINVAL; + if (data->port < EP93XX_DMA_I2S1 || + data->port > EP93XX_DMA_IRDA) + return -EINVAL; + if (data->direction != ep93xx_dma_chan_direction(chan)) + return -EINVAL; + } else { + if (data) { + switch (data->port) { + case EP93XX_DMA_SSP: + case EP93XX_DMA_IDE: + if (data->direction != DMA_TO_DEVICE && + data->direction != DMA_FROM_DEVICE) + return -EINVAL; + break; + default: + return -EINVAL; + } + } + } + + if (data && data->name) + name = data->name; + + ret = clk_enable(edmac->clk); + if (ret) + return ret; + + ret = request_irq(edmac->irq, ep93xx_dma_interrupt, 0, name, edmac); + if (ret) + goto fail_clk_disable; + + spin_lock_irq(&edmac->lock); + edmac->last_completed = 1; + edmac->chan.cookie = 1; + ret = edmac->edma->hw_setup(edmac); + spin_unlock_irq(&edmac->lock); + + if (ret) + goto fail_free_irq; + + for (i = 0; i < DMA_MAX_CHAN_DESCRIPTORS; i++) { + struct ep93xx_dma_desc *desc; + + desc = kzalloc(sizeof(*desc), GFP_KERNEL); + if (!desc) { + dev_warn(chan2dev(edmac), "not enough descriptors\n"); + break; + } + + INIT_LIST_HEAD(&desc->tx_list); + + dma_async_tx_descriptor_init(&desc->txd, chan); + desc->txd.flags = DMA_CTRL_ACK; + desc->txd.tx_submit = ep93xx_dma_tx_submit; + + ep93xx_dma_desc_put(edmac, desc); + } + + return i; + +fail_free_irq: + free_irq(edmac->irq, edmac); +fail_clk_disable: + clk_disable(edmac->clk); + + return ret; +} + +/** + * ep93xx_dma_free_chan_resources - release resources for the channel + * @chan: channel + * + * Function releases all the resources allocated for the given channel. + * The channel must be idle when this is called.
+ */ +static void ep93xx_dma_free_chan_resources(struct dma_chan *chan) +{ + struct ep93xx_dma_chan *edmac = to_ep93xx_dma_chan(chan); + struct ep93xx_dma_desc *desc, *d; + unsigned long flags; + LIST_HEAD(list); + + BUG_ON(!list_empty(&edmac->active)); + BUG_ON(!list_empty(&edmac->queue)); + + spin_lock_irqsave(&edmac->lock, flags); + edmac->edma->hw_shutdown(edmac); + edmac->runtime_addr = 0; + edmac->runtime_ctrl = 0; + edmac->buffer = 0; + list_splice_init(&edmac->free_list, &list); + spin_unlock_irqrestore(&edmac->lock, flags); + + list_for_each_entry_safe(desc, d, &list, node) + kfree(desc); + + clk_disable(edmac->clk); + free_irq(edmac->irq, edmac); +} + +/** + * ep93xx_dma_prep_dma_memcpy - prepare a memcpy DMA operation + * @chan: channel + * @dest: destination bus address + * @src: source bus address + * @len: size of the transaction + * @flags: flags for the descriptor + * + * Returns a valid DMA descriptor or %NULL in case of failure. + */ +static struct dma_async_tx_descriptor * +ep93xx_dma_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, + dma_addr_t src, size_t len, unsigned long flags) +{ + struct ep93xx_dma_chan *edmac = to_ep93xx_dma_chan(chan); + struct ep93xx_dma_desc *desc, *first; + size_t bytes, offset; + + first = NULL; + for (offset = 0; offset < len; offset += bytes) { + desc = ep93xx_dma_desc_get(edmac); + if (!desc) { + dev_warn(chan2dev(edmac), "couldn't get descriptor\n"); + goto fail; + } + + bytes = min_t(size_t, len - offset, DMA_MAX_CHAN_BYTES); + + desc->src_addr = src + offset; + desc->dst_addr = dest + offset; + desc->size = bytes; + + if (!first) + first = desc; + else + list_add_tail(&desc->node, &first->tx_list); + } + + first->txd.cookie = -EBUSY; + first->txd.flags = flags; + + return &first->txd; +fail: + ep93xx_dma_desc_put(edmac, first); + return NULL; +} + +/** + * ep93xx_dma_prep_slave_sg - prepare a slave DMA operation + * @chan: channel + * @sgl: list of buffers to transfer + * @sg_len: number of entries in @sgl + * @dir: direction of the DMA transfer + * @flags: flags for the descriptor + * + * Returns a valid DMA descriptor or %NULL in case of failure.
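+ * + * For M2M channels the device side address of each entry is taken from + * @runtime_addr, which the client is expected to have set beforehand with a + * %DMA_SLAVE_CONFIG control call.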
+ */ +static struct dma_async_tx_descriptor * +ep93xx_dma_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, + unsigned int sg_len, enum dma_data_direction dir, + unsigned long flags) +{ + struct ep93xx_dma_chan *edmac = to_ep93xx_dma_chan(chan); + struct ep93xx_dma_desc *desc, *first; + struct scatterlist *sg; + int i; + + if (!edmac->edma->m2m && dir != ep93xx_dma_chan_direction(chan)) { + dev_warn(chan2dev(edmac), + "channel was configured with different direction\n"); + return NULL; + } + + if (test_bit(EP93XX_DMA_IS_CYCLIC, &edmac->flags)) { + dev_warn(chan2dev(edmac), + "channel is already used for cyclic transfers\n"); + return NULL; + } + + first = NULL; + for_each_sg(sgl, sg, sg_len, i) { + size_t sg_len = sg_dma_len(sg); + + if (sg_len > DMA_MAX_CHAN_BYTES) { + dev_warn(chan2dev(edmac), "too big transfer size %d\n", + sg_len); + goto fail; + } + + desc = ep93xx_dma_desc_get(edmac); + if (!desc) { + dev_warn(chan2dev(edmac), "couldn't get descriptor\n"); + goto fail; + } + + if (dir == DMA_TO_DEVICE) { + desc->src_addr = sg_dma_address(sg); + desc->dst_addr = edmac->runtime_addr; + } else { + desc->src_addr = edmac->runtime_addr; + desc->dst_addr = sg_dma_address(sg); + } + desc->size = sg_len; + + if (!first) + first = desc; + else + list_add_tail(&desc->node, &first->tx_list); + } + + first->txd.cookie = -EBUSY; + first->txd.flags = flags; + + return &first->txd; + +fail: + ep93xx_dma_desc_put(edmac, first); + return NULL; +} + +/** + * ep93xx_dma_prep_dma_cyclic - prepare a cyclic DMA operation + * @chan: channel + * @dma_addr: DMA mapped address of the buffer + * @buf_len: length of the buffer (in bytes) + * @period_len: length of a single period + * @dir: direction of the operation + * + * Prepares a descriptor for cyclic DMA operation. This means that once the + * descriptor is submitted, we will be submitting @period_len sized + * buffers and calling the callback once a period has elapsed. The transfer + * terminates only when the client calls dmaengine_terminate_all() for this + * channel. + * + * Returns a valid DMA descriptor or %NULL in case of failure.
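+ * + * For example, a 64 KiB buffer with a 4 KiB @period_len is split into 16 + * chained descriptors; the channel cycles through them and the callback is + * invoked after each completed period until the transfer is terminated.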
+ */ +static struct dma_async_tx_descriptor * +ep93xx_dma_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t dma_addr, + size_t buf_len, size_t period_len, + enum dma_data_direction dir) +{ + struct ep93xx_dma_chan *edmac = to_ep93xx_dma_chan(chan); + struct ep93xx_dma_desc *desc, *first; + size_t offset = 0; + + if (!edmac->edma->m2m && dir != ep93xx_dma_chan_direction(chan)) { + dev_warn(chan2dev(edmac), + "channel was configured with different direction\n"); + return NULL; + } + + if (test_and_set_bit(EP93XX_DMA_IS_CYCLIC, &edmac->flags)) { + dev_warn(chan2dev(edmac), + "channel is already used for cyclic transfers\n"); + return NULL; + } + + if (period_len > DMA_MAX_CHAN_BYTES) { + dev_warn(chan2dev(edmac), "too big period length %d\n", + period_len); + return NULL; + } + + /* Split the buffer into period size chunks */ + first = NULL; + for (offset = 0; offset < buf_len; offset += period_len) { + desc = ep93xx_dma_desc_get(edmac); + if (!desc) { + dev_warn(chan2dev(edmac), "couldn't get descriptor\n"); + goto fail; + } + + if (dir == DMA_TO_DEVICE) { + desc->src_addr = dma_addr + offset; + desc->dst_addr = edmac->runtime_addr; + } else { + desc->src_addr = edmac->runtime_addr; + desc->dst_addr = dma_addr + offset; + } + + desc->size = period_len; + + if (!first) + first = desc; + else + list_add_tail(&desc->node, &first->tx_list); + } + + first->txd.cookie = -EBUSY; + + return &first->txd; + +fail: + ep93xx_dma_desc_put(edmac, first); + return NULL; +} + +/** + * ep93xx_dma_terminate_all - terminate all transactions + * @edmac: channel + * + * Stops all DMA transactions. All descriptors are put back to the + * @edmac->free_list and callbacks are _not_ called. + */ +static int ep93xx_dma_terminate_all(struct ep93xx_dma_chan *edmac) +{ + struct ep93xx_dma_desc *desc, *_d; + unsigned long flags; + LIST_HEAD(list); + + spin_lock_irqsave(&edmac->lock, flags); + /* First we disable and flush the DMA channel */ + edmac->edma->hw_shutdown(edmac); + clear_bit(EP93XX_DMA_IS_CYCLIC, &edmac->flags); + list_splice_init(&edmac->active, &list); + list_splice_init(&edmac->queue, &list); + /* + * We then re-enable the channel. This way we can continue submitting + * the descriptors by just calling ->hw_submit() again. + */ + edmac->edma->hw_setup(edmac); + spin_unlock_irqrestore(&edmac->lock, flags); + + list_for_each_entry_safe(desc, _d, &list, node) + ep93xx_dma_desc_put(edmac, desc); + + return 0; +} + +static int ep93xx_dma_slave_config(struct ep93xx_dma_chan *edmac, + struct dma_slave_config *config) +{ + enum dma_slave_buswidth width; + unsigned long flags; + u32 addr, ctrl; + + if (!edmac->edma->m2m) + return -EINVAL; + + switch (config->direction) { + case DMA_FROM_DEVICE: + width = config->src_addr_width; + addr = config->src_addr; + break; + + case DMA_TO_DEVICE: + width = config->dst_addr_width; + addr = config->dst_addr; + break; + + default: + return -EINVAL; + } + + switch (width) { + case DMA_SLAVE_BUSWIDTH_1_BYTE: + ctrl = 0; + break; + case DMA_SLAVE_BUSWIDTH_2_BYTES: + ctrl = M2M_CONTROL_PW_16; + break; + case DMA_SLAVE_BUSWIDTH_4_BYTES: + ctrl = M2M_CONTROL_PW_32; + break; + default: + return -EINVAL; + } + + spin_lock_irqsave(&edmac->lock, flags); + edmac->runtime_addr = addr; + edmac->runtime_ctrl = ctrl; + spin_unlock_irqrestore(&edmac->lock, flags); + + return 0; +} + +/** + * ep93xx_dma_control - manipulate all pending operations on a channel + * @chan: channel + * @cmd: control command to perform + * @arg: optional argument + * + * Controls the channel.
Function returns %0 in case of success or negative + * error in case of failure. + */ +static int ep93xx_dma_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd, + unsigned long arg) +{ + struct ep93xx_dma_chan *edmac = to_ep93xx_dma_chan(chan); + struct dma_slave_config *config; + + switch (cmd) { + case DMA_TERMINATE_ALL: + return ep93xx_dma_terminate_all(edmac); + + case DMA_SLAVE_CONFIG: + config = (struct dma_slave_config *)arg; + return ep93xx_dma_slave_config(edmac, config); + + default: + break; + } + + return -ENOSYS; +} + +/** + * ep93xx_dma_tx_status - check if a transaction is completed + * @chan: channel + * @cookie: transaction specific cookie + * @state: state of the transaction is stored here if given + * + * This function can be used to query state of a given transaction. + */ +static enum dma_status ep93xx_dma_tx_status(struct dma_chan *chan, + dma_cookie_t cookie, + struct dma_tx_state *state) +{ + struct ep93xx_dma_chan *edmac = to_ep93xx_dma_chan(chan); + dma_cookie_t last_used, last_completed; + enum dma_status ret; + unsigned long flags; + + spin_lock_irqsave(&edmac->lock, flags); + last_used = chan->cookie; + last_completed = edmac->last_completed; + spin_unlock_irqrestore(&edmac->lock, flags); + + ret = dma_async_is_complete(cookie, last_completed, last_used); + dma_set_tx_state(state, last_completed, last_used, 0); + + return ret; +} + +/** + * ep93xx_dma_issue_pending - push pending transactions to the hardware + * @chan: channel + * + * When this function is called, all pending transactions are pushed to the + * hardware and executed. + */ +static void ep93xx_dma_issue_pending(struct dma_chan *chan) +{ + ep93xx_dma_advance_work(to_ep93xx_dma_chan(chan)); +} + +static int __init ep93xx_dma_probe(struct platform_device *pdev) +{ + struct ep93xx_dma_platform_data *pdata = dev_get_platdata(&pdev->dev); + struct ep93xx_dma_engine *edma; + struct dma_device *dma_dev; + size_t edma_size; + int ret, i; + + edma_size = pdata->num_channels * sizeof(struct ep93xx_dma_chan); + edma = kzalloc(sizeof(*edma) + edma_size, GFP_KERNEL); + if (!edma) + return -ENOMEM; + + dma_dev = &edma->dma_dev; + edma->m2m = platform_get_device_id(pdev)->driver_data; + edma->num_channels = pdata->num_channels; + + INIT_LIST_HEAD(&dma_dev->channels); + for (i = 0; i < pdata->num_channels; i++) { + const struct ep93xx_dma_chan_data *cdata = &pdata->channels[i]; + struct ep93xx_dma_chan *edmac = &edma->channels[i]; + + edmac->chan.device = dma_dev; + edmac->regs = cdata->base; + edmac->irq = cdata->irq; + edmac->edma = edma; + + edmac->clk = clk_get(NULL, cdata->name); + if (IS_ERR(edmac->clk)) { + dev_warn(&pdev->dev, "failed to get clock for %s\n", + cdata->name); + continue; + } + + spin_lock_init(&edmac->lock); + INIT_LIST_HEAD(&edmac->active); + INIT_LIST_HEAD(&edmac->queue); + INIT_LIST_HEAD(&edmac->free_list); + tasklet_init(&edmac->tasklet, ep93xx_dma_tasklet, + (unsigned long)edmac); + + list_add_tail(&edmac->chan.device_node, + &dma_dev->channels); + } + + dma_cap_zero(dma_dev->cap_mask); + dma_cap_set(DMA_SLAVE, dma_dev->cap_mask); + dma_cap_set(DMA_CYCLIC, dma_dev->cap_mask); + + dma_dev->dev = &pdev->dev; + dma_dev->device_alloc_chan_resources = ep93xx_dma_alloc_chan_resources; + dma_dev->device_free_chan_resources = ep93xx_dma_free_chan_resources; + dma_dev->device_prep_slave_sg = ep93xx_dma_prep_slave_sg; + dma_dev->device_prep_dma_cyclic = ep93xx_dma_prep_dma_cyclic; + dma_dev->device_control = ep93xx_dma_control; + dma_dev->device_issue_pending = ep93xx_dma_issue_pending; + 
dma_dev->device_tx_status = ep93xx_dma_tx_status; + + dma_set_max_seg_size(dma_dev->dev, DMA_MAX_CHAN_BYTES); + + if (edma->m2m) { + dma_cap_set(DMA_MEMCPY, dma_dev->cap_mask); + dma_dev->device_prep_dma_memcpy = ep93xx_dma_prep_dma_memcpy; + + edma->hw_setup = m2m_hw_setup; + edma->hw_shutdown = m2m_hw_shutdown; + edma->hw_submit = m2m_hw_submit; + edma->hw_interrupt = m2m_hw_interrupt; + } else { + dma_cap_set(DMA_PRIVATE, dma_dev->cap_mask); + + edma->hw_setup = m2p_hw_setup; + edma->hw_shutdown = m2p_hw_shutdown; + edma->hw_submit = m2p_hw_submit; + edma->hw_interrupt = m2p_hw_interrupt; + } + + ret = dma_async_device_register(dma_dev); + if (unlikely(ret)) { + for (i = 0; i < edma->num_channels; i++) { + struct ep93xx_dma_chan *edmac = &edma->channels[i]; + if (!IS_ERR_OR_NULL(edmac->clk)) + clk_put(edmac->clk); + } + kfree(edma); + } else { + dev_info(dma_dev->dev, "EP93xx M2%s DMA ready\n", + edma->m2m ? "M" : "P"); + } + + return ret; +} + +static struct platform_device_id ep93xx_dma_driver_ids[] = { + { "ep93xx-dma-m2p", 0 }, + { "ep93xx-dma-m2m", 1 }, + { }, +}; + +static struct platform_driver ep93xx_dma_driver = { + .driver = { + .name = "ep93xx-dma", + }, + .id_table = ep93xx_dma_driver_ids, +}; + +static int __init ep93xx_dma_module_init(void) +{ + return platform_driver_probe(&ep93xx_dma_driver, ep93xx_dma_probe); +} +subsys_initcall(ep93xx_dma_module_init); + +MODULE_AUTHOR("Mika Westerberg <mika.westerberg@iki.fi>"); +MODULE_DESCRIPTION("EP93xx DMA driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c index b6d1455fa936..ec53980f8fcf 100644 --- a/drivers/dma/imx-sdma.c +++ b/drivers/dma/imx-sdma.c @@ -1281,8 +1281,10 @@ static int __init sdma_probe(struct platform_device *pdev) goto err_request_irq; sdma->script_addrs = kzalloc(sizeof(*sdma->script_addrs), GFP_KERNEL); - if (!sdma->script_addrs) + if (!sdma->script_addrs) { + ret = -ENOMEM; goto err_alloc; + } sdma->version = pdata->sdma_version; diff --git a/drivers/dma/intel_mid_dma.c b/drivers/dma/intel_mid_dma.c index f653517ef744..8a3fdd87db97 100644 --- a/drivers/dma/intel_mid_dma.c +++ b/drivers/dma/intel_mid_dma.c @@ -1351,7 +1351,6 @@ int dma_suspend(struct pci_dev *pci, pm_message_t state) return -EAGAIN; } device->state = SUSPENDED; - pci_set_drvdata(pci, device); pci_save_state(pci); pci_disable_device(pci); pci_set_power_state(pci, PCI_D3hot); @@ -1380,7 +1379,6 @@ int dma_resume(struct pci_dev *pci) } device->state = RUNNING; iowrite32(REG_BIT0, device->dma_base + DMA_CFG); - pci_set_drvdata(pci, device); return 0; } diff --git a/drivers/dma/ipu/ipu_idmac.c b/drivers/dma/ipu/ipu_idmac.c index c1a125e7d1df..25447a8ca282 100644 --- a/drivers/dma/ipu/ipu_idmac.c +++ b/drivers/dma/ipu/ipu_idmac.c @@ -1705,16 +1705,14 @@ static int __init ipu_probe(struct platform_device *pdev) ipu_data.irq_fn, ipu_data.irq_err, ipu_data.irq_base); /* Remap IPU common registers */ - ipu_data.reg_ipu = ioremap(mem_ipu->start, - mem_ipu->end - mem_ipu->start + 1); + ipu_data.reg_ipu = ioremap(mem_ipu->start, resource_size(mem_ipu)); if (!ipu_data.reg_ipu) { ret = -ENOMEM; goto err_ioremap_ipu; } /* Remap Image Converter and Image DMA Controller registers */ - ipu_data.reg_ic = ioremap(mem_ic->start, - mem_ic->end - mem_ic->start + 1); + ipu_data.reg_ic = ioremap(mem_ic->start, resource_size(mem_ic)); if (!ipu_data.reg_ic) { ret = -ENOMEM; goto err_ioremap_ic; diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c index 954e334e01bb..9a353c2216d0 100644 --- a/drivers/dma/mv_xor.c +++ 
b/drivers/dma/mv_xor.c @@ -1305,7 +1305,7 @@ static int mv_xor_shared_probe(struct platform_device *pdev) return -ENODEV; msp->xor_base = devm_ioremap(&pdev->dev, res->start, - res->end - res->start + 1); + resource_size(res)); if (!msp->xor_base) return -EBUSY; @@ -1314,7 +1314,7 @@ static int mv_xor_shared_probe(struct platform_device *pdev) return -ENODEV; msp->xor_high_base = devm_ioremap(&pdev->dev, res->start, - res->end - res->start + 1); + resource_size(res)); if (!msp->xor_high_base) return -EBUSY; diff --git a/drivers/dma/mxs-dma.c b/drivers/dma/mxs-dma.c index 88aad4f54002..be641cbd36fc 100644 --- a/drivers/dma/mxs-dma.c +++ b/drivers/dma/mxs-dma.c @@ -327,10 +327,12 @@ static int mxs_dma_alloc_chan_resources(struct dma_chan *chan) memset(mxs_chan->ccw, 0, PAGE_SIZE); - ret = request_irq(mxs_chan->chan_irq, mxs_dma_int_handler, - 0, "mxs-dma", mxs_dma); - if (ret) - goto err_irq; + if (mxs_chan->chan_irq != NO_IRQ) { + ret = request_irq(mxs_chan->chan_irq, mxs_dma_int_handler, + 0, "mxs-dma", mxs_dma); + if (ret) + goto err_irq; + } ret = clk_enable(mxs_dma->clk); if (ret) @@ -535,6 +537,7 @@ static int mxs_dma_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd, switch (cmd) { case DMA_TERMINATE_ALL: mxs_dma_disable_chan(mxs_chan); + mxs_dma_reset_chan(mxs_chan); break; case DMA_PAUSE: mxs_dma_pause_chan(mxs_chan); @@ -707,6 +710,8 @@ static struct platform_device_id mxs_dma_type[] = { }, { .name = "mxs-dma-apbx", .driver_data = MXS_DMA_APBX, + }, { + /* end of list */ } }; diff --git a/drivers/dma/pch_dma.c b/drivers/dma/pch_dma.c index ff5b38f9d45b..1ac8d4b580b7 100644 --- a/drivers/dma/pch_dma.c +++ b/drivers/dma/pch_dma.c @@ -45,7 +45,8 @@ #define DMA_STATUS_MASK_BITS 0x3 #define DMA_STATUS_SHIFT_BITS 16 #define DMA_STATUS_IRQ(x) (0x1 << (x)) -#define DMA_STATUS_ERR(x) (0x1 << ((x) + 8)) +#define DMA_STATUS0_ERR(x) (0x1 << ((x) + 8)) +#define DMA_STATUS2_ERR(x) (0x1 << (x)) #define DMA_DESC_WIDTH_SHIFT_BITS 12 #define DMA_DESC_WIDTH_1_BYTE (0x3 << DMA_DESC_WIDTH_SHIFT_BITS) @@ -61,6 +62,9 @@ #define MAX_CHAN_NR 8 +#define DMA_MASK_CTL0_MODE 0x33333333 +#define DMA_MASK_CTL2_MODE 0x00003333 + static unsigned int init_nr_desc_per_channel = 64; module_param(init_nr_desc_per_channel, uint, 0644); MODULE_PARM_DESC(init_nr_desc_per_channel, @@ -133,6 +137,7 @@ struct pch_dma { #define PCH_DMA_CTL3 0x0C #define PCH_DMA_STS0 0x10 #define PCH_DMA_STS1 0x14 +#define PCH_DMA_STS2 0x18 #define dma_readl(pd, name) \ readl((pd)->membase + PCH_DMA_##name) @@ -183,13 +188,19 @@ static void pdc_enable_irq(struct dma_chan *chan, int enable) { struct pch_dma *pd = to_pd(chan->device); u32 val; + int pos; + + if (chan->chan_id < 8) + pos = chan->chan_id; + else + pos = chan->chan_id + 8; val = dma_readl(pd, CTL2); if (enable) - val |= 0x1 << chan->chan_id; + val |= 0x1 << pos; else - val &= ~(0x1 << chan->chan_id); + val &= ~(0x1 << pos); dma_writel(pd, CTL2, val); @@ -202,10 +213,17 @@ static void pdc_set_dir(struct dma_chan *chan) struct pch_dma_chan *pd_chan = to_pd_chan(chan); struct pch_dma *pd = to_pd(chan->device); u32 val; + u32 mask_mode; + u32 mask_ctl; if (chan->chan_id < 8) { val = dma_readl(pd, CTL0); + mask_mode = DMA_CTL0_MODE_MASK_BITS << + (DMA_CTL0_BITS_PER_CH * chan->chan_id); + mask_ctl = DMA_MASK_CTL0_MODE & ~(DMA_CTL0_MODE_MASK_BITS << + (DMA_CTL0_BITS_PER_CH * chan->chan_id)); + val &= mask_mode; if (pd_chan->dir == DMA_TO_DEVICE) val |= 0x1 << (DMA_CTL0_BITS_PER_CH * chan->chan_id + DMA_CTL0_DIR_SHIFT_BITS); @@ -213,18 +231,24 @@ static void pdc_set_dir(struct dma_chan 
*chan) val &= ~(0x1 << (DMA_CTL0_BITS_PER_CH * chan->chan_id + DMA_CTL0_DIR_SHIFT_BITS)); + val |= mask_ctl; dma_writel(pd, CTL0, val); } else { int ch = chan->chan_id - 8; /* ch8-->0 ch9-->1 ... ch11->3 */ val = dma_readl(pd, CTL3); + mask_mode = DMA_CTL0_MODE_MASK_BITS << + (DMA_CTL0_BITS_PER_CH * ch); + mask_ctl = DMA_MASK_CTL2_MODE & ~(DMA_CTL0_MODE_MASK_BITS << + (DMA_CTL0_BITS_PER_CH * ch)); + val &= mask_mode; if (pd_chan->dir == DMA_TO_DEVICE) val |= 0x1 << (DMA_CTL0_BITS_PER_CH * ch + DMA_CTL0_DIR_SHIFT_BITS); else val &= ~(0x1 << (DMA_CTL0_BITS_PER_CH * ch + DMA_CTL0_DIR_SHIFT_BITS)); - + val |= mask_ctl; dma_writel(pd, CTL3, val); } @@ -236,33 +260,37 @@ static void pdc_set_mode(struct dma_chan *chan, u32 mode) { struct pch_dma *pd = to_pd(chan->device); u32 val; + u32 mask_ctl; + u32 mask_dir; if (chan->chan_id < 8) { + mask_ctl = DMA_MASK_CTL0_MODE & ~(DMA_CTL0_MODE_MASK_BITS << + (DMA_CTL0_BITS_PER_CH * chan->chan_id)); + mask_dir = 1 << (DMA_CTL0_BITS_PER_CH * chan->chan_id +\ + DMA_CTL0_DIR_SHIFT_BITS); val = dma_readl(pd, CTL0); - - val &= ~(DMA_CTL0_MODE_MASK_BITS << - (DMA_CTL0_BITS_PER_CH * chan->chan_id)); + val &= mask_dir; val |= mode << (DMA_CTL0_BITS_PER_CH * chan->chan_id); - + val |= mask_ctl; dma_writel(pd, CTL0, val); } else { int ch = chan->chan_id - 8; /* ch8-->0 ch9-->1 ... ch11->3 */ - + mask_ctl = DMA_MASK_CTL2_MODE & ~(DMA_CTL0_MODE_MASK_BITS << + (DMA_CTL0_BITS_PER_CH * ch)); + mask_dir = 1 << (DMA_CTL0_BITS_PER_CH * ch +\ + DMA_CTL0_DIR_SHIFT_BITS); val = dma_readl(pd, CTL3); - - val &= ~(DMA_CTL0_MODE_MASK_BITS << - (DMA_CTL0_BITS_PER_CH * ch)); + val &= mask_dir; val |= mode << (DMA_CTL0_BITS_PER_CH * ch); - + val |= mask_ctl; dma_writel(pd, CTL3, val); - } dev_dbg(chan2dev(chan), "pdc_set_mode: chan %d -> %x\n", chan->chan_id, val); } -static u32 pdc_get_status(struct pch_dma_chan *pd_chan) +static u32 pdc_get_status0(struct pch_dma_chan *pd_chan) { struct pch_dma *pd = to_pd(pd_chan->chan.device); u32 val; @@ -272,9 +300,27 @@ static u32 pdc_get_status(struct pch_dma_chan *pd_chan) DMA_STATUS_BITS_PER_CH * pd_chan->chan.chan_id)); } +static u32 pdc_get_status2(struct pch_dma_chan *pd_chan) +{ + struct pch_dma *pd = to_pd(pd_chan->chan.device); + u32 val; + + val = dma_readl(pd, STS2); + return DMA_STATUS_MASK_BITS & (val >> (DMA_STATUS_SHIFT_BITS + + DMA_STATUS_BITS_PER_CH * (pd_chan->chan.chan_id - 8))); +} + static bool pdc_is_idle(struct pch_dma_chan *pd_chan) { - if (pdc_get_status(pd_chan) == DMA_STATUS_IDLE) + u32 sts; + + if (pd_chan->chan.chan_id < 8) + sts = pdc_get_status0(pd_chan); + else + sts = pdc_get_status2(pd_chan); + + + if (sts == DMA_STATUS_IDLE) return true; else return false; @@ -495,11 +541,11 @@ static int pd_alloc_chan_resources(struct dma_chan *chan) list_add_tail(&desc->desc_node, &tmp_list); } - spin_lock_bh(&pd_chan->lock); + spin_lock_irq(&pd_chan->lock); list_splice(&tmp_list, &pd_chan->free_list); pd_chan->descs_allocated = i; pd_chan->completed_cookie = chan->cookie = 1; - spin_unlock_bh(&pd_chan->lock); + spin_unlock_irq(&pd_chan->lock); pdc_enable_irq(chan, 1); @@ -517,10 +563,10 @@ static void pd_free_chan_resources(struct dma_chan *chan) BUG_ON(!list_empty(&pd_chan->active_list)); BUG_ON(!list_empty(&pd_chan->queue)); - spin_lock_bh(&pd_chan->lock); + spin_lock_irq(&pd_chan->lock); list_splice_init(&pd_chan->free_list, &tmp_list); pd_chan->descs_allocated = 0; - spin_unlock_bh(&pd_chan->lock); + spin_unlock_irq(&pd_chan->lock); list_for_each_entry_safe(desc, _d, &tmp_list, desc_node) pci_pool_free(pd->pool, desc, 
desc->txd.phys); @@ -536,10 +582,10 @@ static enum dma_status pd_tx_status(struct dma_chan *chan, dma_cookie_t cookie, dma_cookie_t last_completed; int ret; - spin_lock_bh(&pd_chan->lock); + spin_lock_irq(&pd_chan->lock); last_completed = pd_chan->completed_cookie; last_used = chan->cookie; - spin_unlock_bh(&pd_chan->lock); + spin_unlock_irq(&pd_chan->lock); ret = dma_async_is_complete(cookie, last_completed, last_used); @@ -654,7 +700,7 @@ static int pd_device_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd, if (cmd != DMA_TERMINATE_ALL) return -ENXIO; - spin_lock_bh(&pd_chan->lock); + spin_lock_irq(&pd_chan->lock); pdc_set_mode(&pd_chan->chan, DMA_CTL0_DISABLE); @@ -664,7 +710,7 @@ static int pd_device_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd, list_for_each_entry_safe(desc, _d, &list, desc_node) pdc_chain_complete(pd_chan, desc); - spin_unlock_bh(&pd_chan->lock); + spin_unlock_irq(&pd_chan->lock); return 0; } @@ -693,30 +739,45 @@ static irqreturn_t pd_irq(int irq, void *devid) struct pch_dma *pd = (struct pch_dma *)devid; struct pch_dma_chan *pd_chan; u32 sts0; + u32 sts2; int i; - int ret = IRQ_NONE; + int ret0 = IRQ_NONE; + int ret2 = IRQ_NONE; sts0 = dma_readl(pd, STS0); + sts2 = dma_readl(pd, STS2); dev_dbg(pd->dma.dev, "pd_irq sts0: %x\n", sts0); for (i = 0; i < pd->dma.chancnt; i++) { pd_chan = &pd->channels[i]; - if (sts0 & DMA_STATUS_IRQ(i)) { - if (sts0 & DMA_STATUS_ERR(i)) - set_bit(0, &pd_chan->err_status); + if (i < 8) { + if (sts0 & DMA_STATUS_IRQ(i)) { + if (sts0 & DMA_STATUS0_ERR(i)) + set_bit(0, &pd_chan->err_status); - tasklet_schedule(&pd_chan->tasklet); - ret = IRQ_HANDLED; - } + tasklet_schedule(&pd_chan->tasklet); + ret0 = IRQ_HANDLED; + } + } else { + if (sts2 & DMA_STATUS_IRQ(i - 8)) { + if (sts2 & DMA_STATUS2_ERR(i)) + set_bit(0, &pd_chan->err_status); + tasklet_schedule(&pd_chan->tasklet); + ret2 = IRQ_HANDLED; + } + } } /* clear interrupt bits in status register */ - dma_writel(pd, STS0, sts0); + if (ret0) + dma_writel(pd, STS0, sts0); + if (ret2) + dma_writel(pd, STS2, sts2); - return ret; + return ret0 | ret2; } #ifdef CONFIG_PM diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c index 6abe1ec1f2ce..00eee59e8b33 100644 --- a/drivers/dma/pl330.c +++ b/drivers/dma/pl330.c @@ -82,7 +82,7 @@ struct dma_pl330_dmac { spinlock_t pool_lock; /* Peripheral channels connected to this DMAC */ - struct dma_pl330_chan peripherals[0]; /* keep at end */ + struct dma_pl330_chan *peripherals; /* keep at end */ }; struct dma_pl330_desc { @@ -451,8 +451,13 @@ static struct dma_pl330_desc *pl330_get_desc(struct dma_pl330_chan *pch) desc->txd.cookie = 0; async_tx_ack(&desc->txd); - desc->req.rqtype = peri->rqtype; - desc->req.peri = peri->peri_id; + if (peri) { + desc->req.rqtype = peri->rqtype; + desc->req.peri = peri->peri_id; + } else { + desc->req.rqtype = MEMTOMEM; + desc->req.peri = 0; + } dma_async_tx_descriptor_init(&desc->txd, &pch->chan); @@ -529,10 +534,10 @@ pl330_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dst, struct pl330_info *pi; int burst; - if (unlikely(!pch || !len || !peri)) + if (unlikely(!pch || !len)) return NULL; - if (peri->rqtype != MEMTOMEM) + if (peri && peri->rqtype != MEMTOMEM) return NULL; pi = &pch->dmac->pif; @@ -577,7 +582,7 @@ pl330_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, int i, burst_size; dma_addr_t addr; - if (unlikely(!pch || !sgl || !sg_len)) + if (unlikely(!pch || !sgl || !sg_len || !peri)) return NULL; /* Make sure the direction is consistent */ @@ -666,17 +671,12 @@ pl330_probe(struct 
amba_device *adev, const struct amba_id *id) struct dma_device *pd; struct resource *res; int i, ret, irq; + int num_chan; pdat = adev->dev.platform_data; - if (!pdat || !pdat->nr_valid_peri) { - dev_err(&adev->dev, "platform data missing\n"); - return -ENODEV; - } - /* Allocate a new DMAC and its Channels */ - pdmac = kzalloc(pdat->nr_valid_peri * sizeof(*pch) - + sizeof(*pdmac), GFP_KERNEL); + pdmac = kzalloc(sizeof(*pdmac), GFP_KERNEL); if (!pdmac) { dev_err(&adev->dev, "unable to allocate mem\n"); return -ENOMEM; @@ -685,7 +685,7 @@ pl330_probe(struct amba_device *adev, const struct amba_id *id) pi = &pdmac->pif; pi->dev = &adev->dev; pi->pl330_data = NULL; - pi->mcbufsz = pdat->mcbuf_sz; + pi->mcbufsz = pdat ? pdat->mcbuf_sz : 0; res = &adev->res; request_mem_region(res->start, resource_size(res), "dma-pl330"); @@ -717,27 +717,35 @@ pl330_probe(struct amba_device *adev, const struct amba_id *id) INIT_LIST_HEAD(&pd->channels); /* Initialize channel parameters */ - for (i = 0; i < pdat->nr_valid_peri; i++) { - struct dma_pl330_peri *peri = &pdat->peri[i]; - pch = &pdmac->peripherals[i]; + num_chan = max(pdat ? pdat->nr_valid_peri : 0, (u8)pi->pcfg.num_chan); + pdmac->peripherals = kzalloc(num_chan * sizeof(*pch), GFP_KERNEL); - switch (peri->rqtype) { - case MEMTOMEM: + for (i = 0; i < num_chan; i++) { + pch = &pdmac->peripherals[i]; + if (pdat) { + struct dma_pl330_peri *peri = &pdat->peri[i]; + + switch (peri->rqtype) { + case MEMTOMEM: + dma_cap_set(DMA_MEMCPY, pd->cap_mask); + break; + case MEMTODEV: + case DEVTOMEM: + dma_cap_set(DMA_SLAVE, pd->cap_mask); + break; + default: + dev_err(&adev->dev, "DEVTODEV Not Supported\n"); + continue; + } + pch->chan.private = peri; + } else { dma_cap_set(DMA_MEMCPY, pd->cap_mask); - break; - case MEMTODEV: - case DEVTOMEM: - dma_cap_set(DMA_SLAVE, pd->cap_mask); - break; - default: - dev_err(&adev->dev, "DEVTODEV Not Supported\n"); - continue; + pch->chan.private = NULL; } INIT_LIST_HEAD(&pch->work_list); spin_lock_init(&pch->lock); pch->pl330_chid = NULL; - pch->chan.private = peri; pch->chan.device = pd; pch->chan.chan_id = i; pch->dmac = pdmac; diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c index 8f222d4db7de..75ba5865d7a4 100644 --- a/drivers/dma/ste_dma40.c +++ b/drivers/dma/ste_dma40.c @@ -13,6 +13,7 @@ #include <linux/clk.h> #include <linux/delay.h> #include <linux/err.h> +#include <linux/amba/bus.h> #include <plat/ste_dma40.h> @@ -44,9 +45,6 @@ #define D40_ALLOC_PHY (1 << 30) #define D40_ALLOC_LOG_FREE 0 -/* Hardware designer of the block */ -#define D40_HW_DESIGNER 0x8 - /** * enum 40_command - The different commands and/or statuses. * @@ -185,6 +183,8 @@ struct d40_base; * @log_def: Default logical channel settings. * @lcla: Space for one dst src pair for logical channel transfers. * @lcpa: Pointer to dst and src lcpa settings. + * @runtime_addr: runtime configured address. + * @runtime_direction: runtime configured direction. * * This struct can either "be" a logical or a physical channel. 
*/ @@ -199,6 +199,7 @@ struct d40_chan { struct dma_chan chan; struct tasklet_struct tasklet; struct list_head client; + struct list_head pending_queue; struct list_head active; struct list_head queue; struct stedma40_chan_cfg dma_cfg; @@ -644,7 +645,20 @@ static struct d40_desc *d40_first_active_get(struct d40_chan *d40c) static void d40_desc_queue(struct d40_chan *d40c, struct d40_desc *desc) { - list_add_tail(&desc->node, &d40c->queue); + list_add_tail(&desc->node, &d40c->pending_queue); +} + +static struct d40_desc *d40_first_pending(struct d40_chan *d40c) +{ + struct d40_desc *d; + + if (list_empty(&d40c->pending_queue)) + return NULL; + + d = list_first_entry(&d40c->pending_queue, + struct d40_desc, + node); + return d; } static struct d40_desc *d40_first_queued(struct d40_chan *d40c) @@ -801,6 +815,11 @@ static void d40_term_all(struct d40_chan *d40c) d40_desc_free(d40c, d40d); } + /* Release pending descriptors */ + while ((d40d = d40_first_pending(d40c))) { + d40_desc_remove(d40d); + d40_desc_free(d40c, d40d); + } d40c->pending_tx = 0; d40c->busy = false; @@ -2091,7 +2110,7 @@ dma40_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t dma_addr, struct scatterlist *sg; int i; - sg = kcalloc(periods + 1, sizeof(struct scatterlist), GFP_KERNEL); + sg = kcalloc(periods + 1, sizeof(struct scatterlist), GFP_NOWAIT); for (i = 0; i < periods; i++) { sg_dma_address(&sg[i]) = dma_addr; sg_dma_len(&sg[i]) = period_len; @@ -2151,24 +2170,87 @@ static void d40_issue_pending(struct dma_chan *chan) spin_lock_irqsave(&d40c->lock, flags); - /* Busy means that pending jobs are already being processed */ + list_splice_tail_init(&d40c->pending_queue, &d40c->queue); + + /* Busy means that queued jobs are already being processed */ if (!d40c->busy) (void) d40_queue_start(d40c); spin_unlock_irqrestore(&d40c->lock, flags); } +static int +dma40_config_to_halfchannel(struct d40_chan *d40c, + struct stedma40_half_channel_info *info, + enum dma_slave_buswidth width, + u32 maxburst) +{ + enum stedma40_periph_data_width addr_width; + int psize; + + switch (width) { + case DMA_SLAVE_BUSWIDTH_1_BYTE: + addr_width = STEDMA40_BYTE_WIDTH; + break; + case DMA_SLAVE_BUSWIDTH_2_BYTES: + addr_width = STEDMA40_HALFWORD_WIDTH; + break; + case DMA_SLAVE_BUSWIDTH_4_BYTES: + addr_width = STEDMA40_WORD_WIDTH; + break; + case DMA_SLAVE_BUSWIDTH_8_BYTES: + addr_width = STEDMA40_DOUBLEWORD_WIDTH; + break; + default: + dev_err(d40c->base->dev, + "illegal peripheral address width " + "requested (%d)\n", + width); + return -EINVAL; + } + + if (chan_is_logical(d40c)) { + if (maxburst >= 16) + psize = STEDMA40_PSIZE_LOG_16; + else if (maxburst >= 8) + psize = STEDMA40_PSIZE_LOG_8; + else if (maxburst >= 4) + psize = STEDMA40_PSIZE_LOG_4; + else + psize = STEDMA40_PSIZE_LOG_1; + } else { + if (maxburst >= 16) + psize = STEDMA40_PSIZE_PHY_16; + else if (maxburst >= 8) + psize = STEDMA40_PSIZE_PHY_8; + else if (maxburst >= 4) + psize = STEDMA40_PSIZE_PHY_4; + else + psize = STEDMA40_PSIZE_PHY_1; + } + + info->data_width = addr_width; + info->psize = psize; + info->flow_ctrl = STEDMA40_NO_FLOW_CTRL; + + return 0; +} + /* Runtime reconfiguration extension */ -static void d40_set_runtime_config(struct dma_chan *chan, - struct dma_slave_config *config) +static int d40_set_runtime_config(struct dma_chan *chan, + struct dma_slave_config *config) { struct d40_chan *d40c = container_of(chan, struct d40_chan, chan); struct stedma40_chan_cfg *cfg = &d40c->dma_cfg; - enum dma_slave_buswidth config_addr_width; + enum dma_slave_buswidth 
src_addr_width, dst_addr_width; dma_addr_t config_addr; - u32 config_maxburst; - enum stedma40_periph_data_width addr_width; - int psize; + u32 src_maxburst, dst_maxburst; + int ret; + + src_addr_width = config->src_addr_width; + src_maxburst = config->src_maxburst; + dst_addr_width = config->dst_addr_width; + dst_maxburst = config->dst_maxburst; if (config->direction == DMA_FROM_DEVICE) { dma_addr_t dev_addr_rx = @@ -2187,8 +2269,11 @@ static void d40_set_runtime_config(struct dma_chan *chan, cfg->dir); cfg->dir = STEDMA40_PERIPH_TO_MEM; - config_addr_width = config->src_addr_width; - config_maxburst = config->src_maxburst; + /* Configure the memory side */ + if (dst_addr_width == DMA_SLAVE_BUSWIDTH_UNDEFINED) + dst_addr_width = src_addr_width; + if (dst_maxburst == 0) + dst_maxburst = src_maxburst; } else if (config->direction == DMA_TO_DEVICE) { dma_addr_t dev_addr_tx = @@ -2207,68 +2292,39 @@ static void d40_set_runtime_config(struct dma_chan *chan, cfg->dir); cfg->dir = STEDMA40_MEM_TO_PERIPH; - config_addr_width = config->dst_addr_width; - config_maxburst = config->dst_maxburst; - + /* Configure the memory side */ + if (src_addr_width == DMA_SLAVE_BUSWIDTH_UNDEFINED) + src_addr_width = dst_addr_width; + if (src_maxburst == 0) + src_maxburst = dst_maxburst; } else { dev_err(d40c->base->dev, "unrecognized channel direction %d\n", config->direction); - return; + return -EINVAL; } - switch (config_addr_width) { - case DMA_SLAVE_BUSWIDTH_1_BYTE: - addr_width = STEDMA40_BYTE_WIDTH; - break; - case DMA_SLAVE_BUSWIDTH_2_BYTES: - addr_width = STEDMA40_HALFWORD_WIDTH; - break; - case DMA_SLAVE_BUSWIDTH_4_BYTES: - addr_width = STEDMA40_WORD_WIDTH; - break; - case DMA_SLAVE_BUSWIDTH_8_BYTES: - addr_width = STEDMA40_DOUBLEWORD_WIDTH; - break; - default: + if (src_maxburst * src_addr_width != dst_maxburst * dst_addr_width) { dev_err(d40c->base->dev, - "illegal peripheral address width " - "requested (%d)\n", - config->src_addr_width); - return; + "src/dst width/maxburst mismatch: %d*%d != %d*%d\n", + src_maxburst, + src_addr_width, + dst_maxburst, + dst_addr_width); + return -EINVAL; } - if (chan_is_logical(d40c)) { - if (config_maxburst >= 16) - psize = STEDMA40_PSIZE_LOG_16; - else if (config_maxburst >= 8) - psize = STEDMA40_PSIZE_LOG_8; - else if (config_maxburst >= 4) - psize = STEDMA40_PSIZE_LOG_4; - else - psize = STEDMA40_PSIZE_LOG_1; - } else { - if (config_maxburst >= 16) - psize = STEDMA40_PSIZE_PHY_16; - else if (config_maxburst >= 8) - psize = STEDMA40_PSIZE_PHY_8; - else if (config_maxburst >= 4) - psize = STEDMA40_PSIZE_PHY_4; - else if (config_maxburst >= 2) - psize = STEDMA40_PSIZE_PHY_2; - else - psize = STEDMA40_PSIZE_PHY_1; - } + ret = dma40_config_to_halfchannel(d40c, &cfg->src_info, + src_addr_width, + src_maxburst); + if (ret) + return ret; - /* Set up all the endpoint configs */ - cfg->src_info.data_width = addr_width; - cfg->src_info.psize = psize; - cfg->src_info.big_endian = false; - cfg->src_info.flow_ctrl = STEDMA40_NO_FLOW_CTRL; - cfg->dst_info.data_width = addr_width; - cfg->dst_info.psize = psize; - cfg->dst_info.big_endian = false; - cfg->dst_info.flow_ctrl = STEDMA40_NO_FLOW_CTRL; + ret = dma40_config_to_halfchannel(d40c, &cfg->dst_info, + dst_addr_width, + dst_maxburst); + if (ret) + return ret; /* Fill in register values */ if (chan_is_logical(d40c)) @@ -2281,12 +2337,14 @@ static void d40_set_runtime_config(struct dma_chan *chan, d40c->runtime_addr = config_addr; d40c->runtime_direction = config->direction; dev_dbg(d40c->base->dev, - "configured channel %s for 
%s, data width %d, " - "maxburst %d bytes, LE, no flow control\n", + "configured channel %s for %s, data width %d/%d, " + "maxburst %d/%d elements, LE, no flow control\n", dma_chan_name(chan), (config->direction == DMA_FROM_DEVICE) ? "RX" : "TX", - config_addr_width, - config_maxburst); + src_addr_width, dst_addr_width, + src_maxburst, dst_maxburst); + + return 0; } static int d40_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd, @@ -2307,9 +2365,8 @@ static int d40_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd, case DMA_RESUME: return d40_resume(d40c); case DMA_SLAVE_CONFIG: - d40_set_runtime_config(chan, + return d40_set_runtime_config(chan, (struct dma_slave_config *) arg); - return 0; default: break; } @@ -2340,6 +2397,7 @@ static void __init d40_chan_init(struct d40_base *base, struct dma_device *dma, INIT_LIST_HEAD(&d40c->active); INIT_LIST_HEAD(&d40c->queue); + INIT_LIST_HEAD(&d40c->pending_queue); INIT_LIST_HEAD(&d40c->client); tasklet_init(&d40c->tasklet, dma_tasklet, @@ -2501,25 +2559,6 @@ static int __init d40_phy_res_init(struct d40_base *base) static struct d40_base * __init d40_hw_detect_init(struct platform_device *pdev) { - static const struct d40_reg_val dma_id_regs[] = { - /* Peripheral Id */ - { .reg = D40_DREG_PERIPHID0, .val = 0x0040}, - { .reg = D40_DREG_PERIPHID1, .val = 0x0000}, - /* - * D40_DREG_PERIPHID2 Depends on HW revision: - * DB8500ed has 0x0008, - * ? has 0x0018, - * DB8500v1 has 0x0028 - * DB8500v2 has 0x0038 - */ - { .reg = D40_DREG_PERIPHID3, .val = 0x0000}, - - /* PCell Id */ - { .reg = D40_DREG_CELLID0, .val = 0x000d}, - { .reg = D40_DREG_CELLID1, .val = 0x00f0}, - { .reg = D40_DREG_CELLID2, .val = 0x0005}, - { .reg = D40_DREG_CELLID3, .val = 0x00b1} - }; struct stedma40_platform_data *plat_data; struct clk *clk = NULL; void __iomem *virtbase = NULL; @@ -2528,8 +2567,9 @@ static struct d40_base * __init d40_hw_detect_init(struct platform_device *pdev) int num_log_chans = 0; int num_phy_chans; int i; - u32 val; - u32 rev; + u32 pid; + u32 cid; + u8 rev; clk = clk_get(&pdev->dev, NULL); @@ -2553,32 +2593,32 @@ static struct d40_base * __init d40_hw_detect_init(struct platform_device *pdev) if (!virtbase) goto failure; - /* HW version check */ - for (i = 0; i < ARRAY_SIZE(dma_id_regs); i++) { - if (dma_id_regs[i].val != - readl(virtbase + dma_id_regs[i].reg)) { - d40_err(&pdev->dev, - "Unknown hardware! Expected 0x%x at 0x%x but got 0x%x\n", - dma_id_regs[i].val, - dma_id_regs[i].reg, - readl(virtbase + dma_id_regs[i].reg)); - goto failure; - } - } + /* This is just a regular AMBA PrimeCell ID actually */ + for (pid = 0, i = 0; i < 4; i++) + pid |= (readl(virtbase + resource_size(res) - 0x20 + 4 * i) + & 255) << (i * 8); + for (cid = 0, i = 0; i < 4; i++) + cid |= (readl(virtbase + resource_size(res) - 0x10 + 4 * i) + & 255) << (i * 8); - /* Get silicon revision and designer */ - val = readl(virtbase + D40_DREG_PERIPHID2); - - if ((val & D40_DREG_PERIPHID2_DESIGNER_MASK) != - D40_HW_DESIGNER) { + if (cid != AMBA_CID) { + d40_err(&pdev->dev, "Unknown hardware! No PrimeCell ID\n"); + goto failure; + } + if (AMBA_MANF_BITS(pid) != AMBA_VENDOR_ST) { d40_err(&pdev->dev, "Unknown designer! Got %x wanted %x\n", - val & D40_DREG_PERIPHID2_DESIGNER_MASK, - D40_HW_DESIGNER); + AMBA_MANF_BITS(pid), + AMBA_VENDOR_ST); goto failure; } - - rev = (val & D40_DREG_PERIPHID2_REV_MASK) >> - D40_DREG_PERIPHID2_REV_POS; + /* + * HW revision: + * DB8500ed has revision 0 + * ? 
has revision 1 + * DB8500v1 has revision 2 + * DB8500v2 has revision 3 + */ + rev = AMBA_REV_BITS(pid); /* The number of physical channels on this HW */ num_phy_chans = 4 * (readl(virtbase + D40_DREG_ICFG) & 0x7) + 4; diff --git a/drivers/dma/ste_dma40_ll.h b/drivers/dma/ste_dma40_ll.h index 195ee65ee7f3..b44c455158de 100644 --- a/drivers/dma/ste_dma40_ll.h +++ b/drivers/dma/ste_dma40_ll.h @@ -184,9 +184,6 @@ #define D40_DREG_PERIPHID0 0xFE0 #define D40_DREG_PERIPHID1 0xFE4 #define D40_DREG_PERIPHID2 0xFE8 -#define D40_DREG_PERIPHID2_REV_POS 4 -#define D40_DREG_PERIPHID2_REV_MASK (0xf << D40_DREG_PERIPHID2_REV_POS) -#define D40_DREG_PERIPHID2_DESIGNER_MASK 0xf #define D40_DREG_PERIPHID3 0xFEC #define D40_DREG_CELLID0 0xFF0 #define D40_DREG_CELLID1 0xFF4 diff --git a/drivers/spi/ep93xx_spi.c b/drivers/spi/ep93xx_spi.c index d3570071e98f..1cf645479bfe 100644 --- a/drivers/spi/ep93xx_spi.c +++ b/drivers/spi/ep93xx_spi.c @@ -1,7 +1,7 @@ /* * Driver for Cirrus Logic EP93xx SPI controller. * - * Copyright (c) 2010 Mika Westerberg + * Copyright (C) 2010-2011 Mika Westerberg * * Explicit FIFO handling code was inspired by amba-pl022 driver. * @@ -21,13 +21,16 @@ #include <linux/err.h> #include <linux/delay.h> #include <linux/device.h> +#include <linux/dmaengine.h> #include <linux/bitops.h> #include <linux/interrupt.h> #include <linux/platform_device.h> #include <linux/workqueue.h> #include <linux/sched.h> +#include <linux/scatterlist.h> #include <linux/spi/spi.h> +#include <mach/dma.h> #include <mach/ep93xx_spi.h> #define SSPCR0 0x0000 @@ -71,6 +74,7 @@ * @pdev: pointer to platform device * @clk: clock for the controller * @regs_base: pointer to ioremap()'d registers + * @sspdr_phys: physical address of the SSPDR register * @irq: IRQ number used by the driver * @min_rate: minimum clock rate (in Hz) supported by the controller * @max_rate: maximum clock rate (in Hz) supported by the controller @@ -84,6 +88,14 @@ * @rx: current byte in transfer to receive * @fifo_level: how full is FIFO (%0..%SPI_FIFO_SIZE - %1). Receiving one * frame decreases this level and sending one frame increases it. + * @dma_rx: RX DMA channel + * @dma_tx: TX DMA channel + * @dma_rx_data: RX parameters passed to the DMA engine + * @dma_tx_data: TX parameters passed to the DMA engine + * @rx_sgt: sg table for RX transfers + * @tx_sgt: sg table for TX transfers + * @zeropage: dummy page used as RX buffer when only TX buffer is passed in by + * the client * * This structure holds EP93xx SPI controller specific information. When * @running is %true, driver accepts transfer requests from protocol drivers. @@ -100,6 +112,7 @@ struct ep93xx_spi { const struct platform_device *pdev; struct clk *clk; void __iomem *regs_base; + unsigned long sspdr_phys; int irq; unsigned long min_rate; unsigned long max_rate; @@ -112,6 +125,13 @@ struct ep93xx_spi { size_t tx; size_t rx; size_t fifo_level; + struct dma_chan *dma_rx; + struct dma_chan *dma_tx; + struct ep93xx_dma_data dma_rx_data; + struct ep93xx_dma_data dma_tx_data; + struct sg_table rx_sgt; + struct sg_table tx_sgt; + void *zeropage; }; /** @@ -496,14 +516,195 @@ static int ep93xx_spi_read_write(struct ep93xx_spi *espi) espi->fifo_level++; } - if (espi->rx == t->len) { - msg->actual_length += t->len; + if (espi->rx == t->len) return 0; - } return -EINPROGRESS; } +static void ep93xx_spi_pio_transfer(struct ep93xx_spi *espi) +{ + /* + * Now everything is set up for the current transfer. We prime the TX + * FIFO, enable interrupts, and wait for the transfer to complete. 
+ */ + if (ep93xx_spi_read_write(espi)) { + ep93xx_spi_enable_interrupts(espi); + wait_for_completion(&espi->wait); + } +} + +/** + * ep93xx_spi_dma_prepare() - prepares a DMA transfer + * @espi: ep93xx SPI controller struct + * @dir: DMA transfer direction + * + * Function configures the DMA, maps the buffer and prepares the DMA + * descriptor. Returns a valid DMA descriptor in case of success and ERR_PTR + * in case of failure. + */ +static struct dma_async_tx_descriptor * +ep93xx_spi_dma_prepare(struct ep93xx_spi *espi, enum dma_data_direction dir) +{ + struct spi_transfer *t = espi->current_msg->state; + struct dma_async_tx_descriptor *txd; + enum dma_slave_buswidth buswidth; + struct dma_slave_config conf; + struct scatterlist *sg; + struct sg_table *sgt; + struct dma_chan *chan; + const void *buf, *pbuf; + size_t len = t->len; + int i, ret, nents; + + if (bits_per_word(espi) > 8) + buswidth = DMA_SLAVE_BUSWIDTH_2_BYTES; + else + buswidth = DMA_SLAVE_BUSWIDTH_1_BYTE; + + memset(&conf, 0, sizeof(conf)); + conf.direction = dir; + + if (dir == DMA_FROM_DEVICE) { + chan = espi->dma_rx; + buf = t->rx_buf; + sgt = &espi->rx_sgt; + + conf.src_addr = espi->sspdr_phys; + conf.src_addr_width = buswidth; + } else { + chan = espi->dma_tx; + buf = t->tx_buf; + sgt = &espi->tx_sgt; + + conf.dst_addr = espi->sspdr_phys; + conf.dst_addr_width = buswidth; + } + + ret = dmaengine_slave_config(chan, &conf); + if (ret) + return ERR_PTR(ret); + + /* + * We need to split the transfer into PAGE_SIZE'd chunks. This is + * because we are using @espi->zeropage to provide a zero RX buffer + * for the TX transfers and we have only allocated one page for that. + * + * For performance reasons we allocate a new sg_table only when + * needed. Otherwise we will re-use the current one. Eventually the + * last sg_table is released in ep93xx_spi_release_dma(). + */ + + nents = DIV_ROUND_UP(len, PAGE_SIZE); + if (nents != sgt->nents) { + sg_free_table(sgt); + + ret = sg_alloc_table(sgt, nents, GFP_KERNEL); + if (ret) + return ERR_PTR(ret); + } + + pbuf = buf; + for_each_sg(sgt->sgl, sg, sgt->nents, i) { + size_t bytes = min_t(size_t, len, PAGE_SIZE); + + if (buf) { + sg_set_page(sg, virt_to_page(pbuf), bytes, + offset_in_page(pbuf)); + } else { + sg_set_page(sg, virt_to_page(espi->zeropage), + bytes, 0); + } + + pbuf += bytes; + len -= bytes; + } + + if (WARN_ON(len)) { + dev_warn(&espi->pdev->dev, "len = %d expected 0!", len); + return ERR_PTR(-EINVAL); + } + + nents = dma_map_sg(chan->device->dev, sgt->sgl, sgt->nents, dir); + if (!nents) + return ERR_PTR(-ENOMEM); + + txd = chan->device->device_prep_slave_sg(chan, sgt->sgl, nents, + dir, DMA_CTRL_ACK); + if (!txd) { + dma_unmap_sg(chan->device->dev, sgt->sgl, sgt->nents, dir); + return ERR_PTR(-ENOMEM); + } + return txd; +} + +/** + * ep93xx_spi_dma_finish() - finishes with a DMA transfer + * @espi: ep93xx SPI controller struct + * @dir: DMA transfer direction + * + * Function finishes with the DMA transfer. After this, the DMA buffer is + * unmapped. 
+ */
+static void ep93xx_spi_dma_finish(struct ep93xx_spi *espi,
+				   enum dma_data_direction dir)
+{
+	struct dma_chan *chan;
+	struct sg_table *sgt;
+
+	if (dir == DMA_FROM_DEVICE) {
+		chan = espi->dma_rx;
+		sgt = &espi->rx_sgt;
+	} else {
+		chan = espi->dma_tx;
+		sgt = &espi->tx_sgt;
+	}
+
+	dma_unmap_sg(chan->device->dev, sgt->sgl, sgt->nents, dir);
+}
+
+static void ep93xx_spi_dma_callback(void *callback_param)
+{
+	complete(callback_param);
+}
+
+static void ep93xx_spi_dma_transfer(struct ep93xx_spi *espi)
+{
+	struct spi_message *msg = espi->current_msg;
+	struct dma_async_tx_descriptor *rxd, *txd;
+
+	rxd = ep93xx_spi_dma_prepare(espi, DMA_FROM_DEVICE);
+	if (IS_ERR(rxd)) {
+		dev_err(&espi->pdev->dev, "DMA RX failed: %ld\n", PTR_ERR(rxd));
+		msg->status = PTR_ERR(rxd);
+		return;
+	}
+
+	txd = ep93xx_spi_dma_prepare(espi, DMA_TO_DEVICE);
+	if (IS_ERR(txd)) {
+		ep93xx_spi_dma_finish(espi, DMA_FROM_DEVICE);
+		dev_err(&espi->pdev->dev, "DMA TX failed: %ld\n", PTR_ERR(txd));
+		msg->status = PTR_ERR(txd);
+		return;
+	}
+
+	/* We are ready when RX is done */
+	rxd->callback = ep93xx_spi_dma_callback;
+	rxd->callback_param = &espi->wait;
+
+	/* Now submit both descriptors and wait while they finish */
+	dmaengine_submit(rxd);
+	dmaengine_submit(txd);
+
+	dma_async_issue_pending(espi->dma_rx);
+	dma_async_issue_pending(espi->dma_tx);
+
+	wait_for_completion(&espi->wait);
+
+	ep93xx_spi_dma_finish(espi, DMA_TO_DEVICE);
+	ep93xx_spi_dma_finish(espi, DMA_FROM_DEVICE);
+}
+
 /**
  * ep93xx_spi_process_transfer() - processes one SPI transfer
  * @espi: ep93xx SPI controller struct
@@ -556,13 +757,14 @@ static void ep93xx_spi_process_transfer(struct ep93xx_spi *espi,
 	espi->tx = 0;
 
 	/*
-	 * Now everything is set up for the current transfer. We prime the TX
-	 * FIFO, enable interrupts, and wait for the transfer to complete.
+	 * There is no point in setting up DMA for transfers which fit into
+	 * the FIFO and can be completed with a single interrupt, so in those
+	 * cases we use PIO and don't bother with DMA.
 	 */
-	if (ep93xx_spi_read_write(espi)) {
-		ep93xx_spi_enable_interrupts(espi);
-		wait_for_completion(&espi->wait);
-	}
+	if (espi->dma_rx && t->len > SPI_FIFO_SIZE)
+		ep93xx_spi_dma_transfer(espi);
+	else
+		ep93xx_spi_pio_transfer(espi);
 
 	/*
 	 * In case of error during transmit, we bail out from processing
@@ -571,6 +773,8 @@ static void ep93xx_spi_process_transfer(struct ep93xx_spi *espi,
 	if (msg->status)
 		return;
 
+	msg->actual_length += t->len;
+
 	/*
 	 * After this transfer is finished, perform any possible
 	 * post-transfer actions requested by the protocol driver.
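For readers not familiar with the slave-DMA flow that ep93xx_spi_dma_transfer() builds on, the sequence reduces to the sketch below. This is a minimal, hypothetical helper (the name do_one_slave_tx(), the on-stack completion and the buffer parameters are invented for illustration, and error handling is trimmed); it only chains together the dmaengine calls already used in this patch: dmaengine_slave_config(), device_prep_slave_sg(), dmaengine_submit(), dma_async_issue_pending() and a completion callback.

#include <linux/completion.h>
#include <linux/dmaengine.h>
#include <linux/scatterlist.h>
#include <linux/string.h>

/* Wake up the thread waiting in do_one_slave_tx() below. */
static void one_slave_tx_done(void *param)
{
	complete(param);
}

/*
 * Hypothetical helper: run one TX transfer on an already requested slave
 * DMA channel using an already DMA-mapped scatterlist. 'dev_fifo' is the
 * physical address of the peripheral data register (like sspdr_phys above).
 */
static int do_one_slave_tx(struct dma_chan *chan, dma_addr_t dev_fifo,
			   struct scatterlist *sgl, unsigned int nents)
{
	struct dma_async_tx_descriptor *txd;
	struct dma_slave_config conf;
	DECLARE_COMPLETION_ONSTACK(done);
	int ret;

	/* Tell the DMA driver where the peripheral register lives. */
	memset(&conf, 0, sizeof(conf));
	conf.direction = DMA_TO_DEVICE;
	conf.dst_addr = dev_fifo;
	conf.dst_addr_width = DMA_SLAVE_BUSWIDTH_1_BYTE;
	conf.dst_maxburst = 1;

	ret = dmaengine_slave_config(chan, &conf);
	if (ret)
		return ret;

	/* Get a descriptor for the mapped scatterlist. */
	txd = chan->device->device_prep_slave_sg(chan, sgl, nents,
						 DMA_TO_DEVICE, DMA_CTRL_ACK);
	if (!txd)
		return -ENOMEM;

	txd->callback = one_slave_tx_done;
	txd->callback_param = &done;

	/* Submit, kick the engine, and wait for the callback. */
	dmaengine_submit(txd);
	dma_async_issue_pending(chan);
	wait_for_completion(&done);

	return 0;
}

In the driver itself this sequence is split between ep93xx_spi_dma_prepare() and ep93xx_spi_dma_transfer(), and the completion callback is attached to the RX descriptor only, since the transfer as a whole is finished once the last RX byte has arrived.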
@@ -752,6 +956,75 @@ static irqreturn_t ep93xx_spi_interrupt(int irq, void *dev_id) return IRQ_HANDLED; } +static bool ep93xx_spi_dma_filter(struct dma_chan *chan, void *filter_param) +{ + if (ep93xx_dma_chan_is_m2p(chan)) + return false; + + chan->private = filter_param; + return true; +} + +static int ep93xx_spi_setup_dma(struct ep93xx_spi *espi) +{ + dma_cap_mask_t mask; + int ret; + + espi->zeropage = (void *)get_zeroed_page(GFP_KERNEL); + if (!espi->zeropage) + return -ENOMEM; + + dma_cap_zero(mask); + dma_cap_set(DMA_SLAVE, mask); + + espi->dma_rx_data.port = EP93XX_DMA_SSP; + espi->dma_rx_data.direction = DMA_FROM_DEVICE; + espi->dma_rx_data.name = "ep93xx-spi-rx"; + + espi->dma_rx = dma_request_channel(mask, ep93xx_spi_dma_filter, + &espi->dma_rx_data); + if (!espi->dma_rx) { + ret = -ENODEV; + goto fail_free_page; + } + + espi->dma_tx_data.port = EP93XX_DMA_SSP; + espi->dma_tx_data.direction = DMA_TO_DEVICE; + espi->dma_tx_data.name = "ep93xx-spi-tx"; + + espi->dma_tx = dma_request_channel(mask, ep93xx_spi_dma_filter, + &espi->dma_tx_data); + if (!espi->dma_tx) { + ret = -ENODEV; + goto fail_release_rx; + } + + return 0; + +fail_release_rx: + dma_release_channel(espi->dma_rx); + espi->dma_rx = NULL; +fail_free_page: + free_page((unsigned long)espi->zeropage); + + return ret; +} + +static void ep93xx_spi_release_dma(struct ep93xx_spi *espi) +{ + if (espi->dma_rx) { + dma_release_channel(espi->dma_rx); + sg_free_table(&espi->rx_sgt); + } + if (espi->dma_tx) { + dma_release_channel(espi->dma_tx); + sg_free_table(&espi->tx_sgt); + } + + if (espi->zeropage) + free_page((unsigned long)espi->zeropage); +} + static int __init ep93xx_spi_probe(struct platform_device *pdev) { struct spi_master *master; @@ -818,6 +1091,7 @@ static int __init ep93xx_spi_probe(struct platform_device *pdev) goto fail_put_clock; } + espi->sspdr_phys = res->start + SSPDR; espi->regs_base = ioremap(res->start, resource_size(res)); if (!espi->regs_base) { dev_err(&pdev->dev, "failed to map resources\n"); @@ -832,10 +1106,13 @@ static int __init ep93xx_spi_probe(struct platform_device *pdev) goto fail_unmap_regs; } + if (info->use_dma && ep93xx_spi_setup_dma(espi)) + dev_warn(&pdev->dev, "DMA setup failed. 
Falling back to PIO\n"); + espi->wq = create_singlethread_workqueue("ep93xx_spid"); if (!espi->wq) { dev_err(&pdev->dev, "unable to create workqueue\n"); - goto fail_free_irq; + goto fail_free_dma; } INIT_WORK(&espi->msg_work, ep93xx_spi_work); INIT_LIST_HEAD(&espi->msg_queue); @@ -857,7 +1134,8 @@ static int __init ep93xx_spi_probe(struct platform_device *pdev) fail_free_queue: destroy_workqueue(espi->wq); -fail_free_irq: +fail_free_dma: + ep93xx_spi_release_dma(espi); free_irq(espi->irq, espi); fail_unmap_regs: iounmap(espi->regs_base); @@ -901,6 +1179,7 @@ static int __exit ep93xx_spi_remove(struct platform_device *pdev) } spin_unlock_irq(&espi->lock); + ep93xx_spi_release_dma(espi); free_irq(espi->irq, espi); iounmap(espi->regs_base); res = platform_get_resource(pdev, IORESOURCE_MEM, 0); diff --git a/include/linux/amba/pl08x.h b/include/linux/amba/pl08x.h index 3111385b8ca7..e6e28f37d8ec 100644 --- a/include/linux/amba/pl08x.h +++ b/include/linux/amba/pl08x.h @@ -172,8 +172,11 @@ struct pl08x_dma_chan { int phychan_hold; struct tasklet_struct tasklet; char *name; - struct pl08x_channel_data *cd; - dma_addr_t runtime_addr; + const struct pl08x_channel_data *cd; + dma_addr_t src_addr; + dma_addr_t dst_addr; + u32 src_cctl; + u32 dst_cctl; enum dma_data_direction runtime_direction; dma_cookie_t lc; struct list_head pend_list; @@ -202,7 +205,7 @@ struct pl08x_dma_chan { * @mem_buses: buses which memory can be accessed from: PL08X_AHB1 | PL08X_AHB2 */ struct pl08x_platform_data { - struct pl08x_channel_data *slave_channels; + const struct pl08x_channel_data *slave_channels; unsigned int num_slave_channels; struct pl08x_channel_data memcpy_channel; int (*get_signal)(struct pl08x_dma_chan *); diff --git a/sound/soc/ep93xx/ep93xx-ac97.c b/sound/soc/ep93xx/ep93xx-ac97.c index 104e95cda0ad..c7417c76552b 100644 --- a/sound/soc/ep93xx/ep93xx-ac97.c +++ b/sound/soc/ep93xx/ep93xx-ac97.c @@ -106,12 +106,12 @@ static struct ep93xx_ac97_info *ep93xx_ac97_info; static struct ep93xx_pcm_dma_params ep93xx_ac97_pcm_out = { .name = "ac97-pcm-out", - .dma_port = EP93XX_DMA_M2P_PORT_AAC1, + .dma_port = EP93XX_DMA_AAC1, }; static struct ep93xx_pcm_dma_params ep93xx_ac97_pcm_in = { .name = "ac97-pcm-in", - .dma_port = EP93XX_DMA_M2P_PORT_AAC1, + .dma_port = EP93XX_DMA_AAC1, }; static inline unsigned ep93xx_ac97_read_reg(struct ep93xx_ac97_info *info, diff --git a/sound/soc/ep93xx/ep93xx-i2s.c b/sound/soc/ep93xx/ep93xx-i2s.c index 042f4e93746f..30df42568dbb 100644 --- a/sound/soc/ep93xx/ep93xx-i2s.c +++ b/sound/soc/ep93xx/ep93xx-i2s.c @@ -70,11 +70,11 @@ struct ep93xx_i2s_info { struct ep93xx_pcm_dma_params ep93xx_i2s_dma_params[] = { [SNDRV_PCM_STREAM_PLAYBACK] = { .name = "i2s-pcm-out", - .dma_port = EP93XX_DMA_M2P_PORT_I2S1, + .dma_port = EP93XX_DMA_I2S1, }, [SNDRV_PCM_STREAM_CAPTURE] = { .name = "i2s-pcm-in", - .dma_port = EP93XX_DMA_M2P_PORT_I2S1, + .dma_port = EP93XX_DMA_I2S1, }, }; diff --git a/sound/soc/ep93xx/ep93xx-pcm.c b/sound/soc/ep93xx/ep93xx-pcm.c index a456e491155f..a07f99c9c375 100644 --- a/sound/soc/ep93xx/ep93xx-pcm.c +++ b/sound/soc/ep93xx/ep93xx-pcm.c @@ -16,6 +16,7 @@ #include <linux/init.h> #include <linux/device.h> #include <linux/slab.h> +#include <linux/dmaengine.h> #include <linux/dma-mapping.h> #include <sound/core.h> @@ -53,43 +54,34 @@ static const struct snd_pcm_hardware ep93xx_pcm_hardware = { struct ep93xx_runtime_data { - struct ep93xx_dma_m2p_client cl; - struct ep93xx_pcm_dma_params *params; int pointer_bytes; - struct tasklet_struct period_tasklet; int periods; - struct 
ep93xx_dma_buffer buf[32]; + int period_bytes; + struct dma_chan *dma_chan; + struct ep93xx_dma_data dma_data; }; -static void ep93xx_pcm_period_elapsed(unsigned long data) +static void ep93xx_pcm_dma_callback(void *data) { - struct snd_pcm_substream *substream = (struct snd_pcm_substream *)data; - snd_pcm_period_elapsed(substream); -} + struct snd_pcm_substream *substream = data; + struct ep93xx_runtime_data *rtd = substream->runtime->private_data; -static void ep93xx_pcm_buffer_started(void *cookie, - struct ep93xx_dma_buffer *buf) -{ + rtd->pointer_bytes += rtd->period_bytes; + rtd->pointer_bytes %= rtd->period_bytes * rtd->periods; + + snd_pcm_period_elapsed(substream); } -static void ep93xx_pcm_buffer_finished(void *cookie, - struct ep93xx_dma_buffer *buf, - int bytes, int error) +static bool ep93xx_pcm_dma_filter(struct dma_chan *chan, void *filter_param) { - struct snd_pcm_substream *substream = cookie; - struct ep93xx_runtime_data *rtd = substream->runtime->private_data; - - if (buf == rtd->buf + rtd->periods - 1) - rtd->pointer_bytes = 0; - else - rtd->pointer_bytes += buf->size; + struct ep93xx_dma_data *data = filter_param; - if (!error) { - ep93xx_dma_m2p_submit_recursive(&rtd->cl, buf); - tasklet_schedule(&rtd->period_tasklet); - } else { - snd_pcm_stop(substream, SNDRV_PCM_STATE_XRUN); + if (data->direction == ep93xx_dma_chan_direction(chan)) { + chan->private = data; + return true; } + + return false; } static int ep93xx_pcm_open(struct snd_pcm_substream *substream) @@ -98,30 +90,38 @@ static int ep93xx_pcm_open(struct snd_pcm_substream *substream) struct snd_soc_dai *cpu_dai = soc_rtd->cpu_dai; struct ep93xx_pcm_dma_params *dma_params; struct ep93xx_runtime_data *rtd; + dma_cap_mask_t mask; int ret; - dma_params = snd_soc_dai_get_dma_data(cpu_dai, substream); + ret = snd_pcm_hw_constraint_integer(substream->runtime, + SNDRV_PCM_HW_PARAM_PERIODS); + if (ret < 0) + return ret; + snd_soc_set_runtime_hwparams(substream, &ep93xx_pcm_hardware); rtd = kmalloc(sizeof(*rtd), GFP_KERNEL); if (!rtd) return -ENOMEM; - memset(&rtd->period_tasklet, 0, sizeof(rtd->period_tasklet)); - rtd->period_tasklet.func = ep93xx_pcm_period_elapsed; - rtd->period_tasklet.data = (unsigned long)substream; - - rtd->cl.name = dma_params->name; - rtd->cl.flags = dma_params->dma_port | EP93XX_DMA_M2P_IGNORE_ERROR | - ((substream->stream == SNDRV_PCM_STREAM_PLAYBACK) ? 
- EP93XX_DMA_M2P_TX : EP93XX_DMA_M2P_RX); - rtd->cl.cookie = substream; - rtd->cl.buffer_started = ep93xx_pcm_buffer_started; - rtd->cl.buffer_finished = ep93xx_pcm_buffer_finished; - ret = ep93xx_dma_m2p_client_register(&rtd->cl); - if (ret < 0) { + dma_cap_zero(mask); + dma_cap_set(DMA_SLAVE, mask); + dma_cap_set(DMA_CYCLIC, mask); + + dma_params = snd_soc_dai_get_dma_data(cpu_dai, substream); + rtd->dma_data.port = dma_params->dma_port; + rtd->dma_data.name = dma_params->name; + + if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) + rtd->dma_data.direction = DMA_TO_DEVICE; + else + rtd->dma_data.direction = DMA_FROM_DEVICE; + + rtd->dma_chan = dma_request_channel(mask, ep93xx_pcm_dma_filter, + &rtd->dma_data); + if (!rtd->dma_chan) { kfree(rtd); - return ret; + return -EINVAL; } substream->runtime->private_data = rtd; @@ -132,31 +132,52 @@ static int ep93xx_pcm_close(struct snd_pcm_substream *substream) { struct ep93xx_runtime_data *rtd = substream->runtime->private_data; - ep93xx_dma_m2p_client_unregister(&rtd->cl); + dma_release_channel(rtd->dma_chan); kfree(rtd); return 0; } +static int ep93xx_pcm_dma_submit(struct snd_pcm_substream *substream) +{ + struct snd_pcm_runtime *runtime = substream->runtime; + struct ep93xx_runtime_data *rtd = runtime->private_data; + struct dma_chan *chan = rtd->dma_chan; + struct dma_device *dma_dev = chan->device; + struct dma_async_tx_descriptor *desc; + + rtd->pointer_bytes = 0; + desc = dma_dev->device_prep_dma_cyclic(chan, runtime->dma_addr, + rtd->period_bytes * rtd->periods, + rtd->period_bytes, + rtd->dma_data.direction); + if (!desc) + return -EINVAL; + + desc->callback = ep93xx_pcm_dma_callback; + desc->callback_param = substream; + + dmaengine_submit(desc); + return 0; +} + +static void ep93xx_pcm_dma_flush(struct snd_pcm_substream *substream) +{ + struct snd_pcm_runtime *runtime = substream->runtime; + struct ep93xx_runtime_data *rtd = runtime->private_data; + + dmaengine_terminate_all(rtd->dma_chan); +} + static int ep93xx_pcm_hw_params(struct snd_pcm_substream *substream, struct snd_pcm_hw_params *params) { struct snd_pcm_runtime *runtime = substream->runtime; struct ep93xx_runtime_data *rtd = runtime->private_data; - size_t totsize = params_buffer_bytes(params); - size_t period = params_period_bytes(params); - int i; snd_pcm_set_runtime_buffer(substream, &substream->dma_buffer); - runtime->dma_bytes = totsize; - - rtd->periods = (totsize + period - 1) / period; - for (i = 0; i < rtd->periods; i++) { - rtd->buf[i].bus_addr = runtime->dma_addr + (i * period); - rtd->buf[i].size = period; - if ((i + 1) * period > totsize) - rtd->buf[i].size = totsize - (i * period); - } + rtd->periods = params_periods(params); + rtd->period_bytes = params_period_bytes(params); return 0; } @@ -168,24 +189,20 @@ static int ep93xx_pcm_hw_free(struct snd_pcm_substream *substream) static int ep93xx_pcm_trigger(struct snd_pcm_substream *substream, int cmd) { - struct ep93xx_runtime_data *rtd = substream->runtime->private_data; int ret; - int i; ret = 0; switch (cmd) { case SNDRV_PCM_TRIGGER_START: case SNDRV_PCM_TRIGGER_RESUME: case SNDRV_PCM_TRIGGER_PAUSE_RELEASE: - rtd->pointer_bytes = 0; - for (i = 0; i < rtd->periods; i++) - ep93xx_dma_m2p_submit(&rtd->cl, rtd->buf + i); + ret = ep93xx_pcm_dma_submit(substream); break; case SNDRV_PCM_TRIGGER_STOP: case SNDRV_PCM_TRIGGER_SUSPEND: case SNDRV_PCM_TRIGGER_PAUSE_PUSH: - ep93xx_dma_m2p_flush(&rtd->cl); + ep93xx_pcm_dma_flush(substream); break; default: |