From 099f53cb50e45ef617a9f1d63ceec799e489418b Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Wed, 8 Apr 2009 14:28:37 -0700
Subject: async_tx: rename zero_sum to val

'zero_sum' does not properly describe the operation of generating parity
and checking that it validates against an existing buffer.  Change the
name of the operation to 'val' (for 'validate').  This is in
anticipation of the p+q case where it is a requirement to identify the
target parity buffers separately from the source buffers, because the
target parity buffers will not have corresponding pq coefficients.

Reviewed-by: Andre Noll <maan@systemlinux.org>
Acked-by: Maciej Sosnowski <maciej.sosnowski@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 include/linux/async_tx.h  | 2 +-
 include/linux/dmaengine.h | 8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h
index 5fc2ef8d97fa..513150d8c25b 100644
--- a/include/linux/async_tx.h
+++ b/include/linux/async_tx.h
@@ -117,7 +117,7 @@ async_xor(struct page *dest, struct page **src_list, unsigned int offset,
 	dma_async_tx_callback cb_fn, void *cb_fn_param);
 
 struct dma_async_tx_descriptor *
-async_xor_zero_sum(struct page *dest, struct page **src_list,
+async_xor_val(struct page *dest, struct page **src_list,
 	unsigned int offset, int src_cnt, size_t len,
 	u32 *result, enum async_tx_flags flags,
 	struct dma_async_tx_descriptor *depend_tx,
diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index 2e2aa3df170c..6768727d00d7 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -55,8 +55,8 @@ enum dma_transaction_type {
 	DMA_PQ_XOR,
 	DMA_DUAL_XOR,
 	DMA_PQ_UPDATE,
-	DMA_ZERO_SUM,
-	DMA_PQ_ZERO_SUM,
+	DMA_XOR_VAL,
+	DMA_PQ_VAL,
 	DMA_MEMSET,
 	DMA_MEMCPY_CRC32C,
 	DMA_INTERRUPT,
@@ -214,7 +214,7 @@ struct dma_async_tx_descriptor {
  * @device_free_chan_resources: release DMA channel's resources
  * @device_prep_dma_memcpy: prepares a memcpy operation
  * @device_prep_dma_xor: prepares a xor operation
- * @device_prep_dma_zero_sum: prepares a zero_sum operation
+ * @device_prep_dma_xor_val: prepares a xor validation operation
  * @device_prep_dma_memset: prepares a memset operation
  * @device_prep_dma_interrupt: prepares an end of chain interrupt operation
  * @device_prep_slave_sg: prepares a slave dma operation
@@ -243,7 +243,7 @@ struct dma_device {
 	struct dma_async_tx_descriptor *(*device_prep_dma_xor)(
 		struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src,
 		unsigned int src_cnt, size_t len, unsigned long flags);
-	struct dma_async_tx_descriptor *(*device_prep_dma_zero_sum)(
+	struct dma_async_tx_descriptor *(*device_prep_dma_xor_val)(
 		struct dma_chan *chan, dma_addr_t *src,	unsigned int src_cnt,
 		size_t len, u32 *result, unsigned long flags);
 	struct dma_async_tx_descriptor *(*device_prep_dma_memset)(
-- 
cgit v1.2.3


From 88ba2aa586c874681c072101287e15d40de7e6e2 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Thu, 9 Apr 2009 16:16:18 -0700
Subject: async_tx: kill ASYNC_TX_DEP_ACK flag

In support of inter-channel chaining async_tx utilizes an ack flag to
gate whether a dependent operation can be chained to another.  While the
flag is not set the chain can be considered open for appending.  Setting
the ack flag closes the chain and flags the descriptor for garbage
collection.  The ASYNC_TX_DEP_ACK flag essentially means "close the
chain after adding this dependency".  Since each operation can only have
one child the api now implicitly sets the ack flag at dependency
submission time.  This removes an unnecessary management burden from
clients of the api.

[ Impact: clean up and enforce one dependency per operation ]

Reviewed-by: Andre Noll <maan@systemlinux.org>
Acked-by: Maciej Sosnowski <maciej.sosnowski@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 Documentation/crypto/async-tx-api.txt |  9 ++++-----
 crypto/async_tx/async_memcpy.c        |  2 +-
 crypto/async_tx/async_memset.c        |  2 +-
 crypto/async_tx/async_tx.c            |  4 ++--
 crypto/async_tx/async_xor.c           |  6 ++----
 drivers/md/raid5.c                    | 25 +++++++++++--------------
 include/linux/async_tx.h              |  4 +---
 7 files changed, 22 insertions(+), 30 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/crypto/async-tx-api.txt b/Documentation/crypto/async-tx-api.txt
index 4af12180d191..76feda8541dc 100644
--- a/Documentation/crypto/async-tx-api.txt
+++ b/Documentation/crypto/async-tx-api.txt
@@ -80,8 +80,8 @@ acknowledged by the application before the offload engine driver is allowed to
 recycle (or free) the descriptor.  A descriptor can be acked by one of the
 following methods:
 1/ setting the ASYNC_TX_ACK flag if no child operations are to be submitted
-2/ setting the ASYNC_TX_DEP_ACK flag to acknowledge the parent
-   descriptor of a new operation.
+2/ submitting an unacknowledged descriptor as a dependency to another
+   async_tx call will implicitly set the acknowledged state.
 3/ calling async_tx_ack() on the descriptor.
 
 3.4 When does the operation execute?
@@ -136,10 +136,9 @@ int run_xor_copy_xor(struct page **xor_srcs,
 
 	tx = async_xor(xor_dest, xor_srcs, 0, xor_src_cnt, xor_len,
 		       ASYNC_TX_XOR_DROP_DST, NULL, NULL, NULL);
-	tx = async_memcpy(copy_dest, copy_src, 0, 0, copy_len,
-			  ASYNC_TX_DEP_ACK, tx, NULL, NULL);
+	tx = async_memcpy(copy_dest, copy_src, 0, 0, copy_len, tx, NULL, NULL);
 	tx = async_xor(xor_dest, xor_srcs, 0, xor_src_cnt, xor_len,
-		       ASYNC_TX_XOR_DROP_DST | ASYNC_TX_DEP_ACK | ASYNC_TX_ACK,
+		       ASYNC_TX_XOR_DROP_DST | ASYNC_TX_ACK,
 		       tx, complete_xor_copy_xor, NULL);
 
 	async_tx_issue_pending_all();
diff --git a/crypto/async_tx/async_memcpy.c b/crypto/async_tx/async_memcpy.c
index ddccfb01c416..7117ec6f1b74 100644
--- a/crypto/async_tx/async_memcpy.c
+++ b/crypto/async_tx/async_memcpy.c
@@ -35,7 +35,7 @@
  * @src: src page
  * @offset: offset in pages to start transaction
  * @len: length in bytes
- * @flags: ASYNC_TX_ACK, ASYNC_TX_DEP_ACK,
+ * @flags: ASYNC_TX_ACK
  * @depend_tx: memcpy depends on the result of this transaction
  * @cb_fn: function to call when the memcpy completes
  * @cb_param: parameter to pass to the callback routine
diff --git a/crypto/async_tx/async_memset.c b/crypto/async_tx/async_memset.c
index 5b5eb99bb244..b2f133885b7f 100644
--- a/crypto/async_tx/async_memset.c
+++ b/crypto/async_tx/async_memset.c
@@ -35,7 +35,7 @@
  * @val: fill value
  * @offset: offset in pages to start transaction
  * @len: length in bytes
- * @flags: ASYNC_TX_ACK, ASYNC_TX_DEP_ACK
+ * @flags: ASYNC_TX_ACK
  * @depend_tx: memset depends on the result of this transaction
  * @cb_fn: function to call when the memcpy completes
  * @cb_param: parameter to pass to the callback routine
diff --git a/crypto/async_tx/async_tx.c b/crypto/async_tx/async_tx.c
index 06eb6cc09fef..3766bc3d7d89 100644
--- a/crypto/async_tx/async_tx.c
+++ b/crypto/async_tx/async_tx.c
@@ -223,7 +223,7 @@ async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx,
 	if (flags & ASYNC_TX_ACK)
 		async_tx_ack(tx);
 
-	if (depend_tx && (flags & ASYNC_TX_DEP_ACK))
+	if (depend_tx)
 		async_tx_ack(depend_tx);
 }
 EXPORT_SYMBOL_GPL(async_tx_submit);
@@ -231,7 +231,7 @@ EXPORT_SYMBOL_GPL(async_tx_submit);
 /**
  * async_trigger_callback - schedules the callback function to be run after
  * any dependent operations have been completed.
- * @flags: ASYNC_TX_ACK, ASYNC_TX_DEP_ACK
+ * @flags: ASYNC_TX_ACK
  * @depend_tx: 'callback' requires the completion of this transaction
  * @cb_fn: function to call after depend_tx completes
  * @cb_param: parameter to pass to the callback routine
diff --git a/crypto/async_tx/async_xor.c b/crypto/async_tx/async_xor.c
index e0580b0ea533..3cc5dc763b54 100644
--- a/crypto/async_tx/async_xor.c
+++ b/crypto/async_tx/async_xor.c
@@ -105,7 +105,6 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list,
 				_cb_param);
 
 		depend_tx = tx;
-		flags |= ASYNC_TX_DEP_ACK;
 
 		if (src_cnt > xor_src_cnt) {
 			/* drop completed sources */
@@ -168,8 +167,7 @@ do_sync_xor(struct page *dest, struct page **src_list, unsigned int offset,
  * @offset: offset in pages to start transaction
  * @src_cnt: number of source pages
  * @len: length in bytes
- * @flags: ASYNC_TX_XOR_ZERO_DST, ASYNC_TX_XOR_DROP_DEST,
- *	ASYNC_TX_ACK, ASYNC_TX_DEP_ACK
+ * @flags: ASYNC_TX_XOR_ZERO_DST, ASYNC_TX_XOR_DROP_DEST, ASYNC_TX_ACK
  * @depend_tx: xor depends on the result of this transaction.
  * @cb_fn: function to call when the xor completes
  * @cb_param: parameter to pass to the callback routine
@@ -230,7 +228,7 @@ static int page_is_zero(struct page *p, unsigned int offset, size_t len)
  * @src_cnt: number of source pages
  * @len: length in bytes
  * @result: 0 if sum == 0 else non-zero
- * @flags: ASYNC_TX_ACK, ASYNC_TX_DEP_ACK
+ * @flags: ASYNC_TX_ACK
  * @depend_tx: xor depends on the result of this transaction.
  * @cb_fn: function to call when the xor completes
  * @cb_param: parameter to pass to the callback routine
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index f8d2d35ed298..0ef5362c8d02 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -525,14 +525,12 @@ async_copy_data(int frombio, struct bio *bio, struct page *page,
 			bio_page = bio_iovec_idx(bio, i)->bv_page;
 			if (frombio)
 				tx = async_memcpy(page, bio_page, page_offset,
-					b_offset, clen,
-					ASYNC_TX_DEP_ACK,
-					tx, NULL, NULL);
+						  b_offset, clen, 0,
+						  tx, NULL, NULL);
 			else
 				tx = async_memcpy(bio_page, page, b_offset,
-					page_offset, clen,
-					ASYNC_TX_DEP_ACK,
-					tx, NULL, NULL);
+						  page_offset, clen, 0,
+						  tx, NULL, NULL);
 		}
 		if (clen < len) /* hit end of page */
 			break;
@@ -615,8 +613,7 @@ static void ops_run_biofill(struct stripe_head *sh)
 	}
 
 	atomic_inc(&sh->count);
-	async_trigger_callback(ASYNC_TX_DEP_ACK | ASYNC_TX_ACK, tx,
-		ops_complete_biofill, sh);
+	async_trigger_callback(ASYNC_TX_ACK, tx, ops_complete_biofill, sh);
 }
 
 static void ops_complete_compute5(void *stripe_head_ref)
@@ -701,8 +698,8 @@ ops_run_prexor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
 	}
 
 	tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE,
-		ASYNC_TX_DEP_ACK | ASYNC_TX_XOR_DROP_DST, tx,
-		ops_complete_prexor, sh);
+		       ASYNC_TX_XOR_DROP_DST, tx,
+		       ops_complete_prexor, sh);
 
 	return tx;
 }
@@ -809,7 +806,7 @@ ops_run_postxor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
 	 * set ASYNC_TX_XOR_DROP_DST and ASYNC_TX_XOR_ZERO_DST
 	 * for the synchronous xor case
 	 */
-	flags = ASYNC_TX_DEP_ACK | ASYNC_TX_ACK |
+	flags = ASYNC_TX_ACK |
 		(prexor ? ASYNC_TX_XOR_DROP_DST : ASYNC_TX_XOR_ZERO_DST);
 
 	atomic_inc(&sh->count);
@@ -858,7 +855,7 @@ static void ops_run_check(struct stripe_head *sh)
 		&sh->ops.zero_sum_result, 0, NULL, NULL, NULL);
 
 	atomic_inc(&sh->count);
-	tx = async_trigger_callback(ASYNC_TX_DEP_ACK | ASYNC_TX_ACK, tx,
+	tx = async_trigger_callback(ASYNC_TX_ACK, tx,
 		ops_complete_check, sh);
 }
 
@@ -2687,8 +2684,8 @@ static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh,
 
 			/* place all the copies on one channel */
 			tx = async_memcpy(sh2->dev[dd_idx].page,
-				sh->dev[i].page, 0, 0, STRIPE_SIZE,
-				ASYNC_TX_DEP_ACK, tx, NULL, NULL);
+					  sh->dev[i].page, 0, 0, STRIPE_SIZE,
+					  0, tx, NULL, NULL);
 
 			set_bit(R5_Expanded, &sh2->dev[dd_idx].flags);
 			set_bit(R5_UPTODATE, &sh2->dev[dd_idx].flags);
diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h
index 513150d8c25b..9f14cd540cd2 100644
--- a/include/linux/async_tx.h
+++ b/include/linux/async_tx.h
@@ -58,13 +58,11 @@ struct dma_chan_ref {
  * array.
  * @ASYNC_TX_ACK: immediately ack the descriptor, precludes setting up a
  * dependency chain
- * @ASYNC_TX_DEP_ACK: ack the dependency descriptor.  Useful for chaining.
  */
 enum async_tx_flags {
 	ASYNC_TX_XOR_ZERO_DST	 = (1 << 0),
 	ASYNC_TX_XOR_DROP_DST	 = (1 << 1),
-	ASYNC_TX_ACK		 = (1 << 3),
-	ASYNC_TX_DEP_ACK	 = (1 << 4),
+	ASYNC_TX_ACK		 = (1 << 2),
 };
 
 #ifdef CONFIG_DMA_ENGINE
-- 
cgit v1.2.3


From a08abd8ca890a377521d65d493d174bebcaf694b Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Wed, 3 Jun 2009 11:43:59 -0700
Subject: async_tx: structify submission arguments, add scribble

Prepare the api for the arrival of a new parameter, 'scribble'.  This
will allow callers to identify scratchpad memory for dma address or page
address conversions.  As this adds yet another parameter, take this
opportunity to convert the common submission parameters (flags,
dependency, callback, and callback argument) into an object that is
passed by reference.

Also, take this opportunity to fix up the kerneldoc and add notes about
the relevant ASYNC_TX_* flags for each routine.

[ Impact: moves api pass-by-value parameters to a pass-by-reference struct ]

Signed-off-by: Andre Noll <maan@systemlinux.org>
Acked-by: Maciej Sosnowski <maciej.sosnowski@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 Documentation/crypto/async-tx-api.txt |   6 +-
 crypto/async_tx/async_memcpy.c        |  26 +++----
 crypto/async_tx/async_memset.c        |  25 +++----
 crypto/async_tx/async_tx.c            |  51 +++++++-------
 crypto/async_tx/async_xor.c           | 123 +++++++++++++++++-----------------
 drivers/md/raid5.c                    |  59 +++++++++-------
 include/linux/async_tx.h              |  84 ++++++++++++++---------
 7 files changed, 200 insertions(+), 174 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/crypto/async-tx-api.txt b/Documentation/crypto/async-tx-api.txt
index 76feda8541dc..dfe0475f7919 100644
--- a/Documentation/crypto/async-tx-api.txt
+++ b/Documentation/crypto/async-tx-api.txt
@@ -54,11 +54,7 @@ features surfaced as a result:
 
 3.1 General format of the API:
 struct dma_async_tx_descriptor *
-async_<operation>(<op specific parameters>,
-		  enum async_tx_flags flags,
-        	  struct dma_async_tx_descriptor *dependency,
-        	  dma_async_tx_callback callback_routine,
-		  void *callback_parameter);
+async_<operation>(<op specific parameters>, struct async_submit ctl *submit)
 
 3.2 Supported operations:
 memcpy  - memory copy between a source and a destination buffer
diff --git a/crypto/async_tx/async_memcpy.c b/crypto/async_tx/async_memcpy.c
index 7117ec6f1b74..89e05556f3df 100644
--- a/crypto/async_tx/async_memcpy.c
+++ b/crypto/async_tx/async_memcpy.c
@@ -33,28 +33,28 @@
  * async_memcpy - attempt to copy memory with a dma engine.
  * @dest: destination page
  * @src: src page
- * @offset: offset in pages to start transaction
+ * @dest_offset: offset into 'dest' to start transaction
+ * @src_offset: offset into 'src' to start transaction
  * @len: length in bytes
- * @flags: ASYNC_TX_ACK
- * @depend_tx: memcpy depends on the result of this transaction
- * @cb_fn: function to call when the memcpy completes
- * @cb_param: parameter to pass to the callback routine
+ * @submit: submission / completion modifiers
+ *
+ * honored flags: ASYNC_TX_ACK
  */
 struct dma_async_tx_descriptor *
 async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset,
-	unsigned int src_offset, size_t len, enum async_tx_flags flags,
-	struct dma_async_tx_descriptor *depend_tx,
-	dma_async_tx_callback cb_fn, void *cb_param)
+	     unsigned int src_offset, size_t len,
+	     struct async_submit_ctl *submit)
 {
-	struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_MEMCPY,
+	struct dma_chan *chan = async_tx_find_channel(submit, DMA_MEMCPY,
 						      &dest, 1, &src, 1, len);
 	struct dma_device *device = chan ? chan->device : NULL;
 	struct dma_async_tx_descriptor *tx = NULL;
 
 	if (device) {
 		dma_addr_t dma_dest, dma_src;
-		unsigned long dma_prep_flags = cb_fn ? DMA_PREP_INTERRUPT : 0;
+		unsigned long dma_prep_flags;
 
+		dma_prep_flags = submit->cb_fn ? DMA_PREP_INTERRUPT : 0;
 		dma_dest = dma_map_page(device->dev, dest, dest_offset, len,
 					DMA_FROM_DEVICE);
 
@@ -67,13 +67,13 @@ async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset,
 
 	if (tx) {
 		pr_debug("%s: (async) len: %zu\n", __func__, len);
-		async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param);
+		async_tx_submit(chan, tx, submit);
 	} else {
 		void *dest_buf, *src_buf;
 		pr_debug("%s: (sync) len: %zu\n", __func__, len);
 
 		/* wait for any prerequisite operations */
-		async_tx_quiesce(&depend_tx);
+		async_tx_quiesce(&submit->depend_tx);
 
 		dest_buf = kmap_atomic(dest, KM_USER0) + dest_offset;
 		src_buf = kmap_atomic(src, KM_USER1) + src_offset;
@@ -83,7 +83,7 @@ async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset,
 		kunmap_atomic(dest_buf, KM_USER0);
 		kunmap_atomic(src_buf, KM_USER1);
 
-		async_tx_sync_epilog(cb_fn, cb_param);
+		async_tx_sync_epilog(submit);
 	}
 
 	return tx;
diff --git a/crypto/async_tx/async_memset.c b/crypto/async_tx/async_memset.c
index b2f133885b7f..c14437238f4c 100644
--- a/crypto/async_tx/async_memset.c
+++ b/crypto/async_tx/async_memset.c
@@ -35,26 +35,23 @@
  * @val: fill value
  * @offset: offset in pages to start transaction
  * @len: length in bytes
- * @flags: ASYNC_TX_ACK
- * @depend_tx: memset depends on the result of this transaction
- * @cb_fn: function to call when the memcpy completes
- * @cb_param: parameter to pass to the callback routine
+ *
+ * honored flags: ASYNC_TX_ACK
  */
 struct dma_async_tx_descriptor *
-async_memset(struct page *dest, int val, unsigned int offset,
-	size_t len, enum async_tx_flags flags,
-	struct dma_async_tx_descriptor *depend_tx,
-	dma_async_tx_callback cb_fn, void *cb_param)
+async_memset(struct page *dest, int val, unsigned int offset, size_t len,
+	     struct async_submit_ctl *submit)
 {
-	struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_MEMSET,
+	struct dma_chan *chan = async_tx_find_channel(submit, DMA_MEMSET,
 						      &dest, 1, NULL, 0, len);
 	struct dma_device *device = chan ? chan->device : NULL;
 	struct dma_async_tx_descriptor *tx = NULL;
 
 	if (device) {
 		dma_addr_t dma_dest;
-		unsigned long dma_prep_flags = cb_fn ? DMA_PREP_INTERRUPT : 0;
+		unsigned long dma_prep_flags;
 
+		dma_prep_flags = submit->cb_fn ? DMA_PREP_INTERRUPT : 0;
 		dma_dest = dma_map_page(device->dev, dest, offset, len,
 					DMA_FROM_DEVICE);
 
@@ -64,19 +61,19 @@ async_memset(struct page *dest, int val, unsigned int offset,
 
 	if (tx) {
 		pr_debug("%s: (async) len: %zu\n", __func__, len);
-		async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param);
+		async_tx_submit(chan, tx, submit);
 	} else { /* run the memset synchronously */
 		void *dest_buf;
 		pr_debug("%s: (sync) len: %zu\n", __func__, len);
 
-		dest_buf = (void *) (((char *) page_address(dest)) + offset);
+		dest_buf = page_address(dest) + offset;
 
 		/* wait for any prerequisite operations */
-		async_tx_quiesce(&depend_tx);
+		async_tx_quiesce(&submit->depend_tx);
 
 		memset(dest_buf, val, len);
 
-		async_tx_sync_epilog(cb_fn, cb_param);
+		async_tx_sync_epilog(submit);
 	}
 
 	return tx;
diff --git a/crypto/async_tx/async_tx.c b/crypto/async_tx/async_tx.c
index 3766bc3d7d89..802a5ce437d9 100644
--- a/crypto/async_tx/async_tx.c
+++ b/crypto/async_tx/async_tx.c
@@ -45,13 +45,15 @@ static void __exit async_tx_exit(void)
 /**
  * __async_tx_find_channel - find a channel to carry out the operation or let
  *	the transaction execute synchronously
- * @depend_tx: transaction dependency
+ * @submit: transaction dependency and submission modifiers
  * @tx_type: transaction type
  */
 struct dma_chan *
-__async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx,
-	enum dma_transaction_type tx_type)
+__async_tx_find_channel(struct async_submit_ctl *submit,
+			enum dma_transaction_type tx_type)
 {
+	struct dma_async_tx_descriptor *depend_tx = submit->depend_tx;
+
 	/* see if we can keep the chain on one channel */
 	if (depend_tx &&
 	    dma_has_cap(tx_type, depend_tx->chan->device->cap_mask))
@@ -144,13 +146,14 @@ async_tx_channel_switch(struct dma_async_tx_descriptor *depend_tx,
 
 
 /**
- * submit_disposition - while holding depend_tx->lock we must avoid submitting
- * 	new operations to prevent a circular locking dependency with
- * 	drivers that already hold a channel lock when calling
- * 	async_tx_run_dependencies.
+ * submit_disposition - flags for routing an incoming operation
  * @ASYNC_TX_SUBMITTED: we were able to append the new operation under the lock
  * @ASYNC_TX_CHANNEL_SWITCH: when the lock is dropped schedule a channel switch
  * @ASYNC_TX_DIRECT_SUBMIT: when the lock is dropped submit directly
+ *
+ * while holding depend_tx->lock we must avoid submitting new operations
+ * to prevent a circular locking dependency with drivers that already
+ * hold a channel lock when calling async_tx_run_dependencies.
  */
 enum submit_disposition {
 	ASYNC_TX_SUBMITTED,
@@ -160,11 +163,12 @@ enum submit_disposition {
 
 void
 async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx,
-	enum async_tx_flags flags, struct dma_async_tx_descriptor *depend_tx,
-	dma_async_tx_callback cb_fn, void *cb_param)
+		struct async_submit_ctl *submit)
 {
-	tx->callback = cb_fn;
-	tx->callback_param = cb_param;
+	struct dma_async_tx_descriptor *depend_tx = submit->depend_tx;
+
+	tx->callback = submit->cb_fn;
+	tx->callback_param = submit->cb_param;
 
 	if (depend_tx) {
 		enum submit_disposition s;
@@ -220,7 +224,7 @@ async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx,
 		tx->tx_submit(tx);
 	}
 
-	if (flags & ASYNC_TX_ACK)
+	if (submit->flags & ASYNC_TX_ACK)
 		async_tx_ack(tx);
 
 	if (depend_tx)
@@ -229,21 +233,20 @@ async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx,
 EXPORT_SYMBOL_GPL(async_tx_submit);
 
 /**
- * async_trigger_callback - schedules the callback function to be run after
- * any dependent operations have been completed.
- * @flags: ASYNC_TX_ACK
- * @depend_tx: 'callback' requires the completion of this transaction
- * @cb_fn: function to call after depend_tx completes
- * @cb_param: parameter to pass to the callback routine
+ * async_trigger_callback - schedules the callback function to be run
+ * @submit: submission and completion parameters
+ *
+ * honored flags: ASYNC_TX_ACK
+ *
+ * The callback is run after any dependent operations have completed.
  */
 struct dma_async_tx_descriptor *
-async_trigger_callback(enum async_tx_flags flags,
-	struct dma_async_tx_descriptor *depend_tx,
-	dma_async_tx_callback cb_fn, void *cb_param)
+async_trigger_callback(struct async_submit_ctl *submit)
 {
 	struct dma_chan *chan;
 	struct dma_device *device;
 	struct dma_async_tx_descriptor *tx;
+	struct dma_async_tx_descriptor *depend_tx = submit->depend_tx;
 
 	if (depend_tx) {
 		chan = depend_tx->chan;
@@ -262,14 +265,14 @@ async_trigger_callback(enum async_tx_flags flags,
 	if (tx) {
 		pr_debug("%s: (async)\n", __func__);
 
-		async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param);
+		async_tx_submit(chan, tx, submit);
 	} else {
 		pr_debug("%s: (sync)\n", __func__);
 
 		/* wait for any prerequisite operations */
-		async_tx_quiesce(&depend_tx);
+		async_tx_quiesce(&submit->depend_tx);
 
-		async_tx_sync_epilog(cb_fn, cb_param);
+		async_tx_sync_epilog(submit);
 	}
 
 	return tx;
diff --git a/crypto/async_tx/async_xor.c b/crypto/async_tx/async_xor.c
index 3cc5dc763b54..691fa98a18c4 100644
--- a/crypto/async_tx/async_xor.c
+++ b/crypto/async_tx/async_xor.c
@@ -34,18 +34,16 @@
 static __async_inline struct dma_async_tx_descriptor *
 do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list,
 	     unsigned int offset, int src_cnt, size_t len,
-	     enum async_tx_flags flags,
-	     struct dma_async_tx_descriptor *depend_tx,
-	     dma_async_tx_callback cb_fn, void *cb_param)
+	     struct async_submit_ctl *submit)
 {
 	struct dma_device *dma = chan->device;
 	dma_addr_t *dma_src = (dma_addr_t *) src_list;
 	struct dma_async_tx_descriptor *tx = NULL;
 	int src_off = 0;
 	int i;
-	dma_async_tx_callback _cb_fn;
-	void *_cb_param;
-	enum async_tx_flags async_flags;
+	dma_async_tx_callback cb_fn_orig = submit->cb_fn;
+	void *cb_param_orig = submit->cb_param;
+	enum async_tx_flags flags_orig = submit->flags;
 	enum dma_ctrl_flags dma_flags;
 	int xor_src_cnt;
 	dma_addr_t dma_dest;
@@ -63,7 +61,7 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list,
 	}
 
 	while (src_cnt) {
-		async_flags = flags;
+		submit->flags = flags_orig;
 		dma_flags = 0;
 		xor_src_cnt = min(src_cnt, dma->max_xor);
 		/* if we are submitting additional xors, leave the chain open,
@@ -71,15 +69,15 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list,
 		 * buffer mapped
 		 */
 		if (src_cnt > xor_src_cnt) {
-			async_flags &= ~ASYNC_TX_ACK;
+			submit->flags &= ~ASYNC_TX_ACK;
 			dma_flags = DMA_COMPL_SKIP_DEST_UNMAP;
-			_cb_fn = NULL;
-			_cb_param = NULL;
+			submit->cb_fn = NULL;
+			submit->cb_param = NULL;
 		} else {
-			_cb_fn = cb_fn;
-			_cb_param = cb_param;
+			submit->cb_fn = cb_fn_orig;
+			submit->cb_param = cb_param_orig;
 		}
-		if (_cb_fn)
+		if (submit->cb_fn)
 			dma_flags |= DMA_PREP_INTERRUPT;
 
 		/* Since we have clobbered the src_list we are committed
@@ -90,7 +88,7 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list,
 					      xor_src_cnt, len, dma_flags);
 
 		if (unlikely(!tx))
-			async_tx_quiesce(&depend_tx);
+			async_tx_quiesce(&submit->depend_tx);
 
 		/* spin wait for the preceeding transactions to complete */
 		while (unlikely(!tx)) {
@@ -101,10 +99,8 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list,
 						      dma_flags);
 		}
 
-		async_tx_submit(chan, tx, async_flags, depend_tx, _cb_fn,
-				_cb_param);
-
-		depend_tx = tx;
+		async_tx_submit(chan, tx, submit);
+		submit->depend_tx = tx;
 
 		if (src_cnt > xor_src_cnt) {
 			/* drop completed sources */
@@ -123,8 +119,7 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list,
 
 static void
 do_sync_xor(struct page *dest, struct page **src_list, unsigned int offset,
-	    int src_cnt, size_t len, enum async_tx_flags flags,
-	    dma_async_tx_callback cb_fn, void *cb_param)
+	    int src_cnt, size_t len, struct async_submit_ctl *submit)
 {
 	int i;
 	int xor_src_cnt;
@@ -139,7 +134,7 @@ do_sync_xor(struct page *dest, struct page **src_list, unsigned int offset,
 	/* set destination address */
 	dest_buf = page_address(dest) + offset;
 
-	if (flags & ASYNC_TX_XOR_ZERO_DST)
+	if (submit->flags & ASYNC_TX_XOR_ZERO_DST)
 		memset(dest_buf, 0, len);
 
 	while (src_cnt > 0) {
@@ -152,33 +147,35 @@ do_sync_xor(struct page *dest, struct page **src_list, unsigned int offset,
 		src_off += xor_src_cnt;
 	}
 
-	async_tx_sync_epilog(cb_fn, cb_param);
+	async_tx_sync_epilog(submit);
 }
 
 /**
  * async_xor - attempt to xor a set of blocks with a dma engine.
- *	xor_blocks always uses the dest as a source so the ASYNC_TX_XOR_ZERO_DST
- *	flag must be set to not include dest data in the calculation.  The
- *	assumption with dma eninges is that they only use the destination
- *	buffer as a source when it is explicity specified in the source list.
  * @dest: destination page
- * @src_list: array of source pages (if the dest is also a source it must be
- *	at index zero).  The contents of this array may be overwritten.
- * @offset: offset in pages to start transaction
+ * @src_list: array of source pages
+ * @offset: common src/dst offset to start transaction
  * @src_cnt: number of source pages
  * @len: length in bytes
- * @flags: ASYNC_TX_XOR_ZERO_DST, ASYNC_TX_XOR_DROP_DEST, ASYNC_TX_ACK
- * @depend_tx: xor depends on the result of this transaction.
- * @cb_fn: function to call when the xor completes
- * @cb_param: parameter to pass to the callback routine
+ * @submit: submission / completion modifiers
+ *
+ * honored flags: ASYNC_TX_ACK, ASYNC_TX_XOR_ZERO_DST, ASYNC_TX_XOR_DROP_DST
+ *
+ * xor_blocks always uses the dest as a source so the
+ * ASYNC_TX_XOR_ZERO_DST flag must be set to not include dest data in
+ * the calculation.  The assumption with dma eninges is that they only
+ * use the destination buffer as a source when it is explicity specified
+ * in the source list.
+ *
+ * src_list note: if the dest is also a source it must be at index zero.
+ * The contents of this array will be overwritten if a scribble region
+ * is not specified.
  */
 struct dma_async_tx_descriptor *
 async_xor(struct page *dest, struct page **src_list, unsigned int offset,
-	int src_cnt, size_t len, enum async_tx_flags flags,
-	struct dma_async_tx_descriptor *depend_tx,
-	dma_async_tx_callback cb_fn, void *cb_param)
+	  int src_cnt, size_t len, struct async_submit_ctl *submit)
 {
-	struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_XOR,
+	struct dma_chan *chan = async_tx_find_channel(submit, DMA_XOR,
 						      &dest, 1, src_list,
 						      src_cnt, len);
 	BUG_ON(src_cnt <= 1);
@@ -188,7 +185,7 @@ async_xor(struct page *dest, struct page **src_list, unsigned int offset,
 		pr_debug("%s (async): len: %zu\n", __func__, len);
 
 		return do_async_xor(chan, dest, src_list, offset, src_cnt, len,
-				    flags, depend_tx, cb_fn, cb_param);
+				    submit);
 	} else {
 		/* run the xor synchronously */
 		pr_debug("%s (sync): len: %zu\n", __func__, len);
@@ -196,16 +193,15 @@ async_xor(struct page *dest, struct page **src_list, unsigned int offset,
 		/* in the sync case the dest is an implied source
 		 * (assumes the dest is the first source)
 		 */
-		if (flags & ASYNC_TX_XOR_DROP_DST) {
+		if (submit->flags & ASYNC_TX_XOR_DROP_DST) {
 			src_cnt--;
 			src_list++;
 		}
 
 		/* wait for any prerequisite operations */
-		async_tx_quiesce(&depend_tx);
+		async_tx_quiesce(&submit->depend_tx);
 
-		do_sync_xor(dest, src_list, offset, src_cnt, len,
-			    flags, cb_fn, cb_param);
+		do_sync_xor(dest, src_list, offset, src_cnt, len, submit);
 
 		return NULL;
 	}
@@ -222,25 +218,25 @@ static int page_is_zero(struct page *p, unsigned int offset, size_t len)
 /**
  * async_xor_val - attempt a xor parity check with a dma engine.
  * @dest: destination page used if the xor is performed synchronously
- * @src_list: array of source pages.  The dest page must be listed as a source
- * 	at index zero.  The contents of this array may be overwritten.
+ * @src_list: array of source pages
  * @offset: offset in pages to start transaction
  * @src_cnt: number of source pages
  * @len: length in bytes
  * @result: 0 if sum == 0 else non-zero
- * @flags: ASYNC_TX_ACK
- * @depend_tx: xor depends on the result of this transaction.
- * @cb_fn: function to call when the xor completes
- * @cb_param: parameter to pass to the callback routine
+ * @submit: submission / completion modifiers
+ *
+ * honored flags: ASYNC_TX_ACK
+ *
+ * src_list note: if the dest is also a source it must be at index zero.
+ * The contents of this array will be overwritten if a scribble region
+ * is not specified.
  */
 struct dma_async_tx_descriptor *
-async_xor_val(struct page *dest, struct page **src_list,
-	unsigned int offset, int src_cnt, size_t len,
-	u32 *result, enum async_tx_flags flags,
-	struct dma_async_tx_descriptor *depend_tx,
-	dma_async_tx_callback cb_fn, void *cb_param)
+async_xor_val(struct page *dest, struct page **src_list, unsigned int offset,
+	      int src_cnt, size_t len, u32 *result,
+	      struct async_submit_ctl *submit)
 {
-	struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_XOR_VAL,
+	struct dma_chan *chan = async_tx_find_channel(submit, DMA_XOR_VAL,
 						      &dest, 1, src_list,
 						      src_cnt, len);
 	struct dma_device *device = chan ? chan->device : NULL;
@@ -250,11 +246,12 @@ async_xor_val(struct page *dest, struct page **src_list,
 
 	if (device && src_cnt <= device->max_xor) {
 		dma_addr_t *dma_src = (dma_addr_t *) src_list;
-		unsigned long dma_prep_flags = cb_fn ? DMA_PREP_INTERRUPT : 0;
+		unsigned long dma_prep_flags;
 		int i;
 
 		pr_debug("%s: (async) len: %zu\n", __func__, len);
 
+		dma_prep_flags = submit->cb_fn ? DMA_PREP_INTERRUPT : 0;
 		for (i = 0; i < src_cnt; i++)
 			dma_src[i] = dma_map_page(device->dev, src_list[i],
 						  offset, len, DMA_TO_DEVICE);
@@ -263,7 +260,7 @@ async_xor_val(struct page *dest, struct page **src_list,
 						     len, result,
 						     dma_prep_flags);
 		if (unlikely(!tx)) {
-			async_tx_quiesce(&depend_tx);
+			async_tx_quiesce(&submit->depend_tx);
 
 			while (!tx) {
 				dma_async_issue_pending(chan);
@@ -273,23 +270,23 @@ async_xor_val(struct page *dest, struct page **src_list,
 			}
 		}
 
-		async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param);
+		async_tx_submit(chan, tx, submit);
 	} else {
-		unsigned long xor_flags = flags;
+		enum async_tx_flags flags_orig = submit->flags;
 
 		pr_debug("%s: (sync) len: %zu\n", __func__, len);
 
-		xor_flags |= ASYNC_TX_XOR_DROP_DST;
-		xor_flags &= ~ASYNC_TX_ACK;
+		submit->flags |= ASYNC_TX_XOR_DROP_DST;
+		submit->flags &= ~ASYNC_TX_ACK;
 
-		tx = async_xor(dest, src_list, offset, src_cnt, len, xor_flags,
-			depend_tx, NULL, NULL);
+		tx = async_xor(dest, src_list, offset, src_cnt, len, submit);
 
 		async_tx_quiesce(&tx);
 
 		*result = page_is_zero(dest, offset, len) ? 0 : 1;
 
-		async_tx_sync_epilog(cb_fn, cb_param);
+		async_tx_sync_epilog(submit);
+		submit->flags = flags_orig;
 	}
 
 	return tx;
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 0ef5362c8d02..e1920f23579f 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -499,11 +499,14 @@ async_copy_data(int frombio, struct bio *bio, struct page *page,
 	struct page *bio_page;
 	int i;
 	int page_offset;
+	struct async_submit_ctl submit;
 
 	if (bio->bi_sector >= sector)
 		page_offset = (signed)(bio->bi_sector - sector) * 512;
 	else
 		page_offset = (signed)(sector - bio->bi_sector) * -512;
+
+	init_async_submit(&submit, 0, tx, NULL, NULL, NULL);
 	bio_for_each_segment(bvl, bio, i) {
 		int len = bio_iovec_idx(bio, i)->bv_len;
 		int clen;
@@ -525,13 +528,14 @@ async_copy_data(int frombio, struct bio *bio, struct page *page,
 			bio_page = bio_iovec_idx(bio, i)->bv_page;
 			if (frombio)
 				tx = async_memcpy(page, bio_page, page_offset,
-						  b_offset, clen, 0,
-						  tx, NULL, NULL);
+						  b_offset, clen, &submit);
 			else
 				tx = async_memcpy(bio_page, page, b_offset,
-						  page_offset, clen, 0,
-						  tx, NULL, NULL);
+						  page_offset, clen, &submit);
 		}
+		/* chain the operations */
+		submit.depend_tx = tx;
+
 		if (clen < len) /* hit end of page */
 			break;
 		page_offset +=  len;
@@ -590,6 +594,7 @@ static void ops_run_biofill(struct stripe_head *sh)
 {
 	struct dma_async_tx_descriptor *tx = NULL;
 	raid5_conf_t *conf = sh->raid_conf;
+	struct async_submit_ctl submit;
 	int i;
 
 	pr_debug("%s: stripe %llu\n", __func__,
@@ -613,7 +618,8 @@ static void ops_run_biofill(struct stripe_head *sh)
 	}
 
 	atomic_inc(&sh->count);
-	async_trigger_callback(ASYNC_TX_ACK, tx, ops_complete_biofill, sh);
+	init_async_submit(&submit, ASYNC_TX_ACK, tx, ops_complete_biofill, sh, NULL);
+	async_trigger_callback(&submit);
 }
 
 static void ops_complete_compute5(void *stripe_head_ref)
@@ -645,6 +651,7 @@ static struct dma_async_tx_descriptor *ops_run_compute5(struct stripe_head *sh)
 	struct page *xor_dest = tgt->page;
 	int count = 0;
 	struct dma_async_tx_descriptor *tx;
+	struct async_submit_ctl submit;
 	int i;
 
 	pr_debug("%s: stripe %llu block: %d\n",
@@ -657,13 +664,12 @@ static struct dma_async_tx_descriptor *ops_run_compute5(struct stripe_head *sh)
 
 	atomic_inc(&sh->count);
 
+	init_async_submit(&submit, ASYNC_TX_XOR_ZERO_DST, NULL,
+			  ops_complete_compute5, sh, NULL);
 	if (unlikely(count == 1))
-		tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE,
-			0, NULL, ops_complete_compute5, sh);
+		tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE, &submit);
 	else
-		tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE,
-			ASYNC_TX_XOR_ZERO_DST, NULL,
-			ops_complete_compute5, sh);
+		tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, &submit);
 
 	return tx;
 }
@@ -683,6 +689,7 @@ ops_run_prexor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
 	int disks = sh->disks;
 	struct page *xor_srcs[disks];
 	int count = 0, pd_idx = sh->pd_idx, i;
+	struct async_submit_ctl submit;
 
 	/* existing parity data subtracted */
 	struct page *xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page;
@@ -697,9 +704,9 @@ ops_run_prexor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
 			xor_srcs[count++] = dev->page;
 	}
 
-	tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE,
-		       ASYNC_TX_XOR_DROP_DST, tx,
-		       ops_complete_prexor, sh);
+	init_async_submit(&submit, ASYNC_TX_XOR_DROP_DST, tx,
+			  ops_complete_prexor, sh, NULL);
+	tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, &submit);
 
 	return tx;
 }
@@ -772,7 +779,7 @@ ops_run_postxor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
 	/* kernel stack size limits the total number of disks */
 	int disks = sh->disks;
 	struct page *xor_srcs[disks];
-
+	struct async_submit_ctl submit;
 	int count = 0, pd_idx = sh->pd_idx, i;
 	struct page *xor_dest;
 	int prexor = 0;
@@ -811,13 +818,11 @@ ops_run_postxor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
 
 	atomic_inc(&sh->count);
 
-	if (unlikely(count == 1)) {
-		flags &= ~(ASYNC_TX_XOR_DROP_DST | ASYNC_TX_XOR_ZERO_DST);
-		tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE,
-			flags, tx, ops_complete_postxor, sh);
-	} else
-		tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE,
-			flags, tx, ops_complete_postxor, sh);
+	init_async_submit(&submit, flags, tx, ops_complete_postxor, sh, NULL);
+	if (unlikely(count == 1))
+		tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE, &submit);
+	else
+		tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, &submit);
 }
 
 static void ops_complete_check(void *stripe_head_ref)
@@ -838,6 +843,7 @@ static void ops_run_check(struct stripe_head *sh)
 	int disks = sh->disks;
 	struct page *xor_srcs[disks];
 	struct dma_async_tx_descriptor *tx;
+	struct async_submit_ctl submit;
 
 	int count = 0, pd_idx = sh->pd_idx, i;
 	struct page *xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page;
@@ -851,12 +857,13 @@ static void ops_run_check(struct stripe_head *sh)
 			xor_srcs[count++] = dev->page;
 	}
 
+	init_async_submit(&submit, 0, NULL, NULL, NULL, NULL);
 	tx = async_xor_val(xor_dest, xor_srcs, 0, count, STRIPE_SIZE,
-		&sh->ops.zero_sum_result, 0, NULL, NULL, NULL);
+			   &sh->ops.zero_sum_result, &submit);
 
 	atomic_inc(&sh->count);
-	tx = async_trigger_callback(ASYNC_TX_ACK, tx,
-		ops_complete_check, sh);
+	init_async_submit(&submit, ASYNC_TX_ACK, tx, ops_complete_check, sh, NULL);
+	tx = async_trigger_callback(&submit);
 }
 
 static void raid5_run_ops(struct stripe_head *sh, unsigned long ops_request)
@@ -2664,6 +2671,7 @@ static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh,
 		if (i != sh->pd_idx && i != sh->qd_idx) {
 			int dd_idx, j;
 			struct stripe_head *sh2;
+			struct async_submit_ctl submit;
 
 			sector_t bn = compute_blocknr(sh, i, 1);
 			sector_t s = raid5_compute_sector(conf, bn, 0,
@@ -2683,9 +2691,10 @@ static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh,
 			}
 
 			/* place all the copies on one channel */
+			init_async_submit(&submit, 0, tx, NULL, NULL, NULL);
 			tx = async_memcpy(sh2->dev[dd_idx].page,
 					  sh->dev[i].page, 0, 0, STRIPE_SIZE,
-					  0, tx, NULL, NULL);
+					  &submit);
 
 			set_bit(R5_Expanded, &sh2->dev[dd_idx].flags);
 			set_bit(R5_UPTODATE, &sh2->dev[dd_idx].flags);
diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h
index 9f14cd540cd2..00cfb637ddf2 100644
--- a/include/linux/async_tx.h
+++ b/include/linux/async_tx.h
@@ -65,6 +65,22 @@ enum async_tx_flags {
 	ASYNC_TX_ACK		 = (1 << 2),
 };
 
+/**
+ * struct async_submit_ctl - async_tx submission/completion modifiers
+ * @flags: submission modifiers
+ * @depend_tx: parent dependency of the current operation being submitted
+ * @cb_fn: callback routine to run at operation completion
+ * @cb_param: parameter for the callback routine
+ * @scribble: caller provided space for dma/page address conversions
+ */
+struct async_submit_ctl {
+	enum async_tx_flags flags;
+	struct dma_async_tx_descriptor *depend_tx;
+	dma_async_tx_callback cb_fn;
+	void *cb_param;
+	void *scribble;
+};
+
 #ifdef CONFIG_DMA_ENGINE
 #define async_tx_issue_pending_all dma_issue_pending_all
 #ifdef CONFIG_ARCH_HAS_ASYNC_TX_FIND_CHANNEL
@@ -73,8 +89,8 @@ enum async_tx_flags {
 #define async_tx_find_channel(dep, type, dst, dst_count, src, src_count, len) \
 	 __async_tx_find_channel(dep, type)
 struct dma_chan *
-__async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx,
-	enum dma_transaction_type tx_type);
+__async_tx_find_channel(struct async_submit_ctl *submit,
+			enum dma_transaction_type tx_type);
 #endif /* CONFIG_ARCH_HAS_ASYNC_TX_FIND_CHANNEL */
 #else
 static inline void async_tx_issue_pending_all(void)
@@ -83,9 +99,10 @@ static inline void async_tx_issue_pending_all(void)
 }
 
 static inline struct dma_chan *
-async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx,
-	enum dma_transaction_type tx_type, struct page **dst, int dst_count,
-	struct page **src, int src_count, size_t len)
+async_tx_find_channel(struct async_submit_ctl *submit,
+		      enum dma_transaction_type tx_type, struct page **dst,
+		      int dst_count, struct page **src, int src_count,
+		      size_t len)
 {
 	return NULL;
 }
@@ -97,46 +114,53 @@ async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx,
  * @cb_fn_param: parameter to pass to the callback routine
  */
 static inline void
-async_tx_sync_epilog(dma_async_tx_callback cb_fn, void *cb_fn_param)
+async_tx_sync_epilog(struct async_submit_ctl *submit)
+{
+	if (submit->cb_fn)
+		submit->cb_fn(submit->cb_param);
+}
+
+typedef union {
+	unsigned long addr;
+	struct page *page;
+	dma_addr_t dma;
+} addr_conv_t;
+
+static inline void
+init_async_submit(struct async_submit_ctl *args, enum async_tx_flags flags,
+		  struct dma_async_tx_descriptor *tx,
+		  dma_async_tx_callback cb_fn, void *cb_param,
+		  addr_conv_t *scribble)
 {
-	if (cb_fn)
-		cb_fn(cb_fn_param);
+	args->flags = flags;
+	args->depend_tx = tx;
+	args->cb_fn = cb_fn;
+	args->cb_param = cb_param;
+	args->scribble = scribble;
 }
 
-void
-async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx,
-	enum async_tx_flags flags, struct dma_async_tx_descriptor *depend_tx,
-	dma_async_tx_callback cb_fn, void *cb_fn_param);
+void async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx,
+		     struct async_submit_ctl *submit);
 
 struct dma_async_tx_descriptor *
 async_xor(struct page *dest, struct page **src_list, unsigned int offset,
-	int src_cnt, size_t len, enum async_tx_flags flags,
-	struct dma_async_tx_descriptor *depend_tx,
-	dma_async_tx_callback cb_fn, void *cb_fn_param);
+	  int src_cnt, size_t len, struct async_submit_ctl *submit);
 
 struct dma_async_tx_descriptor *
-async_xor_val(struct page *dest, struct page **src_list,
-	unsigned int offset, int src_cnt, size_t len,
-	u32 *result, enum async_tx_flags flags,
-	struct dma_async_tx_descriptor *depend_tx,
-	dma_async_tx_callback cb_fn, void *cb_fn_param);
+async_xor_val(struct page *dest, struct page **src_list, unsigned int offset,
+	      int src_cnt, size_t len, u32 *result,
+	      struct async_submit_ctl *submit);
 
 struct dma_async_tx_descriptor *
 async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset,
-	unsigned int src_offset, size_t len, enum async_tx_flags flags,
-	struct dma_async_tx_descriptor *depend_tx,
-	dma_async_tx_callback cb_fn, void *cb_fn_param);
+	     unsigned int src_offset, size_t len,
+	     struct async_submit_ctl *submit);
 
 struct dma_async_tx_descriptor *
 async_memset(struct page *dest, int val, unsigned int offset,
-	size_t len, enum async_tx_flags flags,
-	struct dma_async_tx_descriptor *depend_tx,
-	dma_async_tx_callback cb_fn, void *cb_fn_param);
+	     size_t len, struct async_submit_ctl *submit);
 
-struct dma_async_tx_descriptor *
-async_trigger_callback(enum async_tx_flags flags,
-	struct dma_async_tx_descriptor *depend_tx,
-	dma_async_tx_callback cb_fn, void *cb_fn_param);
+struct dma_async_tx_descriptor *async_trigger_callback(struct async_submit_ctl *submit);
 
 void async_tx_quiesce(struct dma_async_tx_descriptor **tx);
 #endif /* _ASYNC_TX_H_ */
-- 
cgit v1.2.3


From ad283ea4a3ce82cda2efe33163748a397b31b1eb Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Sat, 29 Aug 2009 19:09:26 -0700
Subject: async_tx: add sum check flags

Replace the flat zero_sum_result with a collection of flags to contain
the P (xor) zero-sum result, and the soon to be utilized Q (raid6 reed
solomon syndrome) zero-sum result.  Use the SUM_CHECK_ namespace instead
of DMA_ since these flags will be used on non-dma-zero-sum enabled
platforms.

Reviewed-by: Andre Noll <maan@systemlinux.org>
Acked-by: Maciej Sosnowski <maciej.sosnowski@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 arch/arm/include/asm/hardware/iop3xx-adma.h |  5 +++--
 arch/arm/mach-iop13xx/include/mach/adma.h   | 12 +++++++-----
 crypto/async_tx/async_xor.c                 |  4 ++--
 drivers/md/raid5.c                          |  2 +-
 drivers/md/raid5.h                          |  5 +++--
 include/linux/async_tx.h                    |  2 +-
 include/linux/dmaengine.h                   | 21 ++++++++++++++++++++-
 7 files changed, 37 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/include/asm/hardware/iop3xx-adma.h b/arch/arm/include/asm/hardware/iop3xx-adma.h
index 83e6ba338e2c..26eefea02314 100644
--- a/arch/arm/include/asm/hardware/iop3xx-adma.h
+++ b/arch/arm/include/asm/hardware/iop3xx-adma.h
@@ -756,13 +756,14 @@ static inline void iop_desc_set_block_fill_val(struct iop_adma_desc_slot *desc,
 	hw_desc->src[0] = val;
 }
 
-static inline int iop_desc_get_zero_result(struct iop_adma_desc_slot *desc)
+static inline enum sum_check_flags
+iop_desc_get_zero_result(struct iop_adma_desc_slot *desc)
 {
 	struct iop3xx_desc_aau *hw_desc = desc->hw_desc;
 	struct iop3xx_aau_desc_ctrl desc_ctrl = hw_desc->desc_ctrl_field;
 
 	iop_paranoia(!(desc_ctrl.tx_complete && desc_ctrl.zero_result_en));
-	return desc_ctrl.zero_result_err;
+	return desc_ctrl.zero_result_err << SUM_CHECK_P;
 }
 
 static inline void iop_chan_append(struct iop_adma_chan *chan)
diff --git a/arch/arm/mach-iop13xx/include/mach/adma.h b/arch/arm/mach-iop13xx/include/mach/adma.h
index 5722e86f2174..1cd31df8924d 100644
--- a/arch/arm/mach-iop13xx/include/mach/adma.h
+++ b/arch/arm/mach-iop13xx/include/mach/adma.h
@@ -428,18 +428,20 @@ static inline void iop_desc_set_block_fill_val(struct iop_adma_desc_slot *desc,
 	hw_desc->block_fill_data = val;
 }
 
-static inline int iop_desc_get_zero_result(struct iop_adma_desc_slot *desc)
+static inline enum sum_check_flags
+iop_desc_get_zero_result(struct iop_adma_desc_slot *desc)
 {
 	struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
 	struct iop13xx_adma_desc_ctrl desc_ctrl = hw_desc->desc_ctrl_field;
 	struct iop13xx_adma_byte_count byte_count = hw_desc->byte_count_field;
+	enum sum_check_flags flags;
 
 	BUG_ON(!(byte_count.tx_complete && desc_ctrl.zero_result));
 
-	if (desc_ctrl.pq_xfer_en)
-		return byte_count.zero_result_err_q;
-	else
-		return byte_count.zero_result_err;
+	flags = byte_count.zero_result_err_q << SUM_CHECK_Q;
+	flags |= byte_count.zero_result_err << SUM_CHECK_P;
+
+	return flags;
 }
 
 static inline void iop_chan_append(struct iop_adma_chan *chan)
diff --git a/crypto/async_tx/async_xor.c b/crypto/async_tx/async_xor.c
index 1e96c4df7061..78fb7780272a 100644
--- a/crypto/async_tx/async_xor.c
+++ b/crypto/async_tx/async_xor.c
@@ -246,7 +246,7 @@ static int page_is_zero(struct page *p, unsigned int offset, size_t len)
  */
 struct dma_async_tx_descriptor *
 async_xor_val(struct page *dest, struct page **src_list, unsigned int offset,
-	      int src_cnt, size_t len, u32 *result,
+	      int src_cnt, size_t len, enum sum_check_flags *result,
 	      struct async_submit_ctl *submit)
 {
 	struct dma_chan *chan = async_tx_find_channel(submit, DMA_XOR_VAL,
@@ -304,7 +304,7 @@ async_xor_val(struct page *dest, struct page **src_list, unsigned int offset,
 
 		async_tx_quiesce(&tx);
 
-		*result = page_is_zero(dest, offset, len) ? 0 : 1;
+		*result = !page_is_zero(dest, offset, len) << SUM_CHECK_P;
 
 		async_tx_sync_epilog(submit);
 		submit->flags = flags_orig;
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 7727954cf726..1f2a266f3cf7 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -2590,7 +2590,7 @@ static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh,
 		 * we are done.  Otherwise update the mismatch count and repair
 		 * parity if !MD_RECOVERY_CHECK
 		 */
-		if (sh->ops.zero_sum_result == 0)
+		if ((sh->ops.zero_sum_result & SUM_CHECK_P_RESULT) == 0)
 			/* parity is correct (on disc,
 			 * not in buffer any more)
 			 */
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h
index e7baabffee86..75f2c6c4cf90 100644
--- a/drivers/md/raid5.h
+++ b/drivers/md/raid5.h
@@ -2,6 +2,7 @@
 #define _RAID5_H
 
 #include <linux/raid/xor.h>
+#include <linux/dmaengine.h>
 
 /*
  *
@@ -215,8 +216,8 @@ struct stripe_head {
 	 * @target - STRIPE_OP_COMPUTE_BLK target
 	 */
 	struct stripe_operations {
-		int		   target;
-		u32		   zero_sum_result;
+		int		     target;
+		enum sum_check_flags zero_sum_result;
 	} ops;
 	struct r5dev {
 		struct bio	req;
diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h
index 00cfb637ddf2..3d21a2517518 100644
--- a/include/linux/async_tx.h
+++ b/include/linux/async_tx.h
@@ -148,7 +148,7 @@ async_xor(struct page *dest, struct page **src_list, unsigned int offset,
 
 struct dma_async_tx_descriptor *
 async_xor_val(struct page *dest, struct page **src_list, unsigned int offset,
-	      int src_cnt, size_t len, u32 *result,
+	      int src_cnt, size_t len, enum sum_check_flags *result,
 	      struct async_submit_ctl *submit);
 
 struct dma_async_tx_descriptor *
diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index 6768727d00d7..02447afcebad 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -86,6 +86,25 @@ enum dma_ctrl_flags {
 	DMA_COMPL_SKIP_DEST_UNMAP = (1 << 3),
 };
 
+/**
+ * enum sum_check_bits - bit position of pq_check_flags
+ */
+enum sum_check_bits {
+	SUM_CHECK_P = 0,
+	SUM_CHECK_Q = 1,
+};
+
+/**
+ * enum pq_check_flags - result of async_{xor,pq}_zero_sum operations
+ * @SUM_CHECK_P_RESULT - 1 if xor zero sum error, 0 otherwise
+ * @SUM_CHECK_Q_RESULT - 1 if reed-solomon zero sum error, 0 otherwise
+ */
+enum sum_check_flags {
+	SUM_CHECK_P_RESULT = (1 << SUM_CHECK_P),
+	SUM_CHECK_Q_RESULT = (1 << SUM_CHECK_Q),
+};
+
+
 /**
  * dma_cap_mask_t - capabilities bitmap modeled after cpumask_t.
  * See linux/cpumask.h
@@ -245,7 +264,7 @@ struct dma_device {
 		unsigned int src_cnt, size_t len, unsigned long flags);
 	struct dma_async_tx_descriptor *(*device_prep_dma_xor_val)(
 		struct dma_chan *chan, dma_addr_t *src,	unsigned int src_cnt,
-		size_t len, u32 *result, unsigned long flags);
+		size_t len, enum sum_check_flags *result, unsigned long flags);
 	struct dma_async_tx_descriptor *(*device_prep_dma_memset)(
 		struct dma_chan *chan, dma_addr_t dest, int value, size_t len,
 		unsigned long flags);
-- 
cgit v1.2.3


From 95475e57113c66aac7583925736ed2e2d58c990d Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Tue, 14 Jul 2009 12:19:02 -0700
Subject: async_tx: remove walk of tx->parent chain in dma_wait_for_async_tx

We currently walk the parent chain when waiting for a given tx to
complete however this walk may race with the driver cleanup routine.
The routines in async_raid6_recov.c may fall back to the synchronous
path at any point so we need to be prepared to call async_tx_quiesce()
(which calls  dma_wait_for_async_tx).  To remove the ->parent walk we
guarantee that every time a dependency is attached ->issue_pending() is
invoked, then we can simply poll the initial descriptor until
completion.

This also allows for a lighter weight 'issue pending' implementation as
there is no longer a requirement to iterate through all the channels'
->issue_pending() routines as long as operations have been submitted in
an ordered chain.  async_tx_issue_pending() is added for this case.

Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 crypto/async_tx/async_tx.c | 13 +++++++------
 drivers/dma/dmaengine.c    | 45 ++++++++++-----------------------------------
 include/linux/async_tx.h   | 23 +++++++++++++++++++++++
 3 files changed, 40 insertions(+), 41 deletions(-)

(limited to 'include/linux')

diff --git a/crypto/async_tx/async_tx.c b/crypto/async_tx/async_tx.c
index 6e37ad3f4417..60615fedcf5e 100644
--- a/crypto/async_tx/async_tx.c
+++ b/crypto/async_tx/async_tx.c
@@ -77,8 +77,8 @@ static void
 async_tx_channel_switch(struct dma_async_tx_descriptor *depend_tx,
 			struct dma_async_tx_descriptor *tx)
 {
-	struct dma_chan *chan;
-	struct dma_device *device;
+	struct dma_chan *chan = depend_tx->chan;
+	struct dma_device *device = chan->device;
 	struct dma_async_tx_descriptor *intr_tx = (void *) ~0;
 
 	/* first check to see if we can still append to depend_tx */
@@ -90,11 +90,11 @@ async_tx_channel_switch(struct dma_async_tx_descriptor *depend_tx,
 	}
 	spin_unlock_bh(&depend_tx->lock);
 
-	if (!intr_tx)
+	/* attached dependency, flush the parent channel */
+	if (!intr_tx) {
+		device->device_issue_pending(chan);
 		return;
-
-	chan = depend_tx->chan;
-	device = chan->device;
+	}
 
 	/* see if we can schedule an interrupt
 	 * otherwise poll for completion
@@ -128,6 +128,7 @@ async_tx_channel_switch(struct dma_async_tx_descriptor *depend_tx,
 			intr_tx->tx_submit(intr_tx);
 			async_tx_ack(intr_tx);
 		}
+		device->device_issue_pending(chan);
 	} else {
 		if (dma_wait_for_async_tx(depend_tx) == DMA_ERROR)
 			panic("%s: DMA_ERROR waiting for depend_tx\n",
diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
index 6781e8f3c064..e002e0e0d055 100644
--- a/drivers/dma/dmaengine.c
+++ b/drivers/dma/dmaengine.c
@@ -934,49 +934,24 @@ EXPORT_SYMBOL(dma_async_tx_descriptor_init);
 
 /* dma_wait_for_async_tx - spin wait for a transaction to complete
  * @tx: in-flight transaction to wait on
- *
- * This routine assumes that tx was obtained from a call to async_memcpy,
- * async_xor, async_memset, etc which ensures that tx is "in-flight" (prepped
- * and submitted).  Walking the parent chain is only meant to cover for DMA
- * drivers that do not implement the DMA_INTERRUPT capability and may race with
- * the driver's descriptor cleanup routine.
  */
 enum dma_status
 dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx)
 {
-	enum dma_status status;
-	struct dma_async_tx_descriptor *iter;
-	struct dma_async_tx_descriptor *parent;
+	unsigned long dma_sync_wait_timeout = jiffies + msecs_to_jiffies(5000);
 
 	if (!tx)
 		return DMA_SUCCESS;
 
-	WARN_ONCE(tx->parent, "%s: speculatively walking dependency chain for"
-		  " %s\n", __func__, dma_chan_name(tx->chan));
-
-	/* poll through the dependency chain, return when tx is complete */
-	do {
-		iter = tx;
-
-		/* find the root of the unsubmitted dependency chain */
-		do {
-			parent = iter->parent;
-			if (!parent)
-				break;
-			else
-				iter = parent;
-		} while (parent);
-
-		/* there is a small window for ->parent == NULL and
-		 * ->cookie == -EBUSY
-		 */
-		while (iter->cookie == -EBUSY)
-			cpu_relax();
-
-		status = dma_sync_wait(iter->chan, iter->cookie);
-	} while (status == DMA_IN_PROGRESS || (iter != tx));
-
-	return status;
+	while (tx->cookie == -EBUSY) {
+		if (time_after_eq(jiffies, dma_sync_wait_timeout)) {
+			pr_err("%s timeout waiting for descriptor submission\n",
+				__func__);
+			return DMA_ERROR;
+		}
+		cpu_relax();
+	}
+	return dma_sync_wait(tx->chan, tx->cookie);
 }
 EXPORT_SYMBOL_GPL(dma_wait_for_async_tx);
 
diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h
index 3d21a2517518..12a2efcbd565 100644
--- a/include/linux/async_tx.h
+++ b/include/linux/async_tx.h
@@ -83,6 +83,24 @@ struct async_submit_ctl {
 
 #ifdef CONFIG_DMA_ENGINE
 #define async_tx_issue_pending_all dma_issue_pending_all
+
+/**
+ * async_tx_issue_pending - send pending descriptor to the hardware channel
+ * @tx: descriptor handle to retrieve hardware context
+ *
+ * Note: any dependent operations will have already been issued by
+ * async_tx_channel_switch, or (in the case of no channel switch) will
+ * be already pending on this channel.
+ */
+static inline void async_tx_issue_pending(struct dma_async_tx_descriptor *tx)
+{
+	if (likely(tx)) {
+		struct dma_chan *chan = tx->chan;
+		struct dma_device *dma = chan->device;
+
+		dma->device_issue_pending(chan);
+	}
+}
 #ifdef CONFIG_ARCH_HAS_ASYNC_TX_FIND_CHANNEL
 #include <asm/async_tx.h>
 #else
@@ -98,6 +116,11 @@ static inline void async_tx_issue_pending_all(void)
 	do { } while (0);
 }
 
+static inline void async_tx_issue_pending(struct dma_async_tx_descriptor *tx)
+{
+	do { } while (0);
+}
+
 static inline struct dma_chan *
 async_tx_find_channel(struct async_submit_ctl *submit,
 		      enum dma_transaction_type tx_type, struct page **dst,
-- 
cgit v1.2.3


From b2f46fd8ef3dff2ab30f31126833f78b7480283a Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Tue, 14 Jul 2009 12:20:36 -0700
Subject: async_tx: add support for asynchronous GF multiplication

[ Based on an original patch by Yuri Tikhonov ]

This adds support for doing asynchronous GF multiplication by adding
two additional functions to the async_tx API:

 async_gen_syndrome() does simultaneous XOR and Galois field
    multiplication of sources.

 async_syndrome_val() validates the given source buffers against known P
    and Q values.

When a request is made to run async_pq against more than the hardware
maximum number of supported sources we need to reuse the previous
generated P and Q values as sources into the next operation.  Care must
be taken to remove Q from P' and P from Q'.  For example to perform a 5
source pq op with hardware that only supports 4 sources at a time the
following approach is taken:

p, q = PQ(src0, src1, src2, src3, COEF({01}, {02}, {04}, {08}))
p', q' = PQ(p, q, q, src4, COEF({00}, {01}, {00}, {10}))

p' = p + q + q + src4 = p + src4
q' = {00}*p + {01}*q + {00}*q + {10}*src4 = q + {10}*src4

Note: 4 is the minimum acceptable maxpq otherwise we punt to
synchronous-software path.

The DMA_PREP_CONTINUE flag indicates to the driver to reuse p and q as
sources (in the above manner) and fill the remaining slots up to maxpq
with the new sources/coefficients.

Note1: Some devices have native support for P+Q continuation and can skip
this extra work.  Devices with this capability can advertise it with
dma_set_maxpq.  It is up to each driver how to handle the
DMA_PREP_CONTINUE flag.

Note2: The api supports disabling the generation of P when generating Q,
this is ignored by the synchronous path but is implemented by some dma
devices to save unnecessary writes.  In this case the continuation
algorithm is simplified to only reuse Q as a source.

Cc: H. Peter Anvin <hpa@zytor.com>
Cc: David Woodhouse <David.Woodhouse@intel.com>
Signed-off-by: Yuri Tikhonov <yur@emcraft.com>
Signed-off-by: Ilya Yanok <yanok@emcraft.com>
Reviewed-by: Andre Noll <maan@systemlinux.org>
Acked-by: Maciej Sosnowski <maciej.sosnowski@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 Documentation/crypto/async-tx-api.txt |   3 +
 arch/arm/mach-iop13xx/setup.c         |   2 +-
 crypto/async_tx/Kconfig               |   4 +
 crypto/async_tx/Makefile              |   1 +
 crypto/async_tx/async_pq.c            | 388 ++++++++++++++++++++++++++++++++++
 crypto/async_tx/async_xor.c           |   2 +-
 drivers/dma/dmaengine.c               |   4 +
 drivers/dma/iop-adma.c                |   2 +-
 include/linux/async_tx.h              |   9 +
 include/linux/dmaengine.h             |  87 +++++++-
 10 files changed, 493 insertions(+), 9 deletions(-)
 create mode 100644 crypto/async_tx/async_pq.c

(limited to 'include/linux')

diff --git a/Documentation/crypto/async-tx-api.txt b/Documentation/crypto/async-tx-api.txt
index 6b15e488c0e7..0e48e054d69a 100644
--- a/Documentation/crypto/async-tx-api.txt
+++ b/Documentation/crypto/async-tx-api.txt
@@ -64,6 +64,9 @@ xor     - xor a series of source buffers and write the result to a
 xor_val - xor a series of source buffers and set a flag if the
 	  result is zero.  The implementation attempts to prevent
 	  writes to memory
+pq	- generate the p+q (raid6 syndrome) from a series of source buffers
+pq_val  - validate that a p and or q buffer are in sync with a given series of
+	  sources
 
 3.3 Descriptor management:
 The return value is non-NULL and points to a 'descriptor' when the operation
diff --git a/arch/arm/mach-iop13xx/setup.c b/arch/arm/mach-iop13xx/setup.c
index 9800228b71d3..2e7ca0d75f8a 100644
--- a/arch/arm/mach-iop13xx/setup.c
+++ b/arch/arm/mach-iop13xx/setup.c
@@ -506,7 +506,7 @@ void __init iop13xx_platform_init(void)
 			dma_cap_set(DMA_MEMSET, plat_data->cap_mask);
 			dma_cap_set(DMA_MEMCPY_CRC32C, plat_data->cap_mask);
 			dma_cap_set(DMA_INTERRUPT, plat_data->cap_mask);
-			dma_cap_set(DMA_PQ_XOR, plat_data->cap_mask);
+			dma_cap_set(DMA_PQ, plat_data->cap_mask);
 			dma_cap_set(DMA_PQ_UPDATE, plat_data->cap_mask);
 			dma_cap_set(DMA_PQ_VAL, plat_data->cap_mask);
 			break;
diff --git a/crypto/async_tx/Kconfig b/crypto/async_tx/Kconfig
index d8fb39145986..cb6d7314f198 100644
--- a/crypto/async_tx/Kconfig
+++ b/crypto/async_tx/Kconfig
@@ -14,3 +14,7 @@ config ASYNC_MEMSET
 	tristate
 	select ASYNC_CORE
 
+config ASYNC_PQ
+	tristate
+	select ASYNC_CORE
+
diff --git a/crypto/async_tx/Makefile b/crypto/async_tx/Makefile
index 27baa7d52fbc..1b9926588259 100644
--- a/crypto/async_tx/Makefile
+++ b/crypto/async_tx/Makefile
@@ -2,3 +2,4 @@ obj-$(CONFIG_ASYNC_CORE) += async_tx.o
 obj-$(CONFIG_ASYNC_MEMCPY) += async_memcpy.o
 obj-$(CONFIG_ASYNC_MEMSET) += async_memset.o
 obj-$(CONFIG_ASYNC_XOR) += async_xor.o
+obj-$(CONFIG_ASYNC_PQ) += async_pq.o
diff --git a/crypto/async_tx/async_pq.c b/crypto/async_tx/async_pq.c
new file mode 100644
index 000000000000..108b21efb499
--- /dev/null
+++ b/crypto/async_tx/async_pq.c
@@ -0,0 +1,388 @@
+/*
+ * Copyright(c) 2007 Yuri Tikhonov <yur@emcraft.com>
+ * Copyright(c) 2009 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ *
+ * The full GNU General Public License is included in this distribution in the
+ * file called COPYING.
+ */
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/dma-mapping.h>
+#include <linux/raid/pq.h>
+#include <linux/async_tx.h>
+
+/**
+ * scribble - space to hold throwaway P buffer for synchronous gen_syndrome
+ */
+static struct page *scribble;
+
+static bool is_raid6_zero_block(struct page *p)
+{
+	return p == (void *) raid6_empty_zero_page;
+}
+
+/* the struct page *blocks[] parameter passed to async_gen_syndrome()
+ * and async_syndrome_val() contains the 'P' destination address at
+ * blocks[disks-2] and the 'Q' destination address at blocks[disks-1]
+ *
+ * note: these are macros as they are used as lvalues
+ */
+#define P(b, d) (b[d-2])
+#define Q(b, d) (b[d-1])
+
+/**
+ * do_async_gen_syndrome - asynchronously calculate P and/or Q
+ */
+static __async_inline struct dma_async_tx_descriptor *
+do_async_gen_syndrome(struct dma_chan *chan, struct page **blocks,
+		      const unsigned char *scfs, unsigned int offset, int disks,
+		      size_t len, dma_addr_t *dma_src,
+		      struct async_submit_ctl *submit)
+{
+	struct dma_async_tx_descriptor *tx = NULL;
+	struct dma_device *dma = chan->device;
+	enum dma_ctrl_flags dma_flags = 0;
+	enum async_tx_flags flags_orig = submit->flags;
+	dma_async_tx_callback cb_fn_orig = submit->cb_fn;
+	dma_async_tx_callback cb_param_orig = submit->cb_param;
+	int src_cnt = disks - 2;
+	unsigned char coefs[src_cnt];
+	unsigned short pq_src_cnt;
+	dma_addr_t dma_dest[2];
+	int src_off = 0;
+	int idx;
+	int i;
+
+	/* DMAs use destinations as sources, so use BIDIRECTIONAL mapping */
+	if (P(blocks, disks))
+		dma_dest[0] = dma_map_page(dma->dev, P(blocks, disks), offset,
+					   len, DMA_BIDIRECTIONAL);
+	else
+		dma_flags |= DMA_PREP_PQ_DISABLE_P;
+	if (Q(blocks, disks))
+		dma_dest[1] = dma_map_page(dma->dev, Q(blocks, disks), offset,
+					   len, DMA_BIDIRECTIONAL);
+	else
+		dma_flags |= DMA_PREP_PQ_DISABLE_Q;
+
+	/* convert source addresses being careful to collapse 'empty'
+	 * sources and update the coefficients accordingly
+	 */
+	for (i = 0, idx = 0; i < src_cnt; i++) {
+		if (is_raid6_zero_block(blocks[i]))
+			continue;
+		dma_src[idx] = dma_map_page(dma->dev, blocks[i], offset, len,
+					    DMA_TO_DEVICE);
+		coefs[idx] = scfs[i];
+		idx++;
+	}
+	src_cnt = idx;
+
+	while (src_cnt > 0) {
+		submit->flags = flags_orig;
+		pq_src_cnt = min(src_cnt, dma_maxpq(dma, dma_flags));
+		/* if we are submitting additional pqs, leave the chain open,
+		 * clear the callback parameters, and leave the destination
+		 * buffers mapped
+		 */
+		if (src_cnt > pq_src_cnt) {
+			submit->flags &= ~ASYNC_TX_ACK;
+			dma_flags |= DMA_COMPL_SKIP_DEST_UNMAP;
+			submit->cb_fn = NULL;
+			submit->cb_param = NULL;
+		} else {
+			dma_flags &= ~DMA_COMPL_SKIP_DEST_UNMAP;
+			submit->cb_fn = cb_fn_orig;
+			submit->cb_param = cb_param_orig;
+			if (cb_fn_orig)
+				dma_flags |= DMA_PREP_INTERRUPT;
+		}
+
+		/* Since we have clobbered the src_list we are committed
+		 * to doing this asynchronously.  Drivers force forward
+		 * progress in case they can not provide a descriptor
+		 */
+		for (;;) {
+			tx = dma->device_prep_dma_pq(chan, dma_dest,
+						     &dma_src[src_off],
+						     pq_src_cnt,
+						     &coefs[src_off], len,
+						     dma_flags);
+			if (likely(tx))
+				break;
+			async_tx_quiesce(&submit->depend_tx);
+			dma_async_issue_pending(chan);
+		}
+
+		async_tx_submit(chan, tx, submit);
+		submit->depend_tx = tx;
+
+		/* drop completed sources */
+		src_cnt -= pq_src_cnt;
+		src_off += pq_src_cnt;
+
+		dma_flags |= DMA_PREP_CONTINUE;
+	}
+
+	return tx;
+}
+
+/**
+ * do_sync_gen_syndrome - synchronously calculate a raid6 syndrome
+ */
+static void
+do_sync_gen_syndrome(struct page **blocks, unsigned int offset, int disks,
+		     size_t len, struct async_submit_ctl *submit)
+{
+	void **srcs;
+	int i;
+
+	if (submit->scribble)
+		srcs = submit->scribble;
+	else
+		srcs = (void **) blocks;
+
+	for (i = 0; i < disks; i++) {
+		if (is_raid6_zero_block(blocks[i])) {
+			BUG_ON(i > disks - 3); /* P or Q can't be zero */
+			srcs[i] = blocks[i];
+		} else
+			srcs[i] = page_address(blocks[i]) + offset;
+	}
+	raid6_call.gen_syndrome(disks, len, srcs);
+	async_tx_sync_epilog(submit);
+}
+
+/**
+ * async_gen_syndrome - asynchronously calculate a raid6 syndrome
+ * @blocks: source blocks from idx 0..disks-3, P @ disks-2 and Q @ disks-1
+ * @offset: common offset into each block (src and dest) to start transaction
+ * @disks: number of blocks (including missing P or Q, see below)
+ * @len: length of operation in bytes
+ * @submit: submission/completion modifiers
+ *
+ * General note: This routine assumes a field of GF(2^8) with a
+ * primitive polynomial of 0x11d and a generator of {02}.
+ *
+ * 'disks' note: callers can optionally omit either P or Q (but not
+ * both) from the calculation by setting blocks[disks-2] or
+ * blocks[disks-1] to NULL.  When P or Q is omitted 'len' must be <=
+ * PAGE_SIZE as a temporary buffer of this size is used in the
+ * synchronous path.  'disks' always accounts for both destination
+ * buffers.
+ *
+ * 'blocks' note: if submit->scribble is NULL then the contents of
+ * 'blocks' may be overridden
+ */
+struct dma_async_tx_descriptor *
+async_gen_syndrome(struct page **blocks, unsigned int offset, int disks,
+		   size_t len, struct async_submit_ctl *submit)
+{
+	int src_cnt = disks - 2;
+	struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ,
+						      &P(blocks, disks), 2,
+						      blocks, src_cnt, len);
+	struct dma_device *device = chan ? chan->device : NULL;
+	dma_addr_t *dma_src = NULL;
+
+	BUG_ON(disks > 255 || !(P(blocks, disks) || Q(blocks, disks)));
+
+	if (submit->scribble)
+		dma_src = submit->scribble;
+	else if (sizeof(dma_addr_t) <= sizeof(struct page *))
+		dma_src = (dma_addr_t *) blocks;
+
+	if (dma_src && device &&
+	    (src_cnt <= dma_maxpq(device, 0) ||
+	     dma_maxpq(device, DMA_PREP_CONTINUE) > 0)) {
+		/* run the p+q asynchronously */
+		pr_debug("%s: (async) disks: %d len: %zu\n",
+			 __func__, disks, len);
+		return do_async_gen_syndrome(chan, blocks, raid6_gfexp, offset,
+					     disks, len, dma_src, submit);
+	}
+
+	/* run the pq synchronously */
+	pr_debug("%s: (sync) disks: %d len: %zu\n", __func__, disks, len);
+
+	/* wait for any prerequisite operations */
+	async_tx_quiesce(&submit->depend_tx);
+
+	if (!P(blocks, disks)) {
+		P(blocks, disks) = scribble;
+		BUG_ON(len + offset > PAGE_SIZE);
+	}
+	if (!Q(blocks, disks)) {
+		Q(blocks, disks) = scribble;
+		BUG_ON(len + offset > PAGE_SIZE);
+	}
+	do_sync_gen_syndrome(blocks, offset, disks, len, submit);
+
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(async_gen_syndrome);
+
+/**
+ * async_syndrome_val - asynchronously validate a raid6 syndrome
+ * @blocks: source blocks from idx 0..disks-3, P @ disks-2 and Q @ disks-1
+ * @offset: common offset into each block (src and dest) to start transaction
+ * @disks: number of blocks (including missing P or Q, see below)
+ * @len: length of operation in bytes
+ * @pqres: on val failure SUM_CHECK_P_RESULT and/or SUM_CHECK_Q_RESULT are set
+ * @spare: temporary result buffer for the synchronous case
+ * @submit: submission / completion modifiers
+ *
+ * The same notes from async_gen_syndrome apply to the 'blocks',
+ * and 'disks' parameters of this routine.  The synchronous path
+ * requires a temporary result buffer and submit->scribble to be
+ * specified.
+ */
+struct dma_async_tx_descriptor *
+async_syndrome_val(struct page **blocks, unsigned int offset, int disks,
+		   size_t len, enum sum_check_flags *pqres, struct page *spare,
+		   struct async_submit_ctl *submit)
+{
+	struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ_VAL,
+						      NULL, 0,  blocks, disks,
+						      len);
+	struct dma_device *device = chan ? chan->device : NULL;
+	struct dma_async_tx_descriptor *tx;
+	enum dma_ctrl_flags dma_flags = submit->cb_fn ? DMA_PREP_INTERRUPT : 0;
+	dma_addr_t *dma_src = NULL;
+
+	BUG_ON(disks < 4);
+
+	if (submit->scribble)
+		dma_src = submit->scribble;
+	else if (sizeof(dma_addr_t) <= sizeof(struct page *))
+		dma_src = (dma_addr_t *) blocks;
+
+	if (dma_src && device && disks <= dma_maxpq(device, 0)) {
+		struct device *dev = device->dev;
+		dma_addr_t *pq = &dma_src[disks-2];
+		int i;
+
+		pr_debug("%s: (async) disks: %d len: %zu\n",
+			 __func__, disks, len);
+		if (!P(blocks, disks))
+			dma_flags |= DMA_PREP_PQ_DISABLE_P;
+		if (!Q(blocks, disks))
+			dma_flags |= DMA_PREP_PQ_DISABLE_Q;
+		for (i = 0; i < disks; i++)
+			if (likely(blocks[i])) {
+				BUG_ON(is_raid6_zero_block(blocks[i]));
+				dma_src[i] = dma_map_page(dev, blocks[i],
+							  offset, len,
+							  DMA_TO_DEVICE);
+			}
+
+		for (;;) {
+			tx = device->device_prep_dma_pq_val(chan, pq, dma_src,
+							    disks - 2,
+							    raid6_gfexp,
+							    len, pqres,
+							    dma_flags);
+			if (likely(tx))
+				break;
+			async_tx_quiesce(&submit->depend_tx);
+			dma_async_issue_pending(chan);
+		}
+		async_tx_submit(chan, tx, submit);
+
+		return tx;
+	} else {
+		struct page *p_src = P(blocks, disks);
+		struct page *q_src = Q(blocks, disks);
+		enum async_tx_flags flags_orig = submit->flags;
+		dma_async_tx_callback cb_fn_orig = submit->cb_fn;
+		void *scribble = submit->scribble;
+		void *cb_param_orig = submit->cb_param;
+		void *p, *q, *s;
+
+		pr_debug("%s: (sync) disks: %d len: %zu\n",
+			 __func__, disks, len);
+
+		/* caller must provide a temporary result buffer and
+		 * allow the input parameters to be preserved
+		 */
+		BUG_ON(!spare || !scribble);
+
+		/* wait for any prerequisite operations */
+		async_tx_quiesce(&submit->depend_tx);
+
+		/* recompute p and/or q into the temporary buffer and then
+		 * check to see the result matches the current value
+		 */
+		tx = NULL;
+		*pqres = 0;
+		if (p_src) {
+			init_async_submit(submit, ASYNC_TX_XOR_ZERO_DST, NULL,
+					  NULL, NULL, scribble);
+			tx = async_xor(spare, blocks, offset, disks-2, len, submit);
+			async_tx_quiesce(&tx);
+			p = page_address(p_src) + offset;
+			s = page_address(spare) + offset;
+			*pqres |= !!memcmp(p, s, len) << SUM_CHECK_P;
+		}
+
+		if (q_src) {
+			P(blocks, disks) = NULL;
+			Q(blocks, disks) = spare;
+			init_async_submit(submit, 0, NULL, NULL, NULL, scribble);
+			tx = async_gen_syndrome(blocks, offset, disks, len, submit);
+			async_tx_quiesce(&tx);
+			q = page_address(q_src) + offset;
+			s = page_address(spare) + offset;
+			*pqres |= !!memcmp(q, s, len) << SUM_CHECK_Q;
+		}
+
+		/* restore P, Q and submit */
+		P(blocks, disks) = p_src;
+		Q(blocks, disks) = q_src;
+
+		submit->cb_fn = cb_fn_orig;
+		submit->cb_param = cb_param_orig;
+		submit->flags = flags_orig;
+		async_tx_sync_epilog(submit);
+
+		return NULL;
+	}
+}
+EXPORT_SYMBOL_GPL(async_syndrome_val);
+
+static int __init async_pq_init(void)
+{
+	scribble = alloc_page(GFP_KERNEL);
+
+	if (scribble)
+		return 0;
+
+	pr_err("%s: failed to allocate required spare page\n", __func__);
+
+	return -ENOMEM;
+}
+
+static void __exit async_pq_exit(void)
+{
+	put_page(scribble);
+}
+
+module_init(async_pq_init);
+module_exit(async_pq_exit);
+
+MODULE_DESCRIPTION("asynchronous raid6 syndrome generation/validation");
+MODULE_LICENSE("GPL");
diff --git a/crypto/async_tx/async_xor.c b/crypto/async_tx/async_xor.c
index 78fb7780272a..56b5f98da463 100644
--- a/crypto/async_tx/async_xor.c
+++ b/crypto/async_tx/async_xor.c
@@ -62,7 +62,7 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list,
 	while (src_cnt) {
 		submit->flags = flags_orig;
 		dma_flags = 0;
-		xor_src_cnt = min(src_cnt, dma->max_xor);
+		xor_src_cnt = min(src_cnt, (int)dma->max_xor);
 		/* if we are submitting additional xors, leave the chain open,
 		 * clear the callback parameters, and leave the destination
 		 * buffer mapped
diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
index e002e0e0d055..cd5673d3043b 100644
--- a/drivers/dma/dmaengine.c
+++ b/drivers/dma/dmaengine.c
@@ -646,6 +646,10 @@ int dma_async_device_register(struct dma_device *device)
 		!device->device_prep_dma_xor);
 	BUG_ON(dma_has_cap(DMA_XOR_VAL, device->cap_mask) &&
 		!device->device_prep_dma_xor_val);
+	BUG_ON(dma_has_cap(DMA_PQ, device->cap_mask) &&
+		!device->device_prep_dma_pq);
+	BUG_ON(dma_has_cap(DMA_PQ_VAL, device->cap_mask) &&
+		!device->device_prep_dma_pq_val);
 	BUG_ON(dma_has_cap(DMA_MEMSET, device->cap_mask) &&
 		!device->device_prep_dma_memset);
 	BUG_ON(dma_has_cap(DMA_INTERRUPT, device->cap_mask) &&
diff --git a/drivers/dma/iop-adma.c b/drivers/dma/iop-adma.c
index 6ff79a672699..4496bc606662 100644
--- a/drivers/dma/iop-adma.c
+++ b/drivers/dma/iop-adma.c
@@ -1257,7 +1257,7 @@ static int __devinit iop_adma_probe(struct platform_device *pdev)
 
 	dev_printk(KERN_INFO, &pdev->dev, "Intel(R) IOP: "
 	  "( %s%s%s%s%s%s%s%s%s%s)\n",
-	  dma_has_cap(DMA_PQ_XOR, dma_dev->cap_mask) ? "pq_xor " : "",
+	  dma_has_cap(DMA_PQ, dma_dev->cap_mask) ? "pq " : "",
 	  dma_has_cap(DMA_PQ_UPDATE, dma_dev->cap_mask) ? "pq_update " : "",
 	  dma_has_cap(DMA_PQ_VAL, dma_dev->cap_mask) ? "pq_val " : "",
 	  dma_has_cap(DMA_XOR, dma_dev->cap_mask) ? "xor " : "",
diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h
index 12a2efcbd565..e6ce5f004f98 100644
--- a/include/linux/async_tx.h
+++ b/include/linux/async_tx.h
@@ -185,5 +185,14 @@ async_memset(struct page *dest, int val, unsigned int offset,
 
 struct dma_async_tx_descriptor *async_trigger_callback(struct async_submit_ctl *submit);
 
+struct dma_async_tx_descriptor *
+async_gen_syndrome(struct page **blocks, unsigned int offset, int src_cnt,
+		   size_t len, struct async_submit_ctl *submit);
+
+struct dma_async_tx_descriptor *
+async_syndrome_val(struct page **blocks, unsigned int offset, int src_cnt,
+		   size_t len, enum sum_check_flags *pqres, struct page *spare,
+		   struct async_submit_ctl *submit);
+
 void async_tx_quiesce(struct dma_async_tx_descriptor **tx);
 #endif /* _ASYNC_TX_H_ */
diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index 02447afcebad..ce010cd991d2 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -52,7 +52,7 @@ enum dma_status {
 enum dma_transaction_type {
 	DMA_MEMCPY,
 	DMA_XOR,
-	DMA_PQ_XOR,
+	DMA_PQ,
 	DMA_DUAL_XOR,
 	DMA_PQ_UPDATE,
 	DMA_XOR_VAL,
@@ -70,20 +70,28 @@ enum dma_transaction_type {
 
 /**
  * enum dma_ctrl_flags - DMA flags to augment operation preparation,
- * 	control completion, and communicate status.
+ *  control completion, and communicate status.
  * @DMA_PREP_INTERRUPT - trigger an interrupt (callback) upon completion of
- * 	this transaction
+ *  this transaction
  * @DMA_CTRL_ACK - the descriptor cannot be reused until the client
- * 	acknowledges receipt, i.e. has has a chance to establish any
- * 	dependency chains
+ *  acknowledges receipt, i.e. has has a chance to establish any dependency
+ *  chains
  * @DMA_COMPL_SKIP_SRC_UNMAP - set to disable dma-unmapping the source buffer(s)
  * @DMA_COMPL_SKIP_DEST_UNMAP - set to disable dma-unmapping the destination(s)
+ * @DMA_PREP_PQ_DISABLE_P - prevent generation of P while generating Q
+ * @DMA_PREP_PQ_DISABLE_Q - prevent generation of Q while generating P
+ * @DMA_PREP_CONTINUE - indicate to a driver that it is reusing buffers as
+ *  sources that were the result of a previous operation, in the case of a PQ
+ *  operation it continues the calculation with new sources
  */
 enum dma_ctrl_flags {
 	DMA_PREP_INTERRUPT = (1 << 0),
 	DMA_CTRL_ACK = (1 << 1),
 	DMA_COMPL_SKIP_SRC_UNMAP = (1 << 2),
 	DMA_COMPL_SKIP_DEST_UNMAP = (1 << 3),
+	DMA_PREP_PQ_DISABLE_P = (1 << 4),
+	DMA_PREP_PQ_DISABLE_Q = (1 << 5),
+	DMA_PREP_CONTINUE = (1 << 6),
 };
 
 /**
@@ -226,6 +234,7 @@ struct dma_async_tx_descriptor {
  * @global_node: list_head for global dma_device_list
  * @cap_mask: one or more dma_capability flags
  * @max_xor: maximum number of xor sources, 0 if no capability
+ * @max_pq: maximum number of PQ sources and PQ-continue capability
  * @dev_id: unique device ID
  * @dev: struct device reference for dma mapping api
  * @device_alloc_chan_resources: allocate resources and return the
@@ -234,6 +243,8 @@ struct dma_async_tx_descriptor {
  * @device_prep_dma_memcpy: prepares a memcpy operation
  * @device_prep_dma_xor: prepares a xor operation
  * @device_prep_dma_xor_val: prepares a xor validation operation
+ * @device_prep_dma_pq: prepares a pq operation
+ * @device_prep_dma_pq_val: prepares a pqzero_sum operation
  * @device_prep_dma_memset: prepares a memset operation
  * @device_prep_dma_interrupt: prepares an end of chain interrupt operation
  * @device_prep_slave_sg: prepares a slave dma operation
@@ -248,7 +259,9 @@ struct dma_device {
 	struct list_head channels;
 	struct list_head global_node;
 	dma_cap_mask_t  cap_mask;
-	int max_xor;
+	unsigned short max_xor;
+	unsigned short max_pq;
+	#define DMA_HAS_PQ_CONTINUE (1 << 15)
 
 	int dev_id;
 	struct device *dev;
@@ -265,6 +278,14 @@ struct dma_device {
 	struct dma_async_tx_descriptor *(*device_prep_dma_xor_val)(
 		struct dma_chan *chan, dma_addr_t *src,	unsigned int src_cnt,
 		size_t len, enum sum_check_flags *result, unsigned long flags);
+	struct dma_async_tx_descriptor *(*device_prep_dma_pq)(
+		struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src,
+		unsigned int src_cnt, const unsigned char *scf,
+		size_t len, unsigned long flags);
+	struct dma_async_tx_descriptor *(*device_prep_dma_pq_val)(
+		struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src,
+		unsigned int src_cnt, const unsigned char *scf, size_t len,
+		enum sum_check_flags *pqres, unsigned long flags);
 	struct dma_async_tx_descriptor *(*device_prep_dma_memset)(
 		struct dma_chan *chan, dma_addr_t dest, int value, size_t len,
 		unsigned long flags);
@@ -283,6 +304,60 @@ struct dma_device {
 	void (*device_issue_pending)(struct dma_chan *chan);
 };
 
+static inline void
+dma_set_maxpq(struct dma_device *dma, int maxpq, int has_pq_continue)
+{
+	dma->max_pq = maxpq;
+	if (has_pq_continue)
+		dma->max_pq |= DMA_HAS_PQ_CONTINUE;
+}
+
+static inline bool dmaf_continue(enum dma_ctrl_flags flags)
+{
+	return (flags & DMA_PREP_CONTINUE) == DMA_PREP_CONTINUE;
+}
+
+static inline bool dmaf_p_disabled_continue(enum dma_ctrl_flags flags)
+{
+	enum dma_ctrl_flags mask = DMA_PREP_CONTINUE | DMA_PREP_PQ_DISABLE_P;
+
+	return (flags & mask) == mask;
+}
+
+static inline bool dma_dev_has_pq_continue(struct dma_device *dma)
+{
+	return (dma->max_pq & DMA_HAS_PQ_CONTINUE) == DMA_HAS_PQ_CONTINUE;
+}
+
+static unsigned short dma_dev_to_maxpq(struct dma_device *dma)
+{
+	return dma->max_pq & ~DMA_HAS_PQ_CONTINUE;
+}
+
+/* dma_maxpq - reduce maxpq in the face of continued operations
+ * @dma - dma device with PQ capability
+ * @flags - to check if DMA_PREP_CONTINUE and DMA_PREP_PQ_DISABLE_P are set
+ *
+ * When an engine does not support native continuation we need 3 extra
+ * source slots to reuse P and Q with the following coefficients:
+ * 1/ {00} * P : remove P from Q', but use it as a source for P'
+ * 2/ {01} * Q : use Q to continue Q' calculation
+ * 3/ {00} * Q : subtract Q from P' to cancel (2)
+ *
+ * In the case where P is disabled we only need 1 extra source:
+ * 1/ {01} * Q : use Q to continue Q' calculation
+ */
+static inline int dma_maxpq(struct dma_device *dma, enum dma_ctrl_flags flags)
+{
+	if (dma_dev_has_pq_continue(dma) || !dmaf_continue(flags))
+		return dma_dev_to_maxpq(dma);
+	else if (dmaf_p_disabled_continue(flags))
+		return dma_dev_to_maxpq(dma) - 1;
+	else if (dmaf_continue(flags))
+		return dma_dev_to_maxpq(dma) - 3;
+	BUG();
+}
+
 /* --- public DMA engine API --- */
 
 #ifdef CONFIG_DMA_ENGINE
-- 
cgit v1.2.3


From 0a82a6239beecc95db6e05fe43ee62d16b381d38 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Tue, 14 Jul 2009 12:20:37 -0700
Subject: async_tx: add support for asynchronous RAID6 recovery operations

 async_raid6_2data_recov() recovers two data disk failures

 async_raid6_datap_recov() recovers a data disk and the P disk

These routines are a port of the synchronous versions found in
drivers/md/raid6recov.c.  The primary difference is breaking out the xor
operations into separate calls to async_xor.  Two helper routines are
introduced to perform scalar multiplication where needed.
async_sum_product() multiplies two sources by scalar coefficients and
then sums (xor) the result.  async_mult() simply multiplies a single
source by a scalar.

This implemention also includes, in contrast to the original
synchronous-only code, special case handling for the 4-disk and 5-disk
array cases.  In these situations the default N-disk algorithm will
present 0-source or 1-source operations to dma devices.  To cover for
dma devices where the minimum source count is 2 we implement 4-disk and
5-disk handling in the recovery code.

[ Impact: asynchronous raid6 recovery routines for 2data and datap cases ]

Cc: Yuri Tikhonov <yur@emcraft.com>
Cc: Ilya Yanok <yanok@emcraft.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: David Woodhouse <David.Woodhouse@intel.com>
Reviewed-by: Andre Noll <maan@systemlinux.org>
Acked-by: Maciej Sosnowski <maciej.sosnowski@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 Documentation/crypto/async-tx-api.txt |   4 +
 crypto/async_tx/Kconfig               |   5 +
 crypto/async_tx/Makefile              |   1 +
 crypto/async_tx/async_raid6_recov.c   | 448 ++++++++++++++++++++++++++++++++++
 include/linux/async_tx.h              |   8 +
 5 files changed, 466 insertions(+)
 create mode 100644 crypto/async_tx/async_raid6_recov.c

(limited to 'include/linux')

diff --git a/Documentation/crypto/async-tx-api.txt b/Documentation/crypto/async-tx-api.txt
index 0e48e054d69a..ba046b8fa92f 100644
--- a/Documentation/crypto/async-tx-api.txt
+++ b/Documentation/crypto/async-tx-api.txt
@@ -67,6 +67,10 @@ xor_val - xor a series of source buffers and set a flag if the
 pq	- generate the p+q (raid6 syndrome) from a series of source buffers
 pq_val  - validate that a p and or q buffer are in sync with a given series of
 	  sources
+datap	- (raid6_datap_recov) recover a raid6 data block and the p block
+	  from the given sources
+2data	- (raid6_2data_recov) recover 2 raid6 data blocks from the given
+	  sources
 
 3.3 Descriptor management:
 The return value is non-NULL and points to a 'descriptor' when the operation
diff --git a/crypto/async_tx/Kconfig b/crypto/async_tx/Kconfig
index cb6d7314f198..e5aeb2b79e6f 100644
--- a/crypto/async_tx/Kconfig
+++ b/crypto/async_tx/Kconfig
@@ -18,3 +18,8 @@ config ASYNC_PQ
 	tristate
 	select ASYNC_CORE
 
+config ASYNC_RAID6_RECOV
+	tristate
+	select ASYNC_CORE
+	select ASYNC_PQ
+
diff --git a/crypto/async_tx/Makefile b/crypto/async_tx/Makefile
index 1b9926588259..9a1a76811b80 100644
--- a/crypto/async_tx/Makefile
+++ b/crypto/async_tx/Makefile
@@ -3,3 +3,4 @@ obj-$(CONFIG_ASYNC_MEMCPY) += async_memcpy.o
 obj-$(CONFIG_ASYNC_MEMSET) += async_memset.o
 obj-$(CONFIG_ASYNC_XOR) += async_xor.o
 obj-$(CONFIG_ASYNC_PQ) += async_pq.o
+obj-$(CONFIG_ASYNC_RAID6_RECOV) += async_raid6_recov.o
diff --git a/crypto/async_tx/async_raid6_recov.c b/crypto/async_tx/async_raid6_recov.c
new file mode 100644
index 000000000000..0c14d48c9896
--- /dev/null
+++ b/crypto/async_tx/async_raid6_recov.c
@@ -0,0 +1,448 @@
+/*
+ * Asynchronous RAID-6 recovery calculations ASYNC_TX API.
+ * Copyright(c) 2009 Intel Corporation
+ *
+ * based on raid6recov.c:
+ *   Copyright 2002 H. Peter Anvin
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 51
+ * Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ */
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/dma-mapping.h>
+#include <linux/raid/pq.h>
+#include <linux/async_tx.h>
+
+static struct dma_async_tx_descriptor *
+async_sum_product(struct page *dest, struct page **srcs, unsigned char *coef,
+		  size_t len, struct async_submit_ctl *submit)
+{
+	struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ,
+						      &dest, 1, srcs, 2, len);
+	struct dma_device *dma = chan ? chan->device : NULL;
+	const u8 *amul, *bmul;
+	u8 ax, bx;
+	u8 *a, *b, *c;
+
+	if (dma) {
+		dma_addr_t dma_dest[2];
+		dma_addr_t dma_src[2];
+		struct device *dev = dma->dev;
+		struct dma_async_tx_descriptor *tx;
+		enum dma_ctrl_flags dma_flags = DMA_PREP_PQ_DISABLE_P;
+
+		dma_dest[1] = dma_map_page(dev, dest, 0, len, DMA_BIDIRECTIONAL);
+		dma_src[0] = dma_map_page(dev, srcs[0], 0, len, DMA_TO_DEVICE);
+		dma_src[1] = dma_map_page(dev, srcs[1], 0, len, DMA_TO_DEVICE);
+		tx = dma->device_prep_dma_pq(chan, dma_dest, dma_src, 2, coef,
+					     len, dma_flags);
+		if (tx) {
+			async_tx_submit(chan, tx, submit);
+			return tx;
+		}
+	}
+
+	/* run the operation synchronously */
+	async_tx_quiesce(&submit->depend_tx);
+	amul = raid6_gfmul[coef[0]];
+	bmul = raid6_gfmul[coef[1]];
+	a = page_address(srcs[0]);
+	b = page_address(srcs[1]);
+	c = page_address(dest);
+
+	while (len--) {
+		ax    = amul[*a++];
+		bx    = bmul[*b++];
+		*c++ = ax ^ bx;
+	}
+
+	return NULL;
+}
+
+static struct dma_async_tx_descriptor *
+async_mult(struct page *dest, struct page *src, u8 coef, size_t len,
+	   struct async_submit_ctl *submit)
+{
+	struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ,
+						      &dest, 1, &src, 1, len);
+	struct dma_device *dma = chan ? chan->device : NULL;
+	const u8 *qmul; /* Q multiplier table */
+	u8 *d, *s;
+
+	if (dma) {
+		dma_addr_t dma_dest[2];
+		dma_addr_t dma_src[1];
+		struct device *dev = dma->dev;
+		struct dma_async_tx_descriptor *tx;
+		enum dma_ctrl_flags dma_flags = DMA_PREP_PQ_DISABLE_P;
+
+		dma_dest[1] = dma_map_page(dev, dest, 0, len, DMA_BIDIRECTIONAL);
+		dma_src[0] = dma_map_page(dev, src, 0, len, DMA_TO_DEVICE);
+		tx = dma->device_prep_dma_pq(chan, dma_dest, dma_src, 1, &coef,
+					     len, dma_flags);
+		if (tx) {
+			async_tx_submit(chan, tx, submit);
+			return tx;
+		}
+	}
+
+	/* no channel available, or failed to allocate a descriptor, so
+	 * perform the operation synchronously
+	 */
+	async_tx_quiesce(&submit->depend_tx);
+	qmul  = raid6_gfmul[coef];
+	d = page_address(dest);
+	s = page_address(src);
+
+	while (len--)
+		*d++ = qmul[*s++];
+
+	return NULL;
+}
+
+static struct dma_async_tx_descriptor *
+__2data_recov_4(size_t bytes, int faila, int failb, struct page **blocks,
+	      struct async_submit_ctl *submit)
+{
+	struct dma_async_tx_descriptor *tx = NULL;
+	struct page *p, *q, *a, *b;
+	struct page *srcs[2];
+	unsigned char coef[2];
+	enum async_tx_flags flags = submit->flags;
+	dma_async_tx_callback cb_fn = submit->cb_fn;
+	void *cb_param = submit->cb_param;
+	void *scribble = submit->scribble;
+
+	p = blocks[4-2];
+	q = blocks[4-1];
+
+	a = blocks[faila];
+	b = blocks[failb];
+
+	/* in the 4 disk case P + Pxy == P and Q + Qxy == Q */
+	/* Dx = A*(P+Pxy) + B*(Q+Qxy) */
+	srcs[0] = p;
+	srcs[1] = q;
+	coef[0] = raid6_gfexi[failb-faila];
+	coef[1] = raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]];
+	init_async_submit(submit, 0, tx, NULL, NULL, scribble);
+	tx = async_sum_product(b, srcs, coef, bytes, submit);
+
+	/* Dy = P+Pxy+Dx */
+	srcs[0] = p;
+	srcs[1] = b;
+	init_async_submit(submit, flags | ASYNC_TX_XOR_ZERO_DST, tx, cb_fn,
+			  cb_param, scribble);
+	tx = async_xor(a, srcs, 0, 2, bytes, submit);
+
+	return tx;
+
+}
+
+static struct dma_async_tx_descriptor *
+__2data_recov_5(size_t bytes, int faila, int failb, struct page **blocks,
+	      struct async_submit_ctl *submit)
+{
+	struct dma_async_tx_descriptor *tx = NULL;
+	struct page *p, *q, *g, *dp, *dq;
+	struct page *srcs[2];
+	unsigned char coef[2];
+	enum async_tx_flags flags = submit->flags;
+	dma_async_tx_callback cb_fn = submit->cb_fn;
+	void *cb_param = submit->cb_param;
+	void *scribble = submit->scribble;
+	int uninitialized_var(good);
+	int i;
+
+	for (i = 0; i < 3; i++) {
+		if (i == faila || i == failb)
+			continue;
+		else {
+			good = i;
+			break;
+		}
+	}
+	BUG_ON(i >= 3);
+
+	p = blocks[5-2];
+	q = blocks[5-1];
+	g = blocks[good];
+
+	/* Compute syndrome with zero for the missing data pages
+	 * Use the dead data pages as temporary storage for delta p and
+	 * delta q
+	 */
+	dp = blocks[faila];
+	dq = blocks[failb];
+
+	init_async_submit(submit, 0, tx, NULL, NULL, scribble);
+	tx = async_memcpy(dp, g, 0, 0, bytes, submit);
+	init_async_submit(submit, 0, tx, NULL, NULL, scribble);
+	tx = async_mult(dq, g, raid6_gfexp[good], bytes, submit);
+
+	/* compute P + Pxy */
+	srcs[0] = dp;
+	srcs[1] = p;
+	init_async_submit(submit, ASYNC_TX_XOR_DROP_DST, tx, NULL, NULL,
+			  scribble);
+	tx = async_xor(dp, srcs, 0, 2, bytes, submit);
+
+	/* compute Q + Qxy */
+	srcs[0] = dq;
+	srcs[1] = q;
+	init_async_submit(submit, ASYNC_TX_XOR_DROP_DST, tx, NULL, NULL,
+			  scribble);
+	tx = async_xor(dq, srcs, 0, 2, bytes, submit);
+
+	/* Dx = A*(P+Pxy) + B*(Q+Qxy) */
+	srcs[0] = dp;
+	srcs[1] = dq;
+	coef[0] = raid6_gfexi[failb-faila];
+	coef[1] = raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]];
+	init_async_submit(submit, 0, tx, NULL, NULL, scribble);
+	tx = async_sum_product(dq, srcs, coef, bytes, submit);
+
+	/* Dy = P+Pxy+Dx */
+	srcs[0] = dp;
+	srcs[1] = dq;
+	init_async_submit(submit, flags | ASYNC_TX_XOR_DROP_DST, tx, cb_fn,
+			  cb_param, scribble);
+	tx = async_xor(dp, srcs, 0, 2, bytes, submit);
+
+	return tx;
+}
+
+static struct dma_async_tx_descriptor *
+__2data_recov_n(int disks, size_t bytes, int faila, int failb,
+	      struct page **blocks, struct async_submit_ctl *submit)
+{
+	struct dma_async_tx_descriptor *tx = NULL;
+	struct page *p, *q, *dp, *dq;
+	struct page *srcs[2];
+	unsigned char coef[2];
+	enum async_tx_flags flags = submit->flags;
+	dma_async_tx_callback cb_fn = submit->cb_fn;
+	void *cb_param = submit->cb_param;
+	void *scribble = submit->scribble;
+
+	p = blocks[disks-2];
+	q = blocks[disks-1];
+
+	/* Compute syndrome with zero for the missing data pages
+	 * Use the dead data pages as temporary storage for
+	 * delta p and delta q
+	 */
+	dp = blocks[faila];
+	blocks[faila] = (void *)raid6_empty_zero_page;
+	blocks[disks-2] = dp;
+	dq = blocks[failb];
+	blocks[failb] = (void *)raid6_empty_zero_page;
+	blocks[disks-1] = dq;
+
+	init_async_submit(submit, 0, tx, NULL, NULL, scribble);
+	tx = async_gen_syndrome(blocks, 0, disks, bytes, submit);
+
+	/* Restore pointer table */
+	blocks[faila]   = dp;
+	blocks[failb]   = dq;
+	blocks[disks-2] = p;
+	blocks[disks-1] = q;
+
+	/* compute P + Pxy */
+	srcs[0] = dp;
+	srcs[1] = p;
+	init_async_submit(submit, ASYNC_TX_XOR_DROP_DST, tx, NULL, NULL,
+			  scribble);
+	tx = async_xor(dp, srcs, 0, 2, bytes, submit);
+
+	/* compute Q + Qxy */
+	srcs[0] = dq;
+	srcs[1] = q;
+	init_async_submit(submit, ASYNC_TX_XOR_DROP_DST, tx, NULL, NULL,
+			  scribble);
+	tx = async_xor(dq, srcs, 0, 2, bytes, submit);
+
+	/* Dx = A*(P+Pxy) + B*(Q+Qxy) */
+	srcs[0] = dp;
+	srcs[1] = dq;
+	coef[0] = raid6_gfexi[failb-faila];
+	coef[1] = raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]];
+	init_async_submit(submit, 0, tx, NULL, NULL, scribble);
+	tx = async_sum_product(dq, srcs, coef, bytes, submit);
+
+	/* Dy = P+Pxy+Dx */
+	srcs[0] = dp;
+	srcs[1] = dq;
+	init_async_submit(submit, flags | ASYNC_TX_XOR_DROP_DST, tx, cb_fn,
+			  cb_param, scribble);
+	tx = async_xor(dp, srcs, 0, 2, bytes, submit);
+
+	return tx;
+}
+
+/**
+ * async_raid6_2data_recov - asynchronously calculate two missing data blocks
+ * @disks: number of disks in the RAID-6 array
+ * @bytes: block size
+ * @faila: first failed drive index
+ * @failb: second failed drive index
+ * @blocks: array of source pointers where the last two entries are p and q
+ * @submit: submission/completion modifiers
+ */
+struct dma_async_tx_descriptor *
+async_raid6_2data_recov(int disks, size_t bytes, int faila, int failb,
+			struct page **blocks, struct async_submit_ctl *submit)
+{
+	BUG_ON(faila == failb);
+	if (failb < faila)
+		swap(faila, failb);
+
+	pr_debug("%s: disks: %d len: %zu\n", __func__, disks, bytes);
+
+	/* we need to preserve the contents of 'blocks' for the async
+	 * case, so punt to synchronous if a scribble buffer is not available
+	 */
+	if (!submit->scribble) {
+		void **ptrs = (void **) blocks;
+		int i;
+
+		async_tx_quiesce(&submit->depend_tx);
+		for (i = 0; i < disks; i++)
+			ptrs[i] = page_address(blocks[i]);
+
+		raid6_2data_recov(disks, bytes, faila, failb, ptrs);
+
+		async_tx_sync_epilog(submit);
+
+		return NULL;
+	}
+
+	switch (disks) {
+	case 4:
+		/* dma devices do not uniformly understand a zero source pq
+		 * operation (in contrast to the synchronous case), so
+		 * explicitly handle the 4 disk special case
+		 */
+		return __2data_recov_4(bytes, faila, failb, blocks, submit);
+	case 5:
+		/* dma devices do not uniformly understand a single
+		 * source pq operation (in contrast to the synchronous
+		 * case), so explicitly handle the 5 disk special case
+		 */
+		return __2data_recov_5(bytes, faila, failb, blocks, submit);
+	default:
+		return __2data_recov_n(disks, bytes, faila, failb, blocks, submit);
+	}
+}
+EXPORT_SYMBOL_GPL(async_raid6_2data_recov);
+
+/**
+ * async_raid6_datap_recov - asynchronously calculate a data and the 'p' block
+ * @disks: number of disks in the RAID-6 array
+ * @bytes: block size
+ * @faila: failed drive index
+ * @blocks: array of source pointers where the last two entries are p and q
+ * @submit: submission/completion modifiers
+ */
+struct dma_async_tx_descriptor *
+async_raid6_datap_recov(int disks, size_t bytes, int faila,
+			struct page **blocks, struct async_submit_ctl *submit)
+{
+	struct dma_async_tx_descriptor *tx = NULL;
+	struct page *p, *q, *dq;
+	u8 coef;
+	enum async_tx_flags flags = submit->flags;
+	dma_async_tx_callback cb_fn = submit->cb_fn;
+	void *cb_param = submit->cb_param;
+	void *scribble = submit->scribble;
+	struct page *srcs[2];
+
+	pr_debug("%s: disks: %d len: %zu\n", __func__, disks, bytes);
+
+	/* we need to preserve the contents of 'blocks' for the async
+	 * case, so punt to synchronous if a scribble buffer is not available
+	 */
+	if (!scribble) {
+		void **ptrs = (void **) blocks;
+		int i;
+
+		async_tx_quiesce(&submit->depend_tx);
+		for (i = 0; i < disks; i++)
+			ptrs[i] = page_address(blocks[i]);
+
+		raid6_datap_recov(disks, bytes, faila, ptrs);
+
+		async_tx_sync_epilog(submit);
+
+		return NULL;
+	}
+
+	p = blocks[disks-2];
+	q = blocks[disks-1];
+
+	/* Compute syndrome with zero for the missing data page
+	 * Use the dead data page as temporary storage for delta q
+	 */
+	dq = blocks[faila];
+	blocks[faila] = (void *)raid6_empty_zero_page;
+	blocks[disks-1] = dq;
+
+	/* in the 4 disk case we only need to perform a single source
+	 * multiplication
+	 */
+	if (disks == 4) {
+		int good = faila == 0 ? 1 : 0;
+		struct page *g = blocks[good];
+
+		init_async_submit(submit, 0, tx, NULL, NULL, scribble);
+		tx = async_memcpy(p, g, 0, 0, bytes, submit);
+
+		init_async_submit(submit, 0, tx, NULL, NULL, scribble);
+		tx = async_mult(dq, g, raid6_gfexp[good], bytes, submit);
+	} else {
+		init_async_submit(submit, 0, tx, NULL, NULL, scribble);
+		tx = async_gen_syndrome(blocks, 0, disks, bytes, submit);
+	}
+
+	/* Restore pointer table */
+	blocks[faila]   = dq;
+	blocks[disks-1] = q;
+
+	/* calculate g^{-faila} */
+	coef = raid6_gfinv[raid6_gfexp[faila]];
+
+	srcs[0] = dq;
+	srcs[1] = q;
+	init_async_submit(submit, ASYNC_TX_XOR_DROP_DST, tx, NULL, NULL,
+			  scribble);
+	tx = async_xor(dq, srcs, 0, 2, bytes, submit);
+
+	init_async_submit(submit, 0, tx, NULL, NULL, scribble);
+	tx = async_mult(dq, dq, coef, bytes, submit);
+
+	srcs[0] = p;
+	srcs[1] = dq;
+	init_async_submit(submit, flags | ASYNC_TX_XOR_DROP_DST, tx, cb_fn,
+			  cb_param, scribble);
+	tx = async_xor(p, srcs, 0, 2, bytes, submit);
+
+	return tx;
+}
+EXPORT_SYMBOL_GPL(async_raid6_datap_recov);
+
+MODULE_AUTHOR("Dan Williams <dan.j.williams@intel.com>");
+MODULE_DESCRIPTION("asynchronous RAID-6 recovery api");
+MODULE_LICENSE("GPL");
diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h
index e6ce5f004f98..866e61c4e2e0 100644
--- a/include/linux/async_tx.h
+++ b/include/linux/async_tx.h
@@ -194,5 +194,13 @@ async_syndrome_val(struct page **blocks, unsigned int offset, int src_cnt,
 		   size_t len, enum sum_check_flags *pqres, struct page *spare,
 		   struct async_submit_ctl *submit);
 
+struct dma_async_tx_descriptor *
+async_raid6_2data_recov(int src_num, size_t bytes, int faila, int failb,
+			struct page **ptrs, struct async_submit_ctl *submit);
+
+struct dma_async_tx_descriptor *
+async_raid6_datap_recov(int src_num, size_t bytes, int faila,
+			struct page **ptrs, struct async_submit_ctl *submit);
+
 void async_tx_quiesce(struct dma_async_tx_descriptor **tx);
 #endif /* _ASYNC_TX_H_ */
-- 
cgit v1.2.3


From 0403e3827788d878163f9ef0541b748b0f88ca5d Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Tue, 8 Sep 2009 17:42:50 -0700
Subject: dmaengine: add fence support

Some engines optimize operation by reading ahead in the descriptor chain
such that descriptor2 may start execution before descriptor1 completes.
If descriptor2 depends on the result from descriptor1 then a fence is
required (on descriptor2) to disable this optimization.  The async_tx
api could implicitly identify dependencies via the 'depend_tx'
parameter, but that would constrain cases where the dependency chain
only specifies a completion order rather than a data dependency.  So,
provide an ASYNC_TX_FENCE to explicitly identify data dependencies.

Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 crypto/async_tx/async_memcpy.c      |  7 ++++--
 crypto/async_tx/async_memset.c      |  7 ++++--
 crypto/async_tx/async_pq.c          |  5 ++++
 crypto/async_tx/async_raid6_recov.c | 47 +++++++++++++++++++++----------------
 crypto/async_tx/async_xor.c         | 11 ++++++---
 drivers/md/raid5.c                  | 37 ++++++++++++++++++-----------
 include/linux/async_tx.h            |  3 +++
 include/linux/dmaengine.h           |  3 +++
 8 files changed, 79 insertions(+), 41 deletions(-)

(limited to 'include/linux')

diff --git a/crypto/async_tx/async_memcpy.c b/crypto/async_tx/async_memcpy.c
index 98e15bd0dcb5..b38cbb3fd527 100644
--- a/crypto/async_tx/async_memcpy.c
+++ b/crypto/async_tx/async_memcpy.c
@@ -52,9 +52,12 @@ async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset,
 
 	if (device) {
 		dma_addr_t dma_dest, dma_src;
-		unsigned long dma_prep_flags;
+		unsigned long dma_prep_flags = 0;
 
-		dma_prep_flags = submit->cb_fn ? DMA_PREP_INTERRUPT : 0;
+		if (submit->cb_fn)
+			dma_prep_flags |= DMA_PREP_INTERRUPT;
+		if (submit->flags & ASYNC_TX_FENCE)
+			dma_prep_flags |= DMA_PREP_FENCE;
 		dma_dest = dma_map_page(device->dev, dest, dest_offset, len,
 					DMA_FROM_DEVICE);
 
diff --git a/crypto/async_tx/async_memset.c b/crypto/async_tx/async_memset.c
index b896a6e5f673..a374784e3329 100644
--- a/crypto/async_tx/async_memset.c
+++ b/crypto/async_tx/async_memset.c
@@ -49,9 +49,12 @@ async_memset(struct page *dest, int val, unsigned int offset, size_t len,
 
 	if (device) {
 		dma_addr_t dma_dest;
-		unsigned long dma_prep_flags;
+		unsigned long dma_prep_flags = 0;
 
-		dma_prep_flags = submit->cb_fn ? DMA_PREP_INTERRUPT : 0;
+		if (submit->cb_fn)
+			dma_prep_flags |= DMA_PREP_INTERRUPT;
+		if (submit->flags & ASYNC_TX_FENCE)
+			dma_prep_flags |= DMA_PREP_FENCE;
 		dma_dest = dma_map_page(device->dev, dest, offset, len,
 					DMA_FROM_DEVICE);
 
diff --git a/crypto/async_tx/async_pq.c b/crypto/async_tx/async_pq.c
index 108b21efb499..a25e290c39fb 100644
--- a/crypto/async_tx/async_pq.c
+++ b/crypto/async_tx/async_pq.c
@@ -101,6 +101,7 @@ do_async_gen_syndrome(struct dma_chan *chan, struct page **blocks,
 		 */
 		if (src_cnt > pq_src_cnt) {
 			submit->flags &= ~ASYNC_TX_ACK;
+			submit->flags |= ASYNC_TX_FENCE;
 			dma_flags |= DMA_COMPL_SKIP_DEST_UNMAP;
 			submit->cb_fn = NULL;
 			submit->cb_param = NULL;
@@ -111,6 +112,8 @@ do_async_gen_syndrome(struct dma_chan *chan, struct page **blocks,
 			if (cb_fn_orig)
 				dma_flags |= DMA_PREP_INTERRUPT;
 		}
+		if (submit->flags & ASYNC_TX_FENCE)
+			dma_flags |= DMA_PREP_FENCE;
 
 		/* Since we have clobbered the src_list we are committed
 		 * to doing this asynchronously.  Drivers force forward
@@ -282,6 +285,8 @@ async_syndrome_val(struct page **blocks, unsigned int offset, int disks,
 			dma_flags |= DMA_PREP_PQ_DISABLE_P;
 		if (!Q(blocks, disks))
 			dma_flags |= DMA_PREP_PQ_DISABLE_Q;
+		if (submit->flags & ASYNC_TX_FENCE)
+			dma_flags |= DMA_PREP_FENCE;
 		for (i = 0; i < disks; i++)
 			if (likely(blocks[i])) {
 				BUG_ON(is_raid6_zero_block(blocks[i]));
diff --git a/crypto/async_tx/async_raid6_recov.c b/crypto/async_tx/async_raid6_recov.c
index 0c14d48c9896..822a42d10061 100644
--- a/crypto/async_tx/async_raid6_recov.c
+++ b/crypto/async_tx/async_raid6_recov.c
@@ -44,6 +44,8 @@ async_sum_product(struct page *dest, struct page **srcs, unsigned char *coef,
 		struct dma_async_tx_descriptor *tx;
 		enum dma_ctrl_flags dma_flags = DMA_PREP_PQ_DISABLE_P;
 
+		if (submit->flags & ASYNC_TX_FENCE)
+			dma_flags |= DMA_PREP_FENCE;
 		dma_dest[1] = dma_map_page(dev, dest, 0, len, DMA_BIDIRECTIONAL);
 		dma_src[0] = dma_map_page(dev, srcs[0], 0, len, DMA_TO_DEVICE);
 		dma_src[1] = dma_map_page(dev, srcs[1], 0, len, DMA_TO_DEVICE);
@@ -89,6 +91,8 @@ async_mult(struct page *dest, struct page *src, u8 coef, size_t len,
 		struct dma_async_tx_descriptor *tx;
 		enum dma_ctrl_flags dma_flags = DMA_PREP_PQ_DISABLE_P;
 
+		if (submit->flags & ASYNC_TX_FENCE)
+			dma_flags |= DMA_PREP_FENCE;
 		dma_dest[1] = dma_map_page(dev, dest, 0, len, DMA_BIDIRECTIONAL);
 		dma_src[0] = dma_map_page(dev, src, 0, len, DMA_TO_DEVICE);
 		tx = dma->device_prep_dma_pq(chan, dma_dest, dma_src, 1, &coef,
@@ -138,7 +142,7 @@ __2data_recov_4(size_t bytes, int faila, int failb, struct page **blocks,
 	srcs[1] = q;
 	coef[0] = raid6_gfexi[failb-faila];
 	coef[1] = raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]];
-	init_async_submit(submit, 0, tx, NULL, NULL, scribble);
+	init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
 	tx = async_sum_product(b, srcs, coef, bytes, submit);
 
 	/* Dy = P+Pxy+Dx */
@@ -188,23 +192,23 @@ __2data_recov_5(size_t bytes, int faila, int failb, struct page **blocks,
 	dp = blocks[faila];
 	dq = blocks[failb];
 
-	init_async_submit(submit, 0, tx, NULL, NULL, scribble);
+	init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
 	tx = async_memcpy(dp, g, 0, 0, bytes, submit);
-	init_async_submit(submit, 0, tx, NULL, NULL, scribble);
+	init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
 	tx = async_mult(dq, g, raid6_gfexp[good], bytes, submit);
 
 	/* compute P + Pxy */
 	srcs[0] = dp;
 	srcs[1] = p;
-	init_async_submit(submit, ASYNC_TX_XOR_DROP_DST, tx, NULL, NULL,
-			  scribble);
+	init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx,
+			  NULL, NULL, scribble);
 	tx = async_xor(dp, srcs, 0, 2, bytes, submit);
 
 	/* compute Q + Qxy */
 	srcs[0] = dq;
 	srcs[1] = q;
-	init_async_submit(submit, ASYNC_TX_XOR_DROP_DST, tx, NULL, NULL,
-			  scribble);
+	init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx,
+			  NULL, NULL, scribble);
 	tx = async_xor(dq, srcs, 0, 2, bytes, submit);
 
 	/* Dx = A*(P+Pxy) + B*(Q+Qxy) */
@@ -212,7 +216,7 @@ __2data_recov_5(size_t bytes, int faila, int failb, struct page **blocks,
 	srcs[1] = dq;
 	coef[0] = raid6_gfexi[failb-faila];
 	coef[1] = raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]];
-	init_async_submit(submit, 0, tx, NULL, NULL, scribble);
+	init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
 	tx = async_sum_product(dq, srcs, coef, bytes, submit);
 
 	/* Dy = P+Pxy+Dx */
@@ -252,7 +256,7 @@ __2data_recov_n(int disks, size_t bytes, int faila, int failb,
 	blocks[failb] = (void *)raid6_empty_zero_page;
 	blocks[disks-1] = dq;
 
-	init_async_submit(submit, 0, tx, NULL, NULL, scribble);
+	init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
 	tx = async_gen_syndrome(blocks, 0, disks, bytes, submit);
 
 	/* Restore pointer table */
@@ -264,15 +268,15 @@ __2data_recov_n(int disks, size_t bytes, int faila, int failb,
 	/* compute P + Pxy */
 	srcs[0] = dp;
 	srcs[1] = p;
-	init_async_submit(submit, ASYNC_TX_XOR_DROP_DST, tx, NULL, NULL,
-			  scribble);
+	init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx,
+			  NULL, NULL, scribble);
 	tx = async_xor(dp, srcs, 0, 2, bytes, submit);
 
 	/* compute Q + Qxy */
 	srcs[0] = dq;
 	srcs[1] = q;
-	init_async_submit(submit, ASYNC_TX_XOR_DROP_DST, tx, NULL, NULL,
-			  scribble);
+	init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx,
+			  NULL, NULL, scribble);
 	tx = async_xor(dq, srcs, 0, 2, bytes, submit);
 
 	/* Dx = A*(P+Pxy) + B*(Q+Qxy) */
@@ -280,7 +284,7 @@ __2data_recov_n(int disks, size_t bytes, int faila, int failb,
 	srcs[1] = dq;
 	coef[0] = raid6_gfexi[failb-faila];
 	coef[1] = raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]];
-	init_async_submit(submit, 0, tx, NULL, NULL, scribble);
+	init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
 	tx = async_sum_product(dq, srcs, coef, bytes, submit);
 
 	/* Dy = P+Pxy+Dx */
@@ -407,13 +411,16 @@ async_raid6_datap_recov(int disks, size_t bytes, int faila,
 		int good = faila == 0 ? 1 : 0;
 		struct page *g = blocks[good];
 
-		init_async_submit(submit, 0, tx, NULL, NULL, scribble);
+		init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL,
+				  scribble);
 		tx = async_memcpy(p, g, 0, 0, bytes, submit);
 
-		init_async_submit(submit, 0, tx, NULL, NULL, scribble);
+		init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL,
+				  scribble);
 		tx = async_mult(dq, g, raid6_gfexp[good], bytes, submit);
 	} else {
-		init_async_submit(submit, 0, tx, NULL, NULL, scribble);
+		init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL,
+				  scribble);
 		tx = async_gen_syndrome(blocks, 0, disks, bytes, submit);
 	}
 
@@ -426,11 +433,11 @@ async_raid6_datap_recov(int disks, size_t bytes, int faila,
 
 	srcs[0] = dq;
 	srcs[1] = q;
-	init_async_submit(submit, ASYNC_TX_XOR_DROP_DST, tx, NULL, NULL,
-			  scribble);
+	init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx,
+			  NULL, NULL, scribble);
 	tx = async_xor(dq, srcs, 0, 2, bytes, submit);
 
-	init_async_submit(submit, 0, tx, NULL, NULL, scribble);
+	init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
 	tx = async_mult(dq, dq, coef, bytes, submit);
 
 	srcs[0] = p;
diff --git a/crypto/async_tx/async_xor.c b/crypto/async_tx/async_xor.c
index 56b5f98da463..db279872ef3d 100644
--- a/crypto/async_tx/async_xor.c
+++ b/crypto/async_tx/async_xor.c
@@ -69,6 +69,7 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list,
 		 */
 		if (src_cnt > xor_src_cnt) {
 			submit->flags &= ~ASYNC_TX_ACK;
+			submit->flags |= ASYNC_TX_FENCE;
 			dma_flags = DMA_COMPL_SKIP_DEST_UNMAP;
 			submit->cb_fn = NULL;
 			submit->cb_param = NULL;
@@ -78,7 +79,8 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list,
 		}
 		if (submit->cb_fn)
 			dma_flags |= DMA_PREP_INTERRUPT;
-
+		if (submit->flags & ASYNC_TX_FENCE)
+			dma_flags |= DMA_PREP_FENCE;
 		/* Since we have clobbered the src_list we are committed
 		 * to doing this asynchronously.  Drivers force forward progress
 		 * in case they can not provide a descriptor
@@ -264,12 +266,15 @@ async_xor_val(struct page *dest, struct page **src_list, unsigned int offset,
 		dma_src = (dma_addr_t *) src_list;
 
 	if (dma_src && device && src_cnt <= device->max_xor) {
-		unsigned long dma_prep_flags;
+		unsigned long dma_prep_flags = 0;
 		int i;
 
 		pr_debug("%s: (async) len: %zu\n", __func__, len);
 
-		dma_prep_flags = submit->cb_fn ? DMA_PREP_INTERRUPT : 0;
+		if (submit->cb_fn)
+			dma_prep_flags |= DMA_PREP_INTERRUPT;
+		if (submit->flags & ASYNC_TX_FENCE)
+			dma_prep_flags |= DMA_PREP_FENCE;
 		for (i = 0; i < src_cnt; i++)
 			dma_src[i] = dma_map_page(device->dev, src_list[i],
 						  offset, len, DMA_TO_DEVICE);
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 0a5cf2171214..54ef8d75541d 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -502,13 +502,17 @@ async_copy_data(int frombio, struct bio *bio, struct page *page,
 	int i;
 	int page_offset;
 	struct async_submit_ctl submit;
+	enum async_tx_flags flags = 0;
 
 	if (bio->bi_sector >= sector)
 		page_offset = (signed)(bio->bi_sector - sector) * 512;
 	else
 		page_offset = (signed)(sector - bio->bi_sector) * -512;
 
-	init_async_submit(&submit, 0, tx, NULL, NULL, NULL);
+	if (frombio)
+		flags |= ASYNC_TX_FENCE;
+	init_async_submit(&submit, flags, tx, NULL, NULL, NULL);
+
 	bio_for_each_segment(bvl, bio, i) {
 		int len = bio_iovec_idx(bio, i)->bv_len;
 		int clen;
@@ -685,7 +689,7 @@ ops_run_compute5(struct stripe_head *sh, struct raid5_percpu *percpu)
 
 	atomic_inc(&sh->count);
 
-	init_async_submit(&submit, ASYNC_TX_XOR_ZERO_DST, NULL,
+	init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_ZERO_DST, NULL,
 			  ops_complete_compute, sh, to_addr_conv(sh, percpu));
 	if (unlikely(count == 1))
 		tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE, &submit);
@@ -763,7 +767,8 @@ ops_run_compute6_1(struct stripe_head *sh, struct raid5_percpu *percpu)
 		count = set_syndrome_sources(blocks, sh);
 		blocks[count] = NULL; /* regenerating p is not necessary */
 		BUG_ON(blocks[count+1] != dest); /* q should already be set */
-		init_async_submit(&submit, 0, NULL, ops_complete_compute, sh,
+		init_async_submit(&submit, ASYNC_TX_FENCE, NULL,
+				  ops_complete_compute, sh,
 				  to_addr_conv(sh, percpu));
 		tx = async_gen_syndrome(blocks, 0, count+2, STRIPE_SIZE, &submit);
 	} else {
@@ -775,8 +780,8 @@ ops_run_compute6_1(struct stripe_head *sh, struct raid5_percpu *percpu)
 			blocks[count++] = sh->dev[i].page;
 		}
 
-		init_async_submit(&submit, ASYNC_TX_XOR_ZERO_DST, NULL,
-				  ops_complete_compute, sh,
+		init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_ZERO_DST,
+				  NULL, ops_complete_compute, sh,
 				  to_addr_conv(sh, percpu));
 		tx = async_xor(dest, blocks, 0, count, STRIPE_SIZE, &submit);
 	}
@@ -837,8 +842,9 @@ ops_run_compute6_2(struct stripe_head *sh, struct raid5_percpu *percpu)
 		/* Q disk is one of the missing disks */
 		if (faila == syndrome_disks) {
 			/* Missing P+Q, just recompute */
-			init_async_submit(&submit, 0, NULL, ops_complete_compute,
-					  sh, to_addr_conv(sh, percpu));
+			init_async_submit(&submit, ASYNC_TX_FENCE, NULL,
+					  ops_complete_compute, sh,
+					  to_addr_conv(sh, percpu));
 			return async_gen_syndrome(blocks, 0, count+2,
 						  STRIPE_SIZE, &submit);
 		} else {
@@ -859,21 +865,24 @@ ops_run_compute6_2(struct stripe_head *sh, struct raid5_percpu *percpu)
 				blocks[count++] = sh->dev[i].page;
 			}
 			dest = sh->dev[data_target].page;
-			init_async_submit(&submit, ASYNC_TX_XOR_ZERO_DST, NULL,
-					  NULL, NULL, to_addr_conv(sh, percpu));
+			init_async_submit(&submit,
+					  ASYNC_TX_FENCE|ASYNC_TX_XOR_ZERO_DST,
+					  NULL, NULL, NULL,
+					  to_addr_conv(sh, percpu));
 			tx = async_xor(dest, blocks, 0, count, STRIPE_SIZE,
 				       &submit);
 
 			count = set_syndrome_sources(blocks, sh);
-			init_async_submit(&submit, 0, tx, ops_complete_compute,
-					  sh, to_addr_conv(sh, percpu));
+			init_async_submit(&submit, ASYNC_TX_FENCE, tx,
+					  ops_complete_compute, sh,
+					  to_addr_conv(sh, percpu));
 			return async_gen_syndrome(blocks, 0, count+2,
 						  STRIPE_SIZE, &submit);
 		}
 	}
 
-	init_async_submit(&submit, 0, NULL, ops_complete_compute, sh,
-			  to_addr_conv(sh, percpu));
+	init_async_submit(&submit, ASYNC_TX_FENCE, NULL, ops_complete_compute,
+			  sh, to_addr_conv(sh, percpu));
 	if (failb == syndrome_disks) {
 		/* We're missing D+P. */
 		return async_raid6_datap_recov(syndrome_disks+2, STRIPE_SIZE,
@@ -916,7 +925,7 @@ ops_run_prexor(struct stripe_head *sh, struct raid5_percpu *percpu,
 			xor_srcs[count++] = dev->page;
 	}
 
-	init_async_submit(&submit, ASYNC_TX_XOR_DROP_DST, tx,
+	init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx,
 			  ops_complete_prexor, sh, to_addr_conv(sh, percpu));
 	tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, &submit);
 
diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h
index 866e61c4e2e0..a1c486a88e88 100644
--- a/include/linux/async_tx.h
+++ b/include/linux/async_tx.h
@@ -58,11 +58,14 @@ struct dma_chan_ref {
  * array.
  * @ASYNC_TX_ACK: immediately ack the descriptor, precludes setting up a
  * dependency chain
+ * @ASYNC_TX_FENCE: specify that the next operation in the dependency
+ * chain uses this operation's result as an input
  */
 enum async_tx_flags {
 	ASYNC_TX_XOR_ZERO_DST	 = (1 << 0),
 	ASYNC_TX_XOR_DROP_DST	 = (1 << 1),
 	ASYNC_TX_ACK		 = (1 << 2),
+	ASYNC_TX_FENCE		 = (1 << 3),
 };
 
 /**
diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index 1012f1abcb54..4d6c1c925fd4 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -87,6 +87,8 @@ enum dma_transaction_type {
  * @DMA_PREP_CONTINUE - indicate to a driver that it is reusing buffers as
  *  sources that were the result of a previous operation, in the case of a PQ
  *  operation it continues the calculation with new sources
+ * @DMA_PREP_FENCE - tell the driver that subsequent operations depend
+ *  on the result of this operation
  */
 enum dma_ctrl_flags {
 	DMA_PREP_INTERRUPT = (1 << 0),
@@ -98,6 +100,7 @@ enum dma_ctrl_flags {
 	DMA_PREP_PQ_DISABLE_P = (1 << 6),
 	DMA_PREP_PQ_DISABLE_Q = (1 << 7),
 	DMA_PREP_CONTINUE = (1 << 8),
+	DMA_PREP_FENCE = (1 << 9),
 };
 
 /**
-- 
cgit v1.2.3


From 138f4c359d23d2ec38d18bd70dd9613ae515fe93 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Tue, 8 Sep 2009 17:42:51 -0700
Subject: dmaengine, async_tx: add a "no channel switch" allocator

Channel switching is problematic for some dmaengine drivers as the
architecture precludes separating the ->prep from ->submit.  In these
cases the driver can select ASYNC_TX_DISABLE_CHANNEL_SWITCH to modify
the async_tx allocator to only return channels that support all of the
required asynchronous operations.

For example MD_RAID456=y selects support for asynchronous xor, xor
validate, pq, pq validate, and memcpy.  When
ASYNC_TX_DISABLE_CHANNEL_SWITCH=y any channel with all these
capabilities is marked DMA_ASYNC_TX allowing async_tx_find_channel() to
quickly locate compatible channels with the guarantee that dependency
chains will remain on one channel.  When
ASYNC_TX_DISABLE_CHANNEL_SWITCH=n async_tx_find_channel() may select
channels that lead to operation chains that need to cross channel
boundaries using the async_tx channel switch capability.

Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 crypto/async_tx/async_tx.c |  4 ++++
 drivers/dma/Kconfig        |  4 ++++
 drivers/dma/dmaengine.c    | 40 ++++++++++++++++++++++++++++++++++++++++
 include/linux/dmaengine.h  | 10 +++++++++-
 4 files changed, 57 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/crypto/async_tx/async_tx.c b/crypto/async_tx/async_tx.c
index 60615fedcf5e..f9cdf04fe7c0 100644
--- a/crypto/async_tx/async_tx.c
+++ b/crypto/async_tx/async_tx.c
@@ -81,6 +81,10 @@ async_tx_channel_switch(struct dma_async_tx_descriptor *depend_tx,
 	struct dma_device *device = chan->device;
 	struct dma_async_tx_descriptor *intr_tx = (void *) ~0;
 
+	#ifdef CONFIG_ASYNC_TX_DISABLE_CHANNEL_SWITCH
+	BUG();
+	#endif
+
 	/* first check to see if we can still append to depend_tx */
 	spin_lock_bh(&depend_tx->lock);
 	if (depend_tx->parent && depend_tx->chan == tx->chan) {
diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index 912a51b5cbd3..ddcd9793b25c 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -17,11 +17,15 @@ if DMADEVICES
 
 comment "DMA Devices"
 
+config ASYNC_TX_DISABLE_CHANNEL_SWITCH
+	bool
+
 config INTEL_IOATDMA
 	tristate "Intel I/OAT DMA support"
 	depends on PCI && X86
 	select DMA_ENGINE
 	select DCA
+	select ASYNC_TX_DISABLE_CHANNEL_SWITCH
 	help
 	  Enable support for the Intel(R) I/OAT DMA engine present
 	  in recent Intel Xeon chipsets.
diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
index 96598479eece..d5bc628d207c 100644
--- a/drivers/dma/dmaengine.c
+++ b/drivers/dma/dmaengine.c
@@ -608,6 +608,40 @@ void dmaengine_put(void)
 }
 EXPORT_SYMBOL(dmaengine_put);
 
+static bool device_has_all_tx_types(struct dma_device *device)
+{
+	/* A device that satisfies this test has channels that will never cause
+	 * an async_tx channel switch event as all possible operation types can
+	 * be handled.
+	 */
+	#ifdef CONFIG_ASYNC_TX_DMA
+	if (!dma_has_cap(DMA_INTERRUPT, device->cap_mask))
+		return false;
+	#endif
+
+	#if defined(CONFIG_ASYNC_MEMCPY) || defined(CONFIG_ASYNC_MEMCPY_MODULE)
+	if (!dma_has_cap(DMA_MEMCPY, device->cap_mask))
+		return false;
+	#endif
+
+	#if defined(CONFIG_ASYNC_MEMSET) || defined(CONFIG_ASYNC_MEMSET_MODULE)
+	if (!dma_has_cap(DMA_MEMSET, device->cap_mask))
+		return false;
+	#endif
+
+	#if defined(CONFIG_ASYNC_XOR) || defined(CONFIG_ASYNC_XOR_MODULE)
+	if (!dma_has_cap(DMA_XOR, device->cap_mask))
+		return false;
+	#endif
+
+	#if defined(CONFIG_ASYNC_PQ) || defined(CONFIG_ASYNC_PQ_MODULE)
+	if (!dma_has_cap(DMA_PQ, device->cap_mask))
+		return false;
+	#endif
+
+	return true;
+}
+
 static int get_dma_id(struct dma_device *device)
 {
 	int rc;
@@ -665,6 +699,12 @@ int dma_async_device_register(struct dma_device *device)
 	BUG_ON(!device->device_issue_pending);
 	BUG_ON(!device->dev);
 
+	/* note: this only matters in the
+	 * CONFIG_ASYNC_TX_DISABLE_CHANNEL_SWITCH=y case
+	 */
+	if (device_has_all_tx_types(device))
+		dma_cap_set(DMA_ASYNC_TX, device->cap_mask);
+
 	idr_ref = kmalloc(sizeof(*idr_ref), GFP_KERNEL);
 	if (!idr_ref)
 		return -ENOMEM;
diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index 4d6c1c925fd4..86853ed7970b 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -48,6 +48,9 @@ enum dma_status {
 
 /**
  * enum dma_transaction_type - DMA transaction types/indexes
+ *
+ * Note: The DMA_ASYNC_TX capability is not to be set by drivers.  It is
+ * automatically set as dma devices are registered.
  */
 enum dma_transaction_type {
 	DMA_MEMCPY,
@@ -61,6 +64,7 @@ enum dma_transaction_type {
 	DMA_MEMCPY_CRC32C,
 	DMA_INTERRUPT,
 	DMA_PRIVATE,
+	DMA_ASYNC_TX,
 	DMA_SLAVE,
 };
 
@@ -396,7 +400,11 @@ static inline void net_dmaengine_put(void)
 #ifdef CONFIG_ASYNC_TX_DMA
 #define async_dmaengine_get()	dmaengine_get()
 #define async_dmaengine_put()	dmaengine_put()
+#ifdef CONFIG_ASYNC_TX_DISABLE_CHANNEL_SWITCH
+#define async_dma_find_channel(type) dma_find_channel(DMA_ASYNC_TX)
+#else
 #define async_dma_find_channel(type) dma_find_channel(type)
+#endif /* CONFIG_ASYNC_TX_DISABLE_CHANNEL_SWITCH */
 #else
 static inline void async_dmaengine_get(void)
 {
@@ -409,7 +417,7 @@ async_dma_find_channel(enum dma_transaction_type type)
 {
 	return NULL;
 }
-#endif
+#endif /* CONFIG_ASYNC_TX_DMA */
 
 dma_cookie_t dma_async_memcpy_buf_to_buf(struct dma_chan *chan,
 	void *dest, void *src, size_t len);
-- 
cgit v1.2.3


From 9308add6ea4fedeba37b0d7c4630a542bd34f214 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Tue, 8 Sep 2009 17:42:52 -0700
Subject: dmaengine: cleanup unused transaction types

No drivers currently implement these operation types, so they can be
deleted.

Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 arch/arm/mach-iop13xx/setup.c | 7 -------
 arch/arm/plat-iop/adma.c      | 2 --
 drivers/dma/iop-adma.c        | 5 +----
 include/linux/dmaengine.h     | 3 ---
 4 files changed, 1 insertion(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-iop13xx/setup.c b/arch/arm/mach-iop13xx/setup.c
index faaef95342b6..5c147fb66a01 100644
--- a/arch/arm/mach-iop13xx/setup.c
+++ b/arch/arm/mach-iop13xx/setup.c
@@ -477,10 +477,8 @@ void __init iop13xx_platform_init(void)
 			plat_data = &iop13xx_adma_0_data;
 			dma_cap_set(DMA_MEMCPY, plat_data->cap_mask);
 			dma_cap_set(DMA_XOR, plat_data->cap_mask);
-			dma_cap_set(DMA_DUAL_XOR, plat_data->cap_mask);
 			dma_cap_set(DMA_XOR_VAL, plat_data->cap_mask);
 			dma_cap_set(DMA_MEMSET, plat_data->cap_mask);
-			dma_cap_set(DMA_MEMCPY_CRC32C, plat_data->cap_mask);
 			dma_cap_set(DMA_INTERRUPT, plat_data->cap_mask);
 			break;
 		case IOP13XX_INIT_ADMA_1:
@@ -489,10 +487,8 @@ void __init iop13xx_platform_init(void)
 			plat_data = &iop13xx_adma_1_data;
 			dma_cap_set(DMA_MEMCPY, plat_data->cap_mask);
 			dma_cap_set(DMA_XOR, plat_data->cap_mask);
-			dma_cap_set(DMA_DUAL_XOR, plat_data->cap_mask);
 			dma_cap_set(DMA_XOR_VAL, plat_data->cap_mask);
 			dma_cap_set(DMA_MEMSET, plat_data->cap_mask);
-			dma_cap_set(DMA_MEMCPY_CRC32C, plat_data->cap_mask);
 			dma_cap_set(DMA_INTERRUPT, plat_data->cap_mask);
 			break;
 		case IOP13XX_INIT_ADMA_2:
@@ -501,13 +497,10 @@ void __init iop13xx_platform_init(void)
 			plat_data = &iop13xx_adma_2_data;
 			dma_cap_set(DMA_MEMCPY, plat_data->cap_mask);
 			dma_cap_set(DMA_XOR, plat_data->cap_mask);
-			dma_cap_set(DMA_DUAL_XOR, plat_data->cap_mask);
 			dma_cap_set(DMA_XOR_VAL, plat_data->cap_mask);
 			dma_cap_set(DMA_MEMSET, plat_data->cap_mask);
-			dma_cap_set(DMA_MEMCPY_CRC32C, plat_data->cap_mask);
 			dma_cap_set(DMA_INTERRUPT, plat_data->cap_mask);
 			dma_cap_set(DMA_PQ, plat_data->cap_mask);
-			dma_cap_set(DMA_PQ_UPDATE, plat_data->cap_mask);
 			dma_cap_set(DMA_PQ_VAL, plat_data->cap_mask);
 			break;
 		}
diff --git a/arch/arm/plat-iop/adma.c b/arch/arm/plat-iop/adma.c
index da1dd0dab07c..1ff6a37e893c 100644
--- a/arch/arm/plat-iop/adma.c
+++ b/arch/arm/plat-iop/adma.c
@@ -179,7 +179,6 @@ static int __init iop3xx_adma_cap_init(void)
 	dma_cap_set(DMA_INTERRUPT, iop3xx_dma_0_data.cap_mask);
 	#else
 	dma_cap_set(DMA_MEMCPY, iop3xx_dma_0_data.cap_mask);
-	dma_cap_set(DMA_MEMCPY_CRC32C, iop3xx_dma_0_data.cap_mask);
 	dma_cap_set(DMA_INTERRUPT, iop3xx_dma_0_data.cap_mask);
 	#endif
 
@@ -188,7 +187,6 @@ static int __init iop3xx_adma_cap_init(void)
 	dma_cap_set(DMA_INTERRUPT, iop3xx_dma_1_data.cap_mask);
 	#else
 	dma_cap_set(DMA_MEMCPY, iop3xx_dma_1_data.cap_mask);
-	dma_cap_set(DMA_MEMCPY_CRC32C, iop3xx_dma_1_data.cap_mask);
 	dma_cap_set(DMA_INTERRUPT, iop3xx_dma_1_data.cap_mask);
 	#endif
 
diff --git a/drivers/dma/iop-adma.c b/drivers/dma/iop-adma.c
index 4496bc606662..cecb6d657d55 100644
--- a/drivers/dma/iop-adma.c
+++ b/drivers/dma/iop-adma.c
@@ -1256,15 +1256,12 @@ static int __devinit iop_adma_probe(struct platform_device *pdev)
 	}
 
 	dev_printk(KERN_INFO, &pdev->dev, "Intel(R) IOP: "
-	  "( %s%s%s%s%s%s%s%s%s%s)\n",
+	  "( %s%s%s%s%s%s%s)\n",
 	  dma_has_cap(DMA_PQ, dma_dev->cap_mask) ? "pq " : "",
-	  dma_has_cap(DMA_PQ_UPDATE, dma_dev->cap_mask) ? "pq_update " : "",
 	  dma_has_cap(DMA_PQ_VAL, dma_dev->cap_mask) ? "pq_val " : "",
 	  dma_has_cap(DMA_XOR, dma_dev->cap_mask) ? "xor " : "",
-	  dma_has_cap(DMA_DUAL_XOR, dma_dev->cap_mask) ? "dual_xor " : "",
 	  dma_has_cap(DMA_XOR_VAL, dma_dev->cap_mask) ? "xor_val " : "",
 	  dma_has_cap(DMA_MEMSET, dma_dev->cap_mask)  ? "fill " : "",
-	  dma_has_cap(DMA_MEMCPY_CRC32C, dma_dev->cap_mask) ? "cpy+crc " : "",
 	  dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask) ? "cpy " : "",
 	  dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask) ? "intr " : "");
 
diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index 86853ed7970b..db23fd583f98 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -56,12 +56,9 @@ enum dma_transaction_type {
 	DMA_MEMCPY,
 	DMA_XOR,
 	DMA_PQ,
-	DMA_DUAL_XOR,
-	DMA_PQ_UPDATE,
 	DMA_XOR_VAL,
 	DMA_PQ_VAL,
 	DMA_MEMSET,
-	DMA_MEMCPY_CRC32C,
 	DMA_INTERRUPT,
 	DMA_PRIVATE,
 	DMA_ASYNC_TX,
-- 
cgit v1.2.3


From 83544ae9f3991bfc7d5e0fe9a3008cd05a8d57b7 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Tue, 8 Sep 2009 17:42:53 -0700
Subject: dmaengine, async_tx: support alignment checks

Some engines have transfer size and address alignment restrictions.  Add
a per-operation alignment property to struct dma_device that the async
routines and dmatest can use to check alignment capabilities.

Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 crypto/async_tx/async_memcpy.c |  2 +-
 crypto/async_tx/async_memset.c |  2 +-
 crypto/async_tx/async_pq.c     |  6 ++++--
 crypto/async_tx/async_xor.c    |  5 +++--
 drivers/dma/dmatest.c          | 14 ++++++++++++++
 include/linux/dmaengine.h      | 44 ++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 67 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/crypto/async_tx/async_memcpy.c b/crypto/async_tx/async_memcpy.c
index b38cbb3fd527..0ec1fb69d4ea 100644
--- a/crypto/async_tx/async_memcpy.c
+++ b/crypto/async_tx/async_memcpy.c
@@ -50,7 +50,7 @@ async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset,
 	struct dma_device *device = chan ? chan->device : NULL;
 	struct dma_async_tx_descriptor *tx = NULL;
 
-	if (device) {
+	if (device && is_dma_copy_aligned(device, src_offset, dest_offset, len)) {
 		dma_addr_t dma_dest, dma_src;
 		unsigned long dma_prep_flags = 0;
 
diff --git a/crypto/async_tx/async_memset.c b/crypto/async_tx/async_memset.c
index a374784e3329..58e4a8752aee 100644
--- a/crypto/async_tx/async_memset.c
+++ b/crypto/async_tx/async_memset.c
@@ -47,7 +47,7 @@ async_memset(struct page *dest, int val, unsigned int offset, size_t len,
 	struct dma_device *device = chan ? chan->device : NULL;
 	struct dma_async_tx_descriptor *tx = NULL;
 
-	if (device) {
+	if (device && is_dma_fill_aligned(device, offset, 0, len)) {
 		dma_addr_t dma_dest;
 		unsigned long dma_prep_flags = 0;
 
diff --git a/crypto/async_tx/async_pq.c b/crypto/async_tx/async_pq.c
index a25e290c39fb..b88db6d1dc65 100644
--- a/crypto/async_tx/async_pq.c
+++ b/crypto/async_tx/async_pq.c
@@ -211,7 +211,8 @@ async_gen_syndrome(struct page **blocks, unsigned int offset, int disks,
 
 	if (dma_src && device &&
 	    (src_cnt <= dma_maxpq(device, 0) ||
-	     dma_maxpq(device, DMA_PREP_CONTINUE) > 0)) {
+	     dma_maxpq(device, DMA_PREP_CONTINUE) > 0) &&
+	    is_dma_pq_aligned(device, offset, 0, len)) {
 		/* run the p+q asynchronously */
 		pr_debug("%s: (async) disks: %d len: %zu\n",
 			 __func__, disks, len);
@@ -274,7 +275,8 @@ async_syndrome_val(struct page **blocks, unsigned int offset, int disks,
 	else if (sizeof(dma_addr_t) <= sizeof(struct page *))
 		dma_src = (dma_addr_t *) blocks;
 
-	if (dma_src && device && disks <= dma_maxpq(device, 0)) {
+	if (dma_src && device && disks <= dma_maxpq(device, 0) &&
+	    is_dma_pq_aligned(device, offset, 0, len)) {
 		struct device *dev = device->dev;
 		dma_addr_t *pq = &dma_src[disks-2];
 		int i;
diff --git a/crypto/async_tx/async_xor.c b/crypto/async_tx/async_xor.c
index db279872ef3d..b459a9034aac 100644
--- a/crypto/async_tx/async_xor.c
+++ b/crypto/async_tx/async_xor.c
@@ -193,7 +193,7 @@ async_xor(struct page *dest, struct page **src_list, unsigned int offset,
 	else if (sizeof(dma_addr_t) <= sizeof(struct page *))
 		dma_src = (dma_addr_t *) src_list;
 
-	if (dma_src && chan) {
+	if (dma_src && chan && is_dma_xor_aligned(chan->device, offset, 0, len)) {
 		/* run the xor asynchronously */
 		pr_debug("%s (async): len: %zu\n", __func__, len);
 
@@ -265,7 +265,8 @@ async_xor_val(struct page *dest, struct page **src_list, unsigned int offset,
 	else if (sizeof(dma_addr_t) <= sizeof(struct page *))
 		dma_src = (dma_addr_t *) src_list;
 
-	if (dma_src && device && src_cnt <= device->max_xor) {
+	if (dma_src && device && src_cnt <= device->max_xor &&
+	    is_dma_xor_aligned(device, offset, 0, len)) {
 		unsigned long dma_prep_flags = 0;
 		int i;
 
diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c
index 58e49e41c7a3..a3722a7384b5 100644
--- a/drivers/dma/dmatest.c
+++ b/drivers/dma/dmatest.c
@@ -288,6 +288,7 @@ static int dmatest_func(void *data)
 		dma_addr_t dma_dsts[dst_cnt];
 		struct completion cmp;
 		unsigned long tmo = msecs_to_jiffies(3000);
+		u8 align = 0;
 
 		total_tests++;
 
@@ -295,6 +296,18 @@ static int dmatest_func(void *data)
 		src_off = dmatest_random() % (test_buf_size - len + 1);
 		dst_off = dmatest_random() % (test_buf_size - len + 1);
 
+		/* honor alignment restrictions */
+		if (thread->type == DMA_MEMCPY)
+			align = dev->copy_align;
+		else if (thread->type == DMA_XOR)
+			align = dev->xor_align;
+		else if (thread->type == DMA_PQ)
+			align = dev->pq_align;
+
+		len = (len >> align) << align;
+		src_off = (src_off >> align) << align;
+		dst_off = (dst_off >> align) << align;
+
 		dmatest_init_srcs(thread->srcs, src_off, len);
 		dmatest_init_dsts(thread->dsts, dst_off, len);
 
@@ -311,6 +324,7 @@ static int dmatest_func(void *data)
 						     DMA_BIDIRECTIONAL);
 		}
 
+
 		if (thread->type == DMA_MEMCPY)
 			tx = dev->device_prep_dma_memcpy(chan,
 							 dma_dsts[0] + dst_off,
diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index db23fd583f98..835b9c7bf1c2 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -245,6 +245,10 @@ struct dma_async_tx_descriptor {
  * @cap_mask: one or more dma_capability flags
  * @max_xor: maximum number of xor sources, 0 if no capability
  * @max_pq: maximum number of PQ sources and PQ-continue capability
+ * @copy_align: alignment shift for memcpy operations
+ * @xor_align: alignment shift for xor operations
+ * @pq_align: alignment shift for pq operations
+ * @fill_align: alignment shift for memset operations
  * @dev_id: unique device ID
  * @dev: struct device reference for dma mapping api
  * @device_alloc_chan_resources: allocate resources and return the
@@ -271,6 +275,10 @@ struct dma_device {
 	dma_cap_mask_t  cap_mask;
 	unsigned short max_xor;
 	unsigned short max_pq;
+	u8 copy_align;
+	u8 xor_align;
+	u8 pq_align;
+	u8 fill_align;
 	#define DMA_HAS_PQ_CONTINUE (1 << 15)
 
 	int dev_id;
@@ -314,6 +322,42 @@ struct dma_device {
 	void (*device_issue_pending)(struct dma_chan *chan);
 };
 
+static inline bool dmaengine_check_align(u8 align, size_t off1, size_t off2, size_t len)
+{
+	size_t mask;
+
+	if (!align)
+		return true;
+	mask = (1 << align) - 1;
+	if (mask & (off1 | off2 | len))
+		return false;
+	return true;
+}
+
+static inline bool is_dma_copy_aligned(struct dma_device *dev, size_t off1,
+				       size_t off2, size_t len)
+{
+	return dmaengine_check_align(dev->copy_align, off1, off2, len);
+}
+
+static inline bool is_dma_xor_aligned(struct dma_device *dev, size_t off1,
+				      size_t off2, size_t len)
+{
+	return dmaengine_check_align(dev->xor_align, off1, off2, len);
+}
+
+static inline bool is_dma_pq_aligned(struct dma_device *dev, size_t off1,
+				     size_t off2, size_t len)
+{
+	return dmaengine_check_align(dev->pq_align, off1, off2, len);
+}
+
+static inline bool is_dma_fill_aligned(struct dma_device *dev, size_t off1,
+				       size_t off2, size_t len)
+{
+	return dmaengine_check_align(dev->fill_align, off1, off2, len);
+}
+
 static inline void
 dma_set_maxpq(struct dma_device *dma, int maxpq, int has_pq_continue)
 {
-- 
cgit v1.2.3


From b265b11fc1a0bd6ae5a7fde12e374583a52ab326 Mon Sep 17 00:00:00 2001
From: Tom Picard <tom.s.picard@intel.com>
Date: Tue, 8 Sep 2009 17:43:01 -0700
Subject: ioat3: ioat3.2 pci ids for Jasper Forest

Jasper Forest introduces raid offload support via ioat3.2 support.  When
raid offload is enabled two (out of 8 channels) will report raid5/raid6
offload capabilities.  The remaining channels will only report ioat3.0
capabilities (memcpy).

Signed-off-by: Tom Picard <tom.s.picard@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/dma/ioat/pci.c  | 13 +++++++++++++
 include/linux/pci_ids.h | 10 ++++++++++
 2 files changed, 23 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/dma/ioat/pci.c b/drivers/dma/ioat/pci.c
index 626a508a4f8b..6c1aac5b3598 100644
--- a/drivers/dma/ioat/pci.c
+++ b/drivers/dma/ioat/pci.c
@@ -58,6 +58,19 @@ static struct pci_device_id ioat_pci_tbl[] = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG5) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG6) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG7) },
+
+	/* I/OAT v3.2 platforms */
+	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF0) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF1) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF2) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF3) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF4) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF5) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF6) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF7) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF8) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF9) },
+
 	{ 0, }
 };
 
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 0f71812d67d3..2b4b8ce53256 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2529,6 +2529,16 @@
 #define PCI_DEVICE_ID_INTEL_E7525_MCH	0x359e
 #define PCI_DEVICE_ID_INTEL_IOAT_CNB	0x360b
 #define PCI_DEVICE_ID_INTEL_FBD_CNB	0x360c
+#define PCI_DEVICE_ID_INTEL_IOAT_JSF0	0x3710
+#define PCI_DEVICE_ID_INTEL_IOAT_JSF1	0x3711
+#define PCI_DEVICE_ID_INTEL_IOAT_JSF2	0x3712
+#define PCI_DEVICE_ID_INTEL_IOAT_JSF3	0x3713
+#define PCI_DEVICE_ID_INTEL_IOAT_JSF4	0x3714
+#define PCI_DEVICE_ID_INTEL_IOAT_JSF5	0x3715
+#define PCI_DEVICE_ID_INTEL_IOAT_JSF6	0x3716
+#define PCI_DEVICE_ID_INTEL_IOAT_JSF7	0x3717
+#define PCI_DEVICE_ID_INTEL_IOAT_JSF8	0x3718
+#define PCI_DEVICE_ID_INTEL_IOAT_JSF9	0x3719
 #define PCI_DEVICE_ID_INTEL_ICH10_0	0x3a14
 #define PCI_DEVICE_ID_INTEL_ICH10_1	0x3a16
 #define PCI_DEVICE_ID_INTEL_ICH10_2	0x3a18
-- 
cgit v1.2.3


From 0803172778901e24a75ab074798d98c2b7411559 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Tue, 8 Sep 2009 17:53:04 -0700
Subject: dmaengine: kill tx_list

The tx_list attribute of struct dma_async_tx_descriptor is common to
most, but not all dma driver implementations.  None of the upper level
code (dmaengine/async_tx) uses it, so allow drivers to implement it
locally if they need it.  This saves sizeof(struct list_head) bytes for
drivers that do not manage descriptors with a linked list (e.g.: ioatdma
v2,3).

Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/dma/dmaengine.c   | 1 -
 include/linux/dmaengine.h | 3 ---
 2 files changed, 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
index 5a87384ea4ff..562d182eae66 100644
--- a/drivers/dma/dmaengine.c
+++ b/drivers/dma/dmaengine.c
@@ -933,7 +933,6 @@ void dma_async_tx_descriptor_init(struct dma_async_tx_descriptor *tx,
 {
 	tx->chan = chan;
 	spin_lock_init(&tx->lock);
-	INIT_LIST_HEAD(&tx->tx_list);
 }
 EXPORT_SYMBOL(dma_async_tx_descriptor_init);
 
diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index ffefba81c818..f114bc7790bc 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -180,8 +180,6 @@ typedef void (*dma_async_tx_callback)(void *dma_async_param);
  * @flags: flags to augment operation preparation, control completion, and
  * 	communicate status
  * @phys: physical address of the descriptor
- * @tx_list: driver common field for operations that require multiple
- *	descriptors
  * @chan: target channel for this operation
  * @tx_submit: set the prepared descriptor(s) to be executed by the engine
  * @callback: routine to call after this operation is complete
@@ -195,7 +193,6 @@ struct dma_async_tx_descriptor {
 	dma_cookie_t cookie;
 	enum dma_ctrl_flags flags; /* not a 'long' to pack with cookie */
 	dma_addr_t phys;
-	struct list_head tx_list;
 	struct dma_chan *chan;
 	dma_cookie_t (*tx_submit)(struct dma_async_tx_descriptor *tx);
 	dma_async_tx_callback callback;
-- 
cgit v1.2.3


From 1a5aeeecd550ee4344cfba1791f1134739b16dc6 Mon Sep 17 00:00:00 2001
From: Maciej Sosnowski <maciej.sosnowski@intel.com>
Date: Thu, 10 Sep 2009 15:05:58 +0200
Subject: dca: registering requesters in multiple dca domains

This patch enables DCA support on multiple-IOH/multiple-IIO architectures.
It modifies dca module by replacing single dca_providers list
with dca_domains list, each domain containing separate list of providers.
This approach lets dca driver manage multiple domains, i.e. sets of providers
and requesters mapped back to the same PCI root complex device.
The driver takes care to register each requester to a provider
from the same domain.

Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Maciej Sosnowski <maciej.sosnowski@intel.com>
---
 drivers/dca/dca-core.c | 122 +++++++++++++++++++++++++++++++++++++++++++------
 drivers/dma/ioat/pci.c |   2 +-
 include/linux/dca.h    |  11 ++++-
 3 files changed, 120 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/dca/dca-core.c b/drivers/dca/dca-core.c
index 25b743abfb59..7e318de0904b 100644
--- a/drivers/dca/dca-core.c
+++ b/drivers/dca/dca-core.c
@@ -28,7 +28,7 @@
 #include <linux/device.h>
 #include <linux/dca.h>
 
-#define DCA_VERSION "1.8"
+#define DCA_VERSION "1.12.1"
 
 MODULE_VERSION(DCA_VERSION);
 MODULE_LICENSE("GPL");
@@ -36,20 +36,92 @@ MODULE_AUTHOR("Intel Corporation");
 
 static DEFINE_SPINLOCK(dca_lock);
 
-static LIST_HEAD(dca_providers);
+static LIST_HEAD(dca_domains);
 
-static struct dca_provider *dca_find_provider_by_dev(struct device *dev)
+static struct pci_bus *dca_pci_rc_from_dev(struct device *dev)
 {
-	struct dca_provider *dca, *ret = NULL;
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct pci_bus *bus = pdev->bus;
 
-	list_for_each_entry(dca, &dca_providers, node) {
-		if ((!dev) || (dca->ops->dev_managed(dca, dev))) {
-			ret = dca;
-			break;
-		}
+	while (bus->parent)
+		bus = bus->parent;
+
+	return bus;
+}
+
+static struct dca_domain *dca_allocate_domain(struct pci_bus *rc)
+{
+	struct dca_domain *domain;
+
+	domain = kzalloc(sizeof(*domain), GFP_NOWAIT);
+	if (!domain)
+		return NULL;
+
+	INIT_LIST_HEAD(&domain->dca_providers);
+	domain->pci_rc = rc;
+
+	return domain;
+}
+
+static void dca_free_domain(struct dca_domain *domain)
+{
+	list_del(&domain->node);
+	kfree(domain);
+}
+
+static struct dca_domain *dca_find_domain(struct pci_bus *rc)
+{
+	struct dca_domain *domain;
+
+	list_for_each_entry(domain, &dca_domains, node)
+		if (domain->pci_rc == rc)
+			return domain;
+
+	return NULL;
+}
+
+static struct dca_domain *dca_get_domain(struct device *dev)
+{
+	struct pci_bus *rc;
+	struct dca_domain *domain;
+
+	rc = dca_pci_rc_from_dev(dev);
+	domain = dca_find_domain(rc);
+
+	if (!domain) {
+		domain = dca_allocate_domain(rc);
+		if (domain)
+			list_add(&domain->node, &dca_domains);
+	}
+
+	return domain;
+}
+
+static struct dca_provider *dca_find_provider_by_dev(struct device *dev)
+{
+	struct dca_provider *dca;
+	struct pci_bus *rc;
+	struct dca_domain *domain;
+
+	if (dev) {
+		rc = dca_pci_rc_from_dev(dev);
+		domain = dca_find_domain(rc);
+		if (!domain)
+			return NULL;
+	} else {
+		if (!list_empty(&dca_domains))
+			domain = list_first_entry(&dca_domains,
+						  struct dca_domain,
+						  node);
+		else
+			return NULL;
 	}
 
-	return ret;
+	list_for_each_entry(dca, &domain->dca_providers, node)
+		if ((!dev) || (dca->ops->dev_managed(dca, dev)))
+			return dca;
+
+	return NULL;
 }
 
 /**
@@ -61,6 +133,8 @@ int dca_add_requester(struct device *dev)
 	struct dca_provider *dca;
 	int err, slot = -ENODEV;
 	unsigned long flags;
+	struct pci_bus *pci_rc;
+	struct dca_domain *domain;
 
 	if (!dev)
 		return -EFAULT;
@@ -74,7 +148,14 @@ int dca_add_requester(struct device *dev)
 		return -EEXIST;
 	}
 
-	list_for_each_entry(dca, &dca_providers, node) {
+	pci_rc = dca_pci_rc_from_dev(dev);
+	domain = dca_find_domain(pci_rc);
+	if (!domain) {
+		spin_unlock_irqrestore(&dca_lock, flags);
+		return -ENODEV;
+	}
+
+	list_for_each_entry(dca, &domain->dca_providers, node) {
 		slot = dca->ops->add_requester(dca, dev);
 		if (slot >= 0)
 			break;
@@ -222,13 +303,19 @@ int register_dca_provider(struct dca_provider *dca, struct device *dev)
 {
 	int err;
 	unsigned long flags;
+	struct dca_domain *domain;
 
 	err = dca_sysfs_add_provider(dca, dev);
 	if (err)
 		return err;
 
 	spin_lock_irqsave(&dca_lock, flags);
-	list_add(&dca->node, &dca_providers);
+	domain = dca_get_domain(dev);
+	if (!domain) {
+		spin_unlock_irqrestore(&dca_lock, flags);
+		return -ENODEV;
+	}
+	list_add(&dca->node, &domain->dca_providers);
 	spin_unlock_irqrestore(&dca_lock, flags);
 
 	blocking_notifier_call_chain(&dca_provider_chain,
@@ -241,15 +328,24 @@ EXPORT_SYMBOL_GPL(register_dca_provider);
  * unregister_dca_provider - remove a dca provider
  * @dca - struct created by alloc_dca_provider()
  */
-void unregister_dca_provider(struct dca_provider *dca)
+void unregister_dca_provider(struct dca_provider *dca, struct device *dev)
 {
 	unsigned long flags;
+	struct pci_bus *pci_rc;
+	struct dca_domain *domain;
 
 	blocking_notifier_call_chain(&dca_provider_chain,
 				     DCA_PROVIDER_REMOVE, NULL);
 
 	spin_lock_irqsave(&dca_lock, flags);
+
 	list_del(&dca->node);
+
+	pci_rc = dca_pci_rc_from_dev(dev);
+	domain = dca_find_domain(pci_rc);
+	if (list_empty(&domain->dca_providers))
+		dca_free_domain(domain);
+
 	spin_unlock_irqrestore(&dca_lock, flags);
 
 	dca_sysfs_remove_provider(dca);
diff --git a/drivers/dma/ioat/pci.c b/drivers/dma/ioat/pci.c
index c788fa266470..d545fae30f37 100644
--- a/drivers/dma/ioat/pci.c
+++ b/drivers/dma/ioat/pci.c
@@ -175,7 +175,7 @@ static void __devexit ioat_remove(struct pci_dev *pdev)
 
 	dev_err(&pdev->dev, "Removing dma and dca services\n");
 	if (device->dca) {
-		unregister_dca_provider(device->dca);
+		unregister_dca_provider(device->dca, &pdev->dev);
 		free_dca_provider(device->dca);
 		device->dca = NULL;
 	}
diff --git a/include/linux/dca.h b/include/linux/dca.h
index 9c20c7e87d0a..d27a7a05718d 100644
--- a/include/linux/dca.h
+++ b/include/linux/dca.h
@@ -20,6 +20,9 @@
  */
 #ifndef DCA_H
 #define DCA_H
+
+#include <linux/pci.h>
+
 /* DCA Provider API */
 
 /* DCA Notifier Interface */
@@ -36,6 +39,12 @@ struct dca_provider {
 	int			 id;
 };
 
+struct dca_domain {
+	struct list_head	node;
+	struct list_head	dca_providers;
+	struct pci_bus		*pci_rc;
+};
+
 struct dca_ops {
 	int	(*add_requester)    (struct dca_provider *, struct device *);
 	int	(*remove_requester) (struct dca_provider *, struct device *);
@@ -47,7 +56,7 @@ struct dca_ops {
 struct dca_provider *alloc_dca_provider(struct dca_ops *ops, int priv_size);
 void free_dca_provider(struct dca_provider *dca);
 int register_dca_provider(struct dca_provider *dca, struct device *dev);
-void unregister_dca_provider(struct dca_provider *dca);
+void unregister_dca_provider(struct dca_provider *dca, struct device *dev);
 
 static inline void *dca_priv(struct dca_provider *dca)
 {
-- 
cgit v1.2.3