summaryrefslogtreecommitdiff
path: root/crypto
diff options
context:
space:
mode:
Diffstat (limited to 'crypto')
-rw-r--r--crypto/Kconfig12
-rw-r--r--crypto/Makefile6
-rw-r--r--crypto/async_tx/Kconfig16
-rw-r--r--crypto/async_tx/Makefile4
-rw-r--r--crypto/async_tx/async_memcpy.c131
-rw-r--r--crypto/async_tx/async_memset.c109
-rw-r--r--crypto/async_tx/async_tx.c497
-rw-r--r--crypto/async_tx/async_xor.c327
-rw-r--r--crypto/xor.c155
9 files changed, 1256 insertions, 1 deletions
diff --git a/crypto/Kconfig b/crypto/Kconfig
index 935301ee5ac8..3d1a1e27944f 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -1,7 +1,17 @@
#
-# Cryptographic API Configuration
+# Generic algorithms support
+#
+config XOR_BLOCKS
+ tristate
+
#
+# async_tx api: hardware offloaded memory transfer/transform support
+#
+source "crypto/async_tx/Kconfig"
+#
+# Cryptographic API Configuration
+#
menuconfig CRYPTO
bool "Cryptographic API"
help
diff --git a/crypto/Makefile b/crypto/Makefile
index cce46a1c9dc7..0cf17f1ea151 100644
--- a/crypto/Makefile
+++ b/crypto/Makefile
@@ -50,3 +50,9 @@ obj-$(CONFIG_CRYPTO_MICHAEL_MIC) += michael_mic.o
obj-$(CONFIG_CRYPTO_CRC32C) += crc32c.o
obj-$(CONFIG_CRYPTO_TEST) += tcrypt.o
+
+#
+# generic algorithms and the async_tx api
+#
+obj-$(CONFIG_XOR_BLOCKS) += xor.o
+obj-$(CONFIG_ASYNC_CORE) += async_tx/
diff --git a/crypto/async_tx/Kconfig b/crypto/async_tx/Kconfig
new file mode 100644
index 000000000000..d8fb39145986
--- /dev/null
+++ b/crypto/async_tx/Kconfig
@@ -0,0 +1,16 @@
+config ASYNC_CORE
+ tristate
+
+config ASYNC_MEMCPY
+ tristate
+ select ASYNC_CORE
+
+config ASYNC_XOR
+ tristate
+ select ASYNC_CORE
+ select XOR_BLOCKS
+
+config ASYNC_MEMSET
+ tristate
+ select ASYNC_CORE
+
diff --git a/crypto/async_tx/Makefile b/crypto/async_tx/Makefile
new file mode 100644
index 000000000000..27baa7d52fbc
--- /dev/null
+++ b/crypto/async_tx/Makefile
@@ -0,0 +1,4 @@
+obj-$(CONFIG_ASYNC_CORE) += async_tx.o
+obj-$(CONFIG_ASYNC_MEMCPY) += async_memcpy.o
+obj-$(CONFIG_ASYNC_MEMSET) += async_memset.o
+obj-$(CONFIG_ASYNC_XOR) += async_xor.o
diff --git a/crypto/async_tx/async_memcpy.c b/crypto/async_tx/async_memcpy.c
new file mode 100644
index 000000000000..a973f4ef897d
--- /dev/null
+++ b/crypto/async_tx/async_memcpy.c
@@ -0,0 +1,131 @@
+/*
+ * copy offload engine support
+ *
+ * Copyright © 2006, Intel Corporation.
+ *
+ * Dan Williams <dan.j.williams@intel.com>
+ *
+ * with architecture considerations by:
+ * Neil Brown <neilb@suse.de>
+ * Jeff Garzik <jeff@garzik.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ */
+#include <linux/kernel.h>
+#include <linux/highmem.h>
+#include <linux/mm.h>
+#include <linux/dma-mapping.h>
+#include <linux/async_tx.h>
+
+/**
+ * async_memcpy - attempt to copy memory with a dma engine.
+ * @dest: destination page
+ * @src: src page
+ * @offset: offset in pages to start transaction
+ * @len: length in bytes
+ * @flags: ASYNC_TX_ASSUME_COHERENT, ASYNC_TX_ACK, ASYNC_TX_DEP_ACK,
+ * ASYNC_TX_KMAP_SRC, ASYNC_TX_KMAP_DST
+ * @depend_tx: memcpy depends on the result of this transaction
+ * @cb_fn: function to call when the memcpy completes
+ * @cb_param: parameter to pass to the callback routine
+ */
+struct dma_async_tx_descriptor *
+async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset,
+ unsigned int src_offset, size_t len, enum async_tx_flags flags,
+ struct dma_async_tx_descriptor *depend_tx,
+ dma_async_tx_callback cb_fn, void *cb_param)
+{
+ struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_MEMCPY);
+ struct dma_device *device = chan ? chan->device : NULL;
+ int int_en = cb_fn ? 1 : 0;
+ struct dma_async_tx_descriptor *tx = device ?
+ device->device_prep_dma_memcpy(chan, len,
+ int_en) : NULL;
+
+ if (tx) { /* run the memcpy asynchronously */
+ dma_addr_t addr;
+ enum dma_data_direction dir;
+
+ pr_debug("%s: (async) len: %zu\n", __FUNCTION__, len);
+
+ dir = (flags & ASYNC_TX_ASSUME_COHERENT) ?
+ DMA_NONE : DMA_FROM_DEVICE;
+
+ addr = dma_map_page(device->dev, dest, dest_offset, len, dir);
+ tx->tx_set_dest(addr, tx, 0);
+
+ dir = (flags & ASYNC_TX_ASSUME_COHERENT) ?
+ DMA_NONE : DMA_TO_DEVICE;
+
+ addr = dma_map_page(device->dev, src, src_offset, len, dir);
+ tx->tx_set_src(addr, tx, 0);
+
+ async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param);
+ } else { /* run the memcpy synchronously */
+ void *dest_buf, *src_buf;
+ pr_debug("%s: (sync) len: %zu\n", __FUNCTION__, len);
+
+ /* wait for any prerequisite operations */
+ if (depend_tx) {
+ /* if ack is already set then we cannot be sure
+ * we are referring to the correct operation
+ */
+ BUG_ON(depend_tx->ack);
+ if (dma_wait_for_async_tx(depend_tx) == DMA_ERROR)
+ panic("%s: DMA_ERROR waiting for depend_tx\n",
+ __FUNCTION__);
+ }
+
+ if (flags & ASYNC_TX_KMAP_DST)
+ dest_buf = kmap_atomic(dest, KM_USER0) + dest_offset;
+ else
+ dest_buf = page_address(dest) + dest_offset;
+
+ if (flags & ASYNC_TX_KMAP_SRC)
+ src_buf = kmap_atomic(src, KM_USER0) + src_offset;
+ else
+ src_buf = page_address(src) + src_offset;
+
+ memcpy(dest_buf, src_buf, len);
+
+ if (flags & ASYNC_TX_KMAP_DST)
+ kunmap_atomic(dest_buf, KM_USER0);
+
+ if (flags & ASYNC_TX_KMAP_SRC)
+ kunmap_atomic(src_buf, KM_USER0);
+
+ async_tx_sync_epilog(flags, depend_tx, cb_fn, cb_param);
+ }
+
+ return tx;
+}
+EXPORT_SYMBOL_GPL(async_memcpy);
+
+static int __init async_memcpy_init(void)
+{
+ return 0;
+}
+
+static void __exit async_memcpy_exit(void)
+{
+ do { } while (0);
+}
+
+module_init(async_memcpy_init);
+module_exit(async_memcpy_exit);
+
+MODULE_AUTHOR("Intel Corporation");
+MODULE_DESCRIPTION("asynchronous memcpy api");
+MODULE_LICENSE("GPL");
diff --git a/crypto/async_tx/async_memset.c b/crypto/async_tx/async_memset.c
new file mode 100644
index 000000000000..66ef6351202e
--- /dev/null
+++ b/crypto/async_tx/async_memset.c
@@ -0,0 +1,109 @@
+/*
+ * memory fill offload engine support
+ *
+ * Copyright © 2006, Intel Corporation.
+ *
+ * Dan Williams <dan.j.williams@intel.com>
+ *
+ * with architecture considerations by:
+ * Neil Brown <neilb@suse.de>
+ * Jeff Garzik <jeff@garzik.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ */
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/mm.h>
+#include <linux/dma-mapping.h>
+#include <linux/async_tx.h>
+
+/**
+ * async_memset - attempt to fill memory with a dma engine.
+ * @dest: destination page
+ * @val: fill value
+ * @offset: offset in pages to start transaction
+ * @len: length in bytes
+ * @flags: ASYNC_TX_ASSUME_COHERENT, ASYNC_TX_ACK, ASYNC_TX_DEP_ACK
+ * @depend_tx: memset depends on the result of this transaction
+ * @cb_fn: function to call when the memcpy completes
+ * @cb_param: parameter to pass to the callback routine
+ */
+struct dma_async_tx_descriptor *
+async_memset(struct page *dest, int val, unsigned int offset,
+ size_t len, enum async_tx_flags flags,
+ struct dma_async_tx_descriptor *depend_tx,
+ dma_async_tx_callback cb_fn, void *cb_param)
+{
+ struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_MEMSET);
+ struct dma_device *device = chan ? chan->device : NULL;
+ int int_en = cb_fn ? 1 : 0;
+ struct dma_async_tx_descriptor *tx = device ?
+ device->device_prep_dma_memset(chan, val, len,
+ int_en) : NULL;
+
+ if (tx) { /* run the memset asynchronously */
+ dma_addr_t dma_addr;
+ enum dma_data_direction dir;
+
+ pr_debug("%s: (async) len: %zu\n", __FUNCTION__, len);
+ dir = (flags & ASYNC_TX_ASSUME_COHERENT) ?
+ DMA_NONE : DMA_FROM_DEVICE;
+
+ dma_addr = dma_map_page(device->dev, dest, offset, len, dir);
+ tx->tx_set_dest(dma_addr, tx, 0);
+
+ async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param);
+ } else { /* run the memset synchronously */
+ void *dest_buf;
+ pr_debug("%s: (sync) len: %zu\n", __FUNCTION__, len);
+
+ dest_buf = (void *) (((char *) page_address(dest)) + offset);
+
+ /* wait for any prerequisite operations */
+ if (depend_tx) {
+ /* if ack is already set then we cannot be sure
+ * we are referring to the correct operation
+ */
+ BUG_ON(depend_tx->ack);
+ if (dma_wait_for_async_tx(depend_tx) == DMA_ERROR)
+ panic("%s: DMA_ERROR waiting for depend_tx\n",
+ __FUNCTION__);
+ }
+
+ memset(dest_buf, val, len);
+
+ async_tx_sync_epilog(flags, depend_tx, cb_fn, cb_param);
+ }
+
+ return tx;
+}
+EXPORT_SYMBOL_GPL(async_memset);
+
+static int __init async_memset_init(void)
+{
+ return 0;
+}
+
+static void __exit async_memset_exit(void)
+{
+ do { } while (0);
+}
+
+module_init(async_memset_init);
+module_exit(async_memset_exit);
+
+MODULE_AUTHOR("Intel Corporation");
+MODULE_DESCRIPTION("asynchronous memset api");
+MODULE_LICENSE("GPL");
diff --git a/crypto/async_tx/async_tx.c b/crypto/async_tx/async_tx.c
new file mode 100644
index 000000000000..035007145e78
--- /dev/null
+++ b/crypto/async_tx/async_tx.c
@@ -0,0 +1,497 @@
+/*
+ * core routines for the asynchronous memory transfer/transform api
+ *
+ * Copyright © 2006, Intel Corporation.
+ *
+ * Dan Williams <dan.j.williams@intel.com>
+ *
+ * with architecture considerations by:
+ * Neil Brown <neilb@suse.de>
+ * Jeff Garzik <jeff@garzik.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ */
+#include <linux/kernel.h>
+#include <linux/async_tx.h>
+
+#ifdef CONFIG_DMA_ENGINE
+static enum dma_state_client
+dma_channel_add_remove(struct dma_client *client,
+ struct dma_chan *chan, enum dma_state state);
+
+static struct dma_client async_tx_dma = {
+ .event_callback = dma_channel_add_remove,
+ /* .cap_mask == 0 defaults to all channels */
+};
+
+/**
+ * dma_cap_mask_all - enable iteration over all operation types
+ */
+static dma_cap_mask_t dma_cap_mask_all;
+
+/**
+ * chan_ref_percpu - tracks channel allocations per core/opertion
+ */
+struct chan_ref_percpu {
+ struct dma_chan_ref *ref;
+};
+
+static int channel_table_initialized;
+static struct chan_ref_percpu *channel_table[DMA_TX_TYPE_END];
+
+/**
+ * async_tx_lock - protect modification of async_tx_master_list and serialize
+ * rebalance operations
+ */
+static spinlock_t async_tx_lock;
+
+static struct list_head
+async_tx_master_list = LIST_HEAD_INIT(async_tx_master_list);
+
+/* async_tx_issue_pending_all - start all transactions on all channels */
+void async_tx_issue_pending_all(void)
+{
+ struct dma_chan_ref *ref;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(ref, &async_tx_master_list, node)
+ ref->chan->device->device_issue_pending(ref->chan);
+ rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(async_tx_issue_pending_all);
+
+/* dma_wait_for_async_tx - spin wait for a transcation to complete
+ * @tx: transaction to wait on
+ */
+enum dma_status
+dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx)
+{
+ enum dma_status status;
+ struct dma_async_tx_descriptor *iter;
+
+ if (!tx)
+ return DMA_SUCCESS;
+
+ /* poll through the dependency chain, return when tx is complete */
+ do {
+ iter = tx;
+ while (iter->cookie == -EBUSY)
+ iter = iter->parent;
+
+ status = dma_sync_wait(iter->chan, iter->cookie);
+ } while (status == DMA_IN_PROGRESS || (iter != tx));
+
+ return status;
+}
+EXPORT_SYMBOL_GPL(dma_wait_for_async_tx);
+
+/* async_tx_run_dependencies - helper routine for dma drivers to process
+ * (start) dependent operations on their target channel
+ * @tx: transaction with dependencies
+ */
+void
+async_tx_run_dependencies(struct dma_async_tx_descriptor *tx)
+{
+ struct dma_async_tx_descriptor *dep_tx, *_dep_tx;
+ struct dma_device *dev;
+ struct dma_chan *chan;
+
+ list_for_each_entry_safe(dep_tx, _dep_tx, &tx->depend_list,
+ depend_node) {
+ chan = dep_tx->chan;
+ dev = chan->device;
+ /* we can't depend on ourselves */
+ BUG_ON(chan == tx->chan);
+ list_del(&dep_tx->depend_node);
+ tx->tx_submit(dep_tx);
+
+ /* we need to poke the engine as client code does not
+ * know about dependency submission events
+ */
+ dev->device_issue_pending(chan);
+ }
+}
+EXPORT_SYMBOL_GPL(async_tx_run_dependencies);
+
+static void
+free_dma_chan_ref(struct rcu_head *rcu)
+{
+ struct dma_chan_ref *ref;
+ ref = container_of(rcu, struct dma_chan_ref, rcu);
+ kfree(ref);
+}
+
+static void
+init_dma_chan_ref(struct dma_chan_ref *ref, struct dma_chan *chan)
+{
+ INIT_LIST_HEAD(&ref->node);
+ INIT_RCU_HEAD(&ref->rcu);
+ ref->chan = chan;
+ atomic_set(&ref->count, 0);
+}
+
+/**
+ * get_chan_ref_by_cap - returns the nth channel of the given capability
+ * defaults to returning the channel with the desired capability and the
+ * lowest reference count if the index can not be satisfied
+ * @cap: capability to match
+ * @index: nth channel desired, passing -1 has the effect of forcing the
+ * default return value
+ */
+static struct dma_chan_ref *
+get_chan_ref_by_cap(enum dma_transaction_type cap, int index)
+{
+ struct dma_chan_ref *ret_ref = NULL, *min_ref = NULL, *ref;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(ref, &async_tx_master_list, node)
+ if (dma_has_cap(cap, ref->chan->device->cap_mask)) {
+ if (!min_ref)
+ min_ref = ref;
+ else if (atomic_read(&ref->count) <
+ atomic_read(&min_ref->count))
+ min_ref = ref;
+
+ if (index-- == 0) {
+ ret_ref = ref;
+ break;
+ }
+ }
+ rcu_read_unlock();
+
+ if (!ret_ref)
+ ret_ref = min_ref;
+
+ if (ret_ref)
+ atomic_inc(&ret_ref->count);
+
+ return ret_ref;
+}
+
+/**
+ * async_tx_rebalance - redistribute the available channels, optimize
+ * for cpu isolation in the SMP case, and opertaion isolation in the
+ * uniprocessor case
+ */
+static void async_tx_rebalance(void)
+{
+ int cpu, cap, cpu_idx = 0;
+ unsigned long flags;
+
+ if (!channel_table_initialized)
+ return;
+
+ spin_lock_irqsave(&async_tx_lock, flags);
+
+ /* undo the last distribution */
+ for_each_dma_cap_mask(cap, dma_cap_mask_all)
+ for_each_possible_cpu(cpu) {
+ struct dma_chan_ref *ref =
+ per_cpu_ptr(channel_table[cap], cpu)->ref;
+ if (ref) {
+ atomic_set(&ref->count, 0);
+ per_cpu_ptr(channel_table[cap], cpu)->ref =
+ NULL;
+ }
+ }
+
+ for_each_dma_cap_mask(cap, dma_cap_mask_all)
+ for_each_online_cpu(cpu) {
+ struct dma_chan_ref *new;
+ if (NR_CPUS > 1)
+ new = get_chan_ref_by_cap(cap, cpu_idx++);
+ else
+ new = get_chan_ref_by_cap(cap, -1);
+
+ per_cpu_ptr(channel_table[cap], cpu)->ref = new;
+ }
+
+ spin_unlock_irqrestore(&async_tx_lock, flags);
+}
+
+static enum dma_state_client
+dma_channel_add_remove(struct dma_client *client,
+ struct dma_chan *chan, enum dma_state state)
+{
+ unsigned long found, flags;
+ struct dma_chan_ref *master_ref, *ref;
+ enum dma_state_client ack = DMA_DUP; /* default: take no action */
+
+ switch (state) {
+ case DMA_RESOURCE_AVAILABLE:
+ found = 0;
+ rcu_read_lock();
+ list_for_each_entry_rcu(ref, &async_tx_master_list, node)
+ if (ref->chan == chan) {
+ found = 1;
+ break;
+ }
+ rcu_read_unlock();
+
+ pr_debug("async_tx: dma resource available [%s]\n",
+ found ? "old" : "new");
+
+ if (!found)
+ ack = DMA_ACK;
+ else
+ break;
+
+ /* add the channel to the generic management list */
+ master_ref = kmalloc(sizeof(*master_ref), GFP_KERNEL);
+ if (master_ref) {
+ /* keep a reference until async_tx is unloaded */
+ dma_chan_get(chan);
+ init_dma_chan_ref(master_ref, chan);
+ spin_lock_irqsave(&async_tx_lock, flags);
+ list_add_tail_rcu(&master_ref->node,
+ &async_tx_master_list);
+ spin_unlock_irqrestore(&async_tx_lock,
+ flags);
+ } else {
+ printk(KERN_WARNING "async_tx: unable to create"
+ " new master entry in response to"
+ " a DMA_RESOURCE_ADDED event"
+ " (-ENOMEM)\n");
+ return 0;
+ }
+
+ async_tx_rebalance();
+ break;
+ case DMA_RESOURCE_REMOVED:
+ found = 0;
+ spin_lock_irqsave(&async_tx_lock, flags);
+ list_for_each_entry_rcu(ref, &async_tx_master_list, node)
+ if (ref->chan == chan) {
+ /* permit backing devices to go away */
+ dma_chan_put(ref->chan);
+ list_del_rcu(&ref->node);
+ call_rcu(&ref->rcu, free_dma_chan_ref);
+ found = 1;
+ break;
+ }
+ spin_unlock_irqrestore(&async_tx_lock, flags);
+
+ pr_debug("async_tx: dma resource removed [%s]\n",
+ found ? "ours" : "not ours");
+
+ if (found)
+ ack = DMA_ACK;
+ else
+ break;
+
+ async_tx_rebalance();
+ break;
+ case DMA_RESOURCE_SUSPEND:
+ case DMA_RESOURCE_RESUME:
+ printk(KERN_WARNING "async_tx: does not support dma channel"
+ " suspend/resume\n");
+ break;
+ default:
+ BUG();
+ }
+
+ return ack;
+}
+
+static int __init
+async_tx_init(void)
+{
+ enum dma_transaction_type cap;
+
+ spin_lock_init(&async_tx_lock);
+ bitmap_fill(dma_cap_mask_all.bits, DMA_TX_TYPE_END);
+
+ /* an interrupt will never be an explicit operation type.
+ * clearing this bit prevents allocation to a slot in 'channel_table'
+ */
+ clear_bit(DMA_INTERRUPT, dma_cap_mask_all.bits);
+
+ for_each_dma_cap_mask(cap, dma_cap_mask_all) {
+ channel_table[cap] = alloc_percpu(struct chan_ref_percpu);
+ if (!channel_table[cap])
+ goto err;
+ }
+
+ channel_table_initialized = 1;
+ dma_async_client_register(&async_tx_dma);
+ dma_async_client_chan_request(&async_tx_dma);
+
+ printk(KERN_INFO "async_tx: api initialized (async)\n");
+
+ return 0;
+err:
+ printk(KERN_ERR "async_tx: initialization failure\n");
+
+ while (--cap >= 0)
+ free_percpu(channel_table[cap]);
+
+ return 1;
+}
+
+static void __exit async_tx_exit(void)
+{
+ enum dma_transaction_type cap;
+
+ channel_table_initialized = 0;
+
+ for_each_dma_cap_mask(cap, dma_cap_mask_all)
+ if (channel_table[cap])
+ free_percpu(channel_table[cap]);
+
+ dma_async_client_unregister(&async_tx_dma);
+}
+
+/**
+ * async_tx_find_channel - find a channel to carry out the operation or let
+ * the transaction execute synchronously
+ * @depend_tx: transaction dependency
+ * @tx_type: transaction type
+ */
+struct dma_chan *
+async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx,
+ enum dma_transaction_type tx_type)
+{
+ /* see if we can keep the chain on one channel */
+ if (depend_tx &&
+ dma_has_cap(tx_type, depend_tx->chan->device->cap_mask))
+ return depend_tx->chan;
+ else if (likely(channel_table_initialized)) {
+ struct dma_chan_ref *ref;
+ int cpu = get_cpu();
+ ref = per_cpu_ptr(channel_table[tx_type], cpu)->ref;
+ put_cpu();
+ return ref ? ref->chan : NULL;
+ } else
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(async_tx_find_channel);
+#else
+static int __init async_tx_init(void)
+{
+ printk(KERN_INFO "async_tx: api initialized (sync-only)\n");
+ return 0;
+}
+
+static void __exit async_tx_exit(void)
+{
+ do { } while (0);
+}
+#endif
+
+void
+async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx,
+ enum async_tx_flags flags, struct dma_async_tx_descriptor *depend_tx,
+ dma_async_tx_callback cb_fn, void *cb_param)
+{
+ tx->callback = cb_fn;
+ tx->callback_param = cb_param;
+
+ /* set this new tx to run after depend_tx if:
+ * 1/ a dependency exists (depend_tx is !NULL)
+ * 2/ the tx can not be submitted to the current channel
+ */
+ if (depend_tx && depend_tx->chan != chan) {
+ /* if ack is already set then we cannot be sure
+ * we are referring to the correct operation
+ */
+ BUG_ON(depend_tx->ack);
+
+ tx->parent = depend_tx;
+ spin_lock_bh(&depend_tx->lock);
+ list_add_tail(&tx->depend_node, &depend_tx->depend_list);
+ if (depend_tx->cookie == 0) {
+ struct dma_chan *dep_chan = depend_tx->chan;
+ struct dma_device *dep_dev = dep_chan->device;
+ dep_dev->device_dependency_added(dep_chan);
+ }
+ spin_unlock_bh(&depend_tx->lock);
+
+ /* schedule an interrupt to trigger the channel switch */
+ async_trigger_callback(ASYNC_TX_ACK, depend_tx, NULL, NULL);
+ } else {
+ tx->parent = NULL;
+ tx->tx_submit(tx);
+ }
+
+ if (flags & ASYNC_TX_ACK)
+ async_tx_ack(tx);
+
+ if (depend_tx && (flags & ASYNC_TX_DEP_ACK))
+ async_tx_ack(depend_tx);
+}
+EXPORT_SYMBOL_GPL(async_tx_submit);
+
+/**
+ * async_trigger_callback - schedules the callback function to be run after
+ * any dependent operations have been completed.
+ * @flags: ASYNC_TX_ACK, ASYNC_TX_DEP_ACK
+ * @depend_tx: 'callback' requires the completion of this transaction
+ * @cb_fn: function to call after depend_tx completes
+ * @cb_param: parameter to pass to the callback routine
+ */
+struct dma_async_tx_descriptor *
+async_trigger_callback(enum async_tx_flags flags,
+ struct dma_async_tx_descriptor *depend_tx,
+ dma_async_tx_callback cb_fn, void *cb_param)
+{
+ struct dma_chan *chan;
+ struct dma_device *device;
+ struct dma_async_tx_descriptor *tx;
+
+ if (depend_tx) {
+ chan = depend_tx->chan;
+ device = chan->device;
+
+ /* see if we can schedule an interrupt
+ * otherwise poll for completion
+ */
+ if (device && !dma_has_cap(DMA_INTERRUPT, device->cap_mask))
+ device = NULL;
+
+ tx = device ? device->device_prep_dma_interrupt(chan) : NULL;
+ } else
+ tx = NULL;
+
+ if (tx) {
+ pr_debug("%s: (async)\n", __FUNCTION__);
+
+ async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param);
+ } else {
+ pr_debug("%s: (sync)\n", __FUNCTION__);
+
+ /* wait for any prerequisite operations */
+ if (depend_tx) {
+ /* if ack is already set then we cannot be sure
+ * we are referring to the correct operation
+ */
+ BUG_ON(depend_tx->ack);
+ if (dma_wait_for_async_tx(depend_tx) == DMA_ERROR)
+ panic("%s: DMA_ERROR waiting for depend_tx\n",
+ __FUNCTION__);
+ }
+
+ async_tx_sync_epilog(flags, depend_tx, cb_fn, cb_param);
+ }
+
+ return tx;
+}
+EXPORT_SYMBOL_GPL(async_trigger_callback);
+
+module_init(async_tx_init);
+module_exit(async_tx_exit);
+
+MODULE_AUTHOR("Intel Corporation");
+MODULE_DESCRIPTION("Asynchronous Bulk Memory Transactions API");
+MODULE_LICENSE("GPL");
diff --git a/crypto/async_tx/async_xor.c b/crypto/async_tx/async_xor.c
new file mode 100644
index 000000000000..2575f674dcd5
--- /dev/null
+++ b/crypto/async_tx/async_xor.c
@@ -0,0 +1,327 @@
+/*
+ * xor offload engine api
+ *
+ * Copyright © 2006, Intel Corporation.
+ *
+ * Dan Williams <dan.j.williams@intel.com>
+ *
+ * with architecture considerations by:
+ * Neil Brown <neilb@suse.de>
+ * Jeff Garzik <jeff@garzik.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ */
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/mm.h>
+#include <linux/dma-mapping.h>
+#include <linux/raid/xor.h>
+#include <linux/async_tx.h>
+
+static void
+do_async_xor(struct dma_async_tx_descriptor *tx, struct dma_device *device,
+ struct dma_chan *chan, struct page *dest, struct page **src_list,
+ unsigned int offset, unsigned int src_cnt, size_t len,
+ enum async_tx_flags flags, struct dma_async_tx_descriptor *depend_tx,
+ dma_async_tx_callback cb_fn, void *cb_param)
+{
+ dma_addr_t dma_addr;
+ enum dma_data_direction dir;
+ int i;
+
+ pr_debug("%s: len: %zu\n", __FUNCTION__, len);
+
+ dir = (flags & ASYNC_TX_ASSUME_COHERENT) ?
+ DMA_NONE : DMA_FROM_DEVICE;
+
+ dma_addr = dma_map_page(device->dev, dest, offset, len, dir);
+ tx->tx_set_dest(dma_addr, tx, 0);
+
+ dir = (flags & ASYNC_TX_ASSUME_COHERENT) ?
+ DMA_NONE : DMA_TO_DEVICE;
+
+ for (i = 0; i < src_cnt; i++) {
+ dma_addr = dma_map_page(device->dev, src_list[i],
+ offset, len, dir);
+ tx->tx_set_src(dma_addr, tx, i);
+ }
+
+ async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param);
+}
+
+static void
+do_sync_xor(struct page *dest, struct page **src_list, unsigned int offset,
+ unsigned int src_cnt, size_t len, enum async_tx_flags flags,
+ struct dma_async_tx_descriptor *depend_tx,
+ dma_async_tx_callback cb_fn, void *cb_param)
+{
+ void *_dest;
+ int i;
+
+ pr_debug("%s: len: %zu\n", __FUNCTION__, len);
+
+ /* reuse the 'src_list' array to convert to buffer pointers */
+ for (i = 0; i < src_cnt; i++)
+ src_list[i] = (struct page *)
+ (page_address(src_list[i]) + offset);
+
+ /* set destination address */
+ _dest = page_address(dest) + offset;
+
+ if (flags & ASYNC_TX_XOR_ZERO_DST)
+ memset(_dest, 0, len);
+
+ xor_blocks(src_cnt, len, _dest,
+ (void **) src_list);
+
+ async_tx_sync_epilog(flags, depend_tx, cb_fn, cb_param);
+}
+
+/**
+ * async_xor - attempt to xor a set of blocks with a dma engine.
+ * xor_blocks always uses the dest as a source so the ASYNC_TX_XOR_ZERO_DST
+ * flag must be set to not include dest data in the calculation. The
+ * assumption with dma eninges is that they only use the destination
+ * buffer as a source when it is explicity specified in the source list.
+ * @dest: destination page
+ * @src_list: array of source pages (if the dest is also a source it must be
+ * at index zero). The contents of this array may be overwritten.
+ * @offset: offset in pages to start transaction
+ * @src_cnt: number of source pages
+ * @len: length in bytes
+ * @flags: ASYNC_TX_XOR_ZERO_DST, ASYNC_TX_XOR_DROP_DEST,
+ * ASYNC_TX_ASSUME_COHERENT, ASYNC_TX_ACK, ASYNC_TX_DEP_ACK
+ * @depend_tx: xor depends on the result of this transaction.
+ * @cb_fn: function to call when the xor completes
+ * @cb_param: parameter to pass to the callback routine
+ */
+struct dma_async_tx_descriptor *
+async_xor(struct page *dest, struct page **src_list, unsigned int offset,
+ int src_cnt, size_t len, enum async_tx_flags flags,
+ struct dma_async_tx_descriptor *depend_tx,
+ dma_async_tx_callback cb_fn, void *cb_param)
+{
+ struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_XOR);
+ struct dma_device *device = chan ? chan->device : NULL;
+ struct dma_async_tx_descriptor *tx = NULL;
+ dma_async_tx_callback _cb_fn;
+ void *_cb_param;
+ unsigned long local_flags;
+ int xor_src_cnt;
+ int i = 0, src_off = 0, int_en;
+
+ BUG_ON(src_cnt <= 1);
+
+ while (src_cnt) {
+ local_flags = flags;
+ if (device) { /* run the xor asynchronously */
+ xor_src_cnt = min(src_cnt, device->max_xor);
+ /* if we are submitting additional xors
+ * only set the callback on the last transaction
+ */
+ if (src_cnt > xor_src_cnt) {
+ local_flags &= ~ASYNC_TX_ACK;
+ _cb_fn = NULL;
+ _cb_param = NULL;
+ } else {
+ _cb_fn = cb_fn;
+ _cb_param = cb_param;
+ }
+
+ int_en = _cb_fn ? 1 : 0;
+
+ tx = device->device_prep_dma_xor(
+ chan, xor_src_cnt, len, int_en);
+
+ if (tx) {
+ do_async_xor(tx, device, chan, dest,
+ &src_list[src_off], offset, xor_src_cnt, len,
+ local_flags, depend_tx, _cb_fn,
+ _cb_param);
+ } else /* fall through */
+ goto xor_sync;
+ } else { /* run the xor synchronously */
+xor_sync:
+ /* in the sync case the dest is an implied source
+ * (assumes the dest is at the src_off index)
+ */
+ if (flags & ASYNC_TX_XOR_DROP_DST) {
+ src_cnt--;
+ src_off++;
+ }
+
+ /* process up to 'MAX_XOR_BLOCKS' sources */
+ xor_src_cnt = min(src_cnt, MAX_XOR_BLOCKS);
+
+ /* if we are submitting additional xors
+ * only set the callback on the last transaction
+ */
+ if (src_cnt > xor_src_cnt) {
+ local_flags &= ~ASYNC_TX_ACK;
+ _cb_fn = NULL;
+ _cb_param = NULL;
+ } else {
+ _cb_fn = cb_fn;
+ _cb_param = cb_param;
+ }
+
+ /* wait for any prerequisite operations */
+ if (depend_tx) {
+ /* if ack is already set then we cannot be sure
+ * we are referring to the correct operation
+ */
+ BUG_ON(depend_tx->ack);
+ if (dma_wait_for_async_tx(depend_tx) ==
+ DMA_ERROR)
+ panic("%s: DMA_ERROR waiting for "
+ "depend_tx\n",
+ __FUNCTION__);
+ }
+
+ do_sync_xor(dest, &src_list[src_off], offset,
+ xor_src_cnt, len, local_flags, depend_tx,
+ _cb_fn, _cb_param);
+ }
+
+ /* the previous tx is hidden from the client,
+ * so ack it
+ */
+ if (i && depend_tx)
+ async_tx_ack(depend_tx);
+
+ depend_tx = tx;
+
+ if (src_cnt > xor_src_cnt) {
+ /* drop completed sources */
+ src_cnt -= xor_src_cnt;
+ src_off += xor_src_cnt;
+
+ /* unconditionally preserve the destination */
+ flags &= ~ASYNC_TX_XOR_ZERO_DST;
+
+ /* use the intermediate result a source, but remember
+ * it's dropped, because it's implied, in the sync case
+ */
+ src_list[--src_off] = dest;
+ src_cnt++;
+ flags |= ASYNC_TX_XOR_DROP_DST;
+ } else
+ src_cnt = 0;
+ i++;
+ }
+
+ return tx;
+}
+EXPORT_SYMBOL_GPL(async_xor);
+
+static int page_is_zero(struct page *p, unsigned int offset, size_t len)
+{
+ char *a = page_address(p) + offset;
+ return ((*(u32 *) a) == 0 &&
+ memcmp(a, a + 4, len - 4) == 0);
+}
+
+/**
+ * async_xor_zero_sum - attempt a xor parity check with a dma engine.
+ * @dest: destination page used if the xor is performed synchronously
+ * @src_list: array of source pages. The dest page must be listed as a source
+ * at index zero. The contents of this array may be overwritten.
+ * @offset: offset in pages to start transaction
+ * @src_cnt: number of source pages
+ * @len: length in bytes
+ * @result: 0 if sum == 0 else non-zero
+ * @flags: ASYNC_TX_ASSUME_COHERENT, ASYNC_TX_ACK, ASYNC_TX_DEP_ACK
+ * @depend_tx: xor depends on the result of this transaction.
+ * @cb_fn: function to call when the xor completes
+ * @cb_param: parameter to pass to the callback routine
+ */
+struct dma_async_tx_descriptor *
+async_xor_zero_sum(struct page *dest, struct page **src_list,
+ unsigned int offset, int src_cnt, size_t len,
+ u32 *result, enum async_tx_flags flags,
+ struct dma_async_tx_descriptor *depend_tx,
+ dma_async_tx_callback cb_fn, void *cb_param)
+{
+ struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_ZERO_SUM);
+ struct dma_device *device = chan ? chan->device : NULL;
+ int int_en = cb_fn ? 1 : 0;
+ struct dma_async_tx_descriptor *tx = device ?
+ device->device_prep_dma_zero_sum(chan, src_cnt, len, result,
+ int_en) : NULL;
+ int i;
+
+ BUG_ON(src_cnt <= 1);
+
+ if (tx) {
+ dma_addr_t dma_addr;
+ enum dma_data_direction dir;
+
+ pr_debug("%s: (async) len: %zu\n", __FUNCTION__, len);
+
+ dir = (flags & ASYNC_TX_ASSUME_COHERENT) ?
+ DMA_NONE : DMA_TO_DEVICE;
+
+ for (i = 0; i < src_cnt; i++) {
+ dma_addr = dma_map_page(device->dev, src_list[i],
+ offset, len, dir);
+ tx->tx_set_src(dma_addr, tx, i);
+ }
+
+ async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param);
+ } else {
+ unsigned long xor_flags = flags;
+
+ pr_debug("%s: (sync) len: %zu\n", __FUNCTION__, len);
+
+ xor_flags |= ASYNC_TX_XOR_DROP_DST;
+ xor_flags &= ~ASYNC_TX_ACK;
+
+ tx = async_xor(dest, src_list, offset, src_cnt, len, xor_flags,
+ depend_tx, NULL, NULL);
+
+ if (tx) {
+ if (dma_wait_for_async_tx(tx) == DMA_ERROR)
+ panic("%s: DMA_ERROR waiting for tx\n",
+ __FUNCTION__);
+ async_tx_ack(tx);
+ }
+
+ *result = page_is_zero(dest, offset, len) ? 0 : 1;
+
+ tx = NULL;
+
+ async_tx_sync_epilog(flags, depend_tx, cb_fn, cb_param);
+ }
+
+ return tx;
+}
+EXPORT_SYMBOL_GPL(async_xor_zero_sum);
+
+static int __init async_xor_init(void)
+{
+ return 0;
+}
+
+static void __exit async_xor_exit(void)
+{
+ do { } while (0);
+}
+
+module_init(async_xor_init);
+module_exit(async_xor_exit);
+
+MODULE_AUTHOR("Intel Corporation");
+MODULE_DESCRIPTION("asynchronous xor/xor-zero-sum api");
+MODULE_LICENSE("GPL");
diff --git a/crypto/xor.c b/crypto/xor.c
new file mode 100644
index 000000000000..b2e6db075e49
--- /dev/null
+++ b/crypto/xor.c
@@ -0,0 +1,155 @@
+/*
+ * xor.c : Multiple Devices driver for Linux
+ *
+ * Copyright (C) 1996, 1997, 1998, 1999, 2000,
+ * Ingo Molnar, Matti Aarnio, Jakub Jelinek, Richard Henderson.
+ *
+ * Dispatch optimized RAID-5 checksumming functions.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * You should have received a copy of the GNU General Public License
+ * (for example /usr/src/linux/COPYING); if not, write to the Free
+ * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#define BH_TRACE 0
+#include <linux/module.h>
+#include <linux/raid/md.h>
+#include <linux/raid/xor.h>
+#include <asm/xor.h>
+
+/* The xor routines to use. */
+static struct xor_block_template *active_template;
+
+void
+xor_blocks(unsigned int src_count, unsigned int bytes, void *dest, void **srcs)
+{
+ unsigned long *p1, *p2, *p3, *p4;
+
+ p1 = (unsigned long *) srcs[0];
+ if (src_count == 1) {
+ active_template->do_2(bytes, dest, p1);
+ return;
+ }
+
+ p2 = (unsigned long *) srcs[1];
+ if (src_count == 2) {
+ active_template->do_3(bytes, dest, p1, p2);
+ return;
+ }
+
+ p3 = (unsigned long *) srcs[2];
+ if (src_count == 3) {
+ active_template->do_4(bytes, dest, p1, p2, p3);
+ return;
+ }
+
+ p4 = (unsigned long *) srcs[3];
+ active_template->do_5(bytes, dest, p1, p2, p3, p4);
+}
+EXPORT_SYMBOL(xor_blocks);
+
+/* Set of all registered templates. */
+static struct xor_block_template *template_list;
+
+#define BENCH_SIZE (PAGE_SIZE)
+
+static void
+do_xor_speed(struct xor_block_template *tmpl, void *b1, void *b2)
+{
+ int speed;
+ unsigned long now;
+ int i, count, max;
+
+ tmpl->next = template_list;
+ template_list = tmpl;
+
+ /*
+ * Count the number of XORs done during a whole jiffy, and use
+ * this to calculate the speed of checksumming. We use a 2-page
+ * allocation to have guaranteed color L1-cache layout.
+ */
+ max = 0;
+ for (i = 0; i < 5; i++) {
+ now = jiffies;
+ count = 0;
+ while (jiffies == now) {
+ mb(); /* prevent loop optimzation */
+ tmpl->do_2(BENCH_SIZE, b1, b2);
+ mb();
+ count++;
+ mb();
+ }
+ if (count > max)
+ max = count;
+ }
+
+ speed = max * (HZ * BENCH_SIZE / 1024);
+ tmpl->speed = speed;
+
+ printk(KERN_INFO " %-10s: %5d.%03d MB/sec\n", tmpl->name,
+ speed / 1000, speed % 1000);
+}
+
+static int __init
+calibrate_xor_blocks(void)
+{
+ void *b1, *b2;
+ struct xor_block_template *f, *fastest;
+
+ b1 = (void *) __get_free_pages(GFP_KERNEL, 2);
+ if (!b1) {
+ printk(KERN_WARNING "xor: Yikes! No memory available.\n");
+ return -ENOMEM;
+ }
+ b2 = b1 + 2*PAGE_SIZE + BENCH_SIZE;
+
+ /*
+ * If this arch/cpu has a short-circuited selection, don't loop through
+ * all the possible functions, just test the best one
+ */
+
+ fastest = NULL;
+
+#ifdef XOR_SELECT_TEMPLATE
+ fastest = XOR_SELECT_TEMPLATE(fastest);
+#endif
+
+#define xor_speed(templ) do_xor_speed((templ), b1, b2)
+
+ if (fastest) {
+ printk(KERN_INFO "xor: automatically using best "
+ "checksumming function: %s\n",
+ fastest->name);
+ xor_speed(fastest);
+ } else {
+ printk(KERN_INFO "xor: measuring software checksum speed\n");
+ XOR_TRY_TEMPLATES;
+ fastest = template_list;
+ for (f = fastest; f; f = f->next)
+ if (f->speed > fastest->speed)
+ fastest = f;
+ }
+
+ printk(KERN_INFO "xor: using function: %s (%d.%03d MB/sec)\n",
+ fastest->name, fastest->speed / 1000, fastest->speed % 1000);
+
+#undef xor_speed
+
+ free_pages((unsigned long)b1, 2);
+
+ active_template = fastest;
+ return 0;
+}
+
+static __exit void xor_exit(void) { }
+
+MODULE_LICENSE("GPL");
+
+/* when built-in xor.o must initialize before drivers/md/md.o */
+core_initcall(calibrate_xor_blocks);
+module_exit(xor_exit);