diff options
Diffstat (limited to 'drivers')
42 files changed, 717 insertions, 647 deletions
diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig index de41d7928bff..bfe0731f3764 100644 --- a/drivers/dma/Kconfig +++ b/drivers/dma/Kconfig @@ -285,8 +285,9 @@ config INTEL_IDMA64 config INTEL_IDXD tristate "Intel Data Accelerators support" depends on PCI && X86_64 + depends on PCI_MSI + depends on SBITMAP select DMA_ENGINE - select SBITMAP help Enable support for the Intel(R) data accelerators present in Intel Xeon CPU. diff --git a/drivers/dma/acpi-dma.c b/drivers/dma/acpi-dma.c index 8a05db3343d3..35f4804ea4af 100644 --- a/drivers/dma/acpi-dma.c +++ b/drivers/dma/acpi-dma.c @@ -358,19 +358,12 @@ struct dma_chan *acpi_dma_request_slave_chan_by_index(struct device *dev, { struct acpi_dma_parser_data pdata; struct acpi_dma_spec *dma_spec = &pdata.dma_spec; + struct acpi_device *adev = ACPI_COMPANION(dev); struct list_head resource_list; - struct acpi_device *adev; struct acpi_dma *adma; struct dma_chan *chan = NULL; int found; - - /* Check if the device was enumerated by ACPI */ - if (!dev) - return ERR_PTR(-ENODEV); - - adev = ACPI_COMPANION(dev); - if (!adev) - return ERR_PTR(-ENODEV); + int ret; memset(&pdata, 0, sizeof(pdata)); pdata.index = index; @@ -380,9 +373,11 @@ struct dma_chan *acpi_dma_request_slave_chan_by_index(struct device *dev, dma_spec->slave_id = -1; INIT_LIST_HEAD(&resource_list); - acpi_dev_get_resources(adev, &resource_list, - acpi_dma_parse_fixed_dma, &pdata); + ret = acpi_dev_get_resources(adev, &resource_list, + acpi_dma_parse_fixed_dma, &pdata); acpi_dev_free_resource_list(&resource_list); + if (ret < 0) + return ERR_PTR(ret); if (dma_spec->slave_id < 0 || dma_spec->chan_id < 0) return ERR_PTR(-ENODEV); diff --git a/drivers/dma/altera-msgdma.c b/drivers/dma/altera-msgdma.c index 539e785039ca..321ac3a7aa41 100644 --- a/drivers/dma/altera-msgdma.c +++ b/drivers/dma/altera-msgdma.c @@ -153,7 +153,8 @@ struct msgdma_extended_desc { * struct msgdma_sw_desc - implements a sw descriptor * @async_tx: support for the async_tx api * @hw_desc: assosiated HW descriptor - * @free_list: node of the free SW descriprots list + * @node: node to move from the free list to the tx list + * @tx_list: transmit list node */ struct msgdma_sw_desc { struct dma_async_tx_descriptor async_tx; @@ -162,7 +163,7 @@ struct msgdma_sw_desc { struct list_head tx_list; }; -/** +/* * struct msgdma_device - DMA device structure */ struct msgdma_device { @@ -258,6 +259,7 @@ static void msgdma_free_desc_list(struct msgdma_device *mdev, * @dst: Destination buffer address * @src: Source buffer address * @len: Transfer length + * @stride: Read/write stride value to set */ static void msgdma_desc_config(struct msgdma_extended_desc *desc, dma_addr_t dst, dma_addr_t src, size_t len, diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index 73a20780744b..45bbcd6146fd 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -656,7 +656,7 @@ static irqreturn_t at_dma_interrupt(int irq, void *dev_id) /** * atc_tx_submit - set the prepared descriptor(s) to be executed by the engine - * @desc: descriptor at the head of the transaction chain + * @tx: descriptor at the head of the transaction chain * * Queue chain if DMA engine is working already * @@ -1196,7 +1196,7 @@ err: return NULL; } -/** +/* * atc_dma_cyclic_check_values * Check for too big/unaligned periods and unaligned DMA buffer */ @@ -1217,7 +1217,7 @@ err_out: return -EINVAL; } -/** +/* * atc_dma_cyclic_fill_desc - Fill one period descriptor */ static int diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c index 2b06a7a8629d..a53e71d2bbd4 100644 --- a/drivers/dma/dmaengine.c +++ b/drivers/dma/dmaengine.c @@ -592,13 +592,25 @@ int dma_get_slave_caps(struct dma_chan *chan, struct dma_slave_caps *caps) caps->src_addr_widths = device->src_addr_widths; caps->dst_addr_widths = device->dst_addr_widths; caps->directions = device->directions; + caps->min_burst = device->min_burst; caps->max_burst = device->max_burst; + caps->max_sg_burst = device->max_sg_burst; caps->residue_granularity = device->residue_granularity; caps->descriptor_reuse = device->descriptor_reuse; caps->cmd_pause = !!device->device_pause; caps->cmd_resume = !!device->device_resume; caps->cmd_terminate = !!device->device_terminate_all; + /* + * DMA engine device might be configured with non-uniformly + * distributed slave capabilities per device channels. In this + * case the corresponding driver may provide the device_caps + * callback to override the generic capabilities with + * channel-specific ones. + */ + if (device->device_caps) + device->device_caps(chan, caps); + return 0; } EXPORT_SYMBOL_GPL(dma_get_slave_caps); diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c index 604f80357931..45d4d92e91db 100644 --- a/drivers/dma/dmatest.c +++ b/drivers/dma/dmatest.c @@ -829,7 +829,10 @@ static int dmatest_func(void *data) result("test timed out", total_tests, src->off, dst->off, len, 0); goto error_unmap_continue; - } else if (status != DMA_COMPLETE) { + } else if (status != DMA_COMPLETE && + !(dma_has_cap(DMA_COMPLETION_NO_ORDER, + dev->cap_mask) && + status == DMA_OUT_OF_ORDER)) { result(status == DMA_ERROR ? "completion error status" : "completion busy status", total_tests, src->off, @@ -1007,6 +1010,12 @@ static int dmatest_add_channel(struct dmatest_info *info, dtc->chan = chan; INIT_LIST_HEAD(&dtc->threads); + if (dma_has_cap(DMA_COMPLETION_NO_ORDER, dma_dev->cap_mask) && + info->params.polled) { + info->params.polled = false; + pr_warn("DMA_COMPLETION_NO_ORDER, polled disabled\n"); + } + if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask)) { if (dmatest == 0) { cnt = dmatest_add_threads(info, dtc, DMA_MEMCPY); diff --git a/drivers/dma/dw/Makefile b/drivers/dma/dw/Makefile index b6f06699e91a..a6f358ad8591 100644 --- a/drivers/dma/dw/Makefile +++ b/drivers/dma/dw/Makefile @@ -1,11 +1,11 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_DW_DMAC_CORE) += dw_dmac_core.o -dw_dmac_core-objs := core.o dw.o idma32.o +dw_dmac_core-y := core.o dw.o idma32.o +dw_dmac_core-$(CONFIG_ACPI) += acpi.o obj-$(CONFIG_DW_DMAC) += dw_dmac.o dw_dmac-y := platform.o -dw_dmac-$(CONFIG_ACPI) += acpi.o dw_dmac-$(CONFIG_OF) += of.o obj-$(CONFIG_DW_DMAC_PCI) += dw_dmac_pci.o -dw_dmac_pci-objs := pci.o +dw_dmac_pci-y := pci.o diff --git a/drivers/dma/dw/acpi.c b/drivers/dma/dw/acpi.c index f6e8d55b4f6e..c510c109d2c3 100644 --- a/drivers/dma/dw/acpi.c +++ b/drivers/dma/dw/acpi.c @@ -41,6 +41,7 @@ void dw_dma_acpi_controller_register(struct dw_dma *dw) if (ret) dev_err(dev, "could not register acpi_dma_controller\n"); } +EXPORT_SYMBOL_GPL(dw_dma_acpi_controller_register); void dw_dma_acpi_controller_free(struct dw_dma *dw) { @@ -51,3 +52,4 @@ void dw_dma_acpi_controller_free(struct dw_dma *dw) acpi_dma_controller_free(dev); } +EXPORT_SYMBOL_GPL(dw_dma_acpi_controller_free); diff --git a/drivers/dma/dw/core.c b/drivers/dma/dw/core.c index a1b56f52db2f..4700f2e87a62 100644 --- a/drivers/dma/dw/core.c +++ b/drivers/dma/dw/core.c @@ -786,6 +786,11 @@ static int dwc_config(struct dma_chan *chan, struct dma_slave_config *sconfig) memcpy(&dwc->dma_sconfig, sconfig, sizeof(*sconfig)); + dwc->dma_sconfig.src_maxburst = + clamp(dwc->dma_sconfig.src_maxburst, 0U, dwc->max_burst); + dwc->dma_sconfig.dst_maxburst = + clamp(dwc->dma_sconfig.dst_maxburst, 0U, dwc->max_burst); + dw->encode_maxburst(dwc, &dwc->dma_sconfig.src_maxburst); dw->encode_maxburst(dwc, &dwc->dma_sconfig.dst_maxburst); @@ -1037,6 +1042,25 @@ static void dwc_free_chan_resources(struct dma_chan *chan) dev_vdbg(chan2dev(chan), "%s: done\n", __func__); } +static void dwc_caps(struct dma_chan *chan, struct dma_slave_caps *caps) +{ + struct dw_dma_chan *dwc = to_dw_dma_chan(chan); + + caps->max_burst = dwc->max_burst; + + /* + * It might be crucial for some devices to have the hardware + * accelerated multi-block transfers supported, aka LLPs in DW DMAC + * notation. So if LLPs are supported then max_sg_burst is set to + * zero which means unlimited number of SG entries can be handled in a + * single DMA transaction, otherwise it's just one SG entry. + */ + if (dwc->nollp) + caps->max_sg_burst = 1; + else + caps->max_sg_burst = 0; +} + int do_dma_probe(struct dw_dma_chip *chip) { struct dw_dma *dw = chip->dw; @@ -1166,11 +1190,23 @@ int do_dma_probe(struct dw_dma_chip *chip) */ dwc->block_size = (4 << ((pdata->block_size >> 4 * i) & 0xf)) - 1; + + /* + * According to the DW DMA databook the true scatter- + * gether LLPs aren't available if either multi-block + * config is disabled (CHx_MULTI_BLK_EN == 0) or the + * LLP register is hard-coded to zeros + * (CHx_HC_LLP == 1). + */ dwc->nollp = - (dwc_params >> DWC_PARAMS_MBLK_EN & 0x1) == 0; + (dwc_params >> DWC_PARAMS_MBLK_EN & 0x1) == 0 || + (dwc_params >> DWC_PARAMS_HC_LLP & 0x1) == 1; + dwc->max_burst = + (0x4 << (dwc_params >> DWC_PARAMS_MSIZE & 0x7)); } else { dwc->block_size = pdata->block_size; dwc->nollp = !pdata->multi_block[i]; + dwc->max_burst = pdata->max_burst[i] ?: DW_DMA_MAX_BURST; } } @@ -1193,6 +1229,7 @@ int do_dma_probe(struct dw_dma_chip *chip) dw->dma.device_prep_dma_memcpy = dwc_prep_dma_memcpy; dw->dma.device_prep_slave_sg = dwc_prep_slave_sg; + dw->dma.device_caps = dwc_caps; dw->dma.device_config = dwc_config; dw->dma.device_pause = dwc_pause; dw->dma.device_resume = dwc_resume; @@ -1202,12 +1239,21 @@ int do_dma_probe(struct dw_dma_chip *chip) dw->dma.device_issue_pending = dwc_issue_pending; /* DMA capabilities */ + dw->dma.min_burst = DW_DMA_MIN_BURST; + dw->dma.max_burst = DW_DMA_MAX_BURST; dw->dma.src_addr_widths = DW_DMA_BUSWIDTHS; dw->dma.dst_addr_widths = DW_DMA_BUSWIDTHS; dw->dma.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV) | BIT(DMA_MEM_TO_MEM); dw->dma.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST; + /* + * For now there is no hardware with non uniform maximum block size + * across all of the device channels, so we set the maximum segment + * size as the block size found for the very first channel. + */ + dma_set_max_seg_size(dw->dma.dev, dw->chan[0].block_size); + err = dma_async_device_register(&dw->dma); if (err) goto err_dma_register; diff --git a/drivers/dma/dw/of.c b/drivers/dma/dw/of.c index 9e27831dee32..1474b3817ef4 100644 --- a/drivers/dma/dw/of.c +++ b/drivers/dma/dw/of.c @@ -98,6 +98,11 @@ struct dw_dma_platform_data *dw_dma_parse_dt(struct platform_device *pdev) pdata->multi_block[tmp] = 1; } + if (of_property_read_u32_array(np, "snps,max-burst-len", pdata->max_burst, + nr_channels)) { + memset32(pdata->max_burst, DW_DMA_MAX_BURST, nr_channels); + } + if (!of_property_read_u32(np, "snps,dma-protection-control", &tmp)) { if (tmp > CHAN_PROTCTL_MASK) return NULL; diff --git a/drivers/dma/dw/pci.c b/drivers/dma/dw/pci.c index cf6e8ec4c0ff..1142aa6f8c4a 100644 --- a/drivers/dma/dw/pci.c +++ b/drivers/dma/dw/pci.c @@ -60,6 +60,8 @@ static int dw_pci_probe(struct pci_dev *pdev, const struct pci_device_id *pid) if (ret) return ret; + dw_dma_acpi_controller_register(chip->dw); + pci_set_drvdata(pdev, data); return 0; @@ -71,6 +73,8 @@ static void dw_pci_remove(struct pci_dev *pdev) struct dw_dma_chip *chip = data->chip; int ret; + dw_dma_acpi_controller_free(chip->dw); + ret = data->remove(chip); if (ret) dev_warn(&pdev->dev, "can't remove device properly: %d\n", ret); diff --git a/drivers/dma/dw/regs.h b/drivers/dma/dw/regs.h index 3fce66ecee7a..76654bd13c1a 100644 --- a/drivers/dma/dw/regs.h +++ b/drivers/dma/dw/regs.h @@ -125,6 +125,8 @@ struct dw_dma_regs { /* Bitfields in DWC_PARAMS */ #define DWC_PARAMS_MBLK_EN 11 /* multi block transfer */ +#define DWC_PARAMS_HC_LLP 13 /* set LLP register to zero */ +#define DWC_PARAMS_MSIZE 16 /* max group transaction size */ /* bursts size */ enum dw_dma_msize { @@ -283,6 +285,7 @@ struct dw_dma_chan { /* hardware configuration */ unsigned int block_size; bool nollp; + u32 max_burst; /* custom slave configuration */ struct dw_dma_slave dws; diff --git a/drivers/dma/ep93xx_dma.c b/drivers/dma/ep93xx_dma.c index 9c8b4d35cf03..87a246012629 100644 --- a/drivers/dma/ep93xx_dma.c +++ b/drivers/dma/ep93xx_dma.c @@ -147,6 +147,7 @@ struct ep93xx_dma_desc { * is set via .device_config before slave operation is * prepared * @runtime_ctrl: M2M runtime values for the control register. + * @slave_config: slave configuration * * As EP93xx DMA controller doesn't support real chained DMA descriptors we * will have slightly different scheme here: @active points to a head of @@ -187,6 +188,7 @@ struct ep93xx_dma_chan { * @dma_dev: holds the dmaengine device * @m2m: is this an M2M or M2P device * @hw_setup: method which sets the channel up for operation + * @hw_synchronize: synchronizes DMA channel termination to current context * @hw_shutdown: shuts the channel down and flushes whatever is left * @hw_submit: pushes active descriptor(s) to the hardware * @hw_interrupt: handle the interrupt diff --git a/drivers/dma/fsl-qdma.c b/drivers/dma/fsl-qdma.c index 95cc0256b387..ed2ab46b15e7 100644 --- a/drivers/dma/fsl-qdma.c +++ b/drivers/dma/fsl-qdma.c @@ -56,7 +56,7 @@ /* Registers for bit and genmask */ #define FSL_QDMA_CQIDR_SQT BIT(15) -#define QDMA_CCDF_FOTMAT BIT(29) +#define QDMA_CCDF_FORMAT BIT(29) #define QDMA_CCDF_SER BIT(30) #define QDMA_SG_FIN BIT(30) #define QDMA_SG_LEN_MASK GENMASK(29, 0) @@ -110,8 +110,19 @@ #define FSL_QDMA_CMD_DSEN_OFFSET 19 #define FSL_QDMA_CMD_LWC_OFFSET 16 +/* Field definition for Descriptor status */ +#define QDMA_CCDF_STATUS_RTE BIT(5) +#define QDMA_CCDF_STATUS_WTE BIT(4) +#define QDMA_CCDF_STATUS_CDE BIT(2) +#define QDMA_CCDF_STATUS_SDE BIT(1) +#define QDMA_CCDF_STATUS_DDE BIT(0) +#define QDMA_CCDF_STATUS_MASK (QDMA_CCDF_STATUS_RTE | \ + QDMA_CCDF_STATUS_WTE | \ + QDMA_CCDF_STATUS_CDE | \ + QDMA_CCDF_STATUS_SDE | \ + QDMA_CCDF_STATUS_DDE) + /* Field definition for Descriptor offset */ -#define QDMA_CCDF_STATUS 20 #define QDMA_CCDF_OFFSET 20 #define QDMA_SDDF_CMD(x) (((u64)(x)) << 32) @@ -136,7 +147,7 @@ * @__reserved1: Reserved field. * @cfg8b_w1: Compound descriptor command queue origin produced * by qDMA and dynamic debug field. - * @data Pointer to the memory 40-bit address, describes DMA + * @data: Pointer to the memory 40-bit address, describes DMA * source information and DMA destination information. */ struct fsl_qdma_format { @@ -243,13 +254,14 @@ qdma_ccdf_get_offset(const struct fsl_qdma_format *ccdf) static inline void qdma_ccdf_set_format(struct fsl_qdma_format *ccdf, int offset) { - ccdf->cfg = cpu_to_le32(QDMA_CCDF_FOTMAT | offset); + ccdf->cfg = cpu_to_le32(QDMA_CCDF_FORMAT | + (offset << QDMA_CCDF_OFFSET)); } static inline int qdma_ccdf_get_status(const struct fsl_qdma_format *ccdf) { - return (le32_to_cpu(ccdf->status) & QDMA_CCDF_MASK) >> QDMA_CCDF_STATUS; + return (le32_to_cpu(ccdf->status) & QDMA_CCDF_STATUS_MASK); } static inline void @@ -618,6 +630,7 @@ fsl_qdma_queue_transfer_complete(struct fsl_qdma_engine *fsl_qdma, { bool duplicate; u32 reg, i, count; + u8 completion_status; struct fsl_qdma_queue *temp_queue; struct fsl_qdma_format *status_addr; struct fsl_qdma_comp *fsl_comp = NULL; @@ -677,6 +690,8 @@ fsl_qdma_queue_transfer_complete(struct fsl_qdma_engine *fsl_qdma, } list_del(&fsl_comp->list); + completion_status = qdma_ccdf_get_status(status_addr); + reg = qdma_readl(fsl_qdma, block + FSL_QDMA_BSQMR); reg |= FSL_QDMA_BSQMR_DI; qdma_desc_addr_set64(status_addr, 0x0); @@ -686,6 +701,31 @@ fsl_qdma_queue_transfer_complete(struct fsl_qdma_engine *fsl_qdma, qdma_writel(fsl_qdma, reg, block + FSL_QDMA_BSQMR); spin_unlock(&temp_queue->queue_lock); + /* The completion_status is evaluated here + * (outside of spin lock) + */ + if (completion_status) { + /* A completion error occurred! */ + if (completion_status & QDMA_CCDF_STATUS_WTE) { + /* Write transaction error */ + fsl_comp->vdesc.tx_result.result = + DMA_TRANS_WRITE_FAILED; + } else if (completion_status & QDMA_CCDF_STATUS_RTE) { + /* Read transaction error */ + fsl_comp->vdesc.tx_result.result = + DMA_TRANS_READ_FAILED; + } else { + /* Command/source/destination + * description error + */ + fsl_comp->vdesc.tx_result.result = + DMA_TRANS_ABORTED; + dev_err(fsl_qdma->dma_dev.dev, + "DMA status descriptor error %x\n", + completion_status); + } + } + spin_lock(&fsl_comp->qchan->vchan.lock); vchan_cookie_complete(&fsl_comp->vdesc); fsl_comp->qchan->status = DMA_COMPLETE; @@ -700,11 +740,22 @@ static irqreturn_t fsl_qdma_error_handler(int irq, void *dev_id) unsigned int intr; struct fsl_qdma_engine *fsl_qdma = dev_id; void __iomem *status = fsl_qdma->status_base; + unsigned int decfdw0r; + unsigned int decfdw1r; + unsigned int decfdw2r; + unsigned int decfdw3r; intr = qdma_readl(fsl_qdma, status + FSL_QDMA_DEDR); - if (intr) - dev_err(fsl_qdma->dma_dev.dev, "DMA transaction error!\n"); + if (intr) { + decfdw0r = qdma_readl(fsl_qdma, status + FSL_QDMA_DECFDW0R); + decfdw1r = qdma_readl(fsl_qdma, status + FSL_QDMA_DECFDW1R); + decfdw2r = qdma_readl(fsl_qdma, status + FSL_QDMA_DECFDW2R); + decfdw3r = qdma_readl(fsl_qdma, status + FSL_QDMA_DECFDW3R); + dev_err(fsl_qdma->dma_dev.dev, + "DMA transaction error! (%x: %x-%x-%x-%x)\n", + intr, decfdw0r, decfdw1r, decfdw2r, decfdw3r); + } qdma_writel(fsl_qdma, FSL_QDMA_DEDR_CLEAR, status + FSL_QDMA_DEDR); return IRQ_HANDLED; diff --git a/drivers/dma/hisi_dma.c b/drivers/dma/hisi_dma.c index ed3619266a48..e1a958ae7925 100644 --- a/drivers/dma/hisi_dma.c +++ b/drivers/dma/hisi_dma.c @@ -511,7 +511,6 @@ static int hisi_dma_probe(struct pci_dev *pdev, const struct pci_device_id *id) struct device *dev = &pdev->dev; struct hisi_dma_dev *hdma_dev; struct dma_device *dma_dev; - size_t dev_size; int ret; ret = pcim_enable_device(pdev); @@ -534,9 +533,7 @@ static int hisi_dma_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (ret) return ret; - dev_size = sizeof(struct hisi_dma_chan) * HISI_DMA_CHAN_NUM + - sizeof(*hdma_dev); - hdma_dev = devm_kzalloc(dev, dev_size, GFP_KERNEL); + hdma_dev = devm_kzalloc(dev, struct_size(hdma_dev, chan, HISI_DMA_CHAN_NUM), GFP_KERNEL); if (!hdma_dev) return -EINVAL; diff --git a/drivers/dma/idxd/cdev.c b/drivers/dma/idxd/cdev.c index cb376cf6a2d2..c3976156db2f 100644 --- a/drivers/dma/idxd/cdev.c +++ b/drivers/dma/idxd/cdev.c @@ -115,6 +115,9 @@ static int idxd_cdev_release(struct inode *node, struct file *filep) dev_dbg(dev, "%s called\n", __func__); filep->private_data = NULL; + /* Wait for in-flight operations to complete. */ + idxd_wq_drain(wq); + kfree(ctx); mutex_lock(&wq->wq_lock); idxd_wq_put(wq); diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index 8d2718c585dc..14b45853aa5f 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -6,70 +6,39 @@ #include <linux/pci.h> #include <linux/io-64-nonatomic-lo-hi.h> #include <linux/dmaengine.h> +#include <linux/irq.h> +#include <linux/msi.h> #include <uapi/linux/idxd.h> #include "../dmaengine.h" #include "idxd.h" #include "registers.h" -static int idxd_cmd_wait(struct idxd_device *idxd, u32 *status, int timeout); -static int idxd_cmd_send(struct idxd_device *idxd, int cmd_code, u32 operand); +static void idxd_cmd_exec(struct idxd_device *idxd, int cmd_code, u32 operand, + u32 *status); /* Interrupt control bits */ -int idxd_mask_msix_vector(struct idxd_device *idxd, int vec_id) +void idxd_mask_msix_vector(struct idxd_device *idxd, int vec_id) { - struct pci_dev *pdev = idxd->pdev; - int msixcnt = pci_msix_vec_count(pdev); - union msix_perm perm; - u32 offset; - - if (vec_id < 0 || vec_id >= msixcnt) - return -EINVAL; + struct irq_data *data = irq_get_irq_data(idxd->msix_entries[vec_id].vector); - offset = idxd->msix_perm_offset + vec_id * 8; - perm.bits = ioread32(idxd->reg_base + offset); - perm.ignore = 1; - iowrite32(perm.bits, idxd->reg_base + offset); - - return 0; + pci_msi_mask_irq(data); } void idxd_mask_msix_vectors(struct idxd_device *idxd) { struct pci_dev *pdev = idxd->pdev; int msixcnt = pci_msix_vec_count(pdev); - int i, rc; + int i; - for (i = 0; i < msixcnt; i++) { - rc = idxd_mask_msix_vector(idxd, i); - if (rc < 0) - dev_warn(&pdev->dev, - "Failed disabling msix vec %d\n", i); - } + for (i = 0; i < msixcnt; i++) + idxd_mask_msix_vector(idxd, i); } -int idxd_unmask_msix_vector(struct idxd_device *idxd, int vec_id) +void idxd_unmask_msix_vector(struct idxd_device *idxd, int vec_id) { - struct pci_dev *pdev = idxd->pdev; - int msixcnt = pci_msix_vec_count(pdev); - union msix_perm perm; - u32 offset; - - if (vec_id < 0 || vec_id >= msixcnt) - return -EINVAL; + struct irq_data *data = irq_get_irq_data(idxd->msix_entries[vec_id].vector); - offset = idxd->msix_perm_offset + vec_id * 8; - perm.bits = ioread32(idxd->reg_base + offset); - perm.ignore = 0; - iowrite32(perm.bits, idxd->reg_base + offset); - - /* - * A readback from the device ensures that any previously generated - * completion record writes are visible to software based on PCI - * ordering rules. - */ - perm.bits = ioread32(idxd->reg_base + offset); - - return 0; + pci_msi_unmask_irq(data); } void idxd_unmask_error_interrupts(struct idxd_device *idxd) @@ -160,16 +129,14 @@ static int alloc_descs(struct idxd_wq *wq, int num) int idxd_wq_alloc_resources(struct idxd_wq *wq) { struct idxd_device *idxd = wq->idxd; - struct idxd_group *group = wq->group; struct device *dev = &idxd->pdev->dev; int rc, num_descs, i; if (wq->type != IDXD_WQT_KERNEL) return 0; - num_descs = wq->size + - idxd->hw.gen_cap.max_descs_per_engine * group->num_engines; - wq->num_descs = num_descs; + wq->num_descs = wq->size; + num_descs = wq->size; rc = alloc_hw_descs(wq, num_descs); if (rc < 0) @@ -187,8 +154,8 @@ int idxd_wq_alloc_resources(struct idxd_wq *wq) if (rc < 0) goto fail_alloc_descs; - rc = sbitmap_init_node(&wq->sbmap, num_descs, -1, GFP_KERNEL, - dev_to_node(dev)); + rc = sbitmap_queue_init_node(&wq->sbq, num_descs, -1, false, GFP_KERNEL, + dev_to_node(dev)); if (rc < 0) goto fail_sbitmap_init; @@ -201,7 +168,7 @@ int idxd_wq_alloc_resources(struct idxd_wq *wq) sizeof(struct dsa_completion_record) * i; desc->id = i; desc->wq = wq; - + desc->cpu = -1; dma_async_tx_descriptor_init(&desc->txd, &wq->dma_chan); desc->txd.tx_submit = idxd_dma_tx_submit; } @@ -227,7 +194,7 @@ void idxd_wq_free_resources(struct idxd_wq *wq) free_hw_descs(wq); free_descs(wq); dma_free_coherent(dev, wq->compls_size, wq->compls, wq->compls_addr); - sbitmap_free(&wq->sbmap); + sbitmap_queue_free(&wq->sbq); } int idxd_wq_enable(struct idxd_wq *wq) @@ -235,21 +202,13 @@ int idxd_wq_enable(struct idxd_wq *wq) struct idxd_device *idxd = wq->idxd; struct device *dev = &idxd->pdev->dev; u32 status; - int rc; - - lockdep_assert_held(&idxd->dev_lock); if (wq->state == IDXD_WQ_ENABLED) { dev_dbg(dev, "WQ %d already enabled\n", wq->id); return -ENXIO; } - rc = idxd_cmd_send(idxd, IDXD_CMD_ENABLE_WQ, wq->id); - if (rc < 0) - return rc; - rc = idxd_cmd_wait(idxd, &status, IDXD_REG_TIMEOUT); - if (rc < 0) - return rc; + idxd_cmd_exec(idxd, IDXD_CMD_ENABLE_WQ, wq->id, &status); if (status != IDXD_CMDSTS_SUCCESS && status != IDXD_CMDSTS_ERR_WQ_ENABLED) { @@ -267,9 +226,7 @@ int idxd_wq_disable(struct idxd_wq *wq) struct idxd_device *idxd = wq->idxd; struct device *dev = &idxd->pdev->dev; u32 status, operand; - int rc; - lockdep_assert_held(&idxd->dev_lock); dev_dbg(dev, "Disabling WQ %d\n", wq->id); if (wq->state != IDXD_WQ_ENABLED) { @@ -278,12 +235,7 @@ int idxd_wq_disable(struct idxd_wq *wq) } operand = BIT(wq->id % 16) | ((wq->id / 16) << 16); - rc = idxd_cmd_send(idxd, IDXD_CMD_DISABLE_WQ, operand); - if (rc < 0) - return rc; - rc = idxd_cmd_wait(idxd, &status, IDXD_REG_TIMEOUT); - if (rc < 0) - return rc; + idxd_cmd_exec(idxd, IDXD_CMD_DISABLE_WQ, operand, &status); if (status != IDXD_CMDSTS_SUCCESS) { dev_dbg(dev, "WQ disable failed: %#x\n", status); @@ -295,6 +247,22 @@ int idxd_wq_disable(struct idxd_wq *wq) return 0; } +void idxd_wq_drain(struct idxd_wq *wq) +{ + struct idxd_device *idxd = wq->idxd; + struct device *dev = &idxd->pdev->dev; + u32 operand; + + if (wq->state != IDXD_WQ_ENABLED) { + dev_dbg(dev, "WQ %d in wrong state: %d\n", wq->id, wq->state); + return; + } + + dev_dbg(dev, "Draining WQ %d\n", wq->id); + operand = BIT(wq->id % 16) | ((wq->id / 16) << 16); + idxd_cmd_exec(idxd, IDXD_CMD_DRAIN_WQ, operand, NULL); +} + int idxd_wq_map_portal(struct idxd_wq *wq) { struct idxd_device *idxd = wq->idxd; @@ -357,66 +325,79 @@ static inline bool idxd_is_enabled(struct idxd_device *idxd) return false; } -static int idxd_cmd_wait(struct idxd_device *idxd, u32 *status, int timeout) +/* + * This is function is only used for reset during probe and will + * poll for completion. Once the device is setup with interrupts, + * all commands will be done via interrupt completion. + */ +void idxd_device_init_reset(struct idxd_device *idxd) { - u32 sts, to = timeout; - - lockdep_assert_held(&idxd->dev_lock); - sts = ioread32(idxd->reg_base + IDXD_CMDSTS_OFFSET); - while (sts & IDXD_CMDSTS_ACTIVE && --to) { - cpu_relax(); - sts = ioread32(idxd->reg_base + IDXD_CMDSTS_OFFSET); - } + struct device *dev = &idxd->pdev->dev; + union idxd_command_reg cmd; + unsigned long flags; - if (to == 0 && sts & IDXD_CMDSTS_ACTIVE) { - dev_warn(&idxd->pdev->dev, "%s timed out!\n", __func__); - *status = 0; - return -EBUSY; - } + memset(&cmd, 0, sizeof(cmd)); + cmd.cmd = IDXD_CMD_RESET_DEVICE; + dev_dbg(dev, "%s: sending reset for init.\n", __func__); + spin_lock_irqsave(&idxd->dev_lock, flags); + iowrite32(cmd.bits, idxd->reg_base + IDXD_CMD_OFFSET); - *status = sts; - return 0; + while (ioread32(idxd->reg_base + IDXD_CMDSTS_OFFSET) & + IDXD_CMDSTS_ACTIVE) + cpu_relax(); + spin_unlock_irqrestore(&idxd->dev_lock, flags); } -static int idxd_cmd_send(struct idxd_device *idxd, int cmd_code, u32 operand) +static void idxd_cmd_exec(struct idxd_device *idxd, int cmd_code, u32 operand, + u32 *status) { union idxd_command_reg cmd; - int rc; - u32 status; - - lockdep_assert_held(&idxd->dev_lock); - rc = idxd_cmd_wait(idxd, &status, IDXD_REG_TIMEOUT); - if (rc < 0) - return rc; + DECLARE_COMPLETION_ONSTACK(done); + unsigned long flags; memset(&cmd, 0, sizeof(cmd)); cmd.cmd = cmd_code; cmd.operand = operand; + cmd.int_req = 1; + + spin_lock_irqsave(&idxd->dev_lock, flags); + wait_event_lock_irq(idxd->cmd_waitq, + !test_bit(IDXD_FLAG_CMD_RUNNING, &idxd->flags), + idxd->dev_lock); + dev_dbg(&idxd->pdev->dev, "%s: sending cmd: %#x op: %#x\n", __func__, cmd_code, operand); + + __set_bit(IDXD_FLAG_CMD_RUNNING, &idxd->flags); + idxd->cmd_done = &done; iowrite32(cmd.bits, idxd->reg_base + IDXD_CMD_OFFSET); - return 0; + /* + * After command submitted, release lock and go to sleep until + * the command completes via interrupt. + */ + spin_unlock_irqrestore(&idxd->dev_lock, flags); + wait_for_completion(&done); + spin_lock_irqsave(&idxd->dev_lock, flags); + if (status) + *status = ioread32(idxd->reg_base + IDXD_CMDSTS_OFFSET); + __clear_bit(IDXD_FLAG_CMD_RUNNING, &idxd->flags); + /* Wake up other pending commands */ + wake_up(&idxd->cmd_waitq); + spin_unlock_irqrestore(&idxd->dev_lock, flags); } int idxd_device_enable(struct idxd_device *idxd) { struct device *dev = &idxd->pdev->dev; - int rc; u32 status; - lockdep_assert_held(&idxd->dev_lock); if (idxd_is_enabled(idxd)) { dev_dbg(dev, "Device already enabled\n"); return -ENXIO; } - rc = idxd_cmd_send(idxd, IDXD_CMD_ENABLE_DEVICE, 0); - if (rc < 0) - return rc; - rc = idxd_cmd_wait(idxd, &status, IDXD_REG_TIMEOUT); - if (rc < 0) - return rc; + idxd_cmd_exec(idxd, IDXD_CMD_ENABLE_DEVICE, 0, &status); /* If the command is successful or if the device was enabled */ if (status != IDXD_CMDSTS_SUCCESS && @@ -432,58 +413,29 @@ int idxd_device_enable(struct idxd_device *idxd) int idxd_device_disable(struct idxd_device *idxd) { struct device *dev = &idxd->pdev->dev; - int rc; u32 status; - lockdep_assert_held(&idxd->dev_lock); if (!idxd_is_enabled(idxd)) { dev_dbg(dev, "Device is not enabled\n"); return 0; } - rc = idxd_cmd_send(idxd, IDXD_CMD_DISABLE_DEVICE, 0); - if (rc < 0) - return rc; - rc = idxd_cmd_wait(idxd, &status, IDXD_REG_TIMEOUT); - if (rc < 0) - return rc; + idxd_cmd_exec(idxd, IDXD_CMD_DISABLE_DEVICE, 0, &status); /* If the command is successful or if the device was disabled */ if (status != IDXD_CMDSTS_SUCCESS && !(status & IDXD_CMDSTS_ERR_DIS_DEV_EN)) { dev_dbg(dev, "%s: err_code: %#x\n", __func__, status); - rc = -ENXIO; - return rc; + return -ENXIO; } idxd->state = IDXD_DEV_CONF_READY; return 0; } -int __idxd_device_reset(struct idxd_device *idxd) +void idxd_device_reset(struct idxd_device *idxd) { - u32 status; - int rc; - - rc = idxd_cmd_send(idxd, IDXD_CMD_RESET_DEVICE, 0); - if (rc < 0) - return rc; - rc = idxd_cmd_wait(idxd, &status, IDXD_REG_TIMEOUT); - if (rc < 0) - return rc; - - return 0; -} - -int idxd_device_reset(struct idxd_device *idxd) -{ - unsigned long flags; - int rc; - - spin_lock_irqsave(&idxd->dev_lock, flags); - rc = __idxd_device_reset(idxd); - spin_unlock_irqrestore(&idxd->dev_lock, flags); - return rc; + idxd_cmd_exec(idxd, IDXD_CMD_RESET_DEVICE, 0, NULL); } /* Device configuration bits */ diff --git a/drivers/dma/idxd/dma.c b/drivers/dma/idxd/dma.c index c64c1429d160..0c892cbd72e0 100644 --- a/drivers/dma/idxd/dma.c +++ b/drivers/dma/idxd/dma.c @@ -133,7 +133,7 @@ static enum dma_status idxd_dma_tx_status(struct dma_chan *dma_chan, dma_cookie_t cookie, struct dma_tx_state *txstate) { - return dma_cookie_status(dma_chan, cookie, txstate); + return DMA_OUT_OF_ORDER; } /* @@ -174,6 +174,7 @@ int idxd_register_dma_device(struct idxd_device *idxd) INIT_LIST_HEAD(&dma->channels); dma->dev = &idxd->pdev->dev; + dma_cap_set(DMA_COMPLETION_NO_ORDER, dma->cap_mask); dma->device_release = idxd_dma_release; if (idxd->hw.opcap.bits[0] & IDXD_OPCAP_MEMMOVE) { diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index 908c8d0ef3ab..e62b4799d189 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -104,7 +104,6 @@ struct idxd_wq { enum idxd_wq_state state; unsigned long flags; union wqcfg wqcfg; - atomic_t dq_count; /* dedicated queue flow control */ u32 vec_ptr; /* interrupt steering */ struct dsa_hw_desc **hw_descs; int num_descs; @@ -112,10 +111,8 @@ struct idxd_wq { dma_addr_t compls_addr; int compls_size; struct idxd_desc **descs; - struct sbitmap sbmap; + struct sbitmap_queue sbq; struct dma_chan dma_chan; - struct percpu_rw_semaphore submit_lock; - wait_queue_head_t submit_waitq; char name[WQ_NAME_SIZE + 1]; }; @@ -145,6 +142,7 @@ enum idxd_device_state { enum idxd_device_flag { IDXD_FLAG_CONFIGURABLE = 0, + IDXD_FLAG_CMD_RUNNING, }; struct idxd_device { @@ -161,6 +159,7 @@ struct idxd_device { void __iomem *reg_base; spinlock_t dev_lock; /* spinlock for device */ + struct completion *cmd_done; struct idxd_group *groups; struct idxd_wq *wqs; struct idxd_engine *engines; @@ -183,12 +182,14 @@ struct idxd_device { int nr_tokens; /* non-reserved tokens */ union sw_err_reg sw_err; - + wait_queue_head_t cmd_waitq; struct msix_entry *msix_entries; int num_wq_irqs; struct idxd_irq_entry *irq_entries; struct dma_device dma_dev; + struct workqueue_struct *wq; + struct work_struct work; }; /* IDXD software descriptor */ @@ -201,6 +202,7 @@ struct idxd_desc { struct llist_node llnode; struct list_head list; int id; + int cpu; struct idxd_wq *wq; }; @@ -271,14 +273,14 @@ irqreturn_t idxd_wq_thread(int irq, void *data); void idxd_mask_error_interrupts(struct idxd_device *idxd); void idxd_unmask_error_interrupts(struct idxd_device *idxd); void idxd_mask_msix_vectors(struct idxd_device *idxd); -int idxd_mask_msix_vector(struct idxd_device *idxd, int vec_id); -int idxd_unmask_msix_vector(struct idxd_device *idxd, int vec_id); +void idxd_mask_msix_vector(struct idxd_device *idxd, int vec_id); +void idxd_unmask_msix_vector(struct idxd_device *idxd, int vec_id); /* device control */ +void idxd_device_init_reset(struct idxd_device *idxd); int idxd_device_enable(struct idxd_device *idxd); int idxd_device_disable(struct idxd_device *idxd); -int idxd_device_reset(struct idxd_device *idxd); -int __idxd_device_reset(struct idxd_device *idxd); +void idxd_device_reset(struct idxd_device *idxd); void idxd_device_cleanup(struct idxd_device *idxd); int idxd_device_config(struct idxd_device *idxd); void idxd_device_wqs_clear_state(struct idxd_device *idxd); @@ -288,6 +290,7 @@ int idxd_wq_alloc_resources(struct idxd_wq *wq); void idxd_wq_free_resources(struct idxd_wq *wq); int idxd_wq_enable(struct idxd_wq *wq); int idxd_wq_disable(struct idxd_wq *wq); +void idxd_wq_drain(struct idxd_wq *wq); int idxd_wq_map_portal(struct idxd_wq *wq); void idxd_wq_unmap_portal(struct idxd_wq *wq); void idxd_wq_disable_cleanup(struct idxd_wq *wq); diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index 7778c05deb5d..c7c61974f20f 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -141,22 +141,12 @@ static int idxd_setup_interrupts(struct idxd_device *idxd) return rc; } -static void idxd_wqs_free_lock(struct idxd_device *idxd) -{ - int i; - - for (i = 0; i < idxd->max_wqs; i++) { - struct idxd_wq *wq = &idxd->wqs[i]; - - percpu_free_rwsem(&wq->submit_lock); - } -} - static int idxd_setup_internals(struct idxd_device *idxd) { struct device *dev = &idxd->pdev->dev; int i; + init_waitqueue_head(&idxd->cmd_waitq); idxd->groups = devm_kcalloc(dev, idxd->max_groups, sizeof(struct idxd_group), GFP_KERNEL); if (!idxd->groups) @@ -181,19 +171,11 @@ static int idxd_setup_internals(struct idxd_device *idxd) for (i = 0; i < idxd->max_wqs; i++) { struct idxd_wq *wq = &idxd->wqs[i]; - int rc; wq->id = i; wq->idxd = idxd; mutex_init(&wq->wq_lock); - atomic_set(&wq->dq_count, 0); - init_waitqueue_head(&wq->submit_waitq); wq->idxd_cdev.minor = -1; - rc = percpu_init_rwsem(&wq->submit_lock); - if (rc < 0) { - idxd_wqs_free_lock(idxd); - return rc; - } } for (i = 0; i < idxd->max_engines; i++) { @@ -201,6 +183,10 @@ static int idxd_setup_internals(struct idxd_device *idxd) idxd->engines[i].id = i; } + idxd->wq = create_workqueue(dev_name(dev)); + if (!idxd->wq) + return -ENOMEM; + return 0; } @@ -296,9 +282,7 @@ static int idxd_probe(struct idxd_device *idxd) int rc; dev_dbg(dev, "%s entered and resetting device\n", __func__); - rc = idxd_device_reset(idxd); - if (rc < 0) - return rc; + idxd_device_init_reset(idxd); dev_dbg(dev, "IDXD reset complete\n"); idxd_read_caps(idxd); @@ -433,11 +417,8 @@ static void idxd_shutdown(struct pci_dev *pdev) int rc, i; struct idxd_irq_entry *irq_entry; int msixcnt = pci_msix_vec_count(pdev); - unsigned long flags; - spin_lock_irqsave(&idxd->dev_lock, flags); rc = idxd_device_disable(idxd); - spin_unlock_irqrestore(&idxd->dev_lock, flags); if (rc) dev_err(&pdev->dev, "Disabling device failed\n"); @@ -453,6 +434,8 @@ static void idxd_shutdown(struct pci_dev *pdev) idxd_flush_pending_llist(irq_entry); idxd_flush_work_list(irq_entry); } + + destroy_workqueue(idxd->wq); } static void idxd_remove(struct pci_dev *pdev) @@ -462,7 +445,6 @@ static void idxd_remove(struct pci_dev *pdev) dev_dbg(&pdev->dev, "%s called\n", __func__); idxd_cleanup_sysfs(idxd); idxd_shutdown(pdev); - idxd_wqs_free_lock(idxd); mutex_lock(&idxd_idr_lock); idr_remove(&idxd_idrs[idxd->type], idxd->id); mutex_unlock(&idxd_idr_lock); diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c index 8a35f58da689..b5142556cc4e 100644 --- a/drivers/dma/idxd/irq.c +++ b/drivers/dma/idxd/irq.c @@ -23,16 +23,13 @@ void idxd_device_wqs_clear_state(struct idxd_device *idxd) } } -static int idxd_restart(struct idxd_device *idxd) +static void idxd_device_reinit(struct work_struct *work) { - int i, rc; - - lockdep_assert_held(&idxd->dev_lock); - - rc = __idxd_device_reset(idxd); - if (rc < 0) - goto out; + struct idxd_device *idxd = container_of(work, struct idxd_device, work); + struct device *dev = &idxd->pdev->dev; + int rc, i; + idxd_device_reset(idxd); rc = idxd_device_config(idxd); if (rc < 0) goto out; @@ -47,19 +44,16 @@ static int idxd_restart(struct idxd_device *idxd) if (wq->state == IDXD_WQ_ENABLED) { rc = idxd_wq_enable(wq); if (rc < 0) { - dev_warn(&idxd->pdev->dev, - "Unable to re-enable wq %s\n", + dev_warn(dev, "Unable to re-enable wq %s\n", dev_name(&wq->conf_dev)); } } } - return 0; + return; out: idxd_device_wqs_clear_state(idxd); - idxd->state = IDXD_DEV_HALTED; - return rc; } irqreturn_t idxd_irq_handler(int vec, void *data) @@ -78,7 +72,7 @@ irqreturn_t idxd_misc_thread(int vec, void *data) struct device *dev = &idxd->pdev->dev; union gensts_reg gensts; u32 cause, val = 0; - int i, rc; + int i; bool err = false; cause = ioread32(idxd->reg_base + IDXD_INTCAUSE_OFFSET); @@ -117,8 +111,8 @@ irqreturn_t idxd_misc_thread(int vec, void *data) } if (cause & IDXD_INTC_CMD) { - /* Driver does use command interrupts */ val |= IDXD_INTC_CMD; + complete(idxd->cmd_done); } if (cause & IDXD_INTC_OCCUPY) { @@ -145,21 +139,24 @@ irqreturn_t idxd_misc_thread(int vec, void *data) gensts.bits = ioread32(idxd->reg_base + IDXD_GENSTATS_OFFSET); if (gensts.state == IDXD_DEVICE_STATE_HALT) { - spin_lock_bh(&idxd->dev_lock); + idxd->state = IDXD_DEV_HALTED; if (gensts.reset_type == IDXD_DEVICE_RESET_SOFTWARE) { - rc = idxd_restart(idxd); - if (rc < 0) - dev_err(&idxd->pdev->dev, - "idxd restart failed, device halt."); + /* + * If we need a software reset, we will throw the work + * on a system workqueue in order to allow interrupts + * for the device command completions. + */ + INIT_WORK(&idxd->work, idxd_device_reinit); + queue_work(idxd->wq, &idxd->work); } else { + spin_lock_bh(&idxd->dev_lock); idxd_device_wqs_clear_state(idxd); - idxd->state = IDXD_DEV_HALTED; dev_err(&idxd->pdev->dev, "idxd halted, need %s.\n", gensts.reset_type == IDXD_DEVICE_RESET_FLR ? "FLR" : "system reset"); + spin_unlock_bh(&idxd->dev_lock); } - spin_unlock_bh(&idxd->dev_lock); } out: @@ -264,8 +261,6 @@ irqreturn_t idxd_wq_thread(int irq, void *data) processed = idxd_desc_process(irq_entry); idxd_unmask_msix_vector(irq_entry->idxd, irq_entry->id); - /* catch anything unprocessed after unmasking */ - processed += idxd_desc_process(irq_entry); if (processed == 0) return IRQ_NONE; diff --git a/drivers/dma/idxd/submit.c b/drivers/dma/idxd/submit.c index 45a0c5869a0a..156a1ee233aa 100644 --- a/drivers/dma/idxd/submit.c +++ b/drivers/dma/idxd/submit.c @@ -8,61 +8,61 @@ #include "idxd.h" #include "registers.h" -struct idxd_desc *idxd_alloc_desc(struct idxd_wq *wq, enum idxd_op_type optype) +static struct idxd_desc *__get_desc(struct idxd_wq *wq, int idx, int cpu) { struct idxd_desc *desc; - int idx; + + desc = wq->descs[idx]; + memset(desc->hw, 0, sizeof(struct dsa_hw_desc)); + memset(desc->completion, 0, sizeof(struct dsa_completion_record)); + desc->cpu = cpu; + return desc; +} + +struct idxd_desc *idxd_alloc_desc(struct idxd_wq *wq, enum idxd_op_type optype) +{ + int cpu, idx; struct idxd_device *idxd = wq->idxd; + DEFINE_SBQ_WAIT(wait); + struct sbq_wait_state *ws; + struct sbitmap_queue *sbq; if (idxd->state != IDXD_DEV_ENABLED) return ERR_PTR(-EIO); - if (optype == IDXD_OP_BLOCK) - percpu_down_read(&wq->submit_lock); - else if (!percpu_down_read_trylock(&wq->submit_lock)) - return ERR_PTR(-EBUSY); - - if (!atomic_add_unless(&wq->dq_count, 1, wq->size)) { - int rc; - - if (optype == IDXD_OP_NONBLOCK) { - percpu_up_read(&wq->submit_lock); + sbq = &wq->sbq; + idx = sbitmap_queue_get(sbq, &cpu); + if (idx < 0) { + if (optype == IDXD_OP_NONBLOCK) return ERR_PTR(-EAGAIN); - } - - percpu_up_read(&wq->submit_lock); - percpu_down_write(&wq->submit_lock); - rc = wait_event_interruptible(wq->submit_waitq, - atomic_add_unless(&wq->dq_count, - 1, wq->size) || - idxd->state != IDXD_DEV_ENABLED); - percpu_up_write(&wq->submit_lock); - if (rc < 0) - return ERR_PTR(-EINTR); - if (idxd->state != IDXD_DEV_ENABLED) - return ERR_PTR(-EIO); } else { - percpu_up_read(&wq->submit_lock); + return __get_desc(wq, idx, cpu); } - idx = sbitmap_get(&wq->sbmap, 0, false); - if (idx < 0) { - atomic_dec(&wq->dq_count); - return ERR_PTR(-EAGAIN); + ws = &sbq->ws[0]; + for (;;) { + sbitmap_prepare_to_wait(sbq, ws, &wait, TASK_INTERRUPTIBLE); + if (signal_pending_state(TASK_INTERRUPTIBLE, current)) + break; + idx = sbitmap_queue_get(sbq, &cpu); + if (idx > 0) + break; + schedule(); } - desc = wq->descs[idx]; - memset(desc->hw, 0, sizeof(struct dsa_hw_desc)); - memset(desc->completion, 0, sizeof(struct dsa_completion_record)); - return desc; + sbitmap_finish_wait(sbq, ws, &wait); + if (idx < 0) + return ERR_PTR(-EAGAIN); + + return __get_desc(wq, idx, cpu); } void idxd_free_desc(struct idxd_wq *wq, struct idxd_desc *desc) { - atomic_dec(&wq->dq_count); + int cpu = desc->cpu; - sbitmap_clear_bit(&wq->sbmap, desc->id); - wake_up(&wq->submit_waitq); + desc->cpu = -1; + sbitmap_queue_clear(&wq->sbq, desc->id, cpu); } int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc) diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c index 2e2c5082f322..dcba60953217 100644 --- a/drivers/dma/idxd/sysfs.c +++ b/drivers/dma/idxd/sysfs.c @@ -118,12 +118,11 @@ static int idxd_config_bus_probe(struct device *dev) if (!try_module_get(THIS_MODULE)) return -ENXIO; - spin_lock_irqsave(&idxd->dev_lock, flags); - /* Perform IDXD configuration and enabling */ + spin_lock_irqsave(&idxd->dev_lock, flags); rc = idxd_device_config(idxd); + spin_unlock_irqrestore(&idxd->dev_lock, flags); if (rc < 0) { - spin_unlock_irqrestore(&idxd->dev_lock, flags); module_put(THIS_MODULE); dev_warn(dev, "Device config failed: %d\n", rc); return rc; @@ -132,18 +131,15 @@ static int idxd_config_bus_probe(struct device *dev) /* start device */ rc = idxd_device_enable(idxd); if (rc < 0) { - spin_unlock_irqrestore(&idxd->dev_lock, flags); module_put(THIS_MODULE); dev_warn(dev, "Device enable failed: %d\n", rc); return rc; } - spin_unlock_irqrestore(&idxd->dev_lock, flags); dev_info(dev, "Device %s enabled\n", dev_name(dev)); rc = idxd_register_dma_device(idxd); if (rc < 0) { - spin_unlock_irqrestore(&idxd->dev_lock, flags); module_put(THIS_MODULE); dev_dbg(dev, "Failed to register dmaengine device\n"); return rc; @@ -188,8 +184,8 @@ static int idxd_config_bus_probe(struct device *dev) spin_lock_irqsave(&idxd->dev_lock, flags); rc = idxd_device_config(idxd); + spin_unlock_irqrestore(&idxd->dev_lock, flags); if (rc < 0) { - spin_unlock_irqrestore(&idxd->dev_lock, flags); mutex_unlock(&wq->wq_lock); dev_warn(dev, "Writing WQ %d config failed: %d\n", wq->id, rc); @@ -198,13 +194,11 @@ static int idxd_config_bus_probe(struct device *dev) rc = idxd_wq_enable(wq); if (rc < 0) { - spin_unlock_irqrestore(&idxd->dev_lock, flags); mutex_unlock(&wq->wq_lock); dev_warn(dev, "WQ %d enabling failed: %d\n", wq->id, rc); return rc; } - spin_unlock_irqrestore(&idxd->dev_lock, flags); rc = idxd_wq_map_portal(wq); if (rc < 0) { @@ -212,7 +206,6 @@ static int idxd_config_bus_probe(struct device *dev) rc = idxd_wq_disable(wq); if (rc < 0) dev_warn(dev, "IDXD wq disable failed\n"); - spin_unlock_irqrestore(&idxd->dev_lock, flags); mutex_unlock(&wq->wq_lock); return rc; } @@ -248,7 +241,6 @@ static void disable_wq(struct idxd_wq *wq) { struct idxd_device *idxd = wq->idxd; struct device *dev = &idxd->pdev->dev; - unsigned long flags; int rc; mutex_lock(&wq->wq_lock); @@ -269,9 +261,8 @@ static void disable_wq(struct idxd_wq *wq) idxd_wq_unmap_portal(wq); - spin_lock_irqsave(&idxd->dev_lock, flags); + idxd_wq_drain(wq); rc = idxd_wq_disable(wq); - spin_unlock_irqrestore(&idxd->dev_lock, flags); idxd_wq_free_resources(wq); wq->client_count = 0; @@ -287,7 +278,6 @@ static void disable_wq(struct idxd_wq *wq) static int idxd_config_bus_remove(struct device *dev) { int rc; - unsigned long flags; dev_dbg(dev, "%s called for %s\n", __func__, dev_name(dev)); @@ -313,14 +303,14 @@ static int idxd_config_bus_remove(struct device *dev) } idxd_unregister_dma_device(idxd); - spin_lock_irqsave(&idxd->dev_lock, flags); rc = idxd_device_disable(idxd); for (i = 0; i < idxd->max_wqs; i++) { struct idxd_wq *wq = &idxd->wqs[i]; + mutex_lock(&wq->wq_lock); idxd_wq_disable_cleanup(wq); + mutex_unlock(&wq->wq_lock); } - spin_unlock_irqrestore(&idxd->dev_lock, flags); module_put(THIS_MODULE); if (rc < 0) dev_warn(dev, "Device disable failed\n"); diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c index 270992c4fe47..4f8d8f5e1132 100644 --- a/drivers/dma/imx-sdma.c +++ b/drivers/dma/imx-sdma.c @@ -335,7 +335,7 @@ struct sdma_desc { * @sdma: pointer to the SDMA engine for this channel * @channel: the channel number, matches dmaengine chan_id + 1 * @direction: transfer type. Needed for setting SDMA script - * @slave_config Slave configuration + * @slave_config: Slave configuration * @peripheral_type: Peripheral type. Needed for setting SDMA script * @event_id0: aka dma request line * @event_id1: for channels that use 2 events @@ -354,8 +354,10 @@ struct sdma_desc { * @shp_addr: value for gReg[6] * @per_addr: value for gReg[2] * @status: status of dma channel + * @context_loaded: ensure context is only loaded once * @data: specific sdma interface structure * @bd_pool: dma_pool for bd + * @terminate_worker: used to call back into terminate work function */ struct sdma_channel { struct virt_dma_chan vc; diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c index fd782aee02d9..a814b200299b 100644 --- a/drivers/dma/ioat/dma.c +++ b/drivers/dma/ioat/dma.c @@ -193,7 +193,7 @@ void ioat_issue_pending(struct dma_chan *c) /** * ioat_update_pending - log pending descriptors - * @ioat: ioat+ channel + * @ioat_chan: ioat+ channel * * Check if the number of unsubmitted descriptors has exceeded the * watermark. Called with prep_lock held @@ -457,7 +457,7 @@ ioat_alloc_ring(struct dma_chan *c, int order, gfp_t flags) /** * ioat_check_space_lock - verify space and grab ring producer lock - * @ioat: ioat,3 channel (ring) to operate on + * @ioat_chan: ioat,3 channel (ring) to operate on * @num_descs: allocation length */ int ioat_check_space_lock(struct ioatdma_chan *ioat_chan, int num_descs) @@ -585,7 +585,8 @@ desc_get_errstat(struct ioatdma_chan *ioat_chan, struct ioat_ring_ent *desc) /** * __cleanup - reclaim used descriptors - * @ioat: channel (ring) to clean + * @ioat_chan: channel (ring) to clean + * @phys_complete: zeroed (or not) completion address (from status) */ static void __cleanup(struct ioatdma_chan *ioat_chan, dma_addr_t phys_complete) { diff --git a/drivers/dma/ioat/init.c b/drivers/dma/ioat/init.c index 58d13564f88b..df0935a0021a 100644 --- a/drivers/dma/ioat/init.c +++ b/drivers/dma/ioat/init.c @@ -602,7 +602,7 @@ static void ioat_enumerate_channels(struct ioatdma_device *ioat_dma) /** * ioat_free_chan_resources - release all the descriptors - * @chan: the channel to be cleaned + * @c: the channel to be cleaned */ static void ioat_free_chan_resources(struct dma_chan *c) { diff --git a/drivers/dma/iop-adma.c b/drivers/dma/iop-adma.c index db0e274126fb..3350bffb2e93 100644 --- a/drivers/dma/iop-adma.c +++ b/drivers/dma/iop-adma.c @@ -406,8 +406,7 @@ static void iop_chan_start_null_xor(struct iop_adma_chan *iop_chan); /** * iop_adma_alloc_chan_resources - returns the number of allocated descriptors - * @chan - allocate descriptor resources for this channel - * @client - current client requesting the channel be ready for requests + * @chan: allocate descriptor resources for this channel * * Note: We keep the slots for 1 operation on iop_chan->chain at all times. To * avoid deadlock, via async_xor, num_descs_in_pool must at a minimum be diff --git a/drivers/dma/mediatek/mtk-hsdma.c b/drivers/dma/mediatek/mtk-hsdma.c index 4c58da742143..f133ae8dece1 100644 --- a/drivers/dma/mediatek/mtk-hsdma.c +++ b/drivers/dma/mediatek/mtk-hsdma.c @@ -107,10 +107,10 @@ enum mtk_hsdma_vdesc_flag { * struct mtk_hsdma_pdesc - This is the struct holding info describing physical * descriptor (PD) and its placement must be kept at * 4-bytes alignment in little endian order. - * @desc[1-4]: The control pad used to indicate hardware how to - * deal with the descriptor such as source and - * destination address and data length. The maximum - * data length each pdesc can handle is 0x3f80 bytes + * @desc1: | The control pad used to indicate hardware how to + * @desc2: | deal with the descriptor such as source and + * @desc3: | destination address and data length. The maximum + * @desc4: | data length each pdesc can handle is 0x3f80 bytes */ struct mtk_hsdma_pdesc { __le32 desc1; diff --git a/drivers/dma/mmp_pdma.c b/drivers/dma/mmp_pdma.c index ad06f260e907..f42f792db277 100644 --- a/drivers/dma/mmp_pdma.c +++ b/drivers/dma/mmp_pdma.c @@ -290,7 +290,7 @@ static void mmp_pdma_free_phy(struct mmp_pdma_chan *pchan) spin_unlock_irqrestore(&pdev->phy_lock, flags); } -/** +/* * start_pending_queue - transfer any pending transactions * pending list ==> running list */ @@ -381,7 +381,7 @@ mmp_pdma_alloc_descriptor(struct mmp_pdma_chan *chan) return desc; } -/** +/* * mmp_pdma_alloc_chan_resources - Allocate resources for DMA channel. * * This function will create a dma pool for descriptor allocation. @@ -854,7 +854,7 @@ static enum dma_status mmp_pdma_tx_status(struct dma_chan *dchan, return ret; } -/** +/* * mmp_pdma_issue_pending - Issue the DMA start command * pending list ==> running list */ @@ -1060,7 +1060,7 @@ static int mmp_pdma_probe(struct platform_device *op) pdev->dma_channels = dma_channels; for (i = 0; i < dma_channels; i++) { - if (platform_get_irq(op, i) > 0) + if (platform_get_irq_optional(op, i) > 0) irq_num++; } diff --git a/drivers/dma/mmp_tdma.c b/drivers/dma/mmp_tdma.c index dbc6a48424fa..960c7c40aef7 100644 --- a/drivers/dma/mmp_tdma.c +++ b/drivers/dma/mmp_tdma.c @@ -682,7 +682,7 @@ static int mmp_tdma_probe(struct platform_device *pdev) if (irq_num != chan_num) { irq = platform_get_irq(pdev, 0); ret = devm_request_irq(&pdev->dev, irq, - mmp_tdma_int_handler, 0, "tdma", tdev); + mmp_tdma_int_handler, IRQF_SHARED, "tdma", tdev); if (ret) return ret; } diff --git a/drivers/dma/mv_xor_v2.c b/drivers/dma/mv_xor_v2.c index 157c959311ea..9225f08dfee9 100644 --- a/drivers/dma/mv_xor_v2.c +++ b/drivers/dma/mv_xor_v2.c @@ -135,9 +135,11 @@ struct mv_xor_v2_descriptor { /** * struct mv_xor_v2_device - implements a xor device * @lock: lock for the engine + * @clk: reference to the 'core' clock + * @reg_clk: reference to the 'reg' clock * @dma_base: memory mapped DMA register base * @glob_base: memory mapped global register base - * @irq_tasklet: + * @irq_tasklet: tasklet used for IRQ handling call-backs * @free_sw_desc: linked list of free SW descriptors * @dmadev: dma device * @dmachan: dma channel @@ -146,6 +148,8 @@ struct mv_xor_v2_descriptor { * @sw_desq: SW descriptors queue * @desc_size: HW descriptor size * @npendings: number of pending descriptors (for which tx_submit has + * @hw_queue_idx: HW queue index + * @msi_desc: local interrupt descriptor information * been called, but not yet issue_pending) */ struct mv_xor_v2_device { diff --git a/drivers/dma/nbpfaxi.c b/drivers/dma/nbpfaxi.c index 594409a6e975..74df621402e1 100644 --- a/drivers/dma/nbpfaxi.c +++ b/drivers/dma/nbpfaxi.c @@ -144,6 +144,7 @@ struct nbpf_link_desc { * @async_tx: dmaengine object * @user_wait: waiting for a user ack * @length: total transfer length + * @chan: associated DMAC channel * @sg: list of hardware descriptors, represented by struct nbpf_link_desc * @node: member in channel descriptor lists */ @@ -174,13 +175,17 @@ struct nbpf_desc_page { /** * struct nbpf_channel - one DMAC channel * @dma_chan: standard dmaengine channel object + * @tasklet: channel specific tasklet used for callbacks * @base: register address base * @nbpf: DMAC * @name: IRQ name * @irq: IRQ number - * @slave_addr: address for slave DMA - * @slave_width:slave data size in bytes - * @slave_burst:maximum slave burst size in bytes + * @slave_src_addr: source address for slave DMA + * @slave_src_width: source slave data size in bytes + * @slave_src_burst: maximum source slave burst size in bytes + * @slave_dst_addr: destination address for slave DMA + * @slave_dst_width: destination slave data size in bytes + * @slave_dst_burst: maximum destination slave burst size in bytes * @terminal: DMA terminal, assigned to this channel * @dmarq_cfg: DMA request line configuration - high / low, edge / level for NBPF_CHAN_CFG * @flags: configuration flags from DT @@ -191,6 +196,8 @@ struct nbpf_desc_page { * @active: list of descriptors, scheduled for processing * @done: list of completed descriptors, waiting post-processing * @desc_page: list of additionally allocated descriptor pages - if any + * @running: linked descriptor of running transaction + * @paused: are translations on this channel paused? */ struct nbpf_channel { struct dma_chan dma_chan; diff --git a/drivers/dma/of-dma.c b/drivers/dma/of-dma.c index b2c2b5e8093c..863f2aaf5c8f 100644 --- a/drivers/dma/of-dma.c +++ b/drivers/dma/of-dma.c @@ -46,7 +46,7 @@ static struct of_dma *of_dma_find_controller(struct of_phandle_args *dma_spec) /** * of_dma_router_xlate - translation function for router devices * @dma_spec: pointer to DMA specifier as found in the device tree - * @of_dma: pointer to DMA controller data (router information) + * @ofdma: pointer to DMA controller data (router information) * * The function creates new dma_spec to be passed to the router driver's * of_dma_route_allocate() function to prepare a dma_spec which will be used @@ -92,7 +92,7 @@ static struct dma_chan *of_dma_router_xlate(struct of_phandle_args *dma_spec, * @np: device node of DMA controller * @of_dma_xlate: translation function which converts a phandle * arguments list into a dma_chan structure - * @data pointer to controller specific data to be used by + * @data: pointer to controller specific data to be used by * translation function * * Returns 0 on success or appropriate errno value on error. @@ -295,7 +295,7 @@ EXPORT_SYMBOL_GPL(of_dma_request_slave_channel); /** * of_dma_simple_xlate - Simple DMA engine translation function * @dma_spec: pointer to DMA specifier as found in the device tree - * @of_dma: pointer to DMA controller data + * @ofdma: pointer to DMA controller data * * A simple translation function for devices that use a 32-bit value for the * filter_param when calling the DMA engine dma_request_channel() function. @@ -323,7 +323,7 @@ EXPORT_SYMBOL_GPL(of_dma_simple_xlate); /** * of_dma_xlate_by_chan_id - Translate dt property to DMA channel by channel id * @dma_spec: pointer to DMA specifier as found in the device tree - * @of_dma: pointer to DMA controller data + * @ofdma: pointer to DMA controller data * * This function can be used as the of xlate callback for DMA driver which wants * to match the channel based on the channel id. When using this xlate function diff --git a/drivers/dma/owl-dma.c b/drivers/dma/owl-dma.c index 66ef70b00ec0..331c8d8b10a3 100644 --- a/drivers/dma/owl-dma.c +++ b/drivers/dma/owl-dma.c @@ -120,30 +120,38 @@ #define BIT_FIELD(val, width, shift, newshift) \ ((((val) >> (shift)) & ((BIT(width)) - 1)) << (newshift)) +/* Frame count value is fixed as 1 */ +#define FCNT_VAL 0x1 + /** - * struct owl_dma_lli_hw - Hardware link list for dma transfer - * @next_lli: physical address of the next link list - * @saddr: source physical address - * @daddr: destination physical address - * @flen: frame length - * @fcnt: frame count - * @src_stride: source stride - * @dst_stride: destination stride - * @ctrla: dma_mode and linklist ctrl config - * @ctrlb: interrupt config - * @const_num: data for constant fill + * owl_dmadesc_offsets - Describe DMA descriptor, hardware link + * list for dma transfer + * @OWL_DMADESC_NEXT_LLI: physical address of the next link list + * @OWL_DMADESC_SADDR: source physical address + * @OWL_DMADESC_DADDR: destination physical address + * @OWL_DMADESC_FLEN: frame length + * @OWL_DMADESC_SRC_STRIDE: source stride + * @OWL_DMADESC_DST_STRIDE: destination stride + * @OWL_DMADESC_CTRLA: dma_mode and linklist ctrl config + * @OWL_DMADESC_CTRLB: interrupt config + * @OWL_DMADESC_CONST_NUM: data for constant fill */ -struct owl_dma_lli_hw { - u32 next_lli; - u32 saddr; - u32 daddr; - u32 flen:20; - u32 fcnt:12; - u32 src_stride; - u32 dst_stride; - u32 ctrla; - u32 ctrlb; - u32 const_num; +enum owl_dmadesc_offsets { + OWL_DMADESC_NEXT_LLI = 0, + OWL_DMADESC_SADDR, + OWL_DMADESC_DADDR, + OWL_DMADESC_FLEN, + OWL_DMADESC_SRC_STRIDE, + OWL_DMADESC_DST_STRIDE, + OWL_DMADESC_CTRLA, + OWL_DMADESC_CTRLB, + OWL_DMADESC_CONST_NUM, + OWL_DMADESC_SIZE +}; + +enum owl_dma_id { + S900_DMA, + S700_DMA, }; /** @@ -153,7 +161,7 @@ struct owl_dma_lli_hw { * @node: node for txd's lli_list */ struct owl_dma_lli { - struct owl_dma_lli_hw hw; + u32 hw[OWL_DMADESC_SIZE]; dma_addr_t phys; struct list_head node; }; @@ -210,6 +218,7 @@ struct owl_dma_vchan { * @pchans: array of data for the physical channels * @nr_vchans: the number of physical channels * @vchans: array of data for the physical channels + * @devid: device id based on OWL SoC */ struct owl_dma { struct dma_device dma; @@ -224,6 +233,7 @@ struct owl_dma { unsigned int nr_vchans; struct owl_dma_vchan *vchans; + enum owl_dma_id devid; }; static void pchan_update(struct owl_dma_pchan *pchan, u32 reg, @@ -313,11 +323,20 @@ static inline u32 llc_hw_ctrlb(u32 int_ctl) { u32 ctl; + /* + * Irrespective of the SoC, ctrlb value starts filling from + * bit 18. + */ ctl = BIT_FIELD(int_ctl, 7, 0, 18); return ctl; } +static u32 llc_hw_flen(struct owl_dma_lli *lli) +{ + return lli->hw[OWL_DMADESC_FLEN] & GENMASK(19, 0); +} + static void owl_dma_free_lli(struct owl_dma *od, struct owl_dma_lli *lli) { @@ -349,8 +368,9 @@ static struct owl_dma_lli *owl_dma_add_lli(struct owl_dma_txd *txd, list_add_tail(&next->node, &txd->lli_list); if (prev) { - prev->hw.next_lli = next->phys; - prev->hw.ctrla |= llc_hw_ctrla(OWL_DMA_MODE_LME, 0); + prev->hw[OWL_DMADESC_NEXT_LLI] = next->phys; + prev->hw[OWL_DMADESC_CTRLA] |= + llc_hw_ctrla(OWL_DMA_MODE_LME, 0); } return next; @@ -363,8 +383,8 @@ static inline int owl_dma_cfg_lli(struct owl_dma_vchan *vchan, struct dma_slave_config *sconfig, bool is_cyclic) { - struct owl_dma_lli_hw *hw = &lli->hw; - u32 mode; + struct owl_dma *od = to_owl_dma(vchan->vc.chan.device); + u32 mode, ctrlb; mode = OWL_DMA_MODE_PW(0); @@ -405,22 +425,40 @@ static inline int owl_dma_cfg_lli(struct owl_dma_vchan *vchan, return -EINVAL; } - hw->next_lli = 0; /* One link list by default */ - hw->saddr = src; - hw->daddr = dst; - - hw->fcnt = 1; /* Frame count fixed as 1 */ - hw->flen = len; /* Max frame length is 1MB */ - hw->src_stride = 0; - hw->dst_stride = 0; - hw->ctrla = llc_hw_ctrla(mode, - OWL_DMA_LLC_SAV_LOAD_NEXT | - OWL_DMA_LLC_DAV_LOAD_NEXT); + lli->hw[OWL_DMADESC_CTRLA] = llc_hw_ctrla(mode, + OWL_DMA_LLC_SAV_LOAD_NEXT | + OWL_DMA_LLC_DAV_LOAD_NEXT); if (is_cyclic) - hw->ctrlb = llc_hw_ctrlb(OWL_DMA_INTCTL_BLOCK); + ctrlb = llc_hw_ctrlb(OWL_DMA_INTCTL_BLOCK); else - hw->ctrlb = llc_hw_ctrlb(OWL_DMA_INTCTL_SUPER_BLOCK); + ctrlb = llc_hw_ctrlb(OWL_DMA_INTCTL_SUPER_BLOCK); + + lli->hw[OWL_DMADESC_NEXT_LLI] = 0; /* One link list by default */ + lli->hw[OWL_DMADESC_SADDR] = src; + lli->hw[OWL_DMADESC_DADDR] = dst; + lli->hw[OWL_DMADESC_SRC_STRIDE] = 0; + lli->hw[OWL_DMADESC_DST_STRIDE] = 0; + + if (od->devid == S700_DMA) { + /* Max frame length is 1MB */ + lli->hw[OWL_DMADESC_FLEN] = len; + /* + * On S700, word starts from offset 0x1C is shared between + * frame count and ctrlb, where first 12 bits are for frame + * count and rest of 20 bits are for ctrlb. + */ + lli->hw[OWL_DMADESC_CTRLB] = FCNT_VAL | ctrlb; + } else { + /* + * On S900, word starts from offset 0xC is shared between + * frame length (max frame length is 1MB) and frame count, + * where first 20 bits are for frame length and rest of + * 12 bits are for frame count. + */ + lli->hw[OWL_DMADESC_FLEN] = len | FCNT_VAL << 20; + lli->hw[OWL_DMADESC_CTRLB] = ctrlb; + } return 0; } @@ -582,7 +620,7 @@ static irqreturn_t owl_dma_interrupt(int irq, void *dev_id) global_irq_pending = dma_readl(od, OWL_DMA_IRQ_PD0); - if (chan_irq_pending && !(global_irq_pending & BIT(i))) { + if (chan_irq_pending && !(global_irq_pending & BIT(i))) { dev_dbg(od->dma.dev, "global and channel IRQ pending match err\n"); @@ -752,7 +790,7 @@ static u32 owl_dma_getbytes_chan(struct owl_dma_vchan *vchan) /* Start from the next active node */ if (lli->phys == next_lli_phy) { list_for_each_entry(lli, &txd->lli_list, node) - bytes += lli->hw.flen; + bytes += llc_hw_flen(lli); break; } } @@ -783,7 +821,7 @@ static enum dma_status owl_dma_tx_status(struct dma_chan *chan, if (vd) { txd = to_owl_txd(&vd->tx); list_for_each_entry(lli, &txd->lli_list, node) - bytes += lli->hw.flen; + bytes += llc_hw_flen(lli); } else { bytes = owl_dma_getbytes_chan(vchan); } @@ -1040,6 +1078,13 @@ static struct dma_chan *owl_dma_of_xlate(struct of_phandle_args *dma_spec, return chan; } +static const struct of_device_id owl_dma_match[] = { + { .compatible = "actions,s900-dma", .data = (void *)S900_DMA,}, + { .compatible = "actions,s700-dma", .data = (void *)S700_DMA,}, + { /* sentinel */ }, +}; +MODULE_DEVICE_TABLE(of, owl_dma_match); + static int owl_dma_probe(struct platform_device *pdev) { struct device_node *np = pdev->dev.of_node; @@ -1069,6 +1114,8 @@ static int owl_dma_probe(struct platform_device *pdev) dev_info(&pdev->dev, "dma-channels %d, dma-requests %d\n", nr_channels, nr_requests); + od->devid = (enum owl_dma_id)of_device_get_match_data(&pdev->dev); + od->nr_pchans = nr_channels; od->nr_vchans = nr_requests; @@ -1201,12 +1248,6 @@ static int owl_dma_remove(struct platform_device *pdev) return 0; } -static const struct of_device_id owl_dma_match[] = { - { .compatible = "actions,s900-dma", }, - { /* sentinel */ } -}; -MODULE_DEVICE_TABLE(of, owl_dma_match); - static struct platform_driver owl_dma_driver = { .probe = owl_dma_probe, .remove = owl_dma_remove, diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c index 88b884cbb7c1..2c508ee672b9 100644 --- a/drivers/dma/pl330.c +++ b/drivers/dma/pl330.c @@ -33,7 +33,8 @@ #define PL330_MAX_PERI 32 #define PL330_MAX_BURST 16 -#define PL330_QUIRK_BROKEN_NO_FLUSHP BIT(0) +#define PL330_QUIRK_BROKEN_NO_FLUSHP BIT(0) +#define PL330_QUIRK_PERIPH_BURST BIT(1) enum pl330_cachectrl { CCTRL0, /* Noncacheable and nonbufferable */ @@ -284,7 +285,7 @@ struct pl330_config { u32 irq_ns; }; -/** +/* * Request Configuration. * The PL330 core does not modify this and uses the last * working configuration if the request doesn't provide any. @@ -509,6 +510,10 @@ static struct pl330_of_quirks { { .quirk = "arm,pl330-broken-no-flushp", .id = PL330_QUIRK_BROKEN_NO_FLUSHP, + }, + { + .quirk = "arm,pl330-periph-burst", + .id = PL330_QUIRK_PERIPH_BURST, } }; @@ -885,6 +890,12 @@ static inline void _execute_DBGINSN(struct pl330_thread *thrd, void __iomem *regs = thrd->dmac->base; u32 val; + /* If timed out due to halted state-machine */ + if (_until_dmac_idle(thrd)) { + dev_err(thrd->dmac->ddma.dev, "DMAC halted!\n"); + return; + } + val = (insn[0] << 16) | (insn[1] << 24); if (!as_manager) { val |= (1 << 0); @@ -895,12 +906,6 @@ static inline void _execute_DBGINSN(struct pl330_thread *thrd, val = le32_to_cpu(*((__le32 *)&insn[2])); writel(val, regs + DBGINST1); - /* If timed out due to halted state-machine */ - if (_until_dmac_idle(thrd)) { - dev_err(thrd->dmac->ddma.dev, "DMAC halted!\n"); - return; - } - /* Get going */ writel(0, regs + DBGCMD); } @@ -1183,9 +1188,6 @@ static inline int _ldst_peripheral(struct pl330_dmac *pl330, { int off = 0; - if (pl330->quirks & PL330_QUIRK_BROKEN_NO_FLUSHP) - cond = BURST; - /* * do FLUSHP at beginning to clear any stale dma requests before the * first WFP. @@ -1209,6 +1211,9 @@ static int _bursts(struct pl330_dmac *pl330, unsigned dry_run, u8 buf[], int off = 0; enum pl330_cond cond = BRST_LEN(pxs->ccr) > 1 ? BURST : SINGLE; + if (pl330->quirks & PL330_QUIRK_PERIPH_BURST) + cond = BURST; + switch (pxs->desc->rqtype) { case DMA_MEM_TO_DEV: /* fall through */ @@ -1231,8 +1236,9 @@ static int _bursts(struct pl330_dmac *pl330, unsigned dry_run, u8 buf[], } /* - * transfer dregs with single transfers to peripheral, or a reduced size burst - * for mem-to-mem. + * only the unaligned burst transfers have the dregs. + * so, still transfer dregs with a reduced size burst + * for mem-to-mem, mem-to-dev or dev-to-mem. */ static int _dregs(struct pl330_dmac *pl330, unsigned int dry_run, u8 buf[], const struct _xfer_spec *pxs, int transfer_length) @@ -1243,22 +1249,31 @@ static int _dregs(struct pl330_dmac *pl330, unsigned int dry_run, u8 buf[], if (transfer_length == 0) return off; + /* + * dregs_len = (total bytes - BURST_TO_BYTE(bursts, ccr)) / + * BRST_SIZE(ccr) + * the dregs len must be smaller than burst len, + * so, for higher efficiency, we can modify CCR + * to use a reduced size burst len for the dregs. + */ + dregs_ccr = pxs->ccr; + dregs_ccr &= ~((0xf << CC_SRCBRSTLEN_SHFT) | + (0xf << CC_DSTBRSTLEN_SHFT)); + dregs_ccr |= (((transfer_length - 1) & 0xf) << + CC_SRCBRSTLEN_SHFT); + dregs_ccr |= (((transfer_length - 1) & 0xf) << + CC_DSTBRSTLEN_SHFT); + switch (pxs->desc->rqtype) { case DMA_MEM_TO_DEV: /* fall through */ case DMA_DEV_TO_MEM: - off += _ldst_peripheral(pl330, dry_run, &buf[off], pxs, - transfer_length, SINGLE); + off += _emit_MOV(dry_run, &buf[off], CCR, dregs_ccr); + off += _ldst_peripheral(pl330, dry_run, &buf[off], pxs, 1, + BURST); break; case DMA_MEM_TO_MEM: - dregs_ccr = pxs->ccr; - dregs_ccr &= ~((0xf << CC_SRCBRSTLEN_SHFT) | - (0xf << CC_DSTBRSTLEN_SHFT)); - dregs_ccr |= (((transfer_length - 1) & 0xf) << - CC_SRCBRSTLEN_SHFT); - dregs_ccr |= (((transfer_length - 1) & 0xf) << - CC_DSTBRSTLEN_SHFT); off += _emit_MOV(dry_run, &buf[off], CCR, dregs_ccr); off += _ldst_memtomem(dry_run, &buf[off], pxs, 1); break; @@ -2221,9 +2236,7 @@ static bool pl330_prep_slave_fifo(struct dma_pl330_chan *pch, static int fixup_burst_len(int max_burst_len, int quirks) { - if (quirks & PL330_QUIRK_BROKEN_NO_FLUSHP) - return 1; - else if (max_burst_len > PL330_MAX_BURST) + if (max_burst_len > PL330_MAX_BURST) return PL330_MAX_BURST; else if (max_burst_len < 1) return 1; @@ -3128,8 +3141,7 @@ pl330_probe(struct amba_device *adev, const struct amba_id *id) pd->dst_addr_widths = PL330_DMA_BUSWIDTHS; pd->directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV); pd->residue_granularity = DMA_RESIDUE_GRANULARITY_BURST; - pd->max_burst = ((pl330->quirks & PL330_QUIRK_BROKEN_NO_FLUSHP) ? - 1 : PL330_MAX_BURST); + pd->max_burst = PL330_MAX_BURST; ret = dma_async_device_register(pd); if (ret) { diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c index de8bfd9a76e9..21e2f1d0c210 100644 --- a/drivers/dma/ste_dma40.c +++ b/drivers/dma/ste_dma40.c @@ -381,6 +381,7 @@ struct d40_desc { * struct d40_lcla_pool - LCLA pool settings and data. * * @base: The virtual address of LCLA. 18 bit aligned. + * @dma_addr: DMA address, if mapped * @base_unaligned: The orignal kmalloc pointer, if kmalloc is used. * This pointer is only there for clean-up on error. * @pages: The number of pages needed for all physical channels. @@ -534,6 +535,7 @@ struct d40_gen_dmac { * mode" allocated physical channels. * @num_log_chans: The number of logical channels. Calculated from * num_phy_chans. + * @dma_parms: DMA parameters for the channel * @dma_both: dma_device channels that can do both memcpy and slave transfers. * @dma_slave: dma_device channels that can do only do slave transfers. * @dma_memcpy: dma_device channels that can do only do memcpy transfers. diff --git a/drivers/dma/sun4i-dma.c b/drivers/dma/sun4i-dma.c index e7ff09a5031d..e8b6633ae661 100644 --- a/drivers/dma/sun4i-dma.c +++ b/drivers/dma/sun4i-dma.c @@ -307,7 +307,7 @@ static void set_pchan_interrupt(struct sun4i_dma_dev *priv, spin_unlock_irqrestore(&priv->lock, flags); } -/** +/* * Execute pending operations on a vchan * * When given a vchan, this function will try to acquire a suitable @@ -419,7 +419,7 @@ static int sanitize_config(struct dma_slave_config *sconfig, return 0; } -/** +/* * Generate a promise, to be used in a normal DMA contract. * * A NDMA promise contains all the information required to program the @@ -486,7 +486,7 @@ fail: return NULL; } -/** +/* * Generate a promise, to be used in a dedicated DMA contract. * * A DDMA promise contains all the information required to program the @@ -543,7 +543,7 @@ fail: return NULL; } -/** +/* * Generate a contract * * Contracts function as DMA descriptors. As our hardware does not support @@ -565,7 +565,7 @@ static struct sun4i_dma_contract *generate_dma_contract(void) return contract; } -/** +/* * Get next promise on a cyclic transfer * * Cyclic contracts contain a series of promises which are executed on a @@ -589,7 +589,7 @@ get_next_cyclic_promise(struct sun4i_dma_contract *contract) return promise; } -/** +/* * Free a contract and all its associated promises */ static void sun4i_dma_free_contract(struct virt_dma_desc *vd) diff --git a/drivers/dma/ti/k3-udma-glue.c b/drivers/dma/ti/k3-udma-glue.c index 64c8955e0cf1..d66ed18303a4 100644 --- a/drivers/dma/ti/k3-udma-glue.c +++ b/drivers/dma/ti/k3-udma-glue.c @@ -186,17 +186,17 @@ static void k3_udma_glue_dump_tx_rt_chn(struct k3_udma_glue_tx_channel *chn, struct device *dev = chn->common.dev; dev_dbg(dev, "=== dump ===> %s\n", mark); - dev_dbg(dev, "0x%08X: %08X\n", UDMA_TCHAN_RT_CTL_REG, - xudma_tchanrt_read(chn->udma_tchanx, UDMA_TCHAN_RT_CTL_REG)); - dev_dbg(dev, "0x%08X: %08X\n", UDMA_TCHAN_RT_PEER_RT_EN_REG, + dev_dbg(dev, "0x%08X: %08X\n", UDMA_CHAN_RT_CTL_REG, + xudma_tchanrt_read(chn->udma_tchanx, UDMA_CHAN_RT_CTL_REG)); + dev_dbg(dev, "0x%08X: %08X\n", UDMA_CHAN_RT_PEER_RT_EN_REG, xudma_tchanrt_read(chn->udma_tchanx, - UDMA_TCHAN_RT_PEER_RT_EN_REG)); - dev_dbg(dev, "0x%08X: %08X\n", UDMA_TCHAN_RT_PCNT_REG, - xudma_tchanrt_read(chn->udma_tchanx, UDMA_TCHAN_RT_PCNT_REG)); - dev_dbg(dev, "0x%08X: %08X\n", UDMA_TCHAN_RT_BCNT_REG, - xudma_tchanrt_read(chn->udma_tchanx, UDMA_TCHAN_RT_BCNT_REG)); - dev_dbg(dev, "0x%08X: %08X\n", UDMA_TCHAN_RT_SBCNT_REG, - xudma_tchanrt_read(chn->udma_tchanx, UDMA_TCHAN_RT_SBCNT_REG)); + UDMA_CHAN_RT_PEER_RT_EN_REG)); + dev_dbg(dev, "0x%08X: %08X\n", UDMA_CHAN_RT_PCNT_REG, + xudma_tchanrt_read(chn->udma_tchanx, UDMA_CHAN_RT_PCNT_REG)); + dev_dbg(dev, "0x%08X: %08X\n", UDMA_CHAN_RT_BCNT_REG, + xudma_tchanrt_read(chn->udma_tchanx, UDMA_CHAN_RT_BCNT_REG)); + dev_dbg(dev, "0x%08X: %08X\n", UDMA_CHAN_RT_SBCNT_REG, + xudma_tchanrt_read(chn->udma_tchanx, UDMA_CHAN_RT_SBCNT_REG)); } static int k3_udma_glue_cfg_tx_chn(struct k3_udma_glue_tx_channel *tx_chn) @@ -389,14 +389,13 @@ int k3_udma_glue_enable_tx_chn(struct k3_udma_glue_tx_channel *tx_chn) u32 txrt_ctl; txrt_ctl = UDMA_PEER_RT_EN_ENABLE; - xudma_tchanrt_write(tx_chn->udma_tchanx, - UDMA_TCHAN_RT_PEER_RT_EN_REG, + xudma_tchanrt_write(tx_chn->udma_tchanx, UDMA_CHAN_RT_PEER_RT_EN_REG, txrt_ctl); txrt_ctl = xudma_tchanrt_read(tx_chn->udma_tchanx, - UDMA_TCHAN_RT_CTL_REG); + UDMA_CHAN_RT_CTL_REG); txrt_ctl |= UDMA_CHAN_RT_CTL_EN; - xudma_tchanrt_write(tx_chn->udma_tchanx, UDMA_TCHAN_RT_CTL_REG, + xudma_tchanrt_write(tx_chn->udma_tchanx, UDMA_CHAN_RT_CTL_REG, txrt_ctl); k3_udma_glue_dump_tx_rt_chn(tx_chn, "txchn en"); @@ -408,10 +407,10 @@ void k3_udma_glue_disable_tx_chn(struct k3_udma_glue_tx_channel *tx_chn) { k3_udma_glue_dump_tx_rt_chn(tx_chn, "txchn dis1"); - xudma_tchanrt_write(tx_chn->udma_tchanx, UDMA_TCHAN_RT_CTL_REG, 0); + xudma_tchanrt_write(tx_chn->udma_tchanx, UDMA_CHAN_RT_CTL_REG, 0); xudma_tchanrt_write(tx_chn->udma_tchanx, - UDMA_TCHAN_RT_PEER_RT_EN_REG, 0); + UDMA_CHAN_RT_PEER_RT_EN_REG, 0); k3_udma_glue_dump_tx_rt_chn(tx_chn, "txchn dis2"); } EXPORT_SYMBOL_GPL(k3_udma_glue_disable_tx_chn); @@ -424,14 +423,14 @@ void k3_udma_glue_tdown_tx_chn(struct k3_udma_glue_tx_channel *tx_chn, k3_udma_glue_dump_tx_rt_chn(tx_chn, "txchn tdown1"); - xudma_tchanrt_write(tx_chn->udma_tchanx, UDMA_TCHAN_RT_CTL_REG, + xudma_tchanrt_write(tx_chn->udma_tchanx, UDMA_CHAN_RT_CTL_REG, UDMA_CHAN_RT_CTL_EN | UDMA_CHAN_RT_CTL_TDOWN); - val = xudma_tchanrt_read(tx_chn->udma_tchanx, UDMA_TCHAN_RT_CTL_REG); + val = xudma_tchanrt_read(tx_chn->udma_tchanx, UDMA_CHAN_RT_CTL_REG); while (sync && (val & UDMA_CHAN_RT_CTL_EN)) { val = xudma_tchanrt_read(tx_chn->udma_tchanx, - UDMA_TCHAN_RT_CTL_REG); + UDMA_CHAN_RT_CTL_REG); udelay(1); if (i > K3_UDMAX_TDOWN_TIMEOUT_US) { dev_err(tx_chn->common.dev, "TX tdown timeout\n"); @@ -441,7 +440,7 @@ void k3_udma_glue_tdown_tx_chn(struct k3_udma_glue_tx_channel *tx_chn, } val = xudma_tchanrt_read(tx_chn->udma_tchanx, - UDMA_TCHAN_RT_PEER_RT_EN_REG); + UDMA_CHAN_RT_PEER_RT_EN_REG); if (sync && (val & UDMA_PEER_RT_EN_ENABLE)) dev_err(tx_chn->common.dev, "TX tdown peer not stopped\n"); k3_udma_glue_dump_tx_rt_chn(tx_chn, "txchn tdown2"); @@ -716,17 +715,17 @@ static void k3_udma_glue_dump_rx_rt_chn(struct k3_udma_glue_rx_channel *chn, dev_dbg(dev, "=== dump ===> %s\n", mark); - dev_dbg(dev, "0x%08X: %08X\n", UDMA_RCHAN_RT_CTL_REG, - xudma_rchanrt_read(chn->udma_rchanx, UDMA_RCHAN_RT_CTL_REG)); - dev_dbg(dev, "0x%08X: %08X\n", UDMA_RCHAN_RT_PEER_RT_EN_REG, + dev_dbg(dev, "0x%08X: %08X\n", UDMA_CHAN_RT_CTL_REG, + xudma_rchanrt_read(chn->udma_rchanx, UDMA_CHAN_RT_CTL_REG)); + dev_dbg(dev, "0x%08X: %08X\n", UDMA_CHAN_RT_PEER_RT_EN_REG, xudma_rchanrt_read(chn->udma_rchanx, - UDMA_RCHAN_RT_PEER_RT_EN_REG)); - dev_dbg(dev, "0x%08X: %08X\n", UDMA_RCHAN_RT_PCNT_REG, - xudma_rchanrt_read(chn->udma_rchanx, UDMA_RCHAN_RT_PCNT_REG)); - dev_dbg(dev, "0x%08X: %08X\n", UDMA_RCHAN_RT_BCNT_REG, - xudma_rchanrt_read(chn->udma_rchanx, UDMA_RCHAN_RT_BCNT_REG)); - dev_dbg(dev, "0x%08X: %08X\n", UDMA_RCHAN_RT_SBCNT_REG, - xudma_rchanrt_read(chn->udma_rchanx, UDMA_RCHAN_RT_SBCNT_REG)); + UDMA_CHAN_RT_PEER_RT_EN_REG)); + dev_dbg(dev, "0x%08X: %08X\n", UDMA_CHAN_RT_PCNT_REG, + xudma_rchanrt_read(chn->udma_rchanx, UDMA_CHAN_RT_PCNT_REG)); + dev_dbg(dev, "0x%08X: %08X\n", UDMA_CHAN_RT_BCNT_REG, + xudma_rchanrt_read(chn->udma_rchanx, UDMA_CHAN_RT_BCNT_REG)); + dev_dbg(dev, "0x%08X: %08X\n", UDMA_CHAN_RT_SBCNT_REG, + xudma_rchanrt_read(chn->udma_rchanx, UDMA_CHAN_RT_SBCNT_REG)); } static int @@ -1084,13 +1083,12 @@ int k3_udma_glue_enable_rx_chn(struct k3_udma_glue_rx_channel *rx_chn) return -EINVAL; rxrt_ctl = xudma_rchanrt_read(rx_chn->udma_rchanx, - UDMA_RCHAN_RT_CTL_REG); + UDMA_CHAN_RT_CTL_REG); rxrt_ctl |= UDMA_CHAN_RT_CTL_EN; - xudma_rchanrt_write(rx_chn->udma_rchanx, UDMA_RCHAN_RT_CTL_REG, + xudma_rchanrt_write(rx_chn->udma_rchanx, UDMA_CHAN_RT_CTL_REG, rxrt_ctl); - xudma_rchanrt_write(rx_chn->udma_rchanx, - UDMA_RCHAN_RT_PEER_RT_EN_REG, + xudma_rchanrt_write(rx_chn->udma_rchanx, UDMA_CHAN_RT_PEER_RT_EN_REG, UDMA_PEER_RT_EN_ENABLE); k3_udma_glue_dump_rx_rt_chn(rx_chn, "rxrt en"); @@ -1103,9 +1101,8 @@ void k3_udma_glue_disable_rx_chn(struct k3_udma_glue_rx_channel *rx_chn) k3_udma_glue_dump_rx_rt_chn(rx_chn, "rxrt dis1"); xudma_rchanrt_write(rx_chn->udma_rchanx, - UDMA_RCHAN_RT_PEER_RT_EN_REG, - 0); - xudma_rchanrt_write(rx_chn->udma_rchanx, UDMA_RCHAN_RT_CTL_REG, 0); + UDMA_CHAN_RT_PEER_RT_EN_REG, 0); + xudma_rchanrt_write(rx_chn->udma_rchanx, UDMA_CHAN_RT_CTL_REG, 0); k3_udma_glue_dump_rx_rt_chn(rx_chn, "rxrt dis2"); } @@ -1122,14 +1119,14 @@ void k3_udma_glue_tdown_rx_chn(struct k3_udma_glue_rx_channel *rx_chn, k3_udma_glue_dump_rx_rt_chn(rx_chn, "rxrt tdown1"); - xudma_rchanrt_write(rx_chn->udma_rchanx, UDMA_RCHAN_RT_PEER_RT_EN_REG, + xudma_rchanrt_write(rx_chn->udma_rchanx, UDMA_CHAN_RT_PEER_RT_EN_REG, UDMA_PEER_RT_EN_ENABLE | UDMA_PEER_RT_EN_TEARDOWN); - val = xudma_rchanrt_read(rx_chn->udma_rchanx, UDMA_RCHAN_RT_CTL_REG); + val = xudma_rchanrt_read(rx_chn->udma_rchanx, UDMA_CHAN_RT_CTL_REG); while (sync && (val & UDMA_CHAN_RT_CTL_EN)) { val = xudma_rchanrt_read(rx_chn->udma_rchanx, - UDMA_RCHAN_RT_CTL_REG); + UDMA_CHAN_RT_CTL_REG); udelay(1); if (i > K3_UDMAX_TDOWN_TIMEOUT_US) { dev_err(rx_chn->common.dev, "RX tdown timeout\n"); @@ -1139,7 +1136,7 @@ void k3_udma_glue_tdown_rx_chn(struct k3_udma_glue_rx_channel *rx_chn, } val = xudma_rchanrt_read(rx_chn->udma_rchanx, - UDMA_RCHAN_RT_PEER_RT_EN_REG); + UDMA_CHAN_RT_PEER_RT_EN_REG); if (sync && (val & UDMA_PEER_RT_EN_ENABLE)) dev_err(rx_chn->common.dev, "TX tdown peer not stopped\n"); k3_udma_glue_dump_rx_rt_chn(rx_chn, "rxrt tdown2"); diff --git a/drivers/dma/ti/k3-udma-private.c b/drivers/dma/ti/k3-udma-private.c index 77e8e67d995b..aa24e554f7b4 100644 --- a/drivers/dma/ti/k3-udma-private.c +++ b/drivers/dma/ti/k3-udma-private.c @@ -121,13 +121,17 @@ XUDMA_GET_RESOURCE_ID(rflow); #define XUDMA_RT_IO_FUNCTIONS(res) \ u32 xudma_##res##rt_read(struct udma_##res *p, int reg) \ { \ - return udma_##res##rt_read(p, reg); \ + if (!p) \ + return 0; \ + return udma_read(p->reg_rt, reg); \ } \ EXPORT_SYMBOL(xudma_##res##rt_read); \ \ void xudma_##res##rt_write(struct udma_##res *p, int reg, u32 val) \ { \ - udma_##res##rt_write(p, reg, val); \ + if (!p) \ + return; \ + udma_write(p->reg_rt, reg, val); \ } \ EXPORT_SYMBOL(xudma_##res##rt_write) XUDMA_RT_IO_FUNCTIONS(tchan); diff --git a/drivers/dma/ti/k3-udma.c b/drivers/dma/ti/k3-udma.c index 3aeeafe92fd6..e427be50f76c 100644 --- a/drivers/dma/ti/k3-udma.c +++ b/drivers/dma/ti/k3-udma.c @@ -92,9 +92,6 @@ struct udma_match_data { u32 flags; u32 statictr_z_mask; u32 rchan_oes_offset; - - u8 tpl_levels; - u32 level_start_idx[]; }; struct udma_hwdesc { @@ -121,6 +118,9 @@ struct udma_dev { void __iomem *mmrs[MMR_LAST]; const struct udma_match_data *match_data; + u8 tpl_levels; + u32 tpl_start_idx[3]; + size_t desc_align; /* alignment to use for descriptors */ struct udma_tisci_rm tisci_rm; @@ -282,51 +282,49 @@ static inline void udma_update_bits(void __iomem *base, int reg, } /* TCHANRT */ -static inline u32 udma_tchanrt_read(struct udma_tchan *tchan, int reg) +static inline u32 udma_tchanrt_read(struct udma_chan *uc, int reg) { - if (!tchan) + if (!uc->tchan) return 0; - return udma_read(tchan->reg_rt, reg); + return udma_read(uc->tchan->reg_rt, reg); } -static inline void udma_tchanrt_write(struct udma_tchan *tchan, int reg, - u32 val) +static inline void udma_tchanrt_write(struct udma_chan *uc, int reg, u32 val) { - if (!tchan) + if (!uc->tchan) return; - udma_write(tchan->reg_rt, reg, val); + udma_write(uc->tchan->reg_rt, reg, val); } -static inline void udma_tchanrt_update_bits(struct udma_tchan *tchan, int reg, +static inline void udma_tchanrt_update_bits(struct udma_chan *uc, int reg, u32 mask, u32 val) { - if (!tchan) + if (!uc->tchan) return; - udma_update_bits(tchan->reg_rt, reg, mask, val); + udma_update_bits(uc->tchan->reg_rt, reg, mask, val); } /* RCHANRT */ -static inline u32 udma_rchanrt_read(struct udma_rchan *rchan, int reg) +static inline u32 udma_rchanrt_read(struct udma_chan *uc, int reg) { - if (!rchan) + if (!uc->rchan) return 0; - return udma_read(rchan->reg_rt, reg); + return udma_read(uc->rchan->reg_rt, reg); } -static inline void udma_rchanrt_write(struct udma_rchan *rchan, int reg, - u32 val) +static inline void udma_rchanrt_write(struct udma_chan *uc, int reg, u32 val) { - if (!rchan) + if (!uc->rchan) return; - udma_write(rchan->reg_rt, reg, val); + udma_write(uc->rchan->reg_rt, reg, val); } -static inline void udma_rchanrt_update_bits(struct udma_rchan *rchan, int reg, +static inline void udma_rchanrt_update_bits(struct udma_chan *uc, int reg, u32 mask, u32 val) { - if (!rchan) + if (!uc->rchan) return; - udma_update_bits(rchan->reg_rt, reg, mask, val); + udma_update_bits(uc->rchan->reg_rt, reg, mask, val); } static int navss_psil_pair(struct udma_dev *ud, u32 src_thread, u32 dst_thread) @@ -366,18 +364,18 @@ static void udma_dump_chan_stdata(struct udma_chan *uc) if (uc->config.dir == DMA_MEM_TO_DEV || uc->config.dir == DMA_MEM_TO_MEM) { dev_dbg(dev, "TCHAN State data:\n"); for (i = 0; i < 32; i++) { - offset = UDMA_TCHAN_RT_STDATA_REG + i * 4; + offset = UDMA_CHAN_RT_STDATA_REG + i * 4; dev_dbg(dev, "TRT_STDATA[%02d]: 0x%08x\n", i, - udma_tchanrt_read(uc->tchan, offset)); + udma_tchanrt_read(uc, offset)); } } if (uc->config.dir == DMA_DEV_TO_MEM || uc->config.dir == DMA_MEM_TO_MEM) { dev_dbg(dev, "RCHAN State data:\n"); for (i = 0; i < 32; i++) { - offset = UDMA_RCHAN_RT_STDATA_REG + i * 4; + offset = UDMA_CHAN_RT_STDATA_REG + i * 4; dev_dbg(dev, "RRT_STDATA[%02d]: 0x%08x\n", i, - udma_rchanrt_read(uc->rchan, offset)); + udma_rchanrt_read(uc, offset)); } } } @@ -500,9 +498,9 @@ static bool udma_is_chan_running(struct udma_chan *uc) u32 rrt_ctl = 0; if (uc->tchan) - trt_ctl = udma_tchanrt_read(uc->tchan, UDMA_TCHAN_RT_CTL_REG); + trt_ctl = udma_tchanrt_read(uc, UDMA_CHAN_RT_CTL_REG); if (uc->rchan) - rrt_ctl = udma_rchanrt_read(uc->rchan, UDMA_RCHAN_RT_CTL_REG); + rrt_ctl = udma_rchanrt_read(uc, UDMA_CHAN_RT_CTL_REG); if (trt_ctl & UDMA_CHAN_RT_CTL_EN || rrt_ctl & UDMA_CHAN_RT_CTL_EN) return true; @@ -516,17 +514,15 @@ static bool udma_is_chan_paused(struct udma_chan *uc) switch (uc->config.dir) { case DMA_DEV_TO_MEM: - val = udma_rchanrt_read(uc->rchan, - UDMA_RCHAN_RT_PEER_RT_EN_REG); + val = udma_rchanrt_read(uc, UDMA_CHAN_RT_PEER_RT_EN_REG); pause_mask = UDMA_PEER_RT_EN_PAUSE; break; case DMA_MEM_TO_DEV: - val = udma_tchanrt_read(uc->tchan, - UDMA_TCHAN_RT_PEER_RT_EN_REG); + val = udma_tchanrt_read(uc, UDMA_CHAN_RT_PEER_RT_EN_REG); pause_mask = UDMA_PEER_RT_EN_PAUSE; break; case DMA_MEM_TO_MEM: - val = udma_tchanrt_read(uc->tchan, UDMA_TCHAN_RT_CTL_REG); + val = udma_tchanrt_read(uc, UDMA_CHAN_RT_CTL_REG); pause_mask = UDMA_CHAN_RT_CTL_PAUSE; break; default: @@ -539,30 +535,6 @@ static bool udma_is_chan_paused(struct udma_chan *uc) return false; } -static void udma_sync_for_device(struct udma_chan *uc, int idx) -{ - struct udma_desc *d = uc->desc; - - if (uc->cyclic && uc->config.pkt_mode) { - dma_sync_single_for_device(uc->ud->dev, - d->hwdesc[idx].cppi5_desc_paddr, - d->hwdesc[idx].cppi5_desc_size, - DMA_TO_DEVICE); - } else { - int i; - - for (i = 0; i < d->hwdesc_count; i++) { - if (!d->hwdesc[i].cppi5_desc_vaddr) - continue; - - dma_sync_single_for_device(uc->ud->dev, - d->hwdesc[i].cppi5_desc_paddr, - d->hwdesc[i].cppi5_desc_size, - DMA_TO_DEVICE); - } - } -} - static inline dma_addr_t udma_get_rx_flush_hwdesc_paddr(struct udma_chan *uc) { return uc->ud->rx_flush.hwdescs[uc->config.pkt_mode].cppi5_desc_paddr; @@ -593,7 +565,6 @@ static int udma_push_to_ring(struct udma_chan *uc, int idx) paddr = udma_curr_cppi5_desc_paddr(d, idx); wmb(); /* Ensure that writes are not moved over this point */ - udma_sync_for_device(uc, idx); } return k3_ringacc_ring_push(ring, &paddr); @@ -613,7 +584,7 @@ static bool udma_desc_is_rx_flush(struct udma_chan *uc, dma_addr_t addr) static int udma_pop_from_ring(struct udma_chan *uc, dma_addr_t *addr) { struct k3_ring *ring = NULL; - int ret = -ENOENT; + int ret; switch (uc->config.dir) { case DMA_DEV_TO_MEM: @@ -624,34 +595,24 @@ static int udma_pop_from_ring(struct udma_chan *uc, dma_addr_t *addr) ring = uc->tchan->tc_ring; break; default: - break; + return -ENOENT; } - if (ring && k3_ringacc_ring_get_occ(ring)) { - struct udma_desc *d = NULL; - - ret = k3_ringacc_ring_pop(ring, addr); - if (ret) - return ret; - - /* Teardown completion */ - if (cppi5_desc_is_tdcm(*addr)) - return ret; + ret = k3_ringacc_ring_pop(ring, addr); + if (ret) + return ret; - /* Check for flush descriptor */ - if (udma_desc_is_rx_flush(uc, *addr)) - return -ENOENT; + rmb(); /* Ensure that reads are not moved before this point */ - d = udma_udma_desc_from_paddr(uc, *addr); + /* Teardown completion */ + if (cppi5_desc_is_tdcm(*addr)) + return 0; - if (d) - dma_sync_single_for_cpu(uc->ud->dev, *addr, - d->hwdesc[0].cppi5_desc_size, - DMA_FROM_DEVICE); - rmb(); /* Ensure that reads are not moved before this point */ - } + /* Check for flush descriptor */ + if (udma_desc_is_rx_flush(uc, *addr)) + return -ENOENT; - return ret; + return 0; } static void udma_reset_rings(struct udma_chan *uc) @@ -695,31 +656,31 @@ static void udma_reset_counters(struct udma_chan *uc) u32 val; if (uc->tchan) { - val = udma_tchanrt_read(uc->tchan, UDMA_TCHAN_RT_BCNT_REG); - udma_tchanrt_write(uc->tchan, UDMA_TCHAN_RT_BCNT_REG, val); + val = udma_tchanrt_read(uc, UDMA_CHAN_RT_BCNT_REG); + udma_tchanrt_write(uc, UDMA_CHAN_RT_BCNT_REG, val); - val = udma_tchanrt_read(uc->tchan, UDMA_TCHAN_RT_SBCNT_REG); - udma_tchanrt_write(uc->tchan, UDMA_TCHAN_RT_SBCNT_REG, val); + val = udma_tchanrt_read(uc, UDMA_CHAN_RT_SBCNT_REG); + udma_tchanrt_write(uc, UDMA_CHAN_RT_SBCNT_REG, val); - val = udma_tchanrt_read(uc->tchan, UDMA_TCHAN_RT_PCNT_REG); - udma_tchanrt_write(uc->tchan, UDMA_TCHAN_RT_PCNT_REG, val); + val = udma_tchanrt_read(uc, UDMA_CHAN_RT_PCNT_REG); + udma_tchanrt_write(uc, UDMA_CHAN_RT_PCNT_REG, val); - val = udma_tchanrt_read(uc->tchan, UDMA_TCHAN_RT_PEER_BCNT_REG); - udma_tchanrt_write(uc->tchan, UDMA_TCHAN_RT_PEER_BCNT_REG, val); + val = udma_tchanrt_read(uc, UDMA_CHAN_RT_PEER_BCNT_REG); + udma_tchanrt_write(uc, UDMA_CHAN_RT_PEER_BCNT_REG, val); } if (uc->rchan) { - val = udma_rchanrt_read(uc->rchan, UDMA_RCHAN_RT_BCNT_REG); - udma_rchanrt_write(uc->rchan, UDMA_RCHAN_RT_BCNT_REG, val); + val = udma_rchanrt_read(uc, UDMA_CHAN_RT_BCNT_REG); + udma_rchanrt_write(uc, UDMA_CHAN_RT_BCNT_REG, val); - val = udma_rchanrt_read(uc->rchan, UDMA_RCHAN_RT_SBCNT_REG); - udma_rchanrt_write(uc->rchan, UDMA_RCHAN_RT_SBCNT_REG, val); + val = udma_rchanrt_read(uc, UDMA_CHAN_RT_SBCNT_REG); + udma_rchanrt_write(uc, UDMA_CHAN_RT_SBCNT_REG, val); - val = udma_rchanrt_read(uc->rchan, UDMA_RCHAN_RT_PCNT_REG); - udma_rchanrt_write(uc->rchan, UDMA_RCHAN_RT_PCNT_REG, val); + val = udma_rchanrt_read(uc, UDMA_CHAN_RT_PCNT_REG); + udma_rchanrt_write(uc, UDMA_CHAN_RT_PCNT_REG, val); - val = udma_rchanrt_read(uc->rchan, UDMA_RCHAN_RT_PEER_BCNT_REG); - udma_rchanrt_write(uc->rchan, UDMA_RCHAN_RT_PEER_BCNT_REG, val); + val = udma_rchanrt_read(uc, UDMA_CHAN_RT_PEER_BCNT_REG); + udma_rchanrt_write(uc, UDMA_CHAN_RT_PEER_BCNT_REG, val); } uc->bcnt = 0; @@ -729,16 +690,16 @@ static int udma_reset_chan(struct udma_chan *uc, bool hard) { switch (uc->config.dir) { case DMA_DEV_TO_MEM: - udma_rchanrt_write(uc->rchan, UDMA_RCHAN_RT_PEER_RT_EN_REG, 0); - udma_rchanrt_write(uc->rchan, UDMA_RCHAN_RT_CTL_REG, 0); + udma_rchanrt_write(uc, UDMA_CHAN_RT_PEER_RT_EN_REG, 0); + udma_rchanrt_write(uc, UDMA_CHAN_RT_CTL_REG, 0); break; case DMA_MEM_TO_DEV: - udma_tchanrt_write(uc->tchan, UDMA_TCHAN_RT_CTL_REG, 0); - udma_tchanrt_write(uc->tchan, UDMA_TCHAN_RT_PEER_RT_EN_REG, 0); + udma_tchanrt_write(uc, UDMA_CHAN_RT_CTL_REG, 0); + udma_tchanrt_write(uc, UDMA_CHAN_RT_PEER_RT_EN_REG, 0); break; case DMA_MEM_TO_MEM: - udma_rchanrt_write(uc->rchan, UDMA_RCHAN_RT_CTL_REG, 0); - udma_tchanrt_write(uc->tchan, UDMA_TCHAN_RT_CTL_REG, 0); + udma_rchanrt_write(uc, UDMA_CHAN_RT_CTL_REG, 0); + udma_tchanrt_write(uc, UDMA_CHAN_RT_CTL_REG, 0); break; default: return -EINVAL; @@ -766,7 +727,7 @@ static int udma_reset_chan(struct udma_chan *uc, bool hard) * the rchan. */ if (uc->config.dir == DMA_DEV_TO_MEM) - udma_rchanrt_write(uc->rchan, UDMA_RCHAN_RT_CTL_REG, + udma_rchanrt_write(uc, UDMA_CHAN_RT_CTL_REG, UDMA_CHAN_RT_CTL_EN | UDMA_CHAN_RT_CTL_TDOWN | UDMA_CHAN_RT_CTL_FTDOWN); @@ -843,11 +804,12 @@ static int udma_start(struct udma_chan *uc) if (uc->config.enable_burst) val |= PDMA_STATIC_TR_XY_BURST; - udma_rchanrt_write(uc->rchan, - UDMA_RCHAN_RT_PEER_STATIC_TR_XY_REG, val); + udma_rchanrt_write(uc, + UDMA_CHAN_RT_PEER_STATIC_TR_XY_REG, + val); - udma_rchanrt_write(uc->rchan, - UDMA_RCHAN_RT_PEER_STATIC_TR_Z_REG, + udma_rchanrt_write(uc, + UDMA_CHAN_RT_PEER_STATIC_TR_Z_REG, PDMA_STATIC_TR_Z(uc->desc->static_tr.bstcnt, match_data->statictr_z_mask)); @@ -856,11 +818,11 @@ static int udma_start(struct udma_chan *uc) sizeof(uc->static_tr)); } - udma_rchanrt_write(uc->rchan, UDMA_RCHAN_RT_CTL_REG, + udma_rchanrt_write(uc, UDMA_CHAN_RT_CTL_REG, UDMA_CHAN_RT_CTL_EN); /* Enable remote */ - udma_rchanrt_write(uc->rchan, UDMA_RCHAN_RT_PEER_RT_EN_REG, + udma_rchanrt_write(uc, UDMA_CHAN_RT_PEER_RT_EN_REG, UDMA_PEER_RT_EN_ENABLE); break; @@ -875,8 +837,9 @@ static int udma_start(struct udma_chan *uc) if (uc->config.enable_burst) val |= PDMA_STATIC_TR_XY_BURST; - udma_tchanrt_write(uc->tchan, - UDMA_TCHAN_RT_PEER_STATIC_TR_XY_REG, val); + udma_tchanrt_write(uc, + UDMA_CHAN_RT_PEER_STATIC_TR_XY_REG, + val); /* save the current staticTR configuration */ memcpy(&uc->static_tr, &uc->desc->static_tr, @@ -884,17 +847,17 @@ static int udma_start(struct udma_chan *uc) } /* Enable remote */ - udma_tchanrt_write(uc->tchan, UDMA_TCHAN_RT_PEER_RT_EN_REG, + udma_tchanrt_write(uc, UDMA_CHAN_RT_PEER_RT_EN_REG, UDMA_PEER_RT_EN_ENABLE); - udma_tchanrt_write(uc->tchan, UDMA_TCHAN_RT_CTL_REG, + udma_tchanrt_write(uc, UDMA_CHAN_RT_CTL_REG, UDMA_CHAN_RT_CTL_EN); break; case DMA_MEM_TO_MEM: - udma_rchanrt_write(uc->rchan, UDMA_RCHAN_RT_CTL_REG, + udma_rchanrt_write(uc, UDMA_CHAN_RT_CTL_REG, UDMA_CHAN_RT_CTL_EN); - udma_tchanrt_write(uc->tchan, UDMA_TCHAN_RT_CTL_REG, + udma_tchanrt_write(uc, UDMA_CHAN_RT_CTL_REG, UDMA_CHAN_RT_CTL_EN); break; @@ -920,20 +883,20 @@ static int udma_stop(struct udma_chan *uc) if (!uc->cyclic && !uc->desc) udma_push_to_ring(uc, -1); - udma_rchanrt_write(uc->rchan, UDMA_RCHAN_RT_PEER_RT_EN_REG, + udma_rchanrt_write(uc, UDMA_CHAN_RT_PEER_RT_EN_REG, UDMA_PEER_RT_EN_ENABLE | UDMA_PEER_RT_EN_TEARDOWN); break; case DMA_MEM_TO_DEV: - udma_tchanrt_write(uc->tchan, UDMA_TCHAN_RT_PEER_RT_EN_REG, + udma_tchanrt_write(uc, UDMA_CHAN_RT_PEER_RT_EN_REG, UDMA_PEER_RT_EN_ENABLE | UDMA_PEER_RT_EN_FLUSH); - udma_tchanrt_write(uc->tchan, UDMA_TCHAN_RT_CTL_REG, + udma_tchanrt_write(uc, UDMA_CHAN_RT_CTL_REG, UDMA_CHAN_RT_CTL_EN | UDMA_CHAN_RT_CTL_TDOWN); break; case DMA_MEM_TO_MEM: - udma_tchanrt_write(uc->tchan, UDMA_TCHAN_RT_CTL_REG, + udma_tchanrt_write(uc, UDMA_CHAN_RT_CTL_REG, UDMA_CHAN_RT_CTL_EN | UDMA_CHAN_RT_CTL_TDOWN); break; @@ -973,8 +936,8 @@ static bool udma_is_desc_really_done(struct udma_chan *uc, struct udma_desc *d) uc->config.dir != DMA_MEM_TO_DEV) return true; - peer_bcnt = udma_tchanrt_read(uc->tchan, UDMA_TCHAN_RT_PEER_BCNT_REG); - bcnt = udma_tchanrt_read(uc->tchan, UDMA_TCHAN_RT_BCNT_REG); + peer_bcnt = udma_tchanrt_read(uc, UDMA_CHAN_RT_PEER_BCNT_REG); + bcnt = udma_tchanrt_read(uc, UDMA_CHAN_RT_BCNT_REG); /* Transfer is incomplete, store current residue and time stamp */ if (peer_bcnt < bcnt) { @@ -1247,10 +1210,10 @@ static struct udma_##res *__udma_reserve_##res(struct udma_dev *ud, \ } else { \ int start; \ \ - if (tpl >= ud->match_data->tpl_levels) \ - tpl = ud->match_data->tpl_levels - 1; \ + if (tpl >= ud->tpl_levels) \ + tpl = ud->tpl_levels - 1; \ \ - start = ud->match_data->level_start_idx[tpl]; \ + start = ud->tpl_start_idx[tpl]; \ \ id = find_next_zero_bit(ud->res##_map, ud->res##_cnt, \ start); \ @@ -1299,7 +1262,6 @@ static int udma_get_rchan(struct udma_chan *uc) static int udma_get_chan_pair(struct udma_chan *uc) { struct udma_dev *ud = uc->ud; - const struct udma_match_data *match_data = ud->match_data; int chan_id, end; if ((uc->tchan && uc->rchan) && uc->tchan->id == uc->rchan->id) { @@ -1321,7 +1283,7 @@ static int udma_get_chan_pair(struct udma_chan *uc) /* Can be optimized, but let's have it like this for now */ end = min(ud->tchan_cnt, ud->rchan_cnt); /* Try to use the highest TPL channel pair for MEM_TO_MEM channels */ - chan_id = match_data->level_start_idx[match_data->tpl_levels - 1]; + chan_id = ud->tpl_start_idx[ud->tpl_levels - 1]; for (; chan_id < end; chan_id++) { if (!test_bit(chan_id, ud->tchan_map) && !test_bit(chan_id, ud->rchan_map)) @@ -2207,7 +2169,7 @@ udma_prep_slave_sg_pkt(struct udma_chan *uc, struct scatterlist *sgl, u32 ring_id; unsigned int i; - d = kzalloc(sizeof(*d) + sglen * sizeof(d->hwdesc[0]), GFP_NOWAIT); + d = kzalloc(struct_size(d, hwdesc, sglen), GFP_NOWAIT); if (!d) return NULL; @@ -2523,7 +2485,7 @@ udma_prep_dma_cyclic_pkt(struct udma_chan *uc, dma_addr_t buf_addr, if (period_len >= SZ_4M) return NULL; - d = kzalloc(sizeof(*d) + periods * sizeof(d->hwdesc[0]), GFP_NOWAIT); + d = kzalloc(struct_size(d, hwdesc, periods), GFP_NOWAIT); if (!d) return NULL; @@ -2773,30 +2735,27 @@ static enum dma_status udma_tx_status(struct dma_chan *chan, u32 delay = 0; if (uc->desc->dir == DMA_MEM_TO_DEV) { - bcnt = udma_tchanrt_read(uc->tchan, - UDMA_TCHAN_RT_SBCNT_REG); + bcnt = udma_tchanrt_read(uc, UDMA_CHAN_RT_SBCNT_REG); if (uc->config.ep_type != PSIL_EP_NATIVE) { - peer_bcnt = udma_tchanrt_read(uc->tchan, - UDMA_TCHAN_RT_PEER_BCNT_REG); + peer_bcnt = udma_tchanrt_read(uc, + UDMA_CHAN_RT_PEER_BCNT_REG); if (bcnt > peer_bcnt) delay = bcnt - peer_bcnt; } } else if (uc->desc->dir == DMA_DEV_TO_MEM) { - bcnt = udma_rchanrt_read(uc->rchan, - UDMA_RCHAN_RT_BCNT_REG); + bcnt = udma_rchanrt_read(uc, UDMA_CHAN_RT_BCNT_REG); if (uc->config.ep_type != PSIL_EP_NATIVE) { - peer_bcnt = udma_rchanrt_read(uc->rchan, - UDMA_RCHAN_RT_PEER_BCNT_REG); + peer_bcnt = udma_rchanrt_read(uc, + UDMA_CHAN_RT_PEER_BCNT_REG); if (peer_bcnt > bcnt) delay = peer_bcnt - bcnt; } } else { - bcnt = udma_tchanrt_read(uc->tchan, - UDMA_TCHAN_RT_BCNT_REG); + bcnt = udma_tchanrt_read(uc, UDMA_CHAN_RT_BCNT_REG); } bcnt -= uc->bcnt; @@ -2829,19 +2788,17 @@ static int udma_pause(struct dma_chan *chan) /* pause the channel */ switch (uc->config.dir) { case DMA_DEV_TO_MEM: - udma_rchanrt_update_bits(uc->rchan, - UDMA_RCHAN_RT_PEER_RT_EN_REG, + udma_rchanrt_update_bits(uc, UDMA_CHAN_RT_PEER_RT_EN_REG, UDMA_PEER_RT_EN_PAUSE, UDMA_PEER_RT_EN_PAUSE); break; case DMA_MEM_TO_DEV: - udma_tchanrt_update_bits(uc->tchan, - UDMA_TCHAN_RT_PEER_RT_EN_REG, + udma_tchanrt_update_bits(uc, UDMA_CHAN_RT_PEER_RT_EN_REG, UDMA_PEER_RT_EN_PAUSE, UDMA_PEER_RT_EN_PAUSE); break; case DMA_MEM_TO_MEM: - udma_tchanrt_update_bits(uc->tchan, UDMA_TCHAN_RT_CTL_REG, + udma_tchanrt_update_bits(uc, UDMA_CHAN_RT_CTL_REG, UDMA_CHAN_RT_CTL_PAUSE, UDMA_CHAN_RT_CTL_PAUSE); break; @@ -2859,18 +2816,16 @@ static int udma_resume(struct dma_chan *chan) /* resume the channel */ switch (uc->config.dir) { case DMA_DEV_TO_MEM: - udma_rchanrt_update_bits(uc->rchan, - UDMA_RCHAN_RT_PEER_RT_EN_REG, + udma_rchanrt_update_bits(uc, UDMA_CHAN_RT_PEER_RT_EN_REG, UDMA_PEER_RT_EN_PAUSE, 0); break; case DMA_MEM_TO_DEV: - udma_tchanrt_update_bits(uc->tchan, - UDMA_TCHAN_RT_PEER_RT_EN_REG, + udma_tchanrt_update_bits(uc, UDMA_CHAN_RT_PEER_RT_EN_REG, UDMA_PEER_RT_EN_PAUSE, 0); break; case DMA_MEM_TO_MEM: - udma_tchanrt_update_bits(uc->tchan, UDMA_TCHAN_RT_CTL_REG, + udma_tchanrt_update_bits(uc, UDMA_CHAN_RT_CTL_REG, UDMA_CHAN_RT_CTL_PAUSE, 0); break; default: @@ -3159,11 +3114,6 @@ static struct udma_match_data am654_main_data = { .enable_memcpy_support = true, .statictr_z_mask = GENMASK(11, 0), .rchan_oes_offset = 0x2000, - .tpl_levels = 2, - .level_start_idx = { - [0] = 8, /* Normal channels */ - [1] = 0, /* High Throughput channels */ - }, }; static struct udma_match_data am654_mcu_data = { @@ -3171,11 +3121,6 @@ static struct udma_match_data am654_mcu_data = { .enable_memcpy_support = false, .statictr_z_mask = GENMASK(11, 0), .rchan_oes_offset = 0x2000, - .tpl_levels = 2, - .level_start_idx = { - [0] = 2, /* Normal channels */ - [1] = 0, /* High Throughput channels */ - }, }; static struct udma_match_data j721e_main_data = { @@ -3184,12 +3129,6 @@ static struct udma_match_data j721e_main_data = { .flags = UDMA_FLAG_PDMA_ACC32 | UDMA_FLAG_PDMA_BURST, .statictr_z_mask = GENMASK(23, 0), .rchan_oes_offset = 0x400, - .tpl_levels = 3, - .level_start_idx = { - [0] = 16, /* Normal channels */ - [1] = 4, /* High Throughput channels */ - [2] = 0, /* Ultra High Throughput channels */ - }, }; static struct udma_match_data j721e_mcu_data = { @@ -3198,11 +3137,6 @@ static struct udma_match_data j721e_mcu_data = { .flags = UDMA_FLAG_PDMA_ACC32 | UDMA_FLAG_PDMA_BURST, .statictr_z_mask = GENMASK(23, 0), .rchan_oes_offset = 0x400, - .tpl_levels = 2, - .level_start_idx = { - [0] = 2, /* Normal channels */ - [1] = 0, /* High Throughput channels */ - }, }; static const struct of_device_id udma_of_match[] = { @@ -3251,15 +3185,36 @@ static int udma_setup_resources(struct udma_dev *ud) "ti,sci-rm-range-rchan", "ti,sci-rm-range-rflow" }; - cap2 = udma_read(ud->mmrs[MMR_GCFG], 0x28); - cap3 = udma_read(ud->mmrs[MMR_GCFG], 0x2c); + cap2 = udma_read(ud->mmrs[MMR_GCFG], UDMA_CAP_REG(2)); + cap3 = udma_read(ud->mmrs[MMR_GCFG], UDMA_CAP_REG(3)); - ud->rflow_cnt = cap3 & 0x3fff; - ud->tchan_cnt = cap2 & 0x1ff; - ud->echan_cnt = (cap2 >> 9) & 0x1ff; - ud->rchan_cnt = (cap2 >> 18) & 0x1ff; + ud->rflow_cnt = UDMA_CAP3_RFLOW_CNT(cap3); + ud->tchan_cnt = UDMA_CAP2_TCHAN_CNT(cap2); + ud->echan_cnt = UDMA_CAP2_ECHAN_CNT(cap2); + ud->rchan_cnt = UDMA_CAP2_RCHAN_CNT(cap2); ch_count = ud->tchan_cnt + ud->rchan_cnt; + /* Set up the throughput level start indexes */ + if (of_device_is_compatible(dev->of_node, + "ti,am654-navss-main-udmap")) { + ud->tpl_levels = 2; + ud->tpl_start_idx[0] = 8; + } else if (of_device_is_compatible(dev->of_node, + "ti,am654-navss-mcu-udmap")) { + ud->tpl_levels = 2; + ud->tpl_start_idx[0] = 2; + } else if (UDMA_CAP3_UCHAN_CNT(cap3)) { + ud->tpl_levels = 3; + ud->tpl_start_idx[1] = UDMA_CAP3_UCHAN_CNT(cap3); + ud->tpl_start_idx[0] = ud->tpl_start_idx[1] + + UDMA_CAP3_HCHAN_CNT(cap3); + } else if (UDMA_CAP3_HCHAN_CNT(cap3)) { + ud->tpl_levels = 2; + ud->tpl_start_idx[0] = UDMA_CAP3_HCHAN_CNT(cap3); + } else { + ud->tpl_levels = 1; + } + ud->tchan_map = devm_kmalloc_array(dev, BITS_TO_LONGS(ud->tchan_cnt), sizeof(unsigned long), GFP_KERNEL); ud->tchans = devm_kcalloc(dev, ud->tchan_cnt, sizeof(*ud->tchans), diff --git a/drivers/dma/ti/k3-udma.h b/drivers/dma/ti/k3-udma.h index 128d8744a435..09c4529e013d 100644 --- a/drivers/dma/ti/k3-udma.h +++ b/drivers/dma/ti/k3-udma.h @@ -18,52 +18,41 @@ #define UDMA_RX_FLOW_ID_FW_OES_REG 0x80 #define UDMA_RX_FLOW_ID_FW_STATUS_REG 0x88 -/* TX chan RT regs */ -#define UDMA_TCHAN_RT_CTL_REG 0x0 -#define UDMA_TCHAN_RT_SWTRIG_REG 0x8 -#define UDMA_TCHAN_RT_STDATA_REG 0x80 - -#define UDMA_TCHAN_RT_PEER_REG(i) (0x200 + ((i) * 0x4)) -#define UDMA_TCHAN_RT_PEER_STATIC_TR_XY_REG \ - UDMA_TCHAN_RT_PEER_REG(0) /* PSI-L: 0x400 */ -#define UDMA_TCHAN_RT_PEER_STATIC_TR_Z_REG \ - UDMA_TCHAN_RT_PEER_REG(1) /* PSI-L: 0x401 */ -#define UDMA_TCHAN_RT_PEER_BCNT_REG \ - UDMA_TCHAN_RT_PEER_REG(4) /* PSI-L: 0x404 */ -#define UDMA_TCHAN_RT_PEER_RT_EN_REG \ - UDMA_TCHAN_RT_PEER_REG(8) /* PSI-L: 0x408 */ - -#define UDMA_TCHAN_RT_PCNT_REG 0x400 -#define UDMA_TCHAN_RT_BCNT_REG 0x408 -#define UDMA_TCHAN_RT_SBCNT_REG 0x410 - -/* RX chan RT regs */ -#define UDMA_RCHAN_RT_CTL_REG 0x0 -#define UDMA_RCHAN_RT_SWTRIG_REG 0x8 -#define UDMA_RCHAN_RT_STDATA_REG 0x80 - -#define UDMA_RCHAN_RT_PEER_REG(i) (0x200 + ((i) * 0x4)) -#define UDMA_RCHAN_RT_PEER_STATIC_TR_XY_REG \ - UDMA_RCHAN_RT_PEER_REG(0) /* PSI-L: 0x400 */ -#define UDMA_RCHAN_RT_PEER_STATIC_TR_Z_REG \ - UDMA_RCHAN_RT_PEER_REG(1) /* PSI-L: 0x401 */ -#define UDMA_RCHAN_RT_PEER_BCNT_REG \ - UDMA_RCHAN_RT_PEER_REG(4) /* PSI-L: 0x404 */ -#define UDMA_RCHAN_RT_PEER_RT_EN_REG \ - UDMA_RCHAN_RT_PEER_REG(8) /* PSI-L: 0x408 */ - -#define UDMA_RCHAN_RT_PCNT_REG 0x400 -#define UDMA_RCHAN_RT_BCNT_REG 0x408 -#define UDMA_RCHAN_RT_SBCNT_REG 0x410 - -/* UDMA_TCHAN_RT_CTL_REG/UDMA_RCHAN_RT_CTL_REG */ +/* TCHANRT/RCHANRT registers */ +#define UDMA_CHAN_RT_CTL_REG 0x0 +#define UDMA_CHAN_RT_SWTRIG_REG 0x8 +#define UDMA_CHAN_RT_STDATA_REG 0x80 + +#define UDMA_CHAN_RT_PEER_REG(i) (0x200 + ((i) * 0x4)) +#define UDMA_CHAN_RT_PEER_STATIC_TR_XY_REG \ + UDMA_CHAN_RT_PEER_REG(0) /* PSI-L: 0x400 */ +#define UDMA_CHAN_RT_PEER_STATIC_TR_Z_REG \ + UDMA_CHAN_RT_PEER_REG(1) /* PSI-L: 0x401 */ +#define UDMA_CHAN_RT_PEER_BCNT_REG \ + UDMA_CHAN_RT_PEER_REG(4) /* PSI-L: 0x404 */ +#define UDMA_CHAN_RT_PEER_RT_EN_REG \ + UDMA_CHAN_RT_PEER_REG(8) /* PSI-L: 0x408 */ + +#define UDMA_CHAN_RT_PCNT_REG 0x400 +#define UDMA_CHAN_RT_BCNT_REG 0x408 +#define UDMA_CHAN_RT_SBCNT_REG 0x410 + +/* UDMA_CAP Registers */ +#define UDMA_CAP2_TCHAN_CNT(val) ((val) & 0x1ff) +#define UDMA_CAP2_ECHAN_CNT(val) (((val) >> 9) & 0x1ff) +#define UDMA_CAP2_RCHAN_CNT(val) (((val) >> 18) & 0x1ff) +#define UDMA_CAP3_RFLOW_CNT(val) ((val) & 0x3fff) +#define UDMA_CAP3_HCHAN_CNT(val) (((val) >> 14) & 0x1ff) +#define UDMA_CAP3_UCHAN_CNT(val) (((val) >> 23) & 0x1ff) + +/* UDMA_CHAN_RT_CTL_REG */ #define UDMA_CHAN_RT_CTL_EN BIT(31) #define UDMA_CHAN_RT_CTL_TDOWN BIT(30) #define UDMA_CHAN_RT_CTL_PAUSE BIT(29) #define UDMA_CHAN_RT_CTL_FTDOWN BIT(28) #define UDMA_CHAN_RT_CTL_ERROR BIT(0) -/* UDMA_TCHAN_RT_PEER_RT_EN_REG/UDMA_RCHAN_RT_PEER_RT_EN_REG (PSI-L: 0x408) */ +/* UDMA_CHAN_RT_PEER_RT_EN_REG */ #define UDMA_PEER_RT_EN_ENABLE BIT(31) #define UDMA_PEER_RT_EN_TEARDOWN BIT(30) #define UDMA_PEER_RT_EN_PAUSE BIT(29) diff --git a/drivers/dma/xgene-dma.c b/drivers/dma/xgene-dma.c index cd60fa6d6750..4f733d37a22e 100644 --- a/drivers/dma/xgene-dma.c +++ b/drivers/dma/xgene-dma.c @@ -287,6 +287,8 @@ struct xgene_dma_chan { /** * struct xgene_dma - internal representation of an X-Gene DMA device + * @dev: reference to this device's struct device + * @clk: reference to this device's clock * @err_irq: DMA error irq number * @ring_num: start id number for DMA ring * @csr_dma: base for DMA register access |