From 87a2f622cc6446c7d09ac655b7b9b04886f16a4c Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi
Date: Mon, 18 Sep 2017 11:16:26 +0300
Subject: [PATCH 1/4] dmaengine: edma: Align the memcpy acnt array size with the transfer

Memory-to-memory transfers do not have any special alignment needs
regarding the acnt array size, but if one of the areas is in a memory
mapped region (like PCIe memory), we need to make sure that the acnt
array size is aligned with the memcpy parameters.

Before the "dmaengine: edma: Optimize memcpy operation" change, the
memcpy was set up in a different way: acnt == number of bytes in a word
based on __ffs((src | dest | len)), with bcnt and ccnt looping the
necessary number of words to complete the transfer.

Instead of reverting that commit, we can fix it to make sure that the
ACNT size is aligned to the transfer.

Fixes: df6694f80365a ("dmaengine: edma: Optimize memcpy operation")
Signed-off-by: Peter Ujfalusi
Cc: stable@vger.kernel.org
Signed-off-by: Vinod Koul
---
 drivers/dma/edma.c | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/drivers/dma/edma.c b/drivers/dma/edma.c
index 3879f80a4815..a7ea20e7b8e9 100644
--- a/drivers/dma/edma.c
+++ b/drivers/dma/edma.c
@@ -1143,11 +1143,24 @@ static struct dma_async_tx_descriptor *edma_prep_dma_memcpy(
 	struct edma_desc *edesc;
 	struct device *dev = chan->device->dev;
 	struct edma_chan *echan = to_edma_chan(chan);
-	unsigned int width, pset_len;
+	unsigned int width, pset_len, array_size;
 
 	if (unlikely(!echan || !len))
 		return NULL;
 
+	/* Align the array size (acnt block) with the transfer properties */
+	switch (__ffs((src | dest | len))) {
+	case 0:
+		array_size = SZ_32K - 1;
+		break;
+	case 1:
+		array_size = SZ_32K - 2;
+		break;
+	default:
+		array_size = SZ_32K - 4;
+		break;
+	}
+
 	if (len < SZ_64K) {
 		/*
 		 * Transfer size less than 64K can be handled with one paRAM
@@ -1169,7 +1182,7 @@ static struct dma_async_tx_descriptor *edma_prep_dma_memcpy(
 	 * When the full_length is multibple of 32767 one slot can be
 	 * used to complete the transfer.
 	 */
-	width = SZ_32K - 1;
+	width = array_size;
 	pset_len = rounddown(len, width);
 	/* One slot is enough for lengths multiple of (SZ_32K -1) */
 	if (unlikely(pset_len == len))
@@ -1217,7 +1230,7 @@ static struct dma_async_tx_descriptor *edma_prep_dma_memcpy(
 	}
 	dest += pset_len;
 	src += pset_len;
-	pset_len = width = len % (SZ_32K - 1);
+	pset_len = width = len % array_size;
 
 	ret = edma_config_pset(chan, &edesc->pset[1], src, dest, 1, width,
 			       pset_len, DMA_MEM_TO_MEM);

From 2ccb4837c938357233a0b8818e3ca3e58242c952 Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi
Date: Thu, 21 Sep 2017 14:35:32 +0300
Subject: [PATCH 2/4] dmaengine: ti-dma-crossbar: Fix possible race condition with dma_inuse

When looking for an unused xbar_out lane, we should also protect the
set_bit() call with the same mutex, to guard against concurrent
threads picking the same ID.
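
To illustrate the locking pattern this patch restores, here is a minimal
sketch (illustrative only, not the driver code: the helper name
xbar_reserve_lane and its parameters are invented, and <linux/bitops.h>,
<linux/mutex.h> and <linux/errno.h> are assumed). The point is that the
bitmap lookup and the bit reservation must share one critical section:

  /*
   * Search for a free lane and claim it before anyone else can see it
   * as free; both steps happen under the same mutex.
   */
  static int xbar_reserve_lane(unsigned long *inuse, unsigned int nr_lanes,
  			       struct mutex *lock)
  {
  	unsigned int lane;

  	mutex_lock(lock);
  	lane = find_first_zero_bit(inuse, nr_lanes);
  	if (lane >= nr_lanes) {
  		mutex_unlock(lock);
  		return -ENOMEM;		/* no free lane left */
  	}
  	set_bit(lane, inuse);		/* reserve before dropping the lock */
  	mutex_unlock(lock);

  	return lane;
  }

Dropping the mutex between find_first_zero_bit() and set_bit(), as the
code did before this patch, lets two concurrent callers observe the same
free bit and both claim it.
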
Fixes: ec9bfa1e1a796 ("dmaengine: ti-dma-crossbar: dra7: Use bitops instead of idr")
Signed-off-by: Peter Ujfalusi
Cc: stable@vger.kernel.org
Signed-off-by: Vinod Koul
---
 drivers/dma/ti-dma-crossbar.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/dma/ti-dma-crossbar.c b/drivers/dma/ti-dma-crossbar.c
index 2f65a8fde21d..f1d04b70ee67 100644
--- a/drivers/dma/ti-dma-crossbar.c
+++ b/drivers/dma/ti-dma-crossbar.c
@@ -262,13 +262,14 @@ static void *ti_dra7_xbar_route_allocate(struct of_phandle_args *dma_spec,
 	mutex_lock(&xbar->mutex);
 	map->xbar_out = find_first_zero_bit(xbar->dma_inuse,
 					    xbar->dma_requests);
-	mutex_unlock(&xbar->mutex);
 	if (map->xbar_out == xbar->dma_requests) {
+		mutex_unlock(&xbar->mutex);
 		dev_err(&pdev->dev, "Run out of free DMA requests\n");
 		kfree(map);
 		return ERR_PTR(-ENOMEM);
 	}
 	set_bit(map->xbar_out, xbar->dma_inuse);
+	mutex_unlock(&xbar->mutex);
 
 	map->xbar_in = (u16)dma_spec->args[0];
 

From d9ec46416de5ef83220f1c7010ee0f5d1be1d753 Mon Sep 17 00:00:00 2001
From: Sylvain Lesne
Date: Mon, 18 Sep 2017 13:08:00 +0200
Subject: [PATCH 3/4] dmaengine: altera: fix response FIFO emptying

Commit 6084fc2ec478 ("dmaengine: altera: Use macros instead of structs
to describe the registers") introduced a minus sign before a register
offset.

This leads to soft-locks of the DMA controller, since reading the last
status byte is required to pop the response from the FIFO. Failing to
do so will lead to a full FIFO, which means that the DMA controller
will stop processing descriptors.

Signed-off-by: Sylvain Lesne
Reviewed-by: Stefan Roese
Signed-off-by: Vinod Koul
---
 drivers/dma/altera-msgdma.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/dma/altera-msgdma.c b/drivers/dma/altera-msgdma.c
index 32905d5606ac..35cbf2365f68 100644
--- a/drivers/dma/altera-msgdma.c
+++ b/drivers/dma/altera-msgdma.c
@@ -698,7 +698,7 @@ static void msgdma_tasklet(unsigned long data)
 		 * bits. So we need to just drop these values.
 		 */
 		size = ioread32(mdev->resp + MSGDMA_RESP_BYTES_TRANSFERRED);
-		status = ioread32(mdev->resp - MSGDMA_RESP_STATUS);
+		status = ioread32(mdev->resp + MSGDMA_RESP_STATUS);
 
 		msgdma_complete_descriptor(mdev);
 		msgdma_chan_desc_cleanup(mdev);

From edf10919e5fc8dfd10e57ed72f651204559bc6ba Mon Sep 17 00:00:00 2001
From: Sylvain Lesne
Date: Mon, 18 Sep 2017 13:08:01 +0200
Subject: [PATCH 4/4] dmaengine: altera: fix spinlock usage

Since this lock is acquired in both process and IRQ context, failing to
disable IRQs when trying to acquire the lock in process context can
lead to deadlocks.
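
To illustrate why the _bh locking is not sufficient here, a minimal
sketch of the scenario described above (the names are invented and
<linux/spinlock.h> plus <linux/interrupt.h> are assumed; this is not the
msgdma code itself):

  static DEFINE_SPINLOCK(demo_lock);

  /* Lock taken in hard-IRQ context. */
  static irqreturn_t demo_irq_handler(int irq, void *data)
  {
  	spin_lock(&demo_lock);
  	/* ... update state shared with process context ... */
  	spin_unlock(&demo_lock);
  	return IRQ_HANDLED;
  }

  /* Process-context path touching the same state. */
  static void demo_process_path(void)
  {
  	unsigned long flags;

  	/*
  	 * spin_lock_bh() would only disable softirqs; if the hard IRQ
  	 * above fired on this CPU while we held the lock, it would spin
  	 * forever on a lock this CPU can never release. Disabling IRQs
  	 * around the critical section avoids that.
  	 */
  	spin_lock_irqsave(&demo_lock, flags);
  	/* ... update shared state ... */
  	spin_unlock_irqrestore(&demo_lock, flags);
  }
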
Signed-off-by: Sylvain Lesne
Reviewed-by: Stefan Roese
Signed-off-by: Vinod Koul
---
 drivers/dma/altera-msgdma.c | 35 +++++++++++++++++++++--------------
 1 file changed, 21 insertions(+), 14 deletions(-)

diff --git a/drivers/dma/altera-msgdma.c b/drivers/dma/altera-msgdma.c
index 35cbf2365f68..339186f25a2a 100644
--- a/drivers/dma/altera-msgdma.c
+++ b/drivers/dma/altera-msgdma.c
@@ -212,11 +212,12 @@ struct msgdma_device {
 static struct msgdma_sw_desc *msgdma_get_descriptor(struct msgdma_device *mdev)
 {
 	struct msgdma_sw_desc *desc;
+	unsigned long flags;
 
-	spin_lock_bh(&mdev->lock);
+	spin_lock_irqsave(&mdev->lock, flags);
 	desc = list_first_entry(&mdev->free_list, struct msgdma_sw_desc, node);
 	list_del(&desc->node);
-	spin_unlock_bh(&mdev->lock);
+	spin_unlock_irqrestore(&mdev->lock, flags);
 
 	INIT_LIST_HEAD(&desc->tx_list);
 
@@ -306,13 +307,14 @@ static dma_cookie_t msgdma_tx_submit(struct dma_async_tx_descriptor *tx)
 	struct msgdma_device *mdev = to_mdev(tx->chan);
 	struct msgdma_sw_desc *new;
 	dma_cookie_t cookie;
+	unsigned long flags;
 
 	new = tx_to_desc(tx);
-	spin_lock_bh(&mdev->lock);
+	spin_lock_irqsave(&mdev->lock, flags);
 	cookie = dma_cookie_assign(tx);
 
 	list_add_tail(&new->node, &mdev->pending_list);
-	spin_unlock_bh(&mdev->lock);
+	spin_unlock_irqrestore(&mdev->lock, flags);
 
 	return cookie;
 }
@@ -336,17 +338,18 @@ msgdma_prep_memcpy(struct dma_chan *dchan, dma_addr_t dma_dst,
 	struct msgdma_extended_desc *desc;
 	size_t copy;
 	u32 desc_cnt;
+	unsigned long irqflags;
 
 	desc_cnt = DIV_ROUND_UP(len, MSGDMA_MAX_TRANS_LEN);
 
-	spin_lock_bh(&mdev->lock);
+	spin_lock_irqsave(&mdev->lock, irqflags);
 	if (desc_cnt > mdev->desc_free_cnt) {
 		spin_unlock_bh(&mdev->lock);
 		dev_dbg(mdev->dev, "mdev %p descs are not available\n", mdev);
 		return NULL;
 	}
 	mdev->desc_free_cnt -= desc_cnt;
-	spin_unlock_bh(&mdev->lock);
+	spin_unlock_irqrestore(&mdev->lock, irqflags);
 
 	do {
 		/* Allocate and populate the descriptor */
@@ -397,18 +400,19 @@ msgdma_prep_slave_sg(struct dma_chan *dchan, struct scatterlist *sgl,
 	u32 desc_cnt = 0, i;
 	struct scatterlist *sg;
 	u32 stride;
+	unsigned long irqflags;
 
 	for_each_sg(sgl, sg, sg_len, i)
 		desc_cnt += DIV_ROUND_UP(sg_dma_len(sg), MSGDMA_MAX_TRANS_LEN);
 
-	spin_lock_bh(&mdev->lock);
+	spin_lock_irqsave(&mdev->lock, irqflags);
 	if (desc_cnt > mdev->desc_free_cnt) {
 		spin_unlock_bh(&mdev->lock);
 		dev_dbg(mdev->dev, "mdev %p descs are not available\n", mdev);
 		return NULL;
 	}
 	mdev->desc_free_cnt -= desc_cnt;
-	spin_unlock_bh(&mdev->lock);
+	spin_unlock_irqrestore(&mdev->lock, irqflags);
 
 	avail = sg_dma_len(sgl);
 
@@ -566,10 +570,11 @@ static void msgdma_start_transfer(struct msgdma_device *mdev)
 static void msgdma_issue_pending(struct dma_chan *chan)
 {
 	struct msgdma_device *mdev = to_mdev(chan);
+	unsigned long flags;
 
-	spin_lock_bh(&mdev->lock);
+	spin_lock_irqsave(&mdev->lock, flags);
 	msgdma_start_transfer(mdev);
-	spin_unlock_bh(&mdev->lock);
+	spin_unlock_irqrestore(&mdev->lock, flags);
 }
 
 /**
@@ -634,10 +639,11 @@ static void msgdma_free_descriptors(struct msgdma_device *mdev)
 static void msgdma_free_chan_resources(struct dma_chan *dchan)
 {
 	struct msgdma_device *mdev = to_mdev(dchan);
+	unsigned long flags;
 
-	spin_lock_bh(&mdev->lock);
+	spin_lock_irqsave(&mdev->lock, flags);
 	msgdma_free_descriptors(mdev);
-	spin_unlock_bh(&mdev->lock);
+	spin_unlock_irqrestore(&mdev->lock, flags);
 
 	kfree(mdev->sw_desq);
 }
@@ -682,8 +688,9 @@ static void msgdma_tasklet(unsigned long data)
 	u32 count;
 	u32 __maybe_unused size;
 	u32 __maybe_unused status;
+	unsigned long flags;
 
-	spin_lock(&mdev->lock);
+	spin_lock_irqsave(&mdev->lock, flags);
 
 	/* Read number of responses that are available */
 	count = ioread32(mdev->csr + MSGDMA_CSR_RESP_FILL_LEVEL);
@@ -704,7 +711,7 @@ static void msgdma_tasklet(unsigned long data)
 		msgdma_chan_desc_cleanup(mdev);
 	}
 
-	spin_unlock(&mdev->lock);
+	spin_unlock_irqrestore(&mdev->lock, flags);
 }
 
 /**