From 080edf75d337d35faa6fc3df99342b10d2848d16 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko
Date: Fri, 18 Mar 2016 14:26:31 +0200
Subject: [PATCH 1/9] dmaengine: hsu: set HSU_CH_MTSR to memory width

The HSU_CH_MTSR register should be programmed with the minimum transfer
size, which is the size on the memory side of the transfer. Program it
accordingly.

Signed-off-by: Andy Shevchenko
Signed-off-by: Vinod Koul
---
 drivers/dma/hsu/hsu.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/dma/hsu/hsu.c b/drivers/dma/hsu/hsu.c
index eef145edb936..c7643e022578 100644
--- a/drivers/dma/hsu/hsu.c
+++ b/drivers/dma/hsu/hsu.c
@@ -64,10 +64,10 @@ static void hsu_dma_chan_start(struct hsu_dma_chan *hsuc)
 
 	if (hsuc->direction == DMA_MEM_TO_DEV) {
 		bsr = config->dst_maxburst;
-		mtsr = config->dst_addr_width;
+		mtsr = config->src_addr_width;
 	} else if (hsuc->direction == DMA_DEV_TO_MEM) {
 		bsr = config->src_maxburst;
-		mtsr = config->src_addr_width;
+		mtsr = config->dst_addr_width;
 	}
 
 	hsu_chan_disable(hsuc);

From a197f3c7d48c0c1f45076ea47533a76ba9b1a959 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko
Date: Fri, 18 Mar 2016 14:26:33 +0200
Subject: [PATCH 2/9] dmaengine: hsu: correct residue calculation of active
 descriptor

Commit f0579c8ceaf1 ("dmaengine: hsu: speed up residue calculation")
sped up the residue calculation for queued descriptors but broke the
initial residue value for the active descriptor.

According to the documentation, the hardware descriptor is updated each
time the DMA transfers some bytes. This means we have to sum the
lengths of the not yet submitted hardware descriptors with whatever
values are currently in the hardware. Do this in a straightforward way.

Fixes: f0579c8ceaf1 ("dmaengine: hsu: speed up residue calculation")
Cc: stable@vger.kernel.org
Signed-off-by: Andy Shevchenko
Signed-off-by: Vinod Koul
---
 drivers/dma/hsu/hsu.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/dma/hsu/hsu.c b/drivers/dma/hsu/hsu.c
index c7643e022578..b3b212146620 100644
--- a/drivers/dma/hsu/hsu.c
+++ b/drivers/dma/hsu/hsu.c
@@ -254,10 +254,13 @@ static void hsu_dma_issue_pending(struct dma_chan *chan)
 static size_t hsu_dma_active_desc_size(struct hsu_dma_chan *hsuc)
 {
 	struct hsu_dma_desc *desc = hsuc->desc;
-	size_t bytes = desc->length;
+	size_t bytes = 0;
 	int i;
 
-	i = desc->active % HSU_DMA_CHAN_NR_DESC;
+	for (i = desc->active; i < desc->nents; i++)
+		bytes += desc->sg[i].len;
+
+	i = HSU_DMA_CHAN_NR_DESC - 1;
 	do {
 		bytes += hsu_chan_readl(hsuc, HSU_CH_DxTSR(i));
 	} while (--i >= 0);

From 4f4bc0abff79dc9d7ccbd3143adbf8ad1f4fe6ab Mon Sep 17 00:00:00 2001
From: Andy Shevchenko
Date: Fri, 18 Mar 2016 14:26:32 +0200
Subject: [PATCH 3/9] dmaengine: hsu: correct use of channel status register

There is a typo in the documentation regarding the descriptor empty bit
(DESCE), which is set to 1 when the descriptor is empty. Thus, at the
end of a transfer the status register usually returns with all DESCE
bits set and will never be zero.

Moreover, there are two bits (CDESC) that encode the current
descriptor, i.e. the one on which an interrupt has been asserted. When
more than one descriptor is programmed, these bits may hold a non-zero
value as well.

Remove the DESCE and CDESC bits from the DMA channel status register
(HSU_CH_SR) when reading it.
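To see why the mask matters, here is a small userspace model of the
read-out (illustrative only, not part of the patch; the sample value is
made up, while the bit positions follow the defines added below):

    #include <stdio.h>

    #define BIT(x)          (1U << (x))
    #define SR_DESCE_ANY    (BIT(19) | BIT(18) | BIT(17) | BIT(16))
    #define SR_CDESC_ANY    (BIT(31) | BIT(30))

    int main(void)
    {
        /* Hypothetical HSU_CH_SR after a completed transfer: all
         * descriptors empty, CDESC pointing at descriptor 2, but no
         * actual events pending. */
        unsigned int sr = SR_DESCE_ANY | BIT(31);

        printf("raw sr:    0x%08x\n", sr);              /* non-zero */
        printf("masked sr: 0x%08x\n",
               sr & ~(SR_DESCE_ANY | SR_CDESC_ANY));    /* zero */
        return 0;
    }

With the mask applied, a channel with no pending events reads back as
zero status, which is what callers checking for work to do expect.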
Fixes: 2b49e0c56741 ("dmaengine: append hsu DMA driver")
Cc: stable@vger.kernel.org
Signed-off-by: Andy Shevchenko
Signed-off-by: Vinod Koul
---
 drivers/dma/hsu/hsu.c | 2 +-
 drivers/dma/hsu/hsu.h | 3 +++
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/dma/hsu/hsu.c b/drivers/dma/hsu/hsu.c
index b3b212146620..ee510515ce18 100644
--- a/drivers/dma/hsu/hsu.c
+++ b/drivers/dma/hsu/hsu.c
@@ -135,7 +135,7 @@ static u32 hsu_dma_chan_get_sr(struct hsu_dma_chan *hsuc)
 	sr = hsu_chan_readl(hsuc, HSU_CH_SR);
 	spin_unlock_irqrestore(&hsuc->vchan.lock, flags);
 
-	return sr;
+	return sr & ~(HSU_CH_SR_DESCE_ANY | HSU_CH_SR_CDESC_ANY);
 }
 
 irqreturn_t hsu_dma_irq(struct hsu_dma_chip *chip, unsigned short nr)
diff --git a/drivers/dma/hsu/hsu.h b/drivers/dma/hsu/hsu.h
index 578a8ee8cd05..6b070c22b1df 100644
--- a/drivers/dma/hsu/hsu.h
+++ b/drivers/dma/hsu/hsu.h
@@ -41,6 +41,9 @@
 #define HSU_CH_SR_DESCTO(x)	BIT(8 + (x))
 #define HSU_CH_SR_DESCTO_ANY	(BIT(11) | BIT(10) | BIT(9) | BIT(8))
 #define HSU_CH_SR_CHE		BIT(15)
+#define HSU_CH_SR_DESCE(x)	BIT(16 + (x))
+#define HSU_CH_SR_DESCE_ANY	(BIT(19) | BIT(18) | BIT(17) | BIT(16))
+#define HSU_CH_SR_CDESC_ANY	(BIT(31) | BIT(30))
 
 /* Bits in HSU_CH_CR */
 #define HSU_CH_CR_CHA		BIT(0)

From 689d3c5ecc9e2eac714d32ca152b72988bc2a67b Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi
Date: Tue, 5 Apr 2016 15:20:20 +0300
Subject: [PATCH 4/9] dmaengine: omap-dma: Fix polled channel completion
 detection and handling

When the CCR_ENABLE bit indicates that the channel has stopped, we
should not call omap_dma_callback(); we should only change the return
value to DMA_COMPLETE. Client drivers will do the right thing to clean
up the channel after the transfer has completed.

Check CCR_ENABLE only if the channel is running and not paused, since
pause in sDMA means that the channel is stopped.

This fixes a hard-to-reproduce race condition when the channel is
terminated during a transfer (affecting cyclic operation).
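The new decision logic can be modeled in userspace like this (a sketch,
not the driver code; the CCR_ENABLE bit position and the mock states
are made up for illustration):

    #include <stdbool.h>
    #include <stdio.h>

    #define CCR_ENABLE (1U << 7)    /* illustrative bit position */

    enum status { COMPLETE, IN_PROGRESS };

    static enum status poll_status(bool paused, bool running,
                                   unsigned int ccr, enum status cur)
    {
        /*
         * Only trust a cleared CCR_ENABLE when the channel is
         * actually running: a paused channel is stopped too, but
         * must not be reported as complete.
         */
        if (!paused && running && !(ccr & CCR_ENABLE))
            return COMPLETE;
        return cur;
    }

    int main(void)
    {
        printf("%d\n", poll_status(true, true, 0, IN_PROGRESS));  /* 1 */
        printf("%d\n", poll_status(false, true, 0, IN_PROGRESS)); /* 0 */
        return 0;
    }

Note that the fix also stops calling omap_dma_callback() from the
status poll; completion handling is left to the client driver.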
Fixes: 1a7cf7b26f25 ("dmaengine: omap-dma: Handle cases when the channel is polled for completion")
Signed-off-by: Peter Ujfalusi
Signed-off-by: Vinod Koul
---
 drivers/dma/omap-dma.c | 22 ++++++++++++++++------
 1 file changed, 16 insertions(+), 6 deletions(-)

diff --git a/drivers/dma/omap-dma.c b/drivers/dma/omap-dma.c
index 43bd5aee7ffe..6ea9044d8b71 100644
--- a/drivers/dma/omap-dma.c
+++ b/drivers/dma/omap-dma.c
@@ -48,6 +48,7 @@ struct omap_chan {
 	unsigned dma_sig;
 	bool cyclic;
 	bool paused;
+	bool running;
 
 	int dma_ch;
 	struct omap_desc *desc;
@@ -294,6 +295,8 @@ static void omap_dma_start(struct omap_chan *c, struct omap_desc *d)
 
 	/* Enable channel */
 	omap_dma_chan_write(c, CCR, d->ccr | CCR_ENABLE);
+
+	c->running = true;
 }
 
 static void omap_dma_stop(struct omap_chan *c)
@@ -355,6 +358,8 @@ static void omap_dma_stop(struct omap_chan *c)
 
 		omap_dma_chan_write(c, CLNK_CTRL, val);
 	}
+
+	c->running = false;
 }
 
 static void omap_dma_start_sg(struct omap_chan *c, struct omap_desc *d,
@@ -673,15 +678,20 @@ static enum dma_status omap_dma_tx_status(struct dma_chan *chan,
 	struct omap_chan *c = to_omap_dma_chan(chan);
 	struct virt_dma_desc *vd;
 	enum dma_status ret;
-	uint32_t ccr;
 	unsigned long flags;
 
-	ccr = omap_dma_chan_read(c, CCR);
-	/* The channel is no longer active, handle the completion right away */
-	if (!(ccr & CCR_ENABLE))
-		omap_dma_callback(c->dma_ch, 0, c);
-
 	ret = dma_cookie_status(chan, cookie, txstate);
+
+	if (!c->paused && c->running) {
+		uint32_t ccr = omap_dma_chan_read(c, CCR);
+		/*
+		 * The channel is no longer active, set the return value
+		 * accordingly
+		 */
+		if (!(ccr & CCR_ENABLE))
+			ret = DMA_COMPLETE;
+	}
+
 	if (ret == DMA_COMPLETE || !txstate)
 		return ret;

From b96c033cc8fa6484b1771d43e2fcce61e1fa5097 Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi
Date: Tue, 5 Apr 2016 15:20:21 +0300
Subject: [PATCH 5/9] dmaengine: omap-dma: Do not suppress interrupts for
 memcpy

If the client queues up more transfers, the driver will not be able to
move to the next transfer without knowing that the previous descriptor
has completed.

Signed-off-by: Peter Ujfalusi
Signed-off-by: Vinod Koul
---
 drivers/dma/omap-dma.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/dma/omap-dma.c b/drivers/dma/omap-dma.c
index 6ea9044d8b71..1e984e18c126 100644
--- a/drivers/dma/omap-dma.c
+++ b/drivers/dma/omap-dma.c
@@ -955,9 +955,7 @@ static struct dma_async_tx_descriptor *omap_dma_prep_dma_memcpy(
 	d->ccr = c->ccr;
 	d->ccr |= CCR_DST_AMODE_POSTINC | CCR_SRC_AMODE_POSTINC;
 
-	d->cicr = CICR_DROP_IE;
-	if (tx_flags & DMA_PREP_INTERRUPT)
-		d->cicr |= CICR_FRAME_IE;
+	d->cicr = CICR_DROP_IE | CICR_FRAME_IE;
 
 	d->csdp = data_type;

From 330ed4da2c8eb821e70b1366a7af53eb1c7421fb Mon Sep 17 00:00:00 2001
From: Franck Jullien
Date: Tue, 22 Mar 2016 11:52:22 +0100
Subject: [PATCH 6/9] dmaengine: vdma: don't crash when bad channel is
 requested

When a client requests a non-existing channel from of_dma_xilinx_xlate()
we get a NULL pointer dereference. This patch fixes the problem.
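The rule this enforces can be shown with a small userspace model
(illustrative only; the structures are mocked up, not the driver's):

    #include <stdio.h>

    #define MAX_CHANS 4

    struct chan { int id; };

    static struct chan *chans[MAX_CHANS]; /* sparse: slots may be NULL */

    static struct chan *xlate(int chan_id)
    {
        /* An id can be out of range, or in range but never probed;
         * the old code only checked the range. */
        if (chan_id < 0 || chan_id >= MAX_CHANS || !chans[chan_id])
            return NULL;
        return chans[chan_id];
    }

    int main(void)
    {
        struct chan c0 = { 0 };

        chans[0] = &c0;
        printf("%p\n", (void *)xlate(0)); /* valid channel */
        printf("%p\n", (void *)xlate(1)); /* in range, never probed */
        printf("%p\n", (void *)xlate(9)); /* out of range */
        return 0;
    }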
Signed-off-by: Franck Jullien
Acked-by: Laurent Pinchart
Signed-off-by: Vinod Koul
---
 drivers/dma/xilinx/xilinx_vdma.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/dma/xilinx/xilinx_vdma.c b/drivers/dma/xilinx/xilinx_vdma.c
index 0ee0321868d3..ef67f278e076 100644
--- a/drivers/dma/xilinx/xilinx_vdma.c
+++ b/drivers/dma/xilinx/xilinx_vdma.c
@@ -1236,7 +1236,7 @@ static struct dma_chan *of_dma_xilinx_xlate(struct of_phandle_args *dma_spec,
 	struct xilinx_vdma_device *xdev = ofdma->of_dma_data;
 	int chan_id = dma_spec->args[0];
 
-	if (chan_id >= XILINX_VDMA_MAX_CHANS_PER_DEVICE)
+	if (chan_id >= XILINX_VDMA_MAX_CHANS_PER_DEVICE || !xdev->chan[chan_id])
 		return NULL;
 
 	return dma_get_slave_channel(&xdev->chan[chan_id]->common);

From 23f49fd2ea9bc8e1c8cff0126cd71b071ea9e91f Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi
Date: Wed, 6 Apr 2016 13:01:46 +0300
Subject: [PATCH 7/9] dmaengine: edma: Remove dynamic TPTC power management
 feature

Dynamic (on-demand) pm_runtime does not work correctly on am335x and
am437x due to interference with hwmod. Fall back to using pm_runtime
the way the old driver stack did: at probe time, call
pm_runtime_enable() and pm_runtime_get_sync() for the TPTCs as well.
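The behavioural change can be modeled with a plain refcount standing in
for the runtime PM core (illustrative only; the real calls are
pm_runtime_enable()/pm_runtime_get_sync()/pm_runtime_put_sync()):

    #include <stdio.h>

    static int usage; /* stands in for the device's runtime PM count */

    static void get(void) { if (usage++ == 0) printf("TPTC on\n"); }
    static void put(void) { if (--usage == 0) printf("TPTC off\n"); }

    int main(void)
    {
        /* Old, dynamic behaviour: power follows channel usage and
         * toggles at runtime, which interfered with hwmod. */
        get();
        put();

        /* New behaviour: a single get at probe time keeps the TPTC
         * powered for the lifetime of the driver. */
        get();
        printf("usage at rest: %d\n", usage);
        return 0;
    }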
"HW" : "SW"); - edma_tc_set_pm_state(echan->tc, true); - return 0; err_slot: @@ -1670,7 +1642,6 @@ static void edma_free_chan_resources(struct dma_chan *chan) echan->alloced = false; } - edma_tc_set_pm_state(echan->tc, false); echan->tc = NULL; echan->hw_triggered = false; @@ -2417,10 +2388,8 @@ static int edma_pm_suspend(struct device *dev) int i; for (i = 0; i < ecc->num_channels; i++) { - if (echan[i].alloced) { + if (echan[i].alloced) edma_setup_interrupt(&echan[i], false); - edma_tc_set_pm_state(echan[i].tc, false); - } } return 0; @@ -2450,8 +2419,6 @@ static int edma_pm_resume(struct device *dev) /* Set up channel -> slot mapping for the entry slot */ edma_set_chmap(&echan[i], echan[i].slot[0]); - - edma_tc_set_pm_state(echan[i].tc, true); } } @@ -2475,7 +2442,8 @@ static struct platform_driver edma_driver = { static int edma_tptc_probe(struct platform_device *pdev) { - return 0; + pm_runtime_enable(&pdev->dev); + return pm_runtime_get_sync(&pdev->dev); } static struct platform_driver edma_tptc_driver = { From a482f4e0d848d0914ff119ef32fe1d11434d570c Mon Sep 17 00:00:00 2001 From: John Ogness Date: Wed, 6 Apr 2016 13:01:47 +0300 Subject: [PATCH 8/9] dmaengine: edma: special case slot limit workaround Currently drivers are limited to 19 slots for cyclic transfers. However, if the DMA burst size is the same as the period size, the period size can be changed to the full buffer size and intermediate interrupts activated. Since intermediate interrupts will trigger for each burst and the burst size is the same as the period size, the driver will get interrupts each period as expected. This has the benefit of allowing the functionality of many more slots, but only uses 2 slots. This workaround is only active if more than 19 slots are needed and the burst size matches the period size. Acked-by: Peter Ujfalusi Signed-off-by: John Ogness Signed-off-by: Sekhar Nori Signed-off-by: Vinod Koul --- drivers/dma/edma.c | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/drivers/dma/edma.c b/drivers/dma/edma.c index 13b6a23dc06b..04070baab78a 100644 --- a/drivers/dma/edma.c +++ b/drivers/dma/edma.c @@ -1238,6 +1238,7 @@ static struct dma_async_tx_descriptor *edma_prep_dma_cyclic( struct edma_desc *edesc; dma_addr_t src_addr, dst_addr; enum dma_slave_buswidth dev_width; + bool use_intermediate = false; u32 burst; int i, ret, nslots; @@ -1279,8 +1280,21 @@ static struct dma_async_tx_descriptor *edma_prep_dma_cyclic( * but the synchronization is difficult to achieve with Cyclic and * cannot be guaranteed, so we error out early. */ - if (nslots > MAX_NR_SG) - return NULL; + if (nslots > MAX_NR_SG) { + /* + * If the burst and period sizes are the same, we can put + * the full buffer into a single period and activate + * intermediate interrupts. This will produce interrupts + * after each burst, which is also after each desired period. 
Acked-by: Peter Ujfalusi
Signed-off-by: John Ogness
Signed-off-by: Sekhar Nori
Signed-off-by: Vinod Koul
---
 drivers/dma/edma.c | 25 ++++++++++++++++++++++---
 1 file changed, 22 insertions(+), 3 deletions(-)

diff --git a/drivers/dma/edma.c b/drivers/dma/edma.c
index 13b6a23dc06b..04070baab78a 100644
--- a/drivers/dma/edma.c
+++ b/drivers/dma/edma.c
@@ -1238,6 +1238,7 @@ static struct dma_async_tx_descriptor *edma_prep_dma_cyclic(
 	struct edma_desc *edesc;
 	dma_addr_t src_addr, dst_addr;
 	enum dma_slave_buswidth dev_width;
+	bool use_intermediate = false;
 	u32 burst;
 	int i, ret, nslots;
 
@@ -1279,8 +1280,21 @@ static struct dma_async_tx_descriptor *edma_prep_dma_cyclic(
 	 * but the synchronization is difficult to achieve with Cyclic and
 	 * cannot be guaranteed, so we error out early.
 	 */
-	if (nslots > MAX_NR_SG)
-		return NULL;
+	if (nslots > MAX_NR_SG) {
+		/*
+		 * If the burst and period sizes are the same, we can put
+		 * the full buffer into a single period and activate
+		 * intermediate interrupts. This will produce interrupts
+		 * after each burst, which is also after each desired period.
+		 */
+		if (burst == period_len) {
+			period_len = buf_len;
+			nslots = 2;
+			use_intermediate = true;
+		} else {
+			return NULL;
+		}
+	}
 
 	edesc = kzalloc(sizeof(*edesc) + nslots * sizeof(edesc->pset[0]),
 			GFP_ATOMIC);
@@ -1358,8 +1372,13 @@ static struct dma_async_tx_descriptor *edma_prep_dma_cyclic(
 		/*
 		 * Enable period interrupt only if it is requested
 		 */
-		if (tx_flags & DMA_PREP_INTERRUPT)
+		if (tx_flags & DMA_PREP_INTERRUPT) {
 			edesc->pset[i].param.opt |= TCINTEN;
+
+			/* Also enable intermediate interrupts if necessary */
+			if (use_intermediate)
+				edesc->pset[i].param.opt |= ITCINTEN;
+		}
 	}
 
 	/* Place the cyclic channel to highest priority queue */

From 3fe6409c23e2bee4b2b1b6d671d2da8daa15271c Mon Sep 17 00:00:00 2001
From: Andy Shevchenko
Date: Fri, 8 Apr 2016 16:22:17 +0300
Subject: [PATCH 9/9] dmaengine: dw: fix master selection

Commit 895005202987 ("dmaengine: dw: apply both HS interfaces and
remove slave_id usage") cleaned up the code to avoid use of the
deprecated slave_id member of the generic slave configuration.
Meanwhile it broke master selection by removing the important call to
dwc_set_masters() in ->device_alloc_chan_resources(), which copied the
masters from the custom slave configuration to the internal channel
structure.

Everything has worked until now because there is no customized
connection of the DesignWare DMA IP to the bus, i.e. one bus and one or
more masters are in use. Configurations where two masters are connected
to different buses no longer work. We are expecting one user of such a
configuration and need to select the masters properly. Besides that, it
is obviously a performance regression, since only one master is in use
in a multi-master configuration.

Select the masters in accordance with what the user asked for. Keep
this patch in a form more suitable for backporting.

We are safe to take the necessary data in
->device_alloc_chan_resources() because we don't support a generic
slave configuration embedded into a custom one, and thus the only way
to provide one is via the parameter to a filter function, which is
called exactly before channel resource allocation.

While here, replace the BUG_ON() with a less noisy dev_warn() and
prevent channel allocation in case of error.
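The validate-at-allocation idea can be sketched in userspace with
mocked-up types (illustrative only, not the driver's structures):

    #include <stdbool.h>
    #include <stdio.h>

    struct slave { int dma_dev; int src_id, dst_id; };
    struct chan  { int dev; int src_id, dst_id; struct slave *private; };

    /* Mirrors the filter: reject foreign data, otherwise copy it,
     * since the slave data may live in temporary storage. */
    static bool filter(struct chan *c, struct slave *dws)
    {
        if (dws->dma_dev != c->dev)
            return false;
        c->src_id = dws->src_id;
        c->dst_id = dws->dst_id;
        return true;
    }

    static int alloc_chan_resources(struct chan *c)
    {
        /* Warn and fail instead of BUG_ON() at first use */
        if (c->private && !filter(c, c->private)) {
            fprintf(stderr, "Wrong controller-specific data\n");
            return -1; /* -EINVAL in the driver */
        }
        return 0;
    }

    int main(void)
    {
        struct slave good = { .dma_dev = 1, .src_id = 2, .dst_id = 3 };
        struct slave bad  = { .dma_dev = 9 };
        struct chan c = { .dev = 1, .private = &good };

        printf("good: %d\n", alloc_chan_resources(&c));
        c.private = &bad;
        printf("bad:  %d\n", alloc_chan_resources(&c));
        return 0;
    }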
- */ - BUG_ON(!dws->dma_dev || dws->dma_dev != dw->dma.dev); - - cfghi |= DWC_CFGH_DST_PER(dws->dst_id); - cfghi |= DWC_CFGH_SRC_PER(dws->src_id); - } else { - cfghi |= DWC_CFGH_DST_PER(dwc->dst_id); - cfghi |= DWC_CFGH_SRC_PER(dwc->src_id); - } + cfghi |= DWC_CFGH_DST_PER(dwc->dst_id); + cfghi |= DWC_CFGH_SRC_PER(dwc->src_id); channel_writel(dwc, CFG_LO, cfglo); channel_writel(dwc, CFG_HI, cfghi); @@ -941,7 +929,7 @@ bool dw_dma_filter(struct dma_chan *chan, void *param) struct dw_dma_chan *dwc = to_dw_dma_chan(chan); struct dw_dma_slave *dws = param; - if (!dws || dws->dma_dev != chan->device->dev) + if (dws->dma_dev != chan->device->dev) return false; /* We have to copy data since dws can be temporary storage */ @@ -1165,6 +1153,14 @@ static int dwc_alloc_chan_resources(struct dma_chan *chan) * doesn't mean what you think it means), and status writeback. */ + /* + * We need controller-specific data to set up slave transfers. + */ + if (chan->private && !dw_dma_filter(chan, chan->private)) { + dev_warn(chan2dev(chan), "Wrong controller-specific data\n"); + return -EINVAL; + } + /* Enable controller here if needed */ if (!dw->in_use) dw_dma_on(dw); @@ -1226,6 +1222,14 @@ static void dwc_free_chan_resources(struct dma_chan *chan) spin_lock_irqsave(&dwc->lock, flags); list_splice_init(&dwc->free_list, &list); dwc->descs_allocated = 0; + + /* Clear custom channel configuration */ + dwc->src_id = 0; + dwc->dst_id = 0; + + dwc->src_master = 0; + dwc->dst_master = 0; + dwc->initialized = false; /* Disable interrupts */