From 484f910e93b48c1d8890d8330a87e34ae61f4782 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Tue, 27 Oct 2020 14:34:09 -0700 Subject: [PATCH 01/10] dmaengine: idxd: fix wq config registers offset programming DSA spec v1.1 [1] updated to include a stride size register for WQ configuration that will specify how much space is reserved for the WQ configuration register set. This change is expected to be in the final gen1 DSA hardware. Fix the driver to use WQCFG_OFFSET() for all WQ offset calculation and fixup WQCFG_OFFSET() to use the new calculated wq size. [1]: https://software.intel.com/content/www/us/en/develop/download/intel-data-streaming-accelerator-preliminary-architecture-specification.html Fixes: bfe1d56091c1 ("dmaengine: idxd: Init and probe for Intel data accelerators") Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/160383444959.48058.14249265538404901781.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/device.c | 29 ++++++++++++++--------------- drivers/dma/idxd/idxd.h | 3 ++- drivers/dma/idxd/init.c | 5 +++++ drivers/dma/idxd/registers.h | 23 ++++++++++++++++++++++- 4 files changed, 43 insertions(+), 17 deletions(-) diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index 200b9109cacf..506ac85c4a7f 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -295,7 +295,7 @@ void idxd_wq_disable_cleanup(struct idxd_wq *wq) int i, wq_offset; lockdep_assert_held(&idxd->dev_lock); - memset(&wq->wqcfg, 0, sizeof(wq->wqcfg)); + memset(wq->wqcfg, 0, idxd->wqcfg_size); wq->type = IDXD_WQT_NONE; wq->size = 0; wq->group = NULL; @@ -304,8 +304,8 @@ void idxd_wq_disable_cleanup(struct idxd_wq *wq) clear_bit(WQ_FLAG_DEDICATED, &wq->flags); memset(wq->name, 0, WQ_NAME_SIZE); - for (i = 0; i < 8; i++) { - wq_offset = idxd->wqcfg_offset + wq->id * 32 + i * sizeof(u32); + for (i = 0; i < WQCFG_STRIDES(idxd); i++) { + wq_offset = WQCFG_OFFSET(idxd, wq->id, i); iowrite32(0, idxd->reg_base + wq_offset); dev_dbg(dev, "WQ[%d][%d][%#x]: %#x\n", wq->id, i, wq_offset, @@ -539,10 +539,10 @@ static int idxd_wq_config_write(struct idxd_wq *wq) if (!wq->group) return 0; - memset(&wq->wqcfg, 0, sizeof(union wqcfg)); + memset(wq->wqcfg, 0, idxd->wqcfg_size); /* byte 0-3 */ - wq->wqcfg.wq_size = wq->size; + wq->wqcfg->wq_size = wq->size; if (wq->size == 0) { dev_warn(dev, "Incorrect work queue size: 0\n"); @@ -550,22 +550,21 @@ static int idxd_wq_config_write(struct idxd_wq *wq) } /* bytes 4-7 */ - wq->wqcfg.wq_thresh = wq->threshold; + wq->wqcfg->wq_thresh = wq->threshold; /* byte 8-11 */ - wq->wqcfg.priv = !!(wq->type == IDXD_WQT_KERNEL); - wq->wqcfg.mode = 1; - - wq->wqcfg.priority = wq->priority; + wq->wqcfg->priv = !!(wq->type == IDXD_WQT_KERNEL); + wq->wqcfg->mode = 1; + wq->wqcfg->priority = wq->priority; /* bytes 12-15 */ - wq->wqcfg.max_xfer_shift = ilog2(wq->max_xfer_bytes); - wq->wqcfg.max_batch_shift = ilog2(wq->max_batch_size); + wq->wqcfg->max_xfer_shift = ilog2(wq->max_xfer_bytes); + wq->wqcfg->max_batch_shift = ilog2(wq->max_batch_size); dev_dbg(dev, "WQ %d CFGs\n", wq->id); - for (i = 0; i < 8; i++) { - wq_offset = idxd->wqcfg_offset + wq->id * 32 + i * sizeof(u32); - iowrite32(wq->wqcfg.bits[i], idxd->reg_base + wq_offset); + for (i = 0; i < WQCFG_STRIDES(idxd); i++) { + wq_offset = WQCFG_OFFSET(idxd, wq->id, i); + iowrite32(wq->wqcfg->bits[i], idxd->reg_base + wq_offset); dev_dbg(dev, "WQ[%d][%d][%#x]: %#x\n", wq->id, i, wq_offset, ioread32(idxd->reg_base + wq_offset)); diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index c64df197e724..d48f193daacc 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -103,7 +103,7 @@ struct idxd_wq { u32 priority; enum idxd_wq_state state; unsigned long flags; - union wqcfg wqcfg; + union wqcfg *wqcfg; u32 vec_ptr; /* interrupt steering */ struct dsa_hw_desc **hw_descs; int num_descs; @@ -183,6 +183,7 @@ struct idxd_device { int max_wq_size; int token_limit; int nr_tokens; /* non-reserved tokens */ + unsigned int wqcfg_size; union sw_err_reg sw_err; wait_queue_head_t cmd_waitq; diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index 11e5ce168177..0a4432b063b5 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -178,6 +178,9 @@ static int idxd_setup_internals(struct idxd_device *idxd) wq->idxd_cdev.minor = -1; wq->max_xfer_bytes = idxd->max_xfer_bytes; wq->max_batch_size = idxd->max_batch_size; + wq->wqcfg = devm_kzalloc(dev, idxd->wqcfg_size, GFP_KERNEL); + if (!wq->wqcfg) + return -ENOMEM; } for (i = 0; i < idxd->max_engines; i++) { @@ -251,6 +254,8 @@ static void idxd_read_caps(struct idxd_device *idxd) dev_dbg(dev, "total workqueue size: %u\n", idxd->max_wq_size); idxd->max_wqs = idxd->hw.wq_cap.num_wqs; dev_dbg(dev, "max workqueues: %u\n", idxd->max_wqs); + idxd->wqcfg_size = 1 << (idxd->hw.wq_cap.wqcfg_size + IDXD_WQCFG_MIN); + dev_dbg(dev, "wqcfg size: %u\n", idxd->wqcfg_size); /* reading operation capabilities */ for (i = 0; i < 4; i++) { diff --git a/drivers/dma/idxd/registers.h b/drivers/dma/idxd/registers.h index a39e7ae6b3d9..aef5a902829e 100644 --- a/drivers/dma/idxd/registers.h +++ b/drivers/dma/idxd/registers.h @@ -43,7 +43,8 @@ union wq_cap_reg { struct { u64 total_wq_size:16; u64 num_wqs:8; - u64 rsvd:24; + u64 wqcfg_size:4; + u64 rsvd:20; u64 shared_mode:1; u64 dedicated_mode:1; u64 rsvd2:1; @@ -55,6 +56,7 @@ union wq_cap_reg { u64 bits; } __packed; #define IDXD_WQCAP_OFFSET 0x20 +#define IDXD_WQCFG_MIN 5 union group_cap_reg { struct { @@ -333,4 +335,23 @@ union wqcfg { }; u32 bits[8]; } __packed; + +/* + * This macro calculates the offset into the WQCFG register + * idxd - struct idxd * + * n - wq id + * ofs - the index of the 32b dword for the config register + * + * The WQCFG register block is divided into groups per each wq. The n index + * allows us to move to the register group that's for that particular wq. + * Each register is 32bits. The ofs gives us the number of register to access. + */ +#define WQCFG_OFFSET(_idxd_dev, n, ofs) \ +({\ + typeof(_idxd_dev) __idxd_dev = (_idxd_dev); \ + (__idxd_dev)->wqcfg_offset + (n) * (__idxd_dev)->wqcfg_size + sizeof(u32) * (ofs); \ +}) + +#define WQCFG_STRIDES(_idxd_dev) ((_idxd_dev)->wqcfg_size / sizeof(u32)) + #endif From 768664114b1ac9184f1dc6217d9c930a08ffbfa8 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 26 Oct 2020 17:01:15 +0100 Subject: [PATCH 02/10] dmaengine: ti: k3-udma: fix -Wenum-conversion warning gcc warns about a mismatch argument type when passing 'false' into a function that expects an enum: drivers/dma/ti/k3-udma-private.c: In function 'xudma_tchan_get': drivers/dma/ti/k3-udma-private.c:86:34: warning: implicit conversion from 'enum ' to 'enum udma_tp_level' [-Wenum-conversion] 86 | return __udma_reserve_##res(ud, false, id); \ | ^~~~~ drivers/dma/ti/k3-udma-private.c:95:1: note: in expansion of macro 'XUDMA_GET_PUT_RESOURCE' 95 | XUDMA_GET_PUT_RESOURCE(tchan); | ^~~~~~~~~~~~~~~~~~~~~~ In this case, false has the same numerical value as UDMA_TP_NORMAL, so passing that is most likely the correct way to avoid the warning without changing the behavior. Fixes: d70241913413 ("dmaengine: ti: k3-udma: Add glue layer for non DMAengine users") Signed-off-by: Arnd Bergmann Acked-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20201026160123.3704531-1-arnd@kernel.org Signed-off-by: Vinod Koul --- drivers/dma/ti/k3-udma-private.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/ti/k3-udma-private.c b/drivers/dma/ti/k3-udma-private.c index aa24e554f7b4..8563a392f30b 100644 --- a/drivers/dma/ti/k3-udma-private.c +++ b/drivers/dma/ti/k3-udma-private.c @@ -83,7 +83,7 @@ EXPORT_SYMBOL(xudma_rflow_is_gp); #define XUDMA_GET_PUT_RESOURCE(res) \ struct udma_##res *xudma_##res##_get(struct udma_dev *ud, int id) \ { \ - return __udma_reserve_##res(ud, false, id); \ + return __udma_reserve_##res(ud, UDMA_TP_NORMAL, id); \ } \ EXPORT_SYMBOL(xudma_##res##_get); \ \ From 0ba2df09f1500d3f27398a3382b86d39c3e6abe2 Mon Sep 17 00:00:00 2001 From: Marc Ferland Date: Wed, 4 Nov 2020 12:30:04 +0530 Subject: [PATCH 03/10] dmaengine: xilinx_dma: use readl_poll_timeout_atomic variant The xilinx_dma_poll_timeout macro is sometimes called while holding a spinlock (see xilinx_dma_issue_pending() for an example) this means we shouldn't sleep when polling the dma channel registers. To address it in xilinx poll timeout macro use readl_poll_timeout_atomic instead of readl_poll_timeout variant. Signed-off-by: Marc Ferland Signed-off-by: Radhey Shyam Pandey Link: https://lore.kernel.org/r/1604473206-32573-2-git-send-email-radhey.shyam.pandey@xilinx.com Signed-off-by: Vinod Koul --- drivers/dma/xilinx/xilinx_dma.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/dma/xilinx/xilinx_dma.c b/drivers/dma/xilinx/xilinx_dma.c index ecff35402860..d858d540b88a 100644 --- a/drivers/dma/xilinx/xilinx_dma.c +++ b/drivers/dma/xilinx/xilinx_dma.c @@ -517,8 +517,8 @@ struct xilinx_dma_device { #define to_dma_tx_descriptor(tx) \ container_of(tx, struct xilinx_dma_tx_descriptor, async_tx) #define xilinx_dma_poll_timeout(chan, reg, val, cond, delay_us, timeout_us) \ - readl_poll_timeout(chan->xdev->regs + chan->ctrl_offset + reg, val, \ - cond, delay_us, timeout_us) + readl_poll_timeout_atomic(chan->xdev->regs + chan->ctrl_offset + reg, \ + val, cond, delay_us, timeout_us) /* IO accessors */ static inline u32 dma_read(struct xilinx_dma_chan *chan, u32 reg) From c8ae7932997d0cc92d016829138074c7520248e5 Mon Sep 17 00:00:00 2001 From: Matthew Murrian Date: Wed, 4 Nov 2020 12:30:05 +0530 Subject: [PATCH 04/10] dmaengine: xilinx_dma: Fix usage of xilinx_aximcdma_tx_segment Several code sections incorrectly use struct xilinx_axidma_tx_segment instead of struct xilinx_aximcdma_tx_segment when operating as Multichannel DMA. As their structures are similar, this just works. Fixes: 6ccd692bfb7f ("dmaengine: xilinx_dma: Add Xilinx AXI MCDMA Engine driver support") Signed-off-by: Matthew Murrian Signed-off-by: Radhey Shyam Pandey Link: https://lore.kernel.org/r/1604473206-32573-3-git-send-email-radhey.shyam.pandey@xilinx.com Signed-off-by: Vinod Koul --- drivers/dma/xilinx/xilinx_dma.c | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/drivers/dma/xilinx/xilinx_dma.c b/drivers/dma/xilinx/xilinx_dma.c index d858d540b88a..9fd924cf2337 100644 --- a/drivers/dma/xilinx/xilinx_dma.c +++ b/drivers/dma/xilinx/xilinx_dma.c @@ -948,8 +948,10 @@ static u32 xilinx_dma_get_residue(struct xilinx_dma_chan *chan, { struct xilinx_cdma_tx_segment *cdma_seg; struct xilinx_axidma_tx_segment *axidma_seg; + struct xilinx_aximcdma_tx_segment *aximcdma_seg; struct xilinx_cdma_desc_hw *cdma_hw; struct xilinx_axidma_desc_hw *axidma_hw; + struct xilinx_aximcdma_desc_hw *aximcdma_hw; struct list_head *entry; u32 residue = 0; @@ -961,13 +963,23 @@ static u32 xilinx_dma_get_residue(struct xilinx_dma_chan *chan, cdma_hw = &cdma_seg->hw; residue += (cdma_hw->control - cdma_hw->status) & chan->xdev->max_buffer_len; - } else { + } else if (chan->xdev->dma_config->dmatype == + XDMA_TYPE_AXIDMA) { axidma_seg = list_entry(entry, struct xilinx_axidma_tx_segment, node); axidma_hw = &axidma_seg->hw; residue += (axidma_hw->control - axidma_hw->status) & chan->xdev->max_buffer_len; + } else { + aximcdma_seg = + list_entry(entry, + struct xilinx_aximcdma_tx_segment, + node); + aximcdma_hw = &aximcdma_seg->hw; + residue += + (aximcdma_hw->control - aximcdma_hw->status) & + chan->xdev->max_buffer_len; } } @@ -1135,7 +1147,7 @@ static int xilinx_dma_alloc_chan_resources(struct dma_chan *dchan) upper_32_bits(chan->seg_p + sizeof(*chan->seg_mv) * ((i + 1) % XILINX_DMA_NUM_DESCS)); chan->seg_mv[i].phys = chan->seg_p + - sizeof(*chan->seg_v) * i; + sizeof(*chan->seg_mv) * i; list_add_tail(&chan->seg_mv[i].node, &chan->free_seg_list); } @@ -1560,7 +1572,7 @@ static void xilinx_dma_start_transfer(struct xilinx_dma_chan *chan) static void xilinx_mcdma_start_transfer(struct xilinx_dma_chan *chan) { struct xilinx_dma_tx_descriptor *head_desc, *tail_desc; - struct xilinx_axidma_tx_segment *tail_segment; + struct xilinx_aximcdma_tx_segment *tail_segment; u32 reg; /* @@ -1582,7 +1594,7 @@ static void xilinx_mcdma_start_transfer(struct xilinx_dma_chan *chan) tail_desc = list_last_entry(&chan->pending_list, struct xilinx_dma_tx_descriptor, node); tail_segment = list_last_entry(&tail_desc->segments, - struct xilinx_axidma_tx_segment, node); + struct xilinx_aximcdma_tx_segment, node); reg = dma_ctrl_read(chan, XILINX_MCDMA_CHAN_CR_OFFSET(chan->tdest)); @@ -1864,6 +1876,7 @@ static void append_desc_queue(struct xilinx_dma_chan *chan, struct xilinx_vdma_tx_segment *tail_segment; struct xilinx_dma_tx_descriptor *tail_desc; struct xilinx_axidma_tx_segment *axidma_tail_segment; + struct xilinx_aximcdma_tx_segment *aximcdma_tail_segment; struct xilinx_cdma_tx_segment *cdma_tail_segment; if (list_empty(&chan->pending_list)) @@ -1885,11 +1898,17 @@ static void append_desc_queue(struct xilinx_dma_chan *chan, struct xilinx_cdma_tx_segment, node); cdma_tail_segment->hw.next_desc = (u32)desc->async_tx.phys; - } else { + } else if (chan->xdev->dma_config->dmatype == XDMA_TYPE_AXIDMA) { axidma_tail_segment = list_last_entry(&tail_desc->segments, struct xilinx_axidma_tx_segment, node); axidma_tail_segment->hw.next_desc = (u32)desc->async_tx.phys; + } else { + aximcdma_tail_segment = + list_last_entry(&tail_desc->segments, + struct xilinx_aximcdma_tx_segment, + node); + aximcdma_tail_segment->hw.next_desc = (u32)desc->async_tx.phys; } /* From 96d5d884f78306206d745d856aad322becd100c3 Mon Sep 17 00:00:00 2001 From: Matthew Murrian Date: Wed, 4 Nov 2020 12:30:06 +0530 Subject: [PATCH 05/10] dmaengine: xilinx_dma: Fix SG capability check for MCDMA The SG capability is inherently present with Multichannel DMA operation. The register used to check for this capability with other DMA driver types is not defined for MCDMA. Fixes: 6ccd692bfb7f ("dmaengine: xilinx_dma: Add Xilinx AXI MCDMA Engine driver support") Signed-off-by: Matthew Murrian Signed-off-by: Radhey Shyam Pandey Link: https://lore.kernel.org/r/1604473206-32573-4-git-send-email-radhey.shyam.pandey@xilinx.com Signed-off-by: Vinod Koul --- drivers/dma/xilinx/xilinx_dma.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/dma/xilinx/xilinx_dma.c b/drivers/dma/xilinx/xilinx_dma.c index 9fd924cf2337..22faea653ea8 100644 --- a/drivers/dma/xilinx/xilinx_dma.c +++ b/drivers/dma/xilinx/xilinx_dma.c @@ -2855,10 +2855,11 @@ static int xilinx_dma_chan_probe(struct xilinx_dma_device *xdev, chan->stop_transfer = xilinx_dma_stop_transfer; } - /* check if SG is enabled (only for AXIDMA and CDMA) */ + /* check if SG is enabled (only for AXIDMA, AXIMCDMA, and CDMA) */ if (xdev->dma_config->dmatype != XDMA_TYPE_VDMA) { - if (dma_ctrl_read(chan, XILINX_DMA_REG_DMASR) & - XILINX_DMA_DMASR_SG_MASK) + if (xdev->dma_config->dmatype == XDMA_TYPE_AXIMCDMA || + dma_ctrl_read(chan, XILINX_DMA_REG_DMASR) & + XILINX_DMA_DMASR_SG_MASK) chan->has_sg = true; dev_dbg(chan->dev, "ch %d: SG %s\n", chan->id, chan->has_sg ? "enabled" : "disabled"); From 29a25b9246f7f24203d30d59424cbe22bd905dfc Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Mon, 9 Nov 2020 17:40:13 +0200 Subject: [PATCH 06/10] dmaengine: ti: omap-dma: Block PM if SDMA is busy to fix audio We now use cpu_pm for saving and restoring device context for deeper SoC idle states. But for omap3, we must also block idle if SDMA is busy. If we don't block idle when SDMA is busy, we eventually end up saving and restoring SDMA register state on PER domain idle while SDMA is active and that causes at least audio playback to fail. Fixes: 4c74ecf79227 ("dmaengine: ti: omap-dma: Add device tree match data and use it for cpu_pm") Reported-by: Peter Ujfalusi Signed-off-by: Tony Lindgren Tested-by: Peter Ujfalusi Acked-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20201109154013.11950-1-tony@atomide.com Signed-off-by: Vinod Koul --- drivers/dma/ti/omap-dma.c | 37 ++++++++++++++++++++++++------------- 1 file changed, 24 insertions(+), 13 deletions(-) diff --git a/drivers/dma/ti/omap-dma.c b/drivers/dma/ti/omap-dma.c index c9fe5e3a6b55..268a08058714 100644 --- a/drivers/dma/ti/omap-dma.c +++ b/drivers/dma/ti/omap-dma.c @@ -1522,29 +1522,38 @@ static void omap_dma_free(struct omap_dmadev *od) } } +/* Currently used by omap2 & 3 to block deeper SoC idle states */ +static bool omap_dma_busy(struct omap_dmadev *od) +{ + struct omap_chan *c; + int lch = -1; + + while (1) { + lch = find_next_bit(od->lch_bitmap, od->lch_count, lch + 1); + if (lch >= od->lch_count) + break; + c = od->lch_map[lch]; + if (!c) + continue; + if (omap_dma_chan_read(c, CCR) & CCR_ENABLE) + return true; + } + + return false; +} + /* Currently only used for omap2. For omap1, also a check for lcd_dma is needed */ static int omap_dma_busy_notifier(struct notifier_block *nb, unsigned long cmd, void *v) { struct omap_dmadev *od; - struct omap_chan *c; - int lch = -1; od = container_of(nb, struct omap_dmadev, nb); switch (cmd) { case CPU_CLUSTER_PM_ENTER: - while (1) { - lch = find_next_bit(od->lch_bitmap, od->lch_count, - lch + 1); - if (lch >= od->lch_count) - break; - c = od->lch_map[lch]; - if (!c) - continue; - if (omap_dma_chan_read(c, CCR) & CCR_ENABLE) - return NOTIFY_BAD; - } + if (omap_dma_busy(od)) + return NOTIFY_BAD; break; case CPU_CLUSTER_PM_ENTER_FAILED: case CPU_CLUSTER_PM_EXIT: @@ -1595,6 +1604,8 @@ static int omap_dma_context_notifier(struct notifier_block *nb, switch (cmd) { case CPU_CLUSTER_PM_ENTER: + if (omap_dma_busy(od)) + return NOTIFY_BAD; omap_dma_context_save(od); break; case CPU_CLUSTER_PM_ENTER_FAILED: From 8326be9f1c0bb498baf134878a8deb8a952e0135 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Wed, 11 Nov 2020 15:23:46 -0700 Subject: [PATCH 07/10] dmaengine: idxd: fix mapping of portal size Portal size is 4k. Current code is mapping all 4 portals in a single chunk. Restrict the mapped portal size to a single portal to ensure that submission only goes to the intended portal address. Fixes: c52ca478233c ("dmaengine: idxd: add configuration component of driver") Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/160513342642.510187.16450549281618747065.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/device.c | 2 +- drivers/dma/idxd/registers.h | 2 +- drivers/dma/idxd/submit.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index 506ac85c4a7f..663344987e3f 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -271,7 +271,7 @@ int idxd_wq_map_portal(struct idxd_wq *wq) resource_size_t start; start = pci_resource_start(pdev, IDXD_WQ_BAR); - start = start + wq->id * IDXD_PORTAL_SIZE; + start += idxd_get_wq_portal_full_offset(wq->id, IDXD_PORTAL_LIMITED); wq->dportal = devm_ioremap(dev, start, IDXD_PORTAL_SIZE); if (!wq->dportal) diff --git a/drivers/dma/idxd/registers.h b/drivers/dma/idxd/registers.h index aef5a902829e..54390334c243 100644 --- a/drivers/dma/idxd/registers.h +++ b/drivers/dma/idxd/registers.h @@ -8,7 +8,7 @@ #define IDXD_MMIO_BAR 0 #define IDXD_WQ_BAR 2 -#define IDXD_PORTAL_SIZE 0x4000 +#define IDXD_PORTAL_SIZE PAGE_SIZE /* MMIO Device BAR0 Registers */ #define IDXD_VER_OFFSET 0x00 diff --git a/drivers/dma/idxd/submit.c b/drivers/dma/idxd/submit.c index 156a1ee233aa..417048e3c42a 100644 --- a/drivers/dma/idxd/submit.c +++ b/drivers/dma/idxd/submit.c @@ -74,7 +74,7 @@ int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc) if (idxd->state != IDXD_DEV_ENABLED) return -EIO; - portal = wq->dportal + idxd_get_wq_portal_offset(IDXD_PORTAL_UNLIMITED); + portal = wq->dportal; /* * The wmb() flushes writes to coherent DMA data before possibly * triggering a DMA read. The wmb() is necessary even on UP because From 4e7d4f295dee1feed96b2b0a31d80d673b5465e8 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Fri, 13 Nov 2020 09:12:48 +0100 Subject: [PATCH 08/10] dmaengine: ioatdma: remove unused function missed during dma_v2 removal Commit 7f832645d0e5 ("dmaengine: ioatdma: remove ioatdma v2 registration") missed to remove dca2_tag_map_valid() during its removal. Hence, since then, dca2_tag_map_valid() is unused and make CC=clang W=1 warns: drivers/dma/ioat/dca.c:44:19: warning: unused function 'dca2_tag_map_valid' [-Wunused-function] So, remove this unused function and get rid of a -Wused-function warning. Signed-off-by: Lukas Bulwahn Reviewed-by: Nathan Chancellor Link: https://lore.kernel.org/r/20201113081248.26416-1-lukas.bulwahn@gmail.com Signed-off-by: Vinod Koul --- drivers/dma/ioat/dca.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/drivers/dma/ioat/dca.c b/drivers/dma/ioat/dca.c index 0be385587c4c..289c59ed74b9 100644 --- a/drivers/dma/ioat/dca.c +++ b/drivers/dma/ioat/dca.c @@ -40,16 +40,6 @@ #define DCA2_TAG_MAP_BYTE3 0x82 #define DCA2_TAG_MAP_BYTE4 0x82 -/* verify if tag map matches expected values */ -static inline int dca2_tag_map_valid(u8 *tag_map) -{ - return ((tag_map[0] == DCA2_TAG_MAP_BYTE0) && - (tag_map[1] == DCA2_TAG_MAP_BYTE1) && - (tag_map[2] == DCA2_TAG_MAP_BYTE2) && - (tag_map[3] == DCA2_TAG_MAP_BYTE3) && - (tag_map[4] == DCA2_TAG_MAP_BYTE4)); -} - /* * "Legacy" DCA systems do not implement the DCA register set in the * I/OAT device. Software needs direct support for their tag mappings. From e773ca7da8beeca7f17fe4c9d1284a2b66839cc1 Mon Sep 17 00:00:00 2001 From: Sugar Zhang Date: Sat, 14 Nov 2020 11:55:06 +0800 Subject: [PATCH 09/10] dmaengine: pl330: _prep_dma_memcpy: Fix wrong burst size Actually, burst size is equal to '1 << desc->rqcfg.brst_size'. we should use burst size, not desc->rqcfg.brst_size. dma memcpy performance on Rockchip RV1126 @ 1512MHz A7, 1056MHz LPDDR3, 200MHz DMA: dmatest: /# echo dma0chan0 > /sys/module/dmatest/parameters/channel /# echo 4194304 > /sys/module/dmatest/parameters/test_buf_size /# echo 8 > /sys/module/dmatest/parameters/iterations /# echo y > /sys/module/dmatest/parameters/norandom /# echo y > /sys/module/dmatest/parameters/verbose /# echo 1 > /sys/module/dmatest/parameters/run dmatest: dma0chan0-copy0: result #1: 'test passed' with src_off=0x0 dst_off=0x0 len=0x400000 dmatest: dma0chan0-copy0: result #2: 'test passed' with src_off=0x0 dst_off=0x0 len=0x400000 dmatest: dma0chan0-copy0: result #3: 'test passed' with src_off=0x0 dst_off=0x0 len=0x400000 dmatest: dma0chan0-copy0: result #4: 'test passed' with src_off=0x0 dst_off=0x0 len=0x400000 dmatest: dma0chan0-copy0: result #5: 'test passed' with src_off=0x0 dst_off=0x0 len=0x400000 dmatest: dma0chan0-copy0: result #6: 'test passed' with src_off=0x0 dst_off=0x0 len=0x400000 dmatest: dma0chan0-copy0: result #7: 'test passed' with src_off=0x0 dst_off=0x0 len=0x400000 dmatest: dma0chan0-copy0: result #8: 'test passed' with src_off=0x0 dst_off=0x0 len=0x400000 Before: dmatest: dma0chan0-copy0: summary 8 tests, 0 failures 48 iops 200338 KB/s (0) After this patch: dmatest: dma0chan0-copy0: summary 8 tests, 0 failures 179 iops 734873 KB/s (0) After this patch and increase dma clk to 400MHz: dmatest: dma0chan0-copy0: summary 8 tests, 0 failures 259 iops 1062929 KB/s (0) Signed-off-by: Sugar Zhang Link: https://lore.kernel.org/r/1605326106-55681-1-git-send-email-sugar.zhang@rock-chips.com Signed-off-by: Vinod Koul --- drivers/dma/pl330.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c index e9f0101d92fa..0f5c19370f6d 100644 --- a/drivers/dma/pl330.c +++ b/drivers/dma/pl330.c @@ -2799,7 +2799,7 @@ pl330_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dst, * If burst size is smaller than bus width then make sure we only * transfer one at a time to avoid a burst stradling an MFIFO entry. */ - if (desc->rqcfg.brst_size * 8 < pl330->pcfg.data_bus_width) + if (burst * 8 < pl330->pcfg.data_bus_width) desc->rqcfg.brst_len = 1; desc->bytes_requested = len; From 7e4be1290a38b3dd4a77cdf4565c9ffe7e620013 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 13 Nov 2020 13:16:31 +0300 Subject: [PATCH 10/10] dmaengine: fix error codes in channel_register() The error codes were not set on some of these error paths. Also the error handling was more confusing than it needed to be so I cleaned it up and shuffled it around a bit. Fixes: d2fb0a043838 ("dmaengine: break out channel registration") Signed-off-by: Dan Carpenter Reviewed-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20201113101631.GE168908@mwanda Signed-off-by: Vinod Koul --- drivers/dma/dmaengine.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c index 7974fa0400d8..962cbb5e5f7f 100644 --- a/drivers/dma/dmaengine.c +++ b/drivers/dma/dmaengine.c @@ -1039,16 +1039,15 @@ static int get_dma_id(struct dma_device *device) static int __dma_async_device_channel_register(struct dma_device *device, struct dma_chan *chan) { - int rc = 0; + int rc; chan->local = alloc_percpu(typeof(*chan->local)); if (!chan->local) - goto err_out; + return -ENOMEM; chan->dev = kzalloc(sizeof(*chan->dev), GFP_KERNEL); if (!chan->dev) { - free_percpu(chan->local); - chan->local = NULL; - goto err_out; + rc = -ENOMEM; + goto err_free_local; } /* @@ -1061,7 +1060,8 @@ static int __dma_async_device_channel_register(struct dma_device *device, if (chan->chan_id < 0) { pr_err("%s: unable to alloc ida for chan: %d\n", __func__, chan->chan_id); - goto err_out; + rc = chan->chan_id; + goto err_free_dev; } chan->dev->device.class = &dma_devclass; @@ -1082,9 +1082,10 @@ static int __dma_async_device_channel_register(struct dma_device *device, mutex_lock(&device->chan_mutex); ida_free(&device->chan_ida, chan->chan_id); mutex_unlock(&device->chan_mutex); - err_out: - free_percpu(chan->local); + err_free_dev: kfree(chan->dev); + err_free_local: + free_percpu(chan->local); return rc; }