From 2ee471a1e28ec79fbfcdc8900ed0ed74132b0efe Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Thu, 18 Jun 2020 08:06:23 -0700 Subject: [PATCH 1/4] spi: spi-geni-qcom: Mo' betta locking If you added a bit of a delay (like a trace_printk) into the ISR for the spi-geni-qcom driver, you would suddenly start seeing some errors spit out. The problem was that, though the ISR itself held a lock, other parts of the driver didn't always grab the lock. One example race was this: CPU0 CPU1 ---- ---- spi_geni_set_cs() mas->cur_mcmd = CMD_CS; geni_se_setup_m_cmd(...) wait_for_completion_timeout(&xfer_done); geni_spi_isr() complete(&xfer_done); pm_runtime_put(mas->dev); ... // back to SPI core spi_geni_transfer_one() setup_fifo_xfer() mas->cur_mcmd = CMD_XFER; mas->cur_cmd = CMD_NONE; // bad! return IRQ_HANDLED; Let's fix this. Before we start messing with hardware, we'll grab the lock to make sure that the IRQ handler from some previous command has really finished. We don't need to hold the lock unless we're in a state where more interrupts can come in, but we at least need to make sure the previous IRQ is done. This lock is used exclusively to prevent the IRQ handler and non-IRQ from stomping on each other. The SPI core handles all other mutual exclusion. As part of this, we change the way that the IRQ handler detects spurious interrupts. Previously we checked for our state variable being set to IRQ_NONE, but that was done outside the spinlock. We could move it into the spinlock, but instead let's just change it to look for the lack of any IRQ status bits being set. This can be done outside the lock--the hardware certainly isn't grabbing or looking at the spinlock when it updates its status register. It's possible that this will fix real (but very rare) errors seen in the field that look like: irq ...: nobody cared (try booting with the "irqpoll" option) NOTE: an alternate strategy considered here was to always make the complete() / spi_finalize_current_transfer() the very last thing in our IRQ handler. With such a change you could consider that we could be "lockless". In that case, though, we'd have to be very careful w/ memory barriers so we made sure we didn't have any bugs with weakly ordered memory. Using spinlocks makes the driver much easier to understand. Fixes: 561de45f72bd ("spi: spi-geni-qcom: Add SPI driver support for GENI based QUP") Signed-off-by: Douglas Anderson Reviewed-by: Stephen Boyd Link: https://lore.kernel.org/r/20200618080459.v4.2.I752ebdcfd5e8bf0de06d66e767b8974932b3620e@changeid Signed-off-by: Mark Brown --- drivers/spi/spi-geni-qcom.c | 45 ++++++++++++++++++++++++++++++++++--- 1 file changed, 42 insertions(+), 3 deletions(-) diff --git a/drivers/spi/spi-geni-qcom.c b/drivers/spi/spi-geni-qcom.c index c7d2c7e45b3f..7d022ccb1b6c 100644 --- a/drivers/spi/spi-geni-qcom.c +++ b/drivers/spi/spi-geni-qcom.c @@ -151,16 +151,18 @@ static void spi_geni_set_cs(struct spi_device *slv, bool set_flag) struct geni_se *se = &mas->se; unsigned long time_left; - reinit_completion(&mas->xfer_done); pm_runtime_get_sync(mas->dev); if (!(slv->mode & SPI_CS_HIGH)) set_flag = !set_flag; + spin_lock_irq(&mas->lock); + reinit_completion(&mas->xfer_done); mas->cur_mcmd = CMD_CS; if (set_flag) geni_se_setup_m_cmd(se, SPI_CS_ASSERT, 0); else geni_se_setup_m_cmd(se, SPI_CS_DEASSERT, 0); + spin_unlock_irq(&mas->lock); time_left = wait_for_completion_timeout(&mas->xfer_done, HZ); if (!time_left) @@ -307,6 +309,21 @@ static void setup_fifo_xfer(struct spi_transfer *xfer, u32 spi_tx_cfg, len; struct geni_se *se = &mas->se; + /* + * Ensure that our interrupt handler isn't still running from some + * prior command before we start messing with the hardware behind + * its back. We don't need to _keep_ the lock here since we're only + * worried about racing with out interrupt handler. The SPI core + * already handles making sure that we're not trying to do two + * transfers at once or setting a chip select and doing a transfer + * concurrently. + * + * NOTE: we actually _can't_ hold the lock here because possibly we + * might call clk_set_rate() which needs to be able to sleep. + */ + spin_lock_irq(&mas->lock); + spin_unlock_irq(&mas->lock); + spi_tx_cfg = readl(se->base + SE_SPI_TRANS_CFG); if (xfer->bits_per_word != mas->cur_bits_per_word) { spi_setup_word_len(mas, mode, xfer->bits_per_word); @@ -367,6 +384,12 @@ static void setup_fifo_xfer(struct spi_transfer *xfer, } writel(spi_tx_cfg, se->base + SE_SPI_TRANS_CFG); mas->cur_mcmd = CMD_XFER; + + /* + * Lock around right before we start the transfer since our + * interrupt could come in at any time now. + */ + spin_lock_irq(&mas->lock); geni_se_setup_m_cmd(se, m_cmd, FRAGMENTATION); /* @@ -376,6 +399,7 @@ static void setup_fifo_xfer(struct spi_transfer *xfer, */ if (m_cmd & SPI_TX_ONLY) writel(mas->tx_wm, se->base + SE_GENI_TX_WATERMARK_REG); + spin_unlock_irq(&mas->lock); } static int spi_geni_transfer_one(struct spi_master *spi, @@ -478,11 +502,11 @@ static irqreturn_t geni_spi_isr(int irq, void *data) struct geni_se *se = &mas->se; u32 m_irq; - if (mas->cur_mcmd == CMD_NONE) + m_irq = readl(se->base + SE_GENI_M_IRQ_STATUS); + if (!m_irq) return IRQ_NONE; spin_lock(&mas->lock); - m_irq = readl(se->base + SE_GENI_M_IRQ_STATUS); if ((m_irq & M_RX_FIFO_WATERMARK_EN) || (m_irq & M_RX_FIFO_LAST_EN)) geni_spi_handle_rx(mas); @@ -522,8 +546,23 @@ static irqreturn_t geni_spi_isr(int irq, void *data) complete(&mas->xfer_done); } + /* + * It's safe or a good idea to Ack all of our our interrupts at the + * end of the function. Specifically: + * - M_CMD_DONE_EN / M_RX_FIFO_LAST_EN: Edge triggered interrupts and + * clearing Acks. Clearing at the end relies on nobody else having + * started a new transfer yet or else we could be clearing _their_ + * done bit, but everyone grabs the spinlock before starting a new + * transfer. + * - M_RX_FIFO_WATERMARK_EN / M_TX_FIFO_WATERMARK_EN: These appear + * to be "latched level" interrupts so it's important to clear them + * _after_ you've handled the condition and always safe to do so + * since they'll re-assert if they're still happening. + */ writel(m_irq, se->base + SE_GENI_M_IRQ_CLEAR); + spin_unlock(&mas->lock); + return IRQ_HANDLED; } From e191a082d764e80a36c198da61fbf2851ebf425a Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Thu, 18 Jun 2020 08:06:24 -0700 Subject: [PATCH 2/4] spi: spi-geni-qcom: Check for error IRQs >From reading the #defines it seems like we should shout if we ever see one of these error bits. Let's do so. This doesn't do anything functional except print a yell in the log if the error bits are seen. Signed-off-by: Douglas Anderson Reviewed-by: Stephen Boyd Link: https://lore.kernel.org/r/20200618080459.v4.3.Id8bebdbdb4d2ed9468634343a7e6207d6cffff8a@changeid Signed-off-by: Mark Brown --- drivers/spi/spi-geni-qcom.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/spi/spi-geni-qcom.c b/drivers/spi/spi-geni-qcom.c index 7d022ccb1b6c..11f36d237c77 100644 --- a/drivers/spi/spi-geni-qcom.c +++ b/drivers/spi/spi-geni-qcom.c @@ -506,6 +506,11 @@ static irqreturn_t geni_spi_isr(int irq, void *data) if (!m_irq) return IRQ_NONE; + if (m_irq & (M_CMD_OVERRUN_EN | M_ILLEGAL_CMD_EN | M_CMD_FAILURE_EN | + M_RX_FIFO_RD_ERR_EN | M_RX_FIFO_WR_ERR_EN | + M_TX_FIFO_RD_ERR_EN | M_TX_FIFO_WR_ERR_EN)) + dev_warn(mas->dev, "Unexpected IRQ err status %#010x\n", m_irq); + spin_lock(&mas->lock); if ((m_irq & M_RX_FIFO_WATERMARK_EN) || (m_irq & M_RX_FIFO_LAST_EN)) From 902481a78ee4173926dc59f060526dee21aeb7a8 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Thu, 18 Jun 2020 08:06:25 -0700 Subject: [PATCH 3/4] spi: spi-geni-qcom: Actually use our FIFO The geni hardware has a FIFO that can hold up to 64 bytes (it has 16 entries that can hold 4 bytes each), at least on the two SoCs I tested (sdm845 and sc7180). We configured our RX Watermark to 0, which basically meant we got an interrupt as soon as the first 4 bytes showed up in the FIFO. Tracing the IRQ handler showed that we often only read 4 or 8 bytes per IRQ handler. I tried setting the RX Watermark to "fifo size - 2" but that just got me a bunch of overrun errors reported. Setting it to "fifo size - 3" seemed to work great, though. This made me worried that we'd start getting overruns if we had long interrupt latency, but that doesn't appear to be the case and delays inserted in the IRQ handler while using "fifo size - 3" didn't cause any errors. Presumably there is some interaction with the poorly-documented RFR (ready for receive) level means that "fifo size - 3" is the max. We are the SPI master, so it makes sense that there would be no problems with overruns, the master should just stop clocking. Despite "fifo size - 3" working, I chose "fifo size / 2" (8 entries = 32 bytes) which gives us a little extra time to get to the interrupt handler and should reduce dead time on the SPI wires. With this setting, I often saw the IRQ handler handle 40 bytes but sometimes up to 56 if we had bad interrupt latency. Testing by running "flashrom -p ec -r" on a Chromebook saw interrupts from the SPI driver cut roughly in half. Time was roughly the same. Fixes: 561de45f72bd ("spi: spi-geni-qcom: Add SPI driver support for GENI based QUP") Signed-off-by: Douglas Anderson Reviewed-by: Stephen Boyd Link: https://lore.kernel.org/r/20200618080459.v4.4.I988281f7c6ee0ed00325559bfce7539f403da69e@changeid Signed-off-by: Mark Brown --- drivers/spi/spi-geni-qcom.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-geni-qcom.c b/drivers/spi/spi-geni-qcom.c index 11f36d237c77..5b8ca8b93b06 100644 --- a/drivers/spi/spi-geni-qcom.c +++ b/drivers/spi/spi-geni-qcom.c @@ -285,7 +285,7 @@ static int spi_geni_init(struct spi_geni_master *mas) * Hardware programming guide suggests to configure * RX FIFO RFR level to fifo_depth-2. */ - geni_se_init(se, 0x0, mas->tx_fifo_depth - 2); + geni_se_init(se, mas->tx_fifo_depth / 2, mas->tx_fifo_depth - 2); /* Transmit an entire FIFO worth of data per IRQ */ mas->tx_wm = 1; ver = geni_se_get_qup_hw_version(se); From 7ba9bdcb91f694b0eaf486a825afd9c2d99532b7 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Thu, 18 Jun 2020 08:06:26 -0700 Subject: [PATCH 4/4] spi: spi-geni-qcom: Don't keep a local state variable The variable "cur_mcmd" kept track of our current state (idle, xfer, cs, cancel). We don't really need it, so get rid of it. Instead: * Use separate condition variables for "chip select done", "cancel done", and "abort done". This is important so that if a "done" comes through (perhaps some previous interrupt finally came through) it can't confuse the cancel/abort function. * Use the "done" interrupt only for when a chip select or transfer is done and we can tell the difference by looking at whether "cur_xfer" is NULL. This is mostly a no-op change. However, it is possible it could fix an issue where a super delayed interrupt for a cancel command could have confused our waiting for an abort command. Signed-off-by: Douglas Anderson Reviewed-by: Stephen Boyd Link: https://lore.kernel.org/r/20200618080459.v4.5.Ib1e6855405fc9c99916ab7c7dee84d73a8bf3d68@changeid Signed-off-by: Mark Brown --- drivers/spi/spi-geni-qcom.c | 55 ++++++++++++++++++------------------- 1 file changed, 27 insertions(+), 28 deletions(-) diff --git a/drivers/spi/spi-geni-qcom.c b/drivers/spi/spi-geni-qcom.c index 5b8ca8b93b06..0c534d151370 100644 --- a/drivers/spi/spi-geni-qcom.c +++ b/drivers/spi/spi-geni-qcom.c @@ -63,13 +63,6 @@ #define TIMESTAMP_AFTER BIT(3) #define POST_CMD_DELAY BIT(4) -enum spi_m_cmd_opcode { - CMD_NONE, - CMD_XFER, - CMD_CS, - CMD_CANCEL, -}; - struct spi_geni_master { struct geni_se se; struct device *dev; @@ -81,10 +74,11 @@ struct spi_geni_master { unsigned int tx_rem_bytes; unsigned int rx_rem_bytes; const struct spi_transfer *cur_xfer; - struct completion xfer_done; + struct completion cs_done; + struct completion cancel_done; + struct completion abort_done; unsigned int oversampling; spinlock_t lock; - enum spi_m_cmd_opcode cur_mcmd; int irq; }; @@ -126,20 +120,23 @@ static void handle_fifo_timeout(struct spi_master *spi, struct geni_se *se = &mas->se; spin_lock_irq(&mas->lock); - reinit_completion(&mas->xfer_done); - mas->cur_mcmd = CMD_CANCEL; - geni_se_cancel_m_cmd(se); + reinit_completion(&mas->cancel_done); writel(0, se->base + SE_GENI_TX_WATERMARK_REG); + mas->cur_xfer = NULL; + mas->tx_rem_bytes = mas->rx_rem_bytes = 0; + geni_se_cancel_m_cmd(se); spin_unlock_irq(&mas->lock); - time_left = wait_for_completion_timeout(&mas->xfer_done, HZ); + + time_left = wait_for_completion_timeout(&mas->cancel_done, HZ); if (time_left) return; spin_lock_irq(&mas->lock); - reinit_completion(&mas->xfer_done); + reinit_completion(&mas->abort_done); geni_se_abort_m_cmd(se); spin_unlock_irq(&mas->lock); - time_left = wait_for_completion_timeout(&mas->xfer_done, HZ); + + time_left = wait_for_completion_timeout(&mas->abort_done, HZ); if (!time_left) dev_err(mas->dev, "Failed to cancel/abort m_cmd\n"); } @@ -156,15 +153,14 @@ static void spi_geni_set_cs(struct spi_device *slv, bool set_flag) set_flag = !set_flag; spin_lock_irq(&mas->lock); - reinit_completion(&mas->xfer_done); - mas->cur_mcmd = CMD_CS; + reinit_completion(&mas->cs_done); if (set_flag) geni_se_setup_m_cmd(se, SPI_CS_ASSERT, 0); else geni_se_setup_m_cmd(se, SPI_CS_DEASSERT, 0); spin_unlock_irq(&mas->lock); - time_left = wait_for_completion_timeout(&mas->xfer_done, HZ); + time_left = wait_for_completion_timeout(&mas->cs_done, HZ); if (!time_left) handle_fifo_timeout(spi, NULL); @@ -383,7 +379,6 @@ static void setup_fifo_xfer(struct spi_transfer *xfer, mas->rx_rem_bytes = xfer->len; } writel(spi_tx_cfg, se->base + SE_SPI_TRANS_CFG); - mas->cur_mcmd = CMD_XFER; /* * Lock around right before we start the transfer since our @@ -520,11 +515,13 @@ static irqreturn_t geni_spi_isr(int irq, void *data) geni_spi_handle_tx(mas); if (m_irq & M_CMD_DONE_EN) { - if (mas->cur_mcmd == CMD_XFER) + if (mas->cur_xfer) { spi_finalize_current_transfer(spi); - else if (mas->cur_mcmd == CMD_CS) - complete(&mas->xfer_done); - mas->cur_mcmd = CMD_NONE; + mas->cur_xfer = NULL; + } else { + complete(&mas->cs_done); + } + /* * If this happens, then a CMD_DONE came before all the Tx * buffer bytes were sent out. This is unusual, log this @@ -546,10 +543,10 @@ static irqreturn_t geni_spi_isr(int irq, void *data) mas->rx_rem_bytes, mas->cur_bits_per_word); } - if ((m_irq & M_CMD_CANCEL_EN) || (m_irq & M_CMD_ABORT_EN)) { - mas->cur_mcmd = CMD_NONE; - complete(&mas->xfer_done); - } + if (m_irq & M_CMD_CANCEL_EN) + complete(&mas->cancel_done); + if (m_irq & M_CMD_ABORT_EN) + complete(&mas->abort_done); /* * It's safe or a good idea to Ack all of our our interrupts at the @@ -617,7 +614,9 @@ static int spi_geni_probe(struct platform_device *pdev) spi->handle_err = handle_fifo_timeout; spi->set_cs = spi_geni_set_cs; - init_completion(&mas->xfer_done); + init_completion(&mas->cs_done); + init_completion(&mas->cancel_done); + init_completion(&mas->abort_done); spin_lock_init(&mas->lock); pm_runtime_enable(dev);