Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc

Pull sparc fixes from David Miller:

 - Fix DMA regression in 4.13 merge window, only certain chips can do
   64-bit DMA. From Dave Dushar.

 - Correct cpu cross-call algorithm to correctly detect stalled or stuck
   remote cpus, from Jane Chu.

* git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc:
  sparc64: Measure receiver forward progress to avoid send mondo timeout
  SPARC64: Fix sun4v DMA panic
2017-07-17 15:08:29 -07:00 · 2017-07-17 15:08:29 -07:00 · cb8c65ccff
commit cb8c65ccff
parent 935acd3f5e 9d53caec84
5 changed files with 142 additions and 78 deletions
--- a/arch/sparc/include/asm/trap_block.h
+++ b/arch/sparc/include/asm/trap_block.h
@@ -54,6 +54,7 @@ extern struct trap_per_cpu trap_block[NR_CPUS];
 void init_cur_cpu_trap(struct thread_info *);
 void setup_tba(void);
 extern int ncpus_probed;
 extern u64 cpu_mondo_counter[NR_CPUS];
 unsigned long real_hard_smp_processor_id(void);
--- a/arch/sparc/kernel/pci_sun4v.c
+++ b/arch/sparc/kernel/pci_sun4v.c
@@ -673,12 +673,14 @@ static void dma_4v_unmap_sg(struct device *dev, struct scatterlist *sglist,
 static int dma_4v_supported(struct device *dev, u64 device_mask)
 {
 	struct iommu *iommu = dev->archdata.iommu;
-	u64 dma_addr_mask;
+	u64 dma_addr_mask = iommu->dma_addr_mask;
-	if (device_mask > DMA_BIT_MASK(32) && iommu->atu)
+	if (device_mask > DMA_BIT_MASK(32)) {
-		dma_addr_mask = iommu->atu->dma_addr_mask;
+		if (iommu->atu)
-	else
+			dma_addr_mask = iommu->atu->dma_addr_mask;
-		dma_addr_mask = iommu->dma_addr_mask;
+		else
 			return 0;
 	}
 	if ((device_mask & dma_addr_mask) == dma_addr_mask)
 		return 1;
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -622,22 +622,48 @@ retry:
 	}
 }
-/* Multi-cpu list version.  */
+#define	CPU_MONDO_COUNTER(cpuid)	(cpu_mondo_counter[cpuid])
 #define	MONDO_USEC_WAIT_MIN		2
 #define	MONDO_USEC_WAIT_MAX		100
 #define	MONDO_RETRY_LIMIT		500000
 /* Multi-cpu list version.
 *
 * Deliver xcalls to 'cnt' number of cpus in 'cpu_list'.
 * Sometimes not all cpus receive the mondo, requiring us to re-send
 * the mondo until all cpus have received, or cpus are truly stuck
 * unable to receive mondo, and we timeout.
 * Occasionally a target cpu strand is borrowed briefly by hypervisor to
 * perform guest service, such as PCIe error handling. Consider the
 * service time, 1 second overall wait is reasonable for 1 cpu.
 * Here two in-between mondo check wait time are defined: 2 usec for
 * single cpu quick turn around and up to 100usec for large cpu count.
 * Deliver mondo to large number of cpus could take longer, we adjusts
 * the retry count as long as target cpus are making forward progress.
 */
 static void hypervisor_xcall_deliver(struct trap_per_cpu *tb, int cnt)
 {
-	int retries, this_cpu, prev_sent, i, saw_cpu_error;
+	int this_cpu, tot_cpus, prev_sent, i, rem;
 	int usec_wait, retries, tot_retries;
 	u16 first_cpu = 0xffff;
 	unsigned long xc_rcvd = 0;
 	unsigned long status;
 	int ecpuerror_id = 0;
 	int enocpu_id = 0;
 	u16 *cpu_list;
 	u16 cpu;
 	this_cpu = smp_processor_id();
 	cpu_list = __va(tb->cpu_list_pa);
-
+	usec_wait = cnt * MONDO_USEC_WAIT_MIN;
-	saw_cpu_error = 0;
+	if (usec_wait > MONDO_USEC_WAIT_MAX)
-	retries = 0;
+		usec_wait = MONDO_USEC_WAIT_MAX;
 	retries = tot_retries = 0;
 	tot_cpus = cnt;
 	prev_sent = 0;
 	do {
-		int forward_progress, n_sent;
+		int n_sent, mondo_delivered, target_cpu_busy;
 		status = sun4v_cpu_mondo_send(cnt,
 					      tb->cpu_list_pa,
@@ -645,94 +671,113 @@ static void hypervisor_xcall_deliver(struct trap_per_cpu *tb, int cnt)
 		/* HV_EOK means all cpus received the xcall, we're done.  */
 		if (likely(status == HV_EOK))
-			break;
+			goto xcall_done;
 		/* If not these non-fatal errors, panic */
 		if (unlikely((status != HV_EWOULDBLOCK) &&
 			(status != HV_ECPUERROR) &&
 			(status != HV_ENOCPU)))
 			goto fatal_errors;
 		/* First, see if we made any forward progress.
 		 *
 		 * Go through the cpu_list, count the target cpus that have
 		 * received our mondo (n_sent), and those that did not (rem).
 		 * Re-pack cpu_list with the cpus remain to be retried in the
 		 * front - this simplifies tracking the truly stalled cpus.
 		 *
 		 * The hypervisor indicates successful sends by setting
 		 * cpu list entries to the value 0xffff.
 		 *
 		 * EWOULDBLOCK means some target cpus did not receive the
 		 * mondo and retry usually helps.
 		 *
 		 * ECPUERROR means at least one target cpu is in error state,
 		 * it's usually safe to skip the faulty cpu and retry.
 		 *
 		 * ENOCPU means one of the target cpu doesn't belong to the
 		 * domain, perhaps offlined which is unexpected, but not
 		 * fatal and it's okay to skip the offlined cpu.
 		 */
 		rem = 0;
 		n_sent = 0;
 		for (i = 0; i < cnt; i++) {
-			if (likely(cpu_list[i] == 0xffff))
+			cpu = cpu_list[i];
 			if (likely(cpu == 0xffff)) {
 				n_sent++;
 			} else if ((status == HV_ECPUERROR) &&
 				(sun4v_cpu_state(cpu) == HV_CPU_STATE_ERROR)) {
 				ecpuerror_id = cpu + 1;
 			} else if (status == HV_ENOCPU && !cpu_online(cpu)) {
 				enocpu_id = cpu + 1;
 			} else {
 				cpu_list[rem++] = cpu;
 			}
 		}
-		forward_progress = 0;
+		/* No cpu remained, we're done. */
-		if (n_sent > prev_sent)
+		if (rem == 0)
-			forward_progress = 1;
+			break;
 		/* Otherwise, update the cpu count for retry. */
 		cnt = rem;
 		/* Record the overall number of mondos received by the
 		 * first of the remaining cpus.
 		 */
 		if (first_cpu != cpu_list[0]) {
 			first_cpu = cpu_list[0];
 			xc_rcvd = CPU_MONDO_COUNTER(first_cpu);
 		}
 		/* Was any mondo delivered successfully? */
 		mondo_delivered = (n_sent > prev_sent);
 		prev_sent = n_sent;
-		/* If we get a HV_ECPUERROR, then one or more of the cpus
+		/* or, was any target cpu busy processing other mondos? */
-		 * in the list are in error state.  Use the cpu_state()
+		target_cpu_busy = (xc_rcvd < CPU_MONDO_COUNTER(first_cpu));
-		 * hypervisor call to find out which cpus are in error state.
+		xc_rcvd = CPU_MONDO_COUNTER(first_cpu);
 		/* Retry count is for no progress. If we're making progress,
 		 * reset the retry count.
 		 */
-		if (unlikely(status == HV_ECPUERROR)) {
+		if (likely(mondo_delivered || target_cpu_busy)) {
-			for (i = 0; i < cnt; i++) {
+			tot_retries += retries;
-				long err;
+			retries = 0;
-				u16 cpu;
+		} else if (unlikely(retries > MONDO_RETRY_LIMIT)) {
-
+			goto fatal_mondo_timeout;
 				cpu = cpu_list[i];
 				if (cpu == 0xffff)
 					continue;
 				err = sun4v_cpu_state(cpu);
 				if (err == HV_CPU_STATE_ERROR) {
 					saw_cpu_error = (cpu + 1);
 					cpu_list[i] = 0xffff;
 				}
 			}
 		} else if (unlikely(status != HV_EWOULDBLOCK))
 			goto fatal_mondo_error;
 		/* Don't bother rewriting the CPU list, just leave the
 		 * 0xffff and non-0xffff entries in there and the
 		 * hypervisor will do the right thing.
 		 *
 		 * Only advance timeout state if we didn't make any
 		 * forward progress.
 		 */
 		if (unlikely(!forward_progress)) {
 			if (unlikely(++retries > 10000))
 				goto fatal_mondo_timeout;
 			/* Delay a little bit to let other cpus catch up
 			 * on their cpu mondo queue work.
 			 */
 			udelay(2 * cnt);
 		}
 		/* Delay a little bit to let other cpus catch up on
 		 * their cpu mondo queue work.
 		 */
 		if (!mondo_delivered)
 			udelay(usec_wait);
 		retries++;
 	} while (1);
-	if (unlikely(saw_cpu_error))
+xcall_done:
-		goto fatal_mondo_cpu_error;
+	if (unlikely(ecpuerror_id > 0)) {
-
+		pr_crit("CPU[%d]: SUN4V mondo cpu error, target cpu(%d) was in error state\n",
 		       this_cpu, ecpuerror_id - 1);
 	} else if (unlikely(enocpu_id > 0)) {
 		pr_crit("CPU[%d]: SUN4V mondo cpu error, target cpu(%d) does not belong to the domain\n",
 		       this_cpu, enocpu_id - 1);
 	}
 	return;
-fatal_mondo_cpu_error:
+fatal_errors:
-	printk(KERN_CRIT "CPU[%d]: SUN4V mondo cpu error, some target cpus "
+	/* fatal errors include bad alignment, etc */
-	       "(including %d) were in error state\n",
+	pr_crit("CPU[%d]: Args were cnt(%d) cpulist_pa(%lx) mondo_block_pa(%lx)\n",
-	       this_cpu, saw_cpu_error - 1);
+	       this_cpu, tot_cpus, tb->cpu_list_pa, tb->cpu_mondo_block_pa);
-	return;
+	panic("Unexpected SUN4V mondo error %lu\n", status);
 fatal_mondo_timeout:
-	printk(KERN_CRIT "CPU[%d]: SUN4V mondo timeout, no forward "
+	/* some cpus being non-responsive to the cpu mondo */
-	       " progress after %d retries.\n",
+	pr_crit("CPU[%d]: SUN4V mondo timeout, cpu(%d) made no forward progress after %d retries. Total target cpus(%d).\n",
-	       this_cpu, retries);
+	       this_cpu, first_cpu, (tot_retries + retries), tot_cpus);
-	goto dump_cpu_list_and_out;
+	panic("SUN4V mondo timeout panic\n");
 fatal_mondo_error:
 	printk(KERN_CRIT "CPU[%d]: Unexpected SUN4V mondo error %lu\n",
 	       this_cpu, status);
 	printk(KERN_CRIT "CPU[%d]: Args were cnt(%d) cpulist_pa(%lx) "
 	       "mondo_block_pa(%lx)\n",
 	       this_cpu, cnt, tb->cpu_list_pa, tb->cpu_mondo_block_pa);
 dump_cpu_list_and_out:
 	printk(KERN_CRIT "CPU[%d]: CPU list [ ", this_cpu);
 	for (i = 0; i < cnt; i++)
 		printk("%u ", cpu_list[i]);
 	printk("]\n");
 }
 static void (*xcall_deliver_impl)(struct trap_per_cpu *, int);
--- a/arch/sparc/kernel/sun4v_ivec.S
+++ b/arch/sparc/kernel/sun4v_ivec.S
@@ -26,6 +26,21 @@ sun4v_cpu_mondo:
 	ldxa	[%g0] ASI_SCRATCHPAD, %g4
 	sub	%g4, TRAP_PER_CPU_FAULT_INFO, %g4
 	/* Get smp_processor_id() into %g3 */
 	sethi	%hi(trap_block), %g5
 	or	%g5, %lo(trap_block), %g5
 	sub	%g4, %g5, %g3
 	srlx	%g3, TRAP_BLOCK_SZ_SHIFT, %g3
 	/* Increment cpu_mondo_counter[smp_processor_id()] */
 	sethi	%hi(cpu_mondo_counter), %g5
 	or	%g5, %lo(cpu_mondo_counter), %g5
 	sllx	%g3, 3, %g3
 	add	%g5, %g3, %g5
 	ldx	[%g5], %g3
 	add	%g3, 1, %g3
 	stx	%g3, [%g5]
 	/* Get CPU mondo queue base phys address into %g7.  */
 	ldx	[%g4 + TRAP_PER_CPU_CPU_MONDO_PA], %g7
--- a/arch/sparc/kernel/traps_64.c
+++ b/arch/sparc/kernel/traps_64.c
@@ -2733,6 +2733,7 @@ void do_getpsr(struct pt_regs *regs)
 	}
 }
 u64 cpu_mondo_counter[NR_CPUS] = {0};
 struct trap_per_cpu trap_block[NR_CPUS];
 EXPORT_SYMBOL(trap_block);