powerpc/64: Don't recurse irq replay

Interrupt handlers called by the soft-pending irq replay code can run
softirqs. Softirq processing enables and disables local irqs, which
allows interrupts to come in, including soft-masked interrupts, and can
cause pending irqs to be replayed again. That makes the soft irq replay
state machine and possible races more complicated and fragile than they
need to be.

Use irq_enter/irq_exit around irq replay to prevent softirqs running
while interrupts are being replayed. Softirqs will now be run at the
irq_exit() call after all the irq replaying is done. This prevents irqs
being replayed while irqs are being replayed, and should hopefully make
things simpler and easier to think about and debug.

A new PACA_IRQ_REPLAYING flag is added to prevent asynchronous interrupt
handlers from hard-enabling MSR[EE] while pending irqs are being
replayed, because that allows more irqs to become pending, which adds
further complexity. This means replayed irq handlers won't be profiled
quite so well, because perf irqs can't be taken during replay.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20230121102618.2824429-1-npiggin@gmail.com
This commit is contained in:
Nicholas Piggin 2023-01-21 20:26:18 +10:00 committed by Michael Ellerman
parent bab537805a
commit 5746ca131e
2 changed files with 70 additions and 37 deletions

View File

@ -36,15 +36,17 @@
#define PACA_IRQ_DEC 0x08 /* Or FIT */ #define PACA_IRQ_DEC 0x08 /* Or FIT */
#define PACA_IRQ_HMI 0x10 #define PACA_IRQ_HMI 0x10
#define PACA_IRQ_PMI 0x20 #define PACA_IRQ_PMI 0x20
#define PACA_IRQ_REPLAYING 0x40
/* /*
* Some soft-masked interrupts must be hard masked until they are replayed * Some soft-masked interrupts must be hard masked until they are replayed
* (e.g., because the soft-masked handler does not clear the exception). * (e.g., because the soft-masked handler does not clear the exception).
* Interrupt replay itself must remain hard masked too.
*/ */
#ifdef CONFIG_PPC_BOOK3S #ifdef CONFIG_PPC_BOOK3S
#define PACA_IRQ_MUST_HARD_MASK (PACA_IRQ_EE|PACA_IRQ_PMI) #define PACA_IRQ_MUST_HARD_MASK (PACA_IRQ_EE|PACA_IRQ_PMI|PACA_IRQ_REPLAYING)
#else #else
#define PACA_IRQ_MUST_HARD_MASK (PACA_IRQ_EE) #define PACA_IRQ_MUST_HARD_MASK (PACA_IRQ_EE|PACA_IRQ_REPLAYING)
#endif #endif
#endif /* CONFIG_PPC64 */ #endif /* CONFIG_PPC64 */

View File

@ -70,22 +70,19 @@ int distribute_irqs = 1;
static inline void next_interrupt(struct pt_regs *regs) static inline void next_interrupt(struct pt_regs *regs)
{ {
/* if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) {
* Softirq processing can enable/disable irqs, which will leave WARN_ON(!(local_paca->irq_happened & PACA_IRQ_HARD_DIS));
* MSR[EE] enabled and the soft mask set to IRQS_DISABLED. Fix WARN_ON(irq_soft_mask_return() != IRQS_ALL_DISABLED);
* this up. }
*/
if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS))
hard_irq_disable();
else
irq_soft_mask_set(IRQS_ALL_DISABLED);
/* /*
* We are responding to the next interrupt, so interrupt-off * We are responding to the next interrupt, so interrupt-off
* latencies should be reset here. * latencies should be reset here.
*/ */
lockdep_hardirq_exit();
trace_hardirqs_on(); trace_hardirqs_on();
trace_hardirqs_off(); trace_hardirqs_off();
lockdep_hardirq_enter();
} }
static inline bool irq_happened_test_and_clear(u8 irq) static inline bool irq_happened_test_and_clear(u8 irq)
@ -97,22 +94,11 @@ static inline bool irq_happened_test_and_clear(u8 irq)
return false; return false;
} }
void replay_soft_interrupts(void) static void __replay_soft_interrupts(void)
{ {
struct pt_regs regs; struct pt_regs regs;
/* /*
* Be careful here, calling these interrupt handlers can cause
* softirqs to be raised, which they may run when calling irq_exit,
* which will cause local_irq_enable() to be run, which can then
* recurse into this function. Don't keep any state across
* interrupt handler calls which may change underneath us.
*
* Softirqs can not be disabled over replay to stop this recursion
* because interrupts taken in idle code may require RCU softirq
* to run in the irq RCU tracking context. This is a hard problem
* to fix without changes to the softirq or idle layer.
*
* We use local_paca rather than get_paca() to avoid all the * We use local_paca rather than get_paca() to avoid all the
* debug_smp_processor_id() business in this low level function. * debug_smp_processor_id() business in this low level function.
*/ */
@ -120,13 +106,20 @@ void replay_soft_interrupts(void)
if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) { if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) {
WARN_ON_ONCE(mfmsr() & MSR_EE); WARN_ON_ONCE(mfmsr() & MSR_EE);
WARN_ON(!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)); WARN_ON(!(local_paca->irq_happened & PACA_IRQ_HARD_DIS));
WARN_ON(local_paca->irq_happened & PACA_IRQ_REPLAYING);
} }
/*
* PACA_IRQ_REPLAYING prevents interrupt handlers from enabling
* MSR[EE] to get PMIs, which can result in more IRQs becoming
* pending.
*/
local_paca->irq_happened |= PACA_IRQ_REPLAYING;
ppc_save_regs(&regs); ppc_save_regs(&regs);
regs.softe = IRQS_ENABLED; regs.softe = IRQS_ENABLED;
regs.msr |= MSR_EE; regs.msr |= MSR_EE;
again:
/* /*
* Force the delivery of pending soft-disabled interrupts on PS3. * Force the delivery of pending soft-disabled interrupts on PS3.
* Any HV call will have this side effect. * Any HV call will have this side effect.
@ -175,13 +168,14 @@ again:
next_interrupt(&regs); next_interrupt(&regs);
} }
/* local_paca->irq_happened &= ~PACA_IRQ_REPLAYING;
* Softirq processing can enable and disable interrupts, which can }
* result in new irqs becoming pending. Must keep looping until we
* have cleared out all pending interrupts. void replay_soft_interrupts(void)
*/ {
if (local_paca->irq_happened & ~PACA_IRQ_HARD_DIS) irq_enter(); /* See comment in arch_local_irq_restore */
goto again; __replay_soft_interrupts();
irq_exit();
} }
#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_PPC_KUAP) #if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_PPC_KUAP)
@ -200,13 +194,13 @@ static inline void replay_soft_interrupts_irqrestore(void)
if (kuap_state != AMR_KUAP_BLOCKED) if (kuap_state != AMR_KUAP_BLOCKED)
set_kuap(AMR_KUAP_BLOCKED); set_kuap(AMR_KUAP_BLOCKED);
replay_soft_interrupts(); __replay_soft_interrupts();
if (kuap_state != AMR_KUAP_BLOCKED) if (kuap_state != AMR_KUAP_BLOCKED)
set_kuap(kuap_state); set_kuap(kuap_state);
} }
#else #else
#define replay_soft_interrupts_irqrestore() replay_soft_interrupts() #define replay_soft_interrupts_irqrestore() __replay_soft_interrupts()
#endif #endif
notrace void arch_local_irq_restore(unsigned long mask) notrace void arch_local_irq_restore(unsigned long mask)
@ -219,9 +213,13 @@ notrace void arch_local_irq_restore(unsigned long mask)
return; return;
} }
if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) {
WARN_ON_ONCE(in_nmi() || in_hardirq()); WARN_ON_ONCE(in_nmi());
WARN_ON_ONCE(in_hardirq());
WARN_ON_ONCE(local_paca->irq_happened & PACA_IRQ_REPLAYING);
}
again:
/* /*
* After the stb, interrupts are unmasked and there are no interrupts * After the stb, interrupts are unmasked and there are no interrupts
* pending replay. The restart sequence makes this atomic with * pending replay. The restart sequence makes this atomic with
@ -248,6 +246,12 @@ notrace void arch_local_irq_restore(unsigned long mask)
if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
WARN_ON_ONCE(!(mfmsr() & MSR_EE)); WARN_ON_ONCE(!(mfmsr() & MSR_EE));
/*
* If we came here from the replay below, we might have a preempt
* pending (due to preempt_enable_no_resched()). Have to check now.
*/
preempt_check_resched();
return; return;
happened: happened:
@ -261,6 +265,7 @@ happened:
irq_soft_mask_set(IRQS_ENABLED); irq_soft_mask_set(IRQS_ENABLED);
local_paca->irq_happened = 0; local_paca->irq_happened = 0;
__hard_irq_enable(); __hard_irq_enable();
preempt_check_resched();
return; return;
} }
@ -296,12 +301,38 @@ happened:
irq_soft_mask_set(IRQS_ALL_DISABLED); irq_soft_mask_set(IRQS_ALL_DISABLED);
trace_hardirqs_off(); trace_hardirqs_off();
/*
* Now enter interrupt context. The interrupt handlers themselves
* also call irq_enter/exit (which is okay, they can nest). But call
* it here now to hold off softirqs until the below irq_exit(). If
* we allowed replayed handlers to run softirqs, that enables irqs,
* which must replay interrupts, which recurses in here and makes
* things more complicated. The recursion is limited to 2, and it can
* be made to work, but it's complicated.
*
* local_bh_disable can not be used here because interrupts taken in
* idle are not in the right context (RCU, tick, etc) to run softirqs
* so irq_enter must be called.
*/
irq_enter();
replay_soft_interrupts_irqrestore(); replay_soft_interrupts_irqrestore();
irq_exit();
if (unlikely(local_paca->irq_happened != PACA_IRQ_HARD_DIS)) {
/*
* The softirq processing in irq_exit() may enable interrupts
* temporarily, which can result in MSR[EE] being enabled and
* more irqs becoming pending. Go around again if that happens.
*/
trace_hardirqs_on();
preempt_enable_no_resched();
goto again;
}
trace_hardirqs_on(); trace_hardirqs_on();
irq_soft_mask_set(IRQS_ENABLED); irq_soft_mask_set(IRQS_ENABLED);
if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
WARN_ON(local_paca->irq_happened != PACA_IRQ_HARD_DIS);
local_paca->irq_happened = 0; local_paca->irq_happened = 0;
__hard_irq_enable(); __hard_irq_enable();
preempt_enable(); preempt_enable();