From bf9282dc26e7fe2a0736edc568762f0f05d12416 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 12 Aug 2020 12:22:17 +0200 Subject: [PATCH] cpuidle: Make CPUIDLE_FLAG_TLB_FLUSHED generic This allows moving the leave_mm() call into generic code before rcu_idle_enter(). Gets rid of more trace_*_rcuidle() users. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Steven Rostedt (VMware) Reviewed-by: Thomas Gleixner Acked-by: Rafael J. Wysocki Tested-by: Marco Elver Link: https://lkml.kernel.org/r/20200821085348.369441600@infradead.org --- arch/x86/include/asm/mmu.h | 1 + arch/x86/mm/tlb.c | 13 ++----------- drivers/cpuidle/cpuidle.c | 4 ++++ drivers/idle/intel_idle.c | 16 ---------------- include/linux/cpuidle.h | 13 +++++++------ include/linux/mmu_context.h | 5 +++++ 6 files changed, 19 insertions(+), 33 deletions(-) diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h index 0a301ad0b02f..9257667d13c5 100644 --- a/arch/x86/include/asm/mmu.h +++ b/arch/x86/include/asm/mmu.h @@ -59,5 +59,6 @@ typedef struct { } void leave_mm(int cpu); +#define leave_mm leave_mm #endif /* _ASM_X86_MMU_H */ diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 1a3569b43aa5..0951b47e64c1 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -555,21 +555,12 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen); load_new_mm_cr3(next->pgd, new_asid, true); - /* - * NB: This gets called via leave_mm() in the idle path - * where RCU functions differently. Tracing normally - * uses RCU, so we need to use the _rcuidle variant. - * - * (There is no good reason for this. The idle code should - * be rearranged to call this before rcu_idle_enter().) - */ - trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL); + trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL); } else { /* The new ASID is already up to date. */ load_new_mm_cr3(next->pgd, new_asid, false); - /* See above wrt _rcuidle. */ - trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, 0); + trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, 0); } /* Make sure we write CR3 before loaded_mm. */ diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index 9bcda4153d3b..04becd70cc41 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include "cpuidle.h" @@ -228,6 +229,9 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv, broadcast = false; } + if (target_state->flags & CPUIDLE_FLAG_TLB_FLUSHED) + leave_mm(dev->cpu); + /* Take note of the planned idle state. */ sched_idle_set_state(target_state); diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 8e0fb1a5bdbd..9a810e4a7946 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -89,14 +89,6 @@ static unsigned int mwait_substates __initdata; */ #define CPUIDLE_FLAG_ALWAYS_ENABLE BIT(15) -/* - * Set this flag for states where the HW flushes the TLB for us - * and so we don't need cross-calls to keep it consistent. - * If this flag is set, SW flushes the TLB, so even if the - * HW doesn't do the flushing, this flag is safe to use. - */ -#define CPUIDLE_FLAG_TLB_FLUSHED BIT(16) - /* * MWAIT takes an 8-bit "hint" in EAX "suggesting" * the C-state (top nibble) and sub-state (bottom nibble) @@ -131,14 +123,6 @@ static __cpuidle int intel_idle(struct cpuidle_device *dev, unsigned long eax = flg2MWAIT(state->flags); unsigned long ecx = 1; /* break on interrupt flag */ bool tick; - int cpu = smp_processor_id(); - - /* - * leave_mm() to avoid costly and often unnecessary wakeups - * for flushing the user TLB's associated with the active mm. - */ - if (state->flags & CPUIDLE_FLAG_TLB_FLUSHED) - leave_mm(cpu); if (!static_cpu_has(X86_FEATURE_ARAT)) { /* diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index b65909ae4e20..75895e6363b8 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -75,12 +75,13 @@ struct cpuidle_state { }; /* Idle State Flags */ -#define CPUIDLE_FLAG_NONE (0x00) -#define CPUIDLE_FLAG_POLLING BIT(0) /* polling state */ -#define CPUIDLE_FLAG_COUPLED BIT(1) /* state applies to multiple cpus */ -#define CPUIDLE_FLAG_TIMER_STOP BIT(2) /* timer is stopped on this state */ -#define CPUIDLE_FLAG_UNUSABLE BIT(3) /* avoid using this state */ -#define CPUIDLE_FLAG_OFF BIT(4) /* disable this state by default */ +#define CPUIDLE_FLAG_NONE (0x00) +#define CPUIDLE_FLAG_POLLING BIT(0) /* polling state */ +#define CPUIDLE_FLAG_COUPLED BIT(1) /* state applies to multiple cpus */ +#define CPUIDLE_FLAG_TIMER_STOP BIT(2) /* timer is stopped on this state */ +#define CPUIDLE_FLAG_UNUSABLE BIT(3) /* avoid using this state */ +#define CPUIDLE_FLAG_OFF BIT(4) /* disable this state by default */ +#define CPUIDLE_FLAG_TLB_FLUSHED BIT(5) /* idle-state flushes TLBs */ struct cpuidle_device_kobj; struct cpuidle_state_kobj; diff --git a/include/linux/mmu_context.h b/include/linux/mmu_context.h index c51a84132d7c..03dee12d2b61 100644 --- a/include/linux/mmu_context.h +++ b/include/linux/mmu_context.h @@ -3,10 +3,15 @@ #define _LINUX_MMU_CONTEXT_H #include +#include /* Architectures that care about IRQ state in switch_mm can override this. */ #ifndef switch_mm_irqs_off # define switch_mm_irqs_off switch_mm #endif +#ifndef leave_mm +static inline void leave_mm(int cpu) { } +#endif + #endif