82ba4faca1
Since commit:52aec3308d
("x86/tlb: replace INVALIDATE_TLB_VECTOR by CALL_FUNCTION_VECTOR") the TLB remote shootdown is done through call function vector. That commit didn't take care of irq_tlb_count, which a later commit:fd0f586972
("x86: Distinguish TLB shootdown interrupts from other functions call interrupts") ... tried to fix. The fix assumes every increase of irq_tlb_count has a corresponding increase of irq_call_count. So the irq_call_count is always bigger than irq_tlb_count and we could subtract irq_tlb_count from irq_call_count. Unfortunately this is not true for the smp_call_function_single() case. The IPI is only sent if the target CPU's call_single_queue is empty when adding a csd into it in generic_exec_single(). That means if two threads are both adding flush-TLB csds to the same CPU's call_single_queue, only one IPI is sent. In other words, irq_call_count is incremented by 1 but irq_tlb_count is incremented by 2. Over time, irq_tlb_count will become bigger than irq_call_count and the subtraction will produce a very large irq_call_count value due to unsigned wraparound. Considering that: 1) it's not worth sending more IPIs for the sake of accurate counting of irq_call_count in generic_exec_single(); 2) it's not easy to tell whether the call function interrupt is for a TLB shootdown in __smp_call_function_single_interrupt(). Not excluding TLB shootdowns from the call function count seems to be the simplest fix, and this patch does just that. This bug was found by LKP's cyclic performance regression tracking recently with the vm-scalability test suite. I have bisected to commit:3dec0ba0be
("mm/rmap: share the i_mmap_rwsem") This commit didn't do anything wrong but revealed the irq_call_count problem. IIUC, the commit makes rwc->remap_one in rmap_walk_file concurrent with multiple threads. When remap_one is try_to_unmap_one(), then multiple threads could queue flush TLB to the same CPU but only one IPI will be sent. Since the commit was added in Linux v3.19, the counting problem only shows up from v3.19 onwards. Signed-off-by: Aaron Lu <aaron.lu@intel.com> Cc: Alex Shi <alex.shi@linaro.org> Cc: Andy Lutomirski <luto@kernel.org> Cc: Borislav Petkov <bp@alien8.de> Cc: Brian Gerst <brgerst@gmail.com> Cc: Davidlohr Bueso <dave@stgolabs.net> Cc: Denys Vlasenko <dvlasenk@redhat.com> Cc: H. Peter Anvin <hpa@zytor.com> Cc: Huang Ying <ying.huang@intel.com> Cc: Josh Poimboeuf <jpoimboe@redhat.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Tomoki Sekiyama <tomoki.sekiyama.qu@hitachi.com> Link: http://lkml.kernel.org/r/20160811074430.GA18163@aaronlu.sh.intel.com Signed-off-by: Ingo Molnar <mingo@kernel.org>
64 lines
1.7 KiB
C
64 lines
1.7 KiB
C
#ifndef _ASM_X86_HARDIRQ_H
|
|
#define _ASM_X86_HARDIRQ_H
|
|
|
|
#include <linux/threads.h>
|
|
#include <linux/irq.h>
|
|
|
|
/*
 * Per-CPU interrupt statistics for x86.
 *
 * One instance per CPU is declared as irq_stat. Counters are incremented
 * through inc_irq_stat(); __softirq_pending is accessed through the
 * *_softirq_pending() helpers. Which members exist depends on the kernel
 * configuration, so the layout differs between builds.
 */
typedef struct {
	unsigned int __softirq_pending;	/* use the *_softirq_pending() helpers */
	unsigned int __nmi_count;	/* arch dependent */
#ifdef CONFIG_X86_LOCAL_APIC
	unsigned int apic_timer_irqs;	/* arch dependent */
	unsigned int irq_spurious_count;
	unsigned int icr_read_retry_count;
#endif
#ifdef CONFIG_HAVE_KVM
	unsigned int kvm_posted_intr_ipis;
	unsigned int kvm_posted_intr_wakeup_ipis;
#endif
	unsigned int x86_platform_ipis;	/* arch dependent */
	unsigned int apic_perf_irqs;
	unsigned int apic_irq_work_irqs;
#ifdef CONFIG_SMP
	unsigned int irq_resched_count;
	/*
	 * irq_tlb_count is NOT a subset of irq_call_count: several queued
	 * TLB flush requests for the same CPU can be served by a single
	 * call-function IPI, so irq_tlb_count may grow past irq_call_count.
	 * Never subtract one from the other when reporting; the unsigned
	 * result would wrap to a huge value.
	 */
	unsigned int irq_call_count;
	unsigned int irq_tlb_count;
#endif
#ifdef CONFIG_X86_THERMAL_VECTOR
	unsigned int irq_thermal_count;
#endif
#ifdef CONFIG_X86_MCE_THRESHOLD
	unsigned int irq_threshold_count;
#endif
#ifdef CONFIG_X86_MCE_AMD
	unsigned int irq_deferred_error_count;
#endif
#if IS_ENABLED(CONFIG_HYPERV) || defined(CONFIG_XEN)
	unsigned int irq_hv_callback_count;
#endif
} ____cacheline_aligned irq_cpustat_t;
|
|
|
|
/*
 * Declaration of the per-CPU variable irq_stat of type irq_cpustat_t.
 * The accessor macros in this header reach it through this_cpu_*().
 */
DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
|
|
|
|
/*
 * Value-less marker. NOTE(review): presumably tells generic code that this
 * architecture supplies its own irq_stat - confirm against the generic
 * hardirq/irq_cpustat headers.
 */
#define __ARCH_IRQ_STAT
|
|
|
|
/* Increment one irq_stat counter via this_cpu_inc(); @field is the member name. */
#define inc_irq_stat(field)	this_cpu_inc(irq_stat.field)
|
|
|
|
/* Evaluates to irq_stat.__softirq_pending as read through this_cpu_read(). */
#define local_softirq_pending() this_cpu_read(irq_stat.__softirq_pending)
|
|
|
|
/*
 * Value-less marker. NOTE(review): presumably signals that
 * set_softirq_pending()/or_softirq_pending() are provided by this header
 * rather than by generic code - confirm against the generic definitions.
 */
#define __ARCH_SET_SOFTIRQ_PENDING
|
|
|
|
/*
 * Overwrite irq_stat.__softirq_pending with @x through this_cpu_write().
 *
 * The macro body lives on the continuation line, so nothing may be placed
 * between the trailing backslash and the this_cpu_write() line.
 */
#define set_softirq_pending(x) \
	this_cpu_write(irq_stat.__softirq_pending, (x))
|
|
/* OR @bits into irq_stat.__softirq_pending through this_cpu_or(). */
#define or_softirq_pending(bits)	this_cpu_or(irq_stat.__softirq_pending, (bits))
|
|
|
|
/*
 * Defined elsewhere; takes an interrupt number and returns nothing.
 * NOTE(review): presumably called for an interrupt that has no usable
 * handler - confirm against the definition.
 */
extern void ack_bad_irq(unsigned int irq);
|
|
|
|
/*
 * Defined elsewhere; returns a 64-bit value for the given @cpu.
 * NOTE(review): presumably the sum of that CPU's arch-specific irq_stat
 * counters - confirm against the definition.
 */
extern u64 arch_irq_stat_cpu(unsigned int cpu);
|
|
/*
 * Self-referential define: expands to the same name, so the function is used
 * unchanged while the identifier also becomes visible to #ifdef/defined().
 * NOTE(review): presumably lets generic code detect the arch override.
 */
#define arch_irq_stat_cpu arch_irq_stat_cpu
|
|
|
|
/*
 * Defined elsewhere; takes no arguments and returns a 64-bit value.
 * NOTE(review): presumably an arch-wide interrupt total that is not tied to
 * a single CPU - confirm against the definition.
 */
extern u64 arch_irq_stat(void);
|
|
/*
 * Self-referential define: expands to the same name, so the function is used
 * unchanged while the identifier also becomes visible to #ifdef/defined().
 * NOTE(review): presumably lets generic code detect the arch override.
 */
#define arch_irq_stat arch_irq_stat
|
|
|
|
#endif /* _ASM_X86_HARDIRQ_H */
|