70e4a36973
Make the maxium TLB invalidate vectors depend on NR_CPUS linearly, with a maximum of 32 vectors. We currently only have 8 vectors for TLB invalidation and that is clearly inadequate. If we have a lot of CPUs, the CPUs need share the 8 vectors and tlbstate_lock is used to protect them. flush_tlb_page() is heavily used in page reclaim, which will cause a lot of lock contention for tlbstate_lock. Andi Kleen suggested increasing the vectors number to 32, which should be good for current typical systems to reduce the tlbstate_lock contention. My test system has 4 sockets and 64G memory, and 64 CPUs. My workload creates 64 processes. Each process mmap reads a big empty sparse file. The total size of the files are 2*total_mem, so this will cause a lot of page reclaim. Below is the result I get from perf call-graph profiling: without the patch: ------------------ 24.25% usemem [kernel] [k] _raw_spin_lock | --- _raw_spin_lock | |--42.15%-- native_flush_tlb_others with the patch: ------------------ 14.96% usemem [kernel] [k] _raw_spin_lock | --- _raw_spin_lock |--13.89%-- native_flush_tlb_others So this heavily reduces the tlbstate_lock contention. Suggested-by: Andi Kleen <andi@firstfloor.org> Signed-off-by: Shaohua Li <shaohua.li@intel.com> Cc: Eric Dumazet <eric.dumazet@gmail.com> Cc: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Frederic Weisbecker <fweisbec@gmail.com> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> LKML-Reference: <1295232727.1949.709.camel@sli10-conroe> Signed-off-by: Ingo Molnar <mingo@elte.hu>
186 lines
5.1 KiB
C
186 lines
5.1 KiB
C
#ifndef _ASM_X86_IRQ_VECTORS_H
|
|
#define _ASM_X86_IRQ_VECTORS_H
|
|
|
|
#include <linux/threads.h>
|
|
/*
|
|
* Linux IRQ vector layout.
|
|
*
|
|
* There are 256 IDT entries (per CPU - each entry is 8 bytes) which can
|
|
* be defined by Linux. They are used as a jump table by the CPU when a
|
|
* given vector is triggered - by a CPU-external, CPU-internal or
|
|
* software-triggered event.
|
|
*
|
|
* Linux sets the kernel code address each entry jumps to early during
|
|
* bootup, and never changes them. This is the general layout of the
|
|
* IDT entries:
|
|
*
|
|
* Vectors 0 ... 31 : system traps and exceptions - hardcoded events
|
|
* Vectors 32 ... 127 : device interrupts
|
|
* Vector 128 : legacy int80 syscall interface
|
|
* Vectors 129 ... INVALIDATE_TLB_VECTOR_START-1 : device interrupts
|
|
* Vectors INVALIDATE_TLB_VECTOR_START ... 255 : special interrupts
|
|
*
|
|
* 64-bit x86 has per CPU IDT tables, 32-bit has one shared IDT table.
|
|
*
|
|
* This file enumerates the exact layout of them:
|
|
*/
|
|
|
|
#define NMI_VECTOR 0x02
|
|
#define MCE_VECTOR 0x12
|
|
|
|
/*
|
|
* IDT vectors usable for external interrupt sources start at 0x20.
|
|
* (0x80 is the syscall vector, 0x30-0x3f are for ISA)
|
|
*/
|
|
#define FIRST_EXTERNAL_VECTOR 0x20
|
|
/*
|
|
* We start allocating at 0x21 to spread out vectors evenly between
|
|
* priority levels. (0x80 is the syscall vector)
|
|
*/
|
|
#define VECTOR_OFFSET_START 1
|
|
|
|
/*
|
|
* Reserve the lowest usable vector (and hence lowest priority) 0x20 for
|
|
* triggering cleanup after irq migration. 0x21-0x2f will still be used
|
|
* for device interrupts.
|
|
*/
|
|
#define IRQ_MOVE_CLEANUP_VECTOR FIRST_EXTERNAL_VECTOR
|
|
|
|
#define IA32_SYSCALL_VECTOR 0x80
|
|
#ifdef CONFIG_X86_32
|
|
# define SYSCALL_VECTOR 0x80
|
|
#endif
|
|
|
|
/*
|
|
* Vectors 0x30-0x3f are used for ISA interrupts.
|
|
* round up to the next 16-vector boundary
|
|
*/
|
|
#define IRQ0_VECTOR ((FIRST_EXTERNAL_VECTOR + 16) & ~15)
|
|
|
|
#define IRQ1_VECTOR (IRQ0_VECTOR + 1)
|
|
#define IRQ2_VECTOR (IRQ0_VECTOR + 2)
|
|
#define IRQ3_VECTOR (IRQ0_VECTOR + 3)
|
|
#define IRQ4_VECTOR (IRQ0_VECTOR + 4)
|
|
#define IRQ5_VECTOR (IRQ0_VECTOR + 5)
|
|
#define IRQ6_VECTOR (IRQ0_VECTOR + 6)
|
|
#define IRQ7_VECTOR (IRQ0_VECTOR + 7)
|
|
#define IRQ8_VECTOR (IRQ0_VECTOR + 8)
|
|
#define IRQ9_VECTOR (IRQ0_VECTOR + 9)
|
|
#define IRQ10_VECTOR (IRQ0_VECTOR + 10)
|
|
#define IRQ11_VECTOR (IRQ0_VECTOR + 11)
|
|
#define IRQ12_VECTOR (IRQ0_VECTOR + 12)
|
|
#define IRQ13_VECTOR (IRQ0_VECTOR + 13)
|
|
#define IRQ14_VECTOR (IRQ0_VECTOR + 14)
|
|
#define IRQ15_VECTOR (IRQ0_VECTOR + 15)
|
|
|
|
/*
|
|
* Special IRQ vectors used by the SMP architecture, 0xf0-0xff
|
|
*
|
|
* some of the following vectors are 'rare', they are merged
|
|
* into a single vector (CALL_FUNCTION_VECTOR) to save vector space.
|
|
* TLB, reschedule and local APIC vectors are performance-critical.
|
|
*/
|
|
|
|
#define SPURIOUS_APIC_VECTOR 0xff
|
|
/*
|
|
* Sanity check
|
|
*/
|
|
#if ((SPURIOUS_APIC_VECTOR & 0x0F) != 0x0F)
|
|
# error SPURIOUS_APIC_VECTOR definition error
|
|
#endif
|
|
|
|
#define ERROR_APIC_VECTOR 0xfe
|
|
#define RESCHEDULE_VECTOR 0xfd
|
|
#define CALL_FUNCTION_VECTOR 0xfc
|
|
#define CALL_FUNCTION_SINGLE_VECTOR 0xfb
|
|
#define THERMAL_APIC_VECTOR 0xfa
|
|
#define THRESHOLD_APIC_VECTOR 0xf9
|
|
#define REBOOT_VECTOR 0xf8
|
|
|
|
/*
|
|
* Generic system vector for platform specific use
|
|
*/
|
|
#define X86_PLATFORM_IPI_VECTOR 0xf7
|
|
|
|
/*
|
|
* IRQ work vector:
|
|
*/
|
|
#define IRQ_WORK_VECTOR 0xf6
|
|
|
|
#define UV_BAU_MESSAGE 0xf5
|
|
|
|
/*
|
|
* Self IPI vector for machine checks
|
|
*/
|
|
#define MCE_SELF_VECTOR 0xf4
|
|
|
|
/* Xen vector callback to receive events in a HVM domain */
|
|
#define XEN_HVM_EVTCHN_CALLBACK 0xf3
|
|
|
|
/*
|
|
* Local APIC timer IRQ vector is on a different priority level,
|
|
* to work around the 'lost local interrupt if more than 2 IRQ
|
|
* sources per level' errata.
|
|
*/
|
|
#define LOCAL_TIMER_VECTOR 0xef
|
|
|
|
/* up to 32 vectors used for spreading out TLB flushes: */
|
|
#if NR_CPUS <= 32
|
|
# define NUM_INVALIDATE_TLB_VECTORS NR_CPUS
|
|
#else
|
|
# define NUM_INVALIDATE_TLB_VECTORS 32
|
|
#endif
|
|
|
|
#define INVALIDATE_TLB_VECTOR_END 0xee
|
|
#define INVALIDATE_TLB_VECTOR_START \
|
|
(INVALIDATE_TLB_VECTOR_END - NUM_INVALIDATE_TLB_VECTORS + 1)
|
|
|
|
#define NR_VECTORS 256
|
|
|
|
#define FPU_IRQ 13
|
|
|
|
#define FIRST_VM86_IRQ 3
|
|
#define LAST_VM86_IRQ 15
|
|
|
|
#ifndef __ASSEMBLY__
|
|
static inline int invalid_vm86_irq(int irq)
|
|
{
|
|
return irq < FIRST_VM86_IRQ || irq > LAST_VM86_IRQ;
|
|
}
|
|
#endif
|
|
|
|
/*
|
|
* Size the maximum number of interrupts.
|
|
*
|
|
* If the irq_desc[] array has a sparse layout, we can size things
|
|
* generously - it scales up linearly with the maximum number of CPUs,
|
|
* and the maximum number of IO-APICs, whichever is higher.
|
|
*
|
|
* In other cases we size more conservatively, to not create too large
|
|
* static arrays.
|
|
*/
|
|
|
|
#define NR_IRQS_LEGACY 16
|
|
|
|
#define IO_APIC_VECTOR_LIMIT ( 32 * MAX_IO_APICS )
|
|
|
|
#ifdef CONFIG_X86_IO_APIC
|
|
# ifdef CONFIG_SPARSE_IRQ
|
|
# define CPU_VECTOR_LIMIT (64 * NR_CPUS)
|
|
# define NR_IRQS \
|
|
(CPU_VECTOR_LIMIT > IO_APIC_VECTOR_LIMIT ? \
|
|
(NR_VECTORS + CPU_VECTOR_LIMIT) : \
|
|
(NR_VECTORS + IO_APIC_VECTOR_LIMIT))
|
|
# else
|
|
# define CPU_VECTOR_LIMIT (32 * NR_CPUS)
|
|
# define NR_IRQS \
|
|
(CPU_VECTOR_LIMIT < IO_APIC_VECTOR_LIMIT ? \
|
|
(NR_VECTORS + CPU_VECTOR_LIMIT) : \
|
|
(NR_VECTORS + IO_APIC_VECTOR_LIMIT))
|
|
# endif
|
|
#else /* !CONFIG_X86_IO_APIC: */
|
|
# define NR_IRQS NR_IRQS_LEGACY
|
|
#endif
|
|
|
|
#endif /* _ASM_X86_IRQ_VECTORS_H */
|