a33fda35e3
This patch introduces a new generic queued spinlock implementation that can serve as an alternative to the default ticket spinlock. Compared with the ticket spinlock, this queued spinlock should be almost as fair as the ticket spinlock. It has about the same speed in single-thread and it can be much faster in high contention situations especially when the spinlock is embedded within the data structure to be protected. Only in light to moderate contention where the average queue depth is around 1-3 will this queued spinlock be potentially a bit slower due to the higher slowpath overhead. This queued spinlock is especially suited to NUMA machines with a large number of cores as the chance of spinlock contention is much higher in those machines. The cost of contention is also higher because of slower inter-node memory traffic. Due to the fact that spinlocks are acquired with preemption disabled, the process will not be migrated to another CPU while it is trying to get a spinlock. Ignoring interrupt handling, a CPU can only be contending in one spinlock at any one time. Counting soft IRQ, hard IRQ and NMI, a CPU can only have a maximum of 4 concurrent lock waiting activities. By allocating a set of per-cpu queue nodes and using them to form a waiting queue, we can encode the queue node address into a much smaller 24-bit size (including CPU number and queue node index) leaving one byte for the lock. Please note that the queue node is only needed when waiting for the lock. Once the lock is acquired, the queue node can be released to be used later. Signed-off-by: Waiman Long <Waiman.Long@hp.com> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com> Cc: Borislav Petkov <bp@alien8.de> Cc: Daniel J Blueman <daniel@numascale.com> Cc: David Vrabel <david.vrabel@citrix.com> Cc: Douglas Hatch <doug.hatch@hp.com> Cc: H. 
Peter Anvin <hpa@zytor.com> Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Oleg Nesterov <oleg@redhat.com> Cc: Paolo Bonzini <paolo.bonzini@gmail.com> Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Raghavendra K T <raghavendra.kt@linux.vnet.ibm.com> Cc: Rik van Riel <riel@redhat.com> Cc: Scott J Norton <scott.norton@hp.com> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: virtualization@lists.linux-foundation.org Cc: xen-devel@lists.xenproject.org Link: http://lkml.kernel.org/r/1429901803-29771-2-git-send-email-Waiman.Long@hp.com Signed-off-by: Ingo Molnar <mingo@kernel.org> |
||
---|---|---|
.. | ||
bitops | ||
4level-fixup.h | ||
atomic64.h | ||
atomic-long.h | ||
atomic.h | ||
audit_change_attr.h | ||
audit_dir_write.h | ||
audit_read.h | ||
audit_signal.h | ||
audit_write.h | ||
barrier.h | ||
bitops.h | ||
bitsperlong.h | ||
bug.h | ||
bugs.h | ||
cache.h | ||
cacheflush.h | ||
checksum.h | ||
clkdev.h | ||
cmpxchg-local.h | ||
cmpxchg.h | ||
cputime_jiffies.h | ||
cputime_nsecs.h | ||
cputime.h | ||
current.h | ||
delay.h | ||
device.h | ||
div64.h | ||
dma-coherent.h | ||
dma-contiguous.h | ||
dma-mapping-broken.h | ||
dma-mapping-common.h | ||
dma.h | ||
early_ioremap.h | ||
emergency-restart.h | ||
exec.h | ||
fb.h | ||
fixmap.h | ||
ftrace.h | ||
futex.h | ||
getorder.h | ||
gpio.h | ||
hardirq.h | ||
hugetlb.h | ||
hw_irq.h | ||
ide_iops.h | ||
int-ll64.h | ||
io-64-nonatomic-hi-lo.h | ||
io-64-nonatomic-lo-hi.h | ||
io.h | ||
ioctl.h | ||
iomap.h | ||
irq_regs.h | ||
irq_work.h | ||
irq.h | ||
irqflags.h | ||
Kbuild.asm | ||
kdebug.h | ||
kmap_types.h | ||
kvm_para.h | ||
libata-portmap.h | ||
linkage.h | ||
local64.h | ||
local.h | ||
mcs_spinlock.h | ||
memory_model.h | ||
mm_hooks.h | ||
mmu_context.h | ||
mmu.h | ||
module.h | ||
msi.h | ||
mutex-dec.h | ||
mutex-null.h | ||
mutex-xchg.h | ||
mutex.h | ||
page.h | ||
param.h | ||
parport.h | ||
pci_iomap.h | ||
pci-bridge.h | ||
pci-dma-compat.h | ||
pci.h | ||
percpu.h | ||
pgalloc.h | ||
pgtable-nopmd.h | ||
pgtable-nopud.h | ||
pgtable.h | ||
preempt.h | ||
ptrace.h | ||
qrwlock_types.h | ||
qrwlock.h | ||
qspinlock_types.h | ||
qspinlock.h | ||
resource.h | ||
rtc.h | ||
rwsem.h | ||
scatterlist.h | ||
seccomp.h | ||
sections.h | ||
segment.h | ||
serial.h | ||
siginfo.h | ||
signal.h | ||
simd.h | ||
sizes.h | ||
spinlock.h | ||
statfs.h | ||
string.h | ||
switch_to.h | ||
syscall.h | ||
syscalls.h | ||
termios-base.h | ||
termios.h | ||
timex.h | ||
tlb.h | ||
tlbflush.h | ||
topology.h | ||
trace_clock.h | ||
uaccess-unaligned.h | ||
uaccess.h | ||
unaligned.h | ||
unistd.h | ||
user.h | ||
vga.h | ||
vmlinux.lds.h | ||
vtime.h | ||
word-at-a-time.h | ||
xor.h |