b96f7d881a
The queued spinlock code for s390 follows the principles of the common code qspinlock implementation but with a few notable differences.

The format of the spinlock_t locking word differs: s390 needs to store the logical CPU number of the lock holder in the spinlock_t to be able to use the diagnose 9c directed yield hypervisor call.

The inline code sequences for spin_lock and spin_unlock are nice and short. The inline portion of a spin_lock now typically looks like this:

	lhi	%r0,0			# 0 indicates an empty lock
	l	%r1,0x3a0		# CPU number + 1 from lowcore
	cs	%r0,%r1,<some_lock>	# lock operation
	jnz	call_wait		# on failure call wait function
locked:
	...
call_wait:
	la	%r2,<some_lock>
	brasl	%r14,arch_spin_lock_wait
	j	locked

A spin_unlock is as simple as before:

	lhi	%r0,0
	sth	%r0,2(%r2)		# unlock operation

After a CPU has queued itself it may not enable interrupts again for the arch_spin_lock_flags() variant. The arch_spin_lock_wait_flags wait function is removed.

To improve performance the code implements opportunistic lock stealing. If the wait function finds a spinlock_t that indicates that the lock is free but there are queued waiters, the CPU may steal the lock up to three times without queueing itself. The lock stealing updates the steal counter in the lock word to prevent more than 3 steals. The counter is reset at the time the CPU next in the queue successfully takes the lock.

While the queued spinlocks improve performance in a system with dedicated CPUs, in a virtualized environment with continuously overcommitted CPUs the queued spinlocks can have a negative effect on performance. This is due to the fact that a queued CPU that is preempted by the hypervisor will block the queue at some point even without holding the lock. With the classic spinlock it does not matter if a CPU is preempted that waits for the lock. Therefore use the queued spinlock code only if the system runs with dedicated CPUs and fall back to classic spinlocks when running with shared CPUs.
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
407 lines
9.3 KiB
C
407 lines
9.3 KiB
C
/*
|
|
* Out of line spinlock code.
|
|
*
|
|
* Copyright IBM Corp. 2004, 2006
|
|
* Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
|
|
*/
|
|
|
|
#include <linux/types.h>
|
|
#include <linux/export.h>
|
|
#include <linux/spinlock.h>
|
|
#include <linux/jiffies.h>
|
|
#include <linux/init.h>
|
|
#include <linux/smp.h>
|
|
#include <linux/percpu.h>
|
|
#include <asm/io.h>
|
|
|
|
int spin_retry = -1;
|
|
|
|
static int __init spin_retry_init(void)
|
|
{
|
|
if (spin_retry < 0)
|
|
spin_retry = 1000;
|
|
return 0;
|
|
}
|
|
early_initcall(spin_retry_init);
|
|
|
|
/**
|
|
* spin_retry= parameter
|
|
*/
|
|
static int __init spin_retry_setup(char *str)
|
|
{
|
|
spin_retry = simple_strtoul(str, &str, 0);
|
|
return 1;
|
|
}
|
|
__setup("spin_retry=", spin_retry_setup);
|
|
|
|
struct spin_wait {
|
|
struct spin_wait *next, *prev;
|
|
int node_id;
|
|
} __aligned(32);
|
|
|
|
static DEFINE_PER_CPU_ALIGNED(struct spin_wait, spin_wait[4]);
|
|
|
|
/*
 * Layout of the 32-bit lock word (bit positions counted from the least
 * significant bit):
 *
 *   bits  0-15  lock holder, stored as CPU number + 1 (0 = lock is free)
 *   bits 16-17  steal counter
 *   bits 18-19  index of the tail node within the tail CPU's spin_wait[]
 *   bits 20-31  CPU of the tail node, stored as CPU number + 1
 */
#define _Q_LOCK_CPU_OFFSET	0
#define _Q_LOCK_STEAL_OFFSET	16
#define _Q_TAIL_IDX_OFFSET	18
#define _Q_TAIL_CPU_OFFSET	20

#define _Q_LOCK_CPU_MASK	0x0000ffff
#define _Q_LOCK_STEAL_ADD	0x00010000	/* increment of the steal counter */
#define _Q_LOCK_STEAL_MASK	0x00030000
#define _Q_TAIL_IDX_MASK	0x000c0000
#define _Q_TAIL_CPU_MASK	0xfff00000

/* Holder plus steal counter, respectively the complete tail encoding */
#define _Q_LOCK_MASK		(_Q_LOCK_CPU_MASK | _Q_LOCK_STEAL_MASK)
#define _Q_TAIL_MASK		(_Q_TAIL_IDX_MASK | _Q_TAIL_CPU_MASK)
|
|
|
|
void arch_spin_lock_setup(int cpu)
|
|
{
|
|
struct spin_wait *node;
|
|
int ix;
|
|
|
|
node = per_cpu_ptr(&spin_wait[0], cpu);
|
|
for (ix = 0; ix < 4; ix++, node++) {
|
|
memset(node, 0, sizeof(*node));
|
|
node->node_id = ((cpu + 1) << _Q_TAIL_CPU_OFFSET) +
|
|
(ix << _Q_TAIL_IDX_OFFSET);
|
|
}
|
|
}
|
|
|
|
static inline int arch_load_niai4(int *lock)
|
|
{
|
|
int owner;
|
|
|
|
asm volatile(
|
|
#ifdef CONFIG_HAVE_MARCH_ZEC12_FEATURES
|
|
" .long 0xb2fa0040\n" /* NIAI 4 */
|
|
#endif
|
|
" l %0,%1\n"
|
|
: "=d" (owner) : "Q" (*lock) : "memory");
|
|
return owner;
|
|
}
|
|
|
|
static inline int arch_cmpxchg_niai8(int *lock, int old, int new)
|
|
{
|
|
int expected = old;
|
|
|
|
asm volatile(
|
|
#ifdef CONFIG_HAVE_MARCH_ZEC12_FEATURES
|
|
" .long 0xb2fa0080\n" /* NIAI 8 */
|
|
#endif
|
|
" cs %0,%3,%1\n"
|
|
: "=d" (old), "=Q" (*lock)
|
|
: "0" (old), "d" (new), "Q" (*lock)
|
|
: "cc", "memory");
|
|
return expected == old;
|
|
}
|
|
|
|
static inline struct spin_wait *arch_spin_decode_tail(int lock)
|
|
{
|
|
int ix, cpu;
|
|
|
|
ix = (lock & _Q_TAIL_IDX_MASK) >> _Q_TAIL_IDX_OFFSET;
|
|
cpu = (lock & _Q_TAIL_CPU_MASK) >> _Q_TAIL_CPU_OFFSET;
|
|
return per_cpu_ptr(&spin_wait[ix], cpu - 1);
|
|
}
|
|
|
|
static inline int arch_spin_yield_target(int lock, struct spin_wait *node)
|
|
{
|
|
if (lock & _Q_LOCK_CPU_MASK)
|
|
return lock & _Q_LOCK_CPU_MASK;
|
|
if (node == NULL || node->prev == NULL)
|
|
return 0; /* 0 -> no target cpu */
|
|
while (node->prev)
|
|
node = node->prev;
|
|
return node->node_id >> _Q_TAIL_CPU_OFFSET;
|
|
}
|
|
|
|
/*
 * Out-of-line lock acquisition using a queue of per-CPU spin_wait nodes.
 *
 * Sequence:
 *  1. Claim the spin_wait node selected by S390_lowcore.spinlock_index and
 *     bump the index for a possibly nested lock attempt on this CPU.
 *  2. If the lock is free and fewer than three steals have been recorded,
 *     take it directly (no waiters) or steal it (waiters present) and
 *     leave. Otherwise install this node as the new queue tail.
 *  3. Link this node behind the previous tail, if there was one.
 *  4. Spin on the local node->prev until the predecessor clears it, which
 *     makes this node the head of the queue.
 *  5. Spin on the lock word until the holder field is zero and take the
 *     lock. The new lock word keeps the tail unless this node is the tail,
 *     and carries no steal count.
 *  6. If another node is queued behind, wait for it to publish itself in
 *     node->next and clear its 'prev' pointer to make it the new head.
 *
 * While waiting, every spin_retry iterations the waiter checks whether the
 * CPU it depends on is preempted and yields to it if so.
 *
 * The order of the memory accesses is significant; do not reorder.
 */
static inline void arch_spin_lock_queued(arch_spinlock_t *lp)
{
	struct spin_wait *node, *next;
	int lockval, ix, node_id, tail_id, old, new, owner, count;

	ix = S390_lowcore.spinlock_index++;
	barrier();
	lockval = SPINLOCK_LOCKVAL;	/* cpu + 1 */
	node = this_cpu_ptr(&spin_wait[ix]);
	node->prev = node->next = NULL;
	node_id = node->node_id;

	/* Enqueue the node for this CPU in the spinlock wait queue */
	while (1) {
		old = READ_ONCE(lp->lock);
		if ((old & _Q_LOCK_CPU_MASK) == 0 &&
		    (old & _Q_LOCK_STEAL_MASK) != _Q_LOCK_STEAL_MASK) {
			/*
			 * The lock is free but there may be waiters.
			 * With no waiters simply take the lock, if there
			 * are waiters try to steal the lock. The lock may
			 * be stolen three times before the next queued
			 * waiter will get the lock.
			 */
			new = (old ? (old + _Q_LOCK_STEAL_ADD) : 0) | lockval;
			if (__atomic_cmpxchg_bool(&lp->lock, old, new))
				/* Got the lock */
				goto out;
			/* lock passing in progress */
			continue;
		}
		/* Make the node of this CPU the new tail. */
		new = node_id | (old & _Q_LOCK_MASK);
		if (__atomic_cmpxchg_bool(&lp->lock, old, new))
			break;
	}
	/* Set the 'next' pointer of the tail node in the queue */
	tail_id = old & _Q_TAIL_MASK;
	if (tail_id != 0) {
		node->prev = arch_spin_decode_tail(tail_id);
		WRITE_ONCE(node->prev->next, node);
	}

	/* Pass the virtual CPU to the lock holder if it is not running */
	owner = arch_spin_yield_target(old, node);
	if (owner && arch_vcpu_is_preempted(owner - 1))
		smp_yield_cpu(owner - 1);

	/* Spin on the CPU local node->prev pointer */
	if (tail_id != 0) {
		count = spin_retry;
		while (READ_ONCE(node->prev) != NULL) {
			if (count-- >= 0)
				continue;
			count = spin_retry;
			/* Query running state of lock holder again. */
			owner = arch_spin_yield_target(old, node);
			if (owner && arch_vcpu_is_preempted(owner - 1))
				smp_yield_cpu(owner - 1);
		}
	}

	/* Spin on the lock value in the spinlock_t */
	count = spin_retry;
	while (1) {
		old = READ_ONCE(lp->lock);
		owner = old & _Q_LOCK_CPU_MASK;
		if (!owner) {
			tail_id = old & _Q_TAIL_MASK;
			/* Drop the tail if this node is the last waiter */
			new = ((tail_id != node_id) ? tail_id : 0) | lockval;
			if (__atomic_cmpxchg_bool(&lp->lock, old, new))
				/* Got the lock */
				break;
			continue;
		}
		if (count-- >= 0)
			continue;
		count = spin_retry;
		if (!MACHINE_IS_LPAR || arch_vcpu_is_preempted(owner - 1))
			smp_yield_cpu(owner - 1);
	}

	/* Pass lock_spin job to next CPU in the queue */
	if (node_id && tail_id != node_id) {
		/* Wait until the next CPU has set up the 'next' pointer */
		while ((next = READ_ONCE(node->next)) == NULL)
			;
		/*
		 * The successor polls its 'prev' pointer with READ_ONCE(),
		 * so pair it with a marked store.
		 */
		WRITE_ONCE(next->prev, NULL);
	}

 out:
	S390_lowcore.spinlock_index--;
}
|
|
|
|
/*
 * Out-of-line lock acquisition without queueing: poll the lock word and
 * grab the lock with compare-and-swap as soon as the holder field is zero.
 *
 * The tail bits of the lock word are preserved when the lock is taken.
 * After more than spin_retry unsuccessful polls the waiter yields to the
 * holder if the machine is not an LPAR or the holder's virtual CPU is
 * preempted, then starts a new polling round.
 */
static inline void arch_spin_lock_classic(arch_spinlock_t *lp)
{
	int self, cur, want, holder, budget;

	self = SPINLOCK_LOCKVAL;	/* cpu + 1 */

	/* Give the virtual CPU to the lock holder right away if it is preempted */
	holder = arch_spin_yield_target(ACCESS_ONCE(lp->lock), NULL);
	if (holder && arch_vcpu_is_preempted(holder - 1))
		smp_yield_cpu(holder - 1);

	budget = spin_retry;
	for (;;) {
		cur = arch_load_niai4(&lp->lock);
		holder = cur & _Q_LOCK_CPU_MASK;
		if (!holder) {
			/* Lock is free: keep the tail bits, insert ourselves */
			want = (cur & _Q_TAIL_MASK) | self;
			if (arch_cmpxchg_niai8(&lp->lock, cur, want))
				return;		/* Got the lock */
			continue;
		}
		if (budget-- >= 0)
			continue;
		/* Polling round exhausted, start a new one */
		budget = spin_retry;
		if (!MACHINE_IS_LPAR || arch_vcpu_is_preempted(holder - 1))
			smp_yield_cpu(holder - 1);
	}
}
|
|
|
|
/*
 * Out-of-line part of spin_lock: wait until @lp has been acquired.
 *
 * The queued implementation is used only if the CIF_DEDICATED_CPU flag is
 * set for this CPU; otherwise the classic spinlock + niai variant is used.
 */
void arch_spin_lock_wait(arch_spinlock_t *lp)
{
	if (!test_cpu_flag(CIF_DEDICATED_CPU)) {
		arch_spin_lock_classic(lp);
		return;
	}
	arch_spin_lock_queued(lp);
}
EXPORT_SYMBOL(arch_spin_lock_wait);
|
|
|
|
/*
 * Retry loop for spin_trylock: poll @lp up to spin_retry times and try to
 * take it with compare-and-swap whenever the complete lock word is zero.
 *
 * Returns 1 if the lock was acquired, 0 if all attempts failed.
 */
int arch_spin_trylock_retry(arch_spinlock_t *lp)
{
	int self = SPINLOCK_LOCKVAL;
	int tries = spin_retry;

	while (tries > 0) {
		/* Only attempt the swap if the lock word looks free */
		if (READ_ONCE(lp->lock) == 0 &&
		    __atomic_cmpxchg_bool(&lp->lock, 0, self))
			return 1;
		tries--;
	}
	return 0;
}
EXPORT_SYMBOL(arch_spin_trylock_retry);
|
|
|
|
/*
 * Out-of-line part of read_lock: wait until a reader reference on @rw has
 * been obtained.
 *
 * A negative lock value (bit 0x80000000 set) blocks readers. Once the value
 * is non-negative the reader count is incremented with compare-and-swap.
 * Each time the retry budget is used up the waiter yields to the recorded
 * owner if that virtual CPU is preempted.
 */
void _raw_read_lock_wait(arch_rwlock_t *rw)
{
	int budget = spin_retry;
	int holder = 0;
	int val;

#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
	/*
	 * NOTE(review): presumably reverts the increment already applied by
	 * the inline fast path - confirm against the inline lock code.
	 */
	__RAW_LOCK(&rw->lock, -1, __RAW_OP_ADD);
#endif
	for (;;) {
		if (budget-- <= 0) {
			if (holder && arch_vcpu_is_preempted(holder - 1))
				smp_yield_cpu(holder - 1);
			budget = spin_retry;
		}
		val = ACCESS_ONCE(rw->lock);
		holder = ACCESS_ONCE(rw->owner);
		if (val >= 0 &&
		    __atomic_cmpxchg_bool(&rw->lock, val, val + 1))
			return;
	}
}
EXPORT_SYMBOL(_raw_read_lock_wait);
|
|
|
|
/*
 * Retry loop for read_trylock: make up to spin_retry attempts to increment
 * the reader count of @rw while the lock value is non-negative.
 *
 * Returns 1 if a reader reference was obtained, 0 otherwise.
 */
int _raw_read_trylock_retry(arch_rwlock_t *rw)
{
	int tries, val;

	for (tries = spin_retry; tries > 0; tries--) {
		val = ACCESS_ONCE(rw->lock);
		if (val >= 0 &&
		    __atomic_cmpxchg_bool(&rw->lock, val, val + 1))
			return 1;
	}
	return 0;
}
EXPORT_SYMBOL(_raw_read_trylock_retry);
|
|
|
|
#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
|
|
|
|
/*
 * Out-of-line part of write_lock (variant for CONFIG_HAVE_MARCH_Z196_FEATURES).
 *
 * @rw:   the rwlock
 * @prev: lock value seen before the write bit was last set by the caller
 *        (NOTE(review): presumably the result of the inline fast path's
 *        attempt - confirm against the caller)
 *
 * Whenever the write bit 0x80000000 is found clear it is set with an atomic
 * OR and the previous value is remembered in @prev. The function returns
 * once the reader count (low 31 bits) is zero and the remembered previous
 * value is non-negative, i.e. the write bit was set by this caller.
 */
void _raw_write_lock_wait(arch_rwlock_t *rw, int prev)
{
	int budget = spin_retry;
	int holder = 0;
	int val;

	for (;;) {
		if (budget-- <= 0) {
			if (holder && arch_vcpu_is_preempted(holder - 1))
				smp_yield_cpu(holder - 1);
			budget = spin_retry;
		}
		val = ACCESS_ONCE(rw->lock);
		holder = ACCESS_ONCE(rw->owner);
		smp_mb();
		if (val >= 0) {
			/* Write bit is clear: set it and note what was there */
			prev = __RAW_LOCK(&rw->lock, 0x80000000, __RAW_OP_OR);
			val = prev;
		}
		if ((val & 0x7fffffff) == 0 && prev >= 0)
			return;
	}
}
EXPORT_SYMBOL(_raw_write_lock_wait);
|
|
|
|
#else /* CONFIG_HAVE_MARCH_Z196_FEATURES */
|
|
|
|
/*
 * Out-of-line part of write_lock (variant without
 * CONFIG_HAVE_MARCH_Z196_FEATURES).
 *
 * The write bit 0x80000000 is set with compare-and-swap whenever it is
 * found clear; 'last' records the value that was replaced. The function
 * returns once the sampled reader count (low 31 bits) is zero and 'last'
 * is non-negative, i.e. this caller has set the write bit.
 */
void _raw_write_lock_wait(arch_rwlock_t *rw)
{
	int budget = spin_retry;
	int last = 0x80000000;	/* negative: write bit not yet set by us */
	int holder = 0;
	int val;

	for (;;) {
		if (budget-- <= 0) {
			if (holder && arch_vcpu_is_preempted(holder - 1))
				smp_yield_cpu(holder - 1);
			budget = spin_retry;
		}
		val = ACCESS_ONCE(rw->lock);
		holder = ACCESS_ONCE(rw->owner);
		if (val >= 0 &&
		    __atomic_cmpxchg_bool(&rw->lock, val, val | 0x80000000))
			last = val;
		else
			smp_mb();
		if ((val & 0x7fffffff) == 0 && last >= 0)
			return;
	}
}
EXPORT_SYMBOL(_raw_write_lock_wait);
|
|
|
|
#endif /* CONFIG_HAVE_MARCH_Z196_FEATURES */
|
|
|
|
/*
 * Retry loop for write_trylock: make up to spin_retry attempts to switch
 * the lock value of @rw from 0 (no readers, no writer) to 0x80000000.
 *
 * Returns 1 if the write lock was obtained, 0 otherwise.
 */
int _raw_write_trylock_retry(arch_rwlock_t *rw)
{
	int tries;

	for (tries = spin_retry; tries > 0; tries--) {
		/* Skip the swap while readers or a writer are present */
		if (ACCESS_ONCE(rw->lock) == 0 &&
		    __atomic_cmpxchg_bool(&rw->lock, 0, 0x80000000))
			return 1;
	}
	return 0;
}
EXPORT_SYMBOL(_raw_write_trylock_retry);
|
|
|
|
void arch_lock_relax(int cpu)
|
|
{
|
|
if (!cpu)
|
|
return;
|
|
if (MACHINE_IS_LPAR && !arch_vcpu_is_preempted(cpu - 1))
|
|
return;
|
|
smp_yield_cpu(cpu - 1);
|
|
}
|
|
EXPORT_SYMBOL(arch_lock_relax);
|
|
|
|
/*
 * Yield to the current holder of @lp if that is worthwhile.
 *
 * The holder is taken from the CPU field of the lock word (CPU number + 1).
 * Nothing is done if the lock is free. On an LPAR the yield is issued only
 * if the holder's virtual CPU is preempted; otherwise it is always issued.
 */
void arch_spin_relax(arch_spinlock_t *lp)
{
	int holder = READ_ONCE(lp->lock) & _Q_LOCK_CPU_MASK;

	if (holder == 0)
		return;
	if (!MACHINE_IS_LPAR || arch_vcpu_is_preempted(holder - 1))
		smp_yield_cpu(holder - 1);
}
EXPORT_SYMBOL(arch_spin_relax);
|