From 133e89ef5ef338e1358b16246521ba17d935c396 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Fri, 13 May 2016 11:56:26 -0700 Subject: [PATCH 01/67] locking/rwsem: Enable lockless waiter wakeup(s) As wake_qs gain users, we can teach rwsems about them such that waiters can be awoken without the wait_lock. This is for both readers and writer, the former being the most ideal candidate as we can batch the wakeups shortening the critical region that much more -- ie writer task blocking a bunch of tasks waiting to service page-faults (mmap_sem readers). In general applying wake_qs to rwsem (xadd) is not difficult as the wait_lock is intended to be released soon _anyways_, with the exception of when a writer slowpath will proactively wakeup any queued readers if it sees that the lock is owned by a reader, in which we simply do the wakeups with the lock held (see comment in __rwsem_down_write_failed_common()). Similar to other locking primitives, delaying the waiter being awoken does allow, at least in theory, the lock to be stolen in the case of writers, however no harm was seen in this (in fact lock stealing tends to be a _good_ thing in most workloads), and this is a tiny window anyways. Some page-fault (pft) and mmap_sem intensive benchmarks show some pretty constant reduction in systime (by up to ~8 and ~10%) on a 2-socket, 12 core AMD box. In addition, on an 8-core Westmere doing page allocations (page_test) aim9: 4.6-rc6 4.6-rc6 rwsemv2 Min page_test 378167.89 ( 0.00%) 382613.33 ( 1.18%) Min exec_test 499.00 ( 0.00%) 502.67 ( 0.74%) Min fork_test 3395.47 ( 0.00%) 3537.64 ( 4.19%) Hmean page_test 395433.06 ( 0.00%) 414693.68 ( 4.87%) Hmean exec_test 499.67 ( 0.00%) 505.30 ( 1.13%) Hmean fork_test 3504.22 ( 0.00%) 3594.95 ( 2.59%) Stddev page_test 17426.57 ( 0.00%) 26649.92 (-52.93%) Stddev exec_test 0.47 ( 0.00%) 1.41 (-199.05%) Stddev fork_test 63.74 ( 0.00%) 32.59 ( 48.86%) Max page_test 429873.33 ( 0.00%) 456960.00 ( 6.30%) Max exec_test 500.33 ( 0.00%) 507.66 ( 1.47%) Max fork_test 3653.33 ( 0.00%) 3650.90 ( -0.07%) 4.6-rc6 4.6-rc6 rwsemv2 User 1.12 0.04 System 0.23 0.04 Elapsed 727.27 721.98 Signed-off-by: Davidlohr Bueso Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Waiman.Long@hpe.com Cc: dave@stgolabs.net Cc: jason.low2@hp.com Cc: peter@hurleysoftware.com Link: http://lkml.kernel.org/r/1463165787-25937-2-git-send-email-dave@stgolabs.net Signed-off-by: Ingo Molnar --- kernel/locking/rwsem-xadd.c | 58 +++++++++++++++++++++++++++---------- 1 file changed, 42 insertions(+), 16 deletions(-) diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c index 09e30c6225e5..80b05ac0f015 100644 --- a/kernel/locking/rwsem-xadd.c +++ b/kernel/locking/rwsem-xadd.c @@ -114,12 +114,16 @@ enum rwsem_wake_type { * - the 'active part' of count (&0x0000ffff) reached 0 (but may have changed) * - the 'waiting part' of count (&0xffff0000) is -ve (and will still be so) * - there must be someone on the queue - * - the spinlock must be held by the caller + * - the wait_lock must be held by the caller + * - tasks are marked for wakeup, the caller must later invoke wake_up_q() + * to actually wakeup the blocked task(s) and drop the reference count, + * preferably when the wait_lock is released * - woken process blocks are discarded from the list after having task zeroed - * - writers are only woken if downgrading is false + * - writers are only marked woken if downgrading is false */ static struct rw_semaphore * -__rwsem_do_wake(struct rw_semaphore *sem, enum rwsem_wake_type wake_type) +__rwsem_mark_wake(struct rw_semaphore *sem, + enum rwsem_wake_type wake_type, struct wake_q_head *wake_q) { struct rwsem_waiter *waiter; struct task_struct *tsk; @@ -128,13 +132,16 @@ __rwsem_do_wake(struct rw_semaphore *sem, enum rwsem_wake_type wake_type) waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list); if (waiter->type == RWSEM_WAITING_FOR_WRITE) { - if (wake_type == RWSEM_WAKE_ANY) - /* Wake writer at the front of the queue, but do not - * grant it the lock yet as we want other writers - * to be able to steal it. Readers, on the other hand, - * will block as they will notice the queued writer. + if (wake_type == RWSEM_WAKE_ANY) { + /* + * Mark writer at the front of the queue for wakeup. + * Until the task is actually later awoken later by + * the caller, other writers are able to steal it. + * Readers, on the other hand, will block as they + * will notice the queued writer. */ - wake_up_process(waiter->task); + wake_q_add(wake_q, waiter->task); + } goto out; } @@ -196,7 +203,7 @@ __rwsem_do_wake(struct rw_semaphore *sem, enum rwsem_wake_type wake_type) */ smp_mb(); waiter->task = NULL; - wake_up_process(tsk); + wake_q_add(wake_q, tsk); put_task_struct(tsk); } while (--loop); @@ -216,6 +223,7 @@ struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem) long count, adjustment = -RWSEM_ACTIVE_READ_BIAS; struct rwsem_waiter waiter; struct task_struct *tsk = current; + WAKE_Q(wake_q); /* set up my own style of waitqueue */ waiter.task = tsk; @@ -238,9 +246,10 @@ struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem) if (count == RWSEM_WAITING_BIAS || (count > RWSEM_WAITING_BIAS && adjustment != -RWSEM_ACTIVE_READ_BIAS)) - sem = __rwsem_do_wake(sem, RWSEM_WAKE_ANY); + sem = __rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q); raw_spin_unlock_irq(&sem->wait_lock); + wake_up_q(&wake_q); /* wait to be given the lock */ while (true) { @@ -440,6 +449,7 @@ __rwsem_down_write_failed_common(struct rw_semaphore *sem, int state) bool waiting = true; /* any queued threads before us */ struct rwsem_waiter waiter; struct rw_semaphore *ret = sem; + WAKE_Q(wake_q); /* undo write bias from down_write operation, stop active locking */ count = rwsem_atomic_update(-RWSEM_ACTIVE_WRITE_BIAS, sem); @@ -472,8 +482,19 @@ __rwsem_down_write_failed_common(struct rw_semaphore *sem, int state) * no active writers, the lock must be read owned; so we try to * wake any read locks that were queued ahead of us. */ - if (count > RWSEM_WAITING_BIAS) - sem = __rwsem_do_wake(sem, RWSEM_WAKE_READERS); + if (count > RWSEM_WAITING_BIAS) { + WAKE_Q(wake_q); + + sem = __rwsem_mark_wake(sem, RWSEM_WAKE_READERS, &wake_q); + /* + * The wakeup is normally called _after_ the wait_lock + * is released, but given that we are proactively waking + * readers we can deal with the wake_q overhead as it is + * similar to releasing and taking the wait_lock again + * for attempting rwsem_try_write_lock(). + */ + wake_up_q(&wake_q); + } } else count = rwsem_atomic_update(RWSEM_WAITING_BIAS, sem); @@ -509,8 +530,9 @@ out_nolock: if (list_empty(&sem->wait_list)) rwsem_atomic_update(-RWSEM_WAITING_BIAS, sem); else - __rwsem_do_wake(sem, RWSEM_WAKE_ANY); + __rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q); raw_spin_unlock_irq(&sem->wait_lock); + wake_up_q(&wake_q); return ERR_PTR(-EINTR); } @@ -537,6 +559,7 @@ __visible struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem) { unsigned long flags; + WAKE_Q(wake_q); /* * If a spinner is present, it is not necessary to do the wakeup. @@ -573,9 +596,10 @@ locked: /* do nothing if list empty */ if (!list_empty(&sem->wait_list)) - sem = __rwsem_do_wake(sem, RWSEM_WAKE_ANY); + sem = __rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q); raw_spin_unlock_irqrestore(&sem->wait_lock, flags); + wake_up_q(&wake_q); return sem; } @@ -590,14 +614,16 @@ __visible struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem) { unsigned long flags; + WAKE_Q(wake_q); raw_spin_lock_irqsave(&sem->wait_lock, flags); /* do nothing if list empty */ if (!list_empty(&sem->wait_list)) - sem = __rwsem_do_wake(sem, RWSEM_WAKE_READ_OWNED); + sem = __rwsem_mark_wake(sem, RWSEM_WAKE_READ_OWNED, &wake_q); raw_spin_unlock_irqrestore(&sem->wait_lock, flags); + wake_up_q(&wake_q); return sem; } From e38513905eeaae59056eac2c9ac55a43b1fc41b2 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Fri, 13 May 2016 11:56:27 -0700 Subject: [PATCH 02/67] locking/rwsem: Rework zeroing reader waiter->task Readers that are awoken will expect a nil ->task indicating that a wakeup has occurred. Because of the way readers are implemented, there's a small chance that the waiter will never block in the slowpath (rwsem_down_read_failed), and therefore requires some form of reference counting to avoid the following scenario: rwsem_down_read_failed() rwsem_wake() get_task_struct(); spin_lock_irq(&wait_lock); list_add_tail(&waiter.list) spin_unlock_irq(&wait_lock); raw_spin_lock_irqsave(&wait_lock) __rwsem_do_wake() while (1) { set_task_state(TASK_UNINTERRUPTIBLE); waiter->task = NULL if (!waiter.task) // true break; schedule() // never reached __set_task_state(TASK_RUNNING); do_exit(); wake_up_process(tsk); // boom ... and therefore race with do_exit() when the caller returns. There is also a mismatch between the smp_mb() and its documentation, in that the serialization is done between reading the task and the nil store. Furthermore, in addition to having the overlapping of loads and stores to waiter->task guaranteed to be ordered within that CPU, both wake_up_process() originally and now wake_q_add() already imply barriers upon successful calls, which serves the comment. Now, as an alternative to perhaps inverting the checks in the blocker side (which has its own penalty in that schedule is unavoidable), with lockless wakeups this situation is naturally addressed and we can just use the refcount held by wake_q_add(), instead doing so explicitly. Of course, we must guarantee that the nil store is done as the _last_ operation in that the task must already be marked for deletion to not fall into the race above. Spurious wakeups are also handled transparently in that the task's reference is only removed when wake_up_q() is actually called _after_ the nil store. Signed-off-by: Davidlohr Bueso Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Waiman.Long@hpe.com Cc: dave@stgolabs.net Cc: jason.low2@hp.com Cc: peter@hurleysoftware.com Link: http://lkml.kernel.org/r/1463165787-25937-3-git-send-email-dave@stgolabs.net Signed-off-by: Ingo Molnar --- kernel/locking/rwsem-xadd.c | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c index 80b05ac0f015..fcbf75ac3dcb 100644 --- a/kernel/locking/rwsem-xadd.c +++ b/kernel/locking/rwsem-xadd.c @@ -194,17 +194,15 @@ __rwsem_mark_wake(struct rw_semaphore *sem, waiter = list_entry(next, struct rwsem_waiter, list); next = waiter->list.next; tsk = waiter->task; - /* - * Make sure we do not wakeup the next reader before - * setting the nil condition to grant the next reader; - * otherwise we could miss the wakeup on the other - * side and end up sleeping again. See the pairing - * in rwsem_down_read_failed(). - */ - smp_mb(); - waiter->task = NULL; + wake_q_add(wake_q, tsk); - put_task_struct(tsk); + /* + * Ensure that the last operation is setting the reader + * waiter to nil such that rwsem_down_read_failed() cannot + * race with do_exit() by always holding a reference count + * to the task to wakeup. + */ + smp_store_release(&waiter->task, NULL); } while (--loop); sem->wait_list.next = next; @@ -228,7 +226,6 @@ struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem) /* set up my own style of waitqueue */ waiter.task = tsk; waiter.type = RWSEM_WAITING_FOR_READ; - get_task_struct(tsk); raw_spin_lock_irq(&sem->wait_lock); if (list_empty(&sem->wait_list)) From c0fcb6c2d332041256dc55d8a1ec3c0a2d0befb8 Mon Sep 17 00:00:00 2001 From: Jason Low Date: Mon, 16 May 2016 17:38:00 -0700 Subject: [PATCH 03/67] locking/rwsem: Optimize write lock by reducing operations in slowpath When acquiring the rwsem write lock in the slowpath, we first try to set count to RWSEM_WAITING_BIAS. When that is successful, we then atomically add the RWSEM_WAITING_BIAS in cases where there are other tasks on the wait list. This causes write lock operations to often issue multiple atomic operations. We can instead make the list_is_singular() check first, and then set the count accordingly, so that we issue at most 1 atomic operation when acquiring the write lock and reduce unnecessary cacheline contention. Signed-off-by: Jason Low Signed-off-by: Peter Zijlstra (Intel) Acked-by: Waiman Long Acked-by: Davidlohr Bueso Cc: Andrew Morton Cc: Arnd Bergmann Cc: Christoph Lameter Cc: Fenghua Yu Cc: Heiko Carstens Cc: Ivan Kokshaysky Cc: Jason Low Cc: Linus Torvalds Cc: Martin Schwidefsky Cc: Matt Turner Cc: Paul E. McKenney Cc: Peter Hurley Cc: Peter Zijlstra Cc: Richard Henderson Cc: Terry Rudd Cc: Thomas Gleixner Cc: Tim Chen Cc: Tony Luck Link: http://lkml.kernel.org/r/1463445486-16078-2-git-send-email-jason.low2@hpe.com Signed-off-by: Ingo Molnar --- kernel/locking/rwsem-xadd.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c index fcbf75ac3dcb..b957da7fcb19 100644 --- a/kernel/locking/rwsem-xadd.c +++ b/kernel/locking/rwsem-xadd.c @@ -261,17 +261,28 @@ struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem) } EXPORT_SYMBOL(rwsem_down_read_failed); +/* + * This function must be called with the sem->wait_lock held to prevent + * race conditions between checking the rwsem wait list and setting the + * sem->count accordingly. + */ static inline bool rwsem_try_write_lock(long count, struct rw_semaphore *sem) { /* - * Try acquiring the write lock. Check count first in order - * to reduce unnecessary expensive cmpxchg() operations. + * Avoid trying to acquire write lock if count isn't RWSEM_WAITING_BIAS. */ - if (count == RWSEM_WAITING_BIAS && - cmpxchg_acquire(&sem->count, RWSEM_WAITING_BIAS, - RWSEM_ACTIVE_WRITE_BIAS) == RWSEM_WAITING_BIAS) { - if (!list_is_singular(&sem->wait_list)) - rwsem_atomic_update(RWSEM_WAITING_BIAS, sem); + if (count != RWSEM_WAITING_BIAS) + return false; + + /* + * Acquire the lock by trying to set it to ACTIVE_WRITE_BIAS. If there + * are other tasks on the wait list, we need to add on WAITING_BIAS. + */ + count = list_is_singular(&sem->wait_list) ? + RWSEM_ACTIVE_WRITE_BIAS : + RWSEM_ACTIVE_WRITE_BIAS + RWSEM_WAITING_BIAS; + + if (cmpxchg_acquire(&sem->count, RWSEM_WAITING_BIAS, count) == RWSEM_WAITING_BIAS) { rwsem_set_owner(sem); return true; } From 6e2814745c67ab422b86262b05e6f23a56f28aa3 Mon Sep 17 00:00:00 2001 From: Jason Low Date: Fri, 20 May 2016 15:19:36 -0700 Subject: [PATCH 04/67] locking/mutex: Set and clear owner using WRITE_ONCE() The mutex owner can get read and written to locklessly. Use WRITE_ONCE when setting and clearing the owner field in order to avoid optimizations such as store tearing. This avoids situations where the owner field gets written to with multiple stores and another thread could concurrently read and use a partially written owner value. Signed-off-by: Jason Low Signed-off-by: Peter Zijlstra (Intel) Acked-by: Davidlohr Bueso Acked-by: Waiman Long Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Scott J Norton Cc: Terry Rudd Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1463782776.2479.9.camel@j-VirtualBox Signed-off-by: Ingo Molnar --- kernel/locking/mutex-debug.h | 4 ++-- kernel/locking/mutex.h | 10 ++++++++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/kernel/locking/mutex-debug.h b/kernel/locking/mutex-debug.h index 0799fd3e4cfa..372e6530180d 100644 --- a/kernel/locking/mutex-debug.h +++ b/kernel/locking/mutex-debug.h @@ -29,12 +29,12 @@ extern void debug_mutex_init(struct mutex *lock, const char *name, static inline void mutex_set_owner(struct mutex *lock) { - lock->owner = current; + WRITE_ONCE(lock->owner, current); } static inline void mutex_clear_owner(struct mutex *lock) { - lock->owner = NULL; + WRITE_ONCE(lock->owner, NULL); } #define spin_lock_mutex(lock, flags) \ diff --git a/kernel/locking/mutex.h b/kernel/locking/mutex.h index 5cda397607f2..12f96199441c 100644 --- a/kernel/locking/mutex.h +++ b/kernel/locking/mutex.h @@ -17,14 +17,20 @@ __list_del((waiter)->list.prev, (waiter)->list.next) #ifdef CONFIG_MUTEX_SPIN_ON_OWNER +/* + * The mutex owner can get read and written to locklessly. + * We should use WRITE_ONCE when writing the owner value to + * avoid store tearing, otherwise, a thread could potentially + * read a partially written and incomplete owner value. + */ static inline void mutex_set_owner(struct mutex *lock) { - lock->owner = current; + WRITE_ONCE(lock->owner, current); } static inline void mutex_clear_owner(struct mutex *lock) { - lock->owner = NULL; + WRITE_ONCE(lock->owner, NULL); } #else static inline void mutex_set_owner(struct mutex *lock) From ed8ebd1d514126c0e54fbdbd231427dc91c877c2 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 25 May 2016 16:11:57 -0400 Subject: [PATCH 05/67] percpu, locking: Revert ("percpu: Replace smp_read_barrier_depends() with lockless_dereference()") lockless_dereference() is planned to grow a sanity check to ensure that the input parameter is a pointer. __ref_is_percpu() passes in an unsinged long value which is a combination of a pointer and a flag. While it can be casted to a pointer lvalue, the casting looks messy and it's a special case anyway. Let's revert back to open-coding READ_ONCE() and explicit barrier. This doesn't cause any functional changes. Signed-off-by: Tejun Heo Signed-off-by: Peter Zijlstra (Intel) Cc: Alexey Dobriyan Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Paul McKenney Cc: Peter Zijlstra Cc: Pranith Kumar Cc: Thomas Gleixner Cc: kernel-team@fb.com Link: http://lkml.kernel.org/g/20160522185040.GA23664@p183.telecom.by Signed-off-by: Ingo Molnar --- include/linux/percpu-refcount.h | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h index 84f542df7ff5..1c7eec09e5eb 100644 --- a/include/linux/percpu-refcount.h +++ b/include/linux/percpu-refcount.h @@ -136,14 +136,12 @@ static inline bool __ref_is_percpu(struct percpu_ref *ref, * used as a pointer. If the compiler generates a separate fetch * when using it as a pointer, __PERCPU_REF_ATOMIC may be set in * between contaminating the pointer value, meaning that - * ACCESS_ONCE() is required when fetching it. - * - * Also, we need a data dependency barrier to be paired with - * smp_store_release() in __percpu_ref_switch_to_percpu(). - * - * Use lockless deref which contains both. + * READ_ONCE() is required when fetching it. */ - percpu_ptr = lockless_dereference(ref->percpu_count_ptr); + percpu_ptr = READ_ONCE(ref->percpu_count_ptr); + + /* paired with smp_store_release() in __percpu_ref_switch_to_percpu() */ + smp_read_barrier_depends(); /* * Theoretically, the following could test just ATOMIC; however, From dfaaf3fa01d65cf6e2072965bb0b7aaa7285344f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 30 May 2016 18:31:33 +0200 Subject: [PATCH 06/67] locking/lockdep: Use __jhash_mix() for iterate_chain_key() Use __jhash_mix() to mix the class_idx into the class_key. This function provides better mixing than the previously used, home grown mix function. Leave hashing to the professionals :-) Suggested-by: George Spelvin Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- kernel/locking/lockdep.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 81f1a7107c0e..589d763a49b3 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -46,6 +46,7 @@ #include #include #include +#include #include @@ -309,10 +310,14 @@ static struct hlist_head chainhash_table[CHAINHASH_SIZE]; * It's a 64-bit hash, because it's important for the keys to be * unique. */ -#define iterate_chain_key(key1, key2) \ - (((key1) << MAX_LOCKDEP_KEYS_BITS) ^ \ - ((key1) >> (64-MAX_LOCKDEP_KEYS_BITS)) ^ \ - (key2)) +static inline u64 iterate_chain_key(u64 key, u32 idx) +{ + u32 k0 = key, k1 = key >> 32; + + __jhash_mix(idx, k0, k1); /* Macro that modifies arguments! */ + + return k0 | (u64)k1 << 32; +} void lockdep_off(void) { From a461d58792d4a46b8b10ae50973ec9b2763b694e Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Fri, 27 May 2016 15:47:18 +0200 Subject: [PATCH 07/67] locking/rtmutex: Only warn once on a trylock from bad context One warning should be enough to get one motivated to fix this. It is possible that this happens more than once and that starts flooding the output. Later the prints will be suppressed so we only get half of it. Depending on the console system used it might not be helpful. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1464356838-1755-1-git-send-email-bigeasy@linutronix.de Signed-off-by: Ingo Molnar --- kernel/locking/rtmutex.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index 3e746607abe5..1ec0f48962b3 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -1478,7 +1478,7 @@ EXPORT_SYMBOL_GPL(rt_mutex_timed_lock); */ int __sched rt_mutex_trylock(struct rt_mutex *lock) { - if (WARN_ON(in_irq() || in_nmi() || in_serving_softirq())) + if (WARN_ON_ONCE(in_irq() || in_nmi() || in_serving_softirq())) return 0; return rt_mutex_fasttrylock(lock, rt_mutex_slowtrylock); From 331b6d8c7afc2e5b900b9dcd850c265e1ba8d8e7 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sun, 22 May 2016 12:48:27 +0200 Subject: [PATCH 08/67] locking/barriers: Validate lockless_dereference() is used on a pointer type Use the type to validate the argument @p is indeed a pointer type. Signed-off-by: Peter Zijlstra (Intel) Cc: Alexey Dobriyan Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Paul McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20160522104827.GP3193@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- include/linux/compiler.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 793c0829e3a3..06f27fd9d760 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -545,10 +545,14 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s * Similar to rcu_dereference(), but for situations where the pointed-to * object's lifetime is managed by something other than RCU. That * "something other" might be reference counting or simple immortality. + * + * The seemingly unused void * variable is to validate @p is indeed a pointer + * type. All pointer types silently cast to void *. */ #define lockless_dereference(p) \ ({ \ typeof(p) _________p1 = READ_ONCE(p); \ + __maybe_unused const void * const _________p2 = _________p1; \ smp_read_barrier_depends(); /* Dependency order vs. p above. */ \ (_________p1); \ }) From 8d53fa19041ae65c484d81d75179b4a577e6d8e4 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 8 Jun 2016 09:12:30 +0200 Subject: [PATCH 09/67] locking/qspinlock: Clarify xchg_tail() ordering While going over the code I noticed that xchg_tail() is a RELEASE but had no obvious pairing commented. It pairs with a somewhat unique address dependency through decode_tail(). So the store-release of xchg_tail() is paired by the address dependency of the load of xchg_tail followed by the dereference from the pointer computed from that load. The @old -> @prev transformation itself is pure, and therefore does not depend on external state, so that is immaterial wrt. ordering. Signed-off-by: Peter Zijlstra (Intel) Cc: Boqun Feng Cc: Davidlohr Bueso Cc: Linus Torvalds Cc: Pan Xinhui Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Waiman Long Cc: Will Deacon Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- kernel/locking/qspinlock.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c index 5fc8c311b8fe..ee7deb08d43d 100644 --- a/kernel/locking/qspinlock.c +++ b/kernel/locking/qspinlock.c @@ -90,7 +90,7 @@ static DEFINE_PER_CPU_ALIGNED(struct mcs_spinlock, mcs_nodes[MAX_NODES]); * therefore increment the cpu number by one. */ -static inline u32 encode_tail(int cpu, int idx) +static inline __pure u32 encode_tail(int cpu, int idx) { u32 tail; @@ -103,7 +103,7 @@ static inline u32 encode_tail(int cpu, int idx) return tail; } -static inline struct mcs_spinlock *decode_tail(u32 tail) +static inline __pure struct mcs_spinlock *decode_tail(u32 tail) { int cpu = (tail >> _Q_TAIL_CPU_OFFSET) - 1; int idx = (tail & _Q_TAIL_IDX_MASK) >> _Q_TAIL_IDX_OFFSET; @@ -455,6 +455,8 @@ queue: * pending stuff. * * p,*,* -> n,*,* + * + * RELEASE, such that the stores to @node must be complete. */ old = xchg_tail(lock, tail); next = NULL; @@ -465,6 +467,15 @@ queue: */ if (old & _Q_TAIL_MASK) { prev = decode_tail(old); + /* + * The above xchg_tail() is also a load of @lock which generates, + * through decode_tail(), a pointer. + * + * The address dependency matches the RELEASE of xchg_tail() + * such that the access to @prev must happen after. + */ + smp_read_barrier_depends(); + WRITE_ONCE(prev->next, node); pv_wait_node(node, prev); From 055ce0fd1b86c204430cbc0887165599d6e15090 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 8 Jun 2016 10:36:53 +0200 Subject: [PATCH 10/67] locking/qspinlock: Add comments I figured we need to document the spin_is_locked() and spin_unlock_wait() constraints somwehere. Ideally 'someone' would rewrite Documentation/atomic_ops.txt and we could find a place in there. But currently that document is stale to the point of hardly being useful. Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Boqun Feng Cc: Davidlohr Bueso Cc: Linus Torvalds Cc: Pan Xinhui Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Waiman Long Cc: Will Deacon Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- kernel/locking/qspinlock.c | 57 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c index ee7deb08d43d..2f9153b183c9 100644 --- a/kernel/locking/qspinlock.c +++ b/kernel/locking/qspinlock.c @@ -267,6 +267,63 @@ static __always_inline u32 __pv_wait_head_or_lock(struct qspinlock *lock, #define queued_spin_lock_slowpath native_queued_spin_lock_slowpath #endif +/* + * Various notes on spin_is_locked() and spin_unlock_wait(), which are + * 'interesting' functions: + * + * PROBLEM: some architectures have an interesting issue with atomic ACQUIRE + * operations in that the ACQUIRE applies to the LOAD _not_ the STORE (ARM64, + * PPC). Also qspinlock has a similar issue per construction, the setting of + * the locked byte can be unordered acquiring the lock proper. + * + * This gets to be 'interesting' in the following cases, where the /should/s + * end up false because of this issue. + * + * + * CASE 1: + * + * So the spin_is_locked() correctness issue comes from something like: + * + * CPU0 CPU1 + * + * global_lock(); local_lock(i) + * spin_lock(&G) spin_lock(&L[i]) + * for (i) if (!spin_is_locked(&G)) { + * spin_unlock_wait(&L[i]); smp_acquire__after_ctrl_dep(); + * return; + * } + * // deal with fail + * + * Where it is important CPU1 sees G locked or CPU0 sees L[i] locked such + * that there is exclusion between the two critical sections. + * + * The load from spin_is_locked(&G) /should/ be constrained by the ACQUIRE from + * spin_lock(&L[i]), and similarly the load(s) from spin_unlock_wait(&L[i]) + * /should/ be constrained by the ACQUIRE from spin_lock(&G). + * + * Similarly, later stuff is constrained by the ACQUIRE from CTRL+RMB. + * + * + * CASE 2: + * + * For spin_unlock_wait() there is a second correctness issue, namely: + * + * CPU0 CPU1 + * + * flag = set; + * smp_mb(); spin_lock(&l) + * spin_unlock_wait(&l); if (!flag) + * // add to lockless list + * spin_unlock(&l); + * // iterate lockless list + * + * Which wants to ensure that CPU1 will stop adding bits to the list and CPU0 + * will observe the last entry on the list (if spin_unlock_wait() had ACQUIRE + * semantics etc..) + * + * Where flag /should/ be ordered against the locked store of l. + */ + /* * queued_spin_lock_slowpath() can (load-)ACQUIRE the lock before * issuing an _unordered_ store to set _Q_LOCKED_VAL. From 8ee62b1870be8e630158701632a533d0378e15b8 Mon Sep 17 00:00:00 2001 From: Jason Low Date: Fri, 3 Jun 2016 22:26:02 -0700 Subject: [PATCH 11/67] locking/rwsem: Convert sem->count to 'atomic_long_t' Convert the rwsem count variable to an atomic_long_t since we use it as an atomic variable. This also allows us to remove the rwsem_atomic_{add,update}() "abstraction" which would now be an unnecesary level of indirection. In follow up patches, we also remove the rwsem_atomic_{add,update}() definitions across the various architectures. Suggested-by: Peter Zijlstra Signed-off-by: Jason Low [ Build warning fixes on various architectures. ] Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Davidlohr Bueso Cc: Fenghua Yu Cc: Heiko Carstens Cc: Jason Low Cc: Linus Torvalds Cc: Martin Schwidefsky Cc: Paul E. McKenney Cc: Peter Hurley Cc: Terry Rudd Cc: Thomas Gleixner Cc: Tim Chen Cc: Tony Luck Cc: Waiman Long Link: http://lkml.kernel.org/r/1465017963-4839-2-git-send-email-jason.low2@hpe.com Signed-off-by: Ingo Molnar --- arch/alpha/include/asm/rwsem.h | 26 +++++++++++++------------- arch/ia64/include/asm/rwsem.h | 24 ++++++++++++------------ include/asm-generic/rwsem.h | 6 +++--- include/linux/rwsem.h | 8 +++++--- kernel/locking/rwsem-xadd.c | 32 +++++++++++++++++--------------- 5 files changed, 50 insertions(+), 46 deletions(-) diff --git a/arch/alpha/include/asm/rwsem.h b/arch/alpha/include/asm/rwsem.h index 0131a7058778..b40021aabb9f 100644 --- a/arch/alpha/include/asm/rwsem.h +++ b/arch/alpha/include/asm/rwsem.h @@ -25,8 +25,8 @@ static inline void __down_read(struct rw_semaphore *sem) { long oldcount; #ifndef CONFIG_SMP - oldcount = sem->count; - sem->count += RWSEM_ACTIVE_READ_BIAS; + oldcount = sem->count.counter; + sem->count.counter += RWSEM_ACTIVE_READ_BIAS; #else long temp; __asm__ __volatile__( @@ -52,13 +52,13 @@ static inline int __down_read_trylock(struct rw_semaphore *sem) { long old, new, res; - res = sem->count; + res = atomic_long_read(&sem->count); do { new = res + RWSEM_ACTIVE_READ_BIAS; if (new <= 0) break; old = res; - res = cmpxchg(&sem->count, old, new); + res = atomic_long_cmpxchg(&sem->count, old, new); } while (res != old); return res >= 0 ? 1 : 0; } @@ -67,8 +67,8 @@ static inline long ___down_write(struct rw_semaphore *sem) { long oldcount; #ifndef CONFIG_SMP - oldcount = sem->count; - sem->count += RWSEM_ACTIVE_WRITE_BIAS; + oldcount = sem->count.counter; + sem->count.counter += RWSEM_ACTIVE_WRITE_BIAS; #else long temp; __asm__ __volatile__( @@ -106,7 +106,7 @@ static inline int __down_write_killable(struct rw_semaphore *sem) */ static inline int __down_write_trylock(struct rw_semaphore *sem) { - long ret = cmpxchg(&sem->count, RWSEM_UNLOCKED_VALUE, + long ret = atomic_long_cmpxchg(&sem->count, RWSEM_UNLOCKED_VALUE, RWSEM_ACTIVE_WRITE_BIAS); if (ret == RWSEM_UNLOCKED_VALUE) return 1; @@ -117,8 +117,8 @@ static inline void __up_read(struct rw_semaphore *sem) { long oldcount; #ifndef CONFIG_SMP - oldcount = sem->count; - sem->count -= RWSEM_ACTIVE_READ_BIAS; + oldcount = sem->count.counter; + sem->count.counter -= RWSEM_ACTIVE_READ_BIAS; #else long temp; __asm__ __volatile__( @@ -142,8 +142,8 @@ static inline void __up_write(struct rw_semaphore *sem) { long count; #ifndef CONFIG_SMP - sem->count -= RWSEM_ACTIVE_WRITE_BIAS; - count = sem->count; + sem->count.counter -= RWSEM_ACTIVE_WRITE_BIAS; + count = sem->count.counter; #else long temp; __asm__ __volatile__( @@ -171,8 +171,8 @@ static inline void __downgrade_write(struct rw_semaphore *sem) { long oldcount; #ifndef CONFIG_SMP - oldcount = sem->count; - sem->count -= RWSEM_WAITING_BIAS; + oldcount = sem->count.counter; + sem->count.counter -= RWSEM_WAITING_BIAS; #else long temp; __asm__ __volatile__( diff --git a/arch/ia64/include/asm/rwsem.h b/arch/ia64/include/asm/rwsem.h index 8b23e070b844..c5d544f188ed 100644 --- a/arch/ia64/include/asm/rwsem.h +++ b/arch/ia64/include/asm/rwsem.h @@ -40,7 +40,7 @@ static inline void __down_read (struct rw_semaphore *sem) { - long result = ia64_fetchadd8_acq((unsigned long *)&sem->count, 1); + long result = ia64_fetchadd8_acq((unsigned long *)&sem->count.counter, 1); if (result < 0) rwsem_down_read_failed(sem); @@ -55,9 +55,9 @@ ___down_write (struct rw_semaphore *sem) long old, new; do { - old = sem->count; + old = atomic_long_read(&sem->count); new = old + RWSEM_ACTIVE_WRITE_BIAS; - } while (cmpxchg_acq(&sem->count, old, new) != old); + } while (atomic_long_cmpxchg_acquire(&sem->count, old, new) != old); return old; } @@ -85,7 +85,7 @@ __down_write_killable (struct rw_semaphore *sem) static inline void __up_read (struct rw_semaphore *sem) { - long result = ia64_fetchadd8_rel((unsigned long *)&sem->count, -1); + long result = ia64_fetchadd8_rel((unsigned long *)&sem->count.counter, -1); if (result < 0 && (--result & RWSEM_ACTIVE_MASK) == 0) rwsem_wake(sem); @@ -100,9 +100,9 @@ __up_write (struct rw_semaphore *sem) long old, new; do { - old = sem->count; + old = atomic_long_read(&sem->count); new = old - RWSEM_ACTIVE_WRITE_BIAS; - } while (cmpxchg_rel(&sem->count, old, new) != old); + } while (atomic_long_cmpxchg_release(&sem->count, old, new) != old); if (new < 0 && (new & RWSEM_ACTIVE_MASK) == 0) rwsem_wake(sem); @@ -115,8 +115,8 @@ static inline int __down_read_trylock (struct rw_semaphore *sem) { long tmp; - while ((tmp = sem->count) >= 0) { - if (tmp == cmpxchg_acq(&sem->count, tmp, tmp+1)) { + while ((tmp = atomic_long_read(&sem->count)) >= 0) { + if (tmp == atomic_long_cmpxchg_acquire(&sem->count, tmp, tmp+1)) { return 1; } } @@ -129,8 +129,8 @@ __down_read_trylock (struct rw_semaphore *sem) static inline int __down_write_trylock (struct rw_semaphore *sem) { - long tmp = cmpxchg_acq(&sem->count, RWSEM_UNLOCKED_VALUE, - RWSEM_ACTIVE_WRITE_BIAS); + long tmp = atomic_long_cmpxchg_acquire(&sem->count, + RWSEM_UNLOCKED_VALUE, RWSEM_ACTIVE_WRITE_BIAS); return tmp == RWSEM_UNLOCKED_VALUE; } @@ -143,9 +143,9 @@ __downgrade_write (struct rw_semaphore *sem) long old, new; do { - old = sem->count; + old = atomic_long_read(&sem->count); new = old - RWSEM_WAITING_BIAS; - } while (cmpxchg_rel(&sem->count, old, new) != old); + } while (atomic_long_cmpxchg_release(&sem->count, old, new) != old); if (old < 0) rwsem_downgrade_wake(sem); diff --git a/include/asm-generic/rwsem.h b/include/asm-generic/rwsem.h index 3fc94a046bf5..a3a93eca766c 100644 --- a/include/asm-generic/rwsem.h +++ b/include/asm-generic/rwsem.h @@ -41,8 +41,8 @@ static inline int __down_read_trylock(struct rw_semaphore *sem) { long tmp; - while ((tmp = sem->count) >= 0) { - if (tmp == cmpxchg_acquire(&sem->count, tmp, + while ((tmp = atomic_long_read(&sem->count)) >= 0) { + if (tmp == atomic_long_cmpxchg_acquire(&sem->count, tmp, tmp + RWSEM_ACTIVE_READ_BIAS)) { return 1; } @@ -79,7 +79,7 @@ static inline int __down_write_trylock(struct rw_semaphore *sem) { long tmp; - tmp = cmpxchg_acquire(&sem->count, RWSEM_UNLOCKED_VALUE, + tmp = atomic_long_cmpxchg_acquire(&sem->count, RWSEM_UNLOCKED_VALUE, RWSEM_ACTIVE_WRITE_BIAS); return tmp == RWSEM_UNLOCKED_VALUE; } diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h index d37fbb34d06f..dd1d14250340 100644 --- a/include/linux/rwsem.h +++ b/include/linux/rwsem.h @@ -23,10 +23,11 @@ struct rw_semaphore; #ifdef CONFIG_RWSEM_GENERIC_SPINLOCK #include /* use a generic implementation */ +#define __RWSEM_INIT_COUNT(name) .count = RWSEM_UNLOCKED_VALUE #else /* All arch specific implementations share the same struct */ struct rw_semaphore { - long count; + atomic_long_t count; struct list_head wait_list; raw_spinlock_t wait_lock; #ifdef CONFIG_RWSEM_SPIN_ON_OWNER @@ -54,9 +55,10 @@ extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem); /* In all implementations count != 0 means locked */ static inline int rwsem_is_locked(struct rw_semaphore *sem) { - return sem->count != 0; + return atomic_long_read(&sem->count) != 0; } +#define __RWSEM_INIT_COUNT(name) .count = ATOMIC_LONG_INIT(RWSEM_UNLOCKED_VALUE) #endif /* Common initializer macros and functions */ @@ -74,7 +76,7 @@ static inline int rwsem_is_locked(struct rw_semaphore *sem) #endif #define __RWSEM_INITIALIZER(name) \ - { .count = RWSEM_UNLOCKED_VALUE, \ + { __RWSEM_INIT_COUNT(name), \ .wait_list = LIST_HEAD_INIT((name).wait_list), \ .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock) \ __RWSEM_OPT_INIT(name) \ diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c index b957da7fcb19..63b40a5c62ec 100644 --- a/kernel/locking/rwsem-xadd.c +++ b/kernel/locking/rwsem-xadd.c @@ -80,7 +80,7 @@ void __init_rwsem(struct rw_semaphore *sem, const char *name, debug_check_no_locks_freed((void *)sem, sizeof(*sem)); lockdep_init_map(&sem->dep_map, name, key, 0); #endif - sem->count = RWSEM_UNLOCKED_VALUE; + atomic_long_set(&sem->count, RWSEM_UNLOCKED_VALUE); raw_spin_lock_init(&sem->wait_lock); INIT_LIST_HEAD(&sem->wait_list); #ifdef CONFIG_RWSEM_SPIN_ON_OWNER @@ -153,10 +153,11 @@ __rwsem_mark_wake(struct rw_semaphore *sem, if (wake_type != RWSEM_WAKE_READ_OWNED) { adjustment = RWSEM_ACTIVE_READ_BIAS; try_reader_grant: - oldcount = rwsem_atomic_update(adjustment, sem) - adjustment; + oldcount = atomic_long_add_return(adjustment, &sem->count) - adjustment; + if (unlikely(oldcount < RWSEM_WAITING_BIAS)) { /* A writer stole the lock. Undo our reader grant. */ - if (rwsem_atomic_update(-adjustment, sem) & + if (atomic_long_sub_return(adjustment, &sem->count) & RWSEM_ACTIVE_MASK) goto out; /* Last active locker left. Retry waking readers. */ @@ -186,7 +187,7 @@ __rwsem_mark_wake(struct rw_semaphore *sem, adjustment -= RWSEM_WAITING_BIAS; if (adjustment) - rwsem_atomic_add(adjustment, sem); + atomic_long_add(adjustment, &sem->count); next = sem->wait_list.next; loop = woken; @@ -233,7 +234,7 @@ struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem) list_add_tail(&waiter.list, &sem->wait_list); /* we're now waiting on the lock, but no longer actively locking */ - count = rwsem_atomic_update(adjustment, sem); + count = atomic_long_add_return(adjustment, &sem->count); /* If there are no active locks, wake the front queued process(es). * @@ -282,7 +283,8 @@ static inline bool rwsem_try_write_lock(long count, struct rw_semaphore *sem) RWSEM_ACTIVE_WRITE_BIAS : RWSEM_ACTIVE_WRITE_BIAS + RWSEM_WAITING_BIAS; - if (cmpxchg_acquire(&sem->count, RWSEM_WAITING_BIAS, count) == RWSEM_WAITING_BIAS) { + if (atomic_long_cmpxchg_acquire(&sem->count, RWSEM_WAITING_BIAS, count) + == RWSEM_WAITING_BIAS) { rwsem_set_owner(sem); return true; } @@ -296,13 +298,13 @@ static inline bool rwsem_try_write_lock(long count, struct rw_semaphore *sem) */ static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem) { - long old, count = READ_ONCE(sem->count); + long old, count = atomic_long_read(&sem->count); while (true) { if (!(count == 0 || count == RWSEM_WAITING_BIAS)) return false; - old = cmpxchg_acquire(&sem->count, count, + old = atomic_long_cmpxchg_acquire(&sem->count, count, count + RWSEM_ACTIVE_WRITE_BIAS); if (old == count) { rwsem_set_owner(sem); @@ -324,7 +326,7 @@ static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem) rcu_read_lock(); owner = READ_ONCE(sem->owner); if (!owner) { - long count = READ_ONCE(sem->count); + long count = atomic_long_read(&sem->count); /* * If sem->owner is not set, yet we have just recently entered the * slowpath with the lock being active, then there is a possibility @@ -375,7 +377,7 @@ bool rwsem_spin_on_owner(struct rw_semaphore *sem, struct task_struct *owner) * held by readers. Check the counter to verify the * state. */ - count = READ_ONCE(sem->count); + count = atomic_long_read(&sem->count); return (count == 0 || count == RWSEM_WAITING_BIAS); } @@ -460,7 +462,7 @@ __rwsem_down_write_failed_common(struct rw_semaphore *sem, int state) WAKE_Q(wake_q); /* undo write bias from down_write operation, stop active locking */ - count = rwsem_atomic_update(-RWSEM_ACTIVE_WRITE_BIAS, sem); + count = atomic_long_sub_return(RWSEM_ACTIVE_WRITE_BIAS, &sem->count); /* do optimistic spinning and steal lock if possible */ if (rwsem_optimistic_spin(sem)) @@ -483,7 +485,7 @@ __rwsem_down_write_failed_common(struct rw_semaphore *sem, int state) /* we're now waiting on the lock, but no longer actively locking */ if (waiting) { - count = READ_ONCE(sem->count); + count = atomic_long_read(&sem->count); /* * If there were already threads queued before us and there are @@ -505,7 +507,7 @@ __rwsem_down_write_failed_common(struct rw_semaphore *sem, int state) } } else - count = rwsem_atomic_update(RWSEM_WAITING_BIAS, sem); + count = atomic_long_add_return(RWSEM_WAITING_BIAS, &sem->count); /* wait until we successfully acquire the lock */ set_current_state(state); @@ -521,7 +523,7 @@ __rwsem_down_write_failed_common(struct rw_semaphore *sem, int state) schedule(); set_current_state(state); - } while ((count = sem->count) & RWSEM_ACTIVE_MASK); + } while ((count = atomic_long_read(&sem->count)) & RWSEM_ACTIVE_MASK); raw_spin_lock_irq(&sem->wait_lock); } @@ -536,7 +538,7 @@ out_nolock: raw_spin_lock_irq(&sem->wait_lock); list_del(&waiter.list); if (list_empty(&sem->wait_list)) - rwsem_atomic_update(-RWSEM_WAITING_BIAS, sem); + atomic_long_add(-RWSEM_WAITING_BIAS, &sem->count); else __rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q); raw_spin_unlock_irq(&sem->wait_lock); From d157bd860f1c828593730dca594d0ce51956833b Mon Sep 17 00:00:00 2001 From: Jason Low Date: Mon, 16 May 2016 17:38:02 -0700 Subject: [PATCH 12/67] locking/rwsem: Remove rwsem_atomic_add() and rwsem_atomic_update() The rwsem-xadd count has been converted to an atomic variable and the rwsem code now directly uses atomic_long_add() and atomic_long_add_return(), so we can remove the arch implementations of rwsem_atomic_add() and rwsem_atomic_update(). Signed-off-by: Jason Low Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Arnd Bergmann Cc: Christoph Lameter Cc: Davidlohr Bueso Cc: Fenghua Yu Cc: Heiko Carstens Cc: Ivan Kokshaysky Cc: Jason Low Cc: Linus Torvalds Cc: Martin Schwidefsky Cc: Matt Turner Cc: Paul E. McKenney Cc: Peter Hurley Cc: Peter Zijlstra Cc: Richard Henderson Cc: Terry Rudd Cc: Thomas Gleixner Cc: Tim Chen Cc: Tony Luck Cc: Waiman Long Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/alpha/include/asm/rwsem.h | 42 ---------------------------------- arch/ia64/include/asm/rwsem.h | 7 ------ arch/s390/include/asm/rwsem.h | 37 ------------------------------ arch/x86/include/asm/rwsem.h | 18 --------------- include/asm-generic/rwsem.h | 16 ------------- 5 files changed, 120 deletions(-) diff --git a/arch/alpha/include/asm/rwsem.h b/arch/alpha/include/asm/rwsem.h index b40021aabb9f..77873d0ad293 100644 --- a/arch/alpha/include/asm/rwsem.h +++ b/arch/alpha/include/asm/rwsem.h @@ -191,47 +191,5 @@ static inline void __downgrade_write(struct rw_semaphore *sem) rwsem_downgrade_wake(sem); } -static inline void rwsem_atomic_add(long val, struct rw_semaphore *sem) -{ -#ifndef CONFIG_SMP - sem->count += val; -#else - long temp; - __asm__ __volatile__( - "1: ldq_l %0,%1\n" - " addq %0,%2,%0\n" - " stq_c %0,%1\n" - " beq %0,2f\n" - ".subsection 2\n" - "2: br 1b\n" - ".previous" - :"=&r" (temp), "=m" (sem->count) - :"Ir" (val), "m" (sem->count)); -#endif -} - -static inline long rwsem_atomic_update(long val, struct rw_semaphore *sem) -{ -#ifndef CONFIG_SMP - sem->count += val; - return sem->count; -#else - long ret, temp; - __asm__ __volatile__( - "1: ldq_l %0,%1\n" - " addq %0,%3,%2\n" - " addq %0,%3,%0\n" - " stq_c %2,%1\n" - " beq %2,2f\n" - ".subsection 2\n" - "2: br 1b\n" - ".previous" - :"=&r" (ret), "=m" (sem->count), "=&r" (temp) - :"Ir" (val), "m" (sem->count)); - - return ret; -#endif -} - #endif /* __KERNEL__ */ #endif /* _ALPHA_RWSEM_H */ diff --git a/arch/ia64/include/asm/rwsem.h b/arch/ia64/include/asm/rwsem.h index c5d544f188ed..8fa98dd303b4 100644 --- a/arch/ia64/include/asm/rwsem.h +++ b/arch/ia64/include/asm/rwsem.h @@ -151,11 +151,4 @@ __downgrade_write (struct rw_semaphore *sem) rwsem_downgrade_wake(sem); } -/* - * Implement atomic add functionality. These used to be "inline" functions, but GCC v3.1 - * doesn't quite optimize this stuff right and ends up with bad calls to fetchandadd. - */ -#define rwsem_atomic_add(delta, sem) atomic64_add(delta, (atomic64_t *)(&(sem)->count)) -#define rwsem_atomic_update(delta, sem) atomic64_add_return(delta, (atomic64_t *)(&(sem)->count)) - #endif /* _ASM_IA64_RWSEM_H */ diff --git a/arch/s390/include/asm/rwsem.h b/arch/s390/include/asm/rwsem.h index c75e4471e618..597e7e96b59e 100644 --- a/arch/s390/include/asm/rwsem.h +++ b/arch/s390/include/asm/rwsem.h @@ -207,41 +207,4 @@ static inline void __downgrade_write(struct rw_semaphore *sem) rwsem_downgrade_wake(sem); } -/* - * implement atomic add functionality - */ -static inline void rwsem_atomic_add(long delta, struct rw_semaphore *sem) -{ - signed long old, new; - - asm volatile( - " lg %0,%2\n" - "0: lgr %1,%0\n" - " agr %1,%4\n" - " csg %0,%1,%2\n" - " jl 0b" - : "=&d" (old), "=&d" (new), "=Q" (sem->count) - : "Q" (sem->count), "d" (delta) - : "cc", "memory"); -} - -/* - * implement exchange and add functionality - */ -static inline long rwsem_atomic_update(long delta, struct rw_semaphore *sem) -{ - signed long old, new; - - asm volatile( - " lg %0,%2\n" - "0: lgr %1,%0\n" - " agr %1,%4\n" - " csg %0,%1,%2\n" - " jl 0b" - : "=&d" (old), "=&d" (new), "=Q" (sem->count) - : "Q" (sem->count), "d" (delta) - : "cc", "memory"); - return new; -} - #endif /* _S390_RWSEM_H */ diff --git a/arch/x86/include/asm/rwsem.h b/arch/x86/include/asm/rwsem.h index 453744c1d347..089ced4edbbc 100644 --- a/arch/x86/include/asm/rwsem.h +++ b/arch/x86/include/asm/rwsem.h @@ -213,23 +213,5 @@ static inline void __downgrade_write(struct rw_semaphore *sem) : "memory", "cc"); } -/* - * implement atomic add functionality - */ -static inline void rwsem_atomic_add(long delta, struct rw_semaphore *sem) -{ - asm volatile(LOCK_PREFIX _ASM_ADD "%1,%0" - : "+m" (sem->count) - : "er" (delta)); -} - -/* - * implement exchange and add functionality - */ -static inline long rwsem_atomic_update(long delta, struct rw_semaphore *sem) -{ - return delta + xadd(&sem->count, delta); -} - #endif /* __KERNEL__ */ #endif /* _ASM_X86_RWSEM_H */ diff --git a/include/asm-generic/rwsem.h b/include/asm-generic/rwsem.h index a3a93eca766c..5be122e3d326 100644 --- a/include/asm-generic/rwsem.h +++ b/include/asm-generic/rwsem.h @@ -106,14 +106,6 @@ static inline void __up_write(struct rw_semaphore *sem) rwsem_wake(sem); } -/* - * implement atomic add functionality - */ -static inline void rwsem_atomic_add(long delta, struct rw_semaphore *sem) -{ - atomic_long_add(delta, (atomic_long_t *)&sem->count); -} - /* * downgrade write lock to read lock */ @@ -134,13 +126,5 @@ static inline void __downgrade_write(struct rw_semaphore *sem) rwsem_downgrade_wake(sem); } -/* - * implement exchange and add functionality - */ -static inline long rwsem_atomic_update(long delta, struct rw_semaphore *sem) -{ - return atomic_long_add_return(delta, (atomic_long_t *)&sem->count); -} - #endif /* __KERNEL__ */ #endif /* _ASM_GENERIC_RWSEM_H */ From 19c5d690e41697fcdd19379ab9d10d8d37818414 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Tue, 17 May 2016 21:26:19 -0400 Subject: [PATCH 13/67] locking/rwsem: Add reader-owned state to the owner field Currently, it is not possible to determine for sure if a reader owns a rwsem by looking at the content of the rwsem data structure. This patch adds a new state RWSEM_READER_OWNED to the owner field to indicate that readers currently own the lock. This enables us to address the following 2 issues in the rwsem optimistic spinning code: 1) rwsem_can_spin_on_owner() will disallow optimistic spinning if the owner field is NULL which can mean either the readers own the lock or the owning writer hasn't set the owner field yet. In the latter case, we miss the chance to do optimistic spinning. 2) While a writer is waiting in the OSQ and a reader takes the lock, the writer will continue to spin when out of the OSQ in the main rwsem_optimistic_spin() loop as the owner field is NULL wasting CPU cycles if some of readers are sleeping. Adding the new state will allow optimistic spinning to go forward as long as the owner field is not RWSEM_READER_OWNED and the owner is running, if set, but stop immediately when that state has been reached. On a 4-socket Haswell machine running on a 4.6-rc1 based kernel, the fio test with multithreaded randrw and randwrite tests on the same file on a XFS partition on top of a NVDIMM were run, the aggregated bandwidths before and after the patch were as follows: Test BW before patch BW after patch % change ---- --------------- -------------- -------- randrw 988 MB/s 1192 MB/s +21% randwrite 1513 MB/s 1623 MB/s +7.3% The perf profile of the rwsem_down_write_failed() function in randrw before and after the patch were: 19.95% 5.88% fio [kernel.vmlinux] [k] rwsem_down_write_failed 14.20% 1.52% fio [kernel.vmlinux] [k] rwsem_down_write_failed The actual CPU cycles spend in rwsem_down_write_failed() dropped from 5.88% to 1.52% after the patch. The xfstests was also run and no regression was observed. Signed-off-by: Waiman Long Signed-off-by: Peter Zijlstra (Intel) Acked-by: Jason Low Acked-by: Davidlohr Bueso Cc: Andrew Morton Cc: Dave Chinner Cc: Douglas Hatch Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Hurley Cc: Peter Zijlstra Cc: Scott J Norton Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1463534783-38814-2-git-send-email-Waiman.Long@hpe.com Signed-off-by: Ingo Molnar --- kernel/locking/rwsem-xadd.c | 41 ++++++++++++++++++++----------------- kernel/locking/rwsem.c | 8 ++++++-- kernel/locking/rwsem.h | 41 +++++++++++++++++++++++++++++++++++++ 3 files changed, 69 insertions(+), 21 deletions(-) diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c index 63b40a5c62ec..6b0d0605910e 100644 --- a/kernel/locking/rwsem-xadd.c +++ b/kernel/locking/rwsem-xadd.c @@ -163,6 +163,12 @@ __rwsem_mark_wake(struct rw_semaphore *sem, /* Last active locker left. Retry waking readers. */ goto try_reader_grant; } + /* + * It is not really necessary to set it to reader-owned here, + * but it gives the spinners an early indication that the + * readers now have the lock. + */ + rwsem_set_reader_owned(sem); } /* Grant an infinite number of read locks to the readers at the front @@ -325,16 +331,11 @@ static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem) rcu_read_lock(); owner = READ_ONCE(sem->owner); - if (!owner) { - long count = atomic_long_read(&sem->count); + if (!rwsem_owner_is_writer(owner)) { /* - * If sem->owner is not set, yet we have just recently entered the - * slowpath with the lock being active, then there is a possibility - * reader(s) may have the lock. To be safe, bail spinning in these - * situations. + * Don't spin if the rwsem is readers owned. */ - if (count & RWSEM_ACTIVE_MASK) - ret = false; + ret = !rwsem_owner_is_reader(owner); goto done; } @@ -347,8 +348,6 @@ done: static noinline bool rwsem_spin_on_owner(struct rw_semaphore *sem, struct task_struct *owner) { - long count; - rcu_read_lock(); while (sem->owner == owner) { /* @@ -369,16 +368,11 @@ bool rwsem_spin_on_owner(struct rw_semaphore *sem, struct task_struct *owner) } rcu_read_unlock(); - if (READ_ONCE(sem->owner)) - return true; /* new owner, continue spinning */ - /* - * When the owner is not set, the lock could be free or - * held by readers. Check the counter to verify the - * state. + * If there is a new owner or the owner is not set, we continue + * spinning. */ - count = atomic_long_read(&sem->count); - return (count == 0 || count == RWSEM_WAITING_BIAS); + return !rwsem_owner_is_reader(READ_ONCE(sem->owner)); } static bool rwsem_optimistic_spin(struct rw_semaphore *sem) @@ -397,7 +391,16 @@ static bool rwsem_optimistic_spin(struct rw_semaphore *sem) while (true) { owner = READ_ONCE(sem->owner); - if (owner && !rwsem_spin_on_owner(sem, owner)) + /* + * Don't spin if + * 1) the owner is a reader as we we can't determine if the + * reader is actively running or not. + * 2) The rwsem_spin_on_owner() returns false which means + * the owner isn't running. + */ + if (rwsem_owner_is_reader(owner) || + (rwsem_owner_is_writer(owner) && + !rwsem_spin_on_owner(sem, owner))) break; /* wait_lock will be acquired if write_lock is obtained */ diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c index 2e853ad93a3a..45ba475d4be3 100644 --- a/kernel/locking/rwsem.c +++ b/kernel/locking/rwsem.c @@ -22,6 +22,7 @@ void __sched down_read(struct rw_semaphore *sem) rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_); LOCK_CONTENDED(sem, __down_read_trylock, __down_read); + rwsem_set_reader_owned(sem); } EXPORT_SYMBOL(down_read); @@ -33,8 +34,10 @@ int down_read_trylock(struct rw_semaphore *sem) { int ret = __down_read_trylock(sem); - if (ret == 1) + if (ret == 1) { rwsem_acquire_read(&sem->dep_map, 0, 1, _RET_IP_); + rwsem_set_reader_owned(sem); + } return ret; } @@ -124,7 +127,7 @@ void downgrade_write(struct rw_semaphore *sem) * lockdep: a downgraded write will live on as a write * dependency. */ - rwsem_clear_owner(sem); + rwsem_set_reader_owned(sem); __downgrade_write(sem); } @@ -138,6 +141,7 @@ void down_read_nested(struct rw_semaphore *sem, int subclass) rwsem_acquire_read(&sem->dep_map, subclass, 0, _RET_IP_); LOCK_CONTENDED(sem, __down_read_trylock, __down_read); + rwsem_set_reader_owned(sem); } EXPORT_SYMBOL(down_read_nested); diff --git a/kernel/locking/rwsem.h b/kernel/locking/rwsem.h index 870ed9a5b426..8f43ba234787 100644 --- a/kernel/locking/rwsem.h +++ b/kernel/locking/rwsem.h @@ -1,3 +1,20 @@ +/* + * The owner field of the rw_semaphore structure will be set to + * RWSEM_READ_OWNED when a reader grabs the lock. A writer will clear + * the owner field when it unlocks. A reader, on the other hand, will + * not touch the owner field when it unlocks. + * + * In essence, the owner field now has the following 3 states: + * 1) 0 + * - lock is free or the owner hasn't set the field yet + * 2) RWSEM_READER_OWNED + * - lock is currently or previously owned by readers (lock is free + * or not set by owner yet) + * 3) Other non-zero value + * - a writer owns the lock + */ +#define RWSEM_READER_OWNED ((struct task_struct *)1UL) + #ifdef CONFIG_RWSEM_SPIN_ON_OWNER static inline void rwsem_set_owner(struct rw_semaphore *sem) { @@ -9,6 +26,26 @@ static inline void rwsem_clear_owner(struct rw_semaphore *sem) sem->owner = NULL; } +static inline void rwsem_set_reader_owned(struct rw_semaphore *sem) +{ + /* + * We check the owner value first to make sure that we will only + * do a write to the rwsem cacheline when it is really necessary + * to minimize cacheline contention. + */ + if (sem->owner != RWSEM_READER_OWNED) + sem->owner = RWSEM_READER_OWNED; +} + +static inline bool rwsem_owner_is_writer(struct task_struct *owner) +{ + return owner && owner != RWSEM_READER_OWNED; +} + +static inline bool rwsem_owner_is_reader(struct task_struct *owner) +{ + return owner == RWSEM_READER_OWNED; +} #else static inline void rwsem_set_owner(struct rw_semaphore *sem) { @@ -17,4 +54,8 @@ static inline void rwsem_set_owner(struct rw_semaphore *sem) static inline void rwsem_clear_owner(struct rw_semaphore *sem) { } + +static inline void rwsem_set_reader_owned(struct rw_semaphore *sem) +{ +} #endif From fb6a44f33be542fd81575ff93a4e8118d6a58592 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Tue, 17 May 2016 21:26:20 -0400 Subject: [PATCH 14/67] locking/rwsem: Protect all writes to owner by WRITE_ONCE() Without using WRITE_ONCE(), the compiler can potentially break a write into multiple smaller ones (store tearing). So a read from the same data by another task concurrently may return a partial result. This can result in a kernel crash if the data is a memory address that is being dereferenced. This patch changes all write to rwsem->owner to use WRITE_ONCE() to make sure that store tearing will not happen. READ_ONCE() may not be needed for rwsem->owner as long as the value is only used for comparison and not dereferencing. Signed-off-by: Waiman Long Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Dave Chinner Cc: Davidlohr Bueso Cc: Douglas Hatch Cc: Jason Low Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Hurley Cc: Peter Zijlstra Cc: Scott J Norton Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1463534783-38814-3-git-send-email-Waiman.Long@hpe.com Signed-off-by: Ingo Molnar --- kernel/locking/rwsem.h | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/kernel/locking/rwsem.h b/kernel/locking/rwsem.h index 8f43ba234787..a699f4048ba1 100644 --- a/kernel/locking/rwsem.h +++ b/kernel/locking/rwsem.h @@ -16,14 +16,21 @@ #define RWSEM_READER_OWNED ((struct task_struct *)1UL) #ifdef CONFIG_RWSEM_SPIN_ON_OWNER +/* + * All writes to owner are protected by WRITE_ONCE() to make sure that + * store tearing can't happen as optimistic spinners may read and use + * the owner value concurrently without lock. Read from owner, however, + * may not need READ_ONCE() as long as the pointer value is only used + * for comparison and isn't being dereferenced. + */ static inline void rwsem_set_owner(struct rw_semaphore *sem) { - sem->owner = current; + WRITE_ONCE(sem->owner, current); } static inline void rwsem_clear_owner(struct rw_semaphore *sem) { - sem->owner = NULL; + WRITE_ONCE(sem->owner, NULL); } static inline void rwsem_set_reader_owned(struct rw_semaphore *sem) @@ -34,7 +41,7 @@ static inline void rwsem_set_reader_owned(struct rw_semaphore *sem) * to minimize cacheline contention. */ if (sem->owner != RWSEM_READER_OWNED) - sem->owner = RWSEM_READER_OWNED; + WRITE_ONCE(sem->owner, RWSEM_READER_OWNED); } static inline bool rwsem_owner_is_writer(struct task_struct *owner) From bf7b4c472db44413251bcef79ca1f6bf1ec81475 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Tue, 17 May 2016 21:26:22 -0400 Subject: [PATCH 15/67] locking/rwsem: Improve reader wakeup code In __rwsem_do_wake(), the reader wakeup code will assume a writer has stolen the lock if the active reader/writer count is not 0. However, this is not as reliable an indicator as the original "< RWSEM_WAITING_BIAS" check. If another reader is present, the code will still break out and exit even if the writer is gone. This patch changes it to check the same "< RWSEM_WAITING_BIAS" condition to reduce the chance of false positive. Signed-off-by: Waiman Long Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Peter Hurley Cc: Andrew Morton Cc: Dave Chinner Cc: Davidlohr Bueso Cc: Douglas Hatch Cc: Jason Low Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Scott J Norton Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1463534783-38814-5-git-send-email-Waiman.Long@hpe.com Signed-off-by: Ingo Molnar --- kernel/locking/rwsem-xadd.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c index 6b0d0605910e..4f1daf5a472d 100644 --- a/kernel/locking/rwsem-xadd.c +++ b/kernel/locking/rwsem-xadd.c @@ -156,9 +156,14 @@ __rwsem_mark_wake(struct rw_semaphore *sem, oldcount = atomic_long_add_return(adjustment, &sem->count) - adjustment; if (unlikely(oldcount < RWSEM_WAITING_BIAS)) { - /* A writer stole the lock. Undo our reader grant. */ - if (atomic_long_sub_return(adjustment, &sem->count) & - RWSEM_ACTIVE_MASK) + /* + * If the count is still less than RWSEM_WAITING_BIAS + * after removing the adjustment, it is assumed that + * a writer has stolen the lock. We have to undo our + * reader grant. + */ + if (atomic_long_add_return(-adjustment, &sem->count) < + RWSEM_WAITING_BIAS) goto out; /* Last active locker left. Retry waking readers. */ goto try_reader_grant; From ddd0fa73c2b71c35de4fe7ae60a5f1a6cddc2cf0 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Tue, 17 May 2016 21:26:23 -0400 Subject: [PATCH 16/67] locking/rwsem: Streamline the rwsem_optimistic_spin() code This patch moves the owner loading and checking code entirely inside of rwsem_spin_on_owner() to simplify the logic of rwsem_optimistic_spin() loop. Suggested-by: Peter Hurley Signed-off-by: Waiman Long Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Peter Hurley Cc: Andrew Morton Cc: Dave Chinner Cc: Davidlohr Bueso Cc: Douglas Hatch Cc: Jason Low Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Scott J Norton Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1463534783-38814-6-git-send-email-Waiman.Long@hpe.com Signed-off-by: Ingo Molnar --- kernel/locking/rwsem-xadd.c | 38 +++++++++++++++++++------------------ 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c index 4f1daf5a472d..2031281bb940 100644 --- a/kernel/locking/rwsem-xadd.c +++ b/kernel/locking/rwsem-xadd.c @@ -350,9 +350,16 @@ done: return ret; } -static noinline -bool rwsem_spin_on_owner(struct rw_semaphore *sem, struct task_struct *owner) +/* + * Return true only if we can still spin on the owner field of the rwsem. + */ +static noinline bool rwsem_spin_on_owner(struct rw_semaphore *sem) { + struct task_struct *owner = READ_ONCE(sem->owner); + + if (!rwsem_owner_is_writer(owner)) + goto out; + rcu_read_lock(); while (sem->owner == owner) { /* @@ -372,7 +379,7 @@ bool rwsem_spin_on_owner(struct rw_semaphore *sem, struct task_struct *owner) cpu_relax_lowlatency(); } rcu_read_unlock(); - +out: /* * If there is a new owner or the owner is not set, we continue * spinning. @@ -382,7 +389,6 @@ bool rwsem_spin_on_owner(struct rw_semaphore *sem, struct task_struct *owner) static bool rwsem_optimistic_spin(struct rw_semaphore *sem) { - struct task_struct *owner; bool taken = false; preempt_disable(); @@ -394,21 +400,17 @@ static bool rwsem_optimistic_spin(struct rw_semaphore *sem) if (!osq_lock(&sem->osq)) goto done; - while (true) { - owner = READ_ONCE(sem->owner); + /* + * Optimistically spin on the owner field and attempt to acquire the + * lock whenever the owner changes. Spinning will be stopped when: + * 1) the owning writer isn't running; or + * 2) readers own the lock as we can't determine if they are + * actively running or not. + */ + while (rwsem_spin_on_owner(sem)) { /* - * Don't spin if - * 1) the owner is a reader as we we can't determine if the - * reader is actively running or not. - * 2) The rwsem_spin_on_owner() returns false which means - * the owner isn't running. + * Try to acquire the lock */ - if (rwsem_owner_is_reader(owner) || - (rwsem_owner_is_writer(owner) && - !rwsem_spin_on_owner(sem, owner))) - break; - - /* wait_lock will be acquired if write_lock is obtained */ if (rwsem_try_write_lock_unqueued(sem)) { taken = true; break; @@ -420,7 +422,7 @@ static bool rwsem_optimistic_spin(struct rw_semaphore *sem) * we're an RT task that will live-lock because we won't let * the owner complete. */ - if (!owner && (need_resched() || rt_task(current))) + if (!sem->owner && (need_resched() || rt_task(current))) break; /* From 6428671bae97caa7040e24e79e969fd87908f4f3 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 1 Jun 2016 20:58:15 +0200 Subject: [PATCH 17/67] locking/mutex: Optimize mutex_trylock() fast-path A while back Viro posted a number of 'interesting' mutex_is_locked() users on IRC, one of those was RCU. RCU seems to use mutex_is_locked() to avoid doing mutex_trylock(), the regular load before modify pattern. While the use isn't wrong per se, its curious in that its needed at all, mutex_trylock() should be good enough on its own to avoid the pointless cacheline bounces. So fix those and remove the mutex_is_locked() (ab)use from RCU. Reported-by: Al Viro Signed-off-by: Peter Zijlstra (Intel) Acked-by: Paul McKenney Acked-by: Davidlohr Bueso Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Waiman Long Link: http://lkml.kernel.org/r/20160601185815.GW3190@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- arch/ia64/include/asm/mutex.h | 2 +- arch/powerpc/include/asm/mutex.h | 2 +- arch/x86/include/asm/mutex_32.h | 2 +- arch/x86/include/asm/mutex_64.h | 6 +++--- include/asm-generic/mutex-dec.h | 2 +- include/asm-generic/mutex-xchg.h | 6 +++++- kernel/rcu/tree.c | 1 - 7 files changed, 12 insertions(+), 9 deletions(-) diff --git a/arch/ia64/include/asm/mutex.h b/arch/ia64/include/asm/mutex.h index f41e66d65e31..28cb819e0ff9 100644 --- a/arch/ia64/include/asm/mutex.h +++ b/arch/ia64/include/asm/mutex.h @@ -82,7 +82,7 @@ __mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *)) static inline int __mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *)) { - if (cmpxchg_acq(count, 1, 0) == 1) + if (atomic_read(count) == 1 && cmpxchg_acq(count, 1, 0) == 1) return 1; return 0; } diff --git a/arch/powerpc/include/asm/mutex.h b/arch/powerpc/include/asm/mutex.h index 127ab23e1f6c..078155fa1189 100644 --- a/arch/powerpc/include/asm/mutex.h +++ b/arch/powerpc/include/asm/mutex.h @@ -124,7 +124,7 @@ __mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *)) static inline int __mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *)) { - if (likely(__mutex_cmpxchg_lock(count, 1, 0) == 1)) + if (likely(atomic_read(count) == 1 && __mutex_cmpxchg_lock(count, 1, 0) == 1)) return 1; return 0; } diff --git a/arch/x86/include/asm/mutex_32.h b/arch/x86/include/asm/mutex_32.h index 85e6cda45a02..e9355a84fc67 100644 --- a/arch/x86/include/asm/mutex_32.h +++ b/arch/x86/include/asm/mutex_32.h @@ -101,7 +101,7 @@ static inline int __mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *)) { /* cmpxchg because it never induces a false contention state. */ - if (likely(atomic_cmpxchg(count, 1, 0) == 1)) + if (likely(atomic_read(count) == 1 && atomic_cmpxchg(count, 1, 0) == 1)) return 1; return 0; diff --git a/arch/x86/include/asm/mutex_64.h b/arch/x86/include/asm/mutex_64.h index 07537a44216e..d9850758464e 100644 --- a/arch/x86/include/asm/mutex_64.h +++ b/arch/x86/include/asm/mutex_64.h @@ -118,10 +118,10 @@ do { \ static inline int __mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *)) { - if (likely(atomic_cmpxchg(count, 1, 0) == 1)) + if (likely(atomic_read(count) == 1 && atomic_cmpxchg(count, 1, 0) == 1)) return 1; - else - return 0; + + return 0; } #endif /* _ASM_X86_MUTEX_64_H */ diff --git a/include/asm-generic/mutex-dec.h b/include/asm-generic/mutex-dec.h index fd694cfd678a..c54829d3de37 100644 --- a/include/asm-generic/mutex-dec.h +++ b/include/asm-generic/mutex-dec.h @@ -80,7 +80,7 @@ __mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *)) static inline int __mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *)) { - if (likely(atomic_cmpxchg_acquire(count, 1, 0) == 1)) + if (likely(atomic_read(count) == 1 && atomic_cmpxchg_acquire(count, 1, 0) == 1)) return 1; return 0; } diff --git a/include/asm-generic/mutex-xchg.h b/include/asm-generic/mutex-xchg.h index a6b4a7bd6ac9..3269ec4e195f 100644 --- a/include/asm-generic/mutex-xchg.h +++ b/include/asm-generic/mutex-xchg.h @@ -91,8 +91,12 @@ __mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *)) static inline int __mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *)) { - int prev = atomic_xchg_acquire(count, 0); + int prev; + if (atomic_read(count) != 1) + return 0; + + prev = atomic_xchg_acquire(count, 0); if (unlikely(prev < 0)) { /* * The lock was marked contended so we must restore that diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index c7f1bc4f817c..b7326893221f 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -3681,7 +3681,6 @@ static bool exp_funnel_lock(struct rcu_state *rsp, unsigned long s) if (ULONG_CMP_LT(READ_ONCE(rnp->exp_seq_rq), s) && (rnp == rnp_root || ULONG_CMP_LT(READ_ONCE(rnp_root->exp_seq_rq), s)) && - !mutex_is_locked(&rsp->exp_mutex) && mutex_trylock(&rsp->exp_mutex)) goto fastpath; From ca50e426f96c905e7d14a9c7a6bd4e0330516047 Mon Sep 17 00:00:00 2001 From: Pan Xinhui Date: Fri, 3 Jun 2016 16:38:14 +0800 Subject: [PATCH 18/67] locking/qspinlock: Use atomic_sub_return_release() in queued_spin_unlock() The existing version uses a heavy barrier while only release semantics is required. So use atomic_sub_return_release() instead. Suggested-by: Peter Zijlstra (Intel) Signed-off-by: Pan Xinhui Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: arnd@arndb.de Cc: waiman.long@hp.com Link: http://lkml.kernel.org/r/1464943094-3129-1-git-send-email-xinhui.pan@linux.vnet.ibm.com Signed-off-by: Ingo Molnar --- include/asm-generic/qspinlock.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h index 05f05f17a7c2..9f0681bf1e87 100644 --- a/include/asm-generic/qspinlock.h +++ b/include/asm-generic/qspinlock.h @@ -111,10 +111,9 @@ static __always_inline void queued_spin_lock(struct qspinlock *lock) static __always_inline void queued_spin_unlock(struct qspinlock *lock) { /* - * smp_mb__before_atomic() in order to guarantee release semantics + * unlock() needs release semantics: */ - smp_mb__before_atomic(); - atomic_sub(_Q_LOCKED_VAL, &lock->val); + (void)atomic_sub_return_release(_Q_LOCKED_VAL, &lock->val); } #endif From d4c3be70ca0e7a1ae308bedd3462900c61e97b11 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 9 Jun 2016 12:20:25 +0200 Subject: [PATCH 19/67] MAINTAINERS: Update locking tree description and file patterns Update the file patterns, the Git tree URI and also widen the scope from 'LOCKDEP and LOCKSTAT' to 'LOCKING PRIMITIVES'. Signed-off-by: Ingo Molnar Cc: Peter Zijlstra Cc: Linus Torvalds Cc: Andrew Morton Cc: Thomas Gleixner Cc: Paul E. McKenney Cc: linux-kernel@vger.kernel.org --- MAINTAINERS | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index ed42cb65a19b..daa85ac5f435 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7006,15 +7006,23 @@ Q: http://patchwork.linuxtv.org/project/linux-media/list/ S: Maintained F: drivers/media/usb/dvb-usb-v2/lmedm04* -LOCKDEP AND LOCKSTAT +LOCKING PRIMITIVES M: Peter Zijlstra M: Ingo Molnar L: linux-kernel@vger.kernel.org -T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git core/locking +T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git locking/core S: Maintained -F: Documentation/locking/lockdep*.txt -F: Documentation/locking/lockstat.txt +F: Documentation/locking/ F: include/linux/lockdep.h +F: include/linux/spinlock*.h +F: arch/*/include/asm/spinlock*.h +F: include/linux/rwlock*.h +F: include/linux/mutex*.h +F: arch/*/include/asm/mutex*.h +F: include/linux/rwsem*.h +F: arch/*/include/asm/rwsem.h +F: include/linux/seqlock.h +F: lib/locking*.[ch] F: kernel/locking/ LOGICAL DISK MANAGER SUPPORT (LDM, Windows 2000/XP/Vista Dynamic Disks) From 1f03e8d2919270bd6ef64f39a45ce8df8a9f012a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 4 Apr 2016 10:57:12 +0200 Subject: [PATCH 20/67] locking/barriers: Replace smp_cond_acquire() with smp_cond_load_acquire() This new form allows using hardware assisted waiting. Some hardware (ARM64 and x86) allow monitoring an address for changes, so by providing a pointer we can use this to replace the cpu_relax() with hardware optimized methods in the future. Requested-by: Will Deacon Suggested-by: Linus Torvalds Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/compiler.h | 25 +++++++++++++++++++------ kernel/locking/qspinlock.c | 12 ++++++------ kernel/sched/core.c | 8 ++++---- kernel/sched/sched.h | 2 +- kernel/smp.c | 2 +- 5 files changed, 31 insertions(+), 18 deletions(-) diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 06f27fd9d760..2bcaedc0f032 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -305,21 +305,34 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s }) /** - * smp_cond_acquire() - Spin wait for cond with ACQUIRE ordering + * smp_cond_load_acquire() - (Spin) wait for cond with ACQUIRE ordering + * @ptr: pointer to the variable to wait on * @cond: boolean expression to wait for * * Equivalent to using smp_load_acquire() on the condition variable but employs * the control dependency of the wait to reduce the barrier on many platforms. * + * Due to C lacking lambda expressions we load the value of *ptr into a + * pre-named variable @VAL to be used in @cond. + * * The control dependency provides a LOAD->STORE order, the additional RMB * provides LOAD->LOAD order, together they provide LOAD->{LOAD,STORE} order, * aka. ACQUIRE. */ -#define smp_cond_acquire(cond) do { \ - while (!(cond)) \ - cpu_relax(); \ - smp_rmb(); /* ctrl + rmb := acquire */ \ -} while (0) +#ifndef smp_cond_load_acquire +#define smp_cond_load_acquire(ptr, cond_expr) ({ \ + typeof(ptr) __PTR = (ptr); \ + typeof(*ptr) VAL; \ + for (;;) { \ + VAL = READ_ONCE(*__PTR); \ + if (cond_expr) \ + break; \ + cpu_relax(); \ + } \ + smp_rmb(); /* ctrl + rmb := acquire */ \ + VAL; \ +}) +#endif #endif /* __KERNEL__ */ diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c index 2f9153b183c9..1b8dda90ebfa 100644 --- a/kernel/locking/qspinlock.c +++ b/kernel/locking/qspinlock.c @@ -475,7 +475,7 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val) * sequentiality; this is because not all clear_pending_set_locked() * implementations imply full barriers. */ - smp_cond_acquire(!(atomic_read(&lock->val) & _Q_LOCKED_MASK)); + smp_cond_load_acquire(&lock->val.counter, !(VAL & _Q_LOCKED_MASK)); /* * take ownership and clear the pending bit. @@ -562,7 +562,7 @@ queue: * * The PV pv_wait_head_or_lock function, if active, will acquire * the lock and return a non-zero value. So we have to skip the - * smp_cond_acquire() call. As the next PV queue head hasn't been + * smp_cond_load_acquire() call. As the next PV queue head hasn't been * designated yet, there is no way for the locked value to become * _Q_SLOW_VAL. So both the set_locked() and the * atomic_cmpxchg_relaxed() calls will be safe. @@ -573,7 +573,7 @@ queue: if ((val = pv_wait_head_or_lock(lock, node))) goto locked; - smp_cond_acquire(!((val = atomic_read(&lock->val)) & _Q_LOCKED_PENDING_MASK)); + val = smp_cond_load_acquire(&lock->val.counter, !(VAL & _Q_LOCKED_PENDING_MASK)); locked: /* @@ -593,9 +593,9 @@ locked: break; } /* - * The smp_cond_acquire() call above has provided the necessary - * acquire semantics required for locking. At most two - * iterations of this loop may be ran. + * The smp_cond_load_acquire() call above has provided the + * necessary acquire semantics required for locking. At most + * two iterations of this loop may be ran. */ old = atomic_cmpxchg_relaxed(&lock->val, val, _Q_LOCKED_VAL); if (old == val) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 017d5394f5dc..5cd6931cb2cb 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1935,7 +1935,7 @@ static void ttwu_queue(struct task_struct *p, int cpu, int wake_flags) * chain to provide order. Instead we do: * * 1) smp_store_release(X->on_cpu, 0) - * 2) smp_cond_acquire(!X->on_cpu) + * 2) smp_cond_load_acquire(!X->on_cpu) * * Example: * @@ -1946,7 +1946,7 @@ static void ttwu_queue(struct task_struct *p, int cpu, int wake_flags) * sched-out X * smp_store_release(X->on_cpu, 0); * - * smp_cond_acquire(!X->on_cpu); + * smp_cond_load_acquire(&X->on_cpu, !VAL); * X->state = WAKING * set_task_cpu(X,2) * @@ -1972,7 +1972,7 @@ static void ttwu_queue(struct task_struct *p, int cpu, int wake_flags) * This means that any means of doing remote wakeups must order the CPU doing * the wakeup against the CPU the task is going to end up running on. This, * however, is already required for the regular Program-Order guarantee above, - * since the waking CPU is the one issueing the ACQUIRE (smp_cond_acquire). + * since the waking CPU is the one issueing the ACQUIRE (smp_cond_load_acquire). * */ @@ -2045,7 +2045,7 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) * This ensures that tasks getting woken will be fully ordered against * their previous state and preserve Program Order. */ - smp_cond_acquire(!p->on_cpu); + smp_cond_load_acquire(&p->on_cpu, !VAL); p->sched_contributes_to_load = !!task_contributes_to_load(p); p->state = TASK_WAKING; diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 72f1f3087b04..425bf5ddaa5a 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1113,7 +1113,7 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev) * In particular, the load of prev->state in finish_task_switch() must * happen before this. * - * Pairs with the smp_cond_acquire() in try_to_wake_up(). + * Pairs with the smp_cond_load_acquire() in try_to_wake_up(). */ smp_store_release(&prev->on_cpu, 0); #endif diff --git a/kernel/smp.c b/kernel/smp.c index 74165443c240..36552beed397 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -107,7 +107,7 @@ void __init call_function_init(void) */ static __always_inline void csd_lock_wait(struct call_single_data *csd) { - smp_cond_acquire(!(csd->flags & CSD_FLAG_LOCK)); + smp_cond_load_acquire(&csd->flags, !(VAL & CSD_FLAG_LOCK)); } static __always_inline void csd_lock(struct call_single_data *csd) From 33ac279677dcc2441cb93d8cb9cf7a74df62814d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 24 May 2016 13:17:12 +0200 Subject: [PATCH 21/67] locking/barriers: Introduce smp_acquire__after_ctrl_dep() Introduce smp_acquire__after_ctrl_dep(), this construct is not uncommon, but the lack of this barrier is. Use it to better express smp_rmb() uses in WRITE_ONCE(), the IPC semaphore code and the qspinlock code. Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/compiler.h | 17 ++++++++++++----- ipc/sem.c | 14 ++------------ kernel/locking/qspinlock.c | 2 +- 3 files changed, 15 insertions(+), 18 deletions(-) diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 2bcaedc0f032..59a7004fc7dd 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -304,6 +304,17 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s __u.__val; \ }) +/** + * smp_acquire__after_ctrl_dep() - Provide ACQUIRE ordering after a control dependency + * + * A control dependency provides a LOAD->STORE order, the additional RMB + * provides LOAD->LOAD order, together they provide LOAD->{LOAD,STORE} order, + * aka. (load)-ACQUIRE. + * + * Architectures that do not do load speculation can have this be barrier(). + */ +#define smp_acquire__after_ctrl_dep() smp_rmb() + /** * smp_cond_load_acquire() - (Spin) wait for cond with ACQUIRE ordering * @ptr: pointer to the variable to wait on @@ -314,10 +325,6 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s * * Due to C lacking lambda expressions we load the value of *ptr into a * pre-named variable @VAL to be used in @cond. - * - * The control dependency provides a LOAD->STORE order, the additional RMB - * provides LOAD->LOAD order, together they provide LOAD->{LOAD,STORE} order, - * aka. ACQUIRE. */ #ifndef smp_cond_load_acquire #define smp_cond_load_acquire(ptr, cond_expr) ({ \ @@ -329,7 +336,7 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s break; \ cpu_relax(); \ } \ - smp_rmb(); /* ctrl + rmb := acquire */ \ + smp_acquire__after_ctrl_dep(); \ VAL; \ }) #endif diff --git a/ipc/sem.c b/ipc/sem.c index b3757ea0694b..84dff3df11a4 100644 --- a/ipc/sem.c +++ b/ipc/sem.c @@ -259,16 +259,6 @@ static void sem_rcu_free(struct rcu_head *head) ipc_rcu_free(head); } -/* - * spin_unlock_wait() and !spin_is_locked() are not memory barriers, they - * are only control barriers. - * The code must pair with spin_unlock(&sem->lock) or - * spin_unlock(&sem_perm.lock), thus just the control barrier is insufficient. - * - * smp_rmb() is sufficient, as writes cannot pass the control barrier. - */ -#define ipc_smp_acquire__after_spin_is_unlocked() smp_rmb() - /* * Wait until all currently ongoing simple ops have completed. * Caller must own sem_perm.lock. @@ -292,7 +282,7 @@ static void sem_wait_array(struct sem_array *sma) sem = sma->sem_base + i; spin_unlock_wait(&sem->lock); } - ipc_smp_acquire__after_spin_is_unlocked(); + smp_acquire__after_ctrl_dep(); } /* @@ -350,7 +340,7 @@ static inline int sem_lock(struct sem_array *sma, struct sembuf *sops, * complex_count++; * spin_unlock(sem_perm.lock); */ - ipc_smp_acquire__after_spin_is_unlocked(); + smp_acquire__after_ctrl_dep(); /* * Now repeat the test of complex_count: diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c index 1b8dda90ebfa..730655533440 100644 --- a/kernel/locking/qspinlock.c +++ b/kernel/locking/qspinlock.c @@ -379,7 +379,7 @@ void queued_spin_unlock_wait(struct qspinlock *lock) cpu_relax(); done: - smp_rmb(); /* CTRL + RMB -> ACQUIRE */ + smp_acquire__after_ctrl_dep(); } EXPORT_SYMBOL(queued_spin_unlock_wait); #endif From 7cb45c0fe9858f92cc264f6bf9d2f6a0e7d3b249 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 1 Jun 2016 19:23:54 +0200 Subject: [PATCH 22/67] locking/barriers: Move smp_cond_load_acquire() to asm-generic/barrier.h Since all asm/barrier.h should/must include asm-generic/barrier.h the latter is a good place for generic infrastructure like this. This also allows archs to override the new smp_acquire__after_ctrl_dep(). Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/asm-generic/barrier.h | 39 +++++++++++++++++++++++++++++++++++ include/linux/compiler.h | 37 --------------------------------- 2 files changed, 39 insertions(+), 37 deletions(-) diff --git a/include/asm-generic/barrier.h b/include/asm-generic/barrier.h index 1cceca146905..ab7b0bd7d4dd 100644 --- a/include/asm-generic/barrier.h +++ b/include/asm-generic/barrier.h @@ -207,5 +207,44 @@ do { \ #define virt_store_release(p, v) __smp_store_release(p, v) #define virt_load_acquire(p) __smp_load_acquire(p) +/** + * smp_acquire__after_ctrl_dep() - Provide ACQUIRE ordering after a control dependency + * + * A control dependency provides a LOAD->STORE order, the additional RMB + * provides LOAD->LOAD order, together they provide LOAD->{LOAD,STORE} order, + * aka. (load)-ACQUIRE. + * + * Architectures that do not do load speculation can have this be barrier(). + */ +#ifndef smp_acquire__after_ctrl_dep +#define smp_acquire__after_ctrl_dep() smp_rmb() +#endif + +/** + * smp_cond_load_acquire() - (Spin) wait for cond with ACQUIRE ordering + * @ptr: pointer to the variable to wait on + * @cond: boolean expression to wait for + * + * Equivalent to using smp_load_acquire() on the condition variable but employs + * the control dependency of the wait to reduce the barrier on many platforms. + * + * Due to C lacking lambda expressions we load the value of *ptr into a + * pre-named variable @VAL to be used in @cond. + */ +#ifndef smp_cond_load_acquire +#define smp_cond_load_acquire(ptr, cond_expr) ({ \ + typeof(ptr) __PTR = (ptr); \ + typeof(*ptr) VAL; \ + for (;;) { \ + VAL = READ_ONCE(*__PTR); \ + if (cond_expr) \ + break; \ + cpu_relax(); \ + } \ + smp_acquire__after_ctrl_dep(); \ + VAL; \ +}) +#endif + #endif /* !__ASSEMBLY__ */ #endif /* __ASM_GENERIC_BARRIER_H */ diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 59a7004fc7dd..2e853b679a5d 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -304,43 +304,6 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s __u.__val; \ }) -/** - * smp_acquire__after_ctrl_dep() - Provide ACQUIRE ordering after a control dependency - * - * A control dependency provides a LOAD->STORE order, the additional RMB - * provides LOAD->LOAD order, together they provide LOAD->{LOAD,STORE} order, - * aka. (load)-ACQUIRE. - * - * Architectures that do not do load speculation can have this be barrier(). - */ -#define smp_acquire__after_ctrl_dep() smp_rmb() - -/** - * smp_cond_load_acquire() - (Spin) wait for cond with ACQUIRE ordering - * @ptr: pointer to the variable to wait on - * @cond: boolean expression to wait for - * - * Equivalent to using smp_load_acquire() on the condition variable but employs - * the control dependency of the wait to reduce the barrier on many platforms. - * - * Due to C lacking lambda expressions we load the value of *ptr into a - * pre-named variable @VAL to be used in @cond. - */ -#ifndef smp_cond_load_acquire -#define smp_cond_load_acquire(ptr, cond_expr) ({ \ - typeof(ptr) __PTR = (ptr); \ - typeof(*ptr) VAL; \ - for (;;) { \ - VAL = READ_ONCE(*__PTR); \ - if (cond_expr) \ - break; \ - cpu_relax(); \ - } \ - smp_acquire__after_ctrl_dep(); \ - VAL; \ -}) -#endif - #endif /* __KERNEL__ */ #endif /* __ASSEMBLY__ */ From b464d1270a8016edcf1fd20d77cefdecf9b0b73e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 30 May 2016 14:32:04 +0200 Subject: [PATCH 23/67] locking/barriers, tile: Provide TILE specific smp_acquire__after_ctrl_dep() Since TILE doesn't do read speculation, its control dependencies also guarantee LOAD->LOAD order and we don't need the additional RMB otherwise required to provide ACQUIRE semantics. Signed-off-by: Peter Zijlstra (Intel) Acked-by: Chris Metcalf Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/tile/include/asm/barrier.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/tile/include/asm/barrier.h b/arch/tile/include/asm/barrier.h index d55222806c2f..4c419ab95ab7 100644 --- a/arch/tile/include/asm/barrier.h +++ b/arch/tile/include/asm/barrier.h @@ -87,6 +87,13 @@ mb_incoherent(void) #define __smp_mb__after_atomic() __smp_mb() #endif +/* + * The TILE architecture does not do speculative reads; this ensures + * that a control dependency also orders against loads and already provides + * a LOAD->{LOAD,STORE} order and can forgo the additional RMB. + */ +#define smp_acquire__after_ctrl_dep() barrier() + #include #endif /* !__ASSEMBLY__ */ From 726328d92a42b6d4b76078e2659f43067f82c4e8 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 26 May 2016 10:35:03 +0200 Subject: [PATCH 24/67] locking/spinlock, arch: Update and fix spin_unlock_wait() implementations This patch updates/fixes all spin_unlock_wait() implementations. The update is in semantics; where it previously was only a control dependency, we now upgrade to a full load-acquire to match the store-release from the spin_unlock() we waited on. This ensures that when spin_unlock_wait() returns, we're guaranteed to observe the full critical section we waited on. This fixes a number of spin_unlock_wait() users that (not unreasonably) rely on this. I also fixed a number of ticket lock versions to only wait on the current lock holder, instead of for a full unlock, as this is sufficient. Furthermore; again for ticket locks; I added an smp_rmb() in between the initial ticket load and the spin loop testing the current value because I could not convince myself the address dependency is sufficient, esp. if the loads are of different sizes. I'm more than happy to remove this smp_rmb() again if people are certain the address dependency does indeed work as expected. Note: PPC32 will be fixed independently Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: chris@zankel.net Cc: cmetcalf@mellanox.com Cc: davem@davemloft.net Cc: dhowells@redhat.com Cc: james.hogan@imgtec.com Cc: jejb@parisc-linux.org Cc: linux@armlinux.org.uk Cc: mpe@ellerman.id.au Cc: ralf@linux-mips.org Cc: realmz6@gmail.com Cc: rkuo@codeaurora.org Cc: rth@twiddle.net Cc: schwidefsky@de.ibm.com Cc: tony.luck@intel.com Cc: vgupta@synopsys.com Cc: ysato@users.sourceforge.jp Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/alpha/include/asm/spinlock.h | 9 +++++++-- arch/arc/include/asm/spinlock.h | 7 +++++-- arch/arm/include/asm/spinlock.h | 19 +++++++++++++++++-- arch/blackfin/include/asm/spinlock.h | 5 +++-- arch/hexagon/include/asm/spinlock.h | 10 ++++++++-- arch/ia64/include/asm/spinlock.h | 4 ++++ arch/m32r/include/asm/spinlock.h | 9 +++++++-- arch/metag/include/asm/spinlock.h | 14 ++++++++++++-- arch/mips/include/asm/spinlock.h | 19 +++++++++++++++++-- arch/mn10300/include/asm/spinlock.h | 8 +++++++- arch/parisc/include/asm/spinlock.h | 9 +++++++-- arch/s390/include/asm/spinlock.h | 3 +++ arch/sh/include/asm/spinlock.h | 10 ++++++++-- arch/sparc/include/asm/spinlock_32.h | 7 +++++-- arch/sparc/include/asm/spinlock_64.h | 10 +++++++--- arch/tile/lib/spinlock_32.c | 6 ++++++ arch/tile/lib/spinlock_64.c | 6 ++++++ arch/xtensa/include/asm/spinlock.h | 10 ++++++++-- include/asm-generic/barrier.h | 2 +- include/linux/spinlock_up.h | 10 +++++++--- 20 files changed, 145 insertions(+), 32 deletions(-) diff --git a/arch/alpha/include/asm/spinlock.h b/arch/alpha/include/asm/spinlock.h index fed9c6f44c19..a40b9fc0c6c3 100644 --- a/arch/alpha/include/asm/spinlock.h +++ b/arch/alpha/include/asm/spinlock.h @@ -3,6 +3,8 @@ #include #include +#include +#include /* * Simple spin lock operations. There are two variants, one clears IRQ's @@ -13,8 +15,11 @@ #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) #define arch_spin_is_locked(x) ((x)->lock != 0) -#define arch_spin_unlock_wait(x) \ - do { cpu_relax(); } while ((x)->lock) + +static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) +{ + smp_cond_load_acquire(&lock->lock, !VAL); +} static inline int arch_spin_value_unlocked(arch_spinlock_t lock) { diff --git a/arch/arc/include/asm/spinlock.h b/arch/arc/include/asm/spinlock.h index cded4a9b5438..233d5ffe6ec7 100644 --- a/arch/arc/include/asm/spinlock.h +++ b/arch/arc/include/asm/spinlock.h @@ -15,8 +15,11 @@ #define arch_spin_is_locked(x) ((x)->slock != __ARCH_SPIN_LOCK_UNLOCKED__) #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) -#define arch_spin_unlock_wait(x) \ - do { while (arch_spin_is_locked(x)) cpu_relax(); } while (0) + +static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) +{ + smp_cond_load_acquire(&lock->slock, !VAL); +} #ifdef CONFIG_ARC_HAS_LLSC diff --git a/arch/arm/include/asm/spinlock.h b/arch/arm/include/asm/spinlock.h index 0fa418463f49..4bec45442072 100644 --- a/arch/arm/include/asm/spinlock.h +++ b/arch/arm/include/asm/spinlock.h @@ -6,6 +6,8 @@ #endif #include +#include +#include /* * sev and wfe are ARMv6K extensions. Uniprocessor ARMv6 may not have the K @@ -50,8 +52,21 @@ static inline void dsb_sev(void) * memory. */ -#define arch_spin_unlock_wait(lock) \ - do { while (arch_spin_is_locked(lock)) cpu_relax(); } while (0) +static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) +{ + u16 owner = READ_ONCE(lock->tickets.owner); + + for (;;) { + arch_spinlock_t tmp = READ_ONCE(*lock); + + if (tmp.tickets.owner == tmp.tickets.next || + tmp.tickets.owner != owner) + break; + + wfe(); + } + smp_acquire__after_ctrl_dep(); +} #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) diff --git a/arch/blackfin/include/asm/spinlock.h b/arch/blackfin/include/asm/spinlock.h index 490c7caa02d9..c58f4a83ed6f 100644 --- a/arch/blackfin/include/asm/spinlock.h +++ b/arch/blackfin/include/asm/spinlock.h @@ -12,6 +12,8 @@ #else #include +#include +#include asmlinkage int __raw_spin_is_locked_asm(volatile int *ptr); asmlinkage void __raw_spin_lock_asm(volatile int *ptr); @@ -48,8 +50,7 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock) static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) { - while (arch_spin_is_locked(lock)) - cpu_relax(); + smp_cond_load_acquire(&lock->lock, !VAL); } static inline int arch_read_can_lock(arch_rwlock_t *rw) diff --git a/arch/hexagon/include/asm/spinlock.h b/arch/hexagon/include/asm/spinlock.h index 12ca4ebc0338..a1c55788c5d6 100644 --- a/arch/hexagon/include/asm/spinlock.h +++ b/arch/hexagon/include/asm/spinlock.h @@ -23,6 +23,8 @@ #define _ASM_SPINLOCK_H #include +#include +#include /* * This file is pulled in for SMP builds. @@ -176,8 +178,12 @@ static inline unsigned int arch_spin_trylock(arch_spinlock_t *lock) * SMP spinlocks are intended to allow only a single CPU at the lock */ #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) -#define arch_spin_unlock_wait(lock) \ - do {while (arch_spin_is_locked(lock)) cpu_relax(); } while (0) + +static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) +{ + smp_cond_load_acquire(&lock->lock, !VAL); +} + #define arch_spin_is_locked(x) ((x)->lock != 0) #define arch_read_lock_flags(lock, flags) arch_read_lock(lock) diff --git a/arch/ia64/include/asm/spinlock.h b/arch/ia64/include/asm/spinlock.h index 45698cd15b7b..ca9e76149a4a 100644 --- a/arch/ia64/include/asm/spinlock.h +++ b/arch/ia64/include/asm/spinlock.h @@ -15,6 +15,8 @@ #include #include +#include +#include #define arch_spin_lock_init(x) ((x)->lock = 0) @@ -86,6 +88,8 @@ static __always_inline void __ticket_spin_unlock_wait(arch_spinlock_t *lock) return; cpu_relax(); } + + smp_acquire__after_ctrl_dep(); } static inline int __ticket_spin_is_locked(arch_spinlock_t *lock) diff --git a/arch/m32r/include/asm/spinlock.h b/arch/m32r/include/asm/spinlock.h index fa13694eaae3..323c7fc953cd 100644 --- a/arch/m32r/include/asm/spinlock.h +++ b/arch/m32r/include/asm/spinlock.h @@ -13,6 +13,8 @@ #include #include #include +#include +#include /* * Your basic SMP spinlocks, allowing only a single CPU anywhere @@ -27,8 +29,11 @@ #define arch_spin_is_locked(x) (*(volatile int *)(&(x)->slock) <= 0) #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) -#define arch_spin_unlock_wait(x) \ - do { cpu_relax(); } while (arch_spin_is_locked(x)) + +static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) +{ + smp_cond_load_acquire(&lock->slock, VAL > 0); +} /** * arch_spin_trylock - Try spin lock and return a result diff --git a/arch/metag/include/asm/spinlock.h b/arch/metag/include/asm/spinlock.h index 86a7cf3d1386..c0c7a22be1ae 100644 --- a/arch/metag/include/asm/spinlock.h +++ b/arch/metag/include/asm/spinlock.h @@ -1,14 +1,24 @@ #ifndef __ASM_SPINLOCK_H #define __ASM_SPINLOCK_H +#include +#include + #ifdef CONFIG_METAG_ATOMICITY_LOCK1 #include #else #include #endif -#define arch_spin_unlock_wait(lock) \ - do { while (arch_spin_is_locked(lock)) cpu_relax(); } while (0) +/* + * both lock1 and lnkget are test-and-set spinlocks with 0 unlocked and 1 + * locked. + */ + +static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) +{ + smp_cond_load_acquire(&lock->lock, !VAL); +} #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) diff --git a/arch/mips/include/asm/spinlock.h b/arch/mips/include/asm/spinlock.h index 40196bebe849..f485afe51514 100644 --- a/arch/mips/include/asm/spinlock.h +++ b/arch/mips/include/asm/spinlock.h @@ -12,6 +12,7 @@ #include #include +#include #include #include @@ -48,8 +49,22 @@ static inline int arch_spin_value_unlocked(arch_spinlock_t lock) } #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) -#define arch_spin_unlock_wait(x) \ - while (arch_spin_is_locked(x)) { cpu_relax(); } + +static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) +{ + u16 owner = READ_ONCE(lock->h.serving_now); + smp_rmb(); + for (;;) { + arch_spinlock_t tmp = READ_ONCE(*lock); + + if (tmp.h.serving_now == tmp.h.ticket || + tmp.h.serving_now != owner) + break; + + cpu_relax(); + } + smp_acquire__after_ctrl_dep(); +} static inline int arch_spin_is_contended(arch_spinlock_t *lock) { diff --git a/arch/mn10300/include/asm/spinlock.h b/arch/mn10300/include/asm/spinlock.h index 1ae580f38933..9c7b8f7942d8 100644 --- a/arch/mn10300/include/asm/spinlock.h +++ b/arch/mn10300/include/asm/spinlock.h @@ -12,6 +12,8 @@ #define _ASM_SPINLOCK_H #include +#include +#include #include #include @@ -23,7 +25,11 @@ */ #define arch_spin_is_locked(x) (*(volatile signed char *)(&(x)->slock) != 0) -#define arch_spin_unlock_wait(x) do { barrier(); } while (arch_spin_is_locked(x)) + +static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) +{ + smp_cond_load_acquire(&lock->slock, !VAL); +} static inline void arch_spin_unlock(arch_spinlock_t *lock) { diff --git a/arch/parisc/include/asm/spinlock.h b/arch/parisc/include/asm/spinlock.h index 64f2992e439f..e32936cd7f10 100644 --- a/arch/parisc/include/asm/spinlock.h +++ b/arch/parisc/include/asm/spinlock.h @@ -13,8 +13,13 @@ static inline int arch_spin_is_locked(arch_spinlock_t *x) } #define arch_spin_lock(lock) arch_spin_lock_flags(lock, 0) -#define arch_spin_unlock_wait(x) \ - do { cpu_relax(); } while (arch_spin_is_locked(x)) + +static inline void arch_spin_unlock_wait(arch_spinlock_t *x) +{ + volatile unsigned int *a = __ldcw_align(x); + + smp_cond_load_acquire(a, VAL); +} static inline void arch_spin_lock_flags(arch_spinlock_t *x, unsigned long flags) diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h index 63ebf37d3143..7e9e09f600fa 100644 --- a/arch/s390/include/asm/spinlock.h +++ b/arch/s390/include/asm/spinlock.h @@ -10,6 +10,8 @@ #define __ASM_SPINLOCK_H #include +#include +#include #define SPINLOCK_LOCKVAL (S390_lowcore.spinlock_lockval) @@ -97,6 +99,7 @@ static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) { while (arch_spin_is_locked(lock)) arch_spin_relax(lock); + smp_acquire__after_ctrl_dep(); } /* diff --git a/arch/sh/include/asm/spinlock.h b/arch/sh/include/asm/spinlock.h index bdc0f3b6c56a..416834b60ad0 100644 --- a/arch/sh/include/asm/spinlock.h +++ b/arch/sh/include/asm/spinlock.h @@ -19,14 +19,20 @@ #error "Need movli.l/movco.l for spinlocks" #endif +#include +#include + /* * Your basic SMP spinlocks, allowing only a single CPU anywhere */ #define arch_spin_is_locked(x) ((x)->lock <= 0) #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) -#define arch_spin_unlock_wait(x) \ - do { while (arch_spin_is_locked(x)) cpu_relax(); } while (0) + +static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) +{ + smp_cond_load_acquire(&lock->lock, VAL > 0); +} /* * Simple spin lock operations. There are two variants, one clears IRQ's diff --git a/arch/sparc/include/asm/spinlock_32.h b/arch/sparc/include/asm/spinlock_32.h index bcc98fc35281..d9c5876c6121 100644 --- a/arch/sparc/include/asm/spinlock_32.h +++ b/arch/sparc/include/asm/spinlock_32.h @@ -9,12 +9,15 @@ #ifndef __ASSEMBLY__ #include +#include #include /* for cpu_relax */ #define arch_spin_is_locked(lock) (*((volatile unsigned char *)(lock)) != 0) -#define arch_spin_unlock_wait(lock) \ - do { while (arch_spin_is_locked(lock)) cpu_relax(); } while (0) +static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) +{ + smp_cond_load_acquire(&lock->lock, !VAL); +} static inline void arch_spin_lock(arch_spinlock_t *lock) { diff --git a/arch/sparc/include/asm/spinlock_64.h b/arch/sparc/include/asm/spinlock_64.h index 968917694978..87990b7c6b0d 100644 --- a/arch/sparc/include/asm/spinlock_64.h +++ b/arch/sparc/include/asm/spinlock_64.h @@ -8,6 +8,9 @@ #ifndef __ASSEMBLY__ +#include +#include + /* To get debugging spinlocks which detect and catch * deadlock situations, set CONFIG_DEBUG_SPINLOCK * and rebuild your kernel. @@ -23,9 +26,10 @@ #define arch_spin_is_locked(lp) ((lp)->lock != 0) -#define arch_spin_unlock_wait(lp) \ - do { rmb(); \ - } while((lp)->lock) +static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) +{ + smp_cond_load_acquire(&lock->lock, !VAL); +} static inline void arch_spin_lock(arch_spinlock_t *lock) { diff --git a/arch/tile/lib/spinlock_32.c b/arch/tile/lib/spinlock_32.c index 88c2a53362e7..076c6cc43113 100644 --- a/arch/tile/lib/spinlock_32.c +++ b/arch/tile/lib/spinlock_32.c @@ -76,6 +76,12 @@ void arch_spin_unlock_wait(arch_spinlock_t *lock) do { delay_backoff(iterations++); } while (READ_ONCE(lock->current_ticket) == curr); + + /* + * The TILE architecture doesn't do read speculation; therefore + * a control dependency guarantees a LOAD->{LOAD,STORE} order. + */ + barrier(); } EXPORT_SYMBOL(arch_spin_unlock_wait); diff --git a/arch/tile/lib/spinlock_64.c b/arch/tile/lib/spinlock_64.c index c8d1f94ff1fe..a4b5b2cbce93 100644 --- a/arch/tile/lib/spinlock_64.c +++ b/arch/tile/lib/spinlock_64.c @@ -76,6 +76,12 @@ void arch_spin_unlock_wait(arch_spinlock_t *lock) do { delay_backoff(iterations++); } while (arch_spin_current(READ_ONCE(lock->lock)) == curr); + + /* + * The TILE architecture doesn't do read speculation; therefore + * a control dependency guarantees a LOAD->{LOAD,STORE} order. + */ + barrier(); } EXPORT_SYMBOL(arch_spin_unlock_wait); diff --git a/arch/xtensa/include/asm/spinlock.h b/arch/xtensa/include/asm/spinlock.h index 1d95fa5dcd10..a36221cf6363 100644 --- a/arch/xtensa/include/asm/spinlock.h +++ b/arch/xtensa/include/asm/spinlock.h @@ -11,6 +11,9 @@ #ifndef _XTENSA_SPINLOCK_H #define _XTENSA_SPINLOCK_H +#include +#include + /* * spinlock * @@ -29,8 +32,11 @@ */ #define arch_spin_is_locked(x) ((x)->slock != 0) -#define arch_spin_unlock_wait(lock) \ - do { while (arch_spin_is_locked(lock)) cpu_relax(); } while (0) + +static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) +{ + smp_cond_load_acquire(&lock->slock, !VAL); +} #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) diff --git a/include/asm-generic/barrier.h b/include/asm-generic/barrier.h index ab7b0bd7d4dd..fe297b599b0a 100644 --- a/include/asm-generic/barrier.h +++ b/include/asm-generic/barrier.h @@ -194,7 +194,7 @@ do { \ }) #endif -#endif +#endif /* CONFIG_SMP */ /* Barriers for virtual machine guests when talking to an SMP host */ #define virt_mb() __smp_mb() diff --git a/include/linux/spinlock_up.h b/include/linux/spinlock_up.h index 8b3ac0d718eb..0d9848de677d 100644 --- a/include/linux/spinlock_up.h +++ b/include/linux/spinlock_up.h @@ -6,6 +6,7 @@ #endif #include /* for cpu_relax() */ +#include /* * include/linux/spinlock_up.h - UP-debug version of spinlocks. @@ -25,6 +26,11 @@ #ifdef CONFIG_DEBUG_SPINLOCK #define arch_spin_is_locked(x) ((x)->slock == 0) +static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) +{ + smp_cond_load_acquire(&lock->slock, VAL); +} + static inline void arch_spin_lock(arch_spinlock_t *lock) { lock->slock = 0; @@ -67,6 +73,7 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock) #else /* DEBUG_SPINLOCK */ #define arch_spin_is_locked(lock) ((void)(lock), 0) +#define arch_spin_unlock_wait(lock) do { barrier(); (void)(lock); } while (0) /* for sched/core.c and kernel_lock.c: */ # define arch_spin_lock(lock) do { barrier(); (void)(lock); } while (0) # define arch_spin_lock_flags(lock, flags) do { barrier(); (void)(lock); } while (0) @@ -79,7 +86,4 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock) #define arch_read_can_lock(lock) (((void)(lock), 1)) #define arch_write_can_lock(lock) (((void)(lock), 1)) -#define arch_spin_unlock_wait(lock) \ - do { cpu_relax(); } while (arch_spin_is_locked(lock)) - #endif /* __LINUX_SPINLOCK_UP_H */ From be3e7844980352756de4261b276ee2ba5be7a26b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 24 May 2016 14:45:21 +0200 Subject: [PATCH 25/67] locking/spinlock: Update spin_unlock_wait() users With the modified semantics of spin_unlock_wait() a number of explicit barriers can be removed. Also update the comment for the do_exit() usecase, as that was somewhat stale/obscure. Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- ipc/sem.c | 1 - kernel/exit.c | 8 ++++++-- kernel/task_work.c | 1 - 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/ipc/sem.c b/ipc/sem.c index 84dff3df11a4..ae72b3cddc8d 100644 --- a/ipc/sem.c +++ b/ipc/sem.c @@ -282,7 +282,6 @@ static void sem_wait_array(struct sem_array *sma) sem = sma->sem_base + i; spin_unlock_wait(&sem->lock); } - smp_acquire__after_ctrl_dep(); } /* diff --git a/kernel/exit.c b/kernel/exit.c index 9e6e1356e6bb..0b40791b9e70 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -700,10 +700,14 @@ void do_exit(long code) exit_signals(tsk); /* sets PF_EXITING */ /* - * tsk->flags are checked in the futex code to protect against - * an exiting task cleaning up the robust pi futexes. + * Ensure that all new tsk->pi_lock acquisitions must observe + * PF_EXITING. Serializes against futex.c:attach_to_pi_owner(). */ smp_mb(); + /* + * Ensure that we must observe the pi_state in exit_mm() -> + * mm_release() -> exit_pi_state_list(). + */ raw_spin_unlock_wait(&tsk->pi_lock); if (unlikely(in_atomic())) { diff --git a/kernel/task_work.c b/kernel/task_work.c index 53fa971d000d..6ab4842b00e8 100644 --- a/kernel/task_work.c +++ b/kernel/task_work.c @@ -108,7 +108,6 @@ void task_work_run(void) * fail, but it can play with *work and other entries. */ raw_spin_unlock_wait(&task->pi_lock); - smp_mb(); do { next = work->next; From b316ff783d17bd6217804e2e4385ce9347d7dad9 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 24 May 2016 15:00:38 +0200 Subject: [PATCH 26/67] locking/spinlock, netfilter: Fix nf_conntrack_lock() barriers Even with spin_unlock_wait() fixed, nf_conntrack_lock{,_all}() is borken as it misses a bunch of memory barriers to order the whole global vs local locks scheme. Even x86 (and other TSO archs) are affected. Signed-off-by: Peter Zijlstra (Intel) [ Updated the comments. ] Cc: Andrew Morton Cc: David S. Miller Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- net/netfilter/nf_conntrack_core.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index db2312eeb2a4..b8c5501d3872 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -83,6 +83,13 @@ void nf_conntrack_lock(spinlock_t *lock) __acquires(lock) spin_lock(lock); while (unlikely(nf_conntrack_locks_all)) { spin_unlock(lock); + + /* + * Order the 'nf_conntrack_locks_all' load vs. the + * spin_unlock_wait() loads below, to ensure + * that 'nf_conntrack_locks_all_lock' is indeed held: + */ + smp_rmb(); /* spin_lock(&nf_conntrack_locks_all_lock) */ spin_unlock_wait(&nf_conntrack_locks_all_lock); spin_lock(lock); } @@ -128,6 +135,14 @@ static void nf_conntrack_all_lock(void) spin_lock(&nf_conntrack_locks_all_lock); nf_conntrack_locks_all = true; + /* + * Order the above store of 'nf_conntrack_locks_all' against + * the spin_unlock_wait() loads below, such that if + * nf_conntrack_lock() observes 'nf_conntrack_locks_all' + * we must observe nf_conntrack_locks[] held: + */ + smp_mb(); /* spin_lock(&nf_conntrack_locks_all_lock) */ + for (i = 0; i < CONNTRACK_LOCKS; i++) { spin_unlock_wait(&nf_conntrack_locks[i]); } @@ -135,7 +150,13 @@ static void nf_conntrack_all_lock(void) static void nf_conntrack_all_unlock(void) { - nf_conntrack_locks_all = false; + /* + * All prior stores must be complete before we clear + * 'nf_conntrack_locks_all'. Otherwise nf_conntrack_lock() + * might observe the false value but not the entire + * critical section: + */ + smp_store_release(&nf_conntrack_locks_all, false); spin_unlock(&nf_conntrack_locks_all_lock); } From 1f51dee7ca7424be6f84067395166f878dbdd8be Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 18 Apr 2016 01:16:10 +0200 Subject: [PATCH 27/67] locking/atomic, arch/alpha: Implement atomic{,64}_fetch_{add,sub,and,andnot,or,xor}() Implement FETCH-OP atomic primitives, these are very similar to the existing OP-RETURN primitives we already have, except they return the value of the atomic variable _before_ modification. This is especially useful for irreversible operations -- such as bitops (because it becomes impossible to reconstruct the state prior to modification). Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Ivan Kokshaysky Cc: Linus Torvalds Cc: Matt Turner Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Richard Henderson Cc: Thomas Gleixner Cc: linux-alpha@vger.kernel.org Cc: linux-arch@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/alpha/include/asm/atomic.h | 65 ++++++++++++++++++++++++++++----- 1 file changed, 56 insertions(+), 9 deletions(-) diff --git a/arch/alpha/include/asm/atomic.h b/arch/alpha/include/asm/atomic.h index 572b228c44c7..8243f17999e3 100644 --- a/arch/alpha/include/asm/atomic.h +++ b/arch/alpha/include/asm/atomic.h @@ -65,6 +65,25 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ return result; \ } +#define ATOMIC_FETCH_OP(op, asm_op) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + long temp, result; \ + smp_mb(); \ + __asm__ __volatile__( \ + "1: ldl_l %2,%1\n" \ + " " #asm_op " %2,%3,%0\n" \ + " stl_c %0,%1\n" \ + " beq %0,2f\n" \ + ".subsection 2\n" \ + "2: br 1b\n" \ + ".previous" \ + :"=&r" (temp), "=m" (v->counter), "=&r" (result) \ + :"Ir" (i), "m" (v->counter) : "memory"); \ + smp_mb(); \ + return result; \ +} + #define ATOMIC64_OP(op, asm_op) \ static __inline__ void atomic64_##op(long i, atomic64_t * v) \ { \ @@ -101,11 +120,32 @@ static __inline__ long atomic64_##op##_return(long i, atomic64_t * v) \ return result; \ } +#define ATOMIC64_FETCH_OP(op, asm_op) \ +static __inline__ long atomic64_fetch_##op(long i, atomic64_t * v) \ +{ \ + long temp, result; \ + smp_mb(); \ + __asm__ __volatile__( \ + "1: ldq_l %2,%1\n" \ + " " #asm_op " %2,%3,%0\n" \ + " stq_c %0,%1\n" \ + " beq %0,2f\n" \ + ".subsection 2\n" \ + "2: br 1b\n" \ + ".previous" \ + :"=&r" (temp), "=m" (v->counter), "=&r" (result) \ + :"Ir" (i), "m" (v->counter) : "memory"); \ + smp_mb(); \ + return result; \ +} + #define ATOMIC_OPS(op) \ ATOMIC_OP(op, op##l) \ ATOMIC_OP_RETURN(op, op##l) \ + ATOMIC_FETCH_OP(op, op##l) \ ATOMIC64_OP(op, op##q) \ - ATOMIC64_OP_RETURN(op, op##q) + ATOMIC64_OP_RETURN(op, op##q) \ + ATOMIC64_FETCH_OP(op, op##q) ATOMIC_OPS(add) ATOMIC_OPS(sub) @@ -113,18 +153,25 @@ ATOMIC_OPS(sub) #define atomic_andnot atomic_andnot #define atomic64_andnot atomic64_andnot -ATOMIC_OP(and, and) -ATOMIC_OP(andnot, bic) -ATOMIC_OP(or, bis) -ATOMIC_OP(xor, xor) -ATOMIC64_OP(and, and) -ATOMIC64_OP(andnot, bic) -ATOMIC64_OP(or, bis) -ATOMIC64_OP(xor, xor) +#define atomic_fetch_or atomic_fetch_or #undef ATOMIC_OPS +#define ATOMIC_OPS(op, asm) \ + ATOMIC_OP(op, asm) \ + ATOMIC_FETCH_OP(op, asm) \ + ATOMIC64_OP(op, asm) \ + ATOMIC64_FETCH_OP(op, asm) + +ATOMIC_OPS(and, and) +ATOMIC_OPS(andnot, bic) +ATOMIC_OPS(or, bis) +ATOMIC_OPS(xor, xor) + +#undef ATOMIC_OPS +#undef ATOMIC64_FETCH_OP #undef ATOMIC64_OP_RETURN #undef ATOMIC64_OP +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP From fbffe892e5253dd02c016c59a9d792eafe9d53e1 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 18 Apr 2016 01:16:09 +0200 Subject: [PATCH 28/67] locking/atomic, arch/arc: Implement atomic_fetch_{add,sub,and,andnot,or,xor}() Implement FETCH-OP atomic primitives, these are very similar to the existing OP-RETURN primitives we already have, except they return the value of the atomic variable _before_ modification. This is especially useful for irreversible operations -- such as bitops (because it becomes impossible to reconstruct the state prior to modification). Signed-off-by: Peter Zijlstra (Intel) Acked-by: Vineet Gupta Cc: Andrew Morton Cc: Linus Torvalds Cc: Noam Camus Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: linux-snps-arc@lists.infradead.org Signed-off-by: Ingo Molnar --- arch/arc/include/asm/atomic.h | 103 +++++++++++++++++++++++++++++++--- 1 file changed, 94 insertions(+), 9 deletions(-) diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h index dd683995bc9d..c066a21caaaf 100644 --- a/arch/arc/include/asm/atomic.h +++ b/arch/arc/include/asm/atomic.h @@ -67,6 +67,37 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ return val; \ } +#define ATOMIC_FETCH_OP(op, c_op, asm_op) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + unsigned int val, orig; \ + SCOND_FAIL_RETRY_VAR_DEF \ + \ + /* \ + * Explicit full memory barrier needed before/after as \ + * LLOCK/SCOND thmeselves don't provide any such semantics \ + */ \ + smp_mb(); \ + \ + __asm__ __volatile__( \ + "1: llock %[orig], [%[ctr]] \n" \ + " " #asm_op " %[val], %[orig], %[i] \n" \ + " scond %[val], [%[ctr]] \n" \ + " \n" \ + SCOND_FAIL_RETRY_ASM \ + \ + : [val] "=&r" (val), \ + [orig] "=&r" (orig) \ + SCOND_FAIL_RETRY_VARS \ + : [ctr] "r" (&v->counter), \ + [i] "ir" (i) \ + : "cc"); \ + \ + smp_mb(); \ + \ + return orig; \ +} + #else /* !CONFIG_ARC_HAS_LLSC */ #ifndef CONFIG_SMP @@ -129,21 +160,46 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ return temp; \ } +#define ATOMIC_FETCH_OP(op, c_op, asm_op) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + unsigned long flags; \ + unsigned long orig; \ + \ + /* \ + * spin lock/unlock provides the needed smp_mb() before/after \ + */ \ + atomic_ops_lock(flags); \ + orig = v->counter; \ + v->counter c_op i; \ + atomic_ops_unlock(flags); \ + \ + return orig; \ +} + #endif /* !CONFIG_ARC_HAS_LLSC */ #define ATOMIC_OPS(op, c_op, asm_op) \ ATOMIC_OP(op, c_op, asm_op) \ - ATOMIC_OP_RETURN(op, c_op, asm_op) + ATOMIC_OP_RETURN(op, c_op, asm_op) \ + ATOMIC_FETCH_OP(op, c_op, asm_op) ATOMIC_OPS(add, +=, add) ATOMIC_OPS(sub, -=, sub) #define atomic_andnot atomic_andnot -ATOMIC_OP(and, &=, and) -ATOMIC_OP(andnot, &= ~, bic) -ATOMIC_OP(or, |=, or) -ATOMIC_OP(xor, ^=, xor) +#define atomic_fetch_or atomic_fetch_or + +#undef ATOMIC_OPS +#define ATOMIC_OPS(op, c_op, asm_op) \ + ATOMIC_OP(op, c_op, asm_op) \ + ATOMIC_FETCH_OP(op, c_op, asm_op) + +ATOMIC_OPS(and, &=, and) +ATOMIC_OPS(andnot, &= ~, bic) +ATOMIC_OPS(or, |=, or) +ATOMIC_OPS(xor, ^=, xor) #undef SCOND_FAIL_RETRY_VAR_DEF #undef SCOND_FAIL_RETRY_ASM @@ -208,22 +264,51 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ return temp; \ } +#define ATOMIC_FETCH_OP(op, c_op, asm_op) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + unsigned int temp = i; \ + \ + /* Explicit full memory barrier needed before/after */ \ + smp_mb(); \ + \ + __asm__ __volatile__( \ + " mov r2, %0\n" \ + " mov r3, %1\n" \ + " .word %2\n" \ + " mov %0, r2" \ + : "+r"(temp) \ + : "r"(&v->counter), "i"(asm_op) \ + : "r2", "r3", "memory"); \ + \ + smp_mb(); \ + \ + return temp; \ +} + #define ATOMIC_OPS(op, c_op, asm_op) \ ATOMIC_OP(op, c_op, asm_op) \ - ATOMIC_OP_RETURN(op, c_op, asm_op) + ATOMIC_OP_RETURN(op, c_op, asm_op) \ + ATOMIC_FETCH_OP(op, c_op, asm_op) ATOMIC_OPS(add, +=, CTOP_INST_AADD_DI_R2_R2_R3) #define atomic_sub(i, v) atomic_add(-(i), (v)) #define atomic_sub_return(i, v) atomic_add_return(-(i), (v)) -ATOMIC_OP(and, &=, CTOP_INST_AAND_DI_R2_R2_R3) +#undef ATOMIC_OPS +#define ATOMIC_OPS(op, c_op, asm_op) \ + ATOMIC_OP(op, c_op, asm_op) \ + ATOMIC_FETCH_OP(op, c_op, asm_op) + +ATOMIC_OPS(and, &=, CTOP_INST_AAND_DI_R2_R2_R3) #define atomic_andnot(mask, v) atomic_and(~(mask), (v)) -ATOMIC_OP(or, |=, CTOP_INST_AOR_DI_R2_R2_R3) -ATOMIC_OP(xor, ^=, CTOP_INST_AXOR_DI_R2_R2_R3) +ATOMIC_OPS(or, |=, CTOP_INST_AOR_DI_R2_R2_R3) +ATOMIC_OPS(xor, ^=, CTOP_INST_AXOR_DI_R2_R2_R3) #endif /* CONFIG_ARC_PLAT_EZNPS */ #undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP From 6da068c1beba684b2a0dbf43a07b0529edd9e959 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 18 Apr 2016 01:10:52 +0200 Subject: [PATCH 29/67] locking/atomic, arch/arm: Implement atomic{,64}_fetch_{add,sub,and,andnot,or,xor}{,_relaxed,_acquire,_release}() Implement FETCH-OP atomic primitives, these are very similar to the existing OP-RETURN primitives we already have, except they return the value of the atomic variable _before_ modification. This is especially useful for irreversible operations -- such as bitops (because it becomes impossible to reconstruct the state prior to modification). Signed-off-by: Peter Zijlstra (Intel) Acked-by: Will Deacon Cc: Andrew Morton Cc: Davidlohr Bueso Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Russell King Cc: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/arm/include/asm/atomic.h | 108 ++++++++++++++++++++++++++++++---- 1 file changed, 98 insertions(+), 10 deletions(-) diff --git a/arch/arm/include/asm/atomic.h b/arch/arm/include/asm/atomic.h index 9e10c4567eb4..0feb110ec542 100644 --- a/arch/arm/include/asm/atomic.h +++ b/arch/arm/include/asm/atomic.h @@ -77,8 +77,36 @@ static inline int atomic_##op##_return_relaxed(int i, atomic_t *v) \ return result; \ } +#define ATOMIC_FETCH_OP(op, c_op, asm_op) \ +static inline int atomic_fetch_##op##_relaxed(int i, atomic_t *v) \ +{ \ + unsigned long tmp; \ + int result, val; \ + \ + prefetchw(&v->counter); \ + \ + __asm__ __volatile__("@ atomic_fetch_" #op "\n" \ +"1: ldrex %0, [%4]\n" \ +" " #asm_op " %1, %0, %5\n" \ +" strex %2, %1, [%4]\n" \ +" teq %2, #0\n" \ +" bne 1b" \ + : "=&r" (result), "=&r" (val), "=&r" (tmp), "+Qo" (v->counter) \ + : "r" (&v->counter), "Ir" (i) \ + : "cc"); \ + \ + return result; \ +} + #define atomic_add_return_relaxed atomic_add_return_relaxed #define atomic_sub_return_relaxed atomic_sub_return_relaxed +#define atomic_fetch_add_relaxed atomic_fetch_add_relaxed +#define atomic_fetch_sub_relaxed atomic_fetch_sub_relaxed + +#define atomic_fetch_and_relaxed atomic_fetch_and_relaxed +#define atomic_fetch_andnot_relaxed atomic_fetch_andnot_relaxed +#define atomic_fetch_or_relaxed atomic_fetch_or_relaxed +#define atomic_fetch_xor_relaxed atomic_fetch_xor_relaxed static inline int atomic_cmpxchg_relaxed(atomic_t *ptr, int old, int new) { @@ -159,6 +187,22 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ return val; \ } +#define ATOMIC_FETCH_OP(op, c_op, asm_op) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + unsigned long flags; \ + int val; \ + \ + raw_local_irq_save(flags); \ + val = v->counter; \ + v->counter c_op i; \ + raw_local_irq_restore(flags); \ + \ + return val; \ +} + +#define atomic_fetch_or atomic_fetch_or + static inline int atomic_cmpxchg(atomic_t *v, int old, int new) { int ret; @@ -187,19 +231,26 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u) #define ATOMIC_OPS(op, c_op, asm_op) \ ATOMIC_OP(op, c_op, asm_op) \ - ATOMIC_OP_RETURN(op, c_op, asm_op) + ATOMIC_OP_RETURN(op, c_op, asm_op) \ + ATOMIC_FETCH_OP(op, c_op, asm_op) ATOMIC_OPS(add, +=, add) ATOMIC_OPS(sub, -=, sub) #define atomic_andnot atomic_andnot -ATOMIC_OP(and, &=, and) -ATOMIC_OP(andnot, &= ~, bic) -ATOMIC_OP(or, |=, orr) -ATOMIC_OP(xor, ^=, eor) +#undef ATOMIC_OPS +#define ATOMIC_OPS(op, c_op, asm_op) \ + ATOMIC_OP(op, c_op, asm_op) \ + ATOMIC_FETCH_OP(op, c_op, asm_op) + +ATOMIC_OPS(and, &=, and) +ATOMIC_OPS(andnot, &= ~, bic) +ATOMIC_OPS(or, |=, orr) +ATOMIC_OPS(xor, ^=, eor) #undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP @@ -317,24 +368,61 @@ atomic64_##op##_return_relaxed(long long i, atomic64_t *v) \ return result; \ } +#define ATOMIC64_FETCH_OP(op, op1, op2) \ +static inline long long \ +atomic64_fetch_##op##_relaxed(long long i, atomic64_t *v) \ +{ \ + long long result, val; \ + unsigned long tmp; \ + \ + prefetchw(&v->counter); \ + \ + __asm__ __volatile__("@ atomic64_fetch_" #op "\n" \ +"1: ldrexd %0, %H0, [%4]\n" \ +" " #op1 " %Q1, %Q0, %Q5\n" \ +" " #op2 " %R1, %R0, %R5\n" \ +" strexd %2, %1, %H1, [%4]\n" \ +" teq %2, #0\n" \ +" bne 1b" \ + : "=&r" (result), "=&r" (val), "=&r" (tmp), "+Qo" (v->counter) \ + : "r" (&v->counter), "r" (i) \ + : "cc"); \ + \ + return result; \ +} + #define ATOMIC64_OPS(op, op1, op2) \ ATOMIC64_OP(op, op1, op2) \ - ATOMIC64_OP_RETURN(op, op1, op2) + ATOMIC64_OP_RETURN(op, op1, op2) \ + ATOMIC64_FETCH_OP(op, op1, op2) ATOMIC64_OPS(add, adds, adc) ATOMIC64_OPS(sub, subs, sbc) #define atomic64_add_return_relaxed atomic64_add_return_relaxed #define atomic64_sub_return_relaxed atomic64_sub_return_relaxed +#define atomic64_fetch_add_relaxed atomic64_fetch_add_relaxed +#define atomic64_fetch_sub_relaxed atomic64_fetch_sub_relaxed + +#undef ATOMIC64_OPS +#define ATOMIC64_OPS(op, op1, op2) \ + ATOMIC64_OP(op, op1, op2) \ + ATOMIC64_FETCH_OP(op, op1, op2) #define atomic64_andnot atomic64_andnot -ATOMIC64_OP(and, and, and) -ATOMIC64_OP(andnot, bic, bic) -ATOMIC64_OP(or, orr, orr) -ATOMIC64_OP(xor, eor, eor) +ATOMIC64_OPS(and, and, and) +ATOMIC64_OPS(andnot, bic, bic) +ATOMIC64_OPS(or, orr, orr) +ATOMIC64_OPS(xor, eor, eor) + +#define atomic64_fetch_and_relaxed atomic64_fetch_and_relaxed +#define atomic64_fetch_andnot_relaxed atomic64_fetch_andnot_relaxed +#define atomic64_fetch_or_relaxed atomic64_fetch_or_relaxed +#define atomic64_fetch_xor_relaxed atomic64_fetch_xor_relaxed #undef ATOMIC64_OPS +#undef ATOMIC64_FETCH_OP #undef ATOMIC64_OP_RETURN #undef ATOMIC64_OP From e490f9b1d3b40ba32ad07432b63b813ce3052d41 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 18 Apr 2016 01:16:09 +0200 Subject: [PATCH 30/67] locking/atomic, arch/arm64: Implement atomic{,64}_fetch_{add,sub,and,andnot,or,xor}{,_relaxed,_acquire,_release}() Implement FETCH-OP atomic primitives, these are very similar to the existing OP-RETURN primitives we already have, except they return the value of the atomic variable _before_ modification. This is especially useful for irreversible operations -- such as bitops (because it becomes impossible to reconstruct the state prior to modification). [wildea01: compile fixes for ll/sc] Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Catalin Marinas Cc: Linus Torvalds Cc: Lorenzo Pieralisi Cc: Mark Rutland Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Steve Capper Cc: Thomas Gleixner Cc: Will Deacon Cc: linux-arch@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/arm64/include/asm/atomic.h | 62 ++++++++++++++ arch/arm64/include/asm/atomic_ll_sc.h | 114 ++++++++++++++++++++------ 2 files changed, 150 insertions(+), 26 deletions(-) diff --git a/arch/arm64/include/asm/atomic.h b/arch/arm64/include/asm/atomic.h index f3a3586a421c..3128c3d7c1ff 100644 --- a/arch/arm64/include/asm/atomic.h +++ b/arch/arm64/include/asm/atomic.h @@ -76,6 +76,36 @@ #define atomic_dec_return_release(v) atomic_sub_return_release(1, (v)) #define atomic_dec_return(v) atomic_sub_return(1, (v)) +#define atomic_fetch_add_relaxed atomic_fetch_add_relaxed +#define atomic_fetch_add_acquire atomic_fetch_add_acquire +#define atomic_fetch_add_release atomic_fetch_add_release +#define atomic_fetch_add atomic_fetch_add + +#define atomic_fetch_sub_relaxed atomic_fetch_sub_relaxed +#define atomic_fetch_sub_acquire atomic_fetch_sub_acquire +#define atomic_fetch_sub_release atomic_fetch_sub_release +#define atomic_fetch_sub atomic_fetch_sub + +#define atomic_fetch_and_relaxed atomic_fetch_and_relaxed +#define atomic_fetch_and_acquire atomic_fetch_and_acquire +#define atomic_fetch_and_release atomic_fetch_and_release +#define atomic_fetch_and atomic_fetch_and + +#define atomic_fetch_andnot_relaxed atomic_fetch_andnot_relaxed +#define atomic_fetch_andnot_acquire atomic_fetch_andnot_acquire +#define atomic_fetch_andnot_release atomic_fetch_andnot_release +#define atomic_fetch_andnot atomic_fetch_andnot + +#define atomic_fetch_or_relaxed atomic_fetch_or_relaxed +#define atomic_fetch_or_acquire atomic_fetch_or_acquire +#define atomic_fetch_or_release atomic_fetch_or_release +#define atomic_fetch_or atomic_fetch_or + +#define atomic_fetch_xor_relaxed atomic_fetch_xor_relaxed +#define atomic_fetch_xor_acquire atomic_fetch_xor_acquire +#define atomic_fetch_xor_release atomic_fetch_xor_release +#define atomic_fetch_xor atomic_fetch_xor + #define atomic_xchg_relaxed(v, new) xchg_relaxed(&((v)->counter), (new)) #define atomic_xchg_acquire(v, new) xchg_acquire(&((v)->counter), (new)) #define atomic_xchg_release(v, new) xchg_release(&((v)->counter), (new)) @@ -98,6 +128,8 @@ #define __atomic_add_unless(v, a, u) ___atomic_add_unless(v, a, u,) #define atomic_andnot atomic_andnot +#define atomic_fetch_or atomic_fetch_or + /* * 64-bit atomic operations. */ @@ -125,6 +157,36 @@ #define atomic64_dec_return_release(v) atomic64_sub_return_release(1, (v)) #define atomic64_dec_return(v) atomic64_sub_return(1, (v)) +#define atomic64_fetch_add_relaxed atomic64_fetch_add_relaxed +#define atomic64_fetch_add_acquire atomic64_fetch_add_acquire +#define atomic64_fetch_add_release atomic64_fetch_add_release +#define atomic64_fetch_add atomic64_fetch_add + +#define atomic64_fetch_sub_relaxed atomic64_fetch_sub_relaxed +#define atomic64_fetch_sub_acquire atomic64_fetch_sub_acquire +#define atomic64_fetch_sub_release atomic64_fetch_sub_release +#define atomic64_fetch_sub atomic64_fetch_sub + +#define atomic64_fetch_and_relaxed atomic64_fetch_and_relaxed +#define atomic64_fetch_and_acquire atomic64_fetch_and_acquire +#define atomic64_fetch_and_release atomic64_fetch_and_release +#define atomic64_fetch_and atomic64_fetch_and + +#define atomic64_fetch_andnot_relaxed atomic64_fetch_andnot_relaxed +#define atomic64_fetch_andnot_acquire atomic64_fetch_andnot_acquire +#define atomic64_fetch_andnot_release atomic64_fetch_andnot_release +#define atomic64_fetch_andnot atomic64_fetch_andnot + +#define atomic64_fetch_or_relaxed atomic64_fetch_or_relaxed +#define atomic64_fetch_or_acquire atomic64_fetch_or_acquire +#define atomic64_fetch_or_release atomic64_fetch_or_release +#define atomic64_fetch_or atomic64_fetch_or + +#define atomic64_fetch_xor_relaxed atomic64_fetch_xor_relaxed +#define atomic64_fetch_xor_acquire atomic64_fetch_xor_acquire +#define atomic64_fetch_xor_release atomic64_fetch_xor_release +#define atomic64_fetch_xor atomic64_fetch_xor + #define atomic64_xchg_relaxed atomic_xchg_relaxed #define atomic64_xchg_acquire atomic_xchg_acquire #define atomic64_xchg_release atomic_xchg_release diff --git a/arch/arm64/include/asm/atomic_ll_sc.h b/arch/arm64/include/asm/atomic_ll_sc.h index f61c84f6ba02..f819fdcff1ac 100644 --- a/arch/arm64/include/asm/atomic_ll_sc.h +++ b/arch/arm64/include/asm/atomic_ll_sc.h @@ -77,26 +77,57 @@ __LL_SC_PREFIX(atomic_##op##_return##name(int i, atomic_t *v)) \ } \ __LL_SC_EXPORT(atomic_##op##_return##name); +#define ATOMIC_FETCH_OP(name, mb, acq, rel, cl, op, asm_op) \ +__LL_SC_INLINE int \ +__LL_SC_PREFIX(atomic_fetch_##op##name(int i, atomic_t *v)) \ +{ \ + unsigned long tmp; \ + int val, result; \ + \ + asm volatile("// atomic_fetch_" #op #name "\n" \ +" prfm pstl1strm, %3\n" \ +"1: ld" #acq "xr %w0, %3\n" \ +" " #asm_op " %w1, %w0, %w4\n" \ +" st" #rel "xr %w2, %w1, %3\n" \ +" cbnz %w2, 1b\n" \ +" " #mb \ + : "=&r" (result), "=&r" (val), "=&r" (tmp), "+Q" (v->counter) \ + : "Ir" (i) \ + : cl); \ + \ + return result; \ +} \ +__LL_SC_EXPORT(atomic_fetch_##op##name); + #define ATOMIC_OPS(...) \ ATOMIC_OP(__VA_ARGS__) \ - ATOMIC_OP_RETURN( , dmb ish, , l, "memory", __VA_ARGS__) - -#define ATOMIC_OPS_RLX(...) \ - ATOMIC_OPS(__VA_ARGS__) \ + ATOMIC_OP_RETURN( , dmb ish, , l, "memory", __VA_ARGS__)\ ATOMIC_OP_RETURN(_relaxed, , , , , __VA_ARGS__)\ ATOMIC_OP_RETURN(_acquire, , a, , "memory", __VA_ARGS__)\ - ATOMIC_OP_RETURN(_release, , , l, "memory", __VA_ARGS__) + ATOMIC_OP_RETURN(_release, , , l, "memory", __VA_ARGS__)\ + ATOMIC_FETCH_OP ( , dmb ish, , l, "memory", __VA_ARGS__)\ + ATOMIC_FETCH_OP (_relaxed, , , , , __VA_ARGS__)\ + ATOMIC_FETCH_OP (_acquire, , a, , "memory", __VA_ARGS__)\ + ATOMIC_FETCH_OP (_release, , , l, "memory", __VA_ARGS__) -ATOMIC_OPS_RLX(add, add) -ATOMIC_OPS_RLX(sub, sub) +ATOMIC_OPS(add, add) +ATOMIC_OPS(sub, sub) -ATOMIC_OP(and, and) -ATOMIC_OP(andnot, bic) -ATOMIC_OP(or, orr) -ATOMIC_OP(xor, eor) - -#undef ATOMIC_OPS_RLX #undef ATOMIC_OPS +#define ATOMIC_OPS(...) \ + ATOMIC_OP(__VA_ARGS__) \ + ATOMIC_FETCH_OP ( , dmb ish, , l, "memory", __VA_ARGS__)\ + ATOMIC_FETCH_OP (_relaxed, , , , , __VA_ARGS__)\ + ATOMIC_FETCH_OP (_acquire, , a, , "memory", __VA_ARGS__)\ + ATOMIC_FETCH_OP (_release, , , l, "memory", __VA_ARGS__) + +ATOMIC_OPS(and, and) +ATOMIC_OPS(andnot, bic) +ATOMIC_OPS(or, orr) +ATOMIC_OPS(xor, eor) + +#undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP @@ -140,26 +171,57 @@ __LL_SC_PREFIX(atomic64_##op##_return##name(long i, atomic64_t *v)) \ } \ __LL_SC_EXPORT(atomic64_##op##_return##name); +#define ATOMIC64_FETCH_OP(name, mb, acq, rel, cl, op, asm_op) \ +__LL_SC_INLINE long \ +__LL_SC_PREFIX(atomic64_fetch_##op##name(long i, atomic64_t *v)) \ +{ \ + long result, val; \ + unsigned long tmp; \ + \ + asm volatile("// atomic64_fetch_" #op #name "\n" \ +" prfm pstl1strm, %3\n" \ +"1: ld" #acq "xr %0, %3\n" \ +" " #asm_op " %1, %0, %4\n" \ +" st" #rel "xr %w2, %1, %3\n" \ +" cbnz %w2, 1b\n" \ +" " #mb \ + : "=&r" (result), "=&r" (val), "=&r" (tmp), "+Q" (v->counter) \ + : "Ir" (i) \ + : cl); \ + \ + return result; \ +} \ +__LL_SC_EXPORT(atomic64_fetch_##op##name); + #define ATOMIC64_OPS(...) \ ATOMIC64_OP(__VA_ARGS__) \ - ATOMIC64_OP_RETURN(, dmb ish, , l, "memory", __VA_ARGS__) - -#define ATOMIC64_OPS_RLX(...) \ - ATOMIC64_OPS(__VA_ARGS__) \ + ATOMIC64_OP_RETURN(, dmb ish, , l, "memory", __VA_ARGS__) \ ATOMIC64_OP_RETURN(_relaxed,, , , , __VA_ARGS__) \ ATOMIC64_OP_RETURN(_acquire,, a, , "memory", __VA_ARGS__) \ - ATOMIC64_OP_RETURN(_release,, , l, "memory", __VA_ARGS__) + ATOMIC64_OP_RETURN(_release,, , l, "memory", __VA_ARGS__) \ + ATOMIC64_FETCH_OP (, dmb ish, , l, "memory", __VA_ARGS__) \ + ATOMIC64_FETCH_OP (_relaxed,, , , , __VA_ARGS__) \ + ATOMIC64_FETCH_OP (_acquire,, a, , "memory", __VA_ARGS__) \ + ATOMIC64_FETCH_OP (_release,, , l, "memory", __VA_ARGS__) -ATOMIC64_OPS_RLX(add, add) -ATOMIC64_OPS_RLX(sub, sub) +ATOMIC64_OPS(add, add) +ATOMIC64_OPS(sub, sub) -ATOMIC64_OP(and, and) -ATOMIC64_OP(andnot, bic) -ATOMIC64_OP(or, orr) -ATOMIC64_OP(xor, eor) - -#undef ATOMIC64_OPS_RLX #undef ATOMIC64_OPS +#define ATOMIC64_OPS(...) \ + ATOMIC64_OP(__VA_ARGS__) \ + ATOMIC64_FETCH_OP (, dmb ish, , l, "memory", __VA_ARGS__) \ + ATOMIC64_FETCH_OP (_relaxed,, , , , __VA_ARGS__) \ + ATOMIC64_FETCH_OP (_acquire,, a, , "memory", __VA_ARGS__) \ + ATOMIC64_FETCH_OP (_release,, , l, "memory", __VA_ARGS__) + +ATOMIC64_OPS(and, and) +ATOMIC64_OPS(andnot, bic) +ATOMIC64_OPS(or, orr) +ATOMIC64_OPS(xor, eor) + +#undef ATOMIC64_OPS +#undef ATOMIC64_FETCH_OP #undef ATOMIC64_OP_RETURN #undef ATOMIC64_OP From 6822a84dd4e35a1beb70028e46b5f60c14fc422d Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 22 Apr 2016 18:01:32 +0100 Subject: [PATCH 31/67] locking/atomic, arch/arm64: Generate LSE non-return cases using common macros atomic[64]_{add,and,andnot,or,xor} all follow the same patterns, so generate them using macros, like we do for the LL/SC case already. Signed-off-by: Will Deacon Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Ard Biesheuvel Cc: Catalin Marinas Cc: Linus Torvalds Cc: Lorenzo Pieralisi Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Steve Capper Cc: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Link: http://lkml.kernel.org/r/1461344493-8262-1-git-send-email-will.deacon@arm.com Signed-off-by: Ingo Molnar --- arch/arm64/include/asm/atomic_lse.h | 122 ++++++++-------------------- 1 file changed, 32 insertions(+), 90 deletions(-) diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h index 39c1d340fec5..37a0f03560f7 100644 --- a/arch/arm64/include/asm/atomic_lse.h +++ b/arch/arm64/include/asm/atomic_lse.h @@ -26,54 +26,25 @@ #endif #define __LL_SC_ATOMIC(op) __LL_SC_CALL(atomic_##op) - -static inline void atomic_andnot(int i, atomic_t *v) -{ - register int w0 asm ("w0") = i; - register atomic_t *x1 asm ("x1") = v; - - asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC(andnot), - " stclr %w[i], %[v]\n") - : [i] "+r" (w0), [v] "+Q" (v->counter) - : "r" (x1) - : __LL_SC_CLOBBERS); +#define ATOMIC_OP(op, asm_op) \ +static inline void atomic_##op(int i, atomic_t *v) \ +{ \ + register int w0 asm ("w0") = i; \ + register atomic_t *x1 asm ("x1") = v; \ + \ + asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC(op), \ +" " #asm_op " %w[i], %[v]\n") \ + : [i] "+r" (w0), [v] "+Q" (v->counter) \ + : "r" (x1) \ + : __LL_SC_CLOBBERS); \ } -static inline void atomic_or(int i, atomic_t *v) -{ - register int w0 asm ("w0") = i; - register atomic_t *x1 asm ("x1") = v; +ATOMIC_OP(andnot, stclr) +ATOMIC_OP(or, stset) +ATOMIC_OP(xor, steor) +ATOMIC_OP(add, stadd) - asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC(or), - " stset %w[i], %[v]\n") - : [i] "+r" (w0), [v] "+Q" (v->counter) - : "r" (x1) - : __LL_SC_CLOBBERS); -} - -static inline void atomic_xor(int i, atomic_t *v) -{ - register int w0 asm ("w0") = i; - register atomic_t *x1 asm ("x1") = v; - - asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC(xor), - " steor %w[i], %[v]\n") - : [i] "+r" (w0), [v] "+Q" (v->counter) - : "r" (x1) - : __LL_SC_CLOBBERS); -} - -static inline void atomic_add(int i, atomic_t *v) -{ - register int w0 asm ("w0") = i; - register atomic_t *x1 asm ("x1") = v; - - asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC(add), - " stadd %w[i], %[v]\n") - : [i] "+r" (w0), [v] "+Q" (v->counter) - : "r" (x1) - : __LL_SC_CLOBBERS); -} +#undef ATOMIC_OP #define ATOMIC_OP_ADD_RETURN(name, mb, cl...) \ static inline int atomic_add_return##name(int i, atomic_t *v) \ @@ -167,54 +138,25 @@ ATOMIC_OP_SUB_RETURN( , al, "memory") #undef __LL_SC_ATOMIC #define __LL_SC_ATOMIC64(op) __LL_SC_CALL(atomic64_##op) - -static inline void atomic64_andnot(long i, atomic64_t *v) -{ - register long x0 asm ("x0") = i; - register atomic64_t *x1 asm ("x1") = v; - - asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC64(andnot), - " stclr %[i], %[v]\n") - : [i] "+r" (x0), [v] "+Q" (v->counter) - : "r" (x1) - : __LL_SC_CLOBBERS); +#define ATOMIC64_OP(op, asm_op) \ +static inline void atomic64_##op(long i, atomic64_t *v) \ +{ \ + register long x0 asm ("x0") = i; \ + register atomic64_t *x1 asm ("x1") = v; \ + \ + asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC64(op), \ +" " #asm_op " %[i], %[v]\n") \ + : [i] "+r" (x0), [v] "+Q" (v->counter) \ + : "r" (x1) \ + : __LL_SC_CLOBBERS); \ } -static inline void atomic64_or(long i, atomic64_t *v) -{ - register long x0 asm ("x0") = i; - register atomic64_t *x1 asm ("x1") = v; +ATOMIC64_OP(andnot, stclr) +ATOMIC64_OP(or, stset) +ATOMIC64_OP(xor, steor) +ATOMIC64_OP(add, stadd) - asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC64(or), - " stset %[i], %[v]\n") - : [i] "+r" (x0), [v] "+Q" (v->counter) - : "r" (x1) - : __LL_SC_CLOBBERS); -} - -static inline void atomic64_xor(long i, atomic64_t *v) -{ - register long x0 asm ("x0") = i; - register atomic64_t *x1 asm ("x1") = v; - - asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC64(xor), - " steor %[i], %[v]\n") - : [i] "+r" (x0), [v] "+Q" (v->counter) - : "r" (x1) - : __LL_SC_CLOBBERS); -} - -static inline void atomic64_add(long i, atomic64_t *v) -{ - register long x0 asm ("x0") = i; - register atomic64_t *x1 asm ("x1") = v; - - asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC64(add), - " stadd %[i], %[v]\n") - : [i] "+r" (x0), [v] "+Q" (v->counter) - : "r" (x1) - : __LL_SC_CLOBBERS); -} +#undef ATOMIC64_OP #define ATOMIC64_OP_ADD_RETURN(name, mb, cl...) \ static inline long atomic64_add_return##name(long i, atomic64_t *v) \ From 2efe95fe695270ae1a225805f016303505972d86 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 22 Apr 2016 18:01:33 +0100 Subject: [PATCH 32/67] locking/atomic, arch/arm64: Implement atomic{,64}_fetch_{add,sub,and,andnot,or,xor}{,_relaxed,_acquire,_release}() for LSE instructions Implement FETCH-OP atomic primitives, these are very similar to the existing OP-RETURN primitives we already have, except they return the value of the atomic variable _before_ modification. This is especially useful for irreversible operations -- such as bitops (because it becomes impossible to reconstruct the state prior to modification). This patch implements the LSE variants. Signed-off-by: Will Deacon Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Ard Biesheuvel Cc: Catalin Marinas Cc: Linus Torvalds Cc: Lorenzo Pieralisi Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Steve Capper Cc: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Link: http://lkml.kernel.org/r/1461344493-8262-2-git-send-email-will.deacon@arm.com Signed-off-by: Ingo Molnar --- arch/arm64/include/asm/atomic_lse.h | 172 ++++++++++++++++++++++++++++ 1 file changed, 172 insertions(+) diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h index 37a0f03560f7..b5890be8f257 100644 --- a/arch/arm64/include/asm/atomic_lse.h +++ b/arch/arm64/include/asm/atomic_lse.h @@ -46,6 +46,38 @@ ATOMIC_OP(add, stadd) #undef ATOMIC_OP +#define ATOMIC_FETCH_OP(name, mb, op, asm_op, cl...) \ +static inline int atomic_fetch_##op##name(int i, atomic_t *v) \ +{ \ + register int w0 asm ("w0") = i; \ + register atomic_t *x1 asm ("x1") = v; \ + \ + asm volatile(ARM64_LSE_ATOMIC_INSN( \ + /* LL/SC */ \ + __LL_SC_ATOMIC(fetch_##op##name), \ + /* LSE atomics */ \ +" " #asm_op #mb " %w[i], %w[i], %[v]") \ + : [i] "+r" (w0), [v] "+Q" (v->counter) \ + : "r" (x1) \ + : __LL_SC_CLOBBERS, ##cl); \ + \ + return w0; \ +} + +#define ATOMIC_FETCH_OPS(op, asm_op) \ + ATOMIC_FETCH_OP(_relaxed, , op, asm_op) \ + ATOMIC_FETCH_OP(_acquire, a, op, asm_op, "memory") \ + ATOMIC_FETCH_OP(_release, l, op, asm_op, "memory") \ + ATOMIC_FETCH_OP( , al, op, asm_op, "memory") + +ATOMIC_FETCH_OPS(andnot, ldclr) +ATOMIC_FETCH_OPS(or, ldset) +ATOMIC_FETCH_OPS(xor, ldeor) +ATOMIC_FETCH_OPS(add, ldadd) + +#undef ATOMIC_FETCH_OP +#undef ATOMIC_FETCH_OPS + #define ATOMIC_OP_ADD_RETURN(name, mb, cl...) \ static inline int atomic_add_return##name(int i, atomic_t *v) \ { \ @@ -90,6 +122,33 @@ static inline void atomic_and(int i, atomic_t *v) : __LL_SC_CLOBBERS); } +#define ATOMIC_FETCH_OP_AND(name, mb, cl...) \ +static inline int atomic_fetch_and##name(int i, atomic_t *v) \ +{ \ + register int w0 asm ("w0") = i; \ + register atomic_t *x1 asm ("x1") = v; \ + \ + asm volatile(ARM64_LSE_ATOMIC_INSN( \ + /* LL/SC */ \ + " nop\n" \ + __LL_SC_ATOMIC(fetch_and##name), \ + /* LSE atomics */ \ + " mvn %w[i], %w[i]\n" \ + " ldclr" #mb " %w[i], %w[i], %[v]") \ + : [i] "+r" (w0), [v] "+Q" (v->counter) \ + : "r" (x1) \ + : __LL_SC_CLOBBERS, ##cl); \ + \ + return w0; \ +} + +ATOMIC_FETCH_OP_AND(_relaxed, ) +ATOMIC_FETCH_OP_AND(_acquire, a, "memory") +ATOMIC_FETCH_OP_AND(_release, l, "memory") +ATOMIC_FETCH_OP_AND( , al, "memory") + +#undef ATOMIC_FETCH_OP_AND + static inline void atomic_sub(int i, atomic_t *v) { register int w0 asm ("w0") = i; @@ -135,6 +194,33 @@ ATOMIC_OP_SUB_RETURN(_release, l, "memory") ATOMIC_OP_SUB_RETURN( , al, "memory") #undef ATOMIC_OP_SUB_RETURN + +#define ATOMIC_FETCH_OP_SUB(name, mb, cl...) \ +static inline int atomic_fetch_sub##name(int i, atomic_t *v) \ +{ \ + register int w0 asm ("w0") = i; \ + register atomic_t *x1 asm ("x1") = v; \ + \ + asm volatile(ARM64_LSE_ATOMIC_INSN( \ + /* LL/SC */ \ + " nop\n" \ + __LL_SC_ATOMIC(fetch_sub##name), \ + /* LSE atomics */ \ + " neg %w[i], %w[i]\n" \ + " ldadd" #mb " %w[i], %w[i], %[v]") \ + : [i] "+r" (w0), [v] "+Q" (v->counter) \ + : "r" (x1) \ + : __LL_SC_CLOBBERS, ##cl); \ + \ + return w0; \ +} + +ATOMIC_FETCH_OP_SUB(_relaxed, ) +ATOMIC_FETCH_OP_SUB(_acquire, a, "memory") +ATOMIC_FETCH_OP_SUB(_release, l, "memory") +ATOMIC_FETCH_OP_SUB( , al, "memory") + +#undef ATOMIC_FETCH_OP_SUB #undef __LL_SC_ATOMIC #define __LL_SC_ATOMIC64(op) __LL_SC_CALL(atomic64_##op) @@ -158,6 +244,38 @@ ATOMIC64_OP(add, stadd) #undef ATOMIC64_OP +#define ATOMIC64_FETCH_OP(name, mb, op, asm_op, cl...) \ +static inline long atomic64_fetch_##op##name(long i, atomic64_t *v) \ +{ \ + register long x0 asm ("x0") = i; \ + register atomic64_t *x1 asm ("x1") = v; \ + \ + asm volatile(ARM64_LSE_ATOMIC_INSN( \ + /* LL/SC */ \ + __LL_SC_ATOMIC64(fetch_##op##name), \ + /* LSE atomics */ \ +" " #asm_op #mb " %[i], %[i], %[v]") \ + : [i] "+r" (x0), [v] "+Q" (v->counter) \ + : "r" (x1) \ + : __LL_SC_CLOBBERS, ##cl); \ + \ + return x0; \ +} + +#define ATOMIC64_FETCH_OPS(op, asm_op) \ + ATOMIC64_FETCH_OP(_relaxed, , op, asm_op) \ + ATOMIC64_FETCH_OP(_acquire, a, op, asm_op, "memory") \ + ATOMIC64_FETCH_OP(_release, l, op, asm_op, "memory") \ + ATOMIC64_FETCH_OP( , al, op, asm_op, "memory") + +ATOMIC64_FETCH_OPS(andnot, ldclr) +ATOMIC64_FETCH_OPS(or, ldset) +ATOMIC64_FETCH_OPS(xor, ldeor) +ATOMIC64_FETCH_OPS(add, ldadd) + +#undef ATOMIC64_FETCH_OP +#undef ATOMIC64_FETCH_OPS + #define ATOMIC64_OP_ADD_RETURN(name, mb, cl...) \ static inline long atomic64_add_return##name(long i, atomic64_t *v) \ { \ @@ -202,6 +320,33 @@ static inline void atomic64_and(long i, atomic64_t *v) : __LL_SC_CLOBBERS); } +#define ATOMIC64_FETCH_OP_AND(name, mb, cl...) \ +static inline long atomic64_fetch_and##name(long i, atomic64_t *v) \ +{ \ + register long x0 asm ("w0") = i; \ + register atomic64_t *x1 asm ("x1") = v; \ + \ + asm volatile(ARM64_LSE_ATOMIC_INSN( \ + /* LL/SC */ \ + " nop\n" \ + __LL_SC_ATOMIC64(fetch_and##name), \ + /* LSE atomics */ \ + " mvn %[i], %[i]\n" \ + " ldclr" #mb " %[i], %[i], %[v]") \ + : [i] "+r" (x0), [v] "+Q" (v->counter) \ + : "r" (x1) \ + : __LL_SC_CLOBBERS, ##cl); \ + \ + return x0; \ +} + +ATOMIC64_FETCH_OP_AND(_relaxed, ) +ATOMIC64_FETCH_OP_AND(_acquire, a, "memory") +ATOMIC64_FETCH_OP_AND(_release, l, "memory") +ATOMIC64_FETCH_OP_AND( , al, "memory") + +#undef ATOMIC64_FETCH_OP_AND + static inline void atomic64_sub(long i, atomic64_t *v) { register long x0 asm ("x0") = i; @@ -248,6 +393,33 @@ ATOMIC64_OP_SUB_RETURN( , al, "memory") #undef ATOMIC64_OP_SUB_RETURN +#define ATOMIC64_FETCH_OP_SUB(name, mb, cl...) \ +static inline long atomic64_fetch_sub##name(long i, atomic64_t *v) \ +{ \ + register long x0 asm ("w0") = i; \ + register atomic64_t *x1 asm ("x1") = v; \ + \ + asm volatile(ARM64_LSE_ATOMIC_INSN( \ + /* LL/SC */ \ + " nop\n" \ + __LL_SC_ATOMIC64(fetch_sub##name), \ + /* LSE atomics */ \ + " neg %[i], %[i]\n" \ + " ldadd" #mb " %[i], %[i], %[v]") \ + : [i] "+r" (x0), [v] "+Q" (v->counter) \ + : "r" (x1) \ + : __LL_SC_CLOBBERS, ##cl); \ + \ + return x0; \ +} + +ATOMIC64_FETCH_OP_SUB(_relaxed, ) +ATOMIC64_FETCH_OP_SUB(_acquire, a, "memory") +ATOMIC64_FETCH_OP_SUB(_release, l, "memory") +ATOMIC64_FETCH_OP_SUB( , al, "memory") + +#undef ATOMIC64_FETCH_OP_SUB + static inline long atomic64_dec_if_positive(atomic64_t *v) { register long x0 asm ("x0") = (long)v; From 1a6eafacd4811cdc1b138faee858527658eee4e1 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 18 Apr 2016 01:16:08 +0200 Subject: [PATCH 33/67] locking/atomic, arch/avr32: Implement atomic_fetch_{add,sub,and,or,xor}() Implement FETCH-OP atomic primitives, these are very similar to the existing OP-RETURN primitives we already have, except they return the value of the atomic variable _before_ modification. This is especially useful for irreversible operations -- such as bitops (because it becomes impossible to reconstruct the state prior to modification). Signed-off-by: Peter Zijlstra (Intel) Acked-by: Hans-Christian Noren Egtvedt Cc: Andrew Morton Cc: Haavard Skinnemoen Cc: Hans-Christian Egtvedt Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/avr32/include/asm/atomic.h | 56 ++++++++++++++++++++++++++++++--- 1 file changed, 51 insertions(+), 5 deletions(-) diff --git a/arch/avr32/include/asm/atomic.h b/arch/avr32/include/asm/atomic.h index d74fd8ce980a..b8681fd495ef 100644 --- a/arch/avr32/include/asm/atomic.h +++ b/arch/avr32/include/asm/atomic.h @@ -41,21 +41,51 @@ static inline int __atomic_##op##_return(int i, atomic_t *v) \ return result; \ } +#define ATOMIC_FETCH_OP(op, asm_op, asm_con) \ +static inline int __atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + int result, val; \ + \ + asm volatile( \ + "/* atomic_fetch_" #op " */\n" \ + "1: ssrf 5\n" \ + " ld.w %0, %3\n" \ + " mov %1, %0\n" \ + " " #asm_op " %1, %4\n" \ + " stcond %2, %1\n" \ + " brne 1b" \ + : "=&r" (result), "=&r" (val), "=o" (v->counter) \ + : "m" (v->counter), #asm_con (i) \ + : "cc"); \ + \ + return result; \ +} + ATOMIC_OP_RETURN(sub, sub, rKs21) ATOMIC_OP_RETURN(add, add, r) +ATOMIC_FETCH_OP (sub, sub, rKs21) +ATOMIC_FETCH_OP (add, add, r) -#define ATOMIC_OP(op, asm_op) \ +#define atomic_fetch_or atomic_fetch_or + +#define ATOMIC_OPS(op, asm_op) \ ATOMIC_OP_RETURN(op, asm_op, r) \ static inline void atomic_##op(int i, atomic_t *v) \ { \ (void)__atomic_##op##_return(i, v); \ +} \ +ATOMIC_FETCH_OP(op, asm_op, r) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + return __atomic_fetch_##op(i, v); \ } -ATOMIC_OP(and, and) -ATOMIC_OP(or, or) -ATOMIC_OP(xor, eor) +ATOMIC_OPS(and, and) +ATOMIC_OPS(or, or) +ATOMIC_OPS(xor, eor) -#undef ATOMIC_OP +#undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN /* @@ -87,6 +117,14 @@ static inline int atomic_add_return(int i, atomic_t *v) return __atomic_add_return(i, v); } +static inline int atomic_fetch_add(int i, atomic_t *v) +{ + if (IS_21BIT_CONST(i)) + return __atomic_fetch_sub(-i, v); + + return __atomic_fetch_add(i, v); +} + /* * atomic_sub_return - subtract the atomic variable * @i: integer value to subtract @@ -102,6 +140,14 @@ static inline int atomic_sub_return(int i, atomic_t *v) return __atomic_add_return(-i, v); } +static inline int atomic_fetch_sub(int i, atomic_t *v) +{ + if (IS_21BIT_CONST(i)) + return __atomic_fetch_sub(i, v); + + return __atomic_fetch_add(-i, v); +} + /* * __atomic_add_unless - add unless the number is a given value * @v: pointer of type atomic_t From e87fc0ec070554e34812be68267a9450271868d6 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 18 Apr 2016 01:16:08 +0200 Subject: [PATCH 34/67] locking/atomic, arch/blackfin: Implement atomic_fetch_{add,sub,and,or,xor}() Implement FETCH-OP atomic primitives, these are very similar to the existing OP-RETURN primitives we already have, except they return the value of the atomic variable _before_ modification. This is especially useful for irreversible operations -- such as bitops (because it becomes impossible to reconstruct the state prior to modification). Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Steven Miao Cc: Thomas Gleixner Cc: adi-buildroot-devel@lists.sourceforge.net Cc: linux-arch@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/blackfin/include/asm/atomic.h | 8 ++++++ arch/blackfin/kernel/bfin_ksyms.c | 1 + arch/blackfin/mach-bf561/atomic.S | 43 +++++++++++++++++++++--------- 3 files changed, 40 insertions(+), 12 deletions(-) diff --git a/arch/blackfin/include/asm/atomic.h b/arch/blackfin/include/asm/atomic.h index 1c1c42330c99..63c7deceeeb6 100644 --- a/arch/blackfin/include/asm/atomic.h +++ b/arch/blackfin/include/asm/atomic.h @@ -17,6 +17,7 @@ asmlinkage int __raw_uncached_fetch_asm(const volatile int *ptr); asmlinkage int __raw_atomic_add_asm(volatile int *ptr, int value); +asmlinkage int __raw_atomic_xadd_asm(volatile int *ptr, int value); asmlinkage int __raw_atomic_and_asm(volatile int *ptr, int value); asmlinkage int __raw_atomic_or_asm(volatile int *ptr, int value); @@ -28,10 +29,17 @@ asmlinkage int __raw_atomic_test_asm(const volatile int *ptr, int value); #define atomic_add_return(i, v) __raw_atomic_add_asm(&(v)->counter, i) #define atomic_sub_return(i, v) __raw_atomic_add_asm(&(v)->counter, -(i)) +#define atomic_fetch_add(i, v) __raw_atomic_xadd_asm(&(v)->counter, i) +#define atomic_fetch_sub(i, v) __raw_atomic_xadd_asm(&(v)->counter, -(i)) + #define atomic_or(i, v) (void)__raw_atomic_or_asm(&(v)->counter, i) #define atomic_and(i, v) (void)__raw_atomic_and_asm(&(v)->counter, i) #define atomic_xor(i, v) (void)__raw_atomic_xor_asm(&(v)->counter, i) +#define atomic_fetch_or(i, v) __raw_atomic_or_asm(&(v)->counter, i) +#define atomic_fetch_and(i, v) __raw_atomic_and_asm(&(v)->counter, i) +#define atomic_fetch_xor(i, v) __raw_atomic_xor_asm(&(v)->counter, i) + #endif #include diff --git a/arch/blackfin/kernel/bfin_ksyms.c b/arch/blackfin/kernel/bfin_ksyms.c index a401c27b69b4..68096e8f787f 100644 --- a/arch/blackfin/kernel/bfin_ksyms.c +++ b/arch/blackfin/kernel/bfin_ksyms.c @@ -84,6 +84,7 @@ EXPORT_SYMBOL(insl_16); #ifdef CONFIG_SMP EXPORT_SYMBOL(__raw_atomic_add_asm); +EXPORT_SYMBOL(__raw_atomic_xadd_asm); EXPORT_SYMBOL(__raw_atomic_and_asm); EXPORT_SYMBOL(__raw_atomic_or_asm); EXPORT_SYMBOL(__raw_atomic_xor_asm); diff --git a/arch/blackfin/mach-bf561/atomic.S b/arch/blackfin/mach-bf561/atomic.S index 26fccb5568b9..1e2989c5d6b2 100644 --- a/arch/blackfin/mach-bf561/atomic.S +++ b/arch/blackfin/mach-bf561/atomic.S @@ -605,6 +605,28 @@ ENTRY(___raw_atomic_add_asm) rts; ENDPROC(___raw_atomic_add_asm) +/* + * r0 = ptr + * r1 = value + * + * ADD a signed value to a 32bit word and return the old value atomically. + * Clobbers: r3:0, p1:0 + */ +ENTRY(___raw_atomic_xadd_asm) + p1 = r0; + r3 = r1; + [--sp] = rets; + call _get_core_lock; + r3 = [p1]; + r2 = r3 + r2; + [p1] = r2; + r1 = p1; + call _put_core_lock; + r0 = r3; + rets = [sp++]; + rts; +ENDPROC(___raw_atomic_add_asm) + /* * r0 = ptr * r1 = mask @@ -618,10 +640,9 @@ ENTRY(___raw_atomic_and_asm) r3 = r1; [--sp] = rets; call _get_core_lock; - r2 = [p1]; - r3 = r2 & r3; - [p1] = r3; - r3 = r2; + r3 = [p1]; + r2 = r2 & r3; + [p1] = r2; r1 = p1; call _put_core_lock; r0 = r3; @@ -642,10 +663,9 @@ ENTRY(___raw_atomic_or_asm) r3 = r1; [--sp] = rets; call _get_core_lock; - r2 = [p1]; - r3 = r2 | r3; - [p1] = r3; - r3 = r2; + r3 = [p1]; + r2 = r2 | r3; + [p1] = r2; r1 = p1; call _put_core_lock; r0 = r3; @@ -666,10 +686,9 @@ ENTRY(___raw_atomic_xor_asm) r3 = r1; [--sp] = rets; call _get_core_lock; - r2 = [p1]; - r3 = r2 ^ r3; - [p1] = r3; - r3 = r2; + r3 = [p1]; + r2 = r2 ^ r3; + [p1] = r2; r1 = p1; call _put_core_lock; r0 = r3; From d9c730281617e55ca470e66f8e9d7d3f5f420fec Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 18 Apr 2016 01:16:08 +0200 Subject: [PATCH 35/67] locking/atomic, arch/frv: Implement atomic{,64}_fetch_{add,sub,and,or,xor}() Implement FETCH-OP atomic primitives, these are very similar to the existing OP-RETURN primitives we already have, except they return the value of the atomic variable _before_ modification. This is especially useful for irreversible operations -- such as bitops (because it becomes impossible to reconstruct the state prior to modification). Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/frv/include/asm/atomic.h | 32 +++++++++++------------------- arch/frv/include/asm/atomic_defs.h | 2 ++ 2 files changed, 14 insertions(+), 20 deletions(-) diff --git a/arch/frv/include/asm/atomic.h b/arch/frv/include/asm/atomic.h index 64f02d451aa8..e3e06da0cd59 100644 --- a/arch/frv/include/asm/atomic.h +++ b/arch/frv/include/asm/atomic.h @@ -60,16 +60,6 @@ static inline int atomic_add_negative(int i, atomic_t *v) return atomic_add_return(i, v) < 0; } -static inline void atomic_add(int i, atomic_t *v) -{ - atomic_add_return(i, v); -} - -static inline void atomic_sub(int i, atomic_t *v) -{ - atomic_sub_return(i, v); -} - static inline void atomic_inc(atomic_t *v) { atomic_inc_return(v); @@ -84,6 +74,8 @@ static inline void atomic_dec(atomic_t *v) #define atomic_dec_and_test(v) (atomic_sub_return(1, (v)) == 0) #define atomic_inc_and_test(v) (atomic_add_return(1, (v)) == 0) +#define atomic_fetch_or atomic_fetch_or + /* * 64-bit atomic ops */ @@ -136,16 +128,6 @@ static inline long long atomic64_add_negative(long long i, atomic64_t *v) return atomic64_add_return(i, v) < 0; } -static inline void atomic64_add(long long i, atomic64_t *v) -{ - atomic64_add_return(i, v); -} - -static inline void atomic64_sub(long long i, atomic64_t *v) -{ - atomic64_sub_return(i, v); -} - static inline void atomic64_inc(atomic64_t *v) { atomic64_inc_return(v); @@ -182,11 +164,19 @@ static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u) } #define ATOMIC_OP(op) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + return __atomic32_fetch_##op(i, &v->counter); \ +} \ static inline void atomic_##op(int i, atomic_t *v) \ { \ (void)__atomic32_fetch_##op(i, &v->counter); \ } \ \ +static inline long long atomic64_fetch_##op(long long i, atomic64_t *v) \ +{ \ + return __atomic64_fetch_##op(i, &v->counter); \ +} \ static inline void atomic64_##op(long long i, atomic64_t *v) \ { \ (void)__atomic64_fetch_##op(i, &v->counter); \ @@ -195,6 +185,8 @@ static inline void atomic64_##op(long long i, atomic64_t *v) \ ATOMIC_OP(or) ATOMIC_OP(and) ATOMIC_OP(xor) +ATOMIC_OP(add) +ATOMIC_OP(sub) #undef ATOMIC_OP diff --git a/arch/frv/include/asm/atomic_defs.h b/arch/frv/include/asm/atomic_defs.h index 36e126d2f801..d4912c88b829 100644 --- a/arch/frv/include/asm/atomic_defs.h +++ b/arch/frv/include/asm/atomic_defs.h @@ -162,6 +162,8 @@ ATOMIC_EXPORT(__atomic64_fetch_##op); ATOMIC_FETCH_OP(or) ATOMIC_FETCH_OP(and) ATOMIC_FETCH_OP(xor) +ATOMIC_FETCH_OP(add) +ATOMIC_FETCH_OP(sub) ATOMIC_OP_RETURN(add) ATOMIC_OP_RETURN(sub) From 0c074cbc33091dd69fe70ec27474d228c3184860 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 18 Apr 2016 01:16:08 +0200 Subject: [PATCH 36/67] locking/atomic, arch/h8300: Implement atomic_fetch_{add,sub,and,or,xor}() Implement FETCH-OP atomic primitives, these are very similar to the existing OP-RETURN primitives we already have, except they return the value of the atomic variable _before_ modification. This is especially useful for irreversible operations -- such as bitops (because it becomes impossible to reconstruct the state prior to modification). Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Yoshinori Sato Cc: linux-arch@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: uclinux-h8-devel@lists.sourceforge.jp Signed-off-by: Ingo Molnar --- arch/h8300/include/asm/atomic.h | 31 +++++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/arch/h8300/include/asm/atomic.h b/arch/h8300/include/asm/atomic.h index 4435a445ae7e..0961b618bdde 100644 --- a/arch/h8300/include/asm/atomic.h +++ b/arch/h8300/include/asm/atomic.h @@ -28,6 +28,19 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ return ret; \ } +#define ATOMIC_FETCH_OP(op, c_op) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + h8300flags flags; \ + int ret; \ + \ + flags = arch_local_irq_save(); \ + ret = v->counter; \ + v->counter c_op i; \ + arch_local_irq_restore(flags); \ + return ret; \ +} + #define ATOMIC_OP(op, c_op) \ static inline void atomic_##op(int i, atomic_t *v) \ { \ @@ -41,17 +54,23 @@ static inline void atomic_##op(int i, atomic_t *v) \ ATOMIC_OP_RETURN(add, +=) ATOMIC_OP_RETURN(sub, -=) -ATOMIC_OP(and, &=) -ATOMIC_OP(or, |=) -ATOMIC_OP(xor, ^=) +#define atomic_fetch_or atomic_fetch_or +#define ATOMIC_OPS(op, c_op) \ + ATOMIC_OP(op, c_op) \ + ATOMIC_FETCH_OP(op, c_op) + +ATOMIC_OPS(and, &=) +ATOMIC_OPS(or, |=) +ATOMIC_OPS(xor, ^=) +ATOMIC_OPS(add, +=) +ATOMIC_OPS(sub, -=) + +#undef ATOMIC_OPS #undef ATOMIC_OP_RETURN #undef ATOMIC_OP -#define atomic_add(i, v) (void)atomic_add_return(i, v) #define atomic_add_negative(a, v) (atomic_add_return((a), (v)) < 0) - -#define atomic_sub(i, v) (void)atomic_sub_return(i, v) #define atomic_sub_and_test(i, v) (atomic_sub_return(i, v) == 0) #define atomic_inc_return(v) atomic_add_return(1, v) From 4be7dd393515615430a4d07ca1ffceaf2a331620 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 18 Apr 2016 01:16:07 +0200 Subject: [PATCH 37/67] locking/atomic, arch/hexagon: Implement atomic_fetch_{add,sub,and,or,xor}() Implement FETCH-OP atomic primitives, these are very similar to the existing OP-RETURN primitives we already have, except they return the value of the atomic variable _before_ modification. This is especially useful for irreversible operations -- such as bitops (because it becomes impossible to reconstruct the state prior to modification). Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Richard Kuo Cc: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: linux-hexagon@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/hexagon/include/asm/atomic.h | 33 ++++++++++++++++++++++++++----- 1 file changed, 28 insertions(+), 5 deletions(-) diff --git a/arch/hexagon/include/asm/atomic.h b/arch/hexagon/include/asm/atomic.h index 55696c4100d4..07dbb3332b4a 100644 --- a/arch/hexagon/include/asm/atomic.h +++ b/arch/hexagon/include/asm/atomic.h @@ -110,7 +110,7 @@ static inline void atomic_##op(int i, atomic_t *v) \ ); \ } \ -#define ATOMIC_OP_RETURN(op) \ +#define ATOMIC_OP_RETURN(op) \ static inline int atomic_##op##_return(int i, atomic_t *v) \ { \ int output; \ @@ -127,16 +127,39 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ return output; \ } -#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) +#define ATOMIC_FETCH_OP(op) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + int output, val; \ + \ + __asm__ __volatile__ ( \ + "1: %0 = memw_locked(%2);\n" \ + " %1 = "#op "(%0,%3);\n" \ + " memw_locked(%2,P3)=%1;\n" \ + " if !P3 jump 1b;\n" \ + : "=&r" (output), "=&r" (val) \ + : "r" (&v->counter), "r" (i) \ + : "memory", "p3" \ + ); \ + return output; \ +} + +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) ATOMIC_FETCH_OP(op) ATOMIC_OPS(add) ATOMIC_OPS(sub) -ATOMIC_OP(and) -ATOMIC_OP(or) -ATOMIC_OP(xor) +#undef ATOMIC_OPS +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op) + +#define atomic_fetch_or atomic_fetch_or + +ATOMIC_OPS(and) +ATOMIC_OPS(or) +ATOMIC_OPS(xor) #undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP From cc102507fac75f9f4f37938f49d10c25e596a608 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 18 Apr 2016 01:16:07 +0200 Subject: [PATCH 38/67] locking/atomic, arch/ia64: Implement atomic{,64}_fetch_{add,sub,and,or,xor}() Implement FETCH-OP atomic primitives, these are very similar to the existing OP-RETURN primitives we already have, except they return the value of the atomic variable _before_ modification. This is especially useful for irreversible operations -- such as bitops (because it becomes impossible to reconstruct the state prior to modification). Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Fenghua Yu Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tony Luck Cc: linux-arch@vger.kernel.org Cc: linux-ia64@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/ia64/include/asm/atomic.h | 130 +++++++++++++++++++++++++++++---- 1 file changed, 114 insertions(+), 16 deletions(-) diff --git a/arch/ia64/include/asm/atomic.h b/arch/ia64/include/asm/atomic.h index 8dfb5f6f6c35..f565ad376142 100644 --- a/arch/ia64/include/asm/atomic.h +++ b/arch/ia64/include/asm/atomic.h @@ -42,8 +42,27 @@ ia64_atomic_##op (int i, atomic_t *v) \ return new; \ } -ATOMIC_OP(add, +) -ATOMIC_OP(sub, -) +#define ATOMIC_FETCH_OP(op, c_op) \ +static __inline__ int \ +ia64_atomic_fetch_##op (int i, atomic_t *v) \ +{ \ + __s32 old, new; \ + CMPXCHG_BUGCHECK_DECL \ + \ + do { \ + CMPXCHG_BUGCHECK(v); \ + old = atomic_read(v); \ + new = old c_op i; \ + } while (ia64_cmpxchg(acq, v, old, new, sizeof(atomic_t)) != old); \ + return old; \ +} + +#define ATOMIC_OPS(op, c_op) \ + ATOMIC_OP(op, c_op) \ + ATOMIC_FETCH_OP(op, c_op) + +ATOMIC_OPS(add, +) +ATOMIC_OPS(sub, -) #define atomic_add_return(i,v) \ ({ \ @@ -69,14 +88,44 @@ ATOMIC_OP(sub, -) : ia64_atomic_sub(__ia64_asr_i, v); \ }) -ATOMIC_OP(and, &) -ATOMIC_OP(or, |) -ATOMIC_OP(xor, ^) +#define atomic_fetch_add(i,v) \ +({ \ + int __ia64_aar_i = (i); \ + (__builtin_constant_p(i) \ + && ( (__ia64_aar_i == 1) || (__ia64_aar_i == 4) \ + || (__ia64_aar_i == 8) || (__ia64_aar_i == 16) \ + || (__ia64_aar_i == -1) || (__ia64_aar_i == -4) \ + || (__ia64_aar_i == -8) || (__ia64_aar_i == -16))) \ + ? ia64_fetchadd(__ia64_aar_i, &(v)->counter, acq) \ + : ia64_atomic_fetch_add(__ia64_aar_i, v); \ +}) -#define atomic_and(i,v) (void)ia64_atomic_and(i,v) -#define atomic_or(i,v) (void)ia64_atomic_or(i,v) -#define atomic_xor(i,v) (void)ia64_atomic_xor(i,v) +#define atomic_fetch_sub(i,v) \ +({ \ + int __ia64_asr_i = (i); \ + (__builtin_constant_p(i) \ + && ( (__ia64_asr_i == 1) || (__ia64_asr_i == 4) \ + || (__ia64_asr_i == 8) || (__ia64_asr_i == 16) \ + || (__ia64_asr_i == -1) || (__ia64_asr_i == -4) \ + || (__ia64_asr_i == -8) || (__ia64_asr_i == -16))) \ + ? ia64_fetchadd(-__ia64_asr_i, &(v)->counter, acq) \ + : ia64_atomic_fetch_sub(__ia64_asr_i, v); \ +}) +ATOMIC_FETCH_OP(and, &) +ATOMIC_FETCH_OP(or, |) +ATOMIC_FETCH_OP(xor, ^) + +#define atomic_and(i,v) (void)ia64_atomic_fetch_and(i,v) +#define atomic_or(i,v) (void)ia64_atomic_fetch_or(i,v) +#define atomic_xor(i,v) (void)ia64_atomic_fetch_xor(i,v) + +#define atomic_fetch_and(i,v) ia64_atomic_fetch_and(i,v) +#define atomic_fetch_or(i,v) ia64_atomic_fetch_or(i,v) +#define atomic_fetch_xor(i,v) ia64_atomic_fetch_xor(i,v) + +#undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP #define ATOMIC64_OP(op, c_op) \ @@ -94,8 +143,27 @@ ia64_atomic64_##op (__s64 i, atomic64_t *v) \ return new; \ } -ATOMIC64_OP(add, +) -ATOMIC64_OP(sub, -) +#define ATOMIC64_FETCH_OP(op, c_op) \ +static __inline__ long \ +ia64_atomic64_fetch_##op (__s64 i, atomic64_t *v) \ +{ \ + __s64 old, new; \ + CMPXCHG_BUGCHECK_DECL \ + \ + do { \ + CMPXCHG_BUGCHECK(v); \ + old = atomic64_read(v); \ + new = old c_op i; \ + } while (ia64_cmpxchg(acq, v, old, new, sizeof(atomic64_t)) != old); \ + return old; \ +} + +#define ATOMIC64_OPS(op, c_op) \ + ATOMIC64_OP(op, c_op) \ + ATOMIC64_FETCH_OP(op, c_op) + +ATOMIC64_OPS(add, +) +ATOMIC64_OPS(sub, -) #define atomic64_add_return(i,v) \ ({ \ @@ -121,14 +189,44 @@ ATOMIC64_OP(sub, -) : ia64_atomic64_sub(__ia64_asr_i, v); \ }) -ATOMIC64_OP(and, &) -ATOMIC64_OP(or, |) -ATOMIC64_OP(xor, ^) +#define atomic64_fetch_add(i,v) \ +({ \ + long __ia64_aar_i = (i); \ + (__builtin_constant_p(i) \ + && ( (__ia64_aar_i == 1) || (__ia64_aar_i == 4) \ + || (__ia64_aar_i == 8) || (__ia64_aar_i == 16) \ + || (__ia64_aar_i == -1) || (__ia64_aar_i == -4) \ + || (__ia64_aar_i == -8) || (__ia64_aar_i == -16))) \ + ? ia64_fetchadd(__ia64_aar_i, &(v)->counter, acq) \ + : ia64_atomic64_fetch_add(__ia64_aar_i, v); \ +}) -#define atomic64_and(i,v) (void)ia64_atomic64_and(i,v) -#define atomic64_or(i,v) (void)ia64_atomic64_or(i,v) -#define atomic64_xor(i,v) (void)ia64_atomic64_xor(i,v) +#define atomic64_fetch_sub(i,v) \ +({ \ + long __ia64_asr_i = (i); \ + (__builtin_constant_p(i) \ + && ( (__ia64_asr_i == 1) || (__ia64_asr_i == 4) \ + || (__ia64_asr_i == 8) || (__ia64_asr_i == 16) \ + || (__ia64_asr_i == -1) || (__ia64_asr_i == -4) \ + || (__ia64_asr_i == -8) || (__ia64_asr_i == -16))) \ + ? ia64_fetchadd(-__ia64_asr_i, &(v)->counter, acq) \ + : ia64_atomic64_fetch_sub(__ia64_asr_i, v); \ +}) +ATOMIC64_FETCH_OP(and, &) +ATOMIC64_FETCH_OP(or, |) +ATOMIC64_FETCH_OP(xor, ^) + +#define atomic64_and(i,v) (void)ia64_atomic64_fetch_and(i,v) +#define atomic64_or(i,v) (void)ia64_atomic64_fetch_or(i,v) +#define atomic64_xor(i,v) (void)ia64_atomic64_fetch_xor(i,v) + +#define atomic64_fetch_and(i,v) ia64_atomic64_fetch_and(i,v) +#define atomic64_fetch_or(i,v) ia64_atomic64_fetch_or(i,v) +#define atomic64_fetch_xor(i,v) ia64_atomic64_fetch_xor(i,v) + +#undef ATOMIC64_OPS +#undef ATOMIC64_FETCH_OP #undef ATOMIC64_OP #define atomic_cmpxchg(v, old, new) (cmpxchg(&((v)->counter), old, new)) From f649370523033c7c2adf16a9d062438c8a7758b3 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 18 Apr 2016 01:16:07 +0200 Subject: [PATCH 39/67] locking/atomic, arch/m32r: Implement atomic_fetch_{add,sub,and,or,xor}() Implement FETCH-OP atomic primitives, these are very similar to the existing OP-RETURN primitives we already have, except they return the value of the atomic variable _before_ modification. This is especially useful for irreversible operations -- such as bitops (because it becomes impossible to reconstruct the state prior to modification). Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/m32r/include/asm/atomic.h | 38 ++++++++++++++++++++++++++++++---- 1 file changed, 34 insertions(+), 4 deletions(-) diff --git a/arch/m32r/include/asm/atomic.h b/arch/m32r/include/asm/atomic.h index ea35160d632b..8ba8a0ab5d5d 100644 --- a/arch/m32r/include/asm/atomic.h +++ b/arch/m32r/include/asm/atomic.h @@ -89,16 +89,46 @@ static __inline__ int atomic_##op##_return(int i, atomic_t *v) \ return result; \ } -#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) +#define ATOMIC_FETCH_OP(op) \ +static __inline__ int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + unsigned long flags; \ + int result, val; \ + \ + local_irq_save(flags); \ + __asm__ __volatile__ ( \ + "# atomic_fetch_" #op " \n\t" \ + DCACHE_CLEAR("%0", "r4", "%2") \ + M32R_LOCK" %1, @%2; \n\t" \ + "mv %0, %1 \n\t" \ + #op " %1, %3; \n\t" \ + M32R_UNLOCK" %1, @%2; \n\t" \ + : "=&r" (result), "=&r" (val) \ + : "r" (&v->counter), "r" (i) \ + : "memory" \ + __ATOMIC_CLOBBER \ + ); \ + local_irq_restore(flags); \ + \ + return result; \ +} + +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) ATOMIC_FETCH_OP(op) ATOMIC_OPS(add) ATOMIC_OPS(sub) -ATOMIC_OP(and) -ATOMIC_OP(or) -ATOMIC_OP(xor) +#undef ATOMIC_OPS +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op) + +#define atomic_fetch_or atomic_fetch_or + +ATOMIC_OPS(and) +ATOMIC_OPS(or) +ATOMIC_OPS(xor) #undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP From e39d88ea3ce4a471cd0202f4f2c8f5ee0f8d7f53 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 18 Apr 2016 01:16:06 +0200 Subject: [PATCH 40/67] locking/atomic, arch/m68k: Implement atomic_fetch_{add,sub,and,or,xor}() Implement FETCH-OP atomic primitives, these are very similar to the existing OP-RETURN primitives we already have, except they return the value of the atomic variable _before_ modification. This is especially useful for irreversible operations -- such as bitops (because it becomes impossible to reconstruct the state prior to modification). Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Geert Uytterhoeven Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: linux-m68k@lists.linux-m68k.org Signed-off-by: Ingo Molnar --- arch/m68k/include/asm/atomic.h | 53 +++++++++++++++++++++++++++++++--- 1 file changed, 49 insertions(+), 4 deletions(-) diff --git a/arch/m68k/include/asm/atomic.h b/arch/m68k/include/asm/atomic.h index 4858178260f9..5cf9b3b1b6ac 100644 --- a/arch/m68k/include/asm/atomic.h +++ b/arch/m68k/include/asm/atomic.h @@ -38,6 +38,13 @@ static inline void atomic_##op(int i, atomic_t *v) \ #ifdef CONFIG_RMW_INSNS +/* + * Am I reading these CAS loops right in that %2 is the old value and the first + * iteration uses an uninitialized value? + * + * Would it not make sense to add: tmp = atomic_read(v); to avoid this? + */ + #define ATOMIC_OP_RETURN(op, c_op, asm_op) \ static inline int atomic_##op##_return(int i, atomic_t *v) \ { \ @@ -53,6 +60,21 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ return t; \ } +#define ATOMIC_FETCH_OP(op, c_op, asm_op) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + int t, tmp; \ + \ + __asm__ __volatile__( \ + "1: movel %2,%1\n" \ + " " #asm_op "l %3,%1\n" \ + " casl %2,%1,%0\n" \ + " jne 1b" \ + : "+m" (*v), "=&d" (t), "=&d" (tmp) \ + : "g" (i), "2" (atomic_read(v))); \ + return tmp; \ +} + #else #define ATOMIC_OP_RETURN(op, c_op, asm_op) \ @@ -68,20 +90,43 @@ static inline int atomic_##op##_return(int i, atomic_t * v) \ return t; \ } +#define ATOMIC_FETCH_OP(op, c_op, asm_op) \ +static inline int atomic_fetch_##op(int i, atomic_t * v) \ +{ \ + unsigned long flags; \ + int t; \ + \ + local_irq_save(flags); \ + t = v->counter; \ + v->counter c_op i; \ + local_irq_restore(flags); \ + \ + return t; \ +} + #endif /* CONFIG_RMW_INSNS */ #define ATOMIC_OPS(op, c_op, asm_op) \ ATOMIC_OP(op, c_op, asm_op) \ - ATOMIC_OP_RETURN(op, c_op, asm_op) + ATOMIC_OP_RETURN(op, c_op, asm_op) \ + ATOMIC_FETCH_OP(op, c_op, asm_op) ATOMIC_OPS(add, +=, add) ATOMIC_OPS(sub, -=, sub) -ATOMIC_OP(and, &=, and) -ATOMIC_OP(or, |=, or) -ATOMIC_OP(xor, ^=, eor) +#undef ATOMIC_OPS +#define ATOMIC_OPS(op, c_op, asm_op) \ + ATOMIC_OP(op, c_op, asm_op) \ + ATOMIC_FETCH_OP(op, c_op, asm_op) + +#define atomic_fetch_or atomic_fetch_or + +ATOMIC_OPS(and, &=, and) +ATOMIC_OPS(or, |=, or) +ATOMIC_OPS(xor, ^=, eor) #undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP From e898eb27ffd8b0ad6f4fd0b631559bc877c85444 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 18 Apr 2016 01:16:06 +0200 Subject: [PATCH 41/67] locking/atomic, arch/metag: Implement atomic_fetch_{add,sub,and,or,xor}() Implement FETCH-OP atomic primitives, these are very similar to the existing OP-RETURN primitives we already have, except they return the value of the atomic variable _before_ modification. This is especially useful for irreversible operations -- such as bitops (because it becomes impossible to reconstruct the state prior to modification). Signed-off-by: Peter Zijlstra (Intel) Acked-by: James Hogan Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: linux-metag@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/metag/include/asm/atomic.h | 2 ++ arch/metag/include/asm/atomic_lnkget.h | 36 +++++++++++++++++++++++--- arch/metag/include/asm/atomic_lock1.h | 33 ++++++++++++++++++++--- 3 files changed, 63 insertions(+), 8 deletions(-) diff --git a/arch/metag/include/asm/atomic.h b/arch/metag/include/asm/atomic.h index 470e365f04ea..6ca210de8a7d 100644 --- a/arch/metag/include/asm/atomic.h +++ b/arch/metag/include/asm/atomic.h @@ -17,6 +17,8 @@ #include #endif +#define atomic_fetch_or atomic_fetch_or + #define atomic_add_negative(a, v) (atomic_add_return((a), (v)) < 0) #define atomic_dec_return(v) atomic_sub_return(1, (v)) diff --git a/arch/metag/include/asm/atomic_lnkget.h b/arch/metag/include/asm/atomic_lnkget.h index 88fa25fae8bd..def2c642f053 100644 --- a/arch/metag/include/asm/atomic_lnkget.h +++ b/arch/metag/include/asm/atomic_lnkget.h @@ -69,16 +69,44 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ return result; \ } -#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) +#define ATOMIC_FETCH_OP(op) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + int result, temp; \ + \ + smp_mb(); \ + \ + asm volatile ( \ + "1: LNKGETD %1, [%2]\n" \ + " " #op " %0, %1, %3\n" \ + " LNKSETD [%2], %0\n" \ + " DEFR %0, TXSTAT\n" \ + " ANDT %0, %0, #HI(0x3f000000)\n" \ + " CMPT %0, #HI(0x02000000)\n" \ + " BNZ 1b\n" \ + : "=&d" (temp), "=&d" (result) \ + : "da" (&v->counter), "bd" (i) \ + : "cc"); \ + \ + smp_mb(); \ + \ + return result; \ +} + +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) ATOMIC_FETCH_OP(op) ATOMIC_OPS(add) ATOMIC_OPS(sub) -ATOMIC_OP(and) -ATOMIC_OP(or) -ATOMIC_OP(xor) +#undef ATOMIC_OPS +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op) + +ATOMIC_OPS(and) +ATOMIC_OPS(or) +ATOMIC_OPS(xor) #undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP diff --git a/arch/metag/include/asm/atomic_lock1.h b/arch/metag/include/asm/atomic_lock1.h index 0295d9b8d5bf..6c1380a8a0d4 100644 --- a/arch/metag/include/asm/atomic_lock1.h +++ b/arch/metag/include/asm/atomic_lock1.h @@ -64,15 +64,40 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ return result; \ } -#define ATOMIC_OPS(op, c_op) ATOMIC_OP(op, c_op) ATOMIC_OP_RETURN(op, c_op) +#define ATOMIC_FETCH_OP(op, c_op) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + unsigned long result; \ + unsigned long flags; \ + \ + __global_lock1(flags); \ + result = v->counter; \ + fence(); \ + v->counter c_op i; \ + __global_unlock1(flags); \ + \ + return result; \ +} + +#define ATOMIC_OPS(op, c_op) \ + ATOMIC_OP(op, c_op) \ + ATOMIC_OP_RETURN(op, c_op) \ + ATOMIC_FETCH_OP(op, c_op) ATOMIC_OPS(add, +=) ATOMIC_OPS(sub, -=) -ATOMIC_OP(and, &=) -ATOMIC_OP(or, |=) -ATOMIC_OP(xor, ^=) #undef ATOMIC_OPS +#define ATOMIC_OPS(op, c_op) \ + ATOMIC_OP(op, c_op) \ + ATOMIC_FETCH_OP(op, c_op) + +ATOMIC_OPS(and, &=) +ATOMIC_OPS(or, |=) +ATOMIC_OPS(xor, ^=) + +#undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP From 4edac529eb629ccd598e2236c61762537f16e883 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 18 Apr 2016 01:16:06 +0200 Subject: [PATCH 42/67] locking/atomic, arch/mips: Implement atomic{,64}_fetch_{add,sub,and,or,xor}() Implement FETCH-OP atomic primitives, these are very similar to the existing OP-RETURN primitives we already have, except they return the value of the atomic variable _before_ modification. This is especially useful for irreversible operations -- such as bitops (because it becomes impossible to reconstruct the state prior to modification). Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Ralf Baechle Cc: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: linux-mips@linux-mips.org Signed-off-by: Ingo Molnar --- arch/mips/include/asm/atomic.h | 138 ++++++++++++++++++++++++++++++--- 1 file changed, 129 insertions(+), 9 deletions(-) diff --git a/arch/mips/include/asm/atomic.h b/arch/mips/include/asm/atomic.h index 835b402e4574..431079f8e483 100644 --- a/arch/mips/include/asm/atomic.h +++ b/arch/mips/include/asm/atomic.h @@ -66,7 +66,7 @@ static __inline__ void atomic_##op(int i, atomic_t * v) \ " " #asm_op " %0, %2 \n" \ " sc %0, %1 \n" \ " .set mips0 \n" \ - : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (v->counter) \ + : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (v->counter) \ : "Ir" (i)); \ } while (unlikely(!temp)); \ } else { \ @@ -130,18 +130,78 @@ static __inline__ int atomic_##op##_return(int i, atomic_t * v) \ return result; \ } +#define ATOMIC_FETCH_OP(op, c_op, asm_op) \ +static __inline__ int atomic_fetch_##op(int i, atomic_t * v) \ +{ \ + int result; \ + \ + smp_mb__before_llsc(); \ + \ + if (kernel_uses_llsc && R10000_LLSC_WAR) { \ + int temp; \ + \ + __asm__ __volatile__( \ + " .set arch=r4000 \n" \ + "1: ll %1, %2 # atomic_fetch_" #op " \n" \ + " " #asm_op " %0, %1, %3 \n" \ + " sc %0, %2 \n" \ + " beqzl %0, 1b \n" \ + " move %0, %1 \n" \ + " .set mips0 \n" \ + : "=&r" (result), "=&r" (temp), \ + "+" GCC_OFF_SMALL_ASM() (v->counter) \ + : "Ir" (i)); \ + } else if (kernel_uses_llsc) { \ + int temp; \ + \ + do { \ + __asm__ __volatile__( \ + " .set "MIPS_ISA_LEVEL" \n" \ + " ll %1, %2 # atomic_fetch_" #op " \n" \ + " " #asm_op " %0, %1, %3 \n" \ + " sc %0, %2 \n" \ + " .set mips0 \n" \ + : "=&r" (result), "=&r" (temp), \ + "+" GCC_OFF_SMALL_ASM() (v->counter) \ + : "Ir" (i)); \ + } while (unlikely(!result)); \ + \ + result = temp; \ + } else { \ + unsigned long flags; \ + \ + raw_local_irq_save(flags); \ + result = v->counter; \ + v->counter c_op i; \ + raw_local_irq_restore(flags); \ + } \ + \ + smp_llsc_mb(); \ + \ + return result; \ +} + #define ATOMIC_OPS(op, c_op, asm_op) \ ATOMIC_OP(op, c_op, asm_op) \ - ATOMIC_OP_RETURN(op, c_op, asm_op) + ATOMIC_OP_RETURN(op, c_op, asm_op) \ + ATOMIC_FETCH_OP(op, c_op, asm_op) ATOMIC_OPS(add, +=, addu) ATOMIC_OPS(sub, -=, subu) -ATOMIC_OP(and, &=, and) -ATOMIC_OP(or, |=, or) -ATOMIC_OP(xor, ^=, xor) +#undef ATOMIC_OPS +#define ATOMIC_OPS(op, c_op, asm_op) \ + ATOMIC_OP(op, c_op, asm_op) \ + ATOMIC_FETCH_OP(op, c_op, asm_op) + +#define atomic_fetch_or atomic_fetch_or + +ATOMIC_OPS(and, &=, and) +ATOMIC_OPS(or, |=, or) +ATOMIC_OPS(xor, ^=, xor) #undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP @@ -414,17 +474,77 @@ static __inline__ long atomic64_##op##_return(long i, atomic64_t * v) \ return result; \ } +#define ATOMIC64_FETCH_OP(op, c_op, asm_op) \ +static __inline__ long atomic64_fetch_##op(long i, atomic64_t * v) \ +{ \ + long result; \ + \ + smp_mb__before_llsc(); \ + \ + if (kernel_uses_llsc && R10000_LLSC_WAR) { \ + long temp; \ + \ + __asm__ __volatile__( \ + " .set arch=r4000 \n" \ + "1: lld %1, %2 # atomic64_fetch_" #op "\n" \ + " " #asm_op " %0, %1, %3 \n" \ + " scd %0, %2 \n" \ + " beqzl %0, 1b \n" \ + " move %0, %1 \n" \ + " .set mips0 \n" \ + : "=&r" (result), "=&r" (temp), \ + "+" GCC_OFF_SMALL_ASM() (v->counter) \ + : "Ir" (i)); \ + } else if (kernel_uses_llsc) { \ + long temp; \ + \ + do { \ + __asm__ __volatile__( \ + " .set "MIPS_ISA_LEVEL" \n" \ + " lld %1, %2 # atomic64_fetch_" #op "\n" \ + " " #asm_op " %0, %1, %3 \n" \ + " scd %0, %2 \n" \ + " .set mips0 \n" \ + : "=&r" (result), "=&r" (temp), \ + "=" GCC_OFF_SMALL_ASM() (v->counter) \ + : "Ir" (i), GCC_OFF_SMALL_ASM() (v->counter) \ + : "memory"); \ + } while (unlikely(!result)); \ + \ + result = temp; \ + } else { \ + unsigned long flags; \ + \ + raw_local_irq_save(flags); \ + result = v->counter; \ + v->counter c_op i; \ + raw_local_irq_restore(flags); \ + } \ + \ + smp_llsc_mb(); \ + \ + return result; \ +} + #define ATOMIC64_OPS(op, c_op, asm_op) \ ATOMIC64_OP(op, c_op, asm_op) \ - ATOMIC64_OP_RETURN(op, c_op, asm_op) + ATOMIC64_OP_RETURN(op, c_op, asm_op) \ + ATOMIC64_FETCH_OP(op, c_op, asm_op) ATOMIC64_OPS(add, +=, daddu) ATOMIC64_OPS(sub, -=, dsubu) -ATOMIC64_OP(and, &=, and) -ATOMIC64_OP(or, |=, or) -ATOMIC64_OP(xor, ^=, xor) #undef ATOMIC64_OPS +#define ATOMIC64_OPS(op, c_op, asm_op) \ + ATOMIC64_OP(op, c_op, asm_op) \ + ATOMIC64_FETCH_OP(op, c_op, asm_op) + +ATOMIC64_OPS(and, &=, and) +ATOMIC64_OPS(or, |=, or) +ATOMIC64_OPS(xor, ^=, xor) + +#undef ATOMIC64_OPS +#undef ATOMIC64_FETCH_OP #undef ATOMIC64_OP_RETURN #undef ATOMIC64_OP From f8d638e28d7cc858066d2de484d9719dc181593a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 18 Apr 2016 01:16:05 +0200 Subject: [PATCH 43/67] locking/atomic, arch/mn10300: Implement atomic_fetch_{add,sub,and,or,xor}() Implement FETCH-OP atomic primitives, these are very similar to the existing OP-RETURN primitives we already have, except they return the value of the atomic variable _before_ modification. This is especially useful for irreversible operations -- such as bitops (because it becomes impossible to reconstruct the state prior to modification). Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: David Howells Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-am33-list@redhat.com Cc: linux-arch@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/mn10300/include/asm/atomic.h | 35 +++++++++++++++++++++++++++---- 1 file changed, 31 insertions(+), 4 deletions(-) diff --git a/arch/mn10300/include/asm/atomic.h b/arch/mn10300/include/asm/atomic.h index ce318d5ab23b..3580f789f3a6 100644 --- a/arch/mn10300/include/asm/atomic.h +++ b/arch/mn10300/include/asm/atomic.h @@ -84,16 +84,43 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ return retval; \ } -#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) +#define ATOMIC_FETCH_OP(op) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + int retval, status; \ + \ + asm volatile( \ + "1: mov %4,(_AAR,%3) \n" \ + " mov (_ADR,%3),%1 \n" \ + " mov %1,%0 \n" \ + " " #op " %5,%0 \n" \ + " mov %0,(_ADR,%3) \n" \ + " mov (_ADR,%3),%0 \n" /* flush */ \ + " mov (_ASR,%3),%0 \n" \ + " or %0,%0 \n" \ + " bne 1b \n" \ + : "=&r"(status), "=&r"(retval), "=m"(v->counter) \ + : "a"(ATOMIC_OPS_BASE_ADDR), "r"(&v->counter), "r"(i) \ + : "memory", "cc"); \ + return retval; \ +} + +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) ATOMIC_FETCH_OP(op) ATOMIC_OPS(add) ATOMIC_OPS(sub) -ATOMIC_OP(and) -ATOMIC_OP(or) -ATOMIC_OP(xor) +#undef ATOMIC_OPS +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op) + +#define atomic_fetch_or atomic_fetch_or + +ATOMIC_OPS(and) +ATOMIC_OPS(or) +ATOMIC_OPS(xor) #undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP From e5857a6ed6004cac5273b8cdc189ab4b6363cfaf Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 18 Apr 2016 01:16:05 +0200 Subject: [PATCH 44/67] locking/atomic, arch/parisc: Implement atomic{,64}_fetch_{add,sub,and,or,xor}() Implement FETCH-OP atomic primitives, these are very similar to the existing OP-RETURN primitives we already have, except they return the value of the atomic variable _before_ modification. This is especially useful for irreversible operations -- such as bitops (because it becomes impossible to reconstruct the state prior to modification). Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Helge Deller Cc: James E.J. Bottomley Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: linux-parisc@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/parisc/include/asm/atomic.h | 65 ++++++++++++++++++++++++++++---- 1 file changed, 57 insertions(+), 8 deletions(-) diff --git a/arch/parisc/include/asm/atomic.h b/arch/parisc/include/asm/atomic.h index 1d109990a022..29df1f871910 100644 --- a/arch/parisc/include/asm/atomic.h +++ b/arch/parisc/include/asm/atomic.h @@ -121,16 +121,41 @@ static __inline__ int atomic_##op##_return(int i, atomic_t *v) \ return ret; \ } -#define ATOMIC_OPS(op, c_op) ATOMIC_OP(op, c_op) ATOMIC_OP_RETURN(op, c_op) +#define ATOMIC_FETCH_OP(op, c_op) \ +static __inline__ int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + unsigned long flags; \ + int ret; \ + \ + _atomic_spin_lock_irqsave(v, flags); \ + ret = v->counter; \ + v->counter c_op i; \ + _atomic_spin_unlock_irqrestore(v, flags); \ + \ + return ret; \ +} + +#define ATOMIC_OPS(op, c_op) \ + ATOMIC_OP(op, c_op) \ + ATOMIC_OP_RETURN(op, c_op) \ + ATOMIC_FETCH_OP(op, c_op) ATOMIC_OPS(add, +=) ATOMIC_OPS(sub, -=) -ATOMIC_OP(and, &=) -ATOMIC_OP(or, |=) -ATOMIC_OP(xor, ^=) +#undef ATOMIC_OPS +#define ATOMIC_OPS(op, c_op) \ + ATOMIC_OP(op, c_op) \ + ATOMIC_FETCH_OP(op, c_op) + +#define atomic_fetch_or atomic_fetch_or + +ATOMIC_OPS(and, &=) +ATOMIC_OPS(or, |=) +ATOMIC_OPS(xor, ^=) #undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP @@ -185,15 +210,39 @@ static __inline__ s64 atomic64_##op##_return(s64 i, atomic64_t *v) \ return ret; \ } -#define ATOMIC64_OPS(op, c_op) ATOMIC64_OP(op, c_op) ATOMIC64_OP_RETURN(op, c_op) +#define ATOMIC64_FETCH_OP(op, c_op) \ +static __inline__ s64 atomic64_fetch_##op(s64 i, atomic64_t *v) \ +{ \ + unsigned long flags; \ + s64 ret; \ + \ + _atomic_spin_lock_irqsave(v, flags); \ + ret = v->counter; \ + v->counter c_op i; \ + _atomic_spin_unlock_irqrestore(v, flags); \ + \ + return ret; \ +} + +#define ATOMIC64_OPS(op, c_op) \ + ATOMIC64_OP(op, c_op) \ + ATOMIC64_OP_RETURN(op, c_op) \ + ATOMIC64_FETCH_OP(op, c_op) ATOMIC64_OPS(add, +=) ATOMIC64_OPS(sub, -=) -ATOMIC64_OP(and, &=) -ATOMIC64_OP(or, |=) -ATOMIC64_OP(xor, ^=) #undef ATOMIC64_OPS +#define ATOMIC64_OPS(op, c_op) \ + ATOMIC64_OP(op, c_op) \ + ATOMIC64_FETCH_OP(op, c_op) + +ATOMIC64_OPS(and, &=) +ATOMIC64_OPS(or, |=) +ATOMIC64_OPS(xor, ^=) + +#undef ATOMIC64_OPS +#undef ATOMIC64_FETCH_OP #undef ATOMIC64_OP_RETURN #undef ATOMIC64_OP From a28cc7bbe8e30ee573e1a27e704558f0862d8c6d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 18 Apr 2016 01:16:05 +0200 Subject: [PATCH 45/67] locking/atomic, arch/powerpc: Implement atomic{,64}_fetch_{add,sub,and,or,xor}{,_relaxed,_acquire,_release}() Implement FETCH-OP atomic primitives, these are very similar to the existing OP-RETURN primitives we already have, except they return the value of the atomic variable _before_ modification. This is especially useful for irreversible operations -- such as bitops (because it becomes impossible to reconstruct the state prior to modification). Tested-by: Boqun Feng Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Benjamin Herrenschmidt Cc: Linus Torvalds Cc: Michael Ellerman Cc: Paul E. McKenney Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: linuxppc-dev@lists.ozlabs.org Signed-off-by: Ingo Molnar --- arch/powerpc/include/asm/atomic.h | 83 +++++++++++++++++++++++++++---- 1 file changed, 74 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h index ae0751ef8788..f08d567e0ca4 100644 --- a/arch/powerpc/include/asm/atomic.h +++ b/arch/powerpc/include/asm/atomic.h @@ -78,21 +78,53 @@ static inline int atomic_##op##_return_relaxed(int a, atomic_t *v) \ return t; \ } +#define ATOMIC_FETCH_OP_RELAXED(op, asm_op) \ +static inline int atomic_fetch_##op##_relaxed(int a, atomic_t *v) \ +{ \ + int res, t; \ + \ + __asm__ __volatile__( \ +"1: lwarx %0,0,%4 # atomic_fetch_" #op "_relaxed\n" \ + #asm_op " %1,%3,%0\n" \ + PPC405_ERR77(0, %4) \ +" stwcx. %1,0,%4\n" \ +" bne- 1b\n" \ + : "=&r" (res), "=&r" (t), "+m" (v->counter) \ + : "r" (a), "r" (&v->counter) \ + : "cc"); \ + \ + return res; \ +} + #define ATOMIC_OPS(op, asm_op) \ ATOMIC_OP(op, asm_op) \ - ATOMIC_OP_RETURN_RELAXED(op, asm_op) + ATOMIC_OP_RETURN_RELAXED(op, asm_op) \ + ATOMIC_FETCH_OP_RELAXED(op, asm_op) ATOMIC_OPS(add, add) ATOMIC_OPS(sub, subf) -ATOMIC_OP(and, and) -ATOMIC_OP(or, or) -ATOMIC_OP(xor, xor) - #define atomic_add_return_relaxed atomic_add_return_relaxed #define atomic_sub_return_relaxed atomic_sub_return_relaxed +#define atomic_fetch_add_relaxed atomic_fetch_add_relaxed +#define atomic_fetch_sub_relaxed atomic_fetch_sub_relaxed + #undef ATOMIC_OPS +#define ATOMIC_OPS(op, asm_op) \ + ATOMIC_OP(op, asm_op) \ + ATOMIC_FETCH_OP_RELAXED(op, asm_op) + +ATOMIC_OPS(and, and) +ATOMIC_OPS(or, or) +ATOMIC_OPS(xor, xor) + +#define atomic_fetch_and_relaxed atomic_fetch_and_relaxed +#define atomic_fetch_or_relaxed atomic_fetch_or_relaxed +#define atomic_fetch_xor_relaxed atomic_fetch_xor_relaxed + +#undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP_RELAXED #undef ATOMIC_OP_RETURN_RELAXED #undef ATOMIC_OP @@ -329,20 +361,53 @@ atomic64_##op##_return_relaxed(long a, atomic64_t *v) \ return t; \ } +#define ATOMIC64_FETCH_OP_RELAXED(op, asm_op) \ +static inline long \ +atomic64_fetch_##op##_relaxed(long a, atomic64_t *v) \ +{ \ + long res, t; \ + \ + __asm__ __volatile__( \ +"1: ldarx %0,0,%4 # atomic64_fetch_" #op "_relaxed\n" \ + #asm_op " %1,%3,%0\n" \ +" stdcx. %1,0,%4\n" \ +" bne- 1b\n" \ + : "=&r" (res), "=&r" (t), "+m" (v->counter) \ + : "r" (a), "r" (&v->counter) \ + : "cc"); \ + \ + return res; \ +} + #define ATOMIC64_OPS(op, asm_op) \ ATOMIC64_OP(op, asm_op) \ - ATOMIC64_OP_RETURN_RELAXED(op, asm_op) + ATOMIC64_OP_RETURN_RELAXED(op, asm_op) \ + ATOMIC64_FETCH_OP_RELAXED(op, asm_op) ATOMIC64_OPS(add, add) ATOMIC64_OPS(sub, subf) -ATOMIC64_OP(and, and) -ATOMIC64_OP(or, or) -ATOMIC64_OP(xor, xor) #define atomic64_add_return_relaxed atomic64_add_return_relaxed #define atomic64_sub_return_relaxed atomic64_sub_return_relaxed +#define atomic64_fetch_add_relaxed atomic64_fetch_add_relaxed +#define atomic64_fetch_sub_relaxed atomic64_fetch_sub_relaxed + +#undef ATOMIC64_OPS +#define ATOMIC64_OPS(op, asm_op) \ + ATOMIC64_OP(op, asm_op) \ + ATOMIC64_FETCH_OP_RELAXED(op, asm_op) + +ATOMIC64_OPS(and, and) +ATOMIC64_OPS(or, or) +ATOMIC64_OPS(xor, xor) + +#define atomic64_fetch_and_relaxed atomic64_fetch_and_relaxed +#define atomic64_fetch_or_relaxed atomic64_fetch_or_relaxed +#define atomic64_fetch_xor_relaxed atomic64_fetch_xor_relaxed + #undef ATOPIC64_OPS +#undef ATOMIC64_FETCH_OP_RELAXED #undef ATOMIC64_OP_RETURN_RELAXED #undef ATOMIC64_OP From 56fefbbc3f13ad8cc9f502dbc6b5c9ddc8c4395e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 18 Apr 2016 01:16:05 +0200 Subject: [PATCH 46/67] locking/atomic, arch/s390: Implement atomic{,64}_fetch_{add,sub,and,or,xor}() Implement FETCH-OP atomic primitives, these are very similar to the existing OP-RETURN primitives we already have, except they return the value of the atomic variable _before_ modification. This is especially useful for irreversible operations -- such as bitops (because it becomes impossible to reconstruct the state prior to modification). Signed-off-by: Peter Zijlstra (Intel) Acked-by: Martin Schwidefsky Cc: Andrew Morton Cc: Heiko Carstens Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: linux-s390@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/s390/include/asm/atomic.h | 42 ++++++++++++++++++++++++++-------- 1 file changed, 32 insertions(+), 10 deletions(-) diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h index 911064aa59b2..2324e759b544 100644 --- a/arch/s390/include/asm/atomic.h +++ b/arch/s390/include/asm/atomic.h @@ -93,6 +93,11 @@ static inline int atomic_add_return(int i, atomic_t *v) return __ATOMIC_LOOP(v, i, __ATOMIC_ADD, __ATOMIC_BARRIER) + i; } +static inline int atomic_fetch_add(int i, atomic_t *v) +{ + return __ATOMIC_LOOP(v, i, __ATOMIC_ADD, __ATOMIC_BARRIER); +} + static inline void atomic_add(int i, atomic_t *v) { #ifdef CONFIG_HAVE_MARCH_Z196_FEATURES @@ -114,22 +119,29 @@ static inline void atomic_add(int i, atomic_t *v) #define atomic_inc_and_test(_v) (atomic_add_return(1, _v) == 0) #define atomic_sub(_i, _v) atomic_add(-(int)(_i), _v) #define atomic_sub_return(_i, _v) atomic_add_return(-(int)(_i), _v) +#define atomic_fetch_sub(_i, _v) atomic_fetch_add(-(int)(_i), _v) #define atomic_sub_and_test(_i, _v) (atomic_sub_return(_i, _v) == 0) #define atomic_dec(_v) atomic_sub(1, _v) #define atomic_dec_return(_v) atomic_sub_return(1, _v) #define atomic_dec_and_test(_v) (atomic_sub_return(1, _v) == 0) -#define ATOMIC_OP(op, OP) \ +#define ATOMIC_OPS(op, OP) \ static inline void atomic_##op(int i, atomic_t *v) \ { \ __ATOMIC_LOOP(v, i, __ATOMIC_##OP, __ATOMIC_NO_BARRIER); \ +} \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + return __ATOMIC_LOOP(v, i, __ATOMIC_##OP, __ATOMIC_BARRIER); \ } -ATOMIC_OP(and, AND) -ATOMIC_OP(or, OR) -ATOMIC_OP(xor, XOR) +#define atomic_fetch_or atomic_fetch_or -#undef ATOMIC_OP +ATOMIC_OPS(and, AND) +ATOMIC_OPS(or, OR) +ATOMIC_OPS(xor, XOR) + +#undef ATOMIC_OPS #define atomic_xchg(v, new) (xchg(&((v)->counter), new)) @@ -236,6 +248,11 @@ static inline long long atomic64_add_return(long long i, atomic64_t *v) return __ATOMIC64_LOOP(v, i, __ATOMIC64_ADD, __ATOMIC64_BARRIER) + i; } +static inline long long atomic64_fetch_add(long long i, atomic64_t *v) +{ + return __ATOMIC64_LOOP(v, i, __ATOMIC64_ADD, __ATOMIC64_BARRIER); +} + static inline void atomic64_add(long long i, atomic64_t *v) { #ifdef CONFIG_HAVE_MARCH_Z196_FEATURES @@ -264,17 +281,21 @@ static inline long long atomic64_cmpxchg(atomic64_t *v, return old; } -#define ATOMIC64_OP(op, OP) \ +#define ATOMIC64_OPS(op, OP) \ static inline void atomic64_##op(long i, atomic64_t *v) \ { \ __ATOMIC64_LOOP(v, i, __ATOMIC64_##OP, __ATOMIC64_NO_BARRIER); \ +} \ +static inline long atomic64_fetch_##op(long i, atomic64_t *v) \ +{ \ + return __ATOMIC64_LOOP(v, i, __ATOMIC64_##OP, __ATOMIC64_BARRIER); \ } -ATOMIC64_OP(and, AND) -ATOMIC64_OP(or, OR) -ATOMIC64_OP(xor, XOR) +ATOMIC64_OPS(and, AND) +ATOMIC64_OPS(or, OR) +ATOMIC64_OPS(xor, XOR) -#undef ATOMIC64_OP +#undef ATOMIC64_OPS #undef __ATOMIC64_LOOP static inline int atomic64_add_unless(atomic64_t *v, long long i, long long u) @@ -315,6 +336,7 @@ static inline long long atomic64_dec_if_positive(atomic64_t *v) #define atomic64_inc_return(_v) atomic64_add_return(1, _v) #define atomic64_inc_and_test(_v) (atomic64_add_return(1, _v) == 0) #define atomic64_sub_return(_i, _v) atomic64_add_return(-(long long)(_i), _v) +#define atomic64_fetch_sub(_i, _v) atomic64_fetch_add(-(long long)(_i), _v) #define atomic64_sub(_i, _v) atomic64_add(-(long long)(_i), _v) #define atomic64_sub_and_test(_i, _v) (atomic64_sub_return(_i, _v) == 0) #define atomic64_dec(_v) atomic64_sub(1, _v) From 7d9794e7523798e1b9422ad9f4e4d808ae5d5932 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 18 Apr 2016 01:16:04 +0200 Subject: [PATCH 47/67] locking/atomic, arch/sh: Implement atomic_fetch_{add,sub,and,or,xor}() Implement FETCH-OP atomic primitives, these are very similar to the existing OP-RETURN primitives we already have, except they return the value of the atomic variable _before_ modification. This is especially useful for irreversible operations -- such as bitops (because it becomes impossible to reconstruct the state prior to modification). Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Rich Felker Cc: Thomas Gleixner Cc: Yoshinori Sato Cc: linux-arch@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: linux-sh@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/sh/include/asm/atomic-grb.h | 34 +++++++++++++++++++++++++++---- arch/sh/include/asm/atomic-irq.h | 31 ++++++++++++++++++++++++---- arch/sh/include/asm/atomic-llsc.h | 32 +++++++++++++++++++++++++---- arch/sh/include/asm/atomic.h | 2 ++ 4 files changed, 87 insertions(+), 12 deletions(-) diff --git a/arch/sh/include/asm/atomic-grb.h b/arch/sh/include/asm/atomic-grb.h index b94df40e5f2d..d755e96c3064 100644 --- a/arch/sh/include/asm/atomic-grb.h +++ b/arch/sh/include/asm/atomic-grb.h @@ -43,16 +43,42 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ return tmp; \ } -#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) +#define ATOMIC_FETCH_OP(op) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + int res, tmp; \ + \ + __asm__ __volatile__ ( \ + " .align 2 \n\t" \ + " mova 1f, r0 \n\t" /* r0 = end point */ \ + " mov r15, r1 \n\t" /* r1 = saved sp */ \ + " mov #-6, r15 \n\t" /* LOGIN: r15 = size */ \ + " mov.l @%2, %0 \n\t" /* load old value */ \ + " mov %0, %1 \n\t" /* save old value */ \ + " " #op " %3, %0 \n\t" /* $op */ \ + " mov.l %0, @%2 \n\t" /* store new value */ \ + "1: mov r1, r15 \n\t" /* LOGOUT */ \ + : "=&r" (tmp), "=&r" (res), "+r" (v) \ + : "r" (i) \ + : "memory" , "r0", "r1"); \ + \ + return res; \ +} + +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) ATOMIC_FETCH_OP(op) ATOMIC_OPS(add) ATOMIC_OPS(sub) -ATOMIC_OP(and) -ATOMIC_OP(or) -ATOMIC_OP(xor) +#undef ATOMIC_OPS +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op) + +ATOMIC_OPS(and) +ATOMIC_OPS(or) +ATOMIC_OPS(xor) #undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP diff --git a/arch/sh/include/asm/atomic-irq.h b/arch/sh/include/asm/atomic-irq.h index 23fcdad5773e..8e2da5fa0178 100644 --- a/arch/sh/include/asm/atomic-irq.h +++ b/arch/sh/include/asm/atomic-irq.h @@ -33,15 +33,38 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ return temp; \ } -#define ATOMIC_OPS(op, c_op) ATOMIC_OP(op, c_op) ATOMIC_OP_RETURN(op, c_op) +#define ATOMIC_FETCH_OP(op, c_op) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + unsigned long temp, flags; \ + \ + raw_local_irq_save(flags); \ + temp = v->counter; \ + v->counter c_op i; \ + raw_local_irq_restore(flags); \ + \ + return temp; \ +} + +#define ATOMIC_OPS(op, c_op) \ + ATOMIC_OP(op, c_op) \ + ATOMIC_OP_RETURN(op, c_op) \ + ATOMIC_FETCH_OP(op, c_op) ATOMIC_OPS(add, +=) ATOMIC_OPS(sub, -=) -ATOMIC_OP(and, &=) -ATOMIC_OP(or, |=) -ATOMIC_OP(xor, ^=) #undef ATOMIC_OPS +#define ATOMIC_OPS(op, c_op) \ + ATOMIC_OP(op, c_op) \ + ATOMIC_FETCH_OP(op, c_op) + +ATOMIC_OPS(and, &=) +ATOMIC_OPS(or, |=) +ATOMIC_OPS(xor, ^=) + +#undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP diff --git a/arch/sh/include/asm/atomic-llsc.h b/arch/sh/include/asm/atomic-llsc.h index 33d34b16d4d6..caea2c45f6c2 100644 --- a/arch/sh/include/asm/atomic-llsc.h +++ b/arch/sh/include/asm/atomic-llsc.h @@ -48,15 +48,39 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ return temp; \ } -#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) +#define ATOMIC_FETCH_OP(op) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + unsigned long res, temp; \ + \ + __asm__ __volatile__ ( \ +"1: movli.l @%3, %0 ! atomic_fetch_" #op " \n" \ +" mov %0, %1 \n" \ +" " #op " %2, %0 \n" \ +" movco.l %0, @%3 \n" \ +" bf 1b \n" \ +" synco \n" \ + : "=&z" (temp), "=&z" (res) \ + : "r" (i), "r" (&v->counter) \ + : "t"); \ + \ + return res; \ +} + +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) ATOMIC_FETCH_OP(op) ATOMIC_OPS(add) ATOMIC_OPS(sub) -ATOMIC_OP(and) -ATOMIC_OP(or) -ATOMIC_OP(xor) #undef ATOMIC_OPS +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op) + +ATOMIC_OPS(and) +ATOMIC_OPS(or) +ATOMIC_OPS(xor) + +#undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP diff --git a/arch/sh/include/asm/atomic.h b/arch/sh/include/asm/atomic.h index c399e1c55685..d93ed7ce1b2f 100644 --- a/arch/sh/include/asm/atomic.h +++ b/arch/sh/include/asm/atomic.h @@ -25,6 +25,8 @@ #include #endif +#define atomic_fetch_or atomic_fetch_or + #define atomic_add_negative(a, v) (atomic_add_return((a), (v)) < 0) #define atomic_dec_return(v) atomic_sub_return(1, (v)) #define atomic_inc_return(v) atomic_add_return(1, (v)) From 3a1adb23a52c920304239efff377d3bc967febc2 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 18 Apr 2016 01:16:04 +0200 Subject: [PATCH 48/67] locking/atomic, arch/sparc: Implement atomic{,64}_fetch_{add,sub,and,or,xor}() Implement FETCH-OP atomic primitives, these are very similar to the existing OP-RETURN primitives we already have, except they return the value of the atomic variable _before_ modification. This is especially useful for irreversible operations -- such as bitops (because it becomes impossible to reconstruct the state prior to modification). Signed-off-by: Peter Zijlstra (Intel) Acked-by: David S. Miller Cc: Andrew Morton Cc: James Y Knight Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: sparclinux@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/sparc/include/asm/atomic.h | 1 + arch/sparc/include/asm/atomic_32.h | 15 ++++++-- arch/sparc/include/asm/atomic_64.h | 16 ++++++-- arch/sparc/lib/atomic32.c | 37 ++++++++++-------- arch/sparc/lib/atomic_64.S | 61 ++++++++++++++++++++++++------ arch/sparc/lib/ksyms.c | 17 +++++++-- 6 files changed, 109 insertions(+), 38 deletions(-) diff --git a/arch/sparc/include/asm/atomic.h b/arch/sparc/include/asm/atomic.h index 8ff83d8cc33f..1f741bcc73b7 100644 --- a/arch/sparc/include/asm/atomic.h +++ b/arch/sparc/include/asm/atomic.h @@ -5,4 +5,5 @@ #else #include #endif +#define atomic_fetch_or atomic_fetch_or #endif diff --git a/arch/sparc/include/asm/atomic_32.h b/arch/sparc/include/asm/atomic_32.h index 7dcbebbcaec6..5cfb20a599d9 100644 --- a/arch/sparc/include/asm/atomic_32.h +++ b/arch/sparc/include/asm/atomic_32.h @@ -20,9 +20,10 @@ #define ATOMIC_INIT(i) { (i) } int atomic_add_return(int, atomic_t *); -void atomic_and(int, atomic_t *); -void atomic_or(int, atomic_t *); -void atomic_xor(int, atomic_t *); +int atomic_fetch_add(int, atomic_t *); +int atomic_fetch_and(int, atomic_t *); +int atomic_fetch_or(int, atomic_t *); +int atomic_fetch_xor(int, atomic_t *); int atomic_cmpxchg(atomic_t *, int, int); int atomic_xchg(atomic_t *, int); int __atomic_add_unless(atomic_t *, int, int); @@ -35,7 +36,15 @@ void atomic_set(atomic_t *, int); #define atomic_inc(v) ((void)atomic_add_return( 1, (v))) #define atomic_dec(v) ((void)atomic_add_return( -1, (v))) +#define atomic_fetch_or atomic_fetch_or + +#define atomic_and(i, v) ((void)atomic_fetch_and((i), (v))) +#define atomic_or(i, v) ((void)atomic_fetch_or((i), (v))) +#define atomic_xor(i, v) ((void)atomic_fetch_xor((i), (v))) + #define atomic_sub_return(i, v) (atomic_add_return(-(int)(i), (v))) +#define atomic_fetch_sub(i, v) (atomic_fetch_add (-(int)(i), (v))) + #define atomic_inc_return(v) (atomic_add_return( 1, (v))) #define atomic_dec_return(v) (atomic_add_return( -1, (v))) diff --git a/arch/sparc/include/asm/atomic_64.h b/arch/sparc/include/asm/atomic_64.h index f2fbf9e16faf..24827a3f733a 100644 --- a/arch/sparc/include/asm/atomic_64.h +++ b/arch/sparc/include/asm/atomic_64.h @@ -28,16 +28,24 @@ void atomic64_##op(long, atomic64_t *); int atomic_##op##_return(int, atomic_t *); \ long atomic64_##op##_return(long, atomic64_t *); -#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) +#define ATOMIC_FETCH_OP(op) \ +int atomic_fetch_##op(int, atomic_t *); \ +long atomic64_fetch_##op(long, atomic64_t *); + +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) ATOMIC_FETCH_OP(op) ATOMIC_OPS(add) ATOMIC_OPS(sub) -ATOMIC_OP(and) -ATOMIC_OP(or) -ATOMIC_OP(xor) +#undef ATOMIC_OPS +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op) + +ATOMIC_OPS(and) +ATOMIC_OPS(or) +ATOMIC_OPS(xor) #undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP diff --git a/arch/sparc/lib/atomic32.c b/arch/sparc/lib/atomic32.c index b9d63c0a7aab..2c373329d5cb 100644 --- a/arch/sparc/lib/atomic32.c +++ b/arch/sparc/lib/atomic32.c @@ -27,6 +27,21 @@ static DEFINE_SPINLOCK(dummy); #endif /* SMP */ +#define ATOMIC_FETCH_OP(op, c_op) \ +int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + int ret; \ + unsigned long flags; \ + spin_lock_irqsave(ATOMIC_HASH(v), flags); \ + \ + ret = v->counter; \ + v->counter c_op i; \ + \ + spin_unlock_irqrestore(ATOMIC_HASH(v), flags); \ + return ret; \ +} \ +EXPORT_SYMBOL(atomic_fetch_##op); + #define ATOMIC_OP_RETURN(op, c_op) \ int atomic_##op##_return(int i, atomic_t *v) \ { \ @@ -41,25 +56,15 @@ int atomic_##op##_return(int i, atomic_t *v) \ } \ EXPORT_SYMBOL(atomic_##op##_return); -#define ATOMIC_OP(op, c_op) \ -void atomic_##op(int i, atomic_t *v) \ -{ \ - unsigned long flags; \ - spin_lock_irqsave(ATOMIC_HASH(v), flags); \ - \ - v->counter c_op i; \ - \ - spin_unlock_irqrestore(ATOMIC_HASH(v), flags); \ -} \ -EXPORT_SYMBOL(atomic_##op); - ATOMIC_OP_RETURN(add, +=) -ATOMIC_OP(and, &=) -ATOMIC_OP(or, |=) -ATOMIC_OP(xor, ^=) +ATOMIC_FETCH_OP(add, +=) +ATOMIC_FETCH_OP(and, &=) +ATOMIC_FETCH_OP(or, |=) +ATOMIC_FETCH_OP(xor, ^=) + +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN -#undef ATOMIC_OP int atomic_xchg(atomic_t *v, int new) { diff --git a/arch/sparc/lib/atomic_64.S b/arch/sparc/lib/atomic_64.S index d6b0363f345b..a5c5a0279ccc 100644 --- a/arch/sparc/lib/atomic_64.S +++ b/arch/sparc/lib/atomic_64.S @@ -9,10 +9,11 @@ .text - /* Two versions of the atomic routines, one that + /* Three versions of the atomic routines, one that * does not return a value and does not perform - * memory barriers, and a second which returns - * a value and does the barriers. + * memory barriers, and a two which return + * a value, the new and old value resp. and does the + * barriers. */ #define ATOMIC_OP(op) \ @@ -43,15 +44,34 @@ ENTRY(atomic_##op##_return) /* %o0 = increment, %o1 = atomic_ptr */ \ 2: BACKOFF_SPIN(%o2, %o3, 1b); \ ENDPROC(atomic_##op##_return); -#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) +#define ATOMIC_FETCH_OP(op) \ +ENTRY(atomic_fetch_##op) /* %o0 = increment, %o1 = atomic_ptr */ \ + BACKOFF_SETUP(%o2); \ +1: lduw [%o1], %g1; \ + op %g1, %o0, %g7; \ + cas [%o1], %g1, %g7; \ + cmp %g1, %g7; \ + bne,pn %icc, BACKOFF_LABEL(2f, 1b); \ + nop; \ + retl; \ + sra %g1, 0, %o0; \ +2: BACKOFF_SPIN(%o2, %o3, 1b); \ +ENDPROC(atomic_fetch_##op); + +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) ATOMIC_FETCH_OP(op) ATOMIC_OPS(add) ATOMIC_OPS(sub) -ATOMIC_OP(and) -ATOMIC_OP(or) -ATOMIC_OP(xor) #undef ATOMIC_OPS +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op) + +ATOMIC_OPS(and) +ATOMIC_OPS(or) +ATOMIC_OPS(xor) + +#undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP @@ -83,15 +103,34 @@ ENTRY(atomic64_##op##_return) /* %o0 = increment, %o1 = atomic_ptr */ \ 2: BACKOFF_SPIN(%o2, %o3, 1b); \ ENDPROC(atomic64_##op##_return); -#define ATOMIC64_OPS(op) ATOMIC64_OP(op) ATOMIC64_OP_RETURN(op) +#define ATOMIC64_FETCH_OP(op) \ +ENTRY(atomic64_fetch_##op) /* %o0 = increment, %o1 = atomic_ptr */ \ + BACKOFF_SETUP(%o2); \ +1: ldx [%o1], %g1; \ + op %g1, %o0, %g7; \ + casx [%o1], %g1, %g7; \ + cmp %g1, %g7; \ + bne,pn %xcc, BACKOFF_LABEL(2f, 1b); \ + nop; \ + retl; \ + mov %g1, %o0; \ +2: BACKOFF_SPIN(%o2, %o3, 1b); \ +ENDPROC(atomic64_fetch_##op); + +#define ATOMIC64_OPS(op) ATOMIC64_OP(op) ATOMIC64_OP_RETURN(op) ATOMIC64_FETCH_OP(op) ATOMIC64_OPS(add) ATOMIC64_OPS(sub) -ATOMIC64_OP(and) -ATOMIC64_OP(or) -ATOMIC64_OP(xor) #undef ATOMIC64_OPS +#define ATOMIC64_OPS(op) ATOMIC64_OP(op) ATOMIC64_FETCH_OP(op) + +ATOMIC64_OPS(and) +ATOMIC64_OPS(or) +ATOMIC64_OPS(xor) + +#undef ATOMIC64_OPS +#undef ATOMIC64_FETCH_OP #undef ATOMIC64_OP_RETURN #undef ATOMIC64_OP diff --git a/arch/sparc/lib/ksyms.c b/arch/sparc/lib/ksyms.c index 8eb454cfe05c..de5e97817bdb 100644 --- a/arch/sparc/lib/ksyms.c +++ b/arch/sparc/lib/ksyms.c @@ -107,15 +107,24 @@ EXPORT_SYMBOL(atomic64_##op); EXPORT_SYMBOL(atomic_##op##_return); \ EXPORT_SYMBOL(atomic64_##op##_return); -#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) +#define ATOMIC_FETCH_OP(op) \ +EXPORT_SYMBOL(atomic_fetch_##op); \ +EXPORT_SYMBOL(atomic64_fetch_##op); + +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) ATOMIC_FETCH_OP(op) ATOMIC_OPS(add) ATOMIC_OPS(sub) -ATOMIC_OP(and) -ATOMIC_OP(or) -ATOMIC_OP(xor) #undef ATOMIC_OPS +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op) + +ATOMIC_OPS(and) +ATOMIC_OPS(or) +ATOMIC_OPS(xor) + +#undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP From 1af5de9af138941fb8638cf126293b16f3387de4 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 18 Apr 2016 01:16:03 +0200 Subject: [PATCH 49/67] locking/atomic, arch/tile: Implement atomic{,64}_fetch_{add,sub,and,or,xor}() Implement FETCH-OP atomic primitives, these are very similar to the existing OP-RETURN primitives we already have, except they return the value of the atomic variable _before_ modification. This is especially useful for irreversible operations -- such as bitops (because it becomes impossible to reconstruct the state prior to modification). Signed-off-by: Peter Zijlstra (Intel) Acked-by: Chris Metcalf Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/tile/include/asm/atomic.h | 4 + arch/tile/include/asm/atomic_32.h | 60 +++++++++----- arch/tile/include/asm/atomic_64.h | 125 +++++++++++++++++++----------- arch/tile/include/asm/bitops_32.h | 18 ++--- arch/tile/lib/atomic_32.c | 42 +++++----- arch/tile/lib/atomic_asm_32.S | 14 ++-- 6 files changed, 164 insertions(+), 99 deletions(-) diff --git a/arch/tile/include/asm/atomic.h b/arch/tile/include/asm/atomic.h index 9fc0107a9c5e..9807030557c4 100644 --- a/arch/tile/include/asm/atomic.h +++ b/arch/tile/include/asm/atomic.h @@ -46,6 +46,10 @@ static inline int atomic_read(const atomic_t *v) */ #define atomic_sub_return(i, v) atomic_add_return((int)(-(i)), (v)) +#define atomic_fetch_sub(i, v) atomic_fetch_add(-(int)(i), (v)) + +#define atomic_fetch_or atomic_fetch_or + /** * atomic_sub - subtract integer from atomic variable * @i: integer value to subtract diff --git a/arch/tile/include/asm/atomic_32.h b/arch/tile/include/asm/atomic_32.h index d320ce253d86..da8eb4ed3752 100644 --- a/arch/tile/include/asm/atomic_32.h +++ b/arch/tile/include/asm/atomic_32.h @@ -34,18 +34,29 @@ static inline void atomic_add(int i, atomic_t *v) _atomic_xchg_add(&v->counter, i); } -#define ATOMIC_OP(op) \ -unsigned long _atomic_##op(volatile unsigned long *p, unsigned long mask); \ +#define ATOMIC_OPS(op) \ +unsigned long _atomic_fetch_##op(volatile unsigned long *p, unsigned long mask); \ static inline void atomic_##op(int i, atomic_t *v) \ { \ - _atomic_##op((unsigned long *)&v->counter, i); \ + _atomic_fetch_##op((unsigned long *)&v->counter, i); \ +} \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + smp_mb(); \ + return _atomic_fetch_##op((unsigned long *)&v->counter, i); \ } -ATOMIC_OP(and) -ATOMIC_OP(or) -ATOMIC_OP(xor) +ATOMIC_OPS(and) +ATOMIC_OPS(or) +ATOMIC_OPS(xor) -#undef ATOMIC_OP +#undef ATOMIC_OPS + +static inline int atomic_fetch_add(int i, atomic_t *v) +{ + smp_mb(); + return _atomic_xchg_add(&v->counter, i); +} /** * atomic_add_return - add integer and return @@ -126,17 +137,30 @@ static inline void atomic64_add(long long i, atomic64_t *v) _atomic64_xchg_add(&v->counter, i); } -#define ATOMIC64_OP(op) \ -long long _atomic64_##op(long long *v, long long n); \ +#define ATOMIC64_OPS(op) \ +long long _atomic64_fetch_##op(long long *v, long long n); \ static inline void atomic64_##op(long long i, atomic64_t *v) \ { \ - _atomic64_##op(&v->counter, i); \ + _atomic64_fetch_##op(&v->counter, i); \ +} \ +static inline void atomic64_##op(long long i, atomic64_t *v) \ +{ \ + smp_mb(); \ + return _atomic64_fetch_##op(&v->counter, i); \ } ATOMIC64_OP(and) ATOMIC64_OP(or) ATOMIC64_OP(xor) +#undef ATOMIC64_OPS + +static inline long long atomic64_fetch_add(long long i, atomic64_t *v) +{ + smp_mb(); + return _atomic64_xchg_add(&v->counter, i); +} + /** * atomic64_add_return - add integer and return * @v: pointer of type atomic64_t @@ -186,6 +210,7 @@ static inline void atomic64_set(atomic64_t *v, long long n) #define atomic64_inc_return(v) atomic64_add_return(1LL, (v)) #define atomic64_inc_and_test(v) (atomic64_inc_return(v) == 0) #define atomic64_sub_return(i, v) atomic64_add_return(-(i), (v)) +#define atomic64_fetch_sub(i, v) atomic64_fetch_add(-(i), (v)) #define atomic64_sub_and_test(a, v) (atomic64_sub_return((a), (v)) == 0) #define atomic64_sub(i, v) atomic64_add(-(i), (v)) #define atomic64_dec(v) atomic64_sub(1LL, (v)) @@ -193,7 +218,6 @@ static inline void atomic64_set(atomic64_t *v, long long n) #define atomic64_dec_and_test(v) (atomic64_dec_return((v)) == 0) #define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1LL, 0LL) - #endif /* !__ASSEMBLY__ */ /* @@ -248,10 +272,10 @@ extern struct __get_user __atomic_xchg(volatile int *p, int *lock, int n); extern struct __get_user __atomic_xchg_add(volatile int *p, int *lock, int n); extern struct __get_user __atomic_xchg_add_unless(volatile int *p, int *lock, int o, int n); -extern struct __get_user __atomic_or(volatile int *p, int *lock, int n); -extern struct __get_user __atomic_and(volatile int *p, int *lock, int n); -extern struct __get_user __atomic_andn(volatile int *p, int *lock, int n); -extern struct __get_user __atomic_xor(volatile int *p, int *lock, int n); +extern struct __get_user __atomic_fetch_or(volatile int *p, int *lock, int n); +extern struct __get_user __atomic_fetch_and(volatile int *p, int *lock, int n); +extern struct __get_user __atomic_fetch_andn(volatile int *p, int *lock, int n); +extern struct __get_user __atomic_fetch_xor(volatile int *p, int *lock, int n); extern long long __atomic64_cmpxchg(volatile long long *p, int *lock, long long o, long long n); extern long long __atomic64_xchg(volatile long long *p, int *lock, long long n); @@ -259,9 +283,9 @@ extern long long __atomic64_xchg_add(volatile long long *p, int *lock, long long n); extern long long __atomic64_xchg_add_unless(volatile long long *p, int *lock, long long o, long long n); -extern long long __atomic64_and(volatile long long *p, int *lock, long long n); -extern long long __atomic64_or(volatile long long *p, int *lock, long long n); -extern long long __atomic64_xor(volatile long long *p, int *lock, long long n); +extern long long __atomic64_fetch_and(volatile long long *p, int *lock, long long n); +extern long long __atomic64_fetch_or(volatile long long *p, int *lock, long long n); +extern long long __atomic64_fetch_xor(volatile long long *p, int *lock, long long n); /* Return failure from the atomic wrappers. */ struct __get_user __atomic_bad_address(int __user *addr); diff --git a/arch/tile/include/asm/atomic_64.h b/arch/tile/include/asm/atomic_64.h index b0531a623653..4cefa0c9fd81 100644 --- a/arch/tile/include/asm/atomic_64.h +++ b/arch/tile/include/asm/atomic_64.h @@ -32,11 +32,6 @@ * on any routine which updates memory and returns a value. */ -static inline void atomic_add(int i, atomic_t *v) -{ - __insn_fetchadd4((void *)&v->counter, i); -} - /* * Note a subtlety of the locking here. We are required to provide a * full memory barrier before and after the operation. However, we @@ -59,28 +54,39 @@ static inline int atomic_add_return(int i, atomic_t *v) return val; } -static inline int __atomic_add_unless(atomic_t *v, int a, int u) +#define ATOMIC_OPS(op) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + int val; \ + smp_mb(); \ + val = __insn_fetch##op##4((void *)&v->counter, i); \ + smp_mb(); \ + return val; \ +} \ +static inline void atomic_##op(int i, atomic_t *v) \ +{ \ + __insn_fetch##op##4((void *)&v->counter, i); \ +} + +ATOMIC_OPS(add) +ATOMIC_OPS(and) +ATOMIC_OPS(or) + +#undef ATOMIC_OPS + +static inline int atomic_fetch_xor(int i, atomic_t *v) { int guess, oldval = v->counter; + smp_mb(); do { - if (oldval == u) - break; guess = oldval; - oldval = cmpxchg(&v->counter, guess, guess + a); + __insn_mtspr(SPR_CMPEXCH_VALUE, guess); + oldval = __insn_cmpexch4(&v->counter, guess ^ i); } while (guess != oldval); + smp_mb(); return oldval; } -static inline void atomic_and(int i, atomic_t *v) -{ - __insn_fetchand4((void *)&v->counter, i); -} - -static inline void atomic_or(int i, atomic_t *v) -{ - __insn_fetchor4((void *)&v->counter, i); -} - static inline void atomic_xor(int i, atomic_t *v) { int guess, oldval = v->counter; @@ -91,6 +97,18 @@ static inline void atomic_xor(int i, atomic_t *v) } while (guess != oldval); } +static inline int __atomic_add_unless(atomic_t *v, int a, int u) +{ + int guess, oldval = v->counter; + do { + if (oldval == u) + break; + guess = oldval; + oldval = cmpxchg(&v->counter, guess, guess + a); + } while (guess != oldval); + return oldval; +} + /* Now the true 64-bit operations. */ #define ATOMIC64_INIT(i) { (i) } @@ -98,11 +116,6 @@ static inline void atomic_xor(int i, atomic_t *v) #define atomic64_read(v) READ_ONCE((v)->counter) #define atomic64_set(v, i) WRITE_ONCE((v)->counter, (i)) -static inline void atomic64_add(long i, atomic64_t *v) -{ - __insn_fetchadd((void *)&v->counter, i); -} - static inline long atomic64_add_return(long i, atomic64_t *v) { int val; @@ -112,6 +125,49 @@ static inline long atomic64_add_return(long i, atomic64_t *v) return val; } +#define ATOMIC64_OPS(op) \ +static inline long atomic64_fetch_##op(long i, atomic64_t *v) \ +{ \ + long val; \ + smp_mb(); \ + val = __insn_fetch##op((void *)&v->counter, i); \ + smp_mb(); \ + return val; \ +} \ +static inline void atomic64_##op(long i, atomic64_t *v) \ +{ \ + __insn_fetch##op((void *)&v->counter, i); \ +} + +ATOMIC64_OPS(add) +ATOMIC64_OPS(and) +ATOMIC64_OPS(or) + +#undef ATOMIC64_OPS + +static inline long atomic64_fetch_xor(long i, atomic64_t *v) +{ + long guess, oldval = v->counter; + smp_mb(); + do { + guess = oldval; + __insn_mtspr(SPR_CMPEXCH_VALUE, guess); + oldval = __insn_cmpexch(&v->counter, guess ^ i); + } while (guess != oldval); + smp_mb(); + return oldval; +} + +static inline void atomic64_xor(long i, atomic64_t *v) +{ + long guess, oldval = v->counter; + do { + guess = oldval; + __insn_mtspr(SPR_CMPEXCH_VALUE, guess); + oldval = __insn_cmpexch(&v->counter, guess ^ i); + } while (guess != oldval); +} + static inline long atomic64_add_unless(atomic64_t *v, long a, long u) { long guess, oldval = v->counter; @@ -124,27 +180,8 @@ static inline long atomic64_add_unless(atomic64_t *v, long a, long u) return oldval != u; } -static inline void atomic64_and(long i, atomic64_t *v) -{ - __insn_fetchand((void *)&v->counter, i); -} - -static inline void atomic64_or(long i, atomic64_t *v) -{ - __insn_fetchor((void *)&v->counter, i); -} - -static inline void atomic64_xor(long i, atomic64_t *v) -{ - long guess, oldval = v->counter; - do { - guess = oldval; - __insn_mtspr(SPR_CMPEXCH_VALUE, guess); - oldval = __insn_cmpexch(&v->counter, guess ^ i); - } while (guess != oldval); -} - #define atomic64_sub_return(i, v) atomic64_add_return(-(i), (v)) +#define atomic64_fetch_sub(i, v) atomic64_fetch_add(-(i), (v)) #define atomic64_sub(i, v) atomic64_add(-(i), (v)) #define atomic64_inc_return(v) atomic64_add_return(1, (v)) #define atomic64_dec_return(v) atomic64_sub_return(1, (v)) diff --git a/arch/tile/include/asm/bitops_32.h b/arch/tile/include/asm/bitops_32.h index bbf7b666f21d..d1406a95f6b7 100644 --- a/arch/tile/include/asm/bitops_32.h +++ b/arch/tile/include/asm/bitops_32.h @@ -19,9 +19,9 @@ #include /* Tile-specific routines to support . */ -unsigned long _atomic_or(volatile unsigned long *p, unsigned long mask); -unsigned long _atomic_andn(volatile unsigned long *p, unsigned long mask); -unsigned long _atomic_xor(volatile unsigned long *p, unsigned long mask); +unsigned long _atomic_fetch_or(volatile unsigned long *p, unsigned long mask); +unsigned long _atomic_fetch_andn(volatile unsigned long *p, unsigned long mask); +unsigned long _atomic_fetch_xor(volatile unsigned long *p, unsigned long mask); /** * set_bit - Atomically set a bit in memory @@ -35,7 +35,7 @@ unsigned long _atomic_xor(volatile unsigned long *p, unsigned long mask); */ static inline void set_bit(unsigned nr, volatile unsigned long *addr) { - _atomic_or(addr + BIT_WORD(nr), BIT_MASK(nr)); + _atomic_fetch_or(addr + BIT_WORD(nr), BIT_MASK(nr)); } /** @@ -54,7 +54,7 @@ static inline void set_bit(unsigned nr, volatile unsigned long *addr) */ static inline void clear_bit(unsigned nr, volatile unsigned long *addr) { - _atomic_andn(addr + BIT_WORD(nr), BIT_MASK(nr)); + _atomic_fetch_andn(addr + BIT_WORD(nr), BIT_MASK(nr)); } /** @@ -69,7 +69,7 @@ static inline void clear_bit(unsigned nr, volatile unsigned long *addr) */ static inline void change_bit(unsigned nr, volatile unsigned long *addr) { - _atomic_xor(addr + BIT_WORD(nr), BIT_MASK(nr)); + _atomic_fetch_xor(addr + BIT_WORD(nr), BIT_MASK(nr)); } /** @@ -85,7 +85,7 @@ static inline int test_and_set_bit(unsigned nr, volatile unsigned long *addr) unsigned long mask = BIT_MASK(nr); addr += BIT_WORD(nr); smp_mb(); /* barrier for proper semantics */ - return (_atomic_or(addr, mask) & mask) != 0; + return (_atomic_fetch_or(addr, mask) & mask) != 0; } /** @@ -101,7 +101,7 @@ static inline int test_and_clear_bit(unsigned nr, volatile unsigned long *addr) unsigned long mask = BIT_MASK(nr); addr += BIT_WORD(nr); smp_mb(); /* barrier for proper semantics */ - return (_atomic_andn(addr, mask) & mask) != 0; + return (_atomic_fetch_andn(addr, mask) & mask) != 0; } /** @@ -118,7 +118,7 @@ static inline int test_and_change_bit(unsigned nr, unsigned long mask = BIT_MASK(nr); addr += BIT_WORD(nr); smp_mb(); /* barrier for proper semantics */ - return (_atomic_xor(addr, mask) & mask) != 0; + return (_atomic_fetch_xor(addr, mask) & mask) != 0; } #include diff --git a/arch/tile/lib/atomic_32.c b/arch/tile/lib/atomic_32.c index 298df1e9912a..5b6bd932c9c7 100644 --- a/arch/tile/lib/atomic_32.c +++ b/arch/tile/lib/atomic_32.c @@ -88,29 +88,29 @@ int _atomic_cmpxchg(int *v, int o, int n) } EXPORT_SYMBOL(_atomic_cmpxchg); -unsigned long _atomic_or(volatile unsigned long *p, unsigned long mask) +unsigned long _atomic_fetch_or(volatile unsigned long *p, unsigned long mask) { - return __atomic_or((int *)p, __atomic_setup(p), mask).val; + return __atomic_fetch_or((int *)p, __atomic_setup(p), mask).val; } -EXPORT_SYMBOL(_atomic_or); +EXPORT_SYMBOL(_atomic_fetch_or); -unsigned long _atomic_and(volatile unsigned long *p, unsigned long mask) +unsigned long _atomic_fetch_and(volatile unsigned long *p, unsigned long mask) { - return __atomic_and((int *)p, __atomic_setup(p), mask).val; + return __atomic_fetch_and((int *)p, __atomic_setup(p), mask).val; } -EXPORT_SYMBOL(_atomic_and); +EXPORT_SYMBOL(_atomic_fetch_and); -unsigned long _atomic_andn(volatile unsigned long *p, unsigned long mask) +unsigned long _atomic_fetch_andn(volatile unsigned long *p, unsigned long mask) { - return __atomic_andn((int *)p, __atomic_setup(p), mask).val; + return __atomic_fetch_andn((int *)p, __atomic_setup(p), mask).val; } -EXPORT_SYMBOL(_atomic_andn); +EXPORT_SYMBOL(_atomic_fetch_andn); -unsigned long _atomic_xor(volatile unsigned long *p, unsigned long mask) +unsigned long _atomic_fetch_xor(volatile unsigned long *p, unsigned long mask) { - return __atomic_xor((int *)p, __atomic_setup(p), mask).val; + return __atomic_fetch_xor((int *)p, __atomic_setup(p), mask).val; } -EXPORT_SYMBOL(_atomic_xor); +EXPORT_SYMBOL(_atomic_fetch_xor); long long _atomic64_xchg(long long *v, long long n) @@ -142,23 +142,23 @@ long long _atomic64_cmpxchg(long long *v, long long o, long long n) } EXPORT_SYMBOL(_atomic64_cmpxchg); -long long _atomic64_and(long long *v, long long n) +long long _atomic64_fetch_and(long long *v, long long n) { - return __atomic64_and(v, __atomic_setup(v), n); + return __atomic64_fetch_and(v, __atomic_setup(v), n); } -EXPORT_SYMBOL(_atomic64_and); +EXPORT_SYMBOL(_atomic64_fetch_and); -long long _atomic64_or(long long *v, long long n) +long long _atomic64_fetch_or(long long *v, long long n) { - return __atomic64_or(v, __atomic_setup(v), n); + return __atomic64_fetch_or(v, __atomic_setup(v), n); } -EXPORT_SYMBOL(_atomic64_or); +EXPORT_SYMBOL(_atomic64_fetch_or); -long long _atomic64_xor(long long *v, long long n) +long long _atomic64_fetch_xor(long long *v, long long n) { - return __atomic64_xor(v, __atomic_setup(v), n); + return __atomic64_fetch_xor(v, __atomic_setup(v), n); } -EXPORT_SYMBOL(_atomic64_xor); +EXPORT_SYMBOL(_atomic64_fetch_xor); /* * If any of the atomic or futex routines hit a bad address (not in diff --git a/arch/tile/lib/atomic_asm_32.S b/arch/tile/lib/atomic_asm_32.S index f611265633d6..507abdd2bf9a 100644 --- a/arch/tile/lib/atomic_asm_32.S +++ b/arch/tile/lib/atomic_asm_32.S @@ -177,10 +177,10 @@ atomic_op _xchg, 32, "move r24, r2" atomic_op _xchg_add, 32, "add r24, r22, r2" atomic_op _xchg_add_unless, 32, \ "sne r26, r22, r2; { bbns r26, 3f; add r24, r22, r3 }" -atomic_op _or, 32, "or r24, r22, r2" -atomic_op _and, 32, "and r24, r22, r2" -atomic_op _andn, 32, "nor r2, r2, zero; and r24, r22, r2" -atomic_op _xor, 32, "xor r24, r22, r2" +atomic_op _fetch_or, 32, "or r24, r22, r2" +atomic_op _fetch_and, 32, "and r24, r22, r2" +atomic_op _fetch_andn, 32, "nor r2, r2, zero; and r24, r22, r2" +atomic_op _fetch_xor, 32, "xor r24, r22, r2" atomic_op 64_cmpxchg, 64, "{ seq r26, r22, r2; seq r27, r23, r3 }; \ { bbns r26, 3f; move r24, r4 }; { bbns r27, 3f; move r25, r5 }" @@ -192,9 +192,9 @@ atomic_op 64_xchg_add_unless, 64, \ { bbns r26, 3f; add r24, r22, r4 }; \ { bbns r27, 3f; add r25, r23, r5 }; \ slt_u r26, r24, r22; add r25, r25, r26" -atomic_op 64_or, 64, "{ or r24, r22, r2; or r25, r23, r3 }" -atomic_op 64_and, 64, "{ and r24, r22, r2; and r25, r23, r3 }" -atomic_op 64_xor, 64, "{ xor r24, r22, r2; xor r25, r23, r3 }" +atomic_op 64_fetch_or, 64, "{ or r24, r22, r2; or r25, r23, r3 }" +atomic_op 64_fetch_and, 64, "{ and r24, r22, r2; and r25, r23, r3 }" +atomic_op 64_fetch_xor, 64, "{ xor r24, r22, r2; xor r25, r23, r3 }" jrp lr /* happy backtracer */ From a8bcccaba162632c3963259b8a442c6b490f4c68 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 18 Apr 2016 01:16:03 +0200 Subject: [PATCH 50/67] locking/atomic, arch/x86: Implement atomic{,64}_fetch_{add,sub,and,or,xor}() Implement FETCH-OP atomic primitives, these are very similar to the existing OP-RETURN primitives we already have, except they return the value of the atomic variable _before_ modification. This is especially useful for irreversible operations -- such as bitops (because it becomes impossible to reconstruct the state prior to modification). Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/atomic.h | 37 +++++++++++++++++++++++++++--- arch/x86/include/asm/atomic64_32.h | 25 +++++++++++++++++--- arch/x86/include/asm/atomic64_64.h | 35 +++++++++++++++++++++++++--- 3 files changed, 88 insertions(+), 9 deletions(-) diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h index 3e8674288198..73b8463b89e9 100644 --- a/arch/x86/include/asm/atomic.h +++ b/arch/x86/include/asm/atomic.h @@ -171,6 +171,16 @@ static __always_inline int atomic_sub_return(int i, atomic_t *v) #define atomic_inc_return(v) (atomic_add_return(1, v)) #define atomic_dec_return(v) (atomic_sub_return(1, v)) +static __always_inline int atomic_fetch_add(int i, atomic_t *v) +{ + return xadd(&v->counter, i); +} + +static __always_inline int atomic_fetch_sub(int i, atomic_t *v) +{ + return xadd(&v->counter, -i); +} + static __always_inline int atomic_cmpxchg(atomic_t *v, int old, int new) { return cmpxchg(&v->counter, old, new); @@ -190,10 +200,31 @@ static inline void atomic_##op(int i, atomic_t *v) \ : "memory"); \ } -ATOMIC_OP(and) -ATOMIC_OP(or) -ATOMIC_OP(xor) +#define ATOMIC_FETCH_OP(op, c_op) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + int old, val = atomic_read(v); \ + for (;;) { \ + old = atomic_cmpxchg(v, val, val c_op i); \ + if (old == val) \ + break; \ + val = old; \ + } \ + return old; \ +} +#define ATOMIC_OPS(op, c_op) \ + ATOMIC_OP(op) \ + ATOMIC_FETCH_OP(op, c_op) + +#define atomic_fetch_or atomic_fetch_or + +ATOMIC_OPS(and, &) +ATOMIC_OPS(or , |) +ATOMIC_OPS(xor, ^) + +#undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP /** diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h index a984111135b1..71d7705fb303 100644 --- a/arch/x86/include/asm/atomic64_32.h +++ b/arch/x86/include/asm/atomic64_32.h @@ -320,10 +320,29 @@ static inline void atomic64_##op(long long i, atomic64_t *v) \ c = old; \ } -ATOMIC64_OP(and, &) -ATOMIC64_OP(or, |) -ATOMIC64_OP(xor, ^) +#define ATOMIC64_FETCH_OP(op, c_op) \ +static inline long long atomic64_fetch_##op(long long i, atomic64_t *v) \ +{ \ + long long old, c = 0; \ + while ((old = atomic64_cmpxchg(v, c, c c_op i)) != c) \ + c = old; \ + return old; \ +} +ATOMIC64_FETCH_OP(add, +) + +#define atomic64_fetch_sub(i, v) atomic64_fetch_add(-(i), (v)) + +#define ATOMIC64_OPS(op, c_op) \ + ATOMIC64_OP(op, c_op) \ + ATOMIC64_FETCH_OP(op, c_op) + +ATOMIC64_OPS(and, &) +ATOMIC64_OPS(or, |) +ATOMIC64_OPS(xor, ^) + +#undef ATOMIC64_OPS +#undef ATOMIC64_FETCH_OP #undef ATOMIC64_OP #endif /* _ASM_X86_ATOMIC64_32_H */ diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h index 037351022f54..70eed0e14553 100644 --- a/arch/x86/include/asm/atomic64_64.h +++ b/arch/x86/include/asm/atomic64_64.h @@ -158,6 +158,16 @@ static inline long atomic64_sub_return(long i, atomic64_t *v) return atomic64_add_return(-i, v); } +static inline long atomic64_fetch_add(long i, atomic64_t *v) +{ + return xadd(&v->counter, i); +} + +static inline long atomic64_fetch_sub(long i, atomic64_t *v) +{ + return xadd(&v->counter, -i); +} + #define atomic64_inc_return(v) (atomic64_add_return(1, (v))) #define atomic64_dec_return(v) (atomic64_sub_return(1, (v))) @@ -229,10 +239,29 @@ static inline void atomic64_##op(long i, atomic64_t *v) \ : "memory"); \ } -ATOMIC64_OP(and) -ATOMIC64_OP(or) -ATOMIC64_OP(xor) +#define ATOMIC64_FETCH_OP(op, c_op) \ +static inline long atomic64_fetch_##op(long i, atomic64_t *v) \ +{ \ + long old, val = atomic64_read(v); \ + for (;;) { \ + old = atomic64_cmpxchg(v, val, val c_op i); \ + if (old == val) \ + break; \ + val = old; \ + } \ + return old; \ +} +#define ATOMIC64_OPS(op, c_op) \ + ATOMIC64_OP(op) \ + ATOMIC64_FETCH_OP(op, c_op) + +ATOMIC64_OPS(and, &) +ATOMIC64_OPS(or, |) +ATOMIC64_OPS(xor, ^) + +#undef ATOMIC64_OPS +#undef ATOMIC64_FETCH_OP #undef ATOMIC64_OP #endif /* _ASM_X86_ATOMIC64_64_H */ From 6dc25876cdb17fd3906504dcabb9e537f8433000 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 18 Apr 2016 01:16:03 +0200 Subject: [PATCH 51/67] locking/atomic, arch/xtensa: Implement atomic_fetch_{add,sub,and,or,xor}() Implement FETCH-OP atomic primitives, these are very similar to the existing OP-RETURN primitives we already have, except they return the value of the atomic variable _before_ modification. This is especially useful for irreversible operations -- such as bitops (because it becomes impossible to reconstruct the state prior to modification). Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Chris Zankel Cc: Linus Torvalds Cc: Max Filippov Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: linux-xtensa@linux-xtensa.org Signed-off-by: Ingo Molnar --- arch/xtensa/include/asm/atomic.h | 54 +++++++++++++++++++++++++++++--- 1 file changed, 50 insertions(+), 4 deletions(-) diff --git a/arch/xtensa/include/asm/atomic.h b/arch/xtensa/include/asm/atomic.h index fd8017ce298a..d95a8aa1a6d3 100644 --- a/arch/xtensa/include/asm/atomic.h +++ b/arch/xtensa/include/asm/atomic.h @@ -98,6 +98,26 @@ static inline int atomic_##op##_return(int i, atomic_t * v) \ return result; \ } +#define ATOMIC_FETCH_OP(op) \ +static inline int atomic_fetch_##op(int i, atomic_t * v) \ +{ \ + unsigned long tmp; \ + int result; \ + \ + __asm__ __volatile__( \ + "1: l32i %1, %3, 0\n" \ + " wsr %1, scompare1\n" \ + " " #op " %0, %1, %2\n" \ + " s32c1i %0, %3, 0\n" \ + " bne %0, %1, 1b\n" \ + : "=&a" (result), "=&a" (tmp) \ + : "a" (i), "a" (v) \ + : "memory" \ + ); \ + \ + return result; \ +} + #else /* XCHAL_HAVE_S32C1I */ #define ATOMIC_OP(op) \ @@ -138,18 +158,44 @@ static inline int atomic_##op##_return(int i, atomic_t * v) \ return vval; \ } +#define ATOMIC_FETCH_OP(op) \ +static inline int atomic_fetch_##op(int i, atomic_t * v) \ +{ \ + unsigned int tmp, vval; \ + \ + __asm__ __volatile__( \ + " rsil a15,"__stringify(TOPLEVEL)"\n" \ + " l32i %0, %3, 0\n" \ + " " #op " %1, %0, %2\n" \ + " s32i %1, %3, 0\n" \ + " wsr a15, ps\n" \ + " rsync\n" \ + : "=&a" (vval), "=&a" (tmp) \ + : "a" (i), "a" (v) \ + : "a15", "memory" \ + ); \ + \ + return vval; \ +} + #endif /* XCHAL_HAVE_S32C1I */ -#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op) ATOMIC_OP_RETURN(op) ATOMIC_OPS(add) ATOMIC_OPS(sub) -ATOMIC_OP(and) -ATOMIC_OP(or) -ATOMIC_OP(xor) +#undef ATOMIC_OPS +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op) + +#define atomic_fetch_or atomic_fetch_or + +ATOMIC_OPS(and) +ATOMIC_OPS(or) +ATOMIC_OPS(xor) #undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP From e12133324b7daaa176bb687c1eb59e1a6b203da4 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 18 Apr 2016 00:52:13 +0200 Subject: [PATCH 52/67] locking/atomic: Fix atomic64_relaxed() bits We should only expand the atomic64 relaxed bits once we've included all relevant headers. So move it down until after we potentially include asm-generic/atomic64.h. In practise this will not have made a difference so far, since the generic bits will not define _relaxed versions. Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Davidlohr Bueso Cc: Frederic Weisbecker Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Will Deacon Cc: linux-arch@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/atomic.h | 306 ++++++++++++++++++++--------------------- 1 file changed, 153 insertions(+), 153 deletions(-) diff --git a/include/linux/atomic.h b/include/linux/atomic.h index e451534fe54d..351f89e1d15c 100644 --- a/include/linux/atomic.h +++ b/include/linux/atomic.h @@ -211,159 +211,6 @@ #endif #endif /* atomic_cmpxchg_relaxed */ -#ifndef atomic64_read_acquire -#define atomic64_read_acquire(v) smp_load_acquire(&(v)->counter) -#endif - -#ifndef atomic64_set_release -#define atomic64_set_release(v, i) smp_store_release(&(v)->counter, (i)) -#endif - -/* atomic64_add_return_relaxed */ -#ifndef atomic64_add_return_relaxed -#define atomic64_add_return_relaxed atomic64_add_return -#define atomic64_add_return_acquire atomic64_add_return -#define atomic64_add_return_release atomic64_add_return - -#else /* atomic64_add_return_relaxed */ - -#ifndef atomic64_add_return_acquire -#define atomic64_add_return_acquire(...) \ - __atomic_op_acquire(atomic64_add_return, __VA_ARGS__) -#endif - -#ifndef atomic64_add_return_release -#define atomic64_add_return_release(...) \ - __atomic_op_release(atomic64_add_return, __VA_ARGS__) -#endif - -#ifndef atomic64_add_return -#define atomic64_add_return(...) \ - __atomic_op_fence(atomic64_add_return, __VA_ARGS__) -#endif -#endif /* atomic64_add_return_relaxed */ - -/* atomic64_inc_return_relaxed */ -#ifndef atomic64_inc_return_relaxed -#define atomic64_inc_return_relaxed atomic64_inc_return -#define atomic64_inc_return_acquire atomic64_inc_return -#define atomic64_inc_return_release atomic64_inc_return - -#else /* atomic64_inc_return_relaxed */ - -#ifndef atomic64_inc_return_acquire -#define atomic64_inc_return_acquire(...) \ - __atomic_op_acquire(atomic64_inc_return, __VA_ARGS__) -#endif - -#ifndef atomic64_inc_return_release -#define atomic64_inc_return_release(...) \ - __atomic_op_release(atomic64_inc_return, __VA_ARGS__) -#endif - -#ifndef atomic64_inc_return -#define atomic64_inc_return(...) \ - __atomic_op_fence(atomic64_inc_return, __VA_ARGS__) -#endif -#endif /* atomic64_inc_return_relaxed */ - - -/* atomic64_sub_return_relaxed */ -#ifndef atomic64_sub_return_relaxed -#define atomic64_sub_return_relaxed atomic64_sub_return -#define atomic64_sub_return_acquire atomic64_sub_return -#define atomic64_sub_return_release atomic64_sub_return - -#else /* atomic64_sub_return_relaxed */ - -#ifndef atomic64_sub_return_acquire -#define atomic64_sub_return_acquire(...) \ - __atomic_op_acquire(atomic64_sub_return, __VA_ARGS__) -#endif - -#ifndef atomic64_sub_return_release -#define atomic64_sub_return_release(...) \ - __atomic_op_release(atomic64_sub_return, __VA_ARGS__) -#endif - -#ifndef atomic64_sub_return -#define atomic64_sub_return(...) \ - __atomic_op_fence(atomic64_sub_return, __VA_ARGS__) -#endif -#endif /* atomic64_sub_return_relaxed */ - -/* atomic64_dec_return_relaxed */ -#ifndef atomic64_dec_return_relaxed -#define atomic64_dec_return_relaxed atomic64_dec_return -#define atomic64_dec_return_acquire atomic64_dec_return -#define atomic64_dec_return_release atomic64_dec_return - -#else /* atomic64_dec_return_relaxed */ - -#ifndef atomic64_dec_return_acquire -#define atomic64_dec_return_acquire(...) \ - __atomic_op_acquire(atomic64_dec_return, __VA_ARGS__) -#endif - -#ifndef atomic64_dec_return_release -#define atomic64_dec_return_release(...) \ - __atomic_op_release(atomic64_dec_return, __VA_ARGS__) -#endif - -#ifndef atomic64_dec_return -#define atomic64_dec_return(...) \ - __atomic_op_fence(atomic64_dec_return, __VA_ARGS__) -#endif -#endif /* atomic64_dec_return_relaxed */ - -/* atomic64_xchg_relaxed */ -#ifndef atomic64_xchg_relaxed -#define atomic64_xchg_relaxed atomic64_xchg -#define atomic64_xchg_acquire atomic64_xchg -#define atomic64_xchg_release atomic64_xchg - -#else /* atomic64_xchg_relaxed */ - -#ifndef atomic64_xchg_acquire -#define atomic64_xchg_acquire(...) \ - __atomic_op_acquire(atomic64_xchg, __VA_ARGS__) -#endif - -#ifndef atomic64_xchg_release -#define atomic64_xchg_release(...) \ - __atomic_op_release(atomic64_xchg, __VA_ARGS__) -#endif - -#ifndef atomic64_xchg -#define atomic64_xchg(...) \ - __atomic_op_fence(atomic64_xchg, __VA_ARGS__) -#endif -#endif /* atomic64_xchg_relaxed */ - -/* atomic64_cmpxchg_relaxed */ -#ifndef atomic64_cmpxchg_relaxed -#define atomic64_cmpxchg_relaxed atomic64_cmpxchg -#define atomic64_cmpxchg_acquire atomic64_cmpxchg -#define atomic64_cmpxchg_release atomic64_cmpxchg - -#else /* atomic64_cmpxchg_relaxed */ - -#ifndef atomic64_cmpxchg_acquire -#define atomic64_cmpxchg_acquire(...) \ - __atomic_op_acquire(atomic64_cmpxchg, __VA_ARGS__) -#endif - -#ifndef atomic64_cmpxchg_release -#define atomic64_cmpxchg_release(...) \ - __atomic_op_release(atomic64_cmpxchg, __VA_ARGS__) -#endif - -#ifndef atomic64_cmpxchg -#define atomic64_cmpxchg(...) \ - __atomic_op_fence(atomic64_cmpxchg, __VA_ARGS__) -#endif -#endif /* atomic64_cmpxchg_relaxed */ - /* cmpxchg_relaxed */ #ifndef cmpxchg_relaxed #define cmpxchg_relaxed cmpxchg @@ -583,6 +430,159 @@ static inline int atomic_fetch_or(int mask, atomic_t *p) #include #endif +#ifndef atomic64_read_acquire +#define atomic64_read_acquire(v) smp_load_acquire(&(v)->counter) +#endif + +#ifndef atomic64_set_release +#define atomic64_set_release(v, i) smp_store_release(&(v)->counter, (i)) +#endif + +/* atomic64_add_return_relaxed */ +#ifndef atomic64_add_return_relaxed +#define atomic64_add_return_relaxed atomic64_add_return +#define atomic64_add_return_acquire atomic64_add_return +#define atomic64_add_return_release atomic64_add_return + +#else /* atomic64_add_return_relaxed */ + +#ifndef atomic64_add_return_acquire +#define atomic64_add_return_acquire(...) \ + __atomic_op_acquire(atomic64_add_return, __VA_ARGS__) +#endif + +#ifndef atomic64_add_return_release +#define atomic64_add_return_release(...) \ + __atomic_op_release(atomic64_add_return, __VA_ARGS__) +#endif + +#ifndef atomic64_add_return +#define atomic64_add_return(...) \ + __atomic_op_fence(atomic64_add_return, __VA_ARGS__) +#endif +#endif /* atomic64_add_return_relaxed */ + +/* atomic64_inc_return_relaxed */ +#ifndef atomic64_inc_return_relaxed +#define atomic64_inc_return_relaxed atomic64_inc_return +#define atomic64_inc_return_acquire atomic64_inc_return +#define atomic64_inc_return_release atomic64_inc_return + +#else /* atomic64_inc_return_relaxed */ + +#ifndef atomic64_inc_return_acquire +#define atomic64_inc_return_acquire(...) \ + __atomic_op_acquire(atomic64_inc_return, __VA_ARGS__) +#endif + +#ifndef atomic64_inc_return_release +#define atomic64_inc_return_release(...) \ + __atomic_op_release(atomic64_inc_return, __VA_ARGS__) +#endif + +#ifndef atomic64_inc_return +#define atomic64_inc_return(...) \ + __atomic_op_fence(atomic64_inc_return, __VA_ARGS__) +#endif +#endif /* atomic64_inc_return_relaxed */ + + +/* atomic64_sub_return_relaxed */ +#ifndef atomic64_sub_return_relaxed +#define atomic64_sub_return_relaxed atomic64_sub_return +#define atomic64_sub_return_acquire atomic64_sub_return +#define atomic64_sub_return_release atomic64_sub_return + +#else /* atomic64_sub_return_relaxed */ + +#ifndef atomic64_sub_return_acquire +#define atomic64_sub_return_acquire(...) \ + __atomic_op_acquire(atomic64_sub_return, __VA_ARGS__) +#endif + +#ifndef atomic64_sub_return_release +#define atomic64_sub_return_release(...) \ + __atomic_op_release(atomic64_sub_return, __VA_ARGS__) +#endif + +#ifndef atomic64_sub_return +#define atomic64_sub_return(...) \ + __atomic_op_fence(atomic64_sub_return, __VA_ARGS__) +#endif +#endif /* atomic64_sub_return_relaxed */ + +/* atomic64_dec_return_relaxed */ +#ifndef atomic64_dec_return_relaxed +#define atomic64_dec_return_relaxed atomic64_dec_return +#define atomic64_dec_return_acquire atomic64_dec_return +#define atomic64_dec_return_release atomic64_dec_return + +#else /* atomic64_dec_return_relaxed */ + +#ifndef atomic64_dec_return_acquire +#define atomic64_dec_return_acquire(...) \ + __atomic_op_acquire(atomic64_dec_return, __VA_ARGS__) +#endif + +#ifndef atomic64_dec_return_release +#define atomic64_dec_return_release(...) \ + __atomic_op_release(atomic64_dec_return, __VA_ARGS__) +#endif + +#ifndef atomic64_dec_return +#define atomic64_dec_return(...) \ + __atomic_op_fence(atomic64_dec_return, __VA_ARGS__) +#endif +#endif /* atomic64_dec_return_relaxed */ + +/* atomic64_xchg_relaxed */ +#ifndef atomic64_xchg_relaxed +#define atomic64_xchg_relaxed atomic64_xchg +#define atomic64_xchg_acquire atomic64_xchg +#define atomic64_xchg_release atomic64_xchg + +#else /* atomic64_xchg_relaxed */ + +#ifndef atomic64_xchg_acquire +#define atomic64_xchg_acquire(...) \ + __atomic_op_acquire(atomic64_xchg, __VA_ARGS__) +#endif + +#ifndef atomic64_xchg_release +#define atomic64_xchg_release(...) \ + __atomic_op_release(atomic64_xchg, __VA_ARGS__) +#endif + +#ifndef atomic64_xchg +#define atomic64_xchg(...) \ + __atomic_op_fence(atomic64_xchg, __VA_ARGS__) +#endif +#endif /* atomic64_xchg_relaxed */ + +/* atomic64_cmpxchg_relaxed */ +#ifndef atomic64_cmpxchg_relaxed +#define atomic64_cmpxchg_relaxed atomic64_cmpxchg +#define atomic64_cmpxchg_acquire atomic64_cmpxchg +#define atomic64_cmpxchg_release atomic64_cmpxchg + +#else /* atomic64_cmpxchg_relaxed */ + +#ifndef atomic64_cmpxchg_acquire +#define atomic64_cmpxchg_acquire(...) \ + __atomic_op_acquire(atomic64_cmpxchg, __VA_ARGS__) +#endif + +#ifndef atomic64_cmpxchg_release +#define atomic64_cmpxchg_release(...) \ + __atomic_op_release(atomic64_cmpxchg, __VA_ARGS__) +#endif + +#ifndef atomic64_cmpxchg +#define atomic64_cmpxchg(...) \ + __atomic_op_fence(atomic64_cmpxchg, __VA_ARGS__) +#endif +#endif /* atomic64_cmpxchg_relaxed */ + #ifndef atomic64_andnot static inline void atomic64_andnot(long long i, atomic64_t *v) { From 28aa2bda2211f4327d83b44a4f917b4a061b1c56 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 18 Apr 2016 00:54:38 +0200 Subject: [PATCH 53/67] locking/atomic: Implement atomic{,64,_long}_fetch_{add,sub,and,andnot,or,xor}{,_relaxed,_acquire,_release}() Now that all the architectures have implemented support for these new atomic primitives add on the generic infrastructure to expose and use it. Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Arnd Bergmann Cc: Boqun Feng Cc: Borislav Petkov Cc: Davidlohr Bueso Cc: Frederic Weisbecker Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Will Deacon Cc: linux-arch@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/asm-generic/atomic-long.h | 36 +++- include/asm-generic/atomic.h | 49 +++++ include/asm-generic/atomic64.h | 15 +- include/linux/atomic.h | 336 ++++++++++++++++++++++++++++++ lib/atomic64.c | 32 ++- lib/atomic64_test.c | 34 +++ 6 files changed, 493 insertions(+), 9 deletions(-) diff --git a/include/asm-generic/atomic-long.h b/include/asm-generic/atomic-long.h index 5e1f345b58dd..2d0d3cf791ab 100644 --- a/include/asm-generic/atomic-long.h +++ b/include/asm-generic/atomic-long.h @@ -112,6 +112,40 @@ static __always_inline void atomic_long_dec(atomic_long_t *l) ATOMIC_LONG_PFX(_dec)(v); } +#define ATOMIC_LONG_FETCH_OP(op, mo) \ +static inline long \ +atomic_long_fetch_##op##mo(long i, atomic_long_t *l) \ +{ \ + ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l; \ + \ + return (long)ATOMIC_LONG_PFX(_fetch_##op##mo)(i, v); \ +} + +ATOMIC_LONG_FETCH_OP(add, ) +ATOMIC_LONG_FETCH_OP(add, _relaxed) +ATOMIC_LONG_FETCH_OP(add, _acquire) +ATOMIC_LONG_FETCH_OP(add, _release) +ATOMIC_LONG_FETCH_OP(sub, ) +ATOMIC_LONG_FETCH_OP(sub, _relaxed) +ATOMIC_LONG_FETCH_OP(sub, _acquire) +ATOMIC_LONG_FETCH_OP(sub, _release) +ATOMIC_LONG_FETCH_OP(and, ) +ATOMIC_LONG_FETCH_OP(and, _relaxed) +ATOMIC_LONG_FETCH_OP(and, _acquire) +ATOMIC_LONG_FETCH_OP(and, _release) +ATOMIC_LONG_FETCH_OP(andnot, ) +ATOMIC_LONG_FETCH_OP(andnot, _relaxed) +ATOMIC_LONG_FETCH_OP(andnot, _acquire) +ATOMIC_LONG_FETCH_OP(andnot, _release) +ATOMIC_LONG_FETCH_OP(or, ) +ATOMIC_LONG_FETCH_OP(or, _relaxed) +ATOMIC_LONG_FETCH_OP(or, _acquire) +ATOMIC_LONG_FETCH_OP(or, _release) +ATOMIC_LONG_FETCH_OP(xor, ) +ATOMIC_LONG_FETCH_OP(xor, _relaxed) +ATOMIC_LONG_FETCH_OP(xor, _acquire) +ATOMIC_LONG_FETCH_OP(xor, _release) + #define ATOMIC_LONG_OP(op) \ static __always_inline void \ atomic_long_##op(long i, atomic_long_t *l) \ @@ -124,9 +158,9 @@ atomic_long_##op(long i, atomic_long_t *l) \ ATOMIC_LONG_OP(add) ATOMIC_LONG_OP(sub) ATOMIC_LONG_OP(and) +ATOMIC_LONG_OP(andnot) ATOMIC_LONG_OP(or) ATOMIC_LONG_OP(xor) -ATOMIC_LONG_OP(andnot) #undef ATOMIC_LONG_OP diff --git a/include/asm-generic/atomic.h b/include/asm-generic/atomic.h index 74f1a3704d7a..a2304ccf4ed0 100644 --- a/include/asm-generic/atomic.h +++ b/include/asm-generic/atomic.h @@ -61,6 +61,18 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ return c c_op i; \ } +#define ATOMIC_FETCH_OP(op, c_op) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + int c, old; \ + \ + c = v->counter; \ + while ((old = cmpxchg(&v->counter, c, c c_op i)) != c) \ + c = old; \ + \ + return c; \ +} + #else #include @@ -88,6 +100,20 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ return ret; \ } +#define ATOMIC_FETCH_OP(op, c_op) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + unsigned long flags; \ + int ret; \ + \ + raw_local_irq_save(flags); \ + ret = v->counter; \ + v->counter = v->counter c_op i; \ + raw_local_irq_restore(flags); \ + \ + return ret; \ +} + #endif /* CONFIG_SMP */ #ifndef atomic_add_return @@ -98,6 +124,28 @@ ATOMIC_OP_RETURN(add, +) ATOMIC_OP_RETURN(sub, -) #endif +#ifndef atomic_fetch_add +ATOMIC_FETCH_OP(add, +) +#endif + +#ifndef atomic_fetch_sub +ATOMIC_FETCH_OP(sub, -) +#endif + +#ifndef atomic_fetch_and +ATOMIC_FETCH_OP(and, &) +#endif + +#ifndef atomic_fetch_or +#define atomic_fetch_or atomic_fetch_or + +ATOMIC_FETCH_OP(or, |) +#endif + +#ifndef atomic_fetch_xor +ATOMIC_FETCH_OP(xor, ^) +#endif + #ifndef atomic_and ATOMIC_OP(and, &) #endif @@ -110,6 +158,7 @@ ATOMIC_OP(or, |) ATOMIC_OP(xor, ^) #endif +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP diff --git a/include/asm-generic/atomic64.h b/include/asm-generic/atomic64.h index d48e78ccad3d..dad68bf46c77 100644 --- a/include/asm-generic/atomic64.h +++ b/include/asm-generic/atomic64.h @@ -27,16 +27,23 @@ extern void atomic64_##op(long long a, atomic64_t *v); #define ATOMIC64_OP_RETURN(op) \ extern long long atomic64_##op##_return(long long a, atomic64_t *v); -#define ATOMIC64_OPS(op) ATOMIC64_OP(op) ATOMIC64_OP_RETURN(op) +#define ATOMIC64_FETCH_OP(op) \ +extern long long atomic64_fetch_##op(long long a, atomic64_t *v); + +#define ATOMIC64_OPS(op) ATOMIC64_OP(op) ATOMIC64_OP_RETURN(op) ATOMIC64_FETCH_OP(op) ATOMIC64_OPS(add) ATOMIC64_OPS(sub) -ATOMIC64_OP(and) -ATOMIC64_OP(or) -ATOMIC64_OP(xor) +#undef ATOMIC64_OPS +#define ATOMIC64_OPS(op) ATOMIC64_OP(op) ATOMIC64_FETCH_OP(op) + +ATOMIC64_OPS(and) +ATOMIC64_OPS(or) +ATOMIC64_OPS(xor) #undef ATOMIC64_OPS +#undef ATOMIC64_FETCH_OP #undef ATOMIC64_OP_RETURN #undef ATOMIC64_OP diff --git a/include/linux/atomic.h b/include/linux/atomic.h index 351f89e1d15c..2e6c013ac5a4 100644 --- a/include/linux/atomic.h +++ b/include/linux/atomic.h @@ -163,6 +163,154 @@ #endif #endif /* atomic_dec_return_relaxed */ + +/* atomic_fetch_add_relaxed */ +#ifndef atomic_fetch_add_relaxed +#define atomic_fetch_add_relaxed atomic_fetch_add +#define atomic_fetch_add_acquire atomic_fetch_add +#define atomic_fetch_add_release atomic_fetch_add + +#else /* atomic_fetch_add_relaxed */ + +#ifndef atomic_fetch_add_acquire +#define atomic_fetch_add_acquire(...) \ + __atomic_op_acquire(atomic_fetch_add, __VA_ARGS__) +#endif + +#ifndef atomic_fetch_add_release +#define atomic_fetch_add_release(...) \ + __atomic_op_release(atomic_fetch_add, __VA_ARGS__) +#endif + +#ifndef atomic_fetch_add +#define atomic_fetch_add(...) \ + __atomic_op_fence(atomic_fetch_add, __VA_ARGS__) +#endif +#endif /* atomic_fetch_add_relaxed */ + +/* atomic_fetch_sub_relaxed */ +#ifndef atomic_fetch_sub_relaxed +#define atomic_fetch_sub_relaxed atomic_fetch_sub +#define atomic_fetch_sub_acquire atomic_fetch_sub +#define atomic_fetch_sub_release atomic_fetch_sub + +#else /* atomic_fetch_sub_relaxed */ + +#ifndef atomic_fetch_sub_acquire +#define atomic_fetch_sub_acquire(...) \ + __atomic_op_acquire(atomic_fetch_sub, __VA_ARGS__) +#endif + +#ifndef atomic_fetch_sub_release +#define atomic_fetch_sub_release(...) \ + __atomic_op_release(atomic_fetch_sub, __VA_ARGS__) +#endif + +#ifndef atomic_fetch_sub +#define atomic_fetch_sub(...) \ + __atomic_op_fence(atomic_fetch_sub, __VA_ARGS__) +#endif +#endif /* atomic_fetch_sub_relaxed */ + +/* atomic_fetch_or_relaxed */ +#ifndef atomic_fetch_or_relaxed +#define atomic_fetch_or_relaxed atomic_fetch_or +#define atomic_fetch_or_acquire atomic_fetch_or +#define atomic_fetch_or_release atomic_fetch_or + +#else /* atomic_fetch_or_relaxed */ + +#ifndef atomic_fetch_or_acquire +#define atomic_fetch_or_acquire(...) \ + __atomic_op_acquire(atomic_fetch_or, __VA_ARGS__) +#endif + +#ifndef atomic_fetch_or_release +#define atomic_fetch_or_release(...) \ + __atomic_op_release(atomic_fetch_or, __VA_ARGS__) +#endif + +#ifndef atomic_fetch_or +#define atomic_fetch_or(...) \ + __atomic_op_fence(atomic_fetch_or, __VA_ARGS__) +#endif +#endif /* atomic_fetch_or_relaxed */ + +/* atomic_fetch_and_relaxed */ +#ifndef atomic_fetch_and_relaxed +#define atomic_fetch_and_relaxed atomic_fetch_and +#define atomic_fetch_and_acquire atomic_fetch_and +#define atomic_fetch_and_release atomic_fetch_and + +#else /* atomic_fetch_and_relaxed */ + +#ifndef atomic_fetch_and_acquire +#define atomic_fetch_and_acquire(...) \ + __atomic_op_acquire(atomic_fetch_and, __VA_ARGS__) +#endif + +#ifndef atomic_fetch_and_release +#define atomic_fetch_and_release(...) \ + __atomic_op_release(atomic_fetch_and, __VA_ARGS__) +#endif + +#ifndef atomic_fetch_and +#define atomic_fetch_and(...) \ + __atomic_op_fence(atomic_fetch_and, __VA_ARGS__) +#endif +#endif /* atomic_fetch_and_relaxed */ + +#ifdef atomic_andnot +/* atomic_fetch_andnot_relaxed */ +#ifndef atomic_fetch_andnot_relaxed +#define atomic_fetch_andnot_relaxed atomic_fetch_andnot +#define atomic_fetch_andnot_acquire atomic_fetch_andnot +#define atomic_fetch_andnot_release atomic_fetch_andnot + +#else /* atomic_fetch_andnot_relaxed */ + +#ifndef atomic_fetch_andnot_acquire +#define atomic_fetch_andnot_acquire(...) \ + __atomic_op_acquire(atomic_fetch_andnot, __VA_ARGS__) +#endif + +#ifndef atomic_fetch_andnot_release +#define atomic_fetch_andnot_release(...) \ + __atomic_op_release(atomic_fetch_andnot, __VA_ARGS__) +#endif + +#ifndef atomic_fetch_andnot +#define atomic_fetch_andnot(...) \ + __atomic_op_fence(atomic_fetch_andnot, __VA_ARGS__) +#endif +#endif /* atomic_fetch_andnot_relaxed */ +#endif /* atomic_andnot */ + +/* atomic_fetch_xor_relaxed */ +#ifndef atomic_fetch_xor_relaxed +#define atomic_fetch_xor_relaxed atomic_fetch_xor +#define atomic_fetch_xor_acquire atomic_fetch_xor +#define atomic_fetch_xor_release atomic_fetch_xor + +#else /* atomic_fetch_xor_relaxed */ + +#ifndef atomic_fetch_xor_acquire +#define atomic_fetch_xor_acquire(...) \ + __atomic_op_acquire(atomic_fetch_xor, __VA_ARGS__) +#endif + +#ifndef atomic_fetch_xor_release +#define atomic_fetch_xor_release(...) \ + __atomic_op_release(atomic_fetch_xor, __VA_ARGS__) +#endif + +#ifndef atomic_fetch_xor +#define atomic_fetch_xor(...) \ + __atomic_op_fence(atomic_fetch_xor, __VA_ARGS__) +#endif +#endif /* atomic_fetch_xor_relaxed */ + + /* atomic_xchg_relaxed */ #ifndef atomic_xchg_relaxed #define atomic_xchg_relaxed atomic_xchg @@ -310,6 +458,26 @@ static inline void atomic_andnot(int i, atomic_t *v) { atomic_and(~i, v); } + +static inline int atomic_fetch_andnot(int i, atomic_t *v) +{ + return atomic_fetch_and(~i, v); +} + +static inline int atomic_fetch_andnot_relaxed(int i, atomic_t *v) +{ + return atomic_fetch_and_relaxed(~i, v); +} + +static inline int atomic_fetch_andnot_acquire(int i, atomic_t *v) +{ + return atomic_fetch_and_acquire(~i, v); +} + +static inline int atomic_fetch_andnot_release(int i, atomic_t *v) +{ + return atomic_fetch_and_release(~i, v); +} #endif static inline __deprecated void atomic_clear_mask(unsigned int mask, atomic_t *v) @@ -535,6 +703,154 @@ static inline int atomic_fetch_or(int mask, atomic_t *p) #endif #endif /* atomic64_dec_return_relaxed */ + +/* atomic64_fetch_add_relaxed */ +#ifndef atomic64_fetch_add_relaxed +#define atomic64_fetch_add_relaxed atomic64_fetch_add +#define atomic64_fetch_add_acquire atomic64_fetch_add +#define atomic64_fetch_add_release atomic64_fetch_add + +#else /* atomic64_fetch_add_relaxed */ + +#ifndef atomic64_fetch_add_acquire +#define atomic64_fetch_add_acquire(...) \ + __atomic_op_acquire(atomic64_fetch_add, __VA_ARGS__) +#endif + +#ifndef atomic64_fetch_add_release +#define atomic64_fetch_add_release(...) \ + __atomic_op_release(atomic64_fetch_add, __VA_ARGS__) +#endif + +#ifndef atomic64_fetch_add +#define atomic64_fetch_add(...) \ + __atomic_op_fence(atomic64_fetch_add, __VA_ARGS__) +#endif +#endif /* atomic64_fetch_add_relaxed */ + +/* atomic64_fetch_sub_relaxed */ +#ifndef atomic64_fetch_sub_relaxed +#define atomic64_fetch_sub_relaxed atomic64_fetch_sub +#define atomic64_fetch_sub_acquire atomic64_fetch_sub +#define atomic64_fetch_sub_release atomic64_fetch_sub + +#else /* atomic64_fetch_sub_relaxed */ + +#ifndef atomic64_fetch_sub_acquire +#define atomic64_fetch_sub_acquire(...) \ + __atomic_op_acquire(atomic64_fetch_sub, __VA_ARGS__) +#endif + +#ifndef atomic64_fetch_sub_release +#define atomic64_fetch_sub_release(...) \ + __atomic_op_release(atomic64_fetch_sub, __VA_ARGS__) +#endif + +#ifndef atomic64_fetch_sub +#define atomic64_fetch_sub(...) \ + __atomic_op_fence(atomic64_fetch_sub, __VA_ARGS__) +#endif +#endif /* atomic64_fetch_sub_relaxed */ + +/* atomic64_fetch_or_relaxed */ +#ifndef atomic64_fetch_or_relaxed +#define atomic64_fetch_or_relaxed atomic64_fetch_or +#define atomic64_fetch_or_acquire atomic64_fetch_or +#define atomic64_fetch_or_release atomic64_fetch_or + +#else /* atomic64_fetch_or_relaxed */ + +#ifndef atomic64_fetch_or_acquire +#define atomic64_fetch_or_acquire(...) \ + __atomic_op_acquire(atomic64_fetch_or, __VA_ARGS__) +#endif + +#ifndef atomic64_fetch_or_release +#define atomic64_fetch_or_release(...) \ + __atomic_op_release(atomic64_fetch_or, __VA_ARGS__) +#endif + +#ifndef atomic64_fetch_or +#define atomic64_fetch_or(...) \ + __atomic_op_fence(atomic64_fetch_or, __VA_ARGS__) +#endif +#endif /* atomic64_fetch_or_relaxed */ + +/* atomic64_fetch_and_relaxed */ +#ifndef atomic64_fetch_and_relaxed +#define atomic64_fetch_and_relaxed atomic64_fetch_and +#define atomic64_fetch_and_acquire atomic64_fetch_and +#define atomic64_fetch_and_release atomic64_fetch_and + +#else /* atomic64_fetch_and_relaxed */ + +#ifndef atomic64_fetch_and_acquire +#define atomic64_fetch_and_acquire(...) \ + __atomic_op_acquire(atomic64_fetch_and, __VA_ARGS__) +#endif + +#ifndef atomic64_fetch_and_release +#define atomic64_fetch_and_release(...) \ + __atomic_op_release(atomic64_fetch_and, __VA_ARGS__) +#endif + +#ifndef atomic64_fetch_and +#define atomic64_fetch_and(...) \ + __atomic_op_fence(atomic64_fetch_and, __VA_ARGS__) +#endif +#endif /* atomic64_fetch_and_relaxed */ + +#ifdef atomic64_andnot +/* atomic64_fetch_andnot_relaxed */ +#ifndef atomic64_fetch_andnot_relaxed +#define atomic64_fetch_andnot_relaxed atomic64_fetch_andnot +#define atomic64_fetch_andnot_acquire atomic64_fetch_andnot +#define atomic64_fetch_andnot_release atomic64_fetch_andnot + +#else /* atomic64_fetch_andnot_relaxed */ + +#ifndef atomic64_fetch_andnot_acquire +#define atomic64_fetch_andnot_acquire(...) \ + __atomic_op_acquire(atomic64_fetch_andnot, __VA_ARGS__) +#endif + +#ifndef atomic64_fetch_andnot_release +#define atomic64_fetch_andnot_release(...) \ + __atomic_op_release(atomic64_fetch_andnot, __VA_ARGS__) +#endif + +#ifndef atomic64_fetch_andnot +#define atomic64_fetch_andnot(...) \ + __atomic_op_fence(atomic64_fetch_andnot, __VA_ARGS__) +#endif +#endif /* atomic64_fetch_andnot_relaxed */ +#endif /* atomic64_andnot */ + +/* atomic64_fetch_xor_relaxed */ +#ifndef atomic64_fetch_xor_relaxed +#define atomic64_fetch_xor_relaxed atomic64_fetch_xor +#define atomic64_fetch_xor_acquire atomic64_fetch_xor +#define atomic64_fetch_xor_release atomic64_fetch_xor + +#else /* atomic64_fetch_xor_relaxed */ + +#ifndef atomic64_fetch_xor_acquire +#define atomic64_fetch_xor_acquire(...) \ + __atomic_op_acquire(atomic64_fetch_xor, __VA_ARGS__) +#endif + +#ifndef atomic64_fetch_xor_release +#define atomic64_fetch_xor_release(...) \ + __atomic_op_release(atomic64_fetch_xor, __VA_ARGS__) +#endif + +#ifndef atomic64_fetch_xor +#define atomic64_fetch_xor(...) \ + __atomic_op_fence(atomic64_fetch_xor, __VA_ARGS__) +#endif +#endif /* atomic64_fetch_xor_relaxed */ + + /* atomic64_xchg_relaxed */ #ifndef atomic64_xchg_relaxed #define atomic64_xchg_relaxed atomic64_xchg @@ -588,6 +904,26 @@ static inline void atomic64_andnot(long long i, atomic64_t *v) { atomic64_and(~i, v); } + +static inline long long atomic64_fetch_andnot(long long i, atomic64_t *v) +{ + return atomic64_fetch_and(~i, v); +} + +static inline long long atomic64_fetch_andnot_relaxed(long long i, atomic64_t *v) +{ + return atomic64_fetch_and_relaxed(~i, v); +} + +static inline long long atomic64_fetch_andnot_acquire(long long i, atomic64_t *v) +{ + return atomic64_fetch_and_acquire(~i, v); +} + +static inline long long atomic64_fetch_andnot_release(long long i, atomic64_t *v) +{ + return atomic64_fetch_and_release(~i, v); +} #endif #include diff --git a/lib/atomic64.c b/lib/atomic64.c index 2886ebac6567..53c2d5edc826 100644 --- a/lib/atomic64.c +++ b/lib/atomic64.c @@ -96,17 +96,41 @@ long long atomic64_##op##_return(long long a, atomic64_t *v) \ } \ EXPORT_SYMBOL(atomic64_##op##_return); +#define ATOMIC64_FETCH_OP(op, c_op) \ +long long atomic64_fetch_##op(long long a, atomic64_t *v) \ +{ \ + unsigned long flags; \ + raw_spinlock_t *lock = lock_addr(v); \ + long long val; \ + \ + raw_spin_lock_irqsave(lock, flags); \ + val = v->counter; \ + v->counter c_op a; \ + raw_spin_unlock_irqrestore(lock, flags); \ + return val; \ +} \ +EXPORT_SYMBOL(atomic64_fetch_##op); + #define ATOMIC64_OPS(op, c_op) \ ATOMIC64_OP(op, c_op) \ - ATOMIC64_OP_RETURN(op, c_op) + ATOMIC64_OP_RETURN(op, c_op) \ + ATOMIC64_FETCH_OP(op, c_op) ATOMIC64_OPS(add, +=) ATOMIC64_OPS(sub, -=) -ATOMIC64_OP(and, &=) -ATOMIC64_OP(or, |=) -ATOMIC64_OP(xor, ^=) #undef ATOMIC64_OPS +#define ATOMIC64_OPS(op, c_op) \ + ATOMIC64_OP(op, c_op) \ + ATOMIC64_OP_RETURN(op, c_op) \ + ATOMIC64_FETCH_OP(op, c_op) + +ATOMIC64_OPS(and, &=) +ATOMIC64_OPS(or, |=) +ATOMIC64_OPS(xor, ^=) + +#undef ATOMIC64_OPS +#undef ATOMIC64_FETCH_OP #undef ATOMIC64_OP_RETURN #undef ATOMIC64_OP diff --git a/lib/atomic64_test.c b/lib/atomic64_test.c index 123481814320..dbb369145dda 100644 --- a/lib/atomic64_test.c +++ b/lib/atomic64_test.c @@ -53,11 +53,25 @@ do { \ BUG_ON(atomic##bit##_read(&v) != r); \ } while (0) +#define TEST_FETCH(bit, op, c_op, val) \ +do { \ + atomic##bit##_set(&v, v0); \ + r = v0; \ + r c_op val; \ + BUG_ON(atomic##bit##_##op(val, &v) != v0); \ + BUG_ON(atomic##bit##_read(&v) != r); \ +} while (0) + #define RETURN_FAMILY_TEST(bit, op, c_op, val) \ do { \ FAMILY_TEST(TEST_RETURN, bit, op, c_op, val); \ } while (0) +#define FETCH_FAMILY_TEST(bit, op, c_op, val) \ +do { \ + FAMILY_TEST(TEST_FETCH, bit, op, c_op, val); \ +} while (0) + #define TEST_ARGS(bit, op, init, ret, expect, args...) \ do { \ atomic##bit##_set(&v, init); \ @@ -114,6 +128,16 @@ static __init void test_atomic(void) RETURN_FAMILY_TEST(, sub_return, -=, onestwos); RETURN_FAMILY_TEST(, sub_return, -=, -one); + FETCH_FAMILY_TEST(, fetch_add, +=, onestwos); + FETCH_FAMILY_TEST(, fetch_add, +=, -one); + FETCH_FAMILY_TEST(, fetch_sub, -=, onestwos); + FETCH_FAMILY_TEST(, fetch_sub, -=, -one); + + FETCH_FAMILY_TEST(, fetch_or, |=, v1); + FETCH_FAMILY_TEST(, fetch_and, &=, v1); + FETCH_FAMILY_TEST(, fetch_andnot, &= ~, v1); + FETCH_FAMILY_TEST(, fetch_xor, ^=, v1); + INC_RETURN_FAMILY_TEST(, v0); DEC_RETURN_FAMILY_TEST(, v0); @@ -154,6 +178,16 @@ static __init void test_atomic64(void) RETURN_FAMILY_TEST(64, sub_return, -=, onestwos); RETURN_FAMILY_TEST(64, sub_return, -=, -one); + FETCH_FAMILY_TEST(64, fetch_add, +=, onestwos); + FETCH_FAMILY_TEST(64, fetch_add, +=, -one); + FETCH_FAMILY_TEST(64, fetch_sub, -=, onestwos); + FETCH_FAMILY_TEST(64, fetch_sub, -=, -one); + + FETCH_FAMILY_TEST(64, fetch_or, |=, v1); + FETCH_FAMILY_TEST(64, fetch_and, &=, v1); + FETCH_FAMILY_TEST(64, fetch_andnot, &= ~, v1); + FETCH_FAMILY_TEST(64, fetch_xor, ^=, v1); + INIT(v0); atomic64_inc(&v); r += one; From b53d6bedbe781974097fd8c38263f6cc78ff9ea7 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 18 Apr 2016 00:58:25 +0200 Subject: [PATCH 54/67] locking/atomic: Remove linux/atomic.h:atomic_fetch_or() Since all architectures have this implemented now natively, remove this dead code. Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/alpha/include/asm/atomic.h | 2 -- arch/arc/include/asm/atomic.h | 2 -- arch/arm/include/asm/atomic.h | 2 -- arch/arm64/include/asm/atomic.h | 2 -- arch/avr32/include/asm/atomic.h | 2 -- arch/frv/include/asm/atomic.h | 2 -- arch/h8300/include/asm/atomic.h | 2 -- arch/hexagon/include/asm/atomic.h | 2 -- arch/m32r/include/asm/atomic.h | 2 -- arch/m68k/include/asm/atomic.h | 2 -- arch/metag/include/asm/atomic.h | 2 -- arch/mips/include/asm/atomic.h | 2 -- arch/mn10300/include/asm/atomic.h | 2 -- arch/parisc/include/asm/atomic.h | 2 -- arch/s390/include/asm/atomic.h | 2 -- arch/sh/include/asm/atomic.h | 2 -- arch/sparc/include/asm/atomic.h | 1 - arch/sparc/include/asm/atomic_32.h | 2 -- arch/tile/include/asm/atomic.h | 2 -- arch/x86/include/asm/atomic.h | 2 -- arch/xtensa/include/asm/atomic.h | 2 -- include/asm-generic/atomic.h | 2 -- include/linux/atomic.h | 21 --------------------- 23 files changed, 64 deletions(-) diff --git a/arch/alpha/include/asm/atomic.h b/arch/alpha/include/asm/atomic.h index 8243f17999e3..5377ca8bb503 100644 --- a/arch/alpha/include/asm/atomic.h +++ b/arch/alpha/include/asm/atomic.h @@ -153,8 +153,6 @@ ATOMIC_OPS(sub) #define atomic_andnot atomic_andnot #define atomic64_andnot atomic64_andnot -#define atomic_fetch_or atomic_fetch_or - #undef ATOMIC_OPS #define ATOMIC_OPS(op, asm) \ ATOMIC_OP(op, asm) \ diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h index c066a21caaaf..bd9c51cb2bfd 100644 --- a/arch/arc/include/asm/atomic.h +++ b/arch/arc/include/asm/atomic.h @@ -189,8 +189,6 @@ ATOMIC_OPS(sub, -=, sub) #define atomic_andnot atomic_andnot -#define atomic_fetch_or atomic_fetch_or - #undef ATOMIC_OPS #define ATOMIC_OPS(op, c_op, asm_op) \ ATOMIC_OP(op, c_op, asm_op) \ diff --git a/arch/arm/include/asm/atomic.h b/arch/arm/include/asm/atomic.h index 0feb110ec542..66d0e215a773 100644 --- a/arch/arm/include/asm/atomic.h +++ b/arch/arm/include/asm/atomic.h @@ -201,8 +201,6 @@ static inline int atomic_fetch_##op(int i, atomic_t *v) \ return val; \ } -#define atomic_fetch_or atomic_fetch_or - static inline int atomic_cmpxchg(atomic_t *v, int old, int new) { int ret; diff --git a/arch/arm64/include/asm/atomic.h b/arch/arm64/include/asm/atomic.h index 3128c3d7c1ff..c0235e0ff849 100644 --- a/arch/arm64/include/asm/atomic.h +++ b/arch/arm64/include/asm/atomic.h @@ -128,8 +128,6 @@ #define __atomic_add_unless(v, a, u) ___atomic_add_unless(v, a, u,) #define atomic_andnot atomic_andnot -#define atomic_fetch_or atomic_fetch_or - /* * 64-bit atomic operations. */ diff --git a/arch/avr32/include/asm/atomic.h b/arch/avr32/include/asm/atomic.h index b8681fd495ef..3d5ce38a6f0b 100644 --- a/arch/avr32/include/asm/atomic.h +++ b/arch/avr32/include/asm/atomic.h @@ -66,8 +66,6 @@ ATOMIC_OP_RETURN(add, add, r) ATOMIC_FETCH_OP (sub, sub, rKs21) ATOMIC_FETCH_OP (add, add, r) -#define atomic_fetch_or atomic_fetch_or - #define ATOMIC_OPS(op, asm_op) \ ATOMIC_OP_RETURN(op, asm_op, r) \ static inline void atomic_##op(int i, atomic_t *v) \ diff --git a/arch/frv/include/asm/atomic.h b/arch/frv/include/asm/atomic.h index e3e06da0cd59..1c2a5e264fc7 100644 --- a/arch/frv/include/asm/atomic.h +++ b/arch/frv/include/asm/atomic.h @@ -74,8 +74,6 @@ static inline void atomic_dec(atomic_t *v) #define atomic_dec_and_test(v) (atomic_sub_return(1, (v)) == 0) #define atomic_inc_and_test(v) (atomic_add_return(1, (v)) == 0) -#define atomic_fetch_or atomic_fetch_or - /* * 64-bit atomic ops */ diff --git a/arch/h8300/include/asm/atomic.h b/arch/h8300/include/asm/atomic.h index 0961b618bdde..349a47a918db 100644 --- a/arch/h8300/include/asm/atomic.h +++ b/arch/h8300/include/asm/atomic.h @@ -54,8 +54,6 @@ static inline void atomic_##op(int i, atomic_t *v) \ ATOMIC_OP_RETURN(add, +=) ATOMIC_OP_RETURN(sub, -=) -#define atomic_fetch_or atomic_fetch_or - #define ATOMIC_OPS(op, c_op) \ ATOMIC_OP(op, c_op) \ ATOMIC_FETCH_OP(op, c_op) diff --git a/arch/hexagon/include/asm/atomic.h b/arch/hexagon/include/asm/atomic.h index 07dbb3332b4a..a62ba368b27d 100644 --- a/arch/hexagon/include/asm/atomic.h +++ b/arch/hexagon/include/asm/atomic.h @@ -152,8 +152,6 @@ ATOMIC_OPS(sub) #undef ATOMIC_OPS #define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op) -#define atomic_fetch_or atomic_fetch_or - ATOMIC_OPS(and) ATOMIC_OPS(or) ATOMIC_OPS(xor) diff --git a/arch/m32r/include/asm/atomic.h b/arch/m32r/include/asm/atomic.h index 8ba8a0ab5d5d..640cc1c7099f 100644 --- a/arch/m32r/include/asm/atomic.h +++ b/arch/m32r/include/asm/atomic.h @@ -121,8 +121,6 @@ ATOMIC_OPS(sub) #undef ATOMIC_OPS #define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op) -#define atomic_fetch_or atomic_fetch_or - ATOMIC_OPS(and) ATOMIC_OPS(or) ATOMIC_OPS(xor) diff --git a/arch/m68k/include/asm/atomic.h b/arch/m68k/include/asm/atomic.h index 5cf9b3b1b6ac..3e03de7ae33b 100644 --- a/arch/m68k/include/asm/atomic.h +++ b/arch/m68k/include/asm/atomic.h @@ -119,8 +119,6 @@ ATOMIC_OPS(sub, -=, sub) ATOMIC_OP(op, c_op, asm_op) \ ATOMIC_FETCH_OP(op, c_op, asm_op) -#define atomic_fetch_or atomic_fetch_or - ATOMIC_OPS(and, &=, and) ATOMIC_OPS(or, |=, or) ATOMIC_OPS(xor, ^=, eor) diff --git a/arch/metag/include/asm/atomic.h b/arch/metag/include/asm/atomic.h index 6ca210de8a7d..470e365f04ea 100644 --- a/arch/metag/include/asm/atomic.h +++ b/arch/metag/include/asm/atomic.h @@ -17,8 +17,6 @@ #include #endif -#define atomic_fetch_or atomic_fetch_or - #define atomic_add_negative(a, v) (atomic_add_return((a), (v)) < 0) #define atomic_dec_return(v) atomic_sub_return(1, (v)) diff --git a/arch/mips/include/asm/atomic.h b/arch/mips/include/asm/atomic.h index 431079f8e483..387ce288334e 100644 --- a/arch/mips/include/asm/atomic.h +++ b/arch/mips/include/asm/atomic.h @@ -194,8 +194,6 @@ ATOMIC_OPS(sub, -=, subu) ATOMIC_OP(op, c_op, asm_op) \ ATOMIC_FETCH_OP(op, c_op, asm_op) -#define atomic_fetch_or atomic_fetch_or - ATOMIC_OPS(and, &=, and) ATOMIC_OPS(or, |=, or) ATOMIC_OPS(xor, ^=, xor) diff --git a/arch/mn10300/include/asm/atomic.h b/arch/mn10300/include/asm/atomic.h index 3580f789f3a6..36389efd45e8 100644 --- a/arch/mn10300/include/asm/atomic.h +++ b/arch/mn10300/include/asm/atomic.h @@ -113,8 +113,6 @@ ATOMIC_OPS(sub) #undef ATOMIC_OPS #define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op) -#define atomic_fetch_or atomic_fetch_or - ATOMIC_OPS(and) ATOMIC_OPS(or) ATOMIC_OPS(xor) diff --git a/arch/parisc/include/asm/atomic.h b/arch/parisc/include/asm/atomic.h index 29df1f871910..5394b9c5f914 100644 --- a/arch/parisc/include/asm/atomic.h +++ b/arch/parisc/include/asm/atomic.h @@ -148,8 +148,6 @@ ATOMIC_OPS(sub, -=) ATOMIC_OP(op, c_op) \ ATOMIC_FETCH_OP(op, c_op) -#define atomic_fetch_or atomic_fetch_or - ATOMIC_OPS(and, &=) ATOMIC_OPS(or, |=) ATOMIC_OPS(xor, ^=) diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h index 2324e759b544..d28cc2f5b7b2 100644 --- a/arch/s390/include/asm/atomic.h +++ b/arch/s390/include/asm/atomic.h @@ -135,8 +135,6 @@ static inline int atomic_fetch_##op(int i, atomic_t *v) \ return __ATOMIC_LOOP(v, i, __ATOMIC_##OP, __ATOMIC_BARRIER); \ } -#define atomic_fetch_or atomic_fetch_or - ATOMIC_OPS(and, AND) ATOMIC_OPS(or, OR) ATOMIC_OPS(xor, XOR) diff --git a/arch/sh/include/asm/atomic.h b/arch/sh/include/asm/atomic.h index d93ed7ce1b2f..c399e1c55685 100644 --- a/arch/sh/include/asm/atomic.h +++ b/arch/sh/include/asm/atomic.h @@ -25,8 +25,6 @@ #include #endif -#define atomic_fetch_or atomic_fetch_or - #define atomic_add_negative(a, v) (atomic_add_return((a), (v)) < 0) #define atomic_dec_return(v) atomic_sub_return(1, (v)) #define atomic_inc_return(v) atomic_add_return(1, (v)) diff --git a/arch/sparc/include/asm/atomic.h b/arch/sparc/include/asm/atomic.h index 1f741bcc73b7..8ff83d8cc33f 100644 --- a/arch/sparc/include/asm/atomic.h +++ b/arch/sparc/include/asm/atomic.h @@ -5,5 +5,4 @@ #else #include #endif -#define atomic_fetch_or atomic_fetch_or #endif diff --git a/arch/sparc/include/asm/atomic_32.h b/arch/sparc/include/asm/atomic_32.h index 5cfb20a599d9..ee3f11c43cda 100644 --- a/arch/sparc/include/asm/atomic_32.h +++ b/arch/sparc/include/asm/atomic_32.h @@ -36,8 +36,6 @@ void atomic_set(atomic_t *, int); #define atomic_inc(v) ((void)atomic_add_return( 1, (v))) #define atomic_dec(v) ((void)atomic_add_return( -1, (v))) -#define atomic_fetch_or atomic_fetch_or - #define atomic_and(i, v) ((void)atomic_fetch_and((i), (v))) #define atomic_or(i, v) ((void)atomic_fetch_or((i), (v))) #define atomic_xor(i, v) ((void)atomic_fetch_xor((i), (v))) diff --git a/arch/tile/include/asm/atomic.h b/arch/tile/include/asm/atomic.h index 9807030557c4..8dda3c8ff5ab 100644 --- a/arch/tile/include/asm/atomic.h +++ b/arch/tile/include/asm/atomic.h @@ -48,8 +48,6 @@ static inline int atomic_read(const atomic_t *v) #define atomic_fetch_sub(i, v) atomic_fetch_add(-(int)(i), (v)) -#define atomic_fetch_or atomic_fetch_or - /** * atomic_sub - subtract integer from atomic variable * @i: integer value to subtract diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h index 73b8463b89e9..a58b99811105 100644 --- a/arch/x86/include/asm/atomic.h +++ b/arch/x86/include/asm/atomic.h @@ -217,8 +217,6 @@ static inline int atomic_fetch_##op(int i, atomic_t *v) \ ATOMIC_OP(op) \ ATOMIC_FETCH_OP(op, c_op) -#define atomic_fetch_or atomic_fetch_or - ATOMIC_OPS(and, &) ATOMIC_OPS(or , |) ATOMIC_OPS(xor, ^) diff --git a/arch/xtensa/include/asm/atomic.h b/arch/xtensa/include/asm/atomic.h index d95a8aa1a6d3..e7a23f2a519a 100644 --- a/arch/xtensa/include/asm/atomic.h +++ b/arch/xtensa/include/asm/atomic.h @@ -188,8 +188,6 @@ ATOMIC_OPS(sub) #undef ATOMIC_OPS #define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op) -#define atomic_fetch_or atomic_fetch_or - ATOMIC_OPS(and) ATOMIC_OPS(or) ATOMIC_OPS(xor) diff --git a/include/asm-generic/atomic.h b/include/asm-generic/atomic.h index a2304ccf4ed0..9ed8b987185b 100644 --- a/include/asm-generic/atomic.h +++ b/include/asm-generic/atomic.h @@ -137,8 +137,6 @@ ATOMIC_FETCH_OP(and, &) #endif #ifndef atomic_fetch_or -#define atomic_fetch_or atomic_fetch_or - ATOMIC_FETCH_OP(or, |) #endif diff --git a/include/linux/atomic.h b/include/linux/atomic.h index 2e6c013ac5a4..0b3802d33125 100644 --- a/include/linux/atomic.h +++ b/include/linux/atomic.h @@ -573,27 +573,6 @@ static inline int atomic_dec_if_positive(atomic_t *v) } #endif -/** - * atomic_fetch_or - perform *p |= mask and return old value of *p - * @mask: mask to OR on the atomic_t - * @p: pointer to atomic_t - */ -#ifndef atomic_fetch_or -static inline int atomic_fetch_or(int mask, atomic_t *p) -{ - int old, val = atomic_read(p); - - for (;;) { - old = atomic_cmpxchg(p, val, val | mask); - if (old == val) - break; - val = old; - } - - return old; -} -#endif - #ifdef CONFIG_GENERIC_ATOMIC64 #include #endif From e37837fb62f95a81bdcefa86ceea043df84937d7 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 18 Apr 2016 01:01:27 +0200 Subject: [PATCH 55/67] locking/atomic: Remove the deprecated atomic_{set,clear}_mask() functions These functions have been deprecated for a while and there is only the one user left, convert and kill. Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Boqun Feng Cc: Davidlohr Bueso Cc: Frederic Weisbecker Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Will Deacon Cc: linux-arch@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/atomic.h | 10 ---------- kernel/locking/qspinlock_paravirt.h | 4 ++-- 2 files changed, 2 insertions(+), 12 deletions(-) diff --git a/include/linux/atomic.h b/include/linux/atomic.h index 0b3802d33125..12d910d61b83 100644 --- a/include/linux/atomic.h +++ b/include/linux/atomic.h @@ -480,16 +480,6 @@ static inline int atomic_fetch_andnot_release(int i, atomic_t *v) } #endif -static inline __deprecated void atomic_clear_mask(unsigned int mask, atomic_t *v) -{ - atomic_andnot(mask, v); -} - -static inline __deprecated void atomic_set_mask(unsigned int mask, atomic_t *v) -{ - atomic_or(mask, v); -} - /** * atomic_inc_not_zero_hint - increment if not null * @v: pointer of type atomic_t diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h index 21ede57f68b3..37649e69056c 100644 --- a/kernel/locking/qspinlock_paravirt.h +++ b/kernel/locking/qspinlock_paravirt.h @@ -112,12 +112,12 @@ static __always_inline int trylock_clear_pending(struct qspinlock *lock) #else /* _Q_PENDING_BITS == 8 */ static __always_inline void set_pending(struct qspinlock *lock) { - atomic_set_mask(_Q_PENDING_VAL, &lock->val); + atomic_or(_Q_PENDING_VAL, &lock->val); } static __always_inline void clear_pending(struct qspinlock *lock) { - atomic_clear_mask(_Q_PENDING_VAL, &lock->val); + atomic_andnot(_Q_PENDING_VAL, &lock->val); } static __always_inline int trylock_clear_pending(struct qspinlock *lock) From fe14d2f12d5e641f114e27c2ea1fb85843c58967 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 18 Apr 2016 01:09:20 +0200 Subject: [PATCH 56/67] locking/atomic, arch/alpha: Convert to _relaxed atomics Generic code will construct {,_acquire,_release} versions by adding the required smp_mb__{before,after}_atomic() calls. Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Ivan Kokshaysky Cc: Linus Torvalds Cc: Matt Turner Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Richard Henderson Cc: Thomas Gleixner Cc: linux-alpha@vger.kernel.org Cc: linux-arch@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/alpha/include/asm/atomic.h | 36 ++++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/arch/alpha/include/asm/atomic.h b/arch/alpha/include/asm/atomic.h index 5377ca8bb503..498933a7df97 100644 --- a/arch/alpha/include/asm/atomic.h +++ b/arch/alpha/include/asm/atomic.h @@ -46,10 +46,9 @@ static __inline__ void atomic_##op(int i, atomic_t * v) \ } \ #define ATOMIC_OP_RETURN(op, asm_op) \ -static inline int atomic_##op##_return(int i, atomic_t *v) \ +static inline int atomic_##op##_return_relaxed(int i, atomic_t *v) \ { \ long temp, result; \ - smp_mb(); \ __asm__ __volatile__( \ "1: ldl_l %0,%1\n" \ " " #asm_op " %0,%3,%2\n" \ @@ -61,15 +60,13 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ ".previous" \ :"=&r" (temp), "=m" (v->counter), "=&r" (result) \ :"Ir" (i), "m" (v->counter) : "memory"); \ - smp_mb(); \ return result; \ } #define ATOMIC_FETCH_OP(op, asm_op) \ -static inline int atomic_fetch_##op(int i, atomic_t *v) \ +static inline int atomic_fetch_##op##_relaxed(int i, atomic_t *v) \ { \ long temp, result; \ - smp_mb(); \ __asm__ __volatile__( \ "1: ldl_l %2,%1\n" \ " " #asm_op " %2,%3,%0\n" \ @@ -80,7 +77,6 @@ static inline int atomic_fetch_##op(int i, atomic_t *v) \ ".previous" \ :"=&r" (temp), "=m" (v->counter), "=&r" (result) \ :"Ir" (i), "m" (v->counter) : "memory"); \ - smp_mb(); \ return result; \ } @@ -101,10 +97,9 @@ static __inline__ void atomic64_##op(long i, atomic64_t * v) \ } \ #define ATOMIC64_OP_RETURN(op, asm_op) \ -static __inline__ long atomic64_##op##_return(long i, atomic64_t * v) \ +static __inline__ long atomic64_##op##_return_relaxed(long i, atomic64_t * v) \ { \ long temp, result; \ - smp_mb(); \ __asm__ __volatile__( \ "1: ldq_l %0,%1\n" \ " " #asm_op " %0,%3,%2\n" \ @@ -116,15 +111,13 @@ static __inline__ long atomic64_##op##_return(long i, atomic64_t * v) \ ".previous" \ :"=&r" (temp), "=m" (v->counter), "=&r" (result) \ :"Ir" (i), "m" (v->counter) : "memory"); \ - smp_mb(); \ return result; \ } #define ATOMIC64_FETCH_OP(op, asm_op) \ -static __inline__ long atomic64_fetch_##op(long i, atomic64_t * v) \ +static __inline__ long atomic64_fetch_##op##_relaxed(long i, atomic64_t * v) \ { \ long temp, result; \ - smp_mb(); \ __asm__ __volatile__( \ "1: ldq_l %2,%1\n" \ " " #asm_op " %2,%3,%0\n" \ @@ -135,7 +128,6 @@ static __inline__ long atomic64_fetch_##op(long i, atomic64_t * v) \ ".previous" \ :"=&r" (temp), "=m" (v->counter), "=&r" (result) \ :"Ir" (i), "m" (v->counter) : "memory"); \ - smp_mb(); \ return result; \ } @@ -150,6 +142,16 @@ static __inline__ long atomic64_fetch_##op(long i, atomic64_t * v) \ ATOMIC_OPS(add) ATOMIC_OPS(sub) +#define atomic_add_return_relaxed atomic_add_return_relaxed +#define atomic_sub_return_relaxed atomic_sub_return_relaxed +#define atomic_fetch_add_relaxed atomic_fetch_add_relaxed +#define atomic_fetch_sub_relaxed atomic_fetch_sub_relaxed + +#define atomic64_add_return_relaxed atomic64_add_return_relaxed +#define atomic64_sub_return_relaxed atomic64_sub_return_relaxed +#define atomic64_fetch_add_relaxed atomic64_fetch_add_relaxed +#define atomic64_fetch_sub_relaxed atomic64_fetch_sub_relaxed + #define atomic_andnot atomic_andnot #define atomic64_andnot atomic64_andnot @@ -165,6 +167,16 @@ ATOMIC_OPS(andnot, bic) ATOMIC_OPS(or, bis) ATOMIC_OPS(xor, xor) +#define atomic_fetch_and_relaxed atomic_fetch_and_relaxed +#define atomic_fetch_andnot_relaxed atomic_fetch_andnot_relaxed +#define atomic_fetch_or_relaxed atomic_fetch_or_relaxed +#define atomic_fetch_xor_relaxed atomic_fetch_xor_relaxed + +#define atomic64_fetch_and_relaxed atomic64_fetch_and_relaxed +#define atomic64_fetch_andnot_relaxed atomic64_fetch_andnot_relaxed +#define atomic64_fetch_or_relaxed atomic64_fetch_or_relaxed +#define atomic64_fetch_xor_relaxed atomic64_fetch_xor_relaxed + #undef ATOMIC_OPS #undef ATOMIC64_FETCH_OP #undef ATOMIC64_OP_RETURN From 4ec45856b698c37e73d973fb4b1a094dfb9d5732 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 18 Apr 2016 01:15:25 +0200 Subject: [PATCH 57/67] locking/atomic, arch/mips: Convert to _relaxed atomics Generic code will construct {,_acquire,_release} versions by adding the required smp_mb__{before,after}_atomic() calls. XXX if/when MIPS will start using their new SYNCxx instructions they can provide custom __atomic_op_{acquire,release}() macros as per the powerpc example. Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Ralf Baechle Cc: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: linux-mips@linux-mips.org Signed-off-by: Ingo Molnar --- arch/mips/include/asm/atomic.h | 42 ++++++++++++++++++---------------- 1 file changed, 22 insertions(+), 20 deletions(-) diff --git a/arch/mips/include/asm/atomic.h b/arch/mips/include/asm/atomic.h index 387ce288334e..0ab176bdb8e8 100644 --- a/arch/mips/include/asm/atomic.h +++ b/arch/mips/include/asm/atomic.h @@ -79,12 +79,10 @@ static __inline__ void atomic_##op(int i, atomic_t * v) \ } #define ATOMIC_OP_RETURN(op, c_op, asm_op) \ -static __inline__ int atomic_##op##_return(int i, atomic_t * v) \ +static __inline__ int atomic_##op##_return_relaxed(int i, atomic_t * v) \ { \ int result; \ \ - smp_mb__before_llsc(); \ - \ if (kernel_uses_llsc && R10000_LLSC_WAR) { \ int temp; \ \ @@ -125,18 +123,14 @@ static __inline__ int atomic_##op##_return(int i, atomic_t * v) \ raw_local_irq_restore(flags); \ } \ \ - smp_llsc_mb(); \ - \ return result; \ } #define ATOMIC_FETCH_OP(op, c_op, asm_op) \ -static __inline__ int atomic_fetch_##op(int i, atomic_t * v) \ +static __inline__ int atomic_fetch_##op##_relaxed(int i, atomic_t * v) \ { \ int result; \ \ - smp_mb__before_llsc(); \ - \ if (kernel_uses_llsc && R10000_LLSC_WAR) { \ int temp; \ \ @@ -176,8 +170,6 @@ static __inline__ int atomic_fetch_##op(int i, atomic_t * v) \ raw_local_irq_restore(flags); \ } \ \ - smp_llsc_mb(); \ - \ return result; \ } @@ -189,6 +181,11 @@ static __inline__ int atomic_fetch_##op(int i, atomic_t * v) \ ATOMIC_OPS(add, +=, addu) ATOMIC_OPS(sub, -=, subu) +#define atomic_add_return_relaxed atomic_add_return_relaxed +#define atomic_sub_return_relaxed atomic_sub_return_relaxed +#define atomic_fetch_add_relaxed atomic_fetch_add_relaxed +#define atomic_fetch_sub_relaxed atomic_fetch_sub_relaxed + #undef ATOMIC_OPS #define ATOMIC_OPS(op, c_op, asm_op) \ ATOMIC_OP(op, c_op, asm_op) \ @@ -198,6 +195,10 @@ ATOMIC_OPS(and, &=, and) ATOMIC_OPS(or, |=, or) ATOMIC_OPS(xor, ^=, xor) +#define atomic_fetch_and_relaxed atomic_fetch_and_relaxed +#define atomic_fetch_or_relaxed atomic_fetch_or_relaxed +#define atomic_fetch_xor_relaxed atomic_fetch_xor_relaxed + #undef ATOMIC_OPS #undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN @@ -420,12 +421,10 @@ static __inline__ void atomic64_##op(long i, atomic64_t * v) \ } #define ATOMIC64_OP_RETURN(op, c_op, asm_op) \ -static __inline__ long atomic64_##op##_return(long i, atomic64_t * v) \ +static __inline__ long atomic64_##op##_return_relaxed(long i, atomic64_t * v) \ { \ long result; \ \ - smp_mb__before_llsc(); \ - \ if (kernel_uses_llsc && R10000_LLSC_WAR) { \ long temp; \ \ @@ -467,18 +466,14 @@ static __inline__ long atomic64_##op##_return(long i, atomic64_t * v) \ raw_local_irq_restore(flags); \ } \ \ - smp_llsc_mb(); \ - \ return result; \ } #define ATOMIC64_FETCH_OP(op, c_op, asm_op) \ -static __inline__ long atomic64_fetch_##op(long i, atomic64_t * v) \ +static __inline__ long atomic64_fetch_##op##_relaxed(long i, atomic64_t * v) \ { \ long result; \ \ - smp_mb__before_llsc(); \ - \ if (kernel_uses_llsc && R10000_LLSC_WAR) { \ long temp; \ \ @@ -519,8 +514,6 @@ static __inline__ long atomic64_fetch_##op(long i, atomic64_t * v) \ raw_local_irq_restore(flags); \ } \ \ - smp_llsc_mb(); \ - \ return result; \ } @@ -532,6 +525,11 @@ static __inline__ long atomic64_fetch_##op(long i, atomic64_t * v) \ ATOMIC64_OPS(add, +=, daddu) ATOMIC64_OPS(sub, -=, dsubu) +#define atomic64_add_return_relaxed atomic64_add_return_relaxed +#define atomic64_sub_return_relaxed atomic64_sub_return_relaxed +#define atomic64_fetch_add_relaxed atomic64_fetch_add_relaxed +#define atomic64_fetch_sub_relaxed atomic64_fetch_sub_relaxed + #undef ATOMIC64_OPS #define ATOMIC64_OPS(op, c_op, asm_op) \ ATOMIC64_OP(op, c_op, asm_op) \ @@ -541,6 +539,10 @@ ATOMIC64_OPS(and, &=, and) ATOMIC64_OPS(or, |=, or) ATOMIC64_OPS(xor, ^=, xor) +#define atomic64_fetch_and_relaxed atomic64_fetch_and_relaxed +#define atomic64_fetch_or_relaxed atomic64_fetch_or_relaxed +#define atomic64_fetch_xor_relaxed atomic64_fetch_xor_relaxed + #undef ATOMIC64_OPS #undef ATOMIC64_FETCH_OP #undef ATOMIC64_OP_RETURN From f9852b74bec0117b888da39d070c323ea1cb7f4c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 18 Apr 2016 01:27:03 +0200 Subject: [PATCH 58/67] locking/atomic, arch/qrwlock: Employ atomic_fetch_add_acquire() The only reason for the current code is to make GCC emit only the "LOCK XADD" instruction on x86 (and not do a pointless extra ADD on the result), do so nicer. Signed-off-by: Peter Zijlstra (Intel) Acked-by: Waiman Long Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- kernel/locking/qrwlock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/locking/qrwlock.c b/kernel/locking/qrwlock.c index fec082338668..19248ddf37ce 100644 --- a/kernel/locking/qrwlock.c +++ b/kernel/locking/qrwlock.c @@ -93,7 +93,7 @@ void queued_read_lock_slowpath(struct qrwlock *lock, u32 cnts) * that accesses can't leak upwards out of our subsequent critical * section in the case that the lock is currently held for write. */ - cnts = atomic_add_return_acquire(_QR_BIAS, &lock->cnts) - _QR_BIAS; + cnts = atomic_fetch_add_acquire(_QR_BIAS, &lock->cnts); rspin_until_writer_unlock(lock, cnts); /* From 86a3b5f34fc1fb307abef4fde76bebd3edce0324 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 18 May 2016 12:42:21 +0200 Subject: [PATCH 59/67] locking/atomic, arch/rwsem: Employ atomic_long_fetch_add() Now that we have fetch_add() we can stop using add_return() - val. Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Jason Low Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Waiman Long Cc: linux-arch@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- kernel/locking/rwsem-xadd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c index 2031281bb940..447e08de1fab 100644 --- a/kernel/locking/rwsem-xadd.c +++ b/kernel/locking/rwsem-xadd.c @@ -153,7 +153,7 @@ __rwsem_mark_wake(struct rw_semaphore *sem, if (wake_type != RWSEM_WAKE_READ_OWNED) { adjustment = RWSEM_ACTIVE_READ_BIAS; try_reader_grant: - oldcount = atomic_long_add_return(adjustment, &sem->count) - adjustment; + oldcount = atomic_long_fetch_add(adjustment, &sem->count); if (unlikely(oldcount < RWSEM_WAITING_BIAS)) { /* From ebff09a6ff164aec2b33bf1f9a488c45ac108413 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 15 Jun 2016 16:08:17 -0700 Subject: [PATCH 60/67] locking/Documentation: Clarify limited control-dependency scope Nothing in the control-dependencies section of memory-barriers.txt says that control dependencies don't extend beyond the end of the if-statement containing the control dependency. Worse yet, in many situations, they do extend beyond that if-statement. In particular, the compiler cannot destroy the control dependency given proper use of READ_ONCE() and WRITE_ONCE(). However, a weakly ordered system having a conditional-move instruction provides the control-dependency guarantee only to code within the scope of the if-statement itself. This commit therefore adds words and an example demonstrating this limitation of control dependencies. Reported-by: Will Deacon Signed-off-by: Paul E. McKenney Acked-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: corbet@lwn.net Cc: linux-arch@vger.kernel.org Cc: linux-doc@vger.kernel.org Link: http://lkml.kernel.org/r/20160615230817.GA18039@linux.vnet.ibm.com Signed-off-by: Ingo Molnar --- Documentation/memory-barriers.txt | 41 +++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt index 147ae8ec836f..a4d0a99de04d 100644 --- a/Documentation/memory-barriers.txt +++ b/Documentation/memory-barriers.txt @@ -806,6 +806,41 @@ out-guess your code. More generally, although READ_ONCE() does force the compiler to actually emit code for a given load, it does not force the compiler to use the results. +In addition, control dependencies apply only to the then-clause and +else-clause of the if-statement in question. In particular, it does +not necessarily apply to code following the if-statement: + + q = READ_ONCE(a); + if (q) { + WRITE_ONCE(b, p); + } else { + WRITE_ONCE(b, r); + } + WRITE_ONCE(c, 1); /* BUG: No ordering against the read from "a". */ + +It is tempting to argue that there in fact is ordering because the +compiler cannot reorder volatile accesses and also cannot reorder +the writes to "b" with the condition. Unfortunately for this line +of reasoning, the compiler might compile the two writes to "b" as +conditional-move instructions, as in this fanciful pseudo-assembly +language: + + ld r1,a + ld r2,p + ld r3,r + cmp r1,$0 + cmov,ne r4,r2 + cmov,eq r4,r3 + st r4,b + st $1,c + +A weakly ordered CPU would have no dependency of any sort between the load +from "a" and the store to "c". The control dependencies would extend +only to the pair of cmov instructions and the store depending on them. +In short, control dependencies apply only to the stores in the then-clause +and else-clause of the if-statement in question (including functions +invoked by those two clauses), not to code following that if-statement. + Finally, control dependencies do -not- provide transitivity. This is demonstrated by two related examples, with the initial values of x and y both being zero: @@ -869,6 +904,12 @@ In summary: atomic{,64}_read() can help to preserve your control dependency. Please see the COMPILER BARRIER section for more information. + (*) Control dependencies apply only to the then-clause and else-clause + of the if-statement containing the control dependency, including + any functions that these two clauses call. Control dependencies + do -not- apply to code following the if-statement containing the + control dependency. + (*) Control dependencies pair normally with other types of barriers. (*) Control dependencies do -not- provide transitivity. If you From 4aef66c8ae91d00affeeb24cfb176b53354ac969 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 17 Jun 2016 17:02:01 +0200 Subject: [PATCH 61/67] locking/atomic, arch/arc: Fix build Resolve conflict between commits: fbffe892e525 ("locking/atomic, arch/arc: Implement atomic_fetch_{add,sub,and,andnot,or,xor}()") and: ed6aefed726a ("Revert "ARCv2: spinlock/rwlock/atomics: Delayed retry of failed SCOND with exponential backoff"") Reported-by: Guenter Roeck Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Nigel Topham Cc: Noam Camus Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Vineet Gupta Cc: linux-kernel@vger.kernel.org Cc: linux-snps-arc@lists.infradead.org Signed-off-by: Ingo Molnar --- arch/arc/include/asm/atomic.h | 8 -------- 1 file changed, 8 deletions(-) diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h index bd9c51cb2bfd..4e3c1b6b0806 100644 --- a/arch/arc/include/asm/atomic.h +++ b/arch/arc/include/asm/atomic.h @@ -71,7 +71,6 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ static inline int atomic_fetch_##op(int i, atomic_t *v) \ { \ unsigned int val, orig; \ - SCOND_FAIL_RETRY_VAR_DEF \ \ /* \ * Explicit full memory barrier needed before/after as \ @@ -84,11 +83,8 @@ static inline int atomic_fetch_##op(int i, atomic_t *v) \ " " #asm_op " %[val], %[orig], %[i] \n" \ " scond %[val], [%[ctr]] \n" \ " \n" \ - SCOND_FAIL_RETRY_ASM \ - \ : [val] "=&r" (val), \ [orig] "=&r" (orig) \ - SCOND_FAIL_RETRY_VARS \ : [ctr] "r" (&v->counter), \ [i] "ir" (i) \ : "cc"); \ @@ -199,10 +195,6 @@ ATOMIC_OPS(andnot, &= ~, bic) ATOMIC_OPS(or, |=, or) ATOMIC_OPS(xor, ^=, xor) -#undef SCOND_FAIL_RETRY_VAR_DEF -#undef SCOND_FAIL_RETRY_ASM -#undef SCOND_FAIL_RETRY_VARS - #else /* CONFIG_ARC_PLAT_EZNPS */ static inline int atomic_read(const atomic_t *v) From 86a664d58f3ba2398a378dc9da6d4cfa737d2281 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 17 Jun 2016 17:05:38 +0200 Subject: [PATCH 62/67] locking/atomic, arch/m68k: Remove comment I misread the inline asm. It uses a rare construct to provide an input to a previously declared output to do the atomic_read(). Reported-by: Geert Uytterhoeven Signed-off-by: Peter Zijlstra (Intel) Cc: Andreas Schwab Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Cc: linux-m68k@lists.linux-m68k.org Signed-off-by: Ingo Molnar --- arch/m68k/include/asm/atomic.h | 7 ------- 1 file changed, 7 deletions(-) diff --git a/arch/m68k/include/asm/atomic.h b/arch/m68k/include/asm/atomic.h index 3e03de7ae33b..cf4c3a7b1a45 100644 --- a/arch/m68k/include/asm/atomic.h +++ b/arch/m68k/include/asm/atomic.h @@ -38,13 +38,6 @@ static inline void atomic_##op(int i, atomic_t *v) \ #ifdef CONFIG_RMW_INSNS -/* - * Am I reading these CAS loops right in that %2 is the old value and the first - * iteration uses an uninitialized value? - * - * Would it not make sense to add: tmp = atomic_read(v); to avoid this? - */ - #define ATOMIC_OP_RETURN(op, c_op, asm_op) \ static inline int atomic_##op##_return(int i, atomic_t *v) \ { \ From b7271b9f3e18181559b96a610f4e42bdb04b07f5 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 22 Jun 2016 11:16:49 +0200 Subject: [PATCH 63/67] locking/atomic, arch/tile: Fix tilepro build The tilepro change wasn't ever compiled it seems (the 0day built bot also doesn't have a toolchain for it). Make it work. The thing that makes the patch bigger than desired is namespace collision with the C11 __atomic builtin functions. So rename the tilepro functions to __atomic32. Reported-by: Sudip Mukherjee Signed-off-by: Peter Zijlstra (Intel) Acked-by: Chris Metcalf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephen Rothwell Cc: Thomas Gleixner Fixes: 1af5de9af138 ("locking/atomic, arch/tile: Implement atomic{,64}_fetch_{add,sub,and,or,xor}()") Link: http://lkml.kernel.org/r/20160622091649.GB30154@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- arch/tile/include/asm/atomic_32.h | 24 ++++++++++++------------ arch/tile/include/asm/futex.h | 14 +++++++------- arch/tile/lib/atomic_32.c | 16 ++++++++-------- arch/tile/lib/atomic_asm_32.S | 21 +++++++++++++-------- 4 files changed, 40 insertions(+), 35 deletions(-) diff --git a/arch/tile/include/asm/atomic_32.h b/arch/tile/include/asm/atomic_32.h index da8eb4ed3752..a93774255136 100644 --- a/arch/tile/include/asm/atomic_32.h +++ b/arch/tile/include/asm/atomic_32.h @@ -143,15 +143,15 @@ static inline void atomic64_##op(long long i, atomic64_t *v) \ { \ _atomic64_fetch_##op(&v->counter, i); \ } \ -static inline void atomic64_##op(long long i, atomic64_t *v) \ +static inline long long atomic64_fetch_##op(long long i, atomic64_t *v) \ { \ smp_mb(); \ return _atomic64_fetch_##op(&v->counter, i); \ } -ATOMIC64_OP(and) -ATOMIC64_OP(or) -ATOMIC64_OP(xor) +ATOMIC64_OPS(and) +ATOMIC64_OPS(or) +ATOMIC64_OPS(xor) #undef ATOMIC64_OPS @@ -266,16 +266,16 @@ struct __get_user { unsigned long val; int err; }; -extern struct __get_user __atomic_cmpxchg(volatile int *p, +extern struct __get_user __atomic32_cmpxchg(volatile int *p, int *lock, int o, int n); -extern struct __get_user __atomic_xchg(volatile int *p, int *lock, int n); -extern struct __get_user __atomic_xchg_add(volatile int *p, int *lock, int n); -extern struct __get_user __atomic_xchg_add_unless(volatile int *p, +extern struct __get_user __atomic32_xchg(volatile int *p, int *lock, int n); +extern struct __get_user __atomic32_xchg_add(volatile int *p, int *lock, int n); +extern struct __get_user __atomic32_xchg_add_unless(volatile int *p, int *lock, int o, int n); -extern struct __get_user __atomic_fetch_or(volatile int *p, int *lock, int n); -extern struct __get_user __atomic_fetch_and(volatile int *p, int *lock, int n); -extern struct __get_user __atomic_fetch_andn(volatile int *p, int *lock, int n); -extern struct __get_user __atomic_fetch_xor(volatile int *p, int *lock, int n); +extern struct __get_user __atomic32_fetch_or(volatile int *p, int *lock, int n); +extern struct __get_user __atomic32_fetch_and(volatile int *p, int *lock, int n); +extern struct __get_user __atomic32_fetch_andn(volatile int *p, int *lock, int n); +extern struct __get_user __atomic32_fetch_xor(volatile int *p, int *lock, int n); extern long long __atomic64_cmpxchg(volatile long long *p, int *lock, long long o, long long n); extern long long __atomic64_xchg(volatile long long *p, int *lock, long long n); diff --git a/arch/tile/include/asm/futex.h b/arch/tile/include/asm/futex.h index 1a6ef1b69cb1..e64a1b75fc38 100644 --- a/arch/tile/include/asm/futex.h +++ b/arch/tile/include/asm/futex.h @@ -80,16 +80,16 @@ ret = gu.err; \ } -#define __futex_set() __futex_call(__atomic_xchg) -#define __futex_add() __futex_call(__atomic_xchg_add) -#define __futex_or() __futex_call(__atomic_or) -#define __futex_andn() __futex_call(__atomic_andn) -#define __futex_xor() __futex_call(__atomic_xor) +#define __futex_set() __futex_call(__atomic32_xchg) +#define __futex_add() __futex_call(__atomic32_xchg_add) +#define __futex_or() __futex_call(__atomic32_fetch_or) +#define __futex_andn() __futex_call(__atomic32_fetch_andn) +#define __futex_xor() __futex_call(__atomic32_fetch_xor) #define __futex_cmpxchg() \ { \ - struct __get_user gu = __atomic_cmpxchg((u32 __force *)uaddr, \ - lock, oldval, oparg); \ + struct __get_user gu = __atomic32_cmpxchg((u32 __force *)uaddr, \ + lock, oldval, oparg); \ val = gu.val; \ ret = gu.err; \ } diff --git a/arch/tile/lib/atomic_32.c b/arch/tile/lib/atomic_32.c index 5b6bd932c9c7..f8128800dbf5 100644 --- a/arch/tile/lib/atomic_32.c +++ b/arch/tile/lib/atomic_32.c @@ -61,13 +61,13 @@ static inline int *__atomic_setup(volatile void *v) int _atomic_xchg(int *v, int n) { - return __atomic_xchg(v, __atomic_setup(v), n).val; + return __atomic32_xchg(v, __atomic_setup(v), n).val; } EXPORT_SYMBOL(_atomic_xchg); int _atomic_xchg_add(int *v, int i) { - return __atomic_xchg_add(v, __atomic_setup(v), i).val; + return __atomic32_xchg_add(v, __atomic_setup(v), i).val; } EXPORT_SYMBOL(_atomic_xchg_add); @@ -78,37 +78,37 @@ int _atomic_xchg_add_unless(int *v, int a, int u) * to use the first argument consistently as the "old value" * in the assembly, as is done for _atomic_cmpxchg(). */ - return __atomic_xchg_add_unless(v, __atomic_setup(v), u, a).val; + return __atomic32_xchg_add_unless(v, __atomic_setup(v), u, a).val; } EXPORT_SYMBOL(_atomic_xchg_add_unless); int _atomic_cmpxchg(int *v, int o, int n) { - return __atomic_cmpxchg(v, __atomic_setup(v), o, n).val; + return __atomic32_cmpxchg(v, __atomic_setup(v), o, n).val; } EXPORT_SYMBOL(_atomic_cmpxchg); unsigned long _atomic_fetch_or(volatile unsigned long *p, unsigned long mask) { - return __atomic_fetch_or((int *)p, __atomic_setup(p), mask).val; + return __atomic32_fetch_or((int *)p, __atomic_setup(p), mask).val; } EXPORT_SYMBOL(_atomic_fetch_or); unsigned long _atomic_fetch_and(volatile unsigned long *p, unsigned long mask) { - return __atomic_fetch_and((int *)p, __atomic_setup(p), mask).val; + return __atomic32_fetch_and((int *)p, __atomic_setup(p), mask).val; } EXPORT_SYMBOL(_atomic_fetch_and); unsigned long _atomic_fetch_andn(volatile unsigned long *p, unsigned long mask) { - return __atomic_fetch_andn((int *)p, __atomic_setup(p), mask).val; + return __atomic32_fetch_andn((int *)p, __atomic_setup(p), mask).val; } EXPORT_SYMBOL(_atomic_fetch_andn); unsigned long _atomic_fetch_xor(volatile unsigned long *p, unsigned long mask) { - return __atomic_fetch_xor((int *)p, __atomic_setup(p), mask).val; + return __atomic32_fetch_xor((int *)p, __atomic_setup(p), mask).val; } EXPORT_SYMBOL(_atomic_fetch_xor); diff --git a/arch/tile/lib/atomic_asm_32.S b/arch/tile/lib/atomic_asm_32.S index 507abdd2bf9a..1a70e6c0f259 100644 --- a/arch/tile/lib/atomic_asm_32.S +++ b/arch/tile/lib/atomic_asm_32.S @@ -172,15 +172,20 @@ STD_ENTRY_SECTION(__atomic\name, .text.atomic) .endif .endm -atomic_op _cmpxchg, 32, "seq r26, r22, r2; { bbns r26, 3f; move r24, r3 }" -atomic_op _xchg, 32, "move r24, r2" -atomic_op _xchg_add, 32, "add r24, r22, r2" -atomic_op _xchg_add_unless, 32, \ + +/* + * Use __atomic32 prefix to avoid collisions with GCC builtin __atomic functions. + */ + +atomic_op 32_cmpxchg, 32, "seq r26, r22, r2; { bbns r26, 3f; move r24, r3 }" +atomic_op 32_xchg, 32, "move r24, r2" +atomic_op 32_xchg_add, 32, "add r24, r22, r2" +atomic_op 32_xchg_add_unless, 32, \ "sne r26, r22, r2; { bbns r26, 3f; add r24, r22, r3 }" -atomic_op _fetch_or, 32, "or r24, r22, r2" -atomic_op _fetch_and, 32, "and r24, r22, r2" -atomic_op _fetch_andn, 32, "nor r2, r2, zero; and r24, r22, r2" -atomic_op _fetch_xor, 32, "xor r24, r22, r2" +atomic_op 32_fetch_or, 32, "or r24, r22, r2" +atomic_op 32_fetch_and, 32, "and r24, r22, r2" +atomic_op 32_fetch_andn, 32, "nor r2, r2, zero; and r24, r22, r2" +atomic_op 32_fetch_xor, 32, "xor r24, r22, r2" atomic_op 64_cmpxchg, 64, "{ seq r26, r22, r2; seq r27, r23, r3 }; \ { bbns r26, 3f; move r24, r4 }; { bbns r27, 3f; move r25, r5 }" From 0dceeaf599e6d9b8bd908ba4bd3dfee84aa26be2 Mon Sep 17 00:00:00 2001 From: Pan Xinhui Date: Tue, 14 Jun 2016 14:37:27 +0800 Subject: [PATCH 64/67] locking/qspinlock: Use __this_cpu_dec() instead of full-blown this_cpu_dec() queued_spin_lock_slowpath() should not worry about another queued_spin_lock_slowpath() running in interrupt context and changing node->count by accident, because node->count keeps the same value every time we enter/leave queued_spin_lock_slowpath(). On some architectures this_cpu_dec() will save/restore irq flags, which has high overhead. Use the much cheaper __this_cpu_dec() instead. Signed-off-by: Pan Xinhui Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Waiman.Long@hpe.com Link: http://lkml.kernel.org/r/1465886247-3773-1-git-send-email-xinhui.pan@linux.vnet.ibm.com [ Rewrote changelog. ] Signed-off-by: Ingo Molnar --- kernel/locking/qspinlock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c index 730655533440..b2caec7315af 100644 --- a/kernel/locking/qspinlock.c +++ b/kernel/locking/qspinlock.c @@ -619,7 +619,7 @@ release: /* * release the node */ - this_cpu_dec(mcs_nodes[0].count); + __this_cpu_dec(mcs_nodes[0].count); } EXPORT_SYMBOL(queued_spin_lock_slowpath); From 885885f6b88d22f81e67ee6a61561e480b27d27a Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Fri, 17 Jun 2016 17:19:40 +0000 Subject: [PATCH 65/67] locking/static_keys: Fix non static symbol Sparse warning Fix the following sparse warnings: kernel/jump_label.c:473:23: warning: symbol 'jump_label_module_nb' was not declared. Should it be static? Signed-off-by: Wei Yongjun Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1466183980-8903-1-git-send-email-weiyj_lk@163.com Signed-off-by: Ingo Molnar --- kernel/jump_label.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/jump_label.c b/kernel/jump_label.c index 05254eeb4b4e..ac4ab953b49c 100644 --- a/kernel/jump_label.c +++ b/kernel/jump_label.c @@ -422,7 +422,7 @@ jump_label_module_notify(struct notifier_block *self, unsigned long val, return notifier_from_errno(ret); } -struct notifier_block jump_label_module_nb = { +static struct notifier_block jump_label_module_nb = { .notifier_call = jump_label_module_notify, .priority = 1, /* higher than tracepoints */ }; From 03e3c2b7edbe1e8758196b2c7843333eb328063d Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 27 Jun 2016 18:43:54 +0100 Subject: [PATCH 66/67] locking/barriers, arch/arm64: Implement LDXR+WFE based smp_cond_load_acquire() smp_cond_load_acquire() is used to spin on a variable until some expression involving that variable becomes true. On arm64, we can build this using the LDXR and WFE instructions, since clearing of the exclusive monitor as a result of the variable being changed by another CPU generates an event, which will wake us up out of WFE. This patch implements smp_cond_load_acquire() using LDXR and WFE, which themselves are contained in an internal __cmpwait() function. Signed-off-by: Will Deacon Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: catalin.marinas@arm.com Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/1467049434-30451-1-git-send-email-will.deacon@arm.com Signed-off-by: Ingo Molnar --- arch/arm64/include/asm/barrier.h | 13 ++++++++ arch/arm64/include/asm/cmpxchg.h | 51 ++++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+) diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h index dae5c49618db..4eea7f618dce 100644 --- a/arch/arm64/include/asm/barrier.h +++ b/arch/arm64/include/asm/barrier.h @@ -91,6 +91,19 @@ do { \ __u.__val; \ }) +#define smp_cond_load_acquire(ptr, cond_expr) \ +({ \ + typeof(ptr) __PTR = (ptr); \ + typeof(*ptr) VAL; \ + for (;;) { \ + VAL = smp_load_acquire(__PTR); \ + if (cond_expr) \ + break; \ + __cmpwait_relaxed(__PTR, VAL); \ + } \ + VAL; \ +}) + #include #endif /* __ASSEMBLY__ */ diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h index 510c7b404454..bd86a79491bc 100644 --- a/arch/arm64/include/asm/cmpxchg.h +++ b/arch/arm64/include/asm/cmpxchg.h @@ -224,4 +224,55 @@ __CMPXCHG_GEN(_mb) __ret; \ }) +#define __CMPWAIT_CASE(w, sz, name) \ +static inline void __cmpwait_case_##name(volatile void *ptr, \ + unsigned long val) \ +{ \ + unsigned long tmp; \ + \ + asm volatile( \ + " ldxr" #sz "\t%" #w "[tmp], %[v]\n" \ + " eor %" #w "[tmp], %" #w "[tmp], %" #w "[val]\n" \ + " cbnz %" #w "[tmp], 1f\n" \ + " wfe\n" \ + "1:" \ + : [tmp] "=&r" (tmp), [v] "+Q" (*(unsigned long *)ptr) \ + : [val] "r" (val)); \ +} + +__CMPWAIT_CASE(w, b, 1); +__CMPWAIT_CASE(w, h, 2); +__CMPWAIT_CASE(w, , 4); +__CMPWAIT_CASE( , , 8); + +#undef __CMPWAIT_CASE + +#define __CMPWAIT_GEN(sfx) \ +static inline void __cmpwait##sfx(volatile void *ptr, \ + unsigned long val, \ + int size) \ +{ \ + switch (size) { \ + case 1: \ + return __cmpwait_case##sfx##_1(ptr, (u8)val); \ + case 2: \ + return __cmpwait_case##sfx##_2(ptr, (u16)val); \ + case 4: \ + return __cmpwait_case##sfx##_4(ptr, val); \ + case 8: \ + return __cmpwait_case##sfx##_8(ptr, val); \ + default: \ + BUILD_BUG(); \ + } \ + \ + unreachable(); \ +} + +__CMPWAIT_GEN() + +#undef __CMPWAIT_GEN + +#define __cmpwait_relaxed(ptr, val) \ + __cmpwait((ptr), (unsigned long)(val), sizeof(*(ptr))) + #endif /* __ASM_CMPXCHG_H */ From f06628638cf6e75f179742b6c1b35076965b9fdd Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Tue, 28 Jun 2016 14:56:51 -0700 Subject: [PATCH 67/67] locking/atomic: Introduce inc/dec variants for the atomic_fetch_$op() API With the inclusion of atomic FETCH-OP variants, many places in the kernel can make use of atomic_fetch_$op() to avoid the callers that need to compute the value/state _before_ the operation. Peter Zijlstra laid out the machinery but we are still missing the simpler dec,inc() calls (which future patches will make use of). This patch only deals with the generic code, as at least right now no arch actually implement them -- which is similar to what the OP-RETURN primitives currently do. Signed-off-by: Davidlohr Bueso Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: James.Bottomley@HansenPartnership.com Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: awalls@md.metrocast.net Cc: bp@alien8.de Cc: cw00.choi@samsung.com Cc: davem@davemloft.net Cc: dledford@redhat.com Cc: dougthompson@xmission.com Cc: gregkh@linuxfoundation.org Cc: hans.verkuil@cisco.com Cc: heiko.carstens@de.ibm.com Cc: jikos@kernel.org Cc: kys@microsoft.com Cc: mchehab@osg.samsung.com Cc: pfg@sgi.com Cc: schwidefsky@de.ibm.com Cc: sean.hefty@intel.com Cc: sumit.semwal@linaro.org Link: http://lkml.kernel.org/r/20160628215651.GA20048@linux-80c1.suse Signed-off-by: Ingo Molnar --- include/asm-generic/atomic-long.h | 22 +++++ include/linux/atomic.h | 128 ++++++++++++++++++++++++++++++ 2 files changed, 150 insertions(+) diff --git a/include/asm-generic/atomic-long.h b/include/asm-generic/atomic-long.h index 2d0d3cf791ab..288cc9e96395 100644 --- a/include/asm-generic/atomic-long.h +++ b/include/asm-generic/atomic-long.h @@ -146,6 +146,28 @@ ATOMIC_LONG_FETCH_OP(xor, _relaxed) ATOMIC_LONG_FETCH_OP(xor, _acquire) ATOMIC_LONG_FETCH_OP(xor, _release) +#undef ATOMIC_LONG_FETCH_OP + +#define ATOMIC_LONG_FETCH_INC_DEC_OP(op, mo) \ +static inline long \ +atomic_long_fetch_##op##mo(atomic_long_t *l) \ +{ \ + ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l; \ + \ + return (long)ATOMIC_LONG_PFX(_fetch_##op##mo)(v); \ +} + +ATOMIC_LONG_FETCH_INC_DEC_OP(inc,) +ATOMIC_LONG_FETCH_INC_DEC_OP(inc, _relaxed) +ATOMIC_LONG_FETCH_INC_DEC_OP(inc, _acquire) +ATOMIC_LONG_FETCH_INC_DEC_OP(inc, _release) +ATOMIC_LONG_FETCH_INC_DEC_OP(dec,) +ATOMIC_LONG_FETCH_INC_DEC_OP(dec, _relaxed) +ATOMIC_LONG_FETCH_INC_DEC_OP(dec, _acquire) +ATOMIC_LONG_FETCH_INC_DEC_OP(dec, _release) + +#undef ATOMIC_LONG_FETCH_INC_DEC_OP + #define ATOMIC_LONG_OP(op) \ static __always_inline void \ atomic_long_##op(long i, atomic_long_t *l) \ diff --git a/include/linux/atomic.h b/include/linux/atomic.h index 12d910d61b83..e71835bf60a9 100644 --- a/include/linux/atomic.h +++ b/include/linux/atomic.h @@ -188,6 +188,38 @@ #endif #endif /* atomic_fetch_add_relaxed */ +/* atomic_fetch_inc_relaxed */ +#ifndef atomic_fetch_inc_relaxed + +#ifndef atomic_fetch_inc +#define atomic_fetch_inc(v) atomic_fetch_add(1, (v)) +#define atomic_fetch_inc_relaxed(v) atomic_fetch_add_relaxed(1, (v)) +#define atomic_fetch_inc_acquire(v) atomic_fetch_add_acquire(1, (v)) +#define atomic_fetch_inc_release(v) atomic_fetch_add_release(1, (v)) +#else /* atomic_fetch_inc */ +#define atomic_fetch_inc_relaxed atomic_fetch_inc +#define atomic_fetch_inc_acquire atomic_fetch_inc +#define atomic_fetch_inc_release atomic_fetch_inc +#endif /* atomic_fetch_inc */ + +#else /* atomic_fetch_inc_relaxed */ + +#ifndef atomic_fetch_inc_acquire +#define atomic_fetch_inc_acquire(...) \ + __atomic_op_acquire(atomic_fetch_inc, __VA_ARGS__) +#endif + +#ifndef atomic_fetch_inc_release +#define atomic_fetch_inc_release(...) \ + __atomic_op_release(atomic_fetch_inc, __VA_ARGS__) +#endif + +#ifndef atomic_fetch_inc +#define atomic_fetch_inc(...) \ + __atomic_op_fence(atomic_fetch_inc, __VA_ARGS__) +#endif +#endif /* atomic_fetch_inc_relaxed */ + /* atomic_fetch_sub_relaxed */ #ifndef atomic_fetch_sub_relaxed #define atomic_fetch_sub_relaxed atomic_fetch_sub @@ -212,6 +244,38 @@ #endif #endif /* atomic_fetch_sub_relaxed */ +/* atomic_fetch_dec_relaxed */ +#ifndef atomic_fetch_dec_relaxed + +#ifndef atomic_fetch_dec +#define atomic_fetch_dec(v) atomic_fetch_sub(1, (v)) +#define atomic_fetch_dec_relaxed(v) atomic_fetch_sub_relaxed(1, (v)) +#define atomic_fetch_dec_acquire(v) atomic_fetch_sub_acquire(1, (v)) +#define atomic_fetch_dec_release(v) atomic_fetch_sub_release(1, (v)) +#else /* atomic_fetch_dec */ +#define atomic_fetch_dec_relaxed atomic_fetch_dec +#define atomic_fetch_dec_acquire atomic_fetch_dec +#define atomic_fetch_dec_release atomic_fetch_dec +#endif /* atomic_fetch_dec */ + +#else /* atomic_fetch_dec_relaxed */ + +#ifndef atomic_fetch_dec_acquire +#define atomic_fetch_dec_acquire(...) \ + __atomic_op_acquire(atomic_fetch_dec, __VA_ARGS__) +#endif + +#ifndef atomic_fetch_dec_release +#define atomic_fetch_dec_release(...) \ + __atomic_op_release(atomic_fetch_dec, __VA_ARGS__) +#endif + +#ifndef atomic_fetch_dec +#define atomic_fetch_dec(...) \ + __atomic_op_fence(atomic_fetch_dec, __VA_ARGS__) +#endif +#endif /* atomic_fetch_dec_relaxed */ + /* atomic_fetch_or_relaxed */ #ifndef atomic_fetch_or_relaxed #define atomic_fetch_or_relaxed atomic_fetch_or @@ -697,6 +761,38 @@ static inline int atomic_dec_if_positive(atomic_t *v) #endif #endif /* atomic64_fetch_add_relaxed */ +/* atomic64_fetch_inc_relaxed */ +#ifndef atomic64_fetch_inc_relaxed + +#ifndef atomic64_fetch_inc +#define atomic64_fetch_inc(v) atomic64_fetch_add(1, (v)) +#define atomic64_fetch_inc_relaxed(v) atomic64_fetch_add_relaxed(1, (v)) +#define atomic64_fetch_inc_acquire(v) atomic64_fetch_add_acquire(1, (v)) +#define atomic64_fetch_inc_release(v) atomic64_fetch_add_release(1, (v)) +#else /* atomic64_fetch_inc */ +#define atomic64_fetch_inc_relaxed atomic64_fetch_inc +#define atomic64_fetch_inc_acquire atomic64_fetch_inc +#define atomic64_fetch_inc_release atomic64_fetch_inc +#endif /* atomic64_fetch_inc */ + +#else /* atomic64_fetch_inc_relaxed */ + +#ifndef atomic64_fetch_inc_acquire +#define atomic64_fetch_inc_acquire(...) \ + __atomic_op_acquire(atomic64_fetch_inc, __VA_ARGS__) +#endif + +#ifndef atomic64_fetch_inc_release +#define atomic64_fetch_inc_release(...) \ + __atomic_op_release(atomic64_fetch_inc, __VA_ARGS__) +#endif + +#ifndef atomic64_fetch_inc +#define atomic64_fetch_inc(...) \ + __atomic_op_fence(atomic64_fetch_inc, __VA_ARGS__) +#endif +#endif /* atomic64_fetch_inc_relaxed */ + /* atomic64_fetch_sub_relaxed */ #ifndef atomic64_fetch_sub_relaxed #define atomic64_fetch_sub_relaxed atomic64_fetch_sub @@ -721,6 +817,38 @@ static inline int atomic_dec_if_positive(atomic_t *v) #endif #endif /* atomic64_fetch_sub_relaxed */ +/* atomic64_fetch_dec_relaxed */ +#ifndef atomic64_fetch_dec_relaxed + +#ifndef atomic64_fetch_dec +#define atomic64_fetch_dec(v) atomic64_fetch_sub(1, (v)) +#define atomic64_fetch_dec_relaxed(v) atomic64_fetch_sub_relaxed(1, (v)) +#define atomic64_fetch_dec_acquire(v) atomic64_fetch_sub_acquire(1, (v)) +#define atomic64_fetch_dec_release(v) atomic64_fetch_sub_release(1, (v)) +#else /* atomic64_fetch_dec */ +#define atomic64_fetch_dec_relaxed atomic64_fetch_dec +#define atomic64_fetch_dec_acquire atomic64_fetch_dec +#define atomic64_fetch_dec_release atomic64_fetch_dec +#endif /* atomic64_fetch_dec */ + +#else /* atomic64_fetch_dec_relaxed */ + +#ifndef atomic64_fetch_dec_acquire +#define atomic64_fetch_dec_acquire(...) \ + __atomic_op_acquire(atomic64_fetch_dec, __VA_ARGS__) +#endif + +#ifndef atomic64_fetch_dec_release +#define atomic64_fetch_dec_release(...) \ + __atomic_op_release(atomic64_fetch_dec, __VA_ARGS__) +#endif + +#ifndef atomic64_fetch_dec +#define atomic64_fetch_dec(...) \ + __atomic_op_fence(atomic64_fetch_dec, __VA_ARGS__) +#endif +#endif /* atomic64_fetch_dec_relaxed */ + /* atomic64_fetch_or_relaxed */ #ifndef atomic64_fetch_or_relaxed #define atomic64_fetch_or_relaxed atomic64_fetch_or