Merge branch 'locking-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull locking updates from Ingo Molnar:
 "Here are the locking changes in this cycle:

   - rwsem unification and simpler micro-optimizations to prepare for
     more intrusive (and more lucrative) scalability improvements in
     v5.3 (Waiman Long)

   - Lockdep irq state tracking flag usage cleanups (Frederic
     Weisbecker)

   - static key improvements (Jakub Kicinski, Peter Zijlstra)

   - misc updates, cleanups and smaller fixes"

* 'locking-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (26 commits)
  locking/lockdep: Remove unnecessary unlikely()
  locking/static_key: Don't take sleeping locks in __static_key_slow_dec_deferred()
  locking/static_key: Factor out the fast path of static_key_slow_dec()
  locking/static_key: Add support for deferred static branches
  locking/lockdep: Test all incompatible scenarios at once in check_irq_usage()
  locking/lockdep: Avoid bogus Clang warning
  locking/lockdep: Generate LOCKF_ bit composites
  locking/lockdep: Use expanded masks on find_usage_*() functions
  locking/lockdep: Map remaining magic numbers to lock usage mask names
  locking/lockdep: Move valid_state() inside CONFIG_TRACE_IRQFLAGS && CONFIG_PROVE_LOCKING
  locking/rwsem: Prevent unneeded warning during locking selftest
  locking/rwsem: Optimize rwsem structure for uncontended lock acquisition
  locking/rwsem: Enable lock event counting
  locking/lock_events: Don't show pvqspinlock events on bare metal
  locking/lock_events: Make lock_events available for all archs & other locks
  locking/qspinlock_stat: Introduce generic lockevent_*() counting APIs
  locking/rwsem: Enhance DEBUG_RWSEMS_WARN_ON() macro
  locking/rwsem: Add debug check for __down_read*()
  locking/rwsem: Micro-optimize rwsem_try_read_lock_unqueued()
  locking/rwsem: Move rwsem internal function declarations to rwsem-xadd.h
  ...
This commit is contained in:
Linus Torvalds
2019-05-06 13:50:15 -07:00
62 changed files with 989 additions and 1931 deletions

View File

@@ -3,7 +3,7 @@
# and is generally not a function of system call inputs.
KCOV_INSTRUMENT := n
obj-y += mutex.o semaphore.o rwsem.o percpu-rwsem.o
obj-y += mutex.o semaphore.o rwsem.o percpu-rwsem.o rwsem-xadd.o
ifdef CONFIG_FUNCTION_TRACER
CFLAGS_REMOVE_lockdep.o = $(CC_FLAGS_FTRACE)
@@ -25,8 +25,7 @@ obj-$(CONFIG_RT_MUTEXES) += rtmutex.o
obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o
obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o
obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o
obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o
obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem-xadd.o
obj-$(CONFIG_QUEUED_RWLOCKS) += qrwlock.o
obj-$(CONFIG_LOCK_TORTURE_TEST) += locktorture.o
obj-$(CONFIG_WW_MUTEX_SELFTEST) += test-ww_mutex.o
obj-$(CONFIG_LOCK_EVENT_COUNTS) += lock_events.o

View File

@@ -0,0 +1,179 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* Authors: Waiman Long <waiman.long@hpe.com>
*/
/*
* Collect locking event counts
*/
#include <linux/debugfs.h>
#include <linux/err.h>
#include <linux/sched.h>
#include <linux/sched/clock.h>
#include <linux/fs.h>
#include "lock_events.h"
#undef LOCK_EVENT
#define LOCK_EVENT(name) [LOCKEVENT_ ## name] = #name,
#define LOCK_EVENTS_DIR "lock_event_counts"
/*
* When CONFIG_LOCK_EVENT_COUNTS is enabled, event counts of different
* types of locks will be reported under the <debugfs>/lock_event_counts/
* directory. See lock_events_list.h for the list of available locking
* events.
*
* Writing to the special ".reset_counts" file will reset all the above
* locking event counts. This is a very slow operation and so should not
* be done frequently.
*
* These event counts are implemented as per-cpu variables which are
* summed and computed whenever the corresponding debugfs files are read. This
* minimizes added overhead making the counts usable even in a production
* environment.
*/
/*
* Debugfs file name of each event, indexed by its enum lock_events value.
* LOCK_EVENT() is redefined above, so including lock_events_list.h here
* expands to one designated initializer per event. The one extra slot
* past the last event holds the name of the reset file.
*/
static const char * const lockevent_names[lockevent_num + 1] = {
#include "lock_events_list.h"
[LOCKEVENT_reset_cnts] = ".reset_counts",
};
/*
* Per-cpu counts
*/
DEFINE_PER_CPU(unsigned long, lockevents[lockevent_num]);
/*
 * Default read handler for an event count file.
 *
 * Sums the per-cpu counters of the event whose index is stored in the
 * inode's i_private and hands the decimal total, followed by a newline,
 * to the reader. Declared __weak so that another definition can
 * override it.
 *
 * Returns -EBADF when the stored index is not a countable event,
 * otherwise whatever simple_read_from_buffer() returns.
 */
ssize_t __weak lockevent_read(struct file *file, char __user *user_buf,
			      size_t count, loff_t *ppos)
{
	/* The event index was stashed in i_private when the file was created. */
	int id = (long)file_inode(file)->i_private;
	u64 total = 0;
	char text[64];
	int cpu, nbytes;

	if (id >= lockevent_num)
		return -EBADF;

	for_each_possible_cpu(cpu)
		total += per_cpu(lockevents[id], cpu);

	nbytes = snprintf(text, sizeof(text) - 1, "%llu\n", total);

	return simple_read_from_buffer(user_buf, count, ppos, text, nbytes);
}
/*
 * Write handler shared by all event files.
 *
 * Only a write to the file whose i_private equals LOCKEVENT_reset_cnts
 * has an effect: it zeroes every event counter on every possible CPU.
 * Writes to any other file are accepted and ignored. The written data
 * itself is never looked at.
 *
 * Always returns @count, i.e. the whole write is reported as consumed.
 */
static ssize_t lockevent_write(struct file *file, const char __user *user_buf,
			       size_t count, loff_t *ppos)
{
	int cpu;

	if ((long)file_inode(file)->i_private == LOCKEVENT_reset_cnts) {
		for_each_possible_cpu(cpu) {
			unsigned long *counts = per_cpu_ptr(lockevents, cpu);
			int ev;

			for (ev = 0; ev < lockevent_num; ev++)
				WRITE_ONCE(counts[ev], 0);
		}
	}

	return count;
}
/*
* Debugfs data structures
*
* One file_operations instance is shared by every file created in
* init_lockevent_counts(), including the reset file. The handlers tell
* the files apart through the event index kept in the inode's i_private.
*/
static const struct file_operations fops_lockevent = {
.read = lockevent_read,
.write = lockevent_write,
.llseek = default_llseek,
};
#ifdef CONFIG_PARAVIRT_SPINLOCKS
#include <asm/paravirt.h>

/*
 * Tell whether the debugfs file for event @name should be left out.
 *
 * PV qspinlock events (names starting with "pv_") are skipped on bare
 * metal, which is detected through pv_is_native_spin_unlock(). That
 * answer is looked up on the first call and cached afterwards.
 */
static bool __init skip_lockevent(const char *name)
{
	static int pv_active __initdata = -1;

	if (pv_active < 0)
		pv_active = !pv_is_native_spin_unlock();

	return !pv_active && !memcmp(name, "pv_", 3);
}
#else
/* Without CONFIG_PARAVIRT_SPINLOCKS no event is ever skipped. */
static inline bool skip_lockevent(const char *name)
{
	return false;
}
#endif
/*
 * Initialize debugfs for the locking event counts.
 *
 * Creates the <debugfs>/lock_event_counts/ directory with one read-only
 * file per event (minus those rejected by skip_lockevent()) plus the
 * write-only reset file. If any entry cannot be created, everything
 * created so far is removed again.
 *
 * debugfs_create_dir() and debugfs_create_file() report failure with an
 * ERR_PTR() value rather than NULL, so a plain NULL test would never
 * notice an error. IS_ERR_OR_NULL() is used so that both conventions
 * are handled.
 *
 * Return: 0 on success, -ENOMEM if a debugfs entry could not be created.
 */
static int __init init_lockevent_counts(void)
{
	struct dentry *d_counts = debugfs_create_dir(LOCK_EVENTS_DIR, NULL);
	struct dentry *d_file;
	int i;

	if (IS_ERR_OR_NULL(d_counts))
		goto out;

	/*
	 * Create the debugfs files
	 *
	 * As reading from and writing to the stat files can be slow, only
	 * root is allowed to do the read/write to limit impact to system
	 * performance.
	 */
	for (i = 0; i < lockevent_num; i++) {
		if (skip_lockevent(lockevent_names[i]))
			continue;

		d_file = debugfs_create_file(lockevent_names[i], 0400, d_counts,
					     (void *)(long)i, &fops_lockevent);
		if (IS_ERR_OR_NULL(d_file))
			goto fail_undo;
	}

	/* The reset file is write-only and carries the reset index. */
	d_file = debugfs_create_file(lockevent_names[LOCKEVENT_reset_cnts],
				     0200, d_counts,
				     (void *)(long)LOCKEVENT_reset_cnts,
				     &fops_lockevent);
	if (IS_ERR_OR_NULL(d_file))
		goto fail_undo;

	return 0;

fail_undo:
	debugfs_remove_recursive(d_counts);
out:
	pr_warn("Could not create '%s' debugfs entries\n", LOCK_EVENTS_DIR);
	return -ENOMEM;
}
fs_initcall(init_lockevent_counts);

View File

@@ -0,0 +1,59 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* Authors: Waiman Long <longman@redhat.com>
*/
#ifndef __LOCKING_LOCK_EVENTS_H
#define __LOCKING_LOCK_EVENTS_H
/*
* Event identifiers. With LOCK_EVENT() left at its default definition,
* including lock_events_list.h yields one LOCKEVENT_<name> enumerator
* per listed event. LOCKEVENT_reset_cnts shares the value of
* lockevent_num, i.e. it is one past the last countable event.
*/
enum lock_events {
#include "lock_events_list.h"
lockevent_num, /* Total number of lock event counts */
LOCKEVENT_reset_cnts = lockevent_num,
};
#ifdef CONFIG_LOCK_EVENT_COUNTS
/*
* Per-cpu counters
*/
DECLARE_PER_CPU(unsigned long, lockevents[lockevent_num]);
/*
 * Bump this CPU's counter for @event by one, but only when @cond holds.
 * Used through the lockevent_inc() and lockevent_cond_inc() wrappers.
 */
static inline void __lockevent_inc(enum lock_events event, bool cond)
{
	if (!cond)
		return;

	__this_cpu_inc(lockevents[event]);
}
#define lockevent_inc(ev) __lockevent_inc(LOCKEVENT_ ##ev, true)
#define lockevent_cond_inc(ev, c) __lockevent_inc(LOCKEVENT_ ##ev, c)
/*
* Add @inc to this CPU's counter for @event. Used through the
* lockevent_add() wrapper.
*/
static inline void __lockevent_add(enum lock_events event, int inc)
{
__this_cpu_add(lockevents[event], inc);
}
#define lockevent_add(ev, c) __lockevent_add(LOCKEVENT_ ##ev, c)
#else /* CONFIG_LOCK_EVENT_COUNTS */
#define lockevent_inc(ev)
#define lockevent_add(ev, c)
#define lockevent_cond_inc(ev, c)
#endif /* CONFIG_LOCK_EVENT_COUNTS */
#endif /* __LOCKING_LOCK_EVENTS_H */

View File

@@ -0,0 +1,67 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* Authors: Waiman Long <longman@redhat.com>
*/
#ifndef LOCK_EVENT
#define LOCK_EVENT(name) LOCKEVENT_ ## name,
#endif
#ifdef CONFIG_QUEUED_SPINLOCKS
#ifdef CONFIG_PARAVIRT_SPINLOCKS
/*
* Locking events for PV qspinlock.
*/
LOCK_EVENT(pv_hash_hops) /* Average # of hops per hashing operation */
LOCK_EVENT(pv_kick_unlock) /* # of vCPU kicks issued at unlock time */
LOCK_EVENT(pv_kick_wake) /* # of vCPU kicks for pv_latency_wake */
LOCK_EVENT(pv_latency_kick) /* Average latency (ns) of vCPU kick */
LOCK_EVENT(pv_latency_wake) /* Average latency (ns) of kick-to-wakeup */
LOCK_EVENT(pv_lock_stealing) /* # of lock stealing operations */
LOCK_EVENT(pv_spurious_wakeup) /* # of spurious wakeups in non-head vCPUs */
LOCK_EVENT(pv_wait_again) /* # of wait's after queue head vCPU kick */
LOCK_EVENT(pv_wait_early) /* # of early vCPU wait's */
LOCK_EVENT(pv_wait_head) /* # of vCPU wait's at the queue head */
LOCK_EVENT(pv_wait_node) /* # of vCPU wait's at non-head queue node */
#endif /* CONFIG_PARAVIRT_SPINLOCKS */
/*
* Locking events for qspinlock
*
* Subtracting lock_use_node[234] from lock_slowpath will give you
* lock_use_node1.
*/
LOCK_EVENT(lock_pending) /* # of locking ops via pending code */
LOCK_EVENT(lock_slowpath) /* # of locking ops via MCS lock queue */
LOCK_EVENT(lock_use_node2) /* # of locking ops that use 2nd percpu node */
LOCK_EVENT(lock_use_node3) /* # of locking ops that use 3rd percpu node */
LOCK_EVENT(lock_use_node4) /* # of locking ops that use 4th percpu node */
LOCK_EVENT(lock_no_node) /* # of locking ops w/o using percpu node */
#endif /* CONFIG_QUEUED_SPINLOCKS */
/*
* Locking events for rwsem
*/
LOCK_EVENT(rwsem_sleep_reader) /* # of reader sleeps */
LOCK_EVENT(rwsem_sleep_writer) /* # of writer sleeps */
LOCK_EVENT(rwsem_wake_reader) /* # of reader wakeups */
LOCK_EVENT(rwsem_wake_writer) /* # of writer wakeups */
LOCK_EVENT(rwsem_opt_wlock) /* # of write locks opt-spin acquired */
LOCK_EVENT(rwsem_opt_fail) /* # of failed opt-spinnings */
LOCK_EVENT(rwsem_rlock) /* # of read locks acquired */
LOCK_EVENT(rwsem_rlock_fast) /* # of fast read locks acquired */
LOCK_EVENT(rwsem_rlock_fail) /* # of failed read lock acquisitions */
LOCK_EVENT(rwsem_rtrylock) /* # of read trylock calls */
LOCK_EVENT(rwsem_wlock) /* # of write locks acquired */
LOCK_EVENT(rwsem_wlock_fail) /* # of failed write lock acquisitions */
LOCK_EVENT(rwsem_wtrylock) /* # of write trylock calls */

View File

@@ -501,11 +501,11 @@ static char get_usage_char(struct lock_class *class, enum lock_usage_bit bit)
{
char c = '.';
if (class->usage_mask & lock_flag(bit + 2))
if (class->usage_mask & lock_flag(bit + LOCK_USAGE_DIR_MASK))
c = '+';
if (class->usage_mask & lock_flag(bit)) {
c = '-';
if (class->usage_mask & lock_flag(bit + 2))
if (class->usage_mask & lock_flag(bit + LOCK_USAGE_DIR_MASK))
c = '?';
}
@@ -1666,19 +1666,25 @@ check_redundant(struct lock_list *root, struct lock_class *target,
}
#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING)
/*
 * Graph-search callback: OR the usage mask of the visited lock class
 * into the unsigned long accumulator that @mask points to.
 * Always returns 0.
 */
static inline int usage_accumulate(struct lock_list *entry, void *mask)
{
	unsigned long *acc = mask;

	*acc |= entry->class->usage_mask;

	return 0;
}
/*
* Forwards and backwards subgraph searching, for the purposes of
* proving that two subgraphs can be connected by a new dependency
* without creating any illegal irq-safe -> irq-unsafe lock dependency.
*/
static inline int usage_match(struct lock_list *entry, void *bit)
static inline int usage_match(struct lock_list *entry, void *mask)
{
return entry->class->usage_mask & (1 << (enum lock_usage_bit)bit);
return entry->class->usage_mask & *(unsigned long *)mask;
}
/*
* Find a node in the forwards-direction dependency sub-graph starting
* at @root->class that matches @bit.
@@ -1690,14 +1696,14 @@ static inline int usage_match(struct lock_list *entry, void *bit)
* Return <0 on error.
*/
static int
find_usage_forwards(struct lock_list *root, enum lock_usage_bit bit,
find_usage_forwards(struct lock_list *root, unsigned long usage_mask,
struct lock_list **target_entry)
{
int result;
debug_atomic_inc(nr_find_usage_forwards_checks);
result = __bfs_forwards(root, (void *)bit, usage_match, target_entry);
result = __bfs_forwards(root, &usage_mask, usage_match, target_entry);
return result;
}
@@ -1713,14 +1719,14 @@ find_usage_forwards(struct lock_list *root, enum lock_usage_bit bit,
* Return <0 on error.
*/
static int
find_usage_backwards(struct lock_list *root, enum lock_usage_bit bit,
find_usage_backwards(struct lock_list *root, unsigned long usage_mask,
struct lock_list **target_entry)
{
int result;
debug_atomic_inc(nr_find_usage_backwards_checks);
result = __bfs_backwards(root, (void *)bit, usage_match, target_entry);
result = __bfs_backwards(root, &usage_mask, usage_match, target_entry);
return result;
}
@@ -1912,39 +1918,6 @@ print_bad_irq_dependency(struct task_struct *curr,
return 0;
}
static int
check_usage(struct task_struct *curr, struct held_lock *prev,
struct held_lock *next, enum lock_usage_bit bit_backwards,
enum lock_usage_bit bit_forwards, const char *irqclass)
{
int ret;
struct lock_list this, that;
struct lock_list *uninitialized_var(target_entry);
struct lock_list *uninitialized_var(target_entry1);
this.parent = NULL;
this.class = hlock_class(prev);
ret = find_usage_backwards(&this, bit_backwards, &target_entry);
if (ret < 0)
return print_bfs_bug(ret);
if (ret == 1)
return ret;
that.parent = NULL;
that.class = hlock_class(next);
ret = find_usage_forwards(&that, bit_forwards, &target_entry1);
if (ret < 0)
return print_bfs_bug(ret);
if (ret == 1)
return ret;
return print_bad_irq_dependency(curr, &this, &that,
target_entry, target_entry1,
prev, next,
bit_backwards, bit_forwards, irqclass);
}
static const char *state_names[] = {
#define LOCKDEP_STATE(__STATE) \
__stringify(__STATE),
@@ -1961,9 +1934,19 @@ static const char *state_rnames[] = {
static inline const char *state_name(enum lock_usage_bit bit)
{
return (bit & LOCK_USAGE_READ_MASK) ? state_rnames[bit >> 2] : state_names[bit >> 2];
if (bit & LOCK_USAGE_READ_MASK)
return state_rnames[bit >> LOCK_USAGE_DIR_MASK];
else
return state_names[bit >> LOCK_USAGE_DIR_MASK];
}
/*
* The bit number is encoded like:
*
* bit0: 0 exclusive, 1 read lock
* bit1: 0 used in irq, 1 irq enabled
* bit2-n: state
*/
static int exclusive_bit(int new_bit)
{
int state = new_bit & LOCK_USAGE_STATE_MASK;
@@ -1975,45 +1958,160 @@ static int exclusive_bit(int new_bit)
return state | (dir ^ LOCK_USAGE_DIR_MASK);
}
static int check_irq_usage(struct task_struct *curr, struct held_lock *prev,
struct held_lock *next, enum lock_usage_bit bit)
/*
* Observe that when given a bitmask where each bitnr is encoded as above, a
* right shift of the mask transforms the individual bitnrs as -1 and
* conversely, a left shift transforms into +1 for the individual bitnrs.
*
* So for all bits whose number have LOCK_ENABLED_* set (bitnr1 == 1), we can
* create the mask with those bit numbers using LOCK_USED_IN_* (bitnr1 == 0)
* instead by subtracting the bit number by 2, or shifting the mask right by 2.
*
* Similarly, bitnr1 == 0 becomes bitnr1 == 1 by adding 2, or shifting left 2.
*
* So split the mask (note that LOCKF_ENABLED_IRQ_ALL|LOCKF_USED_IN_IRQ_ALL is
* all bits set) and recompose with bitnr1 flipped.
*/
static unsigned long invert_dir_mask(unsigned long mask)
{
/*
* Prove that the new dependency does not connect a hardirq-safe
* lock with a hardirq-unsafe lock - to achieve this we search
* the backwards-subgraph starting at <prev>, and the
* forwards-subgraph starting at <next>:
*/
if (!check_usage(curr, prev, next, bit,
exclusive_bit(bit), state_name(bit)))
return 0;
unsigned long excl = 0;
bit++; /* _READ */
/* Invert dir */
excl |= (mask & LOCKF_ENABLED_IRQ_ALL) >> LOCK_USAGE_DIR_MASK;
excl |= (mask & LOCKF_USED_IN_IRQ_ALL) << LOCK_USAGE_DIR_MASK;
/*
* Prove that the new dependency does not connect a hardirq-safe-read
* lock with a hardirq-unsafe lock - to achieve this we search
* the backwards-subgraph starting at <prev>, and the
* forwards-subgraph starting at <next>:
*/
if (!check_usage(curr, prev, next, bit,
exclusive_bit(bit), state_name(bit)))
return 0;
return 1;
return excl;
}
static int
check_prev_add_irq(struct task_struct *curr, struct held_lock *prev,
struct held_lock *next)
/*
* As above, we clear bitnr0 (LOCK_*_READ off) with bitmask ops. First, for all
* bits with bitnr0 set (LOCK_*_READ), add those with bitnr0 cleared (LOCK_*).
* And then mask out all bitnr0.
*/
static unsigned long exclusive_mask(unsigned long mask)
{
#define LOCKDEP_STATE(__STATE) \
if (!check_irq_usage(curr, prev, next, LOCK_USED_IN_##__STATE)) \
return 0;
#include "lockdep_states.h"
#undef LOCKDEP_STATE
unsigned long excl = invert_dir_mask(mask);
return 1;
/* Strip read */
excl |= (excl & LOCKF_IRQ_READ) >> LOCK_USAGE_READ_MASK;
excl &= ~LOCKF_IRQ_READ;
return excl;
}
/*
 * Inverse of exclusive_mask(): compute every usage bit that @mask could
 * be exclusive to.
 *
 * An exclusive bit can stem from two original bits, one with
 * LOCK_USAGE_READ_MASK set and one with it cleared, so both candidates
 * are reported for each bit.
 */
static unsigned long original_mask(unsigned long mask)
{
	unsigned long orig = invert_dir_mask(mask);

	/* For each non-read usage also include its read counterpart. */
	return orig | ((orig & LOCKF_IRQ) << LOCK_USAGE_READ_MASK);
}
/*
 * Look for the first usage bit set in @mask whose exclusive counterpart
 * is set in @excl_mask.
 *
 * On a hit the pair is stored in @bitp / @excl_bitp and 0 is returned;
 * -1 is returned when no such pair exists (the outputs are then left
 * untouched).
 */
static int find_exclusive_match(unsigned long mask,
				unsigned long excl_mask,
				enum lock_usage_bit *bitp,
				enum lock_usage_bit *excl_bitp)
{
	int bit;

	for_each_set_bit(bit, &mask, LOCK_USED) {
		int excl = exclusive_bit(bit);

		if (!(excl_mask & lock_flag(excl)))
			continue;

		*bitp = bit;
		*excl_bitp = excl;
		return 0;
	}

	return -1;
}
/*
* Prove that the new dependency does not connect a hardirq-safe(-read)
* lock with a hardirq-unsafe lock - to achieve this we search
* the backwards-subgraph starting at <prev>, and the
* forwards-subgraph starting at <next>.
*
* Returns 1 when no conflicting usage is found (or when one of the
* DEBUG_LOCKS_WARN_ON() consistency checks below fires), the result of
* print_bfs_bug() when a graph search reports an error, and otherwise
* the result of print_bad_irq_dependency().
*/
static int check_irq_usage(struct task_struct *curr, struct held_lock *prev,
struct held_lock *next)
{
unsigned long usage_mask = 0, forward_mask, backward_mask;
enum lock_usage_bit forward_bit = 0, backward_bit = 0;
struct lock_list *uninitialized_var(target_entry1);
struct lock_list *uninitialized_var(target_entry);
struct lock_list this, that;
int ret;
/*
* Step 1: gather all hard/soft IRQs usages backward in an
* accumulated usage mask.
*/
this.parent = NULL;
this.class = hlock_class(prev);
ret = __bfs_backwards(&this, &usage_mask, usage_accumulate, NULL);
if (ret < 0)
return print_bfs_bug(ret);
/* Only the USED_IN_*IRQ usages (read and non-read) matter from here on. */
usage_mask &= LOCKF_USED_IN_IRQ_ALL;
/* No such usage was gathered, so there is nothing to check. */
if (!usage_mask)
return 1;
/*
* Step 2: find exclusive uses forward that match the previous
* backward accumulated mask.
*/
forward_mask = exclusive_mask(usage_mask);
that.parent = NULL;
that.class = hlock_class(next);
ret = find_usage_forwards(&that, forward_mask, &target_entry1);
if (ret < 0)
return print_bfs_bug(ret);
/* ret == 1 is treated as "no match found": the dependency is fine. */
if (ret == 1)
return ret;
/*
* Step 3: we found a bad match! Now retrieve a lock from the backward
* list whose usage mask matches the exclusive usage mask from the
* lock found on the forward list.
*/
backward_mask = original_mask(target_entry1->class->usage_mask);
ret = find_usage_backwards(&this, backward_mask, &target_entry);
if (ret < 0)
return print_bfs_bug(ret);
/* Step 2 found a match, so finding none here is flagged as unexpected. */
if (DEBUG_LOCKS_WARN_ON(ret == 1))
return 1;
/*
* Step 4: narrow down to a pair of incompatible usage bits
* and report it.
*/
ret = find_exclusive_match(target_entry->class->usage_mask,
target_entry1->class->usage_mask,
&backward_bit, &forward_bit);
if (DEBUG_LOCKS_WARN_ON(ret == -1))
return 1;
return print_bad_irq_dependency(curr, &this, &that,
target_entry, target_entry1,
prev, next,
backward_bit, forward_bit,
state_name(backward_bit));
}
static void inc_chains(void)
@@ -2030,9 +2128,8 @@ static void inc_chains(void)
#else
static inline int
check_prev_add_irq(struct task_struct *curr, struct held_lock *prev,
struct held_lock *next)
/*
* Stub variant on the #else side of the configuration conditional:
* no irq usage check is performed and 1 is always returned.
* NOTE(review): presumably this pairs with the CONFIG_TRACE_IRQFLAGS &&
* CONFIG_PROVE_LOCKING section above - confirm against the full file.
*/
static inline int check_irq_usage(struct task_struct *curr,
struct held_lock *prev, struct held_lock *next)
{
return 1;
}
@@ -2211,7 +2308,7 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev,
else if (unlikely(ret < 0))
return print_bfs_bug(ret);
if (!check_prev_add_irq(curr, prev, next))
if (!check_irq_usage(curr, prev, next))
return 0;
/*
@@ -2773,6 +2870,12 @@ static void check_chain_key(struct task_struct *curr)
#endif
}
static int mark_lock(struct task_struct *curr, struct held_lock *this,
enum lock_usage_bit new_bit);
#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING)
static void
print_usage_bug_scenario(struct held_lock *lock)
{
@@ -2842,10 +2945,6 @@ valid_state(struct task_struct *curr, struct held_lock *this,
return 1;
}
static int mark_lock(struct task_struct *curr, struct held_lock *this,
enum lock_usage_bit new_bit);
#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING)
/*
* print irq inversion bug:
@@ -2925,7 +3024,7 @@ check_usage_forwards(struct task_struct *curr, struct held_lock *this,
root.parent = NULL;
root.class = hlock_class(this);
ret = find_usage_forwards(&root, bit, &target_entry);
ret = find_usage_forwards(&root, lock_flag(bit), &target_entry);
if (ret < 0)
return print_bfs_bug(ret);
if (ret == 1)
@@ -2949,7 +3048,7 @@ check_usage_backwards(struct task_struct *curr, struct held_lock *this,
root.parent = NULL;
root.class = hlock_class(this);
ret = find_usage_backwards(&root, bit, &target_entry);
ret = find_usage_backwards(&root, lock_flag(bit), &target_entry);
if (ret < 0)
return print_bfs_bug(ret);
if (ret == 1)
@@ -3004,7 +3103,7 @@ static int (*state_verbose_f[])(struct lock_class *class) = {
static inline int state_verbose(enum lock_usage_bit bit,
struct lock_class *class)
{
return state_verbose_f[bit >> 2](class);
return state_verbose_f[bit >> LOCK_USAGE_DIR_MASK](class);
}
typedef int (*check_usage_f)(struct task_struct *, struct held_lock *,
@@ -3146,7 +3245,7 @@ void lockdep_hardirqs_on(unsigned long ip)
/*
* See the fine text that goes along with this variable definition.
*/
if (DEBUG_LOCKS_WARN_ON(unlikely(early_boot_irqs_disabled)))
if (DEBUG_LOCKS_WARN_ON(early_boot_irqs_disabled))
return;
/*

View File

@@ -42,13 +42,35 @@ enum {
__LOCKF(USED)
};
#define LOCKF_ENABLED_IRQ (LOCKF_ENABLED_HARDIRQ | LOCKF_ENABLED_SOFTIRQ)
#define LOCKF_USED_IN_IRQ (LOCKF_USED_IN_HARDIRQ | LOCKF_USED_IN_SOFTIRQ)
#define LOCKDEP_STATE(__STATE) LOCKF_ENABLED_##__STATE |
static const unsigned long LOCKF_ENABLED_IRQ =
#include "lockdep_states.h"
0;
#undef LOCKDEP_STATE
#define LOCKF_ENABLED_IRQ_READ \
(LOCKF_ENABLED_HARDIRQ_READ | LOCKF_ENABLED_SOFTIRQ_READ)
#define LOCKF_USED_IN_IRQ_READ \
(LOCKF_USED_IN_HARDIRQ_READ | LOCKF_USED_IN_SOFTIRQ_READ)
#define LOCKDEP_STATE(__STATE) LOCKF_USED_IN_##__STATE |
static const unsigned long LOCKF_USED_IN_IRQ =
#include "lockdep_states.h"
0;
#undef LOCKDEP_STATE
#define LOCKDEP_STATE(__STATE) LOCKF_ENABLED_##__STATE##_READ |
static const unsigned long LOCKF_ENABLED_IRQ_READ =
#include "lockdep_states.h"
0;
#undef LOCKDEP_STATE
#define LOCKDEP_STATE(__STATE) LOCKF_USED_IN_##__STATE##_READ |
static const unsigned long LOCKF_USED_IN_IRQ_READ =
#include "lockdep_states.h"
0;
#undef LOCKDEP_STATE
#define LOCKF_ENABLED_IRQ_ALL (LOCKF_ENABLED_IRQ | LOCKF_ENABLED_IRQ_READ)
#define LOCKF_USED_IN_IRQ_ALL (LOCKF_USED_IN_IRQ | LOCKF_USED_IN_IRQ_READ)
#define LOCKF_IRQ (LOCKF_ENABLED_IRQ | LOCKF_USED_IN_IRQ)
#define LOCKF_IRQ_READ (LOCKF_ENABLED_IRQ_READ | LOCKF_USED_IN_IRQ_READ)
/*
* CONFIG_LOCKDEP_SMALL is defined for sparc. Sparc requires .text,

View File

@@ -7,6 +7,8 @@
#include <linux/sched.h>
#include <linux/errno.h>
#include "rwsem.h"
int __percpu_init_rwsem(struct percpu_rw_semaphore *sem,
const char *name, struct lock_class_key *rwsem_key)
{

View File

@@ -395,7 +395,7 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
* 0,1,0 -> 0,0,1
*/
clear_pending_set_locked(lock);
qstat_inc(qstat_lock_pending, true);
lockevent_inc(lock_pending);
return;
/*
@@ -403,7 +403,7 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
* queuing.
*/
queue:
qstat_inc(qstat_lock_slowpath, true);
lockevent_inc(lock_slowpath);
pv_queue:
node = this_cpu_ptr(&qnodes[0].mcs);
idx = node->count++;
@@ -419,7 +419,7 @@ pv_queue:
* simple enough.
*/
if (unlikely(idx >= MAX_NODES)) {
qstat_inc(qstat_lock_no_node, true);
lockevent_inc(lock_no_node);
while (!queued_spin_trylock(lock))
cpu_relax();
goto release;
@@ -430,7 +430,7 @@ pv_queue:
/*
* Keep counts of non-zero index values:
*/
qstat_inc(qstat_lock_use_node2 + idx - 1, idx);
lockevent_cond_inc(lock_use_node2 + idx - 1, idx);
/*
* Ensure that we increment the head node->count before initialising

View File

@@ -89,7 +89,7 @@ static inline bool pv_hybrid_queued_unfair_trylock(struct qspinlock *lock)
if (!(val & _Q_LOCKED_PENDING_MASK) &&
(cmpxchg_acquire(&lock->locked, 0, _Q_LOCKED_VAL) == 0)) {
qstat_inc(qstat_pv_lock_stealing, true);
lockevent_inc(pv_lock_stealing);
return true;
}
if (!(val & _Q_TAIL_MASK) || (val & _Q_PENDING_MASK))
@@ -219,7 +219,7 @@ static struct qspinlock **pv_hash(struct qspinlock *lock, struct pv_node *node)
hopcnt++;
if (!cmpxchg(&he->lock, NULL, lock)) {
WRITE_ONCE(he->node, node);
qstat_hop(hopcnt);
lockevent_pv_hop(hopcnt);
return &he->lock;
}
}
@@ -320,8 +320,8 @@ static void pv_wait_node(struct mcs_spinlock *node, struct mcs_spinlock *prev)
smp_store_mb(pn->state, vcpu_halted);
if (!READ_ONCE(node->locked)) {
qstat_inc(qstat_pv_wait_node, true);
qstat_inc(qstat_pv_wait_early, wait_early);
lockevent_inc(pv_wait_node);
lockevent_cond_inc(pv_wait_early, wait_early);
pv_wait(&pn->state, vcpu_halted);
}
@@ -339,7 +339,8 @@ static void pv_wait_node(struct mcs_spinlock *node, struct mcs_spinlock *prev)
* So it is better to spin for a while in the hope that the
* MCS lock will be released soon.
*/
qstat_inc(qstat_pv_spurious_wakeup, !READ_ONCE(node->locked));
lockevent_cond_inc(pv_spurious_wakeup,
!READ_ONCE(node->locked));
}
/*
@@ -416,7 +417,7 @@ pv_wait_head_or_lock(struct qspinlock *lock, struct mcs_spinlock *node)
/*
* Tracking # of slowpath locking operations
*/
qstat_inc(qstat_lock_slowpath, true);
lockevent_inc(lock_slowpath);
for (;; waitcnt++) {
/*
@@ -464,8 +465,8 @@ pv_wait_head_or_lock(struct qspinlock *lock, struct mcs_spinlock *node)
}
}
WRITE_ONCE(pn->state, vcpu_hashed);
qstat_inc(qstat_pv_wait_head, true);
qstat_inc(qstat_pv_wait_again, waitcnt);
lockevent_inc(pv_wait_head);
lockevent_cond_inc(pv_wait_again, waitcnt);
pv_wait(&lock->locked, _Q_SLOW_VAL);
/*
@@ -528,7 +529,7 @@ __pv_queued_spin_unlock_slowpath(struct qspinlock *lock, u8 locked)
* vCPU is harmless other than the additional latency in completing
* the unlock.
*/
qstat_inc(qstat_pv_kick_unlock, true);
lockevent_inc(pv_kick_unlock);
pv_kick(node->cpu);
}

View File

@@ -9,262 +9,105 @@
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* Authors: Waiman Long <waiman.long@hpe.com>
* Authors: Waiman Long <longman@redhat.com>
*/
/*
* When queued spinlock statistical counters are enabled, the following
* debugfs files will be created for reporting the counter values:
*
* <debugfs>/qlockstat/
* pv_hash_hops - average # of hops per hashing operation
* pv_kick_unlock - # of vCPU kicks issued at unlock time
* pv_kick_wake - # of vCPU kicks used for computing pv_latency_wake
* pv_latency_kick - average latency (ns) of vCPU kick operation
* pv_latency_wake - average latency (ns) from vCPU kick to wakeup
* pv_lock_stealing - # of lock stealing operations
* pv_spurious_wakeup - # of spurious wakeups in non-head vCPUs
* pv_wait_again - # of wait's after a queue head vCPU kick
* pv_wait_early - # of early vCPU wait's
* pv_wait_head - # of vCPU wait's at the queue head
* pv_wait_node - # of vCPU wait's at a non-head queue node
* lock_pending - # of locking operations via pending code
* lock_slowpath - # of locking operations via MCS lock queue
* lock_use_node2 - # of locking operations that use 2nd per-CPU node
* lock_use_node3 - # of locking operations that use 3rd per-CPU node
* lock_use_node4 - # of locking operations that use 4th per-CPU node
* lock_no_node - # of locking operations without using per-CPU node
*
* Subtracting lock_use_node[234] from lock_slowpath will give you
* lock_use_node1.
*
* Writing to the "reset_counters" file will reset all the above counter
* values.
*
* These statistical counters are implemented as per-cpu variables which are
* summed and computed whenever the corresponding debugfs files are read. This
* minimizes added overhead making the counters usable even in a production
* environment.
*
* There may be slight difference between pv_kick_wake and pv_kick_unlock.
*/
enum qlock_stats {
qstat_pv_hash_hops,
qstat_pv_kick_unlock,
qstat_pv_kick_wake,
qstat_pv_latency_kick,
qstat_pv_latency_wake,
qstat_pv_lock_stealing,
qstat_pv_spurious_wakeup,
qstat_pv_wait_again,
qstat_pv_wait_early,
qstat_pv_wait_head,
qstat_pv_wait_node,
qstat_lock_pending,
qstat_lock_slowpath,
qstat_lock_use_node2,
qstat_lock_use_node3,
qstat_lock_use_node4,
qstat_lock_no_node,
qstat_num, /* Total number of statistical counters */
qstat_reset_cnts = qstat_num,
};
#include "lock_events.h"
#ifdef CONFIG_QUEUED_LOCK_STAT
#ifdef CONFIG_LOCK_EVENT_COUNTS
#ifdef CONFIG_PARAVIRT_SPINLOCKS
/*
* Collect pvqspinlock statistics
* Collect pvqspinlock locking event counts
*/
#include <linux/debugfs.h>
#include <linux/sched.h>
#include <linux/sched/clock.h>
#include <linux/fs.h>
static const char * const qstat_names[qstat_num + 1] = {
[qstat_pv_hash_hops] = "pv_hash_hops",
[qstat_pv_kick_unlock] = "pv_kick_unlock",
[qstat_pv_kick_wake] = "pv_kick_wake",
[qstat_pv_spurious_wakeup] = "pv_spurious_wakeup",
[qstat_pv_latency_kick] = "pv_latency_kick",
[qstat_pv_latency_wake] = "pv_latency_wake",
[qstat_pv_lock_stealing] = "pv_lock_stealing",
[qstat_pv_wait_again] = "pv_wait_again",
[qstat_pv_wait_early] = "pv_wait_early",
[qstat_pv_wait_head] = "pv_wait_head",
[qstat_pv_wait_node] = "pv_wait_node",
[qstat_lock_pending] = "lock_pending",
[qstat_lock_slowpath] = "lock_slowpath",
[qstat_lock_use_node2] = "lock_use_node2",
[qstat_lock_use_node3] = "lock_use_node3",
[qstat_lock_use_node4] = "lock_use_node4",
[qstat_lock_no_node] = "lock_no_node",
[qstat_reset_cnts] = "reset_counters",
};
#define EVENT_COUNT(ev) lockevents[LOCKEVENT_ ## ev]
/*
* Per-cpu counters
* PV specific per-cpu counter
*/
static DEFINE_PER_CPU(unsigned long, qstats[qstat_num]);
static DEFINE_PER_CPU(u64, pv_kick_time);
/*
* Function to read and return the qlock statistical counter values
* Function to read and return the PV qspinlock counts.
*
* The following counters are handled specially:
* 1. qstat_pv_latency_kick
* 1. pv_latency_kick
* Average kick latency (ns) = pv_latency_kick/pv_kick_unlock
* 2. qstat_pv_latency_wake
* 2. pv_latency_wake
* Average wake latency (ns) = pv_latency_wake/pv_kick_wake
* 3. qstat_pv_hash_hops
* 3. pv_hash_hops
* Average hops/hash = pv_hash_hops/pv_kick_unlock
*/
static ssize_t qstat_read(struct file *file, char __user *user_buf,
size_t count, loff_t *ppos)
ssize_t lockevent_read(struct file *file, char __user *user_buf,
size_t count, loff_t *ppos)
{
char buf[64];
int cpu, counter, len;
u64 stat = 0, kicks = 0;
int cpu, id, len;
u64 sum = 0, kicks = 0;
/*
* Get the counter ID stored in file->f_inode->i_private
*/
counter = (long)file_inode(file)->i_private;
id = (long)file_inode(file)->i_private;
if (counter >= qstat_num)
if (id >= lockevent_num)
return -EBADF;
for_each_possible_cpu(cpu) {
stat += per_cpu(qstats[counter], cpu);
sum += per_cpu(lockevents[id], cpu);
/*
* Need to sum additional counter for some of them
* Need to sum additional counters for some of them
*/
switch (counter) {
switch (id) {
case qstat_pv_latency_kick:
case qstat_pv_hash_hops:
kicks += per_cpu(qstats[qstat_pv_kick_unlock], cpu);
case LOCKEVENT_pv_latency_kick:
case LOCKEVENT_pv_hash_hops:
kicks += per_cpu(EVENT_COUNT(pv_kick_unlock), cpu);
break;
case qstat_pv_latency_wake:
kicks += per_cpu(qstats[qstat_pv_kick_wake], cpu);
case LOCKEVENT_pv_latency_wake:
kicks += per_cpu(EVENT_COUNT(pv_kick_wake), cpu);
break;
}
}
if (counter == qstat_pv_hash_hops) {
if (id == LOCKEVENT_pv_hash_hops) {
u64 frac = 0;
if (kicks) {
frac = 100ULL * do_div(stat, kicks);
frac = 100ULL * do_div(sum, kicks);
frac = DIV_ROUND_CLOSEST_ULL(frac, kicks);
}
/*
* Return a X.XX decimal number
*/
len = snprintf(buf, sizeof(buf) - 1, "%llu.%02llu\n", stat, frac);
len = snprintf(buf, sizeof(buf) - 1, "%llu.%02llu\n",
sum, frac);
} else {
/*
* Round to the nearest ns
*/
if ((counter == qstat_pv_latency_kick) ||
(counter == qstat_pv_latency_wake)) {
if ((id == LOCKEVENT_pv_latency_kick) ||
(id == LOCKEVENT_pv_latency_wake)) {
if (kicks)
stat = DIV_ROUND_CLOSEST_ULL(stat, kicks);
sum = DIV_ROUND_CLOSEST_ULL(sum, kicks);
}
len = snprintf(buf, sizeof(buf) - 1, "%llu\n", stat);
len = snprintf(buf, sizeof(buf) - 1, "%llu\n", sum);
}
return simple_read_from_buffer(user_buf, count, ppos, buf, len);
}
/*
* Function to handle write request
*
* When counter = reset_cnts, reset all the counter values on every possible
* CPU. A write to any other file is accepted and ignored. In both cases the
* full byte count is reported as written; the user buffer is never read.
*
* The counter updates aren't atomic, so a concurrent increment may survive
* the reset.
* NOTE(review): this comment used to say the resetting is done twice, but
* the code below makes a single pass over the counters - confirm the intent.
*/
static ssize_t qstat_write(struct file *file, const char __user *user_buf,
size_t count, loff_t *ppos)
{
int cpu;
/*
* Get the counter ID stored in file->f_inode->i_private
*/
if ((long)file_inode(file)->i_private != qstat_reset_cnts)
return count;
for_each_possible_cpu(cpu) {
int i;
/* This CPU's copy of the qstats[] counter array. */
unsigned long *ptr = per_cpu_ptr(qstats, cpu);
for (i = 0 ; i < qstat_num; i++)
WRITE_ONCE(ptr[i], 0);
}
return count;
}
/*
* Debugfs data structures
*/
/*
* File operations shared by every qlockstat debugfs file; the counter ID
* of a given file is carried in its inode's i_private.
*/
static const struct file_operations fops_qstat = {
.read = qstat_read,
.write = qstat_write,
.llseek = default_llseek,
};
/*
* Initialize debugfs for the qspinlock statistical counters
*/
static int __init init_qspinlock_stat(void)
{
	struct dentry *d_qstat = debugfs_create_dir("qlockstat", NULL);
	int i;

	if (!d_qstat)
		goto out;

	/*
	 * Create one debugfs file per counter plus the reset file.
	 *
	 * qstat_reset_cnts equals qstat_num, so a single pass over
	 * 0..qstat_num covers both: the counters are read-only (0400) and
	 * the final entry, the reset file, is write-only (0200).
	 *
	 * As reading from and writing to the stat files can be slow, only
	 * root is allowed to do the read/write to limit impact to system
	 * performance.
	 */
	for (i = 0; i <= qstat_num; i++) {
		if (!debugfs_create_file(qstat_names[i],
					 (i == qstat_reset_cnts) ? 0200 : 0400,
					 d_qstat, (void *)(long)i, &fops_qstat))
			goto fail_undo;
	}

	return 0;

fail_undo:
	/* Tear down whatever was created before the failure. */
	debugfs_remove_recursive(d_qstat);
out:
	pr_warn("Could not create 'qlockstat' debugfs entries\n");
	return -ENOMEM;
}
fs_initcall(init_qspinlock_stat);
/*
* Increment the PV qspinlock statistical counters
*/
static inline void qstat_inc(enum qlock_stats stat, bool cond)
{
	/* Nothing to count unless the caller's condition holds. */
	if (!cond)
		return;

	/* Bump this CPU's copy of the selected counter. */
	this_cpu_inc(qstats[stat]);
}
/*
* PV hash hop count
*/
static inline void qstat_hop(int hopcnt)
static inline void lockevent_pv_hop(int hopcnt)
{
this_cpu_add(qstats[qstat_pv_hash_hops], hopcnt);
this_cpu_add(EVENT_COUNT(pv_hash_hops), hopcnt);
}
/*
@@ -276,7 +119,7 @@ static inline void __pv_kick(int cpu)
per_cpu(pv_kick_time, cpu) = start;
pv_kick(cpu);
this_cpu_add(qstats[qstat_pv_latency_kick], sched_clock() - start);
this_cpu_add(EVENT_COUNT(pv_latency_kick), sched_clock() - start);
}
/*
@@ -289,18 +132,19 @@ static inline void __pv_wait(u8 *ptr, u8 val)
*pkick_time = 0;
pv_wait(ptr, val);
if (*pkick_time) {
this_cpu_add(qstats[qstat_pv_latency_wake],
this_cpu_add(EVENT_COUNT(pv_latency_wake),
sched_clock() - *pkick_time);
qstat_inc(qstat_pv_kick_wake, true);
lockevent_inc(pv_kick_wake);
}
}
#define pv_kick(c) __pv_kick(c)
#define pv_wait(p, v) __pv_wait(p, v)
#else /* CONFIG_QUEUED_LOCK_STAT */
#endif /* CONFIG_PARAVIRT_SPINLOCKS */
static inline void qstat_inc(enum qlock_stats stat, bool cond) { }
static inline void qstat_hop(int hopcnt) { }
#else /* CONFIG_LOCK_EVENT_COUNTS */
#endif /* CONFIG_QUEUED_LOCK_STAT */
static inline void lockevent_pv_hop(int hopcnt) { }
#endif /* CONFIG_LOCK_EVENT_COUNTS */

View File

@@ -1,339 +0,0 @@
// SPDX-License-Identifier: GPL-2.0
/* rwsem-spinlock.c: R/W semaphores: contention handling functions for
* generic spinlock implementation
*
* Copyright (c) 2001 David Howells (dhowells@redhat.com).
* - Derived partially from idea by Andrea Arcangeli <andrea@suse.de>
* - Derived also from comments by Linus
*/
#include <linux/rwsem.h>
#include <linux/sched/signal.h>
#include <linux/sched/debug.h>
#include <linux/export.h>
/* Kind of lock a queued waiter is waiting for (see struct rwsem_waiter). */
enum rwsem_waiter_type {
RWSEM_WAITING_FOR_WRITE,
RWSEM_WAITING_FOR_READ
};
/*
* One entry on a semaphore's wait_list. The down paths in this file place
* it on the waiting task's own stack.
*/
struct rwsem_waiter {
/* Link in sem->wait_list. */
struct list_head list;
/*
* The waiting task. __rwsem_do_wake() sets this to NULL when it grants
* a read lock to the waiter.
*/
struct task_struct *task;
/* Whether the task waits for a read or a write lock. */
enum rwsem_waiter_type type;
};
/*
 * Report whether the semaphore is currently held.
 *
 * Returns non-zero if sem->count is non-zero. If sem->wait_lock cannot be
 * taken without spinning, the semaphore is reported as locked.
 */
int rwsem_is_locked(struct rw_semaphore *sem)
{
	unsigned long irqflags;
	int locked;

	/* wait_lock is busy: do not spin, just answer "locked". */
	if (!raw_spin_trylock_irqsave(&sem->wait_lock, irqflags))
		return 1;

	locked = (sem->count != 0);
	raw_spin_unlock_irqrestore(&sem->wait_lock, irqflags);

	return locked;
}
EXPORT_SYMBOL(rwsem_is_locked);
/*
 * initialise the semaphore
 *
 * Leaves @sem unlocked (count == 0) with an empty wait list. With
 * CONFIG_DEBUG_LOCK_ALLOC, @name and @key are registered with lockdep.
 */
void __init_rwsem(struct rw_semaphore *sem, const char *name,
		  struct lock_class_key *key)
{
#ifdef CONFIG_DEBUG_LOCK_ALLOC
	/* Catch re-initialisation of a semaphore that is still held. */
	debug_check_no_locks_freed((void *)sem, sizeof(*sem));
	lockdep_init_map(&sem->dep_map, name, key, 0);
#endif
	/* The three fields are independent of each other. */
	INIT_LIST_HEAD(&sem->wait_list);
	raw_spin_lock_init(&sem->wait_lock);
	sem->count = 0;
}
EXPORT_SYMBOL(__init_rwsem);
/*
* handle the lock release when processes blocked on it that can now run
* - if we come here, then:
* - the 'active count' _reached_ zero
* - the 'waiting count' is non-zero
* - the spinlock must be held by the caller
* - woken process blocks are discarded from the list after having task zeroed
* - writers are only woken if wakewrite is non-zero
*/
static inline struct rw_semaphore *
__rwsem_do_wake(struct rw_semaphore *sem, int wakewrite)
{
struct rwsem_waiter *waiter;
struct task_struct *tsk;
int woken;
/* Start with the waiter at the head of the queue. */
waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list);
if (waiter->type == RWSEM_WAITING_FOR_WRITE) {
if (wakewrite)
/* Wake up a writer. Note that we do not grant it the
* lock - it will have to acquire it when it runs. */
wake_up_process(waiter->task);
goto out;
}
/* grant an infinite number of read locks to the front of the queue */
woken = 0;
do {
/* Remember the successor before this waiter is unlinked. */
struct list_head *next = waiter->list.next;
list_del(&waiter->list);
tsk = waiter->task;
/*
* Make sure we do not wakeup the next reader before
* setting the nil condition to grant the next reader;
* otherwise we could miss the wakeup on the other
* side and end up sleeping again. See the pairing
* in rwsem_down_read_failed().
*/
smp_mb();
/* A NULL task is what tells the sleeping reader it owns the lock. */
waiter->task = NULL;
wake_up_process(tsk);
/* Balances the get_task_struct() done when the reader queued itself. */
put_task_struct(tsk);
woken++;
if (next == &sem->wait_list)
break;
waiter = list_entry(next, struct rwsem_waiter, list);
} while (waiter->type != RWSEM_WAITING_FOR_WRITE);
/* Account for every reader that was just granted the lock. */
sem->count += woken;
out:
return sem;
}
/*
 * wake a single writer
 *
 * Wakes the task of the waiter at the head of sem->wait_list. The waiter
 * is not removed from the list and sem->count is not touched here.
 * Returns @sem.
 */
static inline struct rw_semaphore *
__rwsem_wake_one_writer(struct rw_semaphore *sem)
{
	wake_up_process(list_entry(sem->wait_list.next,
				   struct rwsem_waiter, list)->task);

	return sem;
}
/*
 * get a read lock on the semaphore
 *
 * The lock is granted immediately when no writer holds it (count >= 0) and
 * nobody is queued. Otherwise the caller queues itself as a reader and
 * sleeps in task state @state until __rwsem_do_wake() grants the lock by
 * clearing waiter.task.
 *
 * Returns 0 with the read lock held, or -EINTR if a signal that is pending
 * for @state is seen before the lock was granted.
 */
int __sched __down_read_common(struct rw_semaphore *sem, int state)
{
	struct rwsem_waiter waiter;
	unsigned long flags;

	raw_spin_lock_irqsave(&sem->wait_lock, flags);

	if (sem->count >= 0 && list_empty(&sem->wait_list)) {
		/* granted */
		sem->count++;
		raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
		goto out;
	}

	/* set up my own style of waitqueue */
	waiter.task = current;
	waiter.type = RWSEM_WAITING_FOR_READ;
	/*
	 * This reference is dropped by __rwsem_do_wake() when it grants the
	 * lock, or by the out_nolock path below when the wait is aborted.
	 */
	get_task_struct(current);

	list_add_tail(&waiter.list, &sem->wait_list);

	/* wait to be given the lock */
	for (;;) {
		/* waiter.task is cleared under wait_lock once we own the lock. */
		if (!waiter.task)
			break;
		if (signal_pending_state(state, current))
			goto out_nolock;
		set_current_state(state);
		raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
		schedule();
		raw_spin_lock_irqsave(&sem->wait_lock, flags);
	}

	raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
 out:
	return 0;

out_nolock:
	/*
	 * We didn't take the lock, so there is a writer which is either the
	 * owner or the first waiter of the sem. If it's a waiter, it will
	 * be woken by the current owner. No need to wake anybody.
	 */
	list_del(&waiter.list);
	raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
	/*
	 * waiter.task was still set under wait_lock, so __rwsem_do_wake()
	 * never processed this waiter and never dropped the reference taken
	 * above. Drop it here, otherwise the task_struct reference leaks on
	 * every interrupted wait.
	 */
	put_task_struct(current);
	return -EINTR;
}
/*
* Acquire the read lock, waiting in TASK_UNINTERRUPTIBLE if needed. The
* return value of __down_read_common() is ignored.
*/
void __sched __down_read(struct rw_semaphore *sem)
{
__down_read_common(sem, TASK_UNINTERRUPTIBLE);
}
/*
* Acquire the read lock, waiting in TASK_KILLABLE if needed.
* Returns 0 on success or -EINTR, as reported by __down_read_common().
*/
int __sched __down_read_killable(struct rw_semaphore *sem)
{
return __down_read_common(sem, TASK_KILLABLE);
}
/*
 * trylock for reading -- returns 1 if successful, 0 if contention
 *
 * Succeeds only when no writer holds the lock (count >= 0) and the wait
 * list is empty. Never sleeps.
 */
int __down_read_trylock(struct rw_semaphore *sem)
{
	unsigned long irqflags;
	int granted;

	raw_spin_lock_irqsave(&sem->wait_lock, irqflags);

	granted = (sem->count >= 0 && list_empty(&sem->wait_list));
	if (granted)
		sem->count++;

	raw_spin_unlock_irqrestore(&sem->wait_lock, irqflags);

	return granted;
}
/*
* get a write lock on the semaphore
*
* Queues the caller as a writer and sleeps in task state @state until
* sem->count is 0, then marks the semaphore write-locked (count = -1).
* Returns 0 on success, or -EINTR if a signal that is pending for @state
* is seen first.
*/
int __sched __down_write_common(struct rw_semaphore *sem, int state)
{
struct rwsem_waiter waiter;
unsigned long flags;
/* Never modified below: the success path always returns 0. */
int ret = 0;
raw_spin_lock_irqsave(&sem->wait_lock, flags);
/* set up my own style of waitqueue */
waiter.task = current;
waiter.type = RWSEM_WAITING_FOR_WRITE;
list_add_tail(&waiter.list, &sem->wait_list);
/* wait for someone to release the lock */
for (;;) {
/*
* This is the key to supporting write lock stealing: it allows a
* task that is already on a CPU to take the lock as soon as the
* count is zero, rather than going to sleep and waiting to be
* woken by the system or by whoever is at the head of the wait
* list.
*/
if (sem->count == 0)
break;
if (signal_pending_state(state, current))
goto out_nolock;
set_current_state(state);
raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
schedule();
raw_spin_lock_irqsave(&sem->wait_lock, flags);
}
/* got the lock */
sem->count = -1;
list_del(&waiter.list);
raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
return ret;
out_nolock:
list_del(&waiter.list);
/*
* Other waiters remain and no writer holds the lock: let
* __rwsem_do_wake() grant readers at the front of the queue. With
* wakewrite == 0, a writer at the head of the queue is not woken.
*/
if (!list_empty(&sem->wait_list) && sem->count >= 0)
__rwsem_do_wake(sem, 0);
raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
return -EINTR;
}
/*
* Acquire the write lock, waiting in TASK_UNINTERRUPTIBLE if needed. The
* return value of __down_write_common() is ignored.
*/
void __sched __down_write(struct rw_semaphore *sem)
{
__down_write_common(sem, TASK_UNINTERRUPTIBLE);
}
/*
* Acquire the write lock, waiting in TASK_KILLABLE if needed.
* Returns 0 on success or -EINTR, as reported by __down_write_common().
*/
int __sched __down_write_killable(struct rw_semaphore *sem)
{
return __down_write_common(sem, TASK_KILLABLE);
}
/*
 * trylock for writing -- returns 1 if successful, 0 if contention
 *
 * Succeeds only when the semaphore is completely free (count == 0), in
 * which case it is marked write-locked (count = -1). Never sleeps.
 */
int __down_write_trylock(struct rw_semaphore *sem)
{
	unsigned long irqflags;
	int acquired;

	raw_spin_lock_irqsave(&sem->wait_lock, irqflags);

	acquired = (sem->count == 0);
	if (acquired)
		sem->count = -1;	/* got the lock */

	raw_spin_unlock_irqrestore(&sem->wait_lock, irqflags);

	return acquired;
}
/*
 * release a read lock on the semaphore
 *
 * Drops one reader from the count. If that was the last reader and
 * somebody is queued, the waiter at the head of the queue is woken.
 */
void __up_read(struct rw_semaphore *sem)
{
	unsigned long irqflags;

	raw_spin_lock_irqsave(&sem->wait_lock, irqflags);

	sem->count--;
	if (sem->count == 0 && !list_empty(&sem->wait_list))
		sem = __rwsem_wake_one_writer(sem);

	raw_spin_unlock_irqrestore(&sem->wait_lock, irqflags);
}
/*
 * release a write lock on the semaphore
 *
 * Marks the semaphore free and, if anybody is queued, wakes the front of
 * the queue; a writer at the head may be woken (wakewrite == 1).
 */
void __up_write(struct rw_semaphore *sem)
{
	unsigned long irqflags;
	int have_waiters;

	raw_spin_lock_irqsave(&sem->wait_lock, irqflags);

	sem->count = 0;
	have_waiters = !list_empty(&sem->wait_list);
	if (have_waiters)
		sem = __rwsem_do_wake(sem, 1);

	raw_spin_unlock_irqrestore(&sem->wait_lock, irqflags);
}
/*
 * downgrade a write lock into a read lock
 * - the caller becomes the single active reader (count = 1)
 * - just wake up any readers at the front of the queue; a writer at the
 *   head of the queue is left asleep (wakewrite == 0)
 */
void __downgrade_write(struct rw_semaphore *sem)
{
	unsigned long irqflags;
	int have_waiters;

	raw_spin_lock_irqsave(&sem->wait_lock, irqflags);

	sem->count = 1;
	have_waiters = !list_empty(&sem->wait_list);
	if (have_waiters)
		sem = __rwsem_do_wake(sem, 0);

	raw_spin_unlock_irqrestore(&sem->wait_lock, irqflags);
}

View File

@@ -147,6 +147,7 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem,
* will notice the queued writer.
*/
wake_q_add(wake_q, waiter->task);
lockevent_inc(rwsem_wake_writer);
}
return;
@@ -176,9 +177,8 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem,
goto try_reader_grant;
}
/*
* It is not really necessary to set it to reader-owned here,
* but it gives the spinners an early indication that the
* readers now have the lock.
* Set it to reader-owned to give spinners an early
* indication that readers now have the lock.
*/
__rwsem_set_reader_owned(sem, waiter->task);
}
@@ -215,6 +215,7 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem,
}
adjustment = woken * RWSEM_ACTIVE_READ_BIAS - adjustment;
lockevent_cond_inc(rwsem_wake_reader, woken);
if (list_empty(&sem->wait_list)) {
/* hit end of list above */
adjustment -= RWSEM_WAITING_BIAS;
@@ -224,92 +225,6 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem,
atomic_long_add(adjustment, &sem->count);
}
/*
* Wait for the read lock to be granted
*/
/*
* Slow path of a read lock attempt: queue the caller on sem->wait_list and
* sleep in task state @state until waiter.task is cleared.
*
* Returns @sem once the read lock is held, or ERR_PTR(-EINTR) if a signal
* that is pending for @state arrives while the waiter is still queued.
*/
static inline struct rw_semaphore __sched *
__rwsem_down_read_failed_common(struct rw_semaphore *sem, int state)
{
/* Default adjustment takes back one active-reader bias from the count. */
long count, adjustment = -RWSEM_ACTIVE_READ_BIAS;
struct rwsem_waiter waiter;
DEFINE_WAKE_Q(wake_q);
waiter.task = current;
waiter.type = RWSEM_WAITING_FOR_READ;
raw_spin_lock_irq(&sem->wait_lock);
if (list_empty(&sem->wait_list)) {
/*
* In case the wait queue is empty and the lock isn't owned
* by a writer, this reader can exit the slowpath and return
* immediately as its RWSEM_ACTIVE_READ_BIAS has already
* been set in the count.
*/
if (atomic_long_read(&sem->count) >= 0) {
raw_spin_unlock_irq(&sem->wait_lock);
return sem;
}
/* First waiter on an empty queue: also add the waiting bias. */
adjustment += RWSEM_WAITING_BIAS;
}
list_add_tail(&waiter.list, &sem->wait_list);
/* we're now waiting on the lock, but no longer actively locking */
count = atomic_long_add_return(adjustment, &sem->count);
/*
* If there are no active locks, wake the front queued process(es).
*
* If there are no writers and we are first in the queue,
* wake our own waiter to join the existing active readers !
*/
if (count == RWSEM_WAITING_BIAS ||
(count > RWSEM_WAITING_BIAS &&
adjustment != -RWSEM_ACTIVE_READ_BIAS))
__rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
raw_spin_unlock_irq(&sem->wait_lock);
/* The collected wakeups are issued after wait_lock is dropped. */
wake_up_q(&wake_q);
/* wait to be given the lock */
while (true) {
set_current_state(state);
if (!waiter.task)
break;
if (signal_pending_state(state, current)) {
/* Re-check under wait_lock: the lock may have been granted meanwhile. */
raw_spin_lock_irq(&sem->wait_lock);
if (waiter.task)
goto out_nolock;
raw_spin_unlock_irq(&sem->wait_lock);
break;
}
schedule();
}
__set_current_state(TASK_RUNNING);
return sem;
out_nolock:
/* Still queued and wait_lock is held: dequeue and give up. */
list_del(&waiter.list);
/* Last waiter gone: take the waiting bias back out of the count. */
if (list_empty(&sem->wait_list))
atomic_long_add(-RWSEM_WAITING_BIAS, &sem->count);
raw_spin_unlock_irq(&sem->wait_lock);
__set_current_state(TASK_RUNNING);
return ERR_PTR(-EINTR);
}
/* Read-lock slow path that waits in TASK_UNINTERRUPTIBLE. */
__visible struct rw_semaphore * __sched
rwsem_down_read_failed(struct rw_semaphore *sem)
{
return __rwsem_down_read_failed_common(sem, TASK_UNINTERRUPTIBLE);
}
EXPORT_SYMBOL(rwsem_down_read_failed);
/*
* Read-lock slow path that waits in TASK_KILLABLE; the common helper
* returns ERR_PTR(-EINTR) when the wait is interrupted.
*/
__visible struct rw_semaphore * __sched
rwsem_down_read_failed_killable(struct rw_semaphore *sem)
{
return __rwsem_down_read_failed_common(sem, TASK_KILLABLE);
}
EXPORT_SYMBOL(rwsem_down_read_failed_killable);
/*
* This function must be called with the sem->wait_lock held to prevent
* race conditions between checking the rwsem wait list and setting the
@@ -346,21 +261,17 @@ static inline bool rwsem_try_write_lock(long count, struct rw_semaphore *sem)
*/
static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem)
{
long old, count = atomic_long_read(&sem->count);
long count = atomic_long_read(&sem->count);
while (true) {
if (!(count == 0 || count == RWSEM_WAITING_BIAS))
return false;
old = atomic_long_cmpxchg_acquire(&sem->count, count,
count + RWSEM_ACTIVE_WRITE_BIAS);
if (old == count) {
while (!count || count == RWSEM_WAITING_BIAS) {
if (atomic_long_try_cmpxchg_acquire(&sem->count, &count,
count + RWSEM_ACTIVE_WRITE_BIAS)) {
rwsem_set_owner(sem);
lockevent_inc(rwsem_opt_wlock);
return true;
}
count = old;
}
return false;
}
static inline bool owner_on_cpu(struct task_struct *owner)
@@ -481,6 +392,7 @@ static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
osq_unlock(&sem->osq);
done:
preempt_enable();
lockevent_cond_inc(rwsem_opt_fail, !taken);
return taken;
}
@@ -504,6 +416,97 @@ static inline bool rwsem_has_spinner(struct rw_semaphore *sem)
}
#endif
/*
* Wait for the read lock to be granted
*/
/*
* Slow path of a read lock attempt: queue the caller on sem->wait_list and
* sleep in task state @state until waiter.task is cleared. Each outcome is
* recorded with a lockevent counter.
*
* Returns @sem once the read lock is held, or ERR_PTR(-EINTR) if a signal
* that is pending for @state arrives while the waiter is still queued.
*/
static inline struct rw_semaphore __sched *
__rwsem_down_read_failed_common(struct rw_semaphore *sem, int state)
{
/* Default adjustment takes back one active-reader bias from the count. */
long count, adjustment = -RWSEM_ACTIVE_READ_BIAS;
struct rwsem_waiter waiter;
DEFINE_WAKE_Q(wake_q);
waiter.task = current;
waiter.type = RWSEM_WAITING_FOR_READ;
raw_spin_lock_irq(&sem->wait_lock);
if (list_empty(&sem->wait_list)) {
/*
* In case the wait queue is empty and the lock isn't owned
* by a writer, this reader can exit the slowpath and return
* immediately as its RWSEM_ACTIVE_READ_BIAS has already
* been set in the count.
*/
if (atomic_long_read(&sem->count) >= 0) {
raw_spin_unlock_irq(&sem->wait_lock);
/* The lock is held for read on this early exit: mark the owner field. */
rwsem_set_reader_owned(sem);
lockevent_inc(rwsem_rlock_fast);
return sem;
}
/* First waiter on an empty queue: also add the waiting bias. */
adjustment += RWSEM_WAITING_BIAS;
}
list_add_tail(&waiter.list, &sem->wait_list);
/* we're now waiting on the lock, but no longer actively locking */
count = atomic_long_add_return(adjustment, &sem->count);
/*
* If there are no active locks, wake the front queued process(es).
*
* If there are no writers and we are first in the queue,
* wake our own waiter to join the existing active readers !
*/
if (count == RWSEM_WAITING_BIAS ||
(count > RWSEM_WAITING_BIAS &&
adjustment != -RWSEM_ACTIVE_READ_BIAS))
__rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
raw_spin_unlock_irq(&sem->wait_lock);
/* The collected wakeups are issued after wait_lock is dropped. */
wake_up_q(&wake_q);
/* wait to be given the lock */
while (true) {
set_current_state(state);
if (!waiter.task)
break;
if (signal_pending_state(state, current)) {
/* Re-check under wait_lock: the lock may have been granted meanwhile. */
raw_spin_lock_irq(&sem->wait_lock);
if (waiter.task)
goto out_nolock;
raw_spin_unlock_irq(&sem->wait_lock);
break;
}
schedule();
/* Counted once per return from schedule(). */
lockevent_inc(rwsem_sleep_reader);
}
__set_current_state(TASK_RUNNING);
lockevent_inc(rwsem_rlock);
return sem;
out_nolock:
/* Still queued and wait_lock is held: dequeue and give up. */
list_del(&waiter.list);
/* Last waiter gone: take the waiting bias back out of the count. */
if (list_empty(&sem->wait_list))
atomic_long_add(-RWSEM_WAITING_BIAS, &sem->count);
raw_spin_unlock_irq(&sem->wait_lock);
__set_current_state(TASK_RUNNING);
lockevent_inc(rwsem_rlock_fail);
return ERR_PTR(-EINTR);
}
/* Read-lock slow path that waits in TASK_UNINTERRUPTIBLE. */
__visible struct rw_semaphore * __sched
rwsem_down_read_failed(struct rw_semaphore *sem)
{
return __rwsem_down_read_failed_common(sem, TASK_UNINTERRUPTIBLE);
}
EXPORT_SYMBOL(rwsem_down_read_failed);
/*
* Read-lock slow path that waits in TASK_KILLABLE; the common helper
* returns ERR_PTR(-EINTR) when the wait is interrupted.
*/
__visible struct rw_semaphore * __sched
rwsem_down_read_failed_killable(struct rw_semaphore *sem)
{
return __rwsem_down_read_failed_common(sem, TASK_KILLABLE);
}
EXPORT_SYMBOL(rwsem_down_read_failed_killable);
/*
* Wait until we successfully acquire the write lock
*/
@@ -580,6 +583,7 @@ __rwsem_down_write_failed_common(struct rw_semaphore *sem, int state)
goto out_nolock;
schedule();
lockevent_inc(rwsem_sleep_writer);
set_current_state(state);
} while ((count = atomic_long_read(&sem->count)) & RWSEM_ACTIVE_MASK);
@@ -588,6 +592,7 @@ __rwsem_down_write_failed_common(struct rw_semaphore *sem, int state)
__set_current_state(TASK_RUNNING);
list_del(&waiter.list);
raw_spin_unlock_irq(&sem->wait_lock);
lockevent_inc(rwsem_wlock);
return ret;
@@ -601,6 +606,7 @@ out_nolock:
__rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
raw_spin_unlock_irq(&sem->wait_lock);
wake_up_q(&wake_q);
lockevent_inc(rwsem_wlock_fail);
return ERR_PTR(-EINTR);
}

View File

@@ -24,7 +24,6 @@ void __sched down_read(struct rw_semaphore *sem)
rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_);
LOCK_CONTENDED(sem, __down_read_trylock, __down_read);
rwsem_set_reader_owned(sem);
}
EXPORT_SYMBOL(down_read);
@@ -39,7 +38,6 @@ int __sched down_read_killable(struct rw_semaphore *sem)
return -EINTR;
}
rwsem_set_reader_owned(sem);
return 0;
}
@@ -52,10 +50,8 @@ int down_read_trylock(struct rw_semaphore *sem)
{
int ret = __down_read_trylock(sem);
if (ret == 1) {
if (ret == 1)
rwsem_acquire_read(&sem->dep_map, 0, 1, _RET_IP_);
rwsem_set_reader_owned(sem);
}
return ret;
}
@@ -70,7 +66,6 @@ void __sched down_write(struct rw_semaphore *sem)
rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_);
LOCK_CONTENDED(sem, __down_write_trylock, __down_write);
rwsem_set_owner(sem);
}
EXPORT_SYMBOL(down_write);
@@ -88,7 +83,6 @@ int __sched down_write_killable(struct rw_semaphore *sem)
return -EINTR;
}
rwsem_set_owner(sem);
return 0;
}
@@ -101,10 +95,8 @@ int down_write_trylock(struct rw_semaphore *sem)
{
int ret = __down_write_trylock(sem);
if (ret == 1) {
if (ret == 1)
rwsem_acquire(&sem->dep_map, 0, 1, _RET_IP_);
rwsem_set_owner(sem);
}
return ret;
}
@@ -117,9 +109,7 @@ EXPORT_SYMBOL(down_write_trylock);
void up_read(struct rw_semaphore *sem)
{
rwsem_release(&sem->dep_map, 1, _RET_IP_);
DEBUG_RWSEMS_WARN_ON(!((unsigned long)sem->owner & RWSEM_READER_OWNED));
rwsem_clear_reader_owned(sem);
__up_read(sem);
}
@@ -131,9 +121,7 @@ EXPORT_SYMBOL(up_read);
void up_write(struct rw_semaphore *sem)
{
rwsem_release(&sem->dep_map, 1, _RET_IP_);
DEBUG_RWSEMS_WARN_ON(sem->owner != current);
rwsem_clear_owner(sem);
__up_write(sem);
}
@@ -145,9 +133,7 @@ EXPORT_SYMBOL(up_write);
void downgrade_write(struct rw_semaphore *sem)
{
lock_downgrade(&sem->dep_map, _RET_IP_);
DEBUG_RWSEMS_WARN_ON(sem->owner != current);
rwsem_set_reader_owned(sem);
__downgrade_write(sem);
}
@@ -161,7 +147,6 @@ void down_read_nested(struct rw_semaphore *sem, int subclass)
rwsem_acquire_read(&sem->dep_map, subclass, 0, _RET_IP_);
LOCK_CONTENDED(sem, __down_read_trylock, __down_read);
rwsem_set_reader_owned(sem);
}
EXPORT_SYMBOL(down_read_nested);
@@ -172,7 +157,6 @@ void _down_write_nest_lock(struct rw_semaphore *sem, struct lockdep_map *nest)
rwsem_acquire_nest(&sem->dep_map, 0, 0, nest, _RET_IP_);
LOCK_CONTENDED(sem, __down_write_trylock, __down_write);
rwsem_set_owner(sem);
}
EXPORT_SYMBOL(_down_write_nest_lock);
@@ -193,7 +177,6 @@ void down_write_nested(struct rw_semaphore *sem, int subclass)
rwsem_acquire(&sem->dep_map, subclass, 0, _RET_IP_);
LOCK_CONTENDED(sem, __down_write_trylock, __down_write);
rwsem_set_owner(sem);
}
EXPORT_SYMBOL(down_write_nested);
@@ -208,7 +191,6 @@ int __sched down_write_killable_nested(struct rw_semaphore *sem, int subclass)
return -EINTR;
}
rwsem_set_owner(sem);
return 0;
}
@@ -216,7 +198,8 @@ EXPORT_SYMBOL(down_write_killable_nested);
void up_read_non_owner(struct rw_semaphore *sem)
{
DEBUG_RWSEMS_WARN_ON(!((unsigned long)sem->owner & RWSEM_READER_OWNED));
DEBUG_RWSEMS_WARN_ON(!((unsigned long)sem->owner & RWSEM_READER_OWNED),
sem);
__up_read(sem);
}

View File

@@ -23,15 +23,44 @@
* is involved. Ideally we would like to track all the readers that own
* a rwsem, but the overhead is simply too big.
*/
#include "lock_events.h"
#define RWSEM_READER_OWNED (1UL << 0)
#define RWSEM_ANONYMOUSLY_OWNED (1UL << 1)
#ifdef CONFIG_DEBUG_RWSEMS
# define DEBUG_RWSEMS_WARN_ON(c) DEBUG_LOCKS_WARN_ON(c)
# define DEBUG_RWSEMS_WARN_ON(c, sem) do { \
if (!debug_locks_silent && \
WARN_ONCE(c, "DEBUG_RWSEMS_WARN_ON(%s): count = 0x%lx, owner = 0x%lx, curr 0x%lx, list %sempty\n",\
#c, atomic_long_read(&(sem)->count), \
(long)((sem)->owner), (long)current, \
list_empty(&(sem)->wait_list) ? "" : "not ")) \
debug_locks_off(); \
} while (0)
#else
# define DEBUG_RWSEMS_WARN_ON(c)
# define DEBUG_RWSEMS_WARN_ON(c, sem)
#endif
/*
* R/W semaphores originally for PPC using the stuff in lib/rwsem.c.
* Adapted largely from include/asm-i386/rwsem.h
* by Paul Mackerras <paulus@samba.org>.
*/
/*
* the semaphore definition
*/
#ifdef CONFIG_64BIT
# define RWSEM_ACTIVE_MASK 0xffffffffL
#else
# define RWSEM_ACTIVE_MASK 0x0000ffffL
#endif
#define RWSEM_ACTIVE_BIAS 0x00000001L
#define RWSEM_WAITING_BIAS (-RWSEM_ACTIVE_MASK-1)
#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS
#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
/*
* All writes to owner are protected by WRITE_ONCE() to make sure that
@@ -132,3 +161,144 @@ static inline void rwsem_clear_reader_owned(struct rw_semaphore *sem)
{
}
#endif
extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem);
extern struct rw_semaphore *rwsem_down_read_failed_killable(struct rw_semaphore *sem);
extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem);
extern struct rw_semaphore *rwsem_down_write_failed_killable(struct rw_semaphore *sem);
extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem);
extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem);
/*
 * lock for reading
 *
 * Fast path: bump the count with acquire ordering. A positive result
 * means the read lock was taken and the owner field is marked
 * reader-owned here. Otherwise the slow path is entered, and on return
 * the owner field is only checked, not set.
 */
static inline void __down_read(struct rw_semaphore *sem)
{
	long count = atomic_long_inc_return_acquire(&sem->count);

	if (unlikely(count <= 0)) {
		rwsem_down_read_failed(sem);
		DEBUG_RWSEMS_WARN_ON(!((unsigned long)sem->owner &
					RWSEM_READER_OWNED), sem);
		return;
	}

	rwsem_set_reader_owned(sem);
}
/*
 * Killable variant of __down_read().
 *
 * Returns 0 with the read lock held, or -EINTR when the slow path reports
 * an error pointer.
 */
static inline int __down_read_killable(struct rw_semaphore *sem)
{
	long count = atomic_long_inc_return_acquire(&sem->count);

	/* Fast path: the count is positive, the read lock is ours. */
	if (!unlikely(count <= 0)) {
		rwsem_set_reader_owned(sem);
		return 0;
	}

	if (IS_ERR(rwsem_down_read_failed_killable(sem)))
		return -EINTR;

	DEBUG_RWSEMS_WARN_ON(!((unsigned long)sem->owner &
				RWSEM_READER_OWNED), sem);
	return 0;
}
/*
* Try to take the read lock without sleeping.
* Returns 1 if the lock was acquired, 0 otherwise.
*/
static inline int __down_read_trylock(struct rw_semaphore *sem)
{
/*
* Optimize for the case when the rwsem is not locked at all.
*/
long tmp = RWSEM_UNLOCKED_VALUE;
lockevent_inc(rwsem_rtrylock);
do {
/*
* Try to add one reader bias on top of the expected count. When
* the try_cmpxchg fails it updates tmp with the value it found, so
* the next iteration retries from the current count.
*/
if (atomic_long_try_cmpxchg_acquire(&sem->count, &tmp,
tmp + RWSEM_ACTIVE_READ_BIAS)) {
rwsem_set_reader_owned(sem);
return 1;
}
/* Give up as soon as the observed count is negative. */
} while (tmp >= 0);
return 0;
}
/*
 * lock for writing
 *
 * Adds the write bias with acquire ordering. Any result other than
 * exactly RWSEM_ACTIVE_WRITE_BIAS means the lock was not free, so the
 * slow path is entered. The owner is recorded once that returns.
 */
static inline void __down_write(struct rw_semaphore *sem)
{
	long count = atomic_long_add_return_acquire(RWSEM_ACTIVE_WRITE_BIAS,
						    &sem->count);

	if (unlikely(count != RWSEM_ACTIVE_WRITE_BIAS))
		rwsem_down_write_failed(sem);

	rwsem_set_owner(sem);
}
/*
 * Killable variant of __down_write().
 *
 * Returns 0 with the write lock held and the owner recorded, or -EINTR
 * when the slow path reports an error pointer (the owner is not set).
 */
static inline int __down_write_killable(struct rw_semaphore *sem)
{
	long count = atomic_long_add_return_acquire(RWSEM_ACTIVE_WRITE_BIAS,
						    &sem->count);

	/* The slow path is only entered when the fast path did not win. */
	if (unlikely(count != RWSEM_ACTIVE_WRITE_BIAS) &&
	    IS_ERR(rwsem_down_write_failed_killable(sem)))
		return -EINTR;

	rwsem_set_owner(sem);
	return 0;
}
/*
 * Try to take the write lock without sleeping.
 *
 * A single cmpxchg from RWSEM_UNLOCKED_VALUE to RWSEM_ACTIVE_WRITE_BIAS;
 * there is no retry. Returns true (and records the owner) on success,
 * false otherwise.
 */
static inline int __down_write_trylock(struct rw_semaphore *sem)
{
	long old;

	lockevent_inc(rwsem_wtrylock);
	old = atomic_long_cmpxchg_acquire(&sem->count, RWSEM_UNLOCKED_VALUE,
					  RWSEM_ACTIVE_WRITE_BIAS);
	if (old != RWSEM_UNLOCKED_VALUE)
		return false;

	rwsem_set_owner(sem);
	return true;
}
/*
* unlock after reading
*
* Checks and clears the reader-owned marking before the count is
* decremented with release ordering.
*/
static inline void __up_read(struct rw_semaphore *sem)
{
long tmp;
DEBUG_RWSEMS_WARN_ON(!((unsigned long)sem->owner & RWSEM_READER_OWNED),
sem);
rwsem_clear_reader_owned(sem);
tmp = atomic_long_dec_return_release(&sem->count);
/*
* Call rwsem_wake() only when the new count is below -1 and its
* active part (RWSEM_ACTIVE_MASK) is zero.
*/
if (unlikely(tmp < -1 && (tmp & RWSEM_ACTIVE_MASK) == 0))
rwsem_wake(sem);
}
/*
* unlock after writing
*
* Checks (when the debug macro is enabled) that the caller is the recorded
* owner, clears the owner, then removes the write bias with release
* ordering.
*/
static inline void __up_write(struct rw_semaphore *sem)
{
DEBUG_RWSEMS_WARN_ON(sem->owner != current, sem);
rwsem_clear_owner(sem);
/* A negative count after the subtraction triggers rwsem_wake(). */
if (unlikely(atomic_long_sub_return_release(RWSEM_ACTIVE_WRITE_BIAS,
&sem->count) < 0))
rwsem_wake(sem);
}
/*
* downgrade write lock to read lock
*
* Subtracts RWSEM_WAITING_BIAS from the count, which turns the write bias
* (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) into a single active-reader
* bias, and marks the semaphore reader-owned.
*/
static inline void __downgrade_write(struct rw_semaphore *sem)
{
long tmp;
/*
* When downgrading from exclusive to shared ownership,
* anything inside the write-locked region cannot leak
* into the read side. In contrast, anything in the
* read-locked region is ok to be re-ordered into the
* write side. As such, rely on RELEASE semantics.
*/
DEBUG_RWSEMS_WARN_ON(sem->owner != current, sem);
tmp = atomic_long_add_return_release(-RWSEM_WAITING_BIAS, &sem->count);
rwsem_set_reader_owned(sem);
/* A negative count after the adjustment triggers rwsem_downgrade_wake(). */
if (tmp < 0)
rwsem_downgrade_wake(sem);
}