Merge branch 'locking-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull locking updates from Ingo Molnar:
"Here are the locking changes in this cycle:
- rwsem unification and simpler micro-optimizations to prepare for
more intrusive (and more lucrative) scalability improvements in
v5.3 (Waiman Long)
- Lockdep irq state tracking flag usage cleanups (Frederic
Weisbecker)
- static key improvements (Jakub Kicinski, Peter Zijlstra)
- misc updates, cleanups and smaller fixes"
* 'locking-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (26 commits)
locking/lockdep: Remove unnecessary unlikely()
locking/static_key: Don't take sleeping locks in __static_key_slow_dec_deferred()
locking/static_key: Factor out the fast path of static_key_slow_dec()
locking/static_key: Add support for deferred static branches
locking/lockdep: Test all incompatible scenarios at once in check_irq_usage()
locking/lockdep: Avoid bogus Clang warning
locking/lockdep: Generate LOCKF_ bit composites
locking/lockdep: Use expanded masks on find_usage_*() functions
locking/lockdep: Map remaining magic numbers to lock usage mask names
locking/lockdep: Move valid_state() inside CONFIG_TRACE_IRQFLAGS && CONFIG_PROVE_LOCKING
locking/rwsem: Prevent unneeded warning during locking selftest
locking/rwsem: Optimize rwsem structure for uncontended lock acquisition
locking/rwsem: Enable lock event counting
locking/lock_events: Don't show pvqspinlock events on bare metal
locking/lock_events: Make lock_events available for all archs & other locks
locking/qspinlock_stat: Introduce generic lockevent_*() counting APIs
locking/rwsem: Enhance DEBUG_RWSEMS_WARN_ON() macro
locking/rwsem: Add debug check for __down_read*()
locking/rwsem: Micro-optimize rwsem_try_read_lock_unqueued()
locking/rwsem: Move rwsem internal function declarations to rwsem-xadd.h
...
kernel/locking/Makefile
@@ -3,7 +3,7 @@
# and is generally not a function of system call inputs.
KCOV_INSTRUMENT := n

obj-y += mutex.o semaphore.o rwsem.o percpu-rwsem.o
obj-y += mutex.o semaphore.o rwsem.o percpu-rwsem.o rwsem-xadd.o

ifdef CONFIG_FUNCTION_TRACER
CFLAGS_REMOVE_lockdep.o = $(CC_FLAGS_FTRACE)
@@ -25,8 +25,7 @@ obj-$(CONFIG_RT_MUTEXES) += rtmutex.o
obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o
obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o
obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o
obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o
obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem-xadd.o
obj-$(CONFIG_QUEUED_RWLOCKS) += qrwlock.o
obj-$(CONFIG_LOCK_TORTURE_TEST) += locktorture.o
obj-$(CONFIG_WW_MUTEX_SELFTEST) += test-ww_mutex.o
obj-$(CONFIG_LOCK_EVENT_COUNTS) += lock_events.o
kernel/locking/lock_events.c (new file, 179 lines)
@@ -0,0 +1,179 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * Authors: Waiman Long <waiman.long@hpe.com>
 */

/*
 * Collect locking event counts
 */
#include <linux/debugfs.h>
#include <linux/sched.h>
#include <linux/sched/clock.h>
#include <linux/fs.h>

#include "lock_events.h"

#undef LOCK_EVENT
#define LOCK_EVENT(name)	[LOCKEVENT_ ## name] = #name,

#define LOCK_EVENTS_DIR		"lock_event_counts"

/*
 * When CONFIG_LOCK_EVENT_COUNTS is enabled, event counts of different
 * types of locks will be reported under the <debugfs>/lock_event_counts/
 * directory. See lock_events_list.h for the list of available locking
 * events.
 *
 * Writing to the special ".reset_counts" file will reset all the above
 * locking event counts. This is a very slow operation and so should not
 * be done frequently.
 *
 * These event counts are implemented as per-cpu variables which are
 * summed and computed whenever the corresponding debugfs files are read. This
 * minimizes added overhead making the counts usable even in a production
 * environment.
 */
static const char * const lockevent_names[lockevent_num + 1] = {

#include "lock_events_list.h"

	[LOCKEVENT_reset_cnts] = ".reset_counts",
};

/*
 * Per-cpu counts
 */
DEFINE_PER_CPU(unsigned long, lockevents[lockevent_num]);

/*
 * The lockevent_read() function can be overridden.
 */
ssize_t __weak lockevent_read(struct file *file, char __user *user_buf,
			      size_t count, loff_t *ppos)
{
	char buf[64];
	int cpu, id, len;
	u64 sum = 0;

	/*
	 * Get the counter ID stored in file->f_inode->i_private
	 */
	id = (long)file_inode(file)->i_private;

	if (id >= lockevent_num)
		return -EBADF;

	for_each_possible_cpu(cpu)
		sum += per_cpu(lockevents[id], cpu);
	len = snprintf(buf, sizeof(buf) - 1, "%llu\n", sum);

	return simple_read_from_buffer(user_buf, count, ppos, buf, len);
}

/*
 * Function to handle write request
 *
 * When idx = reset_cnts, reset all the counts.
 */
static ssize_t lockevent_write(struct file *file, const char __user *user_buf,
			       size_t count, loff_t *ppos)
{
	int cpu;

	/*
	 * Get the counter ID stored in file->f_inode->i_private
	 */
	if ((long)file_inode(file)->i_private != LOCKEVENT_reset_cnts)
		return count;

	for_each_possible_cpu(cpu) {
		int i;
		unsigned long *ptr = per_cpu_ptr(lockevents, cpu);

		for (i = 0 ; i < lockevent_num; i++)
			WRITE_ONCE(ptr[i], 0);
	}
	return count;
}

/*
 * Debugfs data structures
 */
static const struct file_operations fops_lockevent = {
	.read = lockevent_read,
	.write = lockevent_write,
	.llseek = default_llseek,
};

#ifdef CONFIG_PARAVIRT_SPINLOCKS
#include <asm/paravirt.h>

static bool __init skip_lockevent(const char *name)
{
	static int pv_on __initdata = -1;

	if (pv_on < 0)
		pv_on = !pv_is_native_spin_unlock();
	/*
	 * Skip PV qspinlock events on bare metal.
	 */
	if (!pv_on && !memcmp(name, "pv_", 3))
		return true;
	return false;
}
#else
static inline bool skip_lockevent(const char *name)
{
	return false;
}
#endif

/*
 * Initialize debugfs for the locking event counts.
 */
static int __init init_lockevent_counts(void)
{
	struct dentry *d_counts = debugfs_create_dir(LOCK_EVENTS_DIR, NULL);
	int i;

	if (!d_counts)
		goto out;

	/*
	 * Create the debugfs files
	 *
	 * As reading from and writing to the stat files can be slow, only
	 * root is allowed to do the read/write to limit impact to system
	 * performance.
	 */
	for (i = 0; i < lockevent_num; i++) {
		if (skip_lockevent(lockevent_names[i]))
			continue;
		if (!debugfs_create_file(lockevent_names[i], 0400, d_counts,
					 (void *)(long)i, &fops_lockevent))
			goto fail_undo;
	}

	if (!debugfs_create_file(lockevent_names[LOCKEVENT_reset_cnts], 0200,
				 d_counts, (void *)(long)LOCKEVENT_reset_cnts,
				 &fops_lockevent))
		goto fail_undo;

	return 0;
fail_undo:
	debugfs_remove_recursive(d_counts);
out:
	pr_warn("Could not create '%s' debugfs entries\n", LOCK_EVENTS_DIR);
	return -ENOMEM;
}
fs_initcall(init_lockevent_counts);
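Each event above becomes one read-only debugfs file whose content is the per-CPU sum computed in lockevent_read(). A minimal user-space sketch of reading one counter (the debugfs mount point and the chosen event name are assumptions; any name from lock_events_list.h works, and CONFIG_LOCK_EVENT_COUNTS plus root access are required):

```c
/* Hypothetical reader, not part of the patch itself. */
#include <stdio.h>

int main(void)
{
	unsigned long long sum;
	FILE *f = fopen("/sys/kernel/debug/lock_event_counts/rwsem_sleep_reader", "r");

	if (!f) {
		perror("fopen");
		return 1;
	}
	if (fscanf(f, "%llu", &sum) == 1)
		printf("rwsem_sleep_reader = %llu\n", sum);
	fclose(f);
	return 0;
}
```

Writing anything to the root-only ".reset_counts" file (mode 0200) zeroes every counter on every possible CPU, which is why lockevent_write() above loops over for_each_possible_cpu().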
kernel/locking/lock_events.h (new file, 59 lines)
@@ -0,0 +1,59 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * Authors: Waiman Long <longman@redhat.com>
 */

#ifndef __LOCKING_LOCK_EVENTS_H
#define __LOCKING_LOCK_EVENTS_H

enum lock_events {

#include "lock_events_list.h"

	lockevent_num,	/* Total number of lock event counts */
	LOCKEVENT_reset_cnts = lockevent_num,
};

#ifdef CONFIG_LOCK_EVENT_COUNTS
/*
 * Per-cpu counters
 */
DECLARE_PER_CPU(unsigned long, lockevents[lockevent_num]);

/*
 * Increment the PV qspinlock statistical counters
 */
static inline void __lockevent_inc(enum lock_events event, bool cond)
{
	if (cond)
		__this_cpu_inc(lockevents[event]);
}

#define lockevent_inc(ev)	  __lockevent_inc(LOCKEVENT_ ##ev, true)
#define lockevent_cond_inc(ev, c) __lockevent_inc(LOCKEVENT_ ##ev, c)

static inline void __lockevent_add(enum lock_events event, int inc)
{
	__this_cpu_add(lockevents[event], inc);
}

#define lockevent_add(ev, c)	__lockevent_add(LOCKEVENT_ ##ev, c)

#else /* CONFIG_LOCK_EVENT_COUNTS */

#define lockevent_inc(ev)
#define lockevent_add(ev, c)
#define lockevent_cond_inc(ev, c)

#endif /* CONFIG_LOCK_EVENT_COUNTS */
#endif /* __LOCKING_LOCK_EVENTS_H */
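A minimal sketch of how a call site is expected to use these helpers; the function below is illustrative only, while the event names are the real ones from lock_events_list.h. With CONFIG_LOCK_EVENT_COUNTS=n all three macros expand to nothing, so call sites pay no cost.

```c
/* Illustrative call site, not part of the patch. */
static void example_slowpath_accounting(int idx)
{
	/* Unconditional per-cpu increment of one event. */
	lockevent_inc(lock_slowpath);

	/*
	 * Conditional increment: counts only when idx != 0, and the
	 * "lock_use_node2 + idx - 1" arithmetic selects the matching
	 * lock_use_node[234] event, as the qspinlock code later in this
	 * diff does.
	 */
	lockevent_cond_inc(lock_use_node2 + idx - 1, idx);
}
```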
kernel/locking/lock_events_list.h (new file, 67 lines)
@@ -0,0 +1,67 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * Authors: Waiman Long <longman@redhat.com>
 */

#ifndef LOCK_EVENT
#define LOCK_EVENT(name)	LOCKEVENT_ ## name,
#endif

#ifdef CONFIG_QUEUED_SPINLOCKS
#ifdef CONFIG_PARAVIRT_SPINLOCKS
/*
 * Locking events for PV qspinlock.
 */
LOCK_EVENT(pv_hash_hops)	/* Average # of hops per hashing operation */
LOCK_EVENT(pv_kick_unlock)	/* # of vCPU kicks issued at unlock time   */
LOCK_EVENT(pv_kick_wake)	/* # of vCPU kicks for pv_latency_wake     */
LOCK_EVENT(pv_latency_kick)	/* Average latency (ns) of vCPU kick       */
LOCK_EVENT(pv_latency_wake)	/* Average latency (ns) of kick-to-wakeup  */
LOCK_EVENT(pv_lock_stealing)	/* # of lock stealing operations           */
LOCK_EVENT(pv_spurious_wakeup)	/* # of spurious wakeups in non-head vCPUs */
LOCK_EVENT(pv_wait_again)	/* # of wait's after queue head vCPU kick  */
LOCK_EVENT(pv_wait_early)	/* # of early vCPU wait's                  */
LOCK_EVENT(pv_wait_head)	/* # of vCPU wait's at the queue head      */
LOCK_EVENT(pv_wait_node)	/* # of vCPU wait's at non-head queue node */
#endif /* CONFIG_PARAVIRT_SPINLOCKS */

/*
 * Locking events for qspinlock
 *
 * Subtracting lock_use_node[234] from lock_slowpath will give you
 * lock_use_node1.
 */
LOCK_EVENT(lock_pending)	/* # of locking ops via pending code          */
LOCK_EVENT(lock_slowpath)	/* # of locking ops via MCS lock queue        */
LOCK_EVENT(lock_use_node2)	/* # of locking ops that use 2nd percpu node  */
LOCK_EVENT(lock_use_node3)	/* # of locking ops that use 3rd percpu node  */
LOCK_EVENT(lock_use_node4)	/* # of locking ops that use 4th percpu node  */
LOCK_EVENT(lock_no_node)	/* # of locking ops w/o using percpu node     */
#endif /* CONFIG_QUEUED_SPINLOCKS */

/*
 * Locking events for rwsem
 */
LOCK_EVENT(rwsem_sleep_reader)	/* # of reader sleeps                  */
LOCK_EVENT(rwsem_sleep_writer)	/* # of writer sleeps                  */
LOCK_EVENT(rwsem_wake_reader)	/* # of reader wakeups                 */
LOCK_EVENT(rwsem_wake_writer)	/* # of writer wakeups                 */
LOCK_EVENT(rwsem_opt_wlock)	/* # of write locks opt-spin acquired  */
LOCK_EVENT(rwsem_opt_fail)	/* # of failed opt-spinnings           */
LOCK_EVENT(rwsem_rlock)		/* # of read locks acquired            */
LOCK_EVENT(rwsem_rlock_fast)	/* # of fast read locks acquired       */
LOCK_EVENT(rwsem_rlock_fail)	/* # of failed read lock acquisitions  */
LOCK_EVENT(rwsem_rtrylock)	/* # of read trylock calls             */
LOCK_EVENT(rwsem_wlock)		/* # of write locks acquired           */
LOCK_EVENT(rwsem_wlock_fail)	/* # of failed write lock acquisitions */
LOCK_EVENT(rwsem_wtrylock)	/* # of write trylock calls            */
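This header is an x-macro list: lock_events.h includes it with LOCK_EVENT() expanding to an enumerator, and lock_events.c includes it again with LOCK_EVENT() expanding to a name-table entry, so the enum and the debugfs file names can never drift apart. A stand-alone sketch of the same pattern (the toy event names and list macro are made up for illustration):

```c
#include <stdio.h>

/* Toy stand-in for lock_events_list.h */
#define TOY_EVENT_LIST \
	TOY_EVENT(lock_fast) \
	TOY_EVENT(lock_slow)

/* First expansion: enum values (what lock_events.h does). */
#define TOY_EVENT(name) TOYEVENT_ ## name,
enum toy_events { TOY_EVENT_LIST toyevent_num };
#undef TOY_EVENT

/* Second expansion: matching name strings (what lock_events.c does). */
#define TOY_EVENT(name) [TOYEVENT_ ## name] = #name,
static const char * const toy_names[toyevent_num] = { TOY_EVENT_LIST };
#undef TOY_EVENT

int main(void)
{
	for (int i = 0; i < toyevent_num; i++)
		printf("%d: %s\n", i, toy_names[i]);
	return 0;
}
```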
@@ -501,11 +501,11 @@ static char get_usage_char(struct lock_class *class, enum lock_usage_bit bit)
|
||||
{
|
||||
char c = '.';
|
||||
|
||||
if (class->usage_mask & lock_flag(bit + 2))
|
||||
if (class->usage_mask & lock_flag(bit + LOCK_USAGE_DIR_MASK))
|
||||
c = '+';
|
||||
if (class->usage_mask & lock_flag(bit)) {
|
||||
c = '-';
|
||||
if (class->usage_mask & lock_flag(bit + 2))
|
||||
if (class->usage_mask & lock_flag(bit + LOCK_USAGE_DIR_MASK))
|
||||
c = '?';
|
||||
}
|
||||
|
||||
@@ -1666,19 +1666,25 @@ check_redundant(struct lock_list *root, struct lock_class *target,
|
||||
}
|
||||
|
||||
#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING)
|
||||
|
||||
static inline int usage_accumulate(struct lock_list *entry, void *mask)
|
||||
{
|
||||
*(unsigned long *)mask |= entry->class->usage_mask;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Forwards and backwards subgraph searching, for the purposes of
|
||||
* proving that two subgraphs can be connected by a new dependency
|
||||
* without creating any illegal irq-safe -> irq-unsafe lock dependency.
|
||||
*/
|
||||
|
||||
static inline int usage_match(struct lock_list *entry, void *bit)
|
||||
static inline int usage_match(struct lock_list *entry, void *mask)
|
||||
{
|
||||
return entry->class->usage_mask & (1 << (enum lock_usage_bit)bit);
|
||||
return entry->class->usage_mask & *(unsigned long *)mask;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* Find a node in the forwards-direction dependency sub-graph starting
|
||||
* at @root->class that matches @bit.
|
||||
@@ -1690,14 +1696,14 @@ static inline int usage_match(struct lock_list *entry, void *bit)
|
||||
* Return <0 on error.
|
||||
*/
|
||||
static int
|
||||
find_usage_forwards(struct lock_list *root, enum lock_usage_bit bit,
|
||||
find_usage_forwards(struct lock_list *root, unsigned long usage_mask,
|
||||
struct lock_list **target_entry)
|
||||
{
|
||||
int result;
|
||||
|
||||
debug_atomic_inc(nr_find_usage_forwards_checks);
|
||||
|
||||
result = __bfs_forwards(root, (void *)bit, usage_match, target_entry);
|
||||
result = __bfs_forwards(root, &usage_mask, usage_match, target_entry);
|
||||
|
||||
return result;
|
||||
}
|
||||
@@ -1713,14 +1719,14 @@ find_usage_forwards(struct lock_list *root, enum lock_usage_bit bit,
|
||||
* Return <0 on error.
|
||||
*/
|
||||
static int
|
||||
find_usage_backwards(struct lock_list *root, enum lock_usage_bit bit,
|
||||
find_usage_backwards(struct lock_list *root, unsigned long usage_mask,
|
||||
struct lock_list **target_entry)
|
||||
{
|
||||
int result;
|
||||
|
||||
debug_atomic_inc(nr_find_usage_backwards_checks);
|
||||
|
||||
result = __bfs_backwards(root, (void *)bit, usage_match, target_entry);
|
||||
result = __bfs_backwards(root, &usage_mask, usage_match, target_entry);
|
||||
|
||||
return result;
|
||||
}
|
||||
@@ -1912,39 +1918,6 @@ print_bad_irq_dependency(struct task_struct *curr,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
check_usage(struct task_struct *curr, struct held_lock *prev,
|
||||
struct held_lock *next, enum lock_usage_bit bit_backwards,
|
||||
enum lock_usage_bit bit_forwards, const char *irqclass)
|
||||
{
|
||||
int ret;
|
||||
struct lock_list this, that;
|
||||
struct lock_list *uninitialized_var(target_entry);
|
||||
struct lock_list *uninitialized_var(target_entry1);
|
||||
|
||||
this.parent = NULL;
|
||||
|
||||
this.class = hlock_class(prev);
|
||||
ret = find_usage_backwards(&this, bit_backwards, &target_entry);
|
||||
if (ret < 0)
|
||||
return print_bfs_bug(ret);
|
||||
if (ret == 1)
|
||||
return ret;
|
||||
|
||||
that.parent = NULL;
|
||||
that.class = hlock_class(next);
|
||||
ret = find_usage_forwards(&that, bit_forwards, &target_entry1);
|
||||
if (ret < 0)
|
||||
return print_bfs_bug(ret);
|
||||
if (ret == 1)
|
||||
return ret;
|
||||
|
||||
return print_bad_irq_dependency(curr, &this, &that,
|
||||
target_entry, target_entry1,
|
||||
prev, next,
|
||||
bit_backwards, bit_forwards, irqclass);
|
||||
}
|
||||
|
||||
static const char *state_names[] = {
|
||||
#define LOCKDEP_STATE(__STATE) \
|
||||
__stringify(__STATE),
|
||||
@@ -1961,9 +1934,19 @@ static const char *state_rnames[] = {
|
||||
|
||||
static inline const char *state_name(enum lock_usage_bit bit)
|
||||
{
|
||||
return (bit & LOCK_USAGE_READ_MASK) ? state_rnames[bit >> 2] : state_names[bit >> 2];
|
||||
if (bit & LOCK_USAGE_READ_MASK)
|
||||
return state_rnames[bit >> LOCK_USAGE_DIR_MASK];
|
||||
else
|
||||
return state_names[bit >> LOCK_USAGE_DIR_MASK];
|
||||
}
|
||||
|
||||
/*
|
||||
* The bit number is encoded like:
|
||||
*
|
||||
* bit0: 0 exclusive, 1 read lock
|
||||
* bit1: 0 used in irq, 1 irq enabled
|
||||
* bit2-n: state
|
||||
*/
|
||||
static int exclusive_bit(int new_bit)
|
||||
{
|
||||
int state = new_bit & LOCK_USAGE_STATE_MASK;
|
||||
@@ -1975,45 +1958,160 @@ static int exclusive_bit(int new_bit)
|
||||
return state | (dir ^ LOCK_USAGE_DIR_MASK);
|
||||
}
|
||||
|
||||
static int check_irq_usage(struct task_struct *curr, struct held_lock *prev,
|
||||
struct held_lock *next, enum lock_usage_bit bit)
|
||||
/*
|
||||
* Observe that when given a bitmask where each bitnr is encoded as above, a
|
||||
* right shift of the mask transforms the individual bitnrs as -1 and
|
||||
* conversely, a left shift transforms into +1 for the individual bitnrs.
|
||||
*
|
||||
* So for all bits whose number have LOCK_ENABLED_* set (bitnr1 == 1), we can
|
||||
* create the mask with those bit numbers using LOCK_USED_IN_* (bitnr1 == 0)
|
||||
* instead by subtracting the bit number by 2, or shifting the mask right by 2.
|
||||
*
|
||||
* Similarly, bitnr1 == 0 becomes bitnr1 == 1 by adding 2, or shifting left 2.
|
||||
*
|
||||
* So split the mask (note that LOCKF_ENABLED_IRQ_ALL|LOCKF_USED_IN_IRQ_ALL is
|
||||
* all bits set) and recompose with bitnr1 flipped.
|
||||
*/
|
||||
static unsigned long invert_dir_mask(unsigned long mask)
|
||||
{
|
||||
/*
|
||||
* Prove that the new dependency does not connect a hardirq-safe
|
||||
* lock with a hardirq-unsafe lock - to achieve this we search
|
||||
* the backwards-subgraph starting at <prev>, and the
|
||||
* forwards-subgraph starting at <next>:
|
||||
*/
|
||||
if (!check_usage(curr, prev, next, bit,
|
||||
exclusive_bit(bit), state_name(bit)))
|
||||
return 0;
|
||||
unsigned long excl = 0;
|
||||
|
||||
bit++; /* _READ */
|
||||
/* Invert dir */
|
||||
excl |= (mask & LOCKF_ENABLED_IRQ_ALL) >> LOCK_USAGE_DIR_MASK;
|
||||
excl |= (mask & LOCKF_USED_IN_IRQ_ALL) << LOCK_USAGE_DIR_MASK;
|
||||
|
||||
/*
|
||||
* Prove that the new dependency does not connect a hardirq-safe-read
|
||||
* lock with a hardirq-unsafe lock - to achieve this we search
|
||||
* the backwards-subgraph starting at <prev>, and the
|
||||
* forwards-subgraph starting at <next>:
|
||||
*/
|
||||
if (!check_usage(curr, prev, next, bit,
|
||||
exclusive_bit(bit), state_name(bit)))
|
||||
return 0;
|
||||
|
||||
return 1;
|
||||
return excl;
|
||||
}
|
||||
|
||||
static int
|
||||
check_prev_add_irq(struct task_struct *curr, struct held_lock *prev,
|
||||
struct held_lock *next)
|
||||
/*
|
||||
* As above, we clear bitnr0 (LOCK_*_READ off) with bitmask ops. First, for all
|
||||
* bits with bitnr0 set (LOCK_*_READ), add those with bitnr0 cleared (LOCK_*).
|
||||
* And then mask out all bitnr0.
|
||||
*/
|
||||
static unsigned long exclusive_mask(unsigned long mask)
|
||||
{
|
||||
#define LOCKDEP_STATE(__STATE) \
|
||||
if (!check_irq_usage(curr, prev, next, LOCK_USED_IN_##__STATE)) \
|
||||
return 0;
|
||||
#include "lockdep_states.h"
|
||||
#undef LOCKDEP_STATE
|
||||
unsigned long excl = invert_dir_mask(mask);
|
||||
|
||||
return 1;
|
||||
/* Strip read */
|
||||
excl |= (excl & LOCKF_IRQ_READ) >> LOCK_USAGE_READ_MASK;
|
||||
excl &= ~LOCKF_IRQ_READ;
|
||||
|
||||
return excl;
|
||||
}
|
||||
|
||||
/*
|
||||
* Retrieve the _possible_ original mask to which @mask is
|
||||
* exclusive. Ie: this is the opposite of exclusive_mask().
|
||||
* Note that 2 possible original bits can match an exclusive
|
||||
* bit: one has LOCK_USAGE_READ_MASK set, the other has it
|
||||
* cleared. So both are returned for each exclusive bit.
|
||||
*/
|
||||
static unsigned long original_mask(unsigned long mask)
|
||||
{
|
||||
unsigned long excl = invert_dir_mask(mask);
|
||||
|
||||
/* Include read in existing usages */
|
||||
excl |= (excl & LOCKF_IRQ) << LOCK_USAGE_READ_MASK;
|
||||
|
||||
return excl;
|
||||
}
|
||||
|
||||
/*
|
||||
* Find the first pair of bit match between an original
|
||||
* usage mask and an exclusive usage mask.
|
||||
*/
|
||||
static int find_exclusive_match(unsigned long mask,
|
||||
unsigned long excl_mask,
|
||||
enum lock_usage_bit *bitp,
|
||||
enum lock_usage_bit *excl_bitp)
|
||||
{
|
||||
int bit, excl;
|
||||
|
||||
for_each_set_bit(bit, &mask, LOCK_USED) {
|
||||
excl = exclusive_bit(bit);
|
||||
if (excl_mask & lock_flag(excl)) {
|
||||
*bitp = bit;
|
||||
*excl_bitp = excl;
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Prove that the new dependency does not connect a hardirq-safe(-read)
|
||||
* lock with a hardirq-unsafe lock - to achieve this we search
|
||||
* the backwards-subgraph starting at <prev>, and the
|
||||
* forwards-subgraph starting at <next>:
|
||||
*/
|
||||
static int check_irq_usage(struct task_struct *curr, struct held_lock *prev,
|
||||
struct held_lock *next)
|
||||
{
|
||||
unsigned long usage_mask = 0, forward_mask, backward_mask;
|
||||
enum lock_usage_bit forward_bit = 0, backward_bit = 0;
|
||||
struct lock_list *uninitialized_var(target_entry1);
|
||||
struct lock_list *uninitialized_var(target_entry);
|
||||
struct lock_list this, that;
|
||||
int ret;
|
||||
|
||||
/*
|
||||
* Step 1: gather all hard/soft IRQs usages backward in an
|
||||
* accumulated usage mask.
|
||||
*/
|
||||
this.parent = NULL;
|
||||
this.class = hlock_class(prev);
|
||||
|
||||
ret = __bfs_backwards(&this, &usage_mask, usage_accumulate, NULL);
|
||||
if (ret < 0)
|
||||
return print_bfs_bug(ret);
|
||||
|
||||
usage_mask &= LOCKF_USED_IN_IRQ_ALL;
|
||||
if (!usage_mask)
|
||||
return 1;
|
||||
|
||||
/*
|
||||
* Step 2: find exclusive uses forward that match the previous
|
||||
* backward accumulated mask.
|
||||
*/
|
||||
forward_mask = exclusive_mask(usage_mask);
|
||||
|
||||
that.parent = NULL;
|
||||
that.class = hlock_class(next);
|
||||
|
||||
ret = find_usage_forwards(&that, forward_mask, &target_entry1);
|
||||
if (ret < 0)
|
||||
return print_bfs_bug(ret);
|
||||
if (ret == 1)
|
||||
return ret;
|
||||
|
||||
/*
|
||||
* Step 3: we found a bad match! Now retrieve a lock from the backward
|
||||
* list whose usage mask matches the exclusive usage mask from the
|
||||
* lock found on the forward list.
|
||||
*/
|
||||
backward_mask = original_mask(target_entry1->class->usage_mask);
|
||||
|
||||
ret = find_usage_backwards(&this, backward_mask, &target_entry);
|
||||
if (ret < 0)
|
||||
return print_bfs_bug(ret);
|
||||
if (DEBUG_LOCKS_WARN_ON(ret == 1))
|
||||
return 1;
|
||||
|
||||
/*
|
||||
* Step 4: narrow down to a pair of incompatible usage bits
|
||||
* and report it.
|
||||
*/
|
||||
ret = find_exclusive_match(target_entry->class->usage_mask,
|
||||
target_entry1->class->usage_mask,
|
||||
&backward_bit, &forward_bit);
|
||||
if (DEBUG_LOCKS_WARN_ON(ret == -1))
|
||||
return 1;
|
||||
|
||||
return print_bad_irq_dependency(curr, &this, &that,
|
||||
target_entry, target_entry1,
|
||||
prev, next,
|
||||
backward_bit, forward_bit,
|
||||
state_name(backward_bit));
|
||||
}
|
||||
|
||||
static void inc_chains(void)
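The mask transforms above (invert_dir_mask(), exclusive_mask(), original_mask()) rely on the bit numbering described earlier: bit0 of a usage bit number is the read flag, bit1 the direction, bits 2 and up the state, so shifting a whole usage mask by LOCK_USAGE_DIR_MASK converts every USED_IN_* bit into the corresponding ENABLED_* bit and back. A stand-alone sketch with values assumed from that encoding (HARDIRQ as state 0, SOFTIRQ as state 1):

```c
#include <stdio.h>

#define LOCK_USAGE_READ_MASK	1	/* bit0 of a usage bit number */
#define LOCK_USAGE_DIR_MASK	2	/* bit1 of a usage bit number */

/* Four usage bits per state, in the order the encoding comment gives. */
enum { USED_IN_HARDIRQ, USED_IN_HARDIRQ_READ,
       ENABLED_HARDIRQ, ENABLED_HARDIRQ_READ,
       USED_IN_SOFTIRQ, USED_IN_SOFTIRQ_READ,
       ENABLED_SOFTIRQ, ENABLED_SOFTIRQ_READ };

int main(void)
{
	unsigned long used_in = (1UL << USED_IN_HARDIRQ) | (1UL << USED_IN_SOFTIRQ);
	/* Shift by the direction bit: every USED_IN_* becomes ENABLED_*. */
	unsigned long enabled = used_in << LOCK_USAGE_DIR_MASK;

	printf("used_in = %#lx, enabled = %#lx\n", used_in, enabled);
	printf("ENABLED_HARDIRQ set: %d, ENABLED_SOFTIRQ set: %d\n",
	       !!(enabled & (1UL << ENABLED_HARDIRQ)),
	       !!(enabled & (1UL << ENABLED_SOFTIRQ)));
	return 0;
}
```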
@@ -2030,9 +2128,8 @@ static void inc_chains(void)
|
||||
|
||||
#else
|
||||
|
||||
static inline int
|
||||
check_prev_add_irq(struct task_struct *curr, struct held_lock *prev,
|
||||
struct held_lock *next)
|
||||
static inline int check_irq_usage(struct task_struct *curr,
|
||||
struct held_lock *prev, struct held_lock *next)
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
@@ -2211,7 +2308,7 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev,
|
||||
else if (unlikely(ret < 0))
|
||||
return print_bfs_bug(ret);
|
||||
|
||||
if (!check_prev_add_irq(curr, prev, next))
|
||||
if (!check_irq_usage(curr, prev, next))
|
||||
return 0;
|
||||
|
||||
/*
|
||||
@@ -2773,6 +2870,12 @@ static void check_chain_key(struct task_struct *curr)
|
||||
#endif
|
||||
}
|
||||
|
||||
static int mark_lock(struct task_struct *curr, struct held_lock *this,
|
||||
enum lock_usage_bit new_bit);
|
||||
|
||||
#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING)
|
||||
|
||||
|
||||
static void
|
||||
print_usage_bug_scenario(struct held_lock *lock)
|
||||
{
|
||||
@@ -2842,10 +2945,6 @@ valid_state(struct task_struct *curr, struct held_lock *this,
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int mark_lock(struct task_struct *curr, struct held_lock *this,
|
||||
enum lock_usage_bit new_bit);
|
||||
|
||||
#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING)
|
||||
|
||||
/*
|
||||
* print irq inversion bug:
|
||||
@@ -2925,7 +3024,7 @@ check_usage_forwards(struct task_struct *curr, struct held_lock *this,
|
||||
|
||||
root.parent = NULL;
|
||||
root.class = hlock_class(this);
|
||||
ret = find_usage_forwards(&root, bit, &target_entry);
|
||||
ret = find_usage_forwards(&root, lock_flag(bit), &target_entry);
|
||||
if (ret < 0)
|
||||
return print_bfs_bug(ret);
|
||||
if (ret == 1)
|
||||
@@ -2949,7 +3048,7 @@ check_usage_backwards(struct task_struct *curr, struct held_lock *this,
|
||||
|
||||
root.parent = NULL;
|
||||
root.class = hlock_class(this);
|
||||
ret = find_usage_backwards(&root, bit, &target_entry);
|
||||
ret = find_usage_backwards(&root, lock_flag(bit), &target_entry);
|
||||
if (ret < 0)
|
||||
return print_bfs_bug(ret);
|
||||
if (ret == 1)
|
||||
@@ -3004,7 +3103,7 @@ static int (*state_verbose_f[])(struct lock_class *class) = {
|
||||
static inline int state_verbose(enum lock_usage_bit bit,
|
||||
struct lock_class *class)
|
||||
{
|
||||
return state_verbose_f[bit >> 2](class);
|
||||
return state_verbose_f[bit >> LOCK_USAGE_DIR_MASK](class);
|
||||
}
|
||||
|
||||
typedef int (*check_usage_f)(struct task_struct *, struct held_lock *,
|
||||
@@ -3146,7 +3245,7 @@ void lockdep_hardirqs_on(unsigned long ip)
|
||||
/*
|
||||
* See the fine text that goes along with this variable definition.
|
||||
*/
|
||||
if (DEBUG_LOCKS_WARN_ON(unlikely(early_boot_irqs_disabled)))
|
||||
if (DEBUG_LOCKS_WARN_ON(early_boot_irqs_disabled))
|
||||
return;
|
||||
|
||||
/*
|
||||
|
||||
@@ -42,13 +42,35 @@ enum {
|
||||
__LOCKF(USED)
|
||||
};
|
||||
|
||||
#define LOCKF_ENABLED_IRQ (LOCKF_ENABLED_HARDIRQ | LOCKF_ENABLED_SOFTIRQ)
|
||||
#define LOCKF_USED_IN_IRQ (LOCKF_USED_IN_HARDIRQ | LOCKF_USED_IN_SOFTIRQ)
|
||||
#define LOCKDEP_STATE(__STATE) LOCKF_ENABLED_##__STATE |
|
||||
static const unsigned long LOCKF_ENABLED_IRQ =
|
||||
#include "lockdep_states.h"
|
||||
0;
|
||||
#undef LOCKDEP_STATE
|
||||
|
||||
#define LOCKF_ENABLED_IRQ_READ \
|
||||
(LOCKF_ENABLED_HARDIRQ_READ | LOCKF_ENABLED_SOFTIRQ_READ)
|
||||
#define LOCKF_USED_IN_IRQ_READ \
|
||||
(LOCKF_USED_IN_HARDIRQ_READ | LOCKF_USED_IN_SOFTIRQ_READ)
|
||||
#define LOCKDEP_STATE(__STATE) LOCKF_USED_IN_##__STATE |
|
||||
static const unsigned long LOCKF_USED_IN_IRQ =
|
||||
#include "lockdep_states.h"
|
||||
0;
|
||||
#undef LOCKDEP_STATE
|
||||
|
||||
#define LOCKDEP_STATE(__STATE) LOCKF_ENABLED_##__STATE##_READ |
|
||||
static const unsigned long LOCKF_ENABLED_IRQ_READ =
|
||||
#include "lockdep_states.h"
|
||||
0;
|
||||
#undef LOCKDEP_STATE
|
||||
|
||||
#define LOCKDEP_STATE(__STATE) LOCKF_USED_IN_##__STATE##_READ |
|
||||
static const unsigned long LOCKF_USED_IN_IRQ_READ =
|
||||
#include "lockdep_states.h"
|
||||
0;
|
||||
#undef LOCKDEP_STATE
|
||||
|
||||
#define LOCKF_ENABLED_IRQ_ALL (LOCKF_ENABLED_IRQ | LOCKF_ENABLED_IRQ_READ)
|
||||
#define LOCKF_USED_IN_IRQ_ALL (LOCKF_USED_IN_IRQ | LOCKF_USED_IN_IRQ_READ)
|
||||
|
||||
#define LOCKF_IRQ (LOCKF_ENABLED_IRQ | LOCKF_USED_IN_IRQ)
|
||||
#define LOCKF_IRQ_READ (LOCKF_ENABLED_IRQ_READ | LOCKF_USED_IN_IRQ_READ)
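The LOCKF_ENABLED_IRQ and LOCKF_USED_IN_IRQ composites above are now generated from lockdep_states.h rather than spelled out by hand: each LOCKDEP_STATE() expansion emits "FLAG |" and a trailing literal 0 terminates the expression. A stand-alone sketch of that generation trick, with made-up flag names and values:

```c
#include <stdio.h>

#define FLAG_HARD	0x01
#define FLAG_SOFT	0x04

/* Toy stand-in for lockdep_states.h */
#define TOY_STATE_LIST \
	TOY_STATE(HARD) \
	TOY_STATE(SOFT)

/* Each expansion contributes "FLAG_x |"; the final 0 closes the expression. */
#define TOY_STATE(s) FLAG_##s |
static const unsigned long TOY_ALL_FLAGS =
	TOY_STATE_LIST
	0;
#undef TOY_STATE

int main(void)
{
	printf("TOY_ALL_FLAGS = %#lx\n", TOY_ALL_FLAGS);	/* prints 0x5 */
	return 0;
}
```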
|
||||
|
||||
/*
|
||||
* CONFIG_LOCKDEP_SMALL is defined for sparc. Sparc requires .text,
|
||||
|
||||
@@ -7,6 +7,8 @@
|
||||
#include <linux/sched.h>
|
||||
#include <linux/errno.h>
|
||||
|
||||
#include "rwsem.h"
|
||||
|
||||
int __percpu_init_rwsem(struct percpu_rw_semaphore *sem,
|
||||
const char *name, struct lock_class_key *rwsem_key)
|
||||
{
|
||||
|
||||
@@ -395,7 +395,7 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
|
||||
* 0,1,0 -> 0,0,1
|
||||
*/
|
||||
clear_pending_set_locked(lock);
|
||||
qstat_inc(qstat_lock_pending, true);
|
||||
lockevent_inc(lock_pending);
|
||||
return;
|
||||
|
||||
/*
|
||||
@@ -403,7 +403,7 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
|
||||
* queuing.
|
||||
*/
|
||||
queue:
|
||||
qstat_inc(qstat_lock_slowpath, true);
|
||||
lockevent_inc(lock_slowpath);
|
||||
pv_queue:
|
||||
node = this_cpu_ptr(&qnodes[0].mcs);
|
||||
idx = node->count++;
|
||||
@@ -419,7 +419,7 @@ pv_queue:
|
||||
* simple enough.
|
||||
*/
|
||||
if (unlikely(idx >= MAX_NODES)) {
|
||||
qstat_inc(qstat_lock_no_node, true);
|
||||
lockevent_inc(lock_no_node);
|
||||
while (!queued_spin_trylock(lock))
|
||||
cpu_relax();
|
||||
goto release;
|
||||
@@ -430,7 +430,7 @@ pv_queue:
|
||||
/*
|
||||
* Keep counts of non-zero index values:
|
||||
*/
|
||||
qstat_inc(qstat_lock_use_node2 + idx - 1, idx);
|
||||
lockevent_cond_inc(lock_use_node2 + idx - 1, idx);
|
||||
|
||||
/*
|
||||
* Ensure that we increment the head node->count before initialising
|
||||
|
||||
@@ -89,7 +89,7 @@ static inline bool pv_hybrid_queued_unfair_trylock(struct qspinlock *lock)
|
||||
|
||||
if (!(val & _Q_LOCKED_PENDING_MASK) &&
|
||||
(cmpxchg_acquire(&lock->locked, 0, _Q_LOCKED_VAL) == 0)) {
|
||||
qstat_inc(qstat_pv_lock_stealing, true);
|
||||
lockevent_inc(pv_lock_stealing);
|
||||
return true;
|
||||
}
|
||||
if (!(val & _Q_TAIL_MASK) || (val & _Q_PENDING_MASK))
|
||||
@@ -219,7 +219,7 @@ static struct qspinlock **pv_hash(struct qspinlock *lock, struct pv_node *node)
|
||||
hopcnt++;
|
||||
if (!cmpxchg(&he->lock, NULL, lock)) {
|
||||
WRITE_ONCE(he->node, node);
|
||||
qstat_hop(hopcnt);
|
||||
lockevent_pv_hop(hopcnt);
|
||||
return &he->lock;
|
||||
}
|
||||
}
|
||||
@@ -320,8 +320,8 @@ static void pv_wait_node(struct mcs_spinlock *node, struct mcs_spinlock *prev)
|
||||
smp_store_mb(pn->state, vcpu_halted);
|
||||
|
||||
if (!READ_ONCE(node->locked)) {
|
||||
qstat_inc(qstat_pv_wait_node, true);
|
||||
qstat_inc(qstat_pv_wait_early, wait_early);
|
||||
lockevent_inc(pv_wait_node);
|
||||
lockevent_cond_inc(pv_wait_early, wait_early);
|
||||
pv_wait(&pn->state, vcpu_halted);
|
||||
}
|
||||
|
||||
@@ -339,7 +339,8 @@ static void pv_wait_node(struct mcs_spinlock *node, struct mcs_spinlock *prev)
|
||||
* So it is better to spin for a while in the hope that the
|
||||
* MCS lock will be released soon.
|
||||
*/
|
||||
qstat_inc(qstat_pv_spurious_wakeup, !READ_ONCE(node->locked));
|
||||
lockevent_cond_inc(pv_spurious_wakeup,
|
||||
!READ_ONCE(node->locked));
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -416,7 +417,7 @@ pv_wait_head_or_lock(struct qspinlock *lock, struct mcs_spinlock *node)
|
||||
/*
|
||||
* Tracking # of slowpath locking operations
|
||||
*/
|
||||
qstat_inc(qstat_lock_slowpath, true);
|
||||
lockevent_inc(lock_slowpath);
|
||||
|
||||
for (;; waitcnt++) {
|
||||
/*
|
||||
@@ -464,8 +465,8 @@ pv_wait_head_or_lock(struct qspinlock *lock, struct mcs_spinlock *node)
|
||||
}
|
||||
}
|
||||
WRITE_ONCE(pn->state, vcpu_hashed);
|
||||
qstat_inc(qstat_pv_wait_head, true);
|
||||
qstat_inc(qstat_pv_wait_again, waitcnt);
|
||||
lockevent_inc(pv_wait_head);
|
||||
lockevent_cond_inc(pv_wait_again, waitcnt);
|
||||
pv_wait(&lock->locked, _Q_SLOW_VAL);
|
||||
|
||||
/*
|
||||
@@ -528,7 +529,7 @@ __pv_queued_spin_unlock_slowpath(struct qspinlock *lock, u8 locked)
|
||||
* vCPU is harmless other than the additional latency in completing
|
||||
* the unlock.
|
||||
*/
|
||||
qstat_inc(qstat_pv_kick_unlock, true);
|
||||
lockevent_inc(pv_kick_unlock);
|
||||
pv_kick(node->cpu);
|
||||
}
|
||||
|
||||
|
||||
@@ -9,262 +9,105 @@
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* Authors: Waiman Long <waiman.long@hpe.com>
|
||||
* Authors: Waiman Long <longman@redhat.com>
|
||||
*/
|
||||
|
||||
/*
|
||||
* When queued spinlock statistical counters are enabled, the following
|
||||
* debugfs files will be created for reporting the counter values:
|
||||
*
|
||||
* <debugfs>/qlockstat/
|
||||
* pv_hash_hops - average # of hops per hashing operation
|
||||
* pv_kick_unlock - # of vCPU kicks issued at unlock time
|
||||
* pv_kick_wake - # of vCPU kicks used for computing pv_latency_wake
|
||||
* pv_latency_kick - average latency (ns) of vCPU kick operation
|
||||
* pv_latency_wake - average latency (ns) from vCPU kick to wakeup
|
||||
* pv_lock_stealing - # of lock stealing operations
|
||||
* pv_spurious_wakeup - # of spurious wakeups in non-head vCPUs
|
||||
* pv_wait_again - # of wait's after a queue head vCPU kick
|
||||
* pv_wait_early - # of early vCPU wait's
|
||||
* pv_wait_head - # of vCPU wait's at the queue head
|
||||
* pv_wait_node - # of vCPU wait's at a non-head queue node
|
||||
* lock_pending - # of locking operations via pending code
|
||||
* lock_slowpath - # of locking operations via MCS lock queue
|
||||
* lock_use_node2 - # of locking operations that use 2nd per-CPU node
|
||||
* lock_use_node3 - # of locking operations that use 3rd per-CPU node
|
||||
* lock_use_node4 - # of locking operations that use 4th per-CPU node
|
||||
* lock_no_node - # of locking operations without using per-CPU node
|
||||
*
|
||||
* Subtracting lock_use_node[234] from lock_slowpath will give you
|
||||
* lock_use_node1.
|
||||
*
|
||||
* Writing to the "reset_counters" file will reset all the above counter
|
||||
* values.
|
||||
*
|
||||
* These statistical counters are implemented as per-cpu variables which are
|
||||
* summed and computed whenever the corresponding debugfs files are read. This
|
||||
* minimizes added overhead making the counters usable even in a production
|
||||
* environment.
|
||||
*
|
||||
* There may be slight difference between pv_kick_wake and pv_kick_unlock.
|
||||
*/
|
||||
enum qlock_stats {
|
||||
qstat_pv_hash_hops,
|
||||
qstat_pv_kick_unlock,
|
||||
qstat_pv_kick_wake,
|
||||
qstat_pv_latency_kick,
|
||||
qstat_pv_latency_wake,
|
||||
qstat_pv_lock_stealing,
|
||||
qstat_pv_spurious_wakeup,
|
||||
qstat_pv_wait_again,
|
||||
qstat_pv_wait_early,
|
||||
qstat_pv_wait_head,
|
||||
qstat_pv_wait_node,
|
||||
qstat_lock_pending,
|
||||
qstat_lock_slowpath,
|
||||
qstat_lock_use_node2,
|
||||
qstat_lock_use_node3,
|
||||
qstat_lock_use_node4,
|
||||
qstat_lock_no_node,
|
||||
qstat_num, /* Total number of statistical counters */
|
||||
qstat_reset_cnts = qstat_num,
|
||||
};
|
||||
#include "lock_events.h"
|
||||
|
||||
#ifdef CONFIG_QUEUED_LOCK_STAT
|
||||
#ifdef CONFIG_LOCK_EVENT_COUNTS
|
||||
#ifdef CONFIG_PARAVIRT_SPINLOCKS
|
||||
/*
|
||||
* Collect pvqspinlock statistics
|
||||
* Collect pvqspinlock locking event counts
|
||||
*/
|
||||
#include <linux/debugfs.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/sched/clock.h>
|
||||
#include <linux/fs.h>
|
||||
|
||||
static const char * const qstat_names[qstat_num + 1] = {
|
||||
[qstat_pv_hash_hops] = "pv_hash_hops",
|
||||
[qstat_pv_kick_unlock] = "pv_kick_unlock",
|
||||
[qstat_pv_kick_wake] = "pv_kick_wake",
|
||||
[qstat_pv_spurious_wakeup] = "pv_spurious_wakeup",
|
||||
[qstat_pv_latency_kick] = "pv_latency_kick",
|
||||
[qstat_pv_latency_wake] = "pv_latency_wake",
|
||||
[qstat_pv_lock_stealing] = "pv_lock_stealing",
|
||||
[qstat_pv_wait_again] = "pv_wait_again",
|
||||
[qstat_pv_wait_early] = "pv_wait_early",
|
||||
[qstat_pv_wait_head] = "pv_wait_head",
|
||||
[qstat_pv_wait_node] = "pv_wait_node",
|
||||
[qstat_lock_pending] = "lock_pending",
|
||||
[qstat_lock_slowpath] = "lock_slowpath",
|
||||
[qstat_lock_use_node2] = "lock_use_node2",
|
||||
[qstat_lock_use_node3] = "lock_use_node3",
|
||||
[qstat_lock_use_node4] = "lock_use_node4",
|
||||
[qstat_lock_no_node] = "lock_no_node",
|
||||
[qstat_reset_cnts] = "reset_counters",
|
||||
};
|
||||
#define EVENT_COUNT(ev) lockevents[LOCKEVENT_ ## ev]
|
||||
|
||||
/*
|
||||
* Per-cpu counters
|
||||
* PV specific per-cpu counter
|
||||
*/
|
||||
static DEFINE_PER_CPU(unsigned long, qstats[qstat_num]);
|
||||
static DEFINE_PER_CPU(u64, pv_kick_time);
|
||||
|
||||
/*
|
||||
* Function to read and return the qlock statistical counter values
|
||||
* Function to read and return the PV qspinlock counts.
|
||||
*
|
||||
* The following counters are handled specially:
|
||||
* 1. qstat_pv_latency_kick
|
||||
* 1. pv_latency_kick
|
||||
* Average kick latency (ns) = pv_latency_kick/pv_kick_unlock
|
||||
* 2. qstat_pv_latency_wake
|
||||
* 2. pv_latency_wake
|
||||
* Average wake latency (ns) = pv_latency_wake/pv_kick_wake
|
||||
* 3. qstat_pv_hash_hops
|
||||
* 3. pv_hash_hops
|
||||
* Average hops/hash = pv_hash_hops/pv_kick_unlock
|
||||
*/
|
||||
static ssize_t qstat_read(struct file *file, char __user *user_buf,
|
||||
size_t count, loff_t *ppos)
|
||||
ssize_t lockevent_read(struct file *file, char __user *user_buf,
|
||||
size_t count, loff_t *ppos)
|
||||
{
|
||||
char buf[64];
|
||||
int cpu, counter, len;
|
||||
u64 stat = 0, kicks = 0;
|
||||
int cpu, id, len;
|
||||
u64 sum = 0, kicks = 0;
|
||||
|
||||
/*
|
||||
* Get the counter ID stored in file->f_inode->i_private
|
||||
*/
|
||||
counter = (long)file_inode(file)->i_private;
|
||||
id = (long)file_inode(file)->i_private;
|
||||
|
||||
if (counter >= qstat_num)
|
||||
if (id >= lockevent_num)
|
||||
return -EBADF;
|
||||
|
||||
for_each_possible_cpu(cpu) {
|
||||
stat += per_cpu(qstats[counter], cpu);
|
||||
sum += per_cpu(lockevents[id], cpu);
|
||||
/*
|
||||
* Need to sum additional counter for some of them
|
||||
* Need to sum additional counters for some of them
|
||||
*/
|
||||
switch (counter) {
|
||||
switch (id) {
|
||||
|
||||
case qstat_pv_latency_kick:
|
||||
case qstat_pv_hash_hops:
|
||||
kicks += per_cpu(qstats[qstat_pv_kick_unlock], cpu);
|
||||
case LOCKEVENT_pv_latency_kick:
|
||||
case LOCKEVENT_pv_hash_hops:
|
||||
kicks += per_cpu(EVENT_COUNT(pv_kick_unlock), cpu);
|
||||
break;
|
||||
|
||||
case qstat_pv_latency_wake:
|
||||
kicks += per_cpu(qstats[qstat_pv_kick_wake], cpu);
|
||||
case LOCKEVENT_pv_latency_wake:
|
||||
kicks += per_cpu(EVENT_COUNT(pv_kick_wake), cpu);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (counter == qstat_pv_hash_hops) {
|
||||
if (id == LOCKEVENT_pv_hash_hops) {
|
||||
u64 frac = 0;
|
||||
|
||||
if (kicks) {
|
||||
frac = 100ULL * do_div(stat, kicks);
|
||||
frac = 100ULL * do_div(sum, kicks);
|
||||
frac = DIV_ROUND_CLOSEST_ULL(frac, kicks);
|
||||
}
|
||||
|
||||
/*
|
||||
* Return a X.XX decimal number
|
||||
*/
|
||||
len = snprintf(buf, sizeof(buf) - 1, "%llu.%02llu\n", stat, frac);
|
||||
len = snprintf(buf, sizeof(buf) - 1, "%llu.%02llu\n",
|
||||
sum, frac);
|
||||
} else {
|
||||
/*
|
||||
* Round to the nearest ns
|
||||
*/
|
||||
if ((counter == qstat_pv_latency_kick) ||
|
||||
(counter == qstat_pv_latency_wake)) {
|
||||
if ((id == LOCKEVENT_pv_latency_kick) ||
|
||||
(id == LOCKEVENT_pv_latency_wake)) {
|
||||
if (kicks)
|
||||
stat = DIV_ROUND_CLOSEST_ULL(stat, kicks);
|
||||
sum = DIV_ROUND_CLOSEST_ULL(sum, kicks);
|
||||
}
|
||||
len = snprintf(buf, sizeof(buf) - 1, "%llu\n", stat);
|
||||
len = snprintf(buf, sizeof(buf) - 1, "%llu\n", sum);
|
||||
}
|
||||
|
||||
return simple_read_from_buffer(user_buf, count, ppos, buf, len);
|
||||
}
|
||||
|
||||
/*
|
||||
* Function to handle write request
|
||||
*
|
||||
* When counter = reset_cnts, reset all the counter values.
|
||||
* Since the counter updates aren't atomic, the resetting is done twice
|
||||
* to make sure that the counters are very likely to be all cleared.
|
||||
*/
|
||||
static ssize_t qstat_write(struct file *file, const char __user *user_buf,
|
||||
size_t count, loff_t *ppos)
|
||||
{
|
||||
int cpu;
|
||||
|
||||
/*
|
||||
* Get the counter ID stored in file->f_inode->i_private
|
||||
*/
|
||||
if ((long)file_inode(file)->i_private != qstat_reset_cnts)
|
||||
return count;
|
||||
|
||||
for_each_possible_cpu(cpu) {
|
||||
int i;
|
||||
unsigned long *ptr = per_cpu_ptr(qstats, cpu);
|
||||
|
||||
for (i = 0 ; i < qstat_num; i++)
|
||||
WRITE_ONCE(ptr[i], 0);
|
||||
}
|
||||
return count;
|
||||
}
|
||||
|
||||
/*
|
||||
* Debugfs data structures
|
||||
*/
|
||||
static const struct file_operations fops_qstat = {
|
||||
.read = qstat_read,
|
||||
.write = qstat_write,
|
||||
.llseek = default_llseek,
|
||||
};
|
||||
|
||||
/*
|
||||
* Initialize debugfs for the qspinlock statistical counters
|
||||
*/
|
||||
static int __init init_qspinlock_stat(void)
|
||||
{
|
||||
struct dentry *d_qstat = debugfs_create_dir("qlockstat", NULL);
|
||||
int i;
|
||||
|
||||
if (!d_qstat)
|
||||
goto out;
|
||||
|
||||
/*
|
||||
* Create the debugfs files
|
||||
*
|
||||
* As reading from and writing to the stat files can be slow, only
|
||||
* root is allowed to do the read/write to limit impact to system
|
||||
* performance.
|
||||
*/
|
||||
for (i = 0; i < qstat_num; i++)
|
||||
if (!debugfs_create_file(qstat_names[i], 0400, d_qstat,
|
||||
(void *)(long)i, &fops_qstat))
|
||||
goto fail_undo;
|
||||
|
||||
if (!debugfs_create_file(qstat_names[qstat_reset_cnts], 0200, d_qstat,
|
||||
(void *)(long)qstat_reset_cnts, &fops_qstat))
|
||||
goto fail_undo;
|
||||
|
||||
return 0;
|
||||
fail_undo:
|
||||
debugfs_remove_recursive(d_qstat);
|
||||
out:
|
||||
pr_warn("Could not create 'qlockstat' debugfs entries\n");
|
||||
return -ENOMEM;
|
||||
}
|
||||
fs_initcall(init_qspinlock_stat);
|
||||
|
||||
/*
|
||||
* Increment the PV qspinlock statistical counters
|
||||
*/
|
||||
static inline void qstat_inc(enum qlock_stats stat, bool cond)
|
||||
{
|
||||
if (cond)
|
||||
this_cpu_inc(qstats[stat]);
|
||||
}
|
||||
|
||||
/*
|
||||
* PV hash hop count
|
||||
*/
|
||||
static inline void qstat_hop(int hopcnt)
|
||||
static inline void lockevent_pv_hop(int hopcnt)
|
||||
{
|
||||
this_cpu_add(qstats[qstat_pv_hash_hops], hopcnt);
|
||||
this_cpu_add(EVENT_COUNT(pv_hash_hops), hopcnt);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -276,7 +119,7 @@ static inline void __pv_kick(int cpu)
|
||||
|
||||
per_cpu(pv_kick_time, cpu) = start;
|
||||
pv_kick(cpu);
|
||||
this_cpu_add(qstats[qstat_pv_latency_kick], sched_clock() - start);
|
||||
this_cpu_add(EVENT_COUNT(pv_latency_kick), sched_clock() - start);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -289,18 +132,19 @@ static inline void __pv_wait(u8 *ptr, u8 val)
|
||||
*pkick_time = 0;
|
||||
pv_wait(ptr, val);
|
||||
if (*pkick_time) {
|
||||
this_cpu_add(qstats[qstat_pv_latency_wake],
|
||||
this_cpu_add(EVENT_COUNT(pv_latency_wake),
|
||||
sched_clock() - *pkick_time);
|
||||
qstat_inc(qstat_pv_kick_wake, true);
|
||||
lockevent_inc(pv_kick_wake);
|
||||
}
|
||||
}
|
||||
|
||||
#define pv_kick(c) __pv_kick(c)
|
||||
#define pv_wait(p, v) __pv_wait(p, v)
|
||||
|
||||
#else /* CONFIG_QUEUED_LOCK_STAT */
|
||||
#endif /* CONFIG_PARAVIRT_SPINLOCKS */
|
||||
|
||||
static inline void qstat_inc(enum qlock_stats stat, bool cond) { }
|
||||
static inline void qstat_hop(int hopcnt) { }
|
||||
#else /* CONFIG_LOCK_EVENT_COUNTS */
|
||||
|
||||
#endif /* CONFIG_QUEUED_LOCK_STAT */
|
||||
static inline void lockevent_pv_hop(int hopcnt) { }
|
||||
|
||||
#endif /* CONFIG_LOCK_EVENT_COUNTS */
|
||||
|
||||
@@ -1,339 +0,0 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/* rwsem-spinlock.c: R/W semaphores: contention handling functions for
|
||||
* generic spinlock implementation
|
||||
*
|
||||
* Copyright (c) 2001 David Howells (dhowells@redhat.com).
|
||||
* - Derived partially from idea by Andrea Arcangeli <andrea@suse.de>
|
||||
* - Derived also from comments by Linus
|
||||
*/
|
||||
#include <linux/rwsem.h>
|
||||
#include <linux/sched/signal.h>
|
||||
#include <linux/sched/debug.h>
|
||||
#include <linux/export.h>
|
||||
|
||||
enum rwsem_waiter_type {
|
||||
RWSEM_WAITING_FOR_WRITE,
|
||||
RWSEM_WAITING_FOR_READ
|
||||
};
|
||||
|
||||
struct rwsem_waiter {
|
||||
struct list_head list;
|
||||
struct task_struct *task;
|
||||
enum rwsem_waiter_type type;
|
||||
};
|
||||
|
||||
int rwsem_is_locked(struct rw_semaphore *sem)
|
||||
{
|
||||
int ret = 1;
|
||||
unsigned long flags;
|
||||
|
||||
if (raw_spin_trylock_irqsave(&sem->wait_lock, flags)) {
|
||||
ret = (sem->count != 0);
|
||||
raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL(rwsem_is_locked);
|
||||
|
||||
/*
|
||||
* initialise the semaphore
|
||||
*/
|
||||
void __init_rwsem(struct rw_semaphore *sem, const char *name,
|
||||
struct lock_class_key *key)
|
||||
{
|
||||
#ifdef CONFIG_DEBUG_LOCK_ALLOC
|
||||
/*
|
||||
* Make sure we are not reinitializing a held semaphore:
|
||||
*/
|
||||
debug_check_no_locks_freed((void *)sem, sizeof(*sem));
|
||||
lockdep_init_map(&sem->dep_map, name, key, 0);
|
||||
#endif
|
||||
sem->count = 0;
|
||||
raw_spin_lock_init(&sem->wait_lock);
|
||||
INIT_LIST_HEAD(&sem->wait_list);
|
||||
}
|
||||
EXPORT_SYMBOL(__init_rwsem);
|
||||
|
||||
/*
|
||||
* handle the lock release when processes blocked on it that can now run
|
||||
* - if we come here, then:
|
||||
* - the 'active count' _reached_ zero
|
||||
* - the 'waiting count' is non-zero
|
||||
* - the spinlock must be held by the caller
|
||||
* - woken process blocks are discarded from the list after having task zeroed
|
||||
* - writers are only woken if wakewrite is non-zero
|
||||
*/
|
||||
static inline struct rw_semaphore *
|
||||
__rwsem_do_wake(struct rw_semaphore *sem, int wakewrite)
|
||||
{
|
||||
struct rwsem_waiter *waiter;
|
||||
struct task_struct *tsk;
|
||||
int woken;
|
||||
|
||||
waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list);
|
||||
|
||||
if (waiter->type == RWSEM_WAITING_FOR_WRITE) {
|
||||
if (wakewrite)
|
||||
/* Wake up a writer. Note that we do not grant it the
|
||||
* lock - it will have to acquire it when it runs. */
|
||||
wake_up_process(waiter->task);
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* grant an infinite number of read locks to the front of the queue */
|
||||
woken = 0;
|
||||
do {
|
||||
struct list_head *next = waiter->list.next;
|
||||
|
||||
list_del(&waiter->list);
|
||||
tsk = waiter->task;
|
||||
/*
|
||||
* Make sure we do not wakeup the next reader before
|
||||
* setting the nil condition to grant the next reader;
|
||||
* otherwise we could miss the wakeup on the other
|
||||
* side and end up sleeping again. See the pairing
|
||||
* in rwsem_down_read_failed().
|
||||
*/
|
||||
smp_mb();
|
||||
waiter->task = NULL;
|
||||
wake_up_process(tsk);
|
||||
put_task_struct(tsk);
|
||||
woken++;
|
||||
if (next == &sem->wait_list)
|
||||
break;
|
||||
waiter = list_entry(next, struct rwsem_waiter, list);
|
||||
} while (waiter->type != RWSEM_WAITING_FOR_WRITE);
|
||||
|
||||
sem->count += woken;
|
||||
|
||||
out:
|
||||
return sem;
|
||||
}
|
||||
|
||||
/*
|
||||
* wake a single writer
|
||||
*/
|
||||
static inline struct rw_semaphore *
|
||||
__rwsem_wake_one_writer(struct rw_semaphore *sem)
|
||||
{
|
||||
struct rwsem_waiter *waiter;
|
||||
|
||||
waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list);
|
||||
wake_up_process(waiter->task);
|
||||
|
||||
return sem;
|
||||
}
|
||||
|
||||
/*
|
||||
* get a read lock on the semaphore
|
||||
*/
|
||||
int __sched __down_read_common(struct rw_semaphore *sem, int state)
|
||||
{
|
||||
struct rwsem_waiter waiter;
|
||||
unsigned long flags;
|
||||
|
||||
raw_spin_lock_irqsave(&sem->wait_lock, flags);
|
||||
|
||||
if (sem->count >= 0 && list_empty(&sem->wait_list)) {
|
||||
/* granted */
|
||||
sem->count++;
|
||||
raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* set up my own style of waitqueue */
|
||||
waiter.task = current;
|
||||
waiter.type = RWSEM_WAITING_FOR_READ;
|
||||
get_task_struct(current);
|
||||
|
||||
list_add_tail(&waiter.list, &sem->wait_list);
|
||||
|
||||
/* wait to be given the lock */
|
||||
for (;;) {
|
||||
if (!waiter.task)
|
||||
break;
|
||||
if (signal_pending_state(state, current))
|
||||
goto out_nolock;
|
||||
set_current_state(state);
|
||||
raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
|
||||
schedule();
|
||||
raw_spin_lock_irqsave(&sem->wait_lock, flags);
|
||||
}
|
||||
|
||||
raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
|
||||
out:
|
||||
return 0;
|
||||
|
||||
out_nolock:
|
||||
/*
|
||||
* We didn't take the lock, so that there is a writer, which
|
||||
* is owner or the first waiter of the sem. If it's a waiter,
|
||||
* it will be woken by current owner. Not need to wake anybody.
|
||||
*/
|
||||
list_del(&waiter.list);
|
||||
raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
|
||||
return -EINTR;
|
||||
}
|
||||
|
||||
void __sched __down_read(struct rw_semaphore *sem)
|
||||
{
|
||||
__down_read_common(sem, TASK_UNINTERRUPTIBLE);
|
||||
}
|
||||
|
||||
int __sched __down_read_killable(struct rw_semaphore *sem)
|
||||
{
|
||||
return __down_read_common(sem, TASK_KILLABLE);
|
||||
}
|
||||
|
||||
/*
|
||||
* trylock for reading -- returns 1 if successful, 0 if contention
|
||||
*/
|
||||
int __down_read_trylock(struct rw_semaphore *sem)
|
||||
{
|
||||
unsigned long flags;
|
||||
int ret = 0;
|
||||
|
||||
|
||||
raw_spin_lock_irqsave(&sem->wait_lock, flags);
|
||||
|
||||
if (sem->count >= 0 && list_empty(&sem->wait_list)) {
|
||||
/* granted */
|
||||
sem->count++;
|
||||
ret = 1;
|
||||
}
|
||||
|
||||
raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* get a write lock on the semaphore
|
||||
*/
|
||||
int __sched __down_write_common(struct rw_semaphore *sem, int state)
|
||||
{
|
||||
struct rwsem_waiter waiter;
|
||||
unsigned long flags;
|
||||
int ret = 0;
|
||||
|
||||
raw_spin_lock_irqsave(&sem->wait_lock, flags);
|
||||
|
||||
/* set up my own style of waitqueue */
|
||||
waiter.task = current;
|
||||
waiter.type = RWSEM_WAITING_FOR_WRITE;
|
||||
list_add_tail(&waiter.list, &sem->wait_list);
|
||||
|
||||
/* wait for someone to release the lock */
|
||||
for (;;) {
|
||||
/*
|
||||
* That is the key to support write lock stealing: allows the
|
||||
* task already on CPU to get the lock soon rather than put
|
||||
* itself into sleep and waiting for system woke it or someone
|
||||
* else in the head of the wait list up.
|
||||
*/
|
||||
if (sem->count == 0)
|
||||
break;
|
||||
if (signal_pending_state(state, current))
|
||||
goto out_nolock;
|
||||
|
||||
set_current_state(state);
|
||||
raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
|
||||
schedule();
|
||||
raw_spin_lock_irqsave(&sem->wait_lock, flags);
|
||||
}
|
||||
/* got the lock */
|
||||
sem->count = -1;
|
||||
list_del(&waiter.list);
|
||||
|
||||
raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
|
||||
|
||||
return ret;
|
||||
|
||||
out_nolock:
|
||||
list_del(&waiter.list);
|
||||
if (!list_empty(&sem->wait_list) && sem->count >= 0)
|
||||
__rwsem_do_wake(sem, 0);
|
||||
raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
|
||||
|
||||
return -EINTR;
|
||||
}
|
||||
|
||||
void __sched __down_write(struct rw_semaphore *sem)
|
||||
{
|
||||
__down_write_common(sem, TASK_UNINTERRUPTIBLE);
|
||||
}
|
||||
|
||||
int __sched __down_write_killable(struct rw_semaphore *sem)
|
||||
{
|
||||
return __down_write_common(sem, TASK_KILLABLE);
|
||||
}
|
||||
|
||||
/*
|
||||
* trylock for writing -- returns 1 if successful, 0 if contention
|
||||
*/
|
||||
int __down_write_trylock(struct rw_semaphore *sem)
|
||||
{
|
||||
unsigned long flags;
|
||||
int ret = 0;
|
||||
|
||||
raw_spin_lock_irqsave(&sem->wait_lock, flags);
|
||||
|
||||
if (sem->count == 0) {
|
||||
/* got the lock */
|
||||
sem->count = -1;
|
||||
ret = 1;
|
||||
}
|
||||
|
||||
raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* release a read lock on the semaphore
|
||||
*/
|
||||
void __up_read(struct rw_semaphore *sem)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
raw_spin_lock_irqsave(&sem->wait_lock, flags);
|
||||
|
||||
if (--sem->count == 0 && !list_empty(&sem->wait_list))
|
||||
sem = __rwsem_wake_one_writer(sem);
|
||||
|
||||
raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
|
||||
}
|
||||
|
||||
/*
|
||||
* release a write lock on the semaphore
|
||||
*/
|
||||
void __up_write(struct rw_semaphore *sem)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
raw_spin_lock_irqsave(&sem->wait_lock, flags);
|
||||
|
||||
sem->count = 0;
|
||||
if (!list_empty(&sem->wait_list))
|
||||
sem = __rwsem_do_wake(sem, 1);
|
||||
|
||||
raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
|
||||
}
|
||||
|
||||
/*
|
||||
* downgrade a write lock into a read lock
|
||||
* - just wake up any readers at the front of the queue
|
||||
*/
|
||||
void __downgrade_write(struct rw_semaphore *sem)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
raw_spin_lock_irqsave(&sem->wait_lock, flags);
|
||||
|
||||
sem->count = 1;
|
||||
if (!list_empty(&sem->wait_list))
|
||||
sem = __rwsem_do_wake(sem, 0);
|
||||
|
||||
raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
|
||||
}

@@ -147,6 +147,7 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem,
                 * will notice the queued writer.
                 */
                wake_q_add(wake_q, waiter->task);
                lockevent_inc(rwsem_wake_writer);
        }

        return;
@@ -176,9 +177,8 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem,
                        goto try_reader_grant;
                }
                /*
                 * It is not really necessary to set it to reader-owned here,
                 * but it gives the spinners an early indication that the
                 * readers now have the lock.
                 * Set it to reader-owned to give spinners an early
                 * indication that readers now have the lock.
                 */
                __rwsem_set_reader_owned(sem, waiter->task);
        }
@@ -215,6 +215,7 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem,
        }

        adjustment = woken * RWSEM_ACTIVE_READ_BIAS - adjustment;
        lockevent_cond_inc(rwsem_wake_reader, woken);
        if (list_empty(&sem->wait_list)) {
                /* hit end of list above */
                adjustment -= RWSEM_WAITING_BIAS;
@@ -224,92 +225,6 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem,
                atomic_long_add(adjustment, &sem->count);
}

/*
 * Wait for the read lock to be granted
 */
static inline struct rw_semaphore __sched *
__rwsem_down_read_failed_common(struct rw_semaphore *sem, int state)
{
        long count, adjustment = -RWSEM_ACTIVE_READ_BIAS;
        struct rwsem_waiter waiter;
        DEFINE_WAKE_Q(wake_q);

        waiter.task = current;
        waiter.type = RWSEM_WAITING_FOR_READ;

        raw_spin_lock_irq(&sem->wait_lock);
        if (list_empty(&sem->wait_list)) {
                /*
                 * In case the wait queue is empty and the lock isn't owned
                 * by a writer, this reader can exit the slowpath and return
                 * immediately as its RWSEM_ACTIVE_READ_BIAS has already
                 * been set in the count.
                 */
                if (atomic_long_read(&sem->count) >= 0) {
                        raw_spin_unlock_irq(&sem->wait_lock);
                        return sem;
                }
                adjustment += RWSEM_WAITING_BIAS;
        }
        list_add_tail(&waiter.list, &sem->wait_list);

        /* we're now waiting on the lock, but no longer actively locking */
        count = atomic_long_add_return(adjustment, &sem->count);

        /*
         * If there are no active locks, wake the front queued process(es).
         *
         * If there are no writers and we are first in the queue,
         * wake our own waiter to join the existing active readers !
         */
        if (count == RWSEM_WAITING_BIAS ||
            (count > RWSEM_WAITING_BIAS &&
             adjustment != -RWSEM_ACTIVE_READ_BIAS))
                __rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);

        raw_spin_unlock_irq(&sem->wait_lock);
        wake_up_q(&wake_q);

        /* wait to be given the lock */
        while (true) {
                set_current_state(state);
                if (!waiter.task)
                        break;
                if (signal_pending_state(state, current)) {
                        raw_spin_lock_irq(&sem->wait_lock);
                        if (waiter.task)
                                goto out_nolock;
                        raw_spin_unlock_irq(&sem->wait_lock);
                        break;
                }
                schedule();
        }

        __set_current_state(TASK_RUNNING);
        return sem;
out_nolock:
        list_del(&waiter.list);
        if (list_empty(&sem->wait_list))
                atomic_long_add(-RWSEM_WAITING_BIAS, &sem->count);
        raw_spin_unlock_irq(&sem->wait_lock);
        __set_current_state(TASK_RUNNING);
        return ERR_PTR(-EINTR);
}

__visible struct rw_semaphore * __sched
rwsem_down_read_failed(struct rw_semaphore *sem)
{
        return __rwsem_down_read_failed_common(sem, TASK_UNINTERRUPTIBLE);
}
EXPORT_SYMBOL(rwsem_down_read_failed);

__visible struct rw_semaphore * __sched
rwsem_down_read_failed_killable(struct rw_semaphore *sem)
{
        return __rwsem_down_read_failed_common(sem, TASK_KILLABLE);
}
EXPORT_SYMBOL(rwsem_down_read_failed_killable);

/*
 * This function must be called with the sem->wait_lock held to prevent
 * race conditions between checking the rwsem wait list and setting the
@@ -346,21 +261,17 @@ static inline bool rwsem_try_write_lock(long count, struct rw_semaphore *sem)
 */
static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem)
{
        long old, count = atomic_long_read(&sem->count);
        long count = atomic_long_read(&sem->count);

        while (true) {
                if (!(count == 0 || count == RWSEM_WAITING_BIAS))
                        return false;

                old = atomic_long_cmpxchg_acquire(&sem->count, count,
                                      count + RWSEM_ACTIVE_WRITE_BIAS);
                if (old == count) {
        while (!count || count == RWSEM_WAITING_BIAS) {
                if (atomic_long_try_cmpxchg_acquire(&sem->count, &count,
                                        count + RWSEM_ACTIVE_WRITE_BIAS)) {
                        rwsem_set_owner(sem);
                        lockevent_inc(rwsem_opt_wlock);
                        return true;
                }

                count = old;
        }
        return false;
}
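The rework above swaps an open-coded cmpxchg retry loop for atomic_long_try_cmpxchg_acquire(): on failure, try_cmpxchg writes the freshly observed count back through its second argument, so both the separate old variable and the "count = old" reload disappear. A minimal userspace C11 analogue of the same pattern, with illustrative names and bias values rather than the kernel's:

        #include <stdatomic.h>
        #include <stdbool.h>

        #define WAITING_BIAS    (-0xffffL - 1)          /* illustrative only */
        #define WRITE_BIAS      (WAITING_BIAS + 1)

        static bool try_write_lock(_Atomic long *count)
        {
                long expected = atomic_load_explicit(count, memory_order_relaxed);

                while (expected == 0 || expected == WAITING_BIAS) {
                        /* On failure, "expected" is refreshed in place, like try_cmpxchg. */
                        if (atomic_compare_exchange_strong_explicit(count, &expected,
                                        expected + WRITE_BIAS,
                                        memory_order_acquire, memory_order_relaxed))
                                return true;
                }
                return false;
        }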

static inline bool owner_on_cpu(struct task_struct *owner)
@@ -481,6 +392,7 @@ static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
        osq_unlock(&sem->osq);
done:
        preempt_enable();
        lockevent_cond_inc(rwsem_opt_fail, !taken);
        return taken;
}
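The lockevent_inc()/lockevent_cond_inc() calls added throughout these hunks feed the lock event counting code introduced by this series: each named event bumps a cheap counter (per-CPU in the kernel) that can be summed and read back later for profiling. A rough userspace sketch of the idea only, with hypothetical names and a plain atomic array instead of per-CPU counters:

        #include <stdatomic.h>
        #include <stdio.h>

        enum lockevent { EV_RWSEM_OPT_WLOCK, EV_RWSEM_OPT_FAIL, EV_NUM };

        static _Atomic unsigned long lockevents[EV_NUM];

        #define lockevent_inc(ev) \
                atomic_fetch_add_explicit(&lockevents[ev], 1, memory_order_relaxed)
        #define lockevent_cond_inc(ev, cond) \
                do { if (cond) lockevent_inc(ev); } while (0)

        int main(void)
        {
                lockevent_inc(EV_RWSEM_OPT_WLOCK);
                lockevent_cond_inc(EV_RWSEM_OPT_FAIL, 1);
                printf("opt_wlock=%lu opt_fail=%lu\n",
                       atomic_load(&lockevents[EV_RWSEM_OPT_WLOCK]),
                       atomic_load(&lockevents[EV_RWSEM_OPT_FAIL]));
                return 0;
        }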

@@ -504,6 +416,97 @@ static inline bool rwsem_has_spinner(struct rw_semaphore *sem)
}
#endif

/*
 * Wait for the read lock to be granted
 */
static inline struct rw_semaphore __sched *
__rwsem_down_read_failed_common(struct rw_semaphore *sem, int state)
{
        long count, adjustment = -RWSEM_ACTIVE_READ_BIAS;
        struct rwsem_waiter waiter;
        DEFINE_WAKE_Q(wake_q);

        waiter.task = current;
        waiter.type = RWSEM_WAITING_FOR_READ;

        raw_spin_lock_irq(&sem->wait_lock);
        if (list_empty(&sem->wait_list)) {
                /*
                 * In case the wait queue is empty and the lock isn't owned
                 * by a writer, this reader can exit the slowpath and return
                 * immediately as its RWSEM_ACTIVE_READ_BIAS has already
                 * been set in the count.
                 */
                if (atomic_long_read(&sem->count) >= 0) {
                        raw_spin_unlock_irq(&sem->wait_lock);
                        rwsem_set_reader_owned(sem);
                        lockevent_inc(rwsem_rlock_fast);
                        return sem;
                }
                adjustment += RWSEM_WAITING_BIAS;
        }
        list_add_tail(&waiter.list, &sem->wait_list);

        /* we're now waiting on the lock, but no longer actively locking */
        count = atomic_long_add_return(adjustment, &sem->count);

        /*
         * If there are no active locks, wake the front queued process(es).
         *
         * If there are no writers and we are first in the queue,
         * wake our own waiter to join the existing active readers !
         */
        if (count == RWSEM_WAITING_BIAS ||
            (count > RWSEM_WAITING_BIAS &&
             adjustment != -RWSEM_ACTIVE_READ_BIAS))
                __rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);

        raw_spin_unlock_irq(&sem->wait_lock);
        wake_up_q(&wake_q);

        /* wait to be given the lock */
        while (true) {
                set_current_state(state);
                if (!waiter.task)
                        break;
                if (signal_pending_state(state, current)) {
                        raw_spin_lock_irq(&sem->wait_lock);
                        if (waiter.task)
                                goto out_nolock;
                        raw_spin_unlock_irq(&sem->wait_lock);
                        break;
                }
                schedule();
                lockevent_inc(rwsem_sleep_reader);
        }

        __set_current_state(TASK_RUNNING);
        lockevent_inc(rwsem_rlock);
        return sem;
out_nolock:
        list_del(&waiter.list);
        if (list_empty(&sem->wait_list))
                atomic_long_add(-RWSEM_WAITING_BIAS, &sem->count);
        raw_spin_unlock_irq(&sem->wait_lock);
        __set_current_state(TASK_RUNNING);
        lockevent_inc(rwsem_rlock_fail);
        return ERR_PTR(-EINTR);
}

__visible struct rw_semaphore * __sched
rwsem_down_read_failed(struct rw_semaphore *sem)
{
        return __rwsem_down_read_failed_common(sem, TASK_UNINTERRUPTIBLE);
}
EXPORT_SYMBOL(rwsem_down_read_failed);

__visible struct rw_semaphore * __sched
rwsem_down_read_failed_killable(struct rw_semaphore *sem)
{
        return __rwsem_down_read_failed_common(sem, TASK_KILLABLE);
}
EXPORT_SYMBOL(rwsem_down_read_failed_killable);

/*
 * Wait until we successfully acquire the write lock
 */
@@ -580,6 +583,7 @@ __rwsem_down_write_failed_common(struct rw_semaphore *sem, int state)
                        goto out_nolock;

                schedule();
                lockevent_inc(rwsem_sleep_writer);
                set_current_state(state);
        } while ((count = atomic_long_read(&sem->count)) & RWSEM_ACTIVE_MASK);

@@ -588,6 +592,7 @@ __rwsem_down_write_failed_common(struct rw_semaphore *sem, int state)
        __set_current_state(TASK_RUNNING);
        list_del(&waiter.list);
        raw_spin_unlock_irq(&sem->wait_lock);
        lockevent_inc(rwsem_wlock);

        return ret;

@@ -601,6 +606,7 @@ out_nolock:
        __rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
        raw_spin_unlock_irq(&sem->wait_lock);
        wake_up_q(&wake_q);
        lockevent_inc(rwsem_wlock_fail);

        return ERR_PTR(-EINTR);
}

@@ -24,7 +24,6 @@ void __sched down_read(struct rw_semaphore *sem)
        rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_);

        LOCK_CONTENDED(sem, __down_read_trylock, __down_read);
        rwsem_set_reader_owned(sem);
}

EXPORT_SYMBOL(down_read);
@@ -39,7 +38,6 @@ int __sched down_read_killable(struct rw_semaphore *sem)
                return -EINTR;
        }

        rwsem_set_reader_owned(sem);
        return 0;
}

@@ -52,10 +50,8 @@ int down_read_trylock(struct rw_semaphore *sem)
{
        int ret = __down_read_trylock(sem);

        if (ret == 1) {
        if (ret == 1)
                rwsem_acquire_read(&sem->dep_map, 0, 1, _RET_IP_);
                rwsem_set_reader_owned(sem);
        }
        return ret;
}

@@ -70,7 +66,6 @@ void __sched down_write(struct rw_semaphore *sem)
        rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_);

        LOCK_CONTENDED(sem, __down_write_trylock, __down_write);
        rwsem_set_owner(sem);
}

EXPORT_SYMBOL(down_write);
@@ -88,7 +83,6 @@ int __sched down_write_killable(struct rw_semaphore *sem)
                return -EINTR;
        }

        rwsem_set_owner(sem);
        return 0;
}

@@ -101,10 +95,8 @@ int down_write_trylock(struct rw_semaphore *sem)
{
        int ret = __down_write_trylock(sem);

        if (ret == 1) {
        if (ret == 1)
                rwsem_acquire(&sem->dep_map, 0, 1, _RET_IP_);
                rwsem_set_owner(sem);
        }

        return ret;
}
@@ -117,9 +109,7 @@ EXPORT_SYMBOL(down_write_trylock);
void up_read(struct rw_semaphore *sem)
{
        rwsem_release(&sem->dep_map, 1, _RET_IP_);
        DEBUG_RWSEMS_WARN_ON(!((unsigned long)sem->owner & RWSEM_READER_OWNED));

        rwsem_clear_reader_owned(sem);
        __up_read(sem);
}

@@ -131,9 +121,7 @@ EXPORT_SYMBOL(up_read);
void up_write(struct rw_semaphore *sem)
{
        rwsem_release(&sem->dep_map, 1, _RET_IP_);
        DEBUG_RWSEMS_WARN_ON(sem->owner != current);

        rwsem_clear_owner(sem);
        __up_write(sem);
}

@@ -145,9 +133,7 @@ EXPORT_SYMBOL(up_write);
void downgrade_write(struct rw_semaphore *sem)
{
        lock_downgrade(&sem->dep_map, _RET_IP_);
        DEBUG_RWSEMS_WARN_ON(sem->owner != current);

        rwsem_set_reader_owned(sem);
        __downgrade_write(sem);
}

@@ -161,7 +147,6 @@ void down_read_nested(struct rw_semaphore *sem, int subclass)
        rwsem_acquire_read(&sem->dep_map, subclass, 0, _RET_IP_);

        LOCK_CONTENDED(sem, __down_read_trylock, __down_read);
        rwsem_set_reader_owned(sem);
}

EXPORT_SYMBOL(down_read_nested);
@@ -172,7 +157,6 @@ void _down_write_nest_lock(struct rw_semaphore *sem, struct lockdep_map *nest)
        rwsem_acquire_nest(&sem->dep_map, 0, 0, nest, _RET_IP_);

        LOCK_CONTENDED(sem, __down_write_trylock, __down_write);
        rwsem_set_owner(sem);
}

EXPORT_SYMBOL(_down_write_nest_lock);
@@ -193,7 +177,6 @@ void down_write_nested(struct rw_semaphore *sem, int subclass)
        rwsem_acquire(&sem->dep_map, subclass, 0, _RET_IP_);

        LOCK_CONTENDED(sem, __down_write_trylock, __down_write);
        rwsem_set_owner(sem);
}

EXPORT_SYMBOL(down_write_nested);
@@ -208,7 +191,6 @@ int __sched down_write_killable_nested(struct rw_semaphore *sem, int subclass)
                return -EINTR;
        }

        rwsem_set_owner(sem);
        return 0;
}

@@ -216,7 +198,8 @@ EXPORT_SYMBOL(down_write_killable_nested);

void up_read_non_owner(struct rw_semaphore *sem)
{
        DEBUG_RWSEMS_WARN_ON(!((unsigned long)sem->owner & RWSEM_READER_OWNED));
        DEBUG_RWSEMS_WARN_ON(!((unsigned long)sem->owner & RWSEM_READER_OWNED),
                             sem);
        __up_read(sem);
}


@@ -23,15 +23,44 @@
 * is involved. Ideally we would like to track all the readers that own
 * a rwsem, but the overhead is simply too big.
 */
#include "lock_events.h"

#define RWSEM_READER_OWNED      (1UL << 0)
#define RWSEM_ANONYMOUSLY_OWNED (1UL << 1)

#ifdef CONFIG_DEBUG_RWSEMS
# define DEBUG_RWSEMS_WARN_ON(c)        DEBUG_LOCKS_WARN_ON(c)
# define DEBUG_RWSEMS_WARN_ON(c, sem)   do {                    \
        if (!debug_locks_silent &&                              \
            WARN_ONCE(c, "DEBUG_RWSEMS_WARN_ON(%s): count = 0x%lx, owner = 0x%lx, curr 0x%lx, list %sempty\n",\
                #c, atomic_long_read(&(sem)->count),            \
                (long)((sem)->owner), (long)current,            \
                list_empty(&(sem)->wait_list) ? "" : "not "))   \
                        debug_locks_off();                      \
        } while (0)
#else
# define DEBUG_RWSEMS_WARN_ON(c)
# define DEBUG_RWSEMS_WARN_ON(c, sem)
#endif

/*
 * R/W semaphores originally for PPC using the stuff in lib/rwsem.c.
 * Adapted largely from include/asm-i386/rwsem.h
 * by Paul Mackerras <paulus@samba.org>.
 */

/*
 * the semaphore definition
 */
#ifdef CONFIG_64BIT
# define RWSEM_ACTIVE_MASK              0xffffffffL
#else
# define RWSEM_ACTIVE_MASK              0x0000ffffL
#endif

#define RWSEM_ACTIVE_BIAS               0x00000001L
#define RWSEM_WAITING_BIAS              (-RWSEM_ACTIVE_MASK-1)
#define RWSEM_ACTIVE_READ_BIAS          RWSEM_ACTIVE_BIAS
#define RWSEM_ACTIVE_WRITE_BIAS         (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)

#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
/*
 * All writes to owner are protected by WRITE_ONCE() to make sure that
@@ -132,3 +161,144 @@ static inline void rwsem_clear_reader_owned(struct rw_semaphore *sem)
{
}
#endif

extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem);
extern struct rw_semaphore *rwsem_down_read_failed_killable(struct rw_semaphore *sem);
extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem);
extern struct rw_semaphore *rwsem_down_write_failed_killable(struct rw_semaphore *sem);
extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem);
extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem);

/*
 * lock for reading
 */
static inline void __down_read(struct rw_semaphore *sem)
{
        if (unlikely(atomic_long_inc_return_acquire(&sem->count) <= 0)) {
                rwsem_down_read_failed(sem);
                DEBUG_RWSEMS_WARN_ON(!((unsigned long)sem->owner &
                                        RWSEM_READER_OWNED), sem);
        } else {
                rwsem_set_reader_owned(sem);
        }
}

static inline int __down_read_killable(struct rw_semaphore *sem)
{
        if (unlikely(atomic_long_inc_return_acquire(&sem->count) <= 0)) {
                if (IS_ERR(rwsem_down_read_failed_killable(sem)))
                        return -EINTR;
                DEBUG_RWSEMS_WARN_ON(!((unsigned long)sem->owner &
                                        RWSEM_READER_OWNED), sem);
        } else {
                rwsem_set_reader_owned(sem);
        }
        return 0;
}

static inline int __down_read_trylock(struct rw_semaphore *sem)
{
        /*
         * Optimize for the case when the rwsem is not locked at all.
         */
        long tmp = RWSEM_UNLOCKED_VALUE;

        lockevent_inc(rwsem_rtrylock);
        do {
                if (atomic_long_try_cmpxchg_acquire(&sem->count, &tmp,
                                        tmp + RWSEM_ACTIVE_READ_BIAS)) {
                        rwsem_set_reader_owned(sem);
                        return 1;
                }
        } while (tmp >= 0);
        return 0;
}

/*
 * lock for writing
 */
static inline void __down_write(struct rw_semaphore *sem)
{
        long tmp;

        tmp = atomic_long_add_return_acquire(RWSEM_ACTIVE_WRITE_BIAS,
                                             &sem->count);
        if (unlikely(tmp != RWSEM_ACTIVE_WRITE_BIAS))
                rwsem_down_write_failed(sem);
        rwsem_set_owner(sem);
}

static inline int __down_write_killable(struct rw_semaphore *sem)
{
        long tmp;

        tmp = atomic_long_add_return_acquire(RWSEM_ACTIVE_WRITE_BIAS,
                                             &sem->count);
        if (unlikely(tmp != RWSEM_ACTIVE_WRITE_BIAS))
                if (IS_ERR(rwsem_down_write_failed_killable(sem)))
                        return -EINTR;
        rwsem_set_owner(sem);
        return 0;
}

static inline int __down_write_trylock(struct rw_semaphore *sem)
{
        long tmp;

        lockevent_inc(rwsem_wtrylock);
        tmp = atomic_long_cmpxchg_acquire(&sem->count, RWSEM_UNLOCKED_VALUE,
                                          RWSEM_ACTIVE_WRITE_BIAS);
        if (tmp == RWSEM_UNLOCKED_VALUE) {
                rwsem_set_owner(sem);
                return true;
        }
        return false;
}

/*
 * unlock after reading
 */
static inline void __up_read(struct rw_semaphore *sem)
{
        long tmp;

        DEBUG_RWSEMS_WARN_ON(!((unsigned long)sem->owner & RWSEM_READER_OWNED),
                             sem);
        rwsem_clear_reader_owned(sem);
        tmp = atomic_long_dec_return_release(&sem->count);
        if (unlikely(tmp < -1 && (tmp & RWSEM_ACTIVE_MASK) == 0))
                rwsem_wake(sem);
}

/*
 * unlock after writing
 */
static inline void __up_write(struct rw_semaphore *sem)
{
        DEBUG_RWSEMS_WARN_ON(sem->owner != current, sem);
        rwsem_clear_owner(sem);
        if (unlikely(atomic_long_sub_return_release(RWSEM_ACTIVE_WRITE_BIAS,
                                                    &sem->count) < 0))
                rwsem_wake(sem);
}

/*
 * downgrade write lock to read lock
 */
static inline void __downgrade_write(struct rw_semaphore *sem)
{
        long tmp;

        /*
         * When downgrading from exclusive to shared ownership,
         * anything inside the write-locked region cannot leak
         * into the read side. In contrast, anything in the
         * read-locked region is ok to be re-ordered into the
         * write side. As such, rely on RELEASE semantics.
         */
        DEBUG_RWSEMS_WARN_ON(sem->owner != current, sem);
        tmp = atomic_long_add_return_release(-RWSEM_WAITING_BIAS, &sem->count);
        rwsem_set_reader_owned(sem);
        if (tmp < 0)
                rwsem_downgrade_wake(sem);
}
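As a sanity check of the bias arithmetic used by the fast paths above (derived from the RWSEM_*_BIAS definitions earlier in this file, not stated in the patch itself): an uncontended reader leaves the count at RWSEM_ACTIVE_READ_BIAS (positive), an uncontended writer leaves it at RWSEM_ACTIVE_WRITE_BIAS == RWSEM_WAITING_BIAS + 1 (negative), and queueing the first waiter adds one more RWSEM_WAITING_BIAS. That is why __down_read() takes the slowpath on a result <= 0 and __up_write() calls rwsem_wake() on a result < 0. For __downgrade_write():

        writer, no waiters:    (RWSEM_WAITING_BIAS + 1) - RWSEM_WAITING_BIAS == 1
                               tmp > 0, nothing to wake
        writer with waiters:   (RWSEM_WAITING_BIAS + 1 + RWSEM_WAITING_BIAS) - RWSEM_WAITING_BIAS
                               == RWSEM_WAITING_BIAS + 1, so tmp < 0 and rwsem_downgrade_wake() runs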