Merge branch 'core-rcu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'core-rcu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (28 commits)
  rcu: Move end of special early-boot RCU operation earlier
  rcu: Changes from reviews: avoid casts, fix/add warnings, improve comments
  rcu: Create rcutree plugins to handle hotplug CPU for multi-level trees
  rcu: Remove lockdep annotations from RCU's _notrace() API members
  rcu: Add #ifdef to suppress __rcu_offline_cpu() warning in !HOTPLUG_CPU builds
  rcu: Add CPU-offline processing for single-node configurations
  rcu: Add "notrace" to RCU function headers used by ftrace
  rcu: Remove CONFIG_PREEMPT_RCU
  rcu: Merge preemptable-RCU functionality into hierarchical RCU
  rcu: Simplify rcu_pending()/rcu_check_callbacks() API
  rcu: Use debugfs_remove_recursive() simplify code.
  rcu: Merge per-RCU-flavor initialization into pre-existing macro
  rcu: Fix online/offline indication for rcudata.csv trace file
  rcu: Consolidate sparse and lockdep declarations in include/linux/rcupdate.h
  rcu: Renamings to increase RCU clarity
  rcu: Move private definitions from include/linux/rcutree.h to kernel/rcutree.h
  rcu: Expunge lingering references to CONFIG_CLASSIC_RCU, optimize on !SMP
  rcu: Delay rcu_barrier() wait until beginning of next CPU-hotunplug operation.
  rcu: Fix typo in rcu_irq_exit() comment header
  rcu: Make rcupreempt_trace.c look at offline CPUs
  ...
This commit is contained in:
Linus Torvalds
2009-09-11 13:20:18 -07:00
37 changed files with 1638 additions and 3669 deletions

View File

@@ -80,11 +80,9 @@ obj-$(CONFIG_DETECT_HUNG_TASK) += hung_task.o
obj-$(CONFIG_GENERIC_HARDIRQS) += irq/
obj-$(CONFIG_SECCOMP) += seccomp.o
obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o
obj-$(CONFIG_CLASSIC_RCU) += rcuclassic.o
obj-$(CONFIG_TREE_RCU) += rcutree.o
obj-$(CONFIG_PREEMPT_RCU) += rcupreempt.o
obj-$(CONFIG_TREE_PREEMPT_RCU) += rcutree.o
obj-$(CONFIG_TREE_RCU_TRACE) += rcutree_trace.o
obj-$(CONFIG_PREEMPT_RCU_TRACE) += rcupreempt_trace.o
obj-$(CONFIG_RELAY) += relay.o
obj-$(CONFIG_SYSCTL) += utsname_sysctl.o
obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o

View File

@@ -1014,6 +1014,7 @@ NORET_TYPE void do_exit(long code)
validate_creds_for_do_exit(tsk);
preempt_disable();
exit_rcu();
/* causes final put_task_struct in finish_task_switch(). */
tsk->state = TASK_DEAD;
schedule();

View File

@@ -1007,10 +1007,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
copy_flags(clone_flags, p);
INIT_LIST_HEAD(&p->children);
INIT_LIST_HEAD(&p->sibling);
#ifdef CONFIG_PREEMPT_RCU
p->rcu_read_lock_nesting = 0;
p->rcu_flipctr_idx = 0;
#endif /* #ifdef CONFIG_PREEMPT_RCU */
rcu_copy_process(p);
p->vfork_done = NULL;
spin_lock_init(&p->alloc_lock);

View File

@@ -1,807 +0,0 @@
/*
* Read-Copy Update mechanism for mutual exclusion
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
* Copyright IBM Corporation, 2001
*
* Authors: Dipankar Sarma <dipankar@in.ibm.com>
* Manfred Spraul <manfred@colorfullife.com>
*
* Based on the original work by Paul McKenney <paulmck@us.ibm.com>
* and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen.
* Papers:
* http://www.rdrop.com/users/paulmck/paper/rclockpdcsproof.pdf
* http://lse.sourceforge.net/locking/rclock_OLS.2001.05.01c.sc.pdf (OLS2001)
*
* For detailed explanation of Read-Copy Update mechanism see -
* Documentation/RCU
*
*/
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/spinlock.h>
#include <linux/smp.h>
#include <linux/rcupdate.h>
#include <linux/interrupt.h>
#include <linux/sched.h>
#include <asm/atomic.h>
#include <linux/bitops.h>
#include <linux/module.h>
#include <linux/completion.h>
#include <linux/moduleparam.h>
#include <linux/percpu.h>
#include <linux/notifier.h>
#include <linux/cpu.h>
#include <linux/mutex.h>
#include <linux/time.h>
#ifdef CONFIG_DEBUG_LOCK_ALLOC
static struct lock_class_key rcu_lock_key;
struct lockdep_map rcu_lock_map =
STATIC_LOCKDEP_MAP_INIT("rcu_read_lock", &rcu_lock_key);
EXPORT_SYMBOL_GPL(rcu_lock_map);
#endif
/* Definition for rcupdate control block. */
static struct rcu_ctrlblk rcu_ctrlblk = {
.cur = -300,
.completed = -300,
.pending = -300,
.lock = __SPIN_LOCK_UNLOCKED(&rcu_ctrlblk.lock),
.cpumask = CPU_BITS_NONE,
};
static struct rcu_ctrlblk rcu_bh_ctrlblk = {
.cur = -300,
.completed = -300,
.pending = -300,
.lock = __SPIN_LOCK_UNLOCKED(&rcu_bh_ctrlblk.lock),
.cpumask = CPU_BITS_NONE,
};
static DEFINE_PER_CPU(struct rcu_data, rcu_data);
static DEFINE_PER_CPU(struct rcu_data, rcu_bh_data);
/*
* Increment the quiescent state counter.
* The counter is a bit degenerated: We do not need to know
* how many quiescent states passed, just if there was at least
* one since the start of the grace period. Thus just a flag.
*/
void rcu_qsctr_inc(int cpu)
{
struct rcu_data *rdp = &per_cpu(rcu_data, cpu);
rdp->passed_quiesc = 1;
}
void rcu_bh_qsctr_inc(int cpu)
{
struct rcu_data *rdp = &per_cpu(rcu_bh_data, cpu);
rdp->passed_quiesc = 1;
}
static int blimit = 10;
static int qhimark = 10000;
static int qlowmark = 100;
#ifdef CONFIG_SMP
static void force_quiescent_state(struct rcu_data *rdp,
struct rcu_ctrlblk *rcp)
{
int cpu;
unsigned long flags;
set_need_resched();
spin_lock_irqsave(&rcp->lock, flags);
if (unlikely(!rcp->signaled)) {
rcp->signaled = 1;
/*
* Don't send IPI to itself. With irqs disabled,
* rdp->cpu is the current cpu.
*
* cpu_online_mask is updated by the _cpu_down()
* using __stop_machine(). Since we're in irqs disabled
* section, __stop_machine() is not exectuting, hence
* the cpu_online_mask is stable.
*
* However, a cpu might have been offlined _just_ before
* we disabled irqs while entering here.
* And rcu subsystem might not yet have handled the CPU_DEAD
* notification, leading to the offlined cpu's bit
* being set in the rcp->cpumask.
*
* Hence cpumask = (rcp->cpumask & cpu_online_mask) to prevent
* sending smp_reschedule() to an offlined CPU.
*/
for_each_cpu_and(cpu,
to_cpumask(rcp->cpumask), cpu_online_mask) {
if (cpu != rdp->cpu)
smp_send_reschedule(cpu);
}
}
spin_unlock_irqrestore(&rcp->lock, flags);
}
#else
static inline void force_quiescent_state(struct rcu_data *rdp,
struct rcu_ctrlblk *rcp)
{
set_need_resched();
}
#endif
static void __call_rcu(struct rcu_head *head, struct rcu_ctrlblk *rcp,
struct rcu_data *rdp)
{
long batch;
head->next = NULL;
smp_mb(); /* Read of rcu->cur must happen after any change by caller. */
/*
* Determine the batch number of this callback.
*
* Using ACCESS_ONCE to avoid the following error when gcc eliminates
* local variable "batch" and emits codes like this:
* 1) rdp->batch = rcp->cur + 1 # gets old value
* ......
* 2)rcu_batch_after(rcp->cur + 1, rdp->batch) # gets new value
* then [*nxttail[0], *nxttail[1]) may contain callbacks
* that batch# = rdp->batch, see the comment of struct rcu_data.
*/
batch = ACCESS_ONCE(rcp->cur) + 1;
if (rdp->nxtlist && rcu_batch_after(batch, rdp->batch)) {
/* process callbacks */
rdp->nxttail[0] = rdp->nxttail[1];
rdp->nxttail[1] = rdp->nxttail[2];
if (rcu_batch_after(batch - 1, rdp->batch))
rdp->nxttail[0] = rdp->nxttail[2];
}
rdp->batch = batch;
*rdp->nxttail[2] = head;
rdp->nxttail[2] = &head->next;
if (unlikely(++rdp->qlen > qhimark)) {
rdp->blimit = INT_MAX;
force_quiescent_state(rdp, &rcu_ctrlblk);
}
}
#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
static void record_gp_stall_check_time(struct rcu_ctrlblk *rcp)
{
rcp->gp_start = jiffies;
rcp->jiffies_stall = jiffies + RCU_SECONDS_TILL_STALL_CHECK;
}
static void print_other_cpu_stall(struct rcu_ctrlblk *rcp)
{
int cpu;
long delta;
unsigned long flags;
/* Only let one CPU complain about others per time interval. */
spin_lock_irqsave(&rcp->lock, flags);
delta = jiffies - rcp->jiffies_stall;
if (delta < 2 || rcp->cur != rcp->completed) {
spin_unlock_irqrestore(&rcp->lock, flags);
return;
}
rcp->jiffies_stall = jiffies + RCU_SECONDS_TILL_STALL_RECHECK;
spin_unlock_irqrestore(&rcp->lock, flags);
/* OK, time to rat on our buddy... */
printk(KERN_ERR "INFO: RCU detected CPU stalls:");
for_each_possible_cpu(cpu) {
if (cpumask_test_cpu(cpu, to_cpumask(rcp->cpumask)))
printk(" %d", cpu);
}
printk(" (detected by %d, t=%ld jiffies)\n",
smp_processor_id(), (long)(jiffies - rcp->gp_start));
}
static void print_cpu_stall(struct rcu_ctrlblk *rcp)
{
unsigned long flags;
printk(KERN_ERR "INFO: RCU detected CPU %d stall (t=%lu/%lu jiffies)\n",
smp_processor_id(), jiffies,
jiffies - rcp->gp_start);
dump_stack();
spin_lock_irqsave(&rcp->lock, flags);
if ((long)(jiffies - rcp->jiffies_stall) >= 0)
rcp->jiffies_stall =
jiffies + RCU_SECONDS_TILL_STALL_RECHECK;
spin_unlock_irqrestore(&rcp->lock, flags);
set_need_resched(); /* kick ourselves to get things going. */
}
static void check_cpu_stall(struct rcu_ctrlblk *rcp)
{
long delta;
delta = jiffies - rcp->jiffies_stall;
if (cpumask_test_cpu(smp_processor_id(), to_cpumask(rcp->cpumask)) &&
delta >= 0) {
/* We haven't checked in, so go dump stack. */
print_cpu_stall(rcp);
} else if (rcp->cur != rcp->completed && delta >= 2) {
/* They had two seconds to dump stack, so complain. */
print_other_cpu_stall(rcp);
}
}
#else /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
static void record_gp_stall_check_time(struct rcu_ctrlblk *rcp)
{
}
static inline void check_cpu_stall(struct rcu_ctrlblk *rcp)
{
}
#endif /* #else #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
/**
* call_rcu - Queue an RCU callback for invocation after a grace period.
* @head: structure to be used for queueing the RCU updates.
* @func: actual update function to be invoked after the grace period
*
* The update function will be invoked some time after a full grace
* period elapses, in other words after all currently executing RCU
* read-side critical sections have completed. RCU read-side critical
* sections are delimited by rcu_read_lock() and rcu_read_unlock(),
* and may be nested.
*/
void call_rcu(struct rcu_head *head,
void (*func)(struct rcu_head *rcu))
{
unsigned long flags;
head->func = func;
local_irq_save(flags);
__call_rcu(head, &rcu_ctrlblk, &__get_cpu_var(rcu_data));
local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(call_rcu);
/**
* call_rcu_bh - Queue an RCU for invocation after a quicker grace period.
* @head: structure to be used for queueing the RCU updates.
* @func: actual update function to be invoked after the grace period
*
* The update function will be invoked some time after a full grace
* period elapses, in other words after all currently executing RCU
* read-side critical sections have completed. call_rcu_bh() assumes
* that the read-side critical sections end on completion of a softirq
* handler. This means that read-side critical sections in process
* context must not be interrupted by softirqs. This interface is to be
* used when most of the read-side critical sections are in softirq context.
* RCU read-side critical sections are delimited by rcu_read_lock() and
* rcu_read_unlock(), * if in interrupt context or rcu_read_lock_bh()
* and rcu_read_unlock_bh(), if in process context. These may be nested.
*/
void call_rcu_bh(struct rcu_head *head,
void (*func)(struct rcu_head *rcu))
{
unsigned long flags;
head->func = func;
local_irq_save(flags);
__call_rcu(head, &rcu_bh_ctrlblk, &__get_cpu_var(rcu_bh_data));
local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(call_rcu_bh);
/*
* Return the number of RCU batches processed thus far. Useful
* for debug and statistics.
*/
long rcu_batches_completed(void)
{
return rcu_ctrlblk.completed;
}
EXPORT_SYMBOL_GPL(rcu_batches_completed);
/*
* Return the number of RCU batches processed thus far. Useful
* for debug and statistics.
*/
long rcu_batches_completed_bh(void)
{
return rcu_bh_ctrlblk.completed;
}
EXPORT_SYMBOL_GPL(rcu_batches_completed_bh);
/* Raises the softirq for processing rcu_callbacks. */
static inline void raise_rcu_softirq(void)
{
raise_softirq(RCU_SOFTIRQ);
}
/*
* Invoke the completed RCU callbacks. They are expected to be in
* a per-cpu list.
*/
static void rcu_do_batch(struct rcu_data *rdp)
{
unsigned long flags;
struct rcu_head *next, *list;
int count = 0;
list = rdp->donelist;
while (list) {
next = list->next;
prefetch(next);
list->func(list);
list = next;
if (++count >= rdp->blimit)
break;
}
rdp->donelist = list;
local_irq_save(flags);
rdp->qlen -= count;
local_irq_restore(flags);
if (rdp->blimit == INT_MAX && rdp->qlen <= qlowmark)
rdp->blimit = blimit;
if (!rdp->donelist)
rdp->donetail = &rdp->donelist;
else
raise_rcu_softirq();
}
/*
* Grace period handling:
* The grace period handling consists out of two steps:
* - A new grace period is started.
* This is done by rcu_start_batch. The start is not broadcasted to
* all cpus, they must pick this up by comparing rcp->cur with
* rdp->quiescbatch. All cpus are recorded in the
* rcu_ctrlblk.cpumask bitmap.
* - All cpus must go through a quiescent state.
* Since the start of the grace period is not broadcasted, at least two
* calls to rcu_check_quiescent_state are required:
* The first call just notices that a new grace period is running. The
* following calls check if there was a quiescent state since the beginning
* of the grace period. If so, it updates rcu_ctrlblk.cpumask. If
* the bitmap is empty, then the grace period is completed.
* rcu_check_quiescent_state calls rcu_start_batch(0) to start the next grace
* period (if necessary).
*/
/*
* Register a new batch of callbacks, and start it up if there is currently no
* active batch and the batch to be registered has not already occurred.
* Caller must hold rcu_ctrlblk.lock.
*/
static void rcu_start_batch(struct rcu_ctrlblk *rcp)
{
if (rcp->cur != rcp->pending &&
rcp->completed == rcp->cur) {
rcp->cur++;
record_gp_stall_check_time(rcp);
/*
* Accessing nohz_cpu_mask before incrementing rcp->cur needs a
* Barrier Otherwise it can cause tickless idle CPUs to be
* included in rcp->cpumask, which will extend graceperiods
* unnecessarily.
*/
smp_mb();
cpumask_andnot(to_cpumask(rcp->cpumask),
cpu_online_mask, nohz_cpu_mask);
rcp->signaled = 0;
}
}
/*
* cpu went through a quiescent state since the beginning of the grace period.
* Clear it from the cpu mask and complete the grace period if it was the last
* cpu. Start another grace period if someone has further entries pending
*/
static void cpu_quiet(int cpu, struct rcu_ctrlblk *rcp)
{
cpumask_clear_cpu(cpu, to_cpumask(rcp->cpumask));
if (cpumask_empty(to_cpumask(rcp->cpumask))) {
/* batch completed ! */
rcp->completed = rcp->cur;
rcu_start_batch(rcp);
}
}
/*
* Check if the cpu has gone through a quiescent state (say context
* switch). If so and if it already hasn't done so in this RCU
* quiescent cycle, then indicate that it has done so.
*/
static void rcu_check_quiescent_state(struct rcu_ctrlblk *rcp,
struct rcu_data *rdp)
{
unsigned long flags;
if (rdp->quiescbatch != rcp->cur) {
/* start new grace period: */
rdp->qs_pending = 1;
rdp->passed_quiesc = 0;
rdp->quiescbatch = rcp->cur;
return;
}
/* Grace period already completed for this cpu?
* qs_pending is checked instead of the actual bitmap to avoid
* cacheline trashing.
*/
if (!rdp->qs_pending)
return;
/*
* Was there a quiescent state since the beginning of the grace
* period? If no, then exit and wait for the next call.
*/
if (!rdp->passed_quiesc)
return;
rdp->qs_pending = 0;
spin_lock_irqsave(&rcp->lock, flags);
/*
* rdp->quiescbatch/rcp->cur and the cpu bitmap can come out of sync
* during cpu startup. Ignore the quiescent state.
*/
if (likely(rdp->quiescbatch == rcp->cur))
cpu_quiet(rdp->cpu, rcp);
spin_unlock_irqrestore(&rcp->lock, flags);
}
#ifdef CONFIG_HOTPLUG_CPU
/* warning! helper for rcu_offline_cpu. do not use elsewhere without reviewing
* locking requirements, the list it's pulling from has to belong to a cpu
* which is dead and hence not processing interrupts.
*/
static void rcu_move_batch(struct rcu_data *this_rdp, struct rcu_head *list,
struct rcu_head **tail, long batch)
{
unsigned long flags;
if (list) {
local_irq_save(flags);
this_rdp->batch = batch;
*this_rdp->nxttail[2] = list;
this_rdp->nxttail[2] = tail;
local_irq_restore(flags);
}
}
static void __rcu_offline_cpu(struct rcu_data *this_rdp,
struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
{
unsigned long flags;
/*
* if the cpu going offline owns the grace period
* we can block indefinitely waiting for it, so flush
* it here
*/
spin_lock_irqsave(&rcp->lock, flags);
if (rcp->cur != rcp->completed)
cpu_quiet(rdp->cpu, rcp);
rcu_move_batch(this_rdp, rdp->donelist, rdp->donetail, rcp->cur + 1);
rcu_move_batch(this_rdp, rdp->nxtlist, rdp->nxttail[2], rcp->cur + 1);
spin_unlock(&rcp->lock);
this_rdp->qlen += rdp->qlen;
local_irq_restore(flags);
}
static void rcu_offline_cpu(int cpu)
{
struct rcu_data *this_rdp = &get_cpu_var(rcu_data);
struct rcu_data *this_bh_rdp = &get_cpu_var(rcu_bh_data);
__rcu_offline_cpu(this_rdp, &rcu_ctrlblk,
&per_cpu(rcu_data, cpu));
__rcu_offline_cpu(this_bh_rdp, &rcu_bh_ctrlblk,
&per_cpu(rcu_bh_data, cpu));
put_cpu_var(rcu_data);
put_cpu_var(rcu_bh_data);
}
#else
static void rcu_offline_cpu(int cpu)
{
}
#endif
/*
* This does the RCU processing work from softirq context.
*/
static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp,
struct rcu_data *rdp)
{
unsigned long flags;
long completed_snap;
if (rdp->nxtlist) {
local_irq_save(flags);
completed_snap = ACCESS_ONCE(rcp->completed);
/*
* move the other grace-period-completed entries to
* [rdp->nxtlist, *rdp->nxttail[0]) temporarily
*/
if (!rcu_batch_before(completed_snap, rdp->batch))
rdp->nxttail[0] = rdp->nxttail[1] = rdp->nxttail[2];
else if (!rcu_batch_before(completed_snap, rdp->batch - 1))
rdp->nxttail[0] = rdp->nxttail[1];
/*
* the grace period for entries in
* [rdp->nxtlist, *rdp->nxttail[0]) has completed and
* move these entries to donelist
*/
if (rdp->nxttail[0] != &rdp->nxtlist) {
*rdp->donetail = rdp->nxtlist;
rdp->donetail = rdp->nxttail[0];
rdp->nxtlist = *rdp->nxttail[0];
*rdp->donetail = NULL;
if (rdp->nxttail[1] == rdp->nxttail[0])
rdp->nxttail[1] = &rdp->nxtlist;
if (rdp->nxttail[2] == rdp->nxttail[0])
rdp->nxttail[2] = &rdp->nxtlist;
rdp->nxttail[0] = &rdp->nxtlist;
}
local_irq_restore(flags);
if (rcu_batch_after(rdp->batch, rcp->pending)) {
unsigned long flags2;
/* and start it/schedule start if it's a new batch */
spin_lock_irqsave(&rcp->lock, flags2);
if (rcu_batch_after(rdp->batch, rcp->pending)) {
rcp->pending = rdp->batch;
rcu_start_batch(rcp);
}
spin_unlock_irqrestore(&rcp->lock, flags2);
}
}
rcu_check_quiescent_state(rcp, rdp);
if (rdp->donelist)
rcu_do_batch(rdp);
}
static void rcu_process_callbacks(struct softirq_action *unused)
{
/*
* Memory references from any prior RCU read-side critical sections
* executed by the interrupted code must be see before any RCU
* grace-period manupulations below.
*/
smp_mb(); /* See above block comment. */
__rcu_process_callbacks(&rcu_ctrlblk, &__get_cpu_var(rcu_data));
__rcu_process_callbacks(&rcu_bh_ctrlblk, &__get_cpu_var(rcu_bh_data));
/*
* Memory references from any later RCU read-side critical sections
* executed by the interrupted code must be see after any RCU
* grace-period manupulations above.
*/
smp_mb(); /* See above block comment. */
}
static int __rcu_pending(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
{
/* Check for CPU stalls, if enabled. */
check_cpu_stall(rcp);
if (rdp->nxtlist) {
long completed_snap = ACCESS_ONCE(rcp->completed);
/*
* This cpu has pending rcu entries and the grace period
* for them has completed.
*/
if (!rcu_batch_before(completed_snap, rdp->batch))
return 1;
if (!rcu_batch_before(completed_snap, rdp->batch - 1) &&
rdp->nxttail[0] != rdp->nxttail[1])
return 1;
if (rdp->nxttail[0] != &rdp->nxtlist)
return 1;
/*
* This cpu has pending rcu entries and the new batch
* for then hasn't been started nor scheduled start
*/
if (rcu_batch_after(rdp->batch, rcp->pending))
return 1;
}
/* This cpu has finished callbacks to invoke */
if (rdp->donelist)
return 1;
/* The rcu core waits for a quiescent state from the cpu */
if (rdp->quiescbatch != rcp->cur || rdp->qs_pending)
return 1;
/* nothing to do */
return 0;
}
/*
* Check to see if there is any immediate RCU-related work to be done
* by the current CPU, returning 1 if so. This function is part of the
* RCU implementation; it is -not- an exported member of the RCU API.
*/
int rcu_pending(int cpu)
{
return __rcu_pending(&rcu_ctrlblk, &per_cpu(rcu_data, cpu)) ||
__rcu_pending(&rcu_bh_ctrlblk, &per_cpu(rcu_bh_data, cpu));
}
/*
* Check to see if any future RCU-related work will need to be done
* by the current CPU, even if none need be done immediately, returning
* 1 if so. This function is part of the RCU implementation; it is -not-
* an exported member of the RCU API.
*/
int rcu_needs_cpu(int cpu)
{
struct rcu_data *rdp = &per_cpu(rcu_data, cpu);
struct rcu_data *rdp_bh = &per_cpu(rcu_bh_data, cpu);
return !!rdp->nxtlist || !!rdp_bh->nxtlist || rcu_pending(cpu);
}
/*
* Top-level function driving RCU grace-period detection, normally
* invoked from the scheduler-clock interrupt. This function simply
* increments counters that are read only from softirq by this same
* CPU, so there are no memory barriers required.
*/
void rcu_check_callbacks(int cpu, int user)
{
if (user ||
(idle_cpu(cpu) && rcu_scheduler_active &&
!in_softirq() && hardirq_count() <= (1 << HARDIRQ_SHIFT))) {
/*
* Get here if this CPU took its interrupt from user
* mode or from the idle loop, and if this is not a
* nested interrupt. In this case, the CPU is in
* a quiescent state, so count it.
*
* Also do a memory barrier. This is needed to handle
* the case where writes from a preempt-disable section
* of code get reordered into schedule() by this CPU's
* write buffer. The memory barrier makes sure that
* the rcu_qsctr_inc() and rcu_bh_qsctr_inc() are see
* by other CPUs to happen after any such write.
*/
smp_mb(); /* See above block comment. */
rcu_qsctr_inc(cpu);
rcu_bh_qsctr_inc(cpu);
} else if (!in_softirq()) {
/*
* Get here if this CPU did not take its interrupt from
* softirq, in other words, if it is not interrupting
* a rcu_bh read-side critical section. This is an _bh
* critical section, so count it. The memory barrier
* is needed for the same reason as is the above one.
*/
smp_mb(); /* See above block comment. */
rcu_bh_qsctr_inc(cpu);
}
raise_rcu_softirq();
}
static void __cpuinit rcu_init_percpu_data(int cpu, struct rcu_ctrlblk *rcp,
struct rcu_data *rdp)
{
unsigned long flags;
spin_lock_irqsave(&rcp->lock, flags);
memset(rdp, 0, sizeof(*rdp));
rdp->nxttail[0] = rdp->nxttail[1] = rdp->nxttail[2] = &rdp->nxtlist;
rdp->donetail = &rdp->donelist;
rdp->quiescbatch = rcp->completed;
rdp->qs_pending = 0;
rdp->cpu = cpu;
rdp->blimit = blimit;
spin_unlock_irqrestore(&rcp->lock, flags);
}
static void __cpuinit rcu_online_cpu(int cpu)
{
struct rcu_data *rdp = &per_cpu(rcu_data, cpu);
struct rcu_data *bh_rdp = &per_cpu(rcu_bh_data, cpu);
rcu_init_percpu_data(cpu, &rcu_ctrlblk, rdp);
rcu_init_percpu_data(cpu, &rcu_bh_ctrlblk, bh_rdp);
open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
}
static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
unsigned long action, void *hcpu)
{
long cpu = (long)hcpu;
switch (action) {
case CPU_UP_PREPARE:
case CPU_UP_PREPARE_FROZEN:
rcu_online_cpu(cpu);
break;
case CPU_DEAD:
case CPU_DEAD_FROZEN:
rcu_offline_cpu(cpu);
break;
default:
break;
}
return NOTIFY_OK;
}
static struct notifier_block __cpuinitdata rcu_nb = {
.notifier_call = rcu_cpu_notify,
};
/*
* Initializes rcu mechanism. Assumed to be called early.
* That is before local timer(SMP) or jiffie timer (uniproc) is setup.
* Note that rcu_qsctr and friends are implicitly
* initialized due to the choice of ``0'' for RCU_CTR_INVALID.
*/
void __init __rcu_init(void)
{
#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
printk(KERN_INFO "RCU-based detection of stalled CPUs is enabled.\n");
#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
rcu_cpu_notify(&rcu_nb, CPU_UP_PREPARE,
(void *)(long)smp_processor_id());
/* Register notifier for non-boot CPUs */
register_cpu_notifier(&rcu_nb);
}
module_param(blimit, int, 0);
module_param(qhimark, int, 0);
module_param(qlowmark, int, 0);

View File

@@ -98,6 +98,30 @@ void synchronize_rcu(void)
}
EXPORT_SYMBOL_GPL(synchronize_rcu);
/**
* synchronize_rcu_bh - wait until an rcu_bh grace period has elapsed.
*
* Control will return to the caller some time after a full rcu_bh grace
* period has elapsed, in other words after all currently executing rcu_bh
* read-side critical sections have completed. RCU read-side critical
* sections are delimited by rcu_read_lock_bh() and rcu_read_unlock_bh(),
* and may be nested.
*/
void synchronize_rcu_bh(void)
{
struct rcu_synchronize rcu;
if (rcu_blocking_is_gp())
return;
init_completion(&rcu.completion);
/* Will wake me after RCU finished. */
call_rcu_bh(&rcu.head, wakeme_after_rcu);
/* Wait for it. */
wait_for_completion(&rcu.completion);
}
EXPORT_SYMBOL_GPL(synchronize_rcu_bh);
static void rcu_barrier_callback(struct rcu_head *notused)
{
if (atomic_dec_and_test(&rcu_barrier_cpu_count))
@@ -129,6 +153,7 @@ static void rcu_barrier_func(void *type)
static inline void wait_migrated_callbacks(void)
{
wait_event(rcu_migrate_wq, !atomic_read(&rcu_migrate_type_count));
smp_mb(); /* In case we didn't sleep. */
}
/*
@@ -192,9 +217,13 @@ static void rcu_migrate_callback(struct rcu_head *notused)
wake_up(&rcu_migrate_wq);
}
extern int rcu_cpu_notify(struct notifier_block *self,
unsigned long action, void *hcpu);
static int __cpuinit rcu_barrier_cpu_hotplug(struct notifier_block *self,
unsigned long action, void *hcpu)
{
rcu_cpu_notify(self, action, hcpu);
if (action == CPU_DYING) {
/*
* preempt_disable() in on_each_cpu() prevents stop_machine(),
@@ -209,7 +238,8 @@ static int __cpuinit rcu_barrier_cpu_hotplug(struct notifier_block *self,
call_rcu_bh(rcu_migrate_head, rcu_migrate_callback);
call_rcu_sched(rcu_migrate_head + 1, rcu_migrate_callback);
call_rcu(rcu_migrate_head + 2, rcu_migrate_callback);
} else if (action == CPU_POST_DEAD) {
} else if (action == CPU_DOWN_PREPARE) {
/* Don't need to wait until next removal operation. */
/* rcu_migrate_head is protected by cpu_add_remove_lock */
wait_migrated_callbacks();
}
@@ -219,8 +249,18 @@ static int __cpuinit rcu_barrier_cpu_hotplug(struct notifier_block *self,
void __init rcu_init(void)
{
int i;
__rcu_init();
hotcpu_notifier(rcu_barrier_cpu_hotplug, 0);
cpu_notifier(rcu_barrier_cpu_hotplug, 0);
/*
* We don't need protection against CPU-hotplug here because
* this is called early in boot, before either interrupts
* or the scheduler are operational.
*/
for_each_online_cpu(i)
rcu_barrier_cpu_hotplug(NULL, CPU_UP_PREPARE, (void *)(long)i);
}
void rcu_scheduler_starting(void)

File diff suppressed because it is too large Load Diff

View File

@@ -1,334 +0,0 @@
/*
* Read-Copy Update tracing for realtime implementation
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
* Copyright IBM Corporation, 2006
*
* Papers: http://www.rdrop.com/users/paulmck/RCU
*
* For detailed explanation of Read-Copy Update mechanism see -
* Documentation/RCU/ *.txt
*
*/
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/spinlock.h>
#include <linux/smp.h>
#include <linux/rcupdate.h>
#include <linux/interrupt.h>
#include <linux/sched.h>
#include <asm/atomic.h>
#include <linux/bitops.h>
#include <linux/module.h>
#include <linux/completion.h>
#include <linux/moduleparam.h>
#include <linux/percpu.h>
#include <linux/notifier.h>
#include <linux/cpu.h>
#include <linux/mutex.h>
#include <linux/rcupreempt_trace.h>
#include <linux/debugfs.h>
static struct mutex rcupreempt_trace_mutex;
static char *rcupreempt_trace_buf;
#define RCUPREEMPT_TRACE_BUF_SIZE 4096
void rcupreempt_trace_move2done(struct rcupreempt_trace *trace)
{
trace->done_length += trace->wait_length;
trace->done_add += trace->wait_length;
trace->wait_length = 0;
}
void rcupreempt_trace_move2wait(struct rcupreempt_trace *trace)
{
trace->wait_length += trace->next_length;
trace->wait_add += trace->next_length;
trace->next_length = 0;
}
void rcupreempt_trace_try_flip_1(struct rcupreempt_trace *trace)
{
atomic_inc(&trace->rcu_try_flip_1);
}
void rcupreempt_trace_try_flip_e1(struct rcupreempt_trace *trace)
{
atomic_inc(&trace->rcu_try_flip_e1);
}
void rcupreempt_trace_try_flip_i1(struct rcupreempt_trace *trace)
{
trace->rcu_try_flip_i1++;
}
void rcupreempt_trace_try_flip_ie1(struct rcupreempt_trace *trace)
{
trace->rcu_try_flip_ie1++;
}
void rcupreempt_trace_try_flip_g1(struct rcupreempt_trace *trace)
{
trace->rcu_try_flip_g1++;
}
void rcupreempt_trace_try_flip_a1(struct rcupreempt_trace *trace)
{
trace->rcu_try_flip_a1++;
}
void rcupreempt_trace_try_flip_ae1(struct rcupreempt_trace *trace)
{
trace->rcu_try_flip_ae1++;
}
void rcupreempt_trace_try_flip_a2(struct rcupreempt_trace *trace)
{
trace->rcu_try_flip_a2++;
}
void rcupreempt_trace_try_flip_z1(struct rcupreempt_trace *trace)
{
trace->rcu_try_flip_z1++;
}
void rcupreempt_trace_try_flip_ze1(struct rcupreempt_trace *trace)
{
trace->rcu_try_flip_ze1++;
}
void rcupreempt_trace_try_flip_z2(struct rcupreempt_trace *trace)
{
trace->rcu_try_flip_z2++;
}
void rcupreempt_trace_try_flip_m1(struct rcupreempt_trace *trace)
{
trace->rcu_try_flip_m1++;
}
void rcupreempt_trace_try_flip_me1(struct rcupreempt_trace *trace)
{
trace->rcu_try_flip_me1++;
}
void rcupreempt_trace_try_flip_m2(struct rcupreempt_trace *trace)
{
trace->rcu_try_flip_m2++;
}
void rcupreempt_trace_check_callbacks(struct rcupreempt_trace *trace)
{
trace->rcu_check_callbacks++;
}
void rcupreempt_trace_done_remove(struct rcupreempt_trace *trace)
{
trace->done_remove += trace->done_length;
trace->done_length = 0;
}
void rcupreempt_trace_invoke(struct rcupreempt_trace *trace)
{
atomic_inc(&trace->done_invoked);
}
void rcupreempt_trace_next_add(struct rcupreempt_trace *trace)
{
trace->next_add++;
trace->next_length++;
}
static void rcupreempt_trace_sum(struct rcupreempt_trace *sp)
{
struct rcupreempt_trace *cp;
int cpu;
memset(sp, 0, sizeof(*sp));
for_each_possible_cpu(cpu) {
cp = rcupreempt_trace_cpu(cpu);
sp->next_length += cp->next_length;
sp->next_add += cp->next_add;
sp->wait_length += cp->wait_length;
sp->wait_add += cp->wait_add;
sp->done_length += cp->done_length;
sp->done_add += cp->done_add;
sp->done_remove += cp->done_remove;
atomic_add(atomic_read(&cp->done_invoked), &sp->done_invoked);
sp->rcu_check_callbacks += cp->rcu_check_callbacks;
atomic_add(atomic_read(&cp->rcu_try_flip_1),
&sp->rcu_try_flip_1);
atomic_add(atomic_read(&cp->rcu_try_flip_e1),
&sp->rcu_try_flip_e1);
sp->rcu_try_flip_i1 += cp->rcu_try_flip_i1;
sp->rcu_try_flip_ie1 += cp->rcu_try_flip_ie1;
sp->rcu_try_flip_g1 += cp->rcu_try_flip_g1;
sp->rcu_try_flip_a1 += cp->rcu_try_flip_a1;
sp->rcu_try_flip_ae1 += cp->rcu_try_flip_ae1;
sp->rcu_try_flip_a2 += cp->rcu_try_flip_a2;
sp->rcu_try_flip_z1 += cp->rcu_try_flip_z1;
sp->rcu_try_flip_ze1 += cp->rcu_try_flip_ze1;
sp->rcu_try_flip_z2 += cp->rcu_try_flip_z2;
sp->rcu_try_flip_m1 += cp->rcu_try_flip_m1;
sp->rcu_try_flip_me1 += cp->rcu_try_flip_me1;
sp->rcu_try_flip_m2 += cp->rcu_try_flip_m2;
}
}
static ssize_t rcustats_read(struct file *filp, char __user *buffer,
size_t count, loff_t *ppos)
{
struct rcupreempt_trace trace;
ssize_t bcount;
int cnt = 0;
rcupreempt_trace_sum(&trace);
mutex_lock(&rcupreempt_trace_mutex);
snprintf(&rcupreempt_trace_buf[cnt], RCUPREEMPT_TRACE_BUF_SIZE - cnt,
"ggp=%ld rcc=%ld\n",
rcu_batches_completed(),
trace.rcu_check_callbacks);
snprintf(&rcupreempt_trace_buf[cnt], RCUPREEMPT_TRACE_BUF_SIZE - cnt,
"na=%ld nl=%ld wa=%ld wl=%ld da=%ld dl=%ld dr=%ld di=%d\n"
"1=%d e1=%d i1=%ld ie1=%ld g1=%ld a1=%ld ae1=%ld a2=%ld\n"
"z1=%ld ze1=%ld z2=%ld m1=%ld me1=%ld m2=%ld\n",
trace.next_add, trace.next_length,
trace.wait_add, trace.wait_length,
trace.done_add, trace.done_length,
trace.done_remove, atomic_read(&trace.done_invoked),
atomic_read(&trace.rcu_try_flip_1),
atomic_read(&trace.rcu_try_flip_e1),
trace.rcu_try_flip_i1, trace.rcu_try_flip_ie1,
trace.rcu_try_flip_g1,
trace.rcu_try_flip_a1, trace.rcu_try_flip_ae1,
trace.rcu_try_flip_a2,
trace.rcu_try_flip_z1, trace.rcu_try_flip_ze1,
trace.rcu_try_flip_z2,
trace.rcu_try_flip_m1, trace.rcu_try_flip_me1,
trace.rcu_try_flip_m2);
bcount = simple_read_from_buffer(buffer, count, ppos,
rcupreempt_trace_buf, strlen(rcupreempt_trace_buf));
mutex_unlock(&rcupreempt_trace_mutex);
return bcount;
}
static ssize_t rcugp_read(struct file *filp, char __user *buffer,
size_t count, loff_t *ppos)
{
long oldgp = rcu_batches_completed();
ssize_t bcount;
mutex_lock(&rcupreempt_trace_mutex);
synchronize_rcu();
snprintf(rcupreempt_trace_buf, RCUPREEMPT_TRACE_BUF_SIZE,
"oldggp=%ld newggp=%ld\n", oldgp, rcu_batches_completed());
bcount = simple_read_from_buffer(buffer, count, ppos,
rcupreempt_trace_buf, strlen(rcupreempt_trace_buf));
mutex_unlock(&rcupreempt_trace_mutex);
return bcount;
}
static ssize_t rcuctrs_read(struct file *filp, char __user *buffer,
size_t count, loff_t *ppos)
{
int cnt = 0;
int cpu;
int f = rcu_batches_completed() & 0x1;
ssize_t bcount;
mutex_lock(&rcupreempt_trace_mutex);
cnt += snprintf(&rcupreempt_trace_buf[cnt], RCUPREEMPT_TRACE_BUF_SIZE,
"CPU last cur F M\n");
for_each_online_cpu(cpu) {
long *flipctr = rcupreempt_flipctr(cpu);
cnt += snprintf(&rcupreempt_trace_buf[cnt],
RCUPREEMPT_TRACE_BUF_SIZE - cnt,
"%3d %4ld %3ld %d %d\n",
cpu,
flipctr[!f],
flipctr[f],
rcupreempt_flip_flag(cpu),
rcupreempt_mb_flag(cpu));
}
cnt += snprintf(&rcupreempt_trace_buf[cnt],
RCUPREEMPT_TRACE_BUF_SIZE - cnt,
"ggp = %ld, state = %s\n",
rcu_batches_completed(),
rcupreempt_try_flip_state_name());
cnt += snprintf(&rcupreempt_trace_buf[cnt],
RCUPREEMPT_TRACE_BUF_SIZE - cnt,
"\n");
bcount = simple_read_from_buffer(buffer, count, ppos,
rcupreempt_trace_buf, strlen(rcupreempt_trace_buf));
mutex_unlock(&rcupreempt_trace_mutex);
return bcount;
}
static struct file_operations rcustats_fops = {
.owner = THIS_MODULE,
.read = rcustats_read,
};
static struct file_operations rcugp_fops = {
.owner = THIS_MODULE,
.read = rcugp_read,
};
static struct file_operations rcuctrs_fops = {
.owner = THIS_MODULE,
.read = rcuctrs_read,
};
static struct dentry *rcudir, *statdir, *ctrsdir, *gpdir;
static int rcupreempt_debugfs_init(void)
{
rcudir = debugfs_create_dir("rcu", NULL);
if (!rcudir)
goto out;
statdir = debugfs_create_file("rcustats", 0444, rcudir,
NULL, &rcustats_fops);
if (!statdir)
goto free_out;
gpdir = debugfs_create_file("rcugp", 0444, rcudir, NULL, &rcugp_fops);
if (!gpdir)
goto free_out;
ctrsdir = debugfs_create_file("rcuctrs", 0444, rcudir,
NULL, &rcuctrs_fops);
if (!ctrsdir)
goto free_out;
return 0;
free_out:
if (statdir)
debugfs_remove(statdir);
if (gpdir)
debugfs_remove(gpdir);
debugfs_remove(rcudir);
out:
return 1;
}
static int __init rcupreempt_trace_init(void)
{
int ret;
mutex_init(&rcupreempt_trace_mutex);
rcupreempt_trace_buf = kmalloc(RCUPREEMPT_TRACE_BUF_SIZE, GFP_KERNEL);
if (!rcupreempt_trace_buf)
return 1;
ret = rcupreempt_debugfs_init();
if (ret)
kfree(rcupreempt_trace_buf);
return ret;
}
static void __exit rcupreempt_trace_cleanup(void)
{
debugfs_remove(statdir);
debugfs_remove(gpdir);
debugfs_remove(ctrsdir);
debugfs_remove(rcudir);
kfree(rcupreempt_trace_buf);
}
module_init(rcupreempt_trace_init);
module_exit(rcupreempt_trace_cleanup);

View File

@@ -257,14 +257,14 @@ struct rcu_torture_ops {
void (*init)(void);
void (*cleanup)(void);
int (*readlock)(void);
void (*readdelay)(struct rcu_random_state *rrsp);
void (*read_delay)(struct rcu_random_state *rrsp);
void (*readunlock)(int idx);
int (*completed)(void);
void (*deferredfree)(struct rcu_torture *p);
void (*deferred_free)(struct rcu_torture *p);
void (*sync)(void);
void (*cb_barrier)(void);
int (*stats)(char *page);
int irqcapable;
int irq_capable;
char *name;
};
static struct rcu_torture_ops *cur_ops = NULL;
@@ -320,7 +320,7 @@ rcu_torture_cb(struct rcu_head *p)
rp->rtort_mbtest = 0;
rcu_torture_free(rp);
} else
cur_ops->deferredfree(rp);
cur_ops->deferred_free(rp);
}
static void rcu_torture_deferred_free(struct rcu_torture *p)
@@ -329,18 +329,18 @@ static void rcu_torture_deferred_free(struct rcu_torture *p)
}
static struct rcu_torture_ops rcu_ops = {
.init = NULL,
.cleanup = NULL,
.readlock = rcu_torture_read_lock,
.readdelay = rcu_read_delay,
.readunlock = rcu_torture_read_unlock,
.completed = rcu_torture_completed,
.deferredfree = rcu_torture_deferred_free,
.sync = synchronize_rcu,
.cb_barrier = rcu_barrier,
.stats = NULL,
.irqcapable = 1,
.name = "rcu"
.init = NULL,
.cleanup = NULL,
.readlock = rcu_torture_read_lock,
.read_delay = rcu_read_delay,
.readunlock = rcu_torture_read_unlock,
.completed = rcu_torture_completed,
.deferred_free = rcu_torture_deferred_free,
.sync = synchronize_rcu,
.cb_barrier = rcu_barrier,
.stats = NULL,
.irq_capable = 1,
.name = "rcu"
};
static void rcu_sync_torture_deferred_free(struct rcu_torture *p)
@@ -370,18 +370,18 @@ static void rcu_sync_torture_init(void)
}
static struct rcu_torture_ops rcu_sync_ops = {
.init = rcu_sync_torture_init,
.cleanup = NULL,
.readlock = rcu_torture_read_lock,
.readdelay = rcu_read_delay,
.readunlock = rcu_torture_read_unlock,
.completed = rcu_torture_completed,
.deferredfree = rcu_sync_torture_deferred_free,
.sync = synchronize_rcu,
.cb_barrier = NULL,
.stats = NULL,
.irqcapable = 1,
.name = "rcu_sync"
.init = rcu_sync_torture_init,
.cleanup = NULL,
.readlock = rcu_torture_read_lock,
.read_delay = rcu_read_delay,
.readunlock = rcu_torture_read_unlock,
.completed = rcu_torture_completed,
.deferred_free = rcu_sync_torture_deferred_free,
.sync = synchronize_rcu,
.cb_barrier = NULL,
.stats = NULL,
.irq_capable = 1,
.name = "rcu_sync"
};
/*
@@ -432,33 +432,33 @@ static void rcu_bh_torture_synchronize(void)
}
static struct rcu_torture_ops rcu_bh_ops = {
.init = NULL,
.cleanup = NULL,
.readlock = rcu_bh_torture_read_lock,
.readdelay = rcu_read_delay, /* just reuse rcu's version. */
.readunlock = rcu_bh_torture_read_unlock,
.completed = rcu_bh_torture_completed,
.deferredfree = rcu_bh_torture_deferred_free,
.sync = rcu_bh_torture_synchronize,
.cb_barrier = rcu_barrier_bh,
.stats = NULL,
.irqcapable = 1,
.name = "rcu_bh"
.init = NULL,
.cleanup = NULL,
.readlock = rcu_bh_torture_read_lock,
.read_delay = rcu_read_delay, /* just reuse rcu's version. */
.readunlock = rcu_bh_torture_read_unlock,
.completed = rcu_bh_torture_completed,
.deferred_free = rcu_bh_torture_deferred_free,
.sync = rcu_bh_torture_synchronize,
.cb_barrier = rcu_barrier_bh,
.stats = NULL,
.irq_capable = 1,
.name = "rcu_bh"
};
static struct rcu_torture_ops rcu_bh_sync_ops = {
.init = rcu_sync_torture_init,
.cleanup = NULL,
.readlock = rcu_bh_torture_read_lock,
.readdelay = rcu_read_delay, /* just reuse rcu's version. */
.readunlock = rcu_bh_torture_read_unlock,
.completed = rcu_bh_torture_completed,
.deferredfree = rcu_sync_torture_deferred_free,
.sync = rcu_bh_torture_synchronize,
.cb_barrier = NULL,
.stats = NULL,
.irqcapable = 1,
.name = "rcu_bh_sync"
.init = rcu_sync_torture_init,
.cleanup = NULL,
.readlock = rcu_bh_torture_read_lock,
.read_delay = rcu_read_delay, /* just reuse rcu's version. */
.readunlock = rcu_bh_torture_read_unlock,
.completed = rcu_bh_torture_completed,
.deferred_free = rcu_sync_torture_deferred_free,
.sync = rcu_bh_torture_synchronize,
.cb_barrier = NULL,
.stats = NULL,
.irq_capable = 1,
.name = "rcu_bh_sync"
};
/*
@@ -530,17 +530,17 @@ static int srcu_torture_stats(char *page)
}
static struct rcu_torture_ops srcu_ops = {
.init = srcu_torture_init,
.cleanup = srcu_torture_cleanup,
.readlock = srcu_torture_read_lock,
.readdelay = srcu_read_delay,
.readunlock = srcu_torture_read_unlock,
.completed = srcu_torture_completed,
.deferredfree = rcu_sync_torture_deferred_free,
.sync = srcu_torture_synchronize,
.cb_barrier = NULL,
.stats = srcu_torture_stats,
.name = "srcu"
.init = srcu_torture_init,
.cleanup = srcu_torture_cleanup,
.readlock = srcu_torture_read_lock,
.read_delay = srcu_read_delay,
.readunlock = srcu_torture_read_unlock,
.completed = srcu_torture_completed,
.deferred_free = rcu_sync_torture_deferred_free,
.sync = srcu_torture_synchronize,
.cb_barrier = NULL,
.stats = srcu_torture_stats,
.name = "srcu"
};
/*
@@ -574,32 +574,49 @@ static void sched_torture_synchronize(void)
}
static struct rcu_torture_ops sched_ops = {
.init = rcu_sync_torture_init,
.cleanup = NULL,
.readlock = sched_torture_read_lock,
.readdelay = rcu_read_delay, /* just reuse rcu's version. */
.readunlock = sched_torture_read_unlock,
.completed = sched_torture_completed,
.deferredfree = rcu_sched_torture_deferred_free,
.sync = sched_torture_synchronize,
.cb_barrier = rcu_barrier_sched,
.stats = NULL,
.irqcapable = 1,
.name = "sched"
.init = rcu_sync_torture_init,
.cleanup = NULL,
.readlock = sched_torture_read_lock,
.read_delay = rcu_read_delay, /* just reuse rcu's version. */
.readunlock = sched_torture_read_unlock,
.completed = sched_torture_completed,
.deferred_free = rcu_sched_torture_deferred_free,
.sync = sched_torture_synchronize,
.cb_barrier = rcu_barrier_sched,
.stats = NULL,
.irq_capable = 1,
.name = "sched"
};
static struct rcu_torture_ops sched_ops_sync = {
.init = rcu_sync_torture_init,
.cleanup = NULL,
.readlock = sched_torture_read_lock,
.readdelay = rcu_read_delay, /* just reuse rcu's version. */
.readunlock = sched_torture_read_unlock,
.completed = sched_torture_completed,
.deferredfree = rcu_sync_torture_deferred_free,
.sync = sched_torture_synchronize,
.cb_barrier = NULL,
.stats = NULL,
.name = "sched_sync"
.init = rcu_sync_torture_init,
.cleanup = NULL,
.readlock = sched_torture_read_lock,
.read_delay = rcu_read_delay, /* just reuse rcu's version. */
.readunlock = sched_torture_read_unlock,
.completed = sched_torture_completed,
.deferred_free = rcu_sync_torture_deferred_free,
.sync = sched_torture_synchronize,
.cb_barrier = NULL,
.stats = NULL,
.name = "sched_sync"
};
extern int rcu_expedited_torture_stats(char *page);
static struct rcu_torture_ops sched_expedited_ops = {
.init = rcu_sync_torture_init,
.cleanup = NULL,
.readlock = sched_torture_read_lock,
.read_delay = rcu_read_delay, /* just reuse rcu's version. */
.readunlock = sched_torture_read_unlock,
.completed = sched_torture_completed,
.deferred_free = rcu_sync_torture_deferred_free,
.sync = synchronize_sched_expedited,
.cb_barrier = NULL,
.stats = rcu_expedited_torture_stats,
.irq_capable = 1,
.name = "sched_expedited"
};
/*
@@ -635,7 +652,7 @@ rcu_torture_writer(void *arg)
i = RCU_TORTURE_PIPE_LEN;
atomic_inc(&rcu_torture_wcount[i]);
old_rp->rtort_pipe_count++;
cur_ops->deferredfree(old_rp);
cur_ops->deferred_free(old_rp);
}
rcu_torture_current_version++;
oldbatch = cur_ops->completed();
@@ -700,7 +717,7 @@ static void rcu_torture_timer(unsigned long unused)
if (p->rtort_mbtest == 0)
atomic_inc(&n_rcu_torture_mberror);
spin_lock(&rand_lock);
cur_ops->readdelay(&rand);
cur_ops->read_delay(&rand);
n_rcu_torture_timers++;
spin_unlock(&rand_lock);
preempt_disable();
@@ -738,11 +755,11 @@ rcu_torture_reader(void *arg)
VERBOSE_PRINTK_STRING("rcu_torture_reader task started");
set_user_nice(current, 19);
if (irqreader && cur_ops->irqcapable)
if (irqreader && cur_ops->irq_capable)
setup_timer_on_stack(&t, rcu_torture_timer, 0);
do {
if (irqreader && cur_ops->irqcapable) {
if (irqreader && cur_ops->irq_capable) {
if (!timer_pending(&t))
mod_timer(&t, 1);
}
@@ -757,7 +774,7 @@ rcu_torture_reader(void *arg)
}
if (p->rtort_mbtest == 0)
atomic_inc(&n_rcu_torture_mberror);
cur_ops->readdelay(&rand);
cur_ops->read_delay(&rand);
preempt_disable();
pipe_count = p->rtort_pipe_count;
if (pipe_count > RCU_TORTURE_PIPE_LEN) {
@@ -778,7 +795,7 @@ rcu_torture_reader(void *arg)
} while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP);
VERBOSE_PRINTK_STRING("rcu_torture_reader task stopping");
rcutorture_shutdown_absorb("rcu_torture_reader");
if (irqreader && cur_ops->irqcapable)
if (irqreader && cur_ops->irq_capable)
del_timer_sync(&t);
while (!kthread_should_stop())
schedule_timeout_uninterruptible(1);
@@ -1078,6 +1095,7 @@ rcu_torture_init(void)
int firsterr = 0;
static struct rcu_torture_ops *torture_ops[] =
{ &rcu_ops, &rcu_sync_ops, &rcu_bh_ops, &rcu_bh_sync_ops,
&sched_expedited_ops,
&srcu_ops, &sched_ops, &sched_ops_sync, };
mutex_lock(&fullstop_mutex);

View File

@@ -47,6 +47,8 @@
#include <linux/mutex.h>
#include <linux/time.h>
#include "rcutree.h"
#ifdef CONFIG_DEBUG_LOCK_ALLOC
static struct lock_class_key rcu_lock_key;
struct lockdep_map rcu_lock_map =
@@ -73,30 +75,59 @@ EXPORT_SYMBOL_GPL(rcu_lock_map);
.n_force_qs_ngp = 0, \
}
struct rcu_state rcu_state = RCU_STATE_INITIALIZER(rcu_state);
DEFINE_PER_CPU(struct rcu_data, rcu_data);
struct rcu_state rcu_sched_state = RCU_STATE_INITIALIZER(rcu_sched_state);
DEFINE_PER_CPU(struct rcu_data, rcu_sched_data);
struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh_state);
DEFINE_PER_CPU(struct rcu_data, rcu_bh_data);
extern long rcu_batches_completed_sched(void);
static struct rcu_node *rcu_get_root(struct rcu_state *rsp);
static void cpu_quiet_msk(unsigned long mask, struct rcu_state *rsp,
struct rcu_node *rnp, unsigned long flags);
static void cpu_quiet_msk_finish(struct rcu_state *rsp, unsigned long flags);
#ifdef CONFIG_HOTPLUG_CPU
static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp);
#endif /* #ifdef CONFIG_HOTPLUG_CPU */
static void __rcu_process_callbacks(struct rcu_state *rsp,
struct rcu_data *rdp);
static void __call_rcu(struct rcu_head *head,
void (*func)(struct rcu_head *rcu),
struct rcu_state *rsp);
static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp);
static void __cpuinit rcu_init_percpu_data(int cpu, struct rcu_state *rsp,
int preemptable);
#include "rcutree_plugin.h"
/*
* Increment the quiescent state counter.
* The counter is a bit degenerated: We do not need to know
* Note a quiescent state. Because we do not need to know
* how many quiescent states passed, just if there was at least
* one since the start of the grace period. Thus just a flag.
* one since the start of the grace period, this just sets a flag.
*/
void rcu_qsctr_inc(int cpu)
void rcu_sched_qs(int cpu)
{
struct rcu_data *rdp = &per_cpu(rcu_data, cpu);
unsigned long flags;
struct rcu_data *rdp;
local_irq_save(flags);
rdp = &per_cpu(rcu_sched_data, cpu);
rdp->passed_quiesc = 1;
rdp->passed_quiesc_completed = rdp->completed;
rcu_preempt_qs(cpu);
local_irq_restore(flags);
}
void rcu_bh_qsctr_inc(int cpu)
void rcu_bh_qs(int cpu)
{
struct rcu_data *rdp = &per_cpu(rcu_bh_data, cpu);
unsigned long flags;
struct rcu_data *rdp;
local_irq_save(flags);
rdp = &per_cpu(rcu_bh_data, cpu);
rdp->passed_quiesc = 1;
rdp->passed_quiesc_completed = rdp->completed;
local_irq_restore(flags);
}
#ifdef CONFIG_NO_HZ
@@ -111,15 +142,16 @@ static int qhimark = 10000; /* If this many pending, ignore blimit. */
static int qlowmark = 100; /* Once only this many pending, use blimit. */
static void force_quiescent_state(struct rcu_state *rsp, int relaxed);
static int rcu_pending(int cpu);
/*
* Return the number of RCU batches processed thus far for debug & stats.
* Return the number of RCU-sched batches processed thus far for debug & stats.
*/
long rcu_batches_completed(void)
long rcu_batches_completed_sched(void)
{
return rcu_state.completed;
return rcu_sched_state.completed;
}
EXPORT_SYMBOL_GPL(rcu_batches_completed);
EXPORT_SYMBOL_GPL(rcu_batches_completed_sched);
/*
* Return the number of RCU BH batches processed thus far for debug & stats.
@@ -182,6 +214,10 @@ static int rcu_implicit_offline_qs(struct rcu_data *rdp)
return 1;
}
/* If preemptable RCU, no point in sending reschedule IPI. */
if (rdp->preemptable)
return 0;
/* The CPU is online, so send it a reschedule IPI. */
if (rdp->cpu != smp_processor_id())
smp_send_reschedule(rdp->cpu);
@@ -194,7 +230,6 @@ static int rcu_implicit_offline_qs(struct rcu_data *rdp)
#endif /* #ifdef CONFIG_SMP */
#ifdef CONFIG_NO_HZ
static DEFINE_RATELIMIT_STATE(rcu_rs, 10 * HZ, 5);
/**
* rcu_enter_nohz - inform RCU that current CPU is entering nohz
@@ -214,7 +249,7 @@ void rcu_enter_nohz(void)
rdtp = &__get_cpu_var(rcu_dynticks);
rdtp->dynticks++;
rdtp->dynticks_nesting--;
WARN_ON_RATELIMIT(rdtp->dynticks & 0x1, &rcu_rs);
WARN_ON_ONCE(rdtp->dynticks & 0x1);
local_irq_restore(flags);
}
@@ -233,7 +268,7 @@ void rcu_exit_nohz(void)
rdtp = &__get_cpu_var(rcu_dynticks);
rdtp->dynticks++;
rdtp->dynticks_nesting++;
WARN_ON_RATELIMIT(!(rdtp->dynticks & 0x1), &rcu_rs);
WARN_ON_ONCE(!(rdtp->dynticks & 0x1));
local_irq_restore(flags);
smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */
}
@@ -252,7 +287,7 @@ void rcu_nmi_enter(void)
if (rdtp->dynticks & 0x1)
return;
rdtp->dynticks_nmi++;
WARN_ON_RATELIMIT(!(rdtp->dynticks_nmi & 0x1), &rcu_rs);
WARN_ON_ONCE(!(rdtp->dynticks_nmi & 0x1));
smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */
}
@@ -271,7 +306,7 @@ void rcu_nmi_exit(void)
return;
smp_mb(); /* CPUs seeing ++ must see prior RCU read-side crit sects */
rdtp->dynticks_nmi++;
WARN_ON_RATELIMIT(rdtp->dynticks_nmi & 0x1, &rcu_rs);
WARN_ON_ONCE(rdtp->dynticks_nmi & 0x1);
}
/**
@@ -287,7 +322,7 @@ void rcu_irq_enter(void)
if (rdtp->dynticks_nesting++)
return;
rdtp->dynticks++;
WARN_ON_RATELIMIT(!(rdtp->dynticks & 0x1), &rcu_rs);
WARN_ON_ONCE(!(rdtp->dynticks & 0x1));
smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */
}
@@ -306,10 +341,10 @@ void rcu_irq_exit(void)
return;
smp_mb(); /* CPUs seeing ++ must see prior RCU read-side crit sects */
rdtp->dynticks++;
WARN_ON_RATELIMIT(rdtp->dynticks & 0x1, &rcu_rs);
WARN_ON_ONCE(rdtp->dynticks & 0x1);
/* If the interrupt queued a callback, get out of dyntick mode. */
if (__get_cpu_var(rcu_data).nxtlist ||
if (__get_cpu_var(rcu_sched_data).nxtlist ||
__get_cpu_var(rcu_bh_data).nxtlist)
set_need_resched();
}
@@ -462,6 +497,7 @@ static void print_other_cpu_stall(struct rcu_state *rsp)
printk(KERN_ERR "INFO: RCU detected CPU stalls:");
for (; rnp_cur < rnp_end; rnp_cur++) {
rcu_print_task_stall(rnp);
if (rnp_cur->qsmask == 0)
continue;
for (cpu = 0; cpu <= rnp_cur->grphi - rnp_cur->grplo; cpu++)
@@ -678,6 +714,19 @@ rcu_process_gp_end(struct rcu_state *rsp, struct rcu_data *rdp)
local_irq_restore(flags);
}
/*
* Clean up after the prior grace period and let rcu_start_gp() start up
* the next grace period if one is needed. Note that the caller must
* hold rnp->lock, as required by rcu_start_gp(), which will release it.
*/
static void cpu_quiet_msk_finish(struct rcu_state *rsp, unsigned long flags)
__releases(rnp->lock)
{
rsp->completed = rsp->gpnum;
rcu_process_gp_end(rsp, rsp->rda[smp_processor_id()]);
rcu_start_gp(rsp, flags); /* releases root node's rnp->lock. */
}
/*
* Similar to cpu_quiet(), for which it is a helper function. Allows
* a group of CPUs to be quieted at one go, though all the CPUs in the
@@ -699,7 +748,7 @@ cpu_quiet_msk(unsigned long mask, struct rcu_state *rsp, struct rcu_node *rnp,
return;
}
rnp->qsmask &= ~mask;
if (rnp->qsmask != 0) {
if (rnp->qsmask != 0 || rcu_preempted_readers(rnp)) {
/* Other bits still set at this level, so done. */
spin_unlock_irqrestore(&rnp->lock, flags);
@@ -719,14 +768,10 @@ cpu_quiet_msk(unsigned long mask, struct rcu_state *rsp, struct rcu_node *rnp,
/*
* Get here if we are the last CPU to pass through a quiescent
* state for this grace period. Clean up and let rcu_start_gp()
* start up the next grace period if one is needed. Note that
* we still hold rnp->lock, as required by rcu_start_gp(), which
* will release it.
* state for this grace period. Invoke cpu_quiet_msk_finish()
* to clean up and start the next grace period if one is needed.
*/
rsp->completed = rsp->gpnum;
rcu_process_gp_end(rsp, rsp->rda[smp_processor_id()]);
rcu_start_gp(rsp, flags); /* releases rnp->lock. */
cpu_quiet_msk_finish(rsp, flags); /* releases rnp->lock. */
}
/*
@@ -833,11 +878,12 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp)
spin_lock(&rnp->lock); /* irqs already disabled. */
rnp->qsmaskinit &= ~mask;
if (rnp->qsmaskinit != 0) {
spin_unlock(&rnp->lock); /* irqs already disabled. */
spin_unlock(&rnp->lock); /* irqs remain disabled. */
break;
}
rcu_preempt_offline_tasks(rsp, rnp);
mask = rnp->grpmask;
spin_unlock(&rnp->lock); /* irqs already disabled. */
spin_unlock(&rnp->lock); /* irqs remain disabled. */
rnp = rnp->parent;
} while (rnp != NULL);
lastcomp = rsp->completed;
@@ -850,7 +896,7 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp)
/*
* Move callbacks from the outgoing CPU to the running CPU.
* Note that the outgoing CPU is now quiscent, so it is now
* (uncharacteristically) safe to access it rcu_data structure.
* (uncharacteristically) safe to access its rcu_data structure.
* Note also that we must carefully retain the order of the
* outgoing CPU's callbacks in order for rcu_barrier() to work
* correctly. Finally, note that we start all the callbacks
@@ -881,8 +927,9 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp)
*/
static void rcu_offline_cpu(int cpu)
{
__rcu_offline_cpu(cpu, &rcu_state);
__rcu_offline_cpu(cpu, &rcu_sched_state);
__rcu_offline_cpu(cpu, &rcu_bh_state);
rcu_preempt_offline_cpu(cpu);
}
#else /* #ifdef CONFIG_HOTPLUG_CPU */
@@ -968,6 +1015,8 @@ static void rcu_do_batch(struct rcu_data *rdp)
*/
void rcu_check_callbacks(int cpu, int user)
{
if (!rcu_pending(cpu))
return; /* if nothing for RCU to do. */
if (user ||
(idle_cpu(cpu) && rcu_scheduler_active &&
!in_softirq() && hardirq_count() <= (1 << HARDIRQ_SHIFT))) {
@@ -976,17 +1025,16 @@ void rcu_check_callbacks(int cpu, int user)
* Get here if this CPU took its interrupt from user
* mode or from the idle loop, and if this is not a
* nested interrupt. In this case, the CPU is in
* a quiescent state, so count it.
* a quiescent state, so note it.
*
* No memory barrier is required here because both
* rcu_qsctr_inc() and rcu_bh_qsctr_inc() reference
* only CPU-local variables that other CPUs neither
* access nor modify, at least not while the corresponding
* CPU is online.
* rcu_sched_qs() and rcu_bh_qs() reference only CPU-local
* variables that other CPUs neither access nor modify,
* at least not while the corresponding CPU is online.
*/
rcu_qsctr_inc(cpu);
rcu_bh_qsctr_inc(cpu);
rcu_sched_qs(cpu);
rcu_bh_qs(cpu);
} else if (!in_softirq()) {
@@ -994,11 +1042,12 @@ void rcu_check_callbacks(int cpu, int user)
* Get here if this CPU did not take its interrupt from
* softirq, in other words, if it is not interrupting
* a rcu_bh read-side critical section. This is an _bh
* critical section, so count it.
* critical section, so note it.
*/
rcu_bh_qsctr_inc(cpu);
rcu_bh_qs(cpu);
}
rcu_preempt_check_callbacks(cpu);
raise_softirq(RCU_SOFTIRQ);
}
@@ -1137,6 +1186,8 @@ __rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp)
{
unsigned long flags;
WARN_ON_ONCE(rdp->beenonline == 0);
/*
* If an RCU GP has gone long enough, go check for dyntick
* idle CPUs and, if needed, send resched IPIs.
@@ -1175,8 +1226,10 @@ static void rcu_process_callbacks(struct softirq_action *unused)
*/
smp_mb(); /* See above block comment. */
__rcu_process_callbacks(&rcu_state, &__get_cpu_var(rcu_data));
__rcu_process_callbacks(&rcu_sched_state,
&__get_cpu_var(rcu_sched_data));
__rcu_process_callbacks(&rcu_bh_state, &__get_cpu_var(rcu_bh_data));
rcu_preempt_process_callbacks();
/*
* Memory references from any later RCU read-side critical sections
@@ -1232,13 +1285,13 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
}
/*
* Queue an RCU callback for invocation after a grace period.
* Queue an RCU-sched callback for invocation after a grace period.
*/
void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
void call_rcu_sched(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
{
__call_rcu(head, func, &rcu_state);
__call_rcu(head, func, &rcu_sched_state);
}
EXPORT_SYMBOL_GPL(call_rcu);
EXPORT_SYMBOL_GPL(call_rcu_sched);
/*
* Queue an RCU for invocation after a quicker grace period.
@@ -1310,10 +1363,11 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp)
* by the current CPU, returning 1 if so. This function is part of the
* RCU implementation; it is -not- an exported member of the RCU API.
*/
int rcu_pending(int cpu)
static int rcu_pending(int cpu)
{
return __rcu_pending(&rcu_state, &per_cpu(rcu_data, cpu)) ||
__rcu_pending(&rcu_bh_state, &per_cpu(rcu_bh_data, cpu));
return __rcu_pending(&rcu_sched_state, &per_cpu(rcu_sched_data, cpu)) ||
__rcu_pending(&rcu_bh_state, &per_cpu(rcu_bh_data, cpu)) ||
rcu_preempt_pending(cpu);
}
/*
@@ -1325,27 +1379,46 @@ int rcu_pending(int cpu)
int rcu_needs_cpu(int cpu)
{
/* RCU callbacks either ready or pending? */
return per_cpu(rcu_data, cpu).nxtlist ||
per_cpu(rcu_bh_data, cpu).nxtlist;
return per_cpu(rcu_sched_data, cpu).nxtlist ||
per_cpu(rcu_bh_data, cpu).nxtlist ||
rcu_preempt_needs_cpu(cpu);
}
/*
* Initialize a CPU's per-CPU RCU data. We take this "scorched earth"
* approach so that we don't have to worry about how long the CPU has
* been gone, or whether it ever was online previously. We do trust the
* ->mynode field, as it is constant for a given struct rcu_data and
* initialized during early boot.
*
* Note that only one online or offline event can be happening at a given
* time. Note also that we can accept some slop in the rsp->completed
* access due to the fact that this CPU cannot possibly have any RCU
* callbacks in flight yet.
* Do boot-time initialization of a CPU's per-CPU RCU data.
*/
static void __cpuinit
rcu_init_percpu_data(int cpu, struct rcu_state *rsp)
static void __init
rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
{
unsigned long flags;
int i;
struct rcu_data *rdp = rsp->rda[cpu];
struct rcu_node *rnp = rcu_get_root(rsp);
/* Set up local state, ensuring consistent view of global state. */
spin_lock_irqsave(&rnp->lock, flags);
rdp->grpmask = 1UL << (cpu - rdp->mynode->grplo);
rdp->nxtlist = NULL;
for (i = 0; i < RCU_NEXT_SIZE; i++)
rdp->nxttail[i] = &rdp->nxtlist;
rdp->qlen = 0;
#ifdef CONFIG_NO_HZ
rdp->dynticks = &per_cpu(rcu_dynticks, cpu);
#endif /* #ifdef CONFIG_NO_HZ */
rdp->cpu = cpu;
spin_unlock_irqrestore(&rnp->lock, flags);
}
/*
* Initialize a CPU's per-CPU RCU data. Note that only one online or
* offline event can be happening at a given time. Note also that we
* can accept some slop in the rsp->completed access due to the fact
* that this CPU cannot possibly have any RCU callbacks in flight yet.
*/
static void __cpuinit
rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptable)
{
unsigned long flags;
long lastcomp;
unsigned long mask;
struct rcu_data *rdp = rsp->rda[cpu];
@@ -1359,17 +1432,9 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp)
rdp->passed_quiesc = 0; /* We could be racing with new GP, */
rdp->qs_pending = 1; /* so set up to respond to current GP. */
rdp->beenonline = 1; /* We have now been online. */
rdp->preemptable = preemptable;
rdp->passed_quiesc_completed = lastcomp - 1;
rdp->grpmask = 1UL << (cpu - rdp->mynode->grplo);
rdp->nxtlist = NULL;
for (i = 0; i < RCU_NEXT_SIZE; i++)
rdp->nxttail[i] = &rdp->nxtlist;
rdp->qlen = 0;
rdp->blimit = blimit;
#ifdef CONFIG_NO_HZ
rdp->dynticks = &per_cpu(rcu_dynticks, cpu);
#endif /* #ifdef CONFIG_NO_HZ */
rdp->cpu = cpu;
spin_unlock(&rnp->lock); /* irqs remain disabled. */
/*
@@ -1410,16 +1475,16 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp)
static void __cpuinit rcu_online_cpu(int cpu)
{
rcu_init_percpu_data(cpu, &rcu_state);
rcu_init_percpu_data(cpu, &rcu_bh_state);
open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
rcu_init_percpu_data(cpu, &rcu_sched_state, 0);
rcu_init_percpu_data(cpu, &rcu_bh_state, 0);
rcu_preempt_init_percpu_data(cpu);
}
/*
* Handle CPU online/offline notifcation events.
* Handle CPU online/offline notification events.
*/
static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
unsigned long action, void *hcpu)
int __cpuinit rcu_cpu_notify(struct notifier_block *self,
unsigned long action, void *hcpu)
{
long cpu = (long)hcpu;
@@ -1491,6 +1556,7 @@ static void __init rcu_init_one(struct rcu_state *rsp)
rnp = rsp->level[i];
for (j = 0; j < rsp->levelcnt[i]; j++, rnp++) {
spin_lock_init(&rnp->lock);
rnp->gpnum = 0;
rnp->qsmask = 0;
rnp->qsmaskinit = 0;
rnp->grplo = j * cpustride;
@@ -1508,16 +1574,20 @@ static void __init rcu_init_one(struct rcu_state *rsp)
j / rsp->levelspread[i - 1];
}
rnp->level = i;
INIT_LIST_HEAD(&rnp->blocked_tasks[0]);
INIT_LIST_HEAD(&rnp->blocked_tasks[1]);
}
}
}
/*
* Helper macro for __rcu_init(). To be used nowhere else!
* Assigns leaf node pointers into each CPU's rcu_data structure.
* Helper macro for __rcu_init() and __rcu_init_preempt(). To be used
* nowhere else! Assigns leaf node pointers into each CPU's rcu_data
* structure.
*/
#define RCU_DATA_PTR_INIT(rsp, rcu_data) \
#define RCU_INIT_FLAVOR(rsp, rcu_data) \
do { \
rcu_init_one(rsp); \
rnp = (rsp)->level[NUM_RCU_LVLS - 1]; \
j = 0; \
for_each_possible_cpu(i) { \
@@ -1525,32 +1595,43 @@ do { \
j++; \
per_cpu(rcu_data, i).mynode = &rnp[j]; \
(rsp)->rda[i] = &per_cpu(rcu_data, i); \
rcu_boot_init_percpu_data(i, rsp); \
} \
} while (0)
static struct notifier_block __cpuinitdata rcu_nb = {
.notifier_call = rcu_cpu_notify,
};
#ifdef CONFIG_TREE_PREEMPT_RCU
void __init __rcu_init(void)
void __init __rcu_init_preempt(void)
{
int i; /* All used by RCU_DATA_PTR_INIT(). */
int i; /* All used by RCU_INIT_FLAVOR(). */
int j;
struct rcu_node *rnp;
printk(KERN_INFO "Hierarchical RCU implementation.\n");
RCU_INIT_FLAVOR(&rcu_preempt_state, rcu_preempt_data);
}
#else /* #ifdef CONFIG_TREE_PREEMPT_RCU */
void __init __rcu_init_preempt(void)
{
}
#endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */
void __init __rcu_init(void)
{
int i; /* All used by RCU_INIT_FLAVOR(). */
int j;
struct rcu_node *rnp;
rcu_bootup_announce();
#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
printk(KERN_INFO "RCU-based detection of stalled CPUs is enabled.\n");
#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
rcu_init_one(&rcu_state);
RCU_DATA_PTR_INIT(&rcu_state, rcu_data);
rcu_init_one(&rcu_bh_state);
RCU_DATA_PTR_INIT(&rcu_bh_state, rcu_bh_data);
for_each_online_cpu(i)
rcu_cpu_notify(&rcu_nb, CPU_UP_PREPARE, (void *)(long)i);
/* Register notifier for non-boot CPUs */
register_cpu_notifier(&rcu_nb);
RCU_INIT_FLAVOR(&rcu_sched_state, rcu_sched_data);
RCU_INIT_FLAVOR(&rcu_bh_state, rcu_bh_data);
__rcu_init_preempt();
open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
}
module_param(blimit, int, 0);

View File

@@ -1,10 +1,259 @@
/*
* Read-Copy Update mechanism for mutual exclusion (tree-based version)
* Internal non-public definitions.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
* Copyright IBM Corporation, 2008
*
* Author: Ingo Molnar <mingo@elte.hu>
* Paul E. McKenney <paulmck@linux.vnet.ibm.com>
*/
#include <linux/cache.h>
#include <linux/spinlock.h>
#include <linux/threads.h>
#include <linux/cpumask.h>
#include <linux/seqlock.h>
/*
* Define shape of hierarchy based on NR_CPUS and CONFIG_RCU_FANOUT.
* In theory, it should be possible to add more levels straightforwardly.
* In practice, this has not been tested, so there is probably some
* bug somewhere.
*/
#define MAX_RCU_LVLS 3
#define RCU_FANOUT (CONFIG_RCU_FANOUT)
#define RCU_FANOUT_SQ (RCU_FANOUT * RCU_FANOUT)
#define RCU_FANOUT_CUBE (RCU_FANOUT_SQ * RCU_FANOUT)
#if NR_CPUS <= RCU_FANOUT
# define NUM_RCU_LVLS 1
# define NUM_RCU_LVL_0 1
# define NUM_RCU_LVL_1 (NR_CPUS)
# define NUM_RCU_LVL_2 0
# define NUM_RCU_LVL_3 0
#elif NR_CPUS <= RCU_FANOUT_SQ
# define NUM_RCU_LVLS 2
# define NUM_RCU_LVL_0 1
# define NUM_RCU_LVL_1 (((NR_CPUS) + RCU_FANOUT - 1) / RCU_FANOUT)
# define NUM_RCU_LVL_2 (NR_CPUS)
# define NUM_RCU_LVL_3 0
#elif NR_CPUS <= RCU_FANOUT_CUBE
# define NUM_RCU_LVLS 3
# define NUM_RCU_LVL_0 1
# define NUM_RCU_LVL_1 (((NR_CPUS) + RCU_FANOUT_SQ - 1) / RCU_FANOUT_SQ)
# define NUM_RCU_LVL_2 (((NR_CPUS) + (RCU_FANOUT) - 1) / (RCU_FANOUT))
# define NUM_RCU_LVL_3 NR_CPUS
#else
# error "CONFIG_RCU_FANOUT insufficient for NR_CPUS"
#endif /* #if (NR_CPUS) <= RCU_FANOUT */
#define RCU_SUM (NUM_RCU_LVL_0 + NUM_RCU_LVL_1 + NUM_RCU_LVL_2 + NUM_RCU_LVL_3)
#define NUM_RCU_NODES (RCU_SUM - NR_CPUS)
/*
* Dynticks per-CPU state.
*/
struct rcu_dynticks {
int dynticks_nesting; /* Track nesting level, sort of. */
int dynticks; /* Even value for dynticks-idle, else odd. */
int dynticks_nmi; /* Even value for either dynticks-idle or */
/* not in nmi handler, else odd. So this */
/* remains even for nmi from irq handler. */
};
/*
* Definition for node within the RCU grace-period-detection hierarchy.
*/
struct rcu_node {
spinlock_t lock;
long gpnum; /* Current grace period for this node. */
/* This will either be equal to or one */
/* behind the root rcu_node's gpnum. */
unsigned long qsmask; /* CPUs or groups that need to switch in */
/* order for current grace period to proceed.*/
unsigned long qsmaskinit;
/* Per-GP initialization for qsmask. */
unsigned long grpmask; /* Mask to apply to parent qsmask. */
int grplo; /* lowest-numbered CPU or group here. */
int grphi; /* highest-numbered CPU or group here. */
u8 grpnum; /* CPU/group number for next level up. */
u8 level; /* root is at level 0. */
struct rcu_node *parent;
struct list_head blocked_tasks[2];
/* Tasks blocked in RCU read-side critsect. */
} ____cacheline_internodealigned_in_smp;
/* Index values for nxttail array in struct rcu_data. */
#define RCU_DONE_TAIL 0 /* Also RCU_WAIT head. */
#define RCU_WAIT_TAIL 1 /* Also RCU_NEXT_READY head. */
#define RCU_NEXT_READY_TAIL 2 /* Also RCU_NEXT head. */
#define RCU_NEXT_TAIL 3
#define RCU_NEXT_SIZE 4
/* Per-CPU data for read-copy update. */
struct rcu_data {
/* 1) quiescent-state and grace-period handling : */
long completed; /* Track rsp->completed gp number */
/* in order to detect GP end. */
long gpnum; /* Highest gp number that this CPU */
/* is aware of having started. */
long passed_quiesc_completed;
/* Value of completed at time of qs. */
bool passed_quiesc; /* User-mode/idle loop etc. */
bool qs_pending; /* Core waits for quiesc state. */
bool beenonline; /* CPU online at least once. */
bool preemptable; /* Preemptable RCU? */
struct rcu_node *mynode; /* This CPU's leaf of hierarchy */
unsigned long grpmask; /* Mask to apply to leaf qsmask. */
/* 2) batch handling */
/*
* If nxtlist is not NULL, it is partitioned as follows.
* Any of the partitions might be empty, in which case the
* pointer to that partition will be equal to the pointer for
* the following partition. When the list is empty, all of
* the nxttail elements point to nxtlist, which is NULL.
*
* [*nxttail[RCU_NEXT_READY_TAIL], NULL = *nxttail[RCU_NEXT_TAIL]):
* Entries that might have arrived after current GP ended
* [*nxttail[RCU_WAIT_TAIL], *nxttail[RCU_NEXT_READY_TAIL]):
* Entries known to have arrived before current GP ended
* [*nxttail[RCU_DONE_TAIL], *nxttail[RCU_WAIT_TAIL]):
* Entries that batch # <= ->completed - 1: waiting for current GP
* [nxtlist, *nxttail[RCU_DONE_TAIL]):
* Entries that batch # <= ->completed
* The grace period for these entries has completed, and
* the other grace-period-completed entries may be moved
* here temporarily in rcu_process_callbacks().
*/
struct rcu_head *nxtlist;
struct rcu_head **nxttail[RCU_NEXT_SIZE];
long qlen; /* # of queued callbacks */
long blimit; /* Upper limit on a processed batch */
#ifdef CONFIG_NO_HZ
/* 3) dynticks interface. */
struct rcu_dynticks *dynticks; /* Shared per-CPU dynticks state. */
int dynticks_snap; /* Per-GP tracking for dynticks. */
int dynticks_nmi_snap; /* Per-GP tracking for dynticks_nmi. */
#endif /* #ifdef CONFIG_NO_HZ */
/* 4) reasons this CPU needed to be kicked by force_quiescent_state */
#ifdef CONFIG_NO_HZ
unsigned long dynticks_fqs; /* Kicked due to dynticks idle. */
#endif /* #ifdef CONFIG_NO_HZ */
unsigned long offline_fqs; /* Kicked due to being offline. */
unsigned long resched_ipi; /* Sent a resched IPI. */
/* 5) __rcu_pending() statistics. */
long n_rcu_pending; /* rcu_pending() calls since boot. */
long n_rp_qs_pending;
long n_rp_cb_ready;
long n_rp_cpu_needs_gp;
long n_rp_gp_completed;
long n_rp_gp_started;
long n_rp_need_fqs;
long n_rp_need_nothing;
int cpu;
};
/* Values for signaled field in struct rcu_state. */
#define RCU_GP_INIT 0 /* Grace period being initialized. */
#define RCU_SAVE_DYNTICK 1 /* Need to scan dyntick state. */
#define RCU_FORCE_QS 2 /* Need to force quiescent state. */
#ifdef CONFIG_NO_HZ
#define RCU_SIGNAL_INIT RCU_SAVE_DYNTICK
#else /* #ifdef CONFIG_NO_HZ */
#define RCU_SIGNAL_INIT RCU_FORCE_QS
#endif /* #else #ifdef CONFIG_NO_HZ */
#define RCU_JIFFIES_TILL_FORCE_QS 3 /* for rsp->jiffies_force_qs */
#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
#define RCU_SECONDS_TILL_STALL_CHECK (10 * HZ) /* for rsp->jiffies_stall */
#define RCU_SECONDS_TILL_STALL_RECHECK (30 * HZ) /* for rsp->jiffies_stall */
#define RCU_STALL_RAT_DELAY 2 /* Allow other CPUs time */
/* to take at least one */
/* scheduling clock irq */
/* before ratting on them. */
#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
/*
* RCU global state, including node hierarchy. This hierarchy is
* represented in "heap" form in a dense array. The root (first level)
* of the hierarchy is in ->node[0] (referenced by ->level[0]), the second
* level in ->node[1] through ->node[m] (->node[1] referenced by ->level[1]),
* and the third level in ->node[m+1] and following (->node[m+1] referenced
* by ->level[2]). The number of levels is determined by the number of
* CPUs and by CONFIG_RCU_FANOUT. Small systems will have a "hierarchy"
* consisting of a single rcu_node.
*/
struct rcu_state {
struct rcu_node node[NUM_RCU_NODES]; /* Hierarchy. */
struct rcu_node *level[NUM_RCU_LVLS]; /* Hierarchy levels. */
u32 levelcnt[MAX_RCU_LVLS + 1]; /* # nodes in each level. */
u8 levelspread[NUM_RCU_LVLS]; /* kids/node in each level. */
struct rcu_data *rda[NR_CPUS]; /* array of rdp pointers. */
/* The following fields are guarded by the root rcu_node's lock. */
u8 signaled ____cacheline_internodealigned_in_smp;
/* Force QS state. */
long gpnum; /* Current gp number. */
long completed; /* # of last completed gp. */
spinlock_t onofflock; /* exclude on/offline and */
/* starting new GP. */
spinlock_t fqslock; /* Only one task forcing */
/* quiescent states. */
unsigned long jiffies_force_qs; /* Time at which to invoke */
/* force_quiescent_state(). */
unsigned long n_force_qs; /* Number of calls to */
/* force_quiescent_state(). */
unsigned long n_force_qs_lh; /* ~Number of calls leaving */
/* due to lock unavailable. */
unsigned long n_force_qs_ngp; /* Number of calls leaving */
/* due to no GP active. */
#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
unsigned long gp_start; /* Time at which GP started, */
/* but in jiffies. */
unsigned long jiffies_stall; /* Time at which to check */
/* for CPU stalls. */
#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
#ifdef CONFIG_NO_HZ
long dynticks_completed; /* Value of completed @ snap. */
#endif /* #ifdef CONFIG_NO_HZ */
};
#ifdef RCU_TREE_NONCORE
/*
* RCU implementation internal declarations:
*/
extern struct rcu_state rcu_state;
DECLARE_PER_CPU(struct rcu_data, rcu_data);
extern struct rcu_state rcu_sched_state;
DECLARE_PER_CPU(struct rcu_data, rcu_sched_data);
extern struct rcu_state rcu_bh_state;
DECLARE_PER_CPU(struct rcu_data, rcu_bh_data);
#ifdef CONFIG_TREE_PREEMPT_RCU
extern struct rcu_state rcu_preempt_state;
DECLARE_PER_CPU(struct rcu_data, rcu_preempt_data);
#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
#endif /* #ifdef RCU_TREE_NONCORE */

532
kernel/rcutree_plugin.h Normal file
View File

@@ -0,0 +1,532 @@
/*
* Read-Copy Update mechanism for mutual exclusion (tree-based version)
* Internal non-public definitions that provide either classic
* or preemptable semantics.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
* Copyright Red Hat, 2009
* Copyright IBM Corporation, 2009
*
* Author: Ingo Molnar <mingo@elte.hu>
* Paul E. McKenney <paulmck@linux.vnet.ibm.com>
*/
#ifdef CONFIG_TREE_PREEMPT_RCU
struct rcu_state rcu_preempt_state = RCU_STATE_INITIALIZER(rcu_preempt_state);
DEFINE_PER_CPU(struct rcu_data, rcu_preempt_data);
/*
* Tell them what RCU they are running.
*/
static inline void rcu_bootup_announce(void)
{
printk(KERN_INFO
"Experimental preemptable hierarchical RCU implementation.\n");
}
/*
* Return the number of RCU-preempt batches processed thus far
* for debug and statistics.
*/
long rcu_batches_completed_preempt(void)
{
return rcu_preempt_state.completed;
}
EXPORT_SYMBOL_GPL(rcu_batches_completed_preempt);
/*
* Return the number of RCU batches processed thus far for debug & stats.
*/
long rcu_batches_completed(void)
{
return rcu_batches_completed_preempt();
}
EXPORT_SYMBOL_GPL(rcu_batches_completed);
/*
* Record a preemptable-RCU quiescent state for the specified CPU. Note
* that this just means that the task currently running on the CPU is
* not in a quiescent state. There might be any number of tasks blocked
* while in an RCU read-side critical section.
*/
static void rcu_preempt_qs_record(int cpu)
{
struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu);
rdp->passed_quiesc = 1;
rdp->passed_quiesc_completed = rdp->completed;
}
/*
* We have entered the scheduler or are between softirqs in ksoftirqd.
* If we are in an RCU read-side critical section, we need to reflect
* that in the state of the rcu_node structure corresponding to this CPU.
* Caller must disable hardirqs.
*/
static void rcu_preempt_qs(int cpu)
{
struct task_struct *t = current;
int phase;
struct rcu_data *rdp;
struct rcu_node *rnp;
if (t->rcu_read_lock_nesting &&
(t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) {
/* Possibly blocking in an RCU read-side critical section. */
rdp = rcu_preempt_state.rda[cpu];
rnp = rdp->mynode;
spin_lock(&rnp->lock);
t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED;
t->rcu_blocked_node = rnp;
/*
* If this CPU has already checked in, then this task
* will hold up the next grace period rather than the
* current grace period. Queue the task accordingly.
* If the task is queued for the current grace period
* (i.e., this CPU has not yet passed through a quiescent
* state for the current grace period), then as long
* as that task remains queued, the current grace period
* cannot end.
*/
phase = !(rnp->qsmask & rdp->grpmask) ^ (rnp->gpnum & 0x1);
list_add(&t->rcu_node_entry, &rnp->blocked_tasks[phase]);
smp_mb(); /* Ensure later ctxt swtch seen after above. */
spin_unlock(&rnp->lock);
}
/*
* Either we were not in an RCU read-side critical section to
* begin with, or we have now recorded that critical section
* globally. Either way, we can now note a quiescent state
* for this CPU. Again, if we were in an RCU read-side critical
* section, and if that critical section was blocking the current
* grace period, then the fact that the task has been enqueued
* means that we continue to block the current grace period.
*/
rcu_preempt_qs_record(cpu);
t->rcu_read_unlock_special &= ~(RCU_READ_UNLOCK_NEED_QS |
RCU_READ_UNLOCK_GOT_QS);
}
/*
* Tree-preemptable RCU implementation for rcu_read_lock().
* Just increment ->rcu_read_lock_nesting, shared state will be updated
* if we block.
*/
void __rcu_read_lock(void)
{
ACCESS_ONCE(current->rcu_read_lock_nesting)++;
barrier(); /* needed if we ever invoke rcu_read_lock in rcutree.c */
}
EXPORT_SYMBOL_GPL(__rcu_read_lock);
static void rcu_read_unlock_special(struct task_struct *t)
{
int empty;
unsigned long flags;
unsigned long mask;
struct rcu_node *rnp;
int special;
/* NMI handlers cannot block and cannot safely manipulate state. */
if (in_nmi())
return;
local_irq_save(flags);
/*
* If RCU core is waiting for this CPU to exit critical section,
* let it know that we have done so.
*/
special = t->rcu_read_unlock_special;
if (special & RCU_READ_UNLOCK_NEED_QS) {
t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS;
t->rcu_read_unlock_special |= RCU_READ_UNLOCK_GOT_QS;
}
/* Hardware IRQ handlers cannot block. */
if (in_irq()) {
local_irq_restore(flags);
return;
}
/* Clean up if blocked during RCU read-side critical section. */
if (special & RCU_READ_UNLOCK_BLOCKED) {
t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_BLOCKED;
/*
* Remove this task from the list it blocked on. The
* task can migrate while we acquire the lock, but at
* most one time. So at most two passes through loop.
*/
for (;;) {
rnp = t->rcu_blocked_node;
spin_lock(&rnp->lock);
if (rnp == t->rcu_blocked_node)
break;
spin_unlock(&rnp->lock);
}
empty = list_empty(&rnp->blocked_tasks[rnp->gpnum & 0x1]);
list_del_init(&t->rcu_node_entry);
t->rcu_blocked_node = NULL;
/*
* If this was the last task on the current list, and if
* we aren't waiting on any CPUs, report the quiescent state.
* Note that both cpu_quiet_msk_finish() and cpu_quiet_msk()
* drop rnp->lock and restore irq.
*/
if (!empty && rnp->qsmask == 0 &&
list_empty(&rnp->blocked_tasks[rnp->gpnum & 0x1])) {
t->rcu_read_unlock_special &=
~(RCU_READ_UNLOCK_NEED_QS |
RCU_READ_UNLOCK_GOT_QS);
if (rnp->parent == NULL) {
/* Only one rcu_node in the tree. */
cpu_quiet_msk_finish(&rcu_preempt_state, flags);
return;
}
/* Report up the rest of the hierarchy. */
mask = rnp->grpmask;
spin_unlock_irqrestore(&rnp->lock, flags);
rnp = rnp->parent;
spin_lock_irqsave(&rnp->lock, flags);
cpu_quiet_msk(mask, &rcu_preempt_state, rnp, flags);
return;
}
spin_unlock(&rnp->lock);
}
local_irq_restore(flags);
}
/*
* Tree-preemptable RCU implementation for rcu_read_unlock().
* Decrement ->rcu_read_lock_nesting. If the result is zero (outermost
* rcu_read_unlock()) and ->rcu_read_unlock_special is non-zero, then
* invoke rcu_read_unlock_special() to clean up after a context switch
* in an RCU read-side critical section and other special cases.
*/
void __rcu_read_unlock(void)
{
struct task_struct *t = current;
barrier(); /* needed if we ever invoke rcu_read_unlock in rcutree.c */
if (--ACCESS_ONCE(t->rcu_read_lock_nesting) == 0 &&
unlikely(ACCESS_ONCE(t->rcu_read_unlock_special)))
rcu_read_unlock_special(t);
}
EXPORT_SYMBOL_GPL(__rcu_read_unlock);
#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
/*
* Scan the current list of tasks blocked within RCU read-side critical
* sections, printing out the tid of each.
*/
static void rcu_print_task_stall(struct rcu_node *rnp)
{
unsigned long flags;
struct list_head *lp;
int phase = rnp->gpnum & 0x1;
struct task_struct *t;
if (!list_empty(&rnp->blocked_tasks[phase])) {
spin_lock_irqsave(&rnp->lock, flags);
phase = rnp->gpnum & 0x1; /* re-read under lock. */
lp = &rnp->blocked_tasks[phase];
list_for_each_entry(t, lp, rcu_node_entry)
printk(" P%d", t->pid);
spin_unlock_irqrestore(&rnp->lock, flags);
}
}
#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
/*
* Check for preempted RCU readers for the specified rcu_node structure.
* If the caller needs a reliable answer, it must hold the rcu_node's
* >lock.
*/
static int rcu_preempted_readers(struct rcu_node *rnp)
{
return !list_empty(&rnp->blocked_tasks[rnp->gpnum & 0x1]);
}
#ifdef CONFIG_HOTPLUG_CPU
/*
* Handle tasklist migration for case in which all CPUs covered by the
* specified rcu_node have gone offline. Move them up to the root
* rcu_node. The reason for not just moving them to the immediate
* parent is to remove the need for rcu_read_unlock_special() to
* make more than two attempts to acquire the target rcu_node's lock.
*
* The caller must hold rnp->lock with irqs disabled.
*/
static void rcu_preempt_offline_tasks(struct rcu_state *rsp,
struct rcu_node *rnp)
{
int i;
struct list_head *lp;
struct list_head *lp_root;
struct rcu_node *rnp_root = rcu_get_root(rsp);
struct task_struct *tp;
if (rnp == rnp_root) {
WARN_ONCE(1, "Last CPU thought to be offlined?");
return; /* Shouldn't happen: at least one CPU online. */
}
/*
* Move tasks up to root rcu_node. Rely on the fact that the
* root rcu_node can be at most one ahead of the rest of the
* rcu_nodes in terms of gp_num value. This fact allows us to
* move the blocked_tasks[] array directly, element by element.
*/
for (i = 0; i < 2; i++) {
lp = &rnp->blocked_tasks[i];
lp_root = &rnp_root->blocked_tasks[i];
while (!list_empty(lp)) {
tp = list_entry(lp->next, typeof(*tp), rcu_node_entry);
spin_lock(&rnp_root->lock); /* irqs already disabled */
list_del(&tp->rcu_node_entry);
tp->rcu_blocked_node = rnp_root;
list_add(&tp->rcu_node_entry, lp_root);
spin_unlock(&rnp_root->lock); /* irqs remain disabled */
}
}
}
/*
* Do CPU-offline processing for preemptable RCU.
*/
static void rcu_preempt_offline_cpu(int cpu)
{
__rcu_offline_cpu(cpu, &rcu_preempt_state);
}
#endif /* #ifdef CONFIG_HOTPLUG_CPU */
/*
* Check for a quiescent state from the current CPU. When a task blocks,
* the task is recorded in the corresponding CPU's rcu_node structure,
* which is checked elsewhere.
*
* Caller must disable hard irqs.
*/
static void rcu_preempt_check_callbacks(int cpu)
{
struct task_struct *t = current;
if (t->rcu_read_lock_nesting == 0) {
t->rcu_read_unlock_special &=
~(RCU_READ_UNLOCK_NEED_QS | RCU_READ_UNLOCK_GOT_QS);
rcu_preempt_qs_record(cpu);
return;
}
if (per_cpu(rcu_preempt_data, cpu).qs_pending) {
if (t->rcu_read_unlock_special & RCU_READ_UNLOCK_GOT_QS) {
rcu_preempt_qs_record(cpu);
t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_GOT_QS;
} else if (!(t->rcu_read_unlock_special &
RCU_READ_UNLOCK_NEED_QS)) {
t->rcu_read_unlock_special |= RCU_READ_UNLOCK_NEED_QS;
}
}
}
/*
* Process callbacks for preemptable RCU.
*/
static void rcu_preempt_process_callbacks(void)
{
__rcu_process_callbacks(&rcu_preempt_state,
&__get_cpu_var(rcu_preempt_data));
}
/*
* Queue a preemptable-RCU callback for invocation after a grace period.
*/
void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
{
__call_rcu(head, func, &rcu_preempt_state);
}
EXPORT_SYMBOL_GPL(call_rcu);
/*
* Check to see if there is any immediate preemptable-RCU-related work
* to be done.
*/
static int rcu_preempt_pending(int cpu)
{
return __rcu_pending(&rcu_preempt_state,
&per_cpu(rcu_preempt_data, cpu));
}
/*
* Does preemptable RCU need the CPU to stay out of dynticks mode?
*/
static int rcu_preempt_needs_cpu(int cpu)
{
return !!per_cpu(rcu_preempt_data, cpu).nxtlist;
}
/*
* Initialize preemptable RCU's per-CPU data.
*/
static void __cpuinit rcu_preempt_init_percpu_data(int cpu)
{
rcu_init_percpu_data(cpu, &rcu_preempt_state, 1);
}
/*
* Check for a task exiting while in a preemptable-RCU read-side
* critical section, clean up if so. No need to issue warnings,
* as debug_check_no_locks_held() already does this if lockdep
* is enabled.
*/
void exit_rcu(void)
{
struct task_struct *t = current;
if (t->rcu_read_lock_nesting == 0)
return;
t->rcu_read_lock_nesting = 1;
rcu_read_unlock();
}
#else /* #ifdef CONFIG_TREE_PREEMPT_RCU */
/*
* Tell them what RCU they are running.
*/
static inline void rcu_bootup_announce(void)
{
printk(KERN_INFO "Hierarchical RCU implementation.\n");
}
/*
* Return the number of RCU batches processed thus far for debug & stats.
*/
long rcu_batches_completed(void)
{
return rcu_batches_completed_sched();
}
EXPORT_SYMBOL_GPL(rcu_batches_completed);
/*
* Because preemptable RCU does not exist, we never have to check for
* CPUs being in quiescent states.
*/
static void rcu_preempt_qs(int cpu)
{
}
#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
/*
* Because preemptable RCU does not exist, we never have to check for
* tasks blocked within RCU read-side critical sections.
*/
static void rcu_print_task_stall(struct rcu_node *rnp)
{
}
#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
/*
* Because preemptable RCU does not exist, there are never any preempted
* RCU readers.
*/
static int rcu_preempted_readers(struct rcu_node *rnp)
{
return 0;
}
#ifdef CONFIG_HOTPLUG_CPU
/*
* Because preemptable RCU does not exist, it never needs to migrate
* tasks that were blocked within RCU read-side critical sections.
*/
static void rcu_preempt_offline_tasks(struct rcu_state *rsp,
struct rcu_node *rnp)
{
}
/*
* Because preemptable RCU does not exist, it never needs CPU-offline
* processing.
*/
static void rcu_preempt_offline_cpu(int cpu)
{
}
#endif /* #ifdef CONFIG_HOTPLUG_CPU */
/*
* Because preemptable RCU does not exist, it never has any callbacks
* to check.
*/
void rcu_preempt_check_callbacks(int cpu)
{
}
/*
* Because preemptable RCU does not exist, it never has any callbacks
* to process.
*/
void rcu_preempt_process_callbacks(void)
{
}
/*
* In classic RCU, call_rcu() is just call_rcu_sched().
*/
void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
{
call_rcu_sched(head, func);
}
EXPORT_SYMBOL_GPL(call_rcu);
/*
* Because preemptable RCU does not exist, it never has any work to do.
*/
static int rcu_preempt_pending(int cpu)
{
return 0;
}
/*
* Because preemptable RCU does not exist, it never needs any CPU.
*/
static int rcu_preempt_needs_cpu(int cpu)
{
return 0;
}
/*
* Because preemptable RCU does not exist, there is no per-CPU
* data to initialize.
*/
static void __cpuinit rcu_preempt_init_percpu_data(int cpu)
{
}
#endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */

View File

@@ -43,6 +43,7 @@
#include <linux/debugfs.h>
#include <linux/seq_file.h>
#define RCU_TREE_NONCORE
#include "rcutree.h"
static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp)
@@ -76,8 +77,12 @@ static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp)
static int show_rcudata(struct seq_file *m, void *unused)
{
seq_puts(m, "rcu:\n");
PRINT_RCU_DATA(rcu_data, print_one_rcu_data, m);
#ifdef CONFIG_TREE_PREEMPT_RCU
seq_puts(m, "rcu_preempt:\n");
PRINT_RCU_DATA(rcu_preempt_data, print_one_rcu_data, m);
#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
seq_puts(m, "rcu_sched:\n");
PRINT_RCU_DATA(rcu_sched_data, print_one_rcu_data, m);
seq_puts(m, "rcu_bh:\n");
PRINT_RCU_DATA(rcu_bh_data, print_one_rcu_data, m);
return 0;
@@ -102,7 +107,7 @@ static void print_one_rcu_data_csv(struct seq_file *m, struct rcu_data *rdp)
return;
seq_printf(m, "%d,%s,%ld,%ld,%d,%ld,%d",
rdp->cpu,
cpu_is_offline(rdp->cpu) ? "\"Y\"" : "\"N\"",
cpu_is_offline(rdp->cpu) ? "\"N\"" : "\"Y\"",
rdp->completed, rdp->gpnum,
rdp->passed_quiesc, rdp->passed_quiesc_completed,
rdp->qs_pending);
@@ -124,8 +129,12 @@ static int show_rcudata_csv(struct seq_file *m, void *unused)
seq_puts(m, "\"dt\",\"dt nesting\",\"dn\",\"df\",");
#endif /* #ifdef CONFIG_NO_HZ */
seq_puts(m, "\"of\",\"ri\",\"ql\",\"b\"\n");
seq_puts(m, "\"rcu:\"\n");
PRINT_RCU_DATA(rcu_data, print_one_rcu_data_csv, m);
#ifdef CONFIG_TREE_PREEMPT_RCU
seq_puts(m, "\"rcu_preempt:\"\n");
PRINT_RCU_DATA(rcu_preempt_data, print_one_rcu_data_csv, m);
#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
seq_puts(m, "\"rcu_sched:\"\n");
PRINT_RCU_DATA(rcu_sched_data, print_one_rcu_data_csv, m);
seq_puts(m, "\"rcu_bh:\"\n");
PRINT_RCU_DATA(rcu_bh_data, print_one_rcu_data_csv, m);
return 0;
@@ -171,8 +180,12 @@ static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp)
static int show_rcuhier(struct seq_file *m, void *unused)
{
seq_puts(m, "rcu:\n");
print_one_rcu_state(m, &rcu_state);
#ifdef CONFIG_TREE_PREEMPT_RCU
seq_puts(m, "rcu_preempt:\n");
print_one_rcu_state(m, &rcu_preempt_state);
#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
seq_puts(m, "rcu_sched:\n");
print_one_rcu_state(m, &rcu_sched_state);
seq_puts(m, "rcu_bh:\n");
print_one_rcu_state(m, &rcu_bh_state);
return 0;
@@ -193,8 +206,12 @@ static struct file_operations rcuhier_fops = {
static int show_rcugp(struct seq_file *m, void *unused)
{
seq_printf(m, "rcu: completed=%ld gpnum=%ld\n",
rcu_state.completed, rcu_state.gpnum);
#ifdef CONFIG_TREE_PREEMPT_RCU
seq_printf(m, "rcu_preempt: completed=%ld gpnum=%ld\n",
rcu_preempt_state.completed, rcu_preempt_state.gpnum);
#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
seq_printf(m, "rcu_sched: completed=%ld gpnum=%ld\n",
rcu_sched_state.completed, rcu_sched_state.gpnum);
seq_printf(m, "rcu_bh: completed=%ld gpnum=%ld\n",
rcu_bh_state.completed, rcu_bh_state.gpnum);
return 0;
@@ -243,8 +260,12 @@ static void print_rcu_pendings(struct seq_file *m, struct rcu_state *rsp)
static int show_rcu_pending(struct seq_file *m, void *unused)
{
seq_puts(m, "rcu:\n");
print_rcu_pendings(m, &rcu_state);
#ifdef CONFIG_TREE_PREEMPT_RCU
seq_puts(m, "rcu_preempt:\n");
print_rcu_pendings(m, &rcu_preempt_state);
#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
seq_puts(m, "rcu_sched:\n");
print_rcu_pendings(m, &rcu_sched_state);
seq_puts(m, "rcu_bh:\n");
print_rcu_pendings(m, &rcu_bh_state);
return 0;
@@ -264,62 +285,47 @@ static struct file_operations rcu_pending_fops = {
};
static struct dentry *rcudir;
static struct dentry *datadir;
static struct dentry *datadir_csv;
static struct dentry *gpdir;
static struct dentry *hierdir;
static struct dentry *rcu_pendingdir;
static int __init rcuclassic_trace_init(void)
{
struct dentry *retval;
rcudir = debugfs_create_dir("rcu", NULL);
if (!rcudir)
goto out;
goto free_out;
datadir = debugfs_create_file("rcudata", 0444, rcudir,
retval = debugfs_create_file("rcudata", 0444, rcudir,
NULL, &rcudata_fops);
if (!datadir)
if (!retval)
goto free_out;
datadir_csv = debugfs_create_file("rcudata.csv", 0444, rcudir,
retval = debugfs_create_file("rcudata.csv", 0444, rcudir,
NULL, &rcudata_csv_fops);
if (!datadir_csv)
if (!retval)
goto free_out;
gpdir = debugfs_create_file("rcugp", 0444, rcudir, NULL, &rcugp_fops);
if (!gpdir)
retval = debugfs_create_file("rcugp", 0444, rcudir, NULL, &rcugp_fops);
if (!retval)
goto free_out;
hierdir = debugfs_create_file("rcuhier", 0444, rcudir,
retval = debugfs_create_file("rcuhier", 0444, rcudir,
NULL, &rcuhier_fops);
if (!hierdir)
if (!retval)
goto free_out;
rcu_pendingdir = debugfs_create_file("rcu_pending", 0444, rcudir,
retval = debugfs_create_file("rcu_pending", 0444, rcudir,
NULL, &rcu_pending_fops);
if (!rcu_pendingdir)
if (!retval)
goto free_out;
return 0;
free_out:
if (datadir)
debugfs_remove(datadir);
if (datadir_csv)
debugfs_remove(datadir_csv);
if (gpdir)
debugfs_remove(gpdir);
debugfs_remove(rcudir);
out:
debugfs_remove_recursive(rcudir);
return 1;
}
static void __exit rcuclassic_trace_cleanup(void)
{
debugfs_remove(datadir);
debugfs_remove(datadir_csv);
debugfs_remove(gpdir);
debugfs_remove(hierdir);
debugfs_remove(rcu_pendingdir);
debugfs_remove(rcudir);
debugfs_remove_recursive(rcudir);
}

View File

@@ -5325,7 +5325,7 @@ need_resched:
preempt_disable();
cpu = smp_processor_id();
rq = cpu_rq(cpu);
rcu_qsctr_inc(cpu);
rcu_sched_qs(cpu);
prev = rq->curr;
switch_count = &prev->nivcsw;
@@ -7053,6 +7053,11 @@ fail:
return ret;
}
#define RCU_MIGRATION_IDLE 0
#define RCU_MIGRATION_NEED_QS 1
#define RCU_MIGRATION_GOT_QS 2
#define RCU_MIGRATION_MUST_SYNC 3
/*
* migration_thread - this is a highprio system thread that performs
* thread migration by bumping thread off CPU then 'pushing' onto
@@ -7060,6 +7065,7 @@ fail:
*/
static int migration_thread(void *data)
{
int badcpu;
int cpu = (long)data;
struct rq *rq;
@@ -7094,8 +7100,17 @@ static int migration_thread(void *data)
req = list_entry(head->next, struct migration_req, list);
list_del_init(head->next);
spin_unlock(&rq->lock);
__migrate_task(req->task, cpu, req->dest_cpu);
if (req->task != NULL) {
spin_unlock(&rq->lock);
__migrate_task(req->task, cpu, req->dest_cpu);
} else if (likely(cpu == (badcpu = smp_processor_id()))) {
req->dest_cpu = RCU_MIGRATION_GOT_QS;
spin_unlock(&rq->lock);
} else {
req->dest_cpu = RCU_MIGRATION_MUST_SYNC;
spin_unlock(&rq->lock);
WARN_ONCE(1, "migration_thread() on CPU %d, expected %d\n", badcpu, cpu);
}
local_irq_enable();
complete(&req->done);
@@ -10583,3 +10598,113 @@ struct cgroup_subsys cpuacct_subsys = {
.subsys_id = cpuacct_subsys_id,
};
#endif /* CONFIG_CGROUP_CPUACCT */
#ifndef CONFIG_SMP
int rcu_expedited_torture_stats(char *page)
{
return 0;
}
EXPORT_SYMBOL_GPL(rcu_expedited_torture_stats);
void synchronize_sched_expedited(void)
{
}
EXPORT_SYMBOL_GPL(synchronize_sched_expedited);
#else /* #ifndef CONFIG_SMP */
static DEFINE_PER_CPU(struct migration_req, rcu_migration_req);
static DEFINE_MUTEX(rcu_sched_expedited_mutex);
#define RCU_EXPEDITED_STATE_POST -2
#define RCU_EXPEDITED_STATE_IDLE -1
static int rcu_expedited_state = RCU_EXPEDITED_STATE_IDLE;
int rcu_expedited_torture_stats(char *page)
{
int cnt = 0;
int cpu;
cnt += sprintf(&page[cnt], "state: %d /", rcu_expedited_state);
for_each_online_cpu(cpu) {
cnt += sprintf(&page[cnt], " %d:%d",
cpu, per_cpu(rcu_migration_req, cpu).dest_cpu);
}
cnt += sprintf(&page[cnt], "\n");
return cnt;
}
EXPORT_SYMBOL_GPL(rcu_expedited_torture_stats);
static long synchronize_sched_expedited_count;
/*
* Wait for an rcu-sched grace period to elapse, but use "big hammer"
* approach to force grace period to end quickly. This consumes
* significant time on all CPUs, and is thus not recommended for
* any sort of common-case code.
*
* Note that it is illegal to call this function while holding any
* lock that is acquired by a CPU-hotplug notifier. Failing to
* observe this restriction will result in deadlock.
*/
void synchronize_sched_expedited(void)
{
int cpu;
unsigned long flags;
bool need_full_sync = 0;
struct rq *rq;
struct migration_req *req;
long snap;
int trycount = 0;
smp_mb(); /* ensure prior mod happens before capturing snap. */
snap = ACCESS_ONCE(synchronize_sched_expedited_count) + 1;
get_online_cpus();
while (!mutex_trylock(&rcu_sched_expedited_mutex)) {
put_online_cpus();
if (trycount++ < 10)
udelay(trycount * num_online_cpus());
else {
synchronize_sched();
return;
}
if (ACCESS_ONCE(synchronize_sched_expedited_count) - snap > 0) {
smp_mb(); /* ensure test happens before caller kfree */
return;
}
get_online_cpus();
}
rcu_expedited_state = RCU_EXPEDITED_STATE_POST;
for_each_online_cpu(cpu) {
rq = cpu_rq(cpu);
req = &per_cpu(rcu_migration_req, cpu);
init_completion(&req->done);
req->task = NULL;
req->dest_cpu = RCU_MIGRATION_NEED_QS;
spin_lock_irqsave(&rq->lock, flags);
list_add(&req->list, &rq->migration_queue);
spin_unlock_irqrestore(&rq->lock, flags);
wake_up_process(rq->migration_thread);
}
for_each_online_cpu(cpu) {
rcu_expedited_state = cpu;
req = &per_cpu(rcu_migration_req, cpu);
rq = cpu_rq(cpu);
wait_for_completion(&req->done);
spin_lock_irqsave(&rq->lock, flags);
if (unlikely(req->dest_cpu == RCU_MIGRATION_MUST_SYNC))
need_full_sync = 1;
req->dest_cpu = RCU_MIGRATION_IDLE;
spin_unlock_irqrestore(&rq->lock, flags);
}
rcu_expedited_state = RCU_EXPEDITED_STATE_IDLE;
mutex_unlock(&rcu_sched_expedited_mutex);
put_online_cpus();
if (need_full_sync)
synchronize_sched();
}
EXPORT_SYMBOL_GPL(synchronize_sched_expedited);
#endif /* #else #ifndef CONFIG_SMP */

View File

@@ -227,7 +227,7 @@ restart:
preempt_count() = prev_count;
}
rcu_bh_qsctr_inc(cpu);
rcu_bh_qs(cpu);
}
h++;
pending >>= 1;
@@ -721,7 +721,7 @@ static int ksoftirqd(void * __bind_cpu)
preempt_enable_no_resched();
cond_resched();
preempt_disable();
rcu_qsctr_inc((long)__bind_cpu);
rcu_sched_qs((long)__bind_cpu);
}
preempt_enable();
set_current_state(TASK_INTERRUPTIBLE);

View File

@@ -1156,8 +1156,7 @@ void update_process_times(int user_tick)
/* Note: this timer irq context must be accounted for as well. */
account_process_tick(p, user_tick);
run_local_timers();
if (rcu_pending(cpu))
rcu_check_callbacks(cpu, user_tick);
rcu_check_callbacks(cpu, user_tick);
printk_tick();
scheduler_tick();
run_posix_cpu_timers(p);