rcu: Rework rcu_barrier() and callback-migration logic

This commit reworks rcu_barrier() and callback-migration logic to
permit allowing rcu_barrier() to run concurrently with CPU-hotplug
operations.  The key trick is for callback migration to check to see if
an rcu_barrier() is in flight, and, if so, enqueue the ->barrier_head
callback on its behalf.

This commit adds synchronization with RCU's CPU-hotplug notifiers.  Taken
together, this will permit a later commit to remove the cpus_read_lock()
and cpus_read_unlock() calls from rcu_barrier().

[ paulmck: Updated per kbuild test robot feedback. ]
[ paulmck: Updated per reviews session with Neeraj, Frederic, Uladzislau, and Boqun. ]

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
This commit is contained in:
Paul E. McKenney 2021-12-14 13:15:18 -08:00
parent 0cabb47af3
commit a16578dd5e
2 changed files with 63 additions and 16 deletions

View File

@ -3987,13 +3987,16 @@ static void rcu_barrier_callback(struct rcu_head *rhp)
}
/*
* Called with preemption disabled, and from cross-cpu IRQ context.
* If needed, entrain an rcu_barrier() callback on rdp->cblist.
*/
static void rcu_barrier_func(void *cpu_in)
static void rcu_barrier_entrain(struct rcu_data *rdp)
{
uintptr_t cpu = (uintptr_t)cpu_in;
struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
unsigned long gseq = READ_ONCE(rcu_state.barrier_sequence);
unsigned long lseq = READ_ONCE(rdp->barrier_seq_snap);
lockdep_assert_held(&rdp->barrier_lock);
if (rcu_seq_state(lseq) || !rcu_seq_state(gseq) || rcu_seq_ctr(lseq) != rcu_seq_ctr(gseq))
return;
rcu_barrier_trace(TPS("IRQ"), -1, rcu_state.barrier_sequence);
rdp->barrier_head.func = rcu_barrier_callback;
debug_rcu_head_queue(&rdp->barrier_head);
@ -4003,10 +4006,26 @@ static void rcu_barrier_func(void *cpu_in)
atomic_inc(&rcu_state.barrier_cpu_count);
} else {
debug_rcu_head_unqueue(&rdp->barrier_head);
rcu_barrier_trace(TPS("IRQNQ"), -1,
rcu_state.barrier_sequence);
rcu_barrier_trace(TPS("IRQNQ"), -1, rcu_state.barrier_sequence);
}
rcu_nocb_unlock(rdp);
smp_store_release(&rdp->barrier_seq_snap, gseq);
}
/*
* Called with preemption disabled, and from cross-cpu IRQ context.
*/
static void rcu_barrier_handler(void *cpu_in)
{
uintptr_t cpu = (uintptr_t)cpu_in;
struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
lockdep_assert_irqs_disabled();
WARN_ON_ONCE(cpu != rdp->cpu);
WARN_ON_ONCE(cpu != smp_processor_id());
raw_spin_lock(&rdp->barrier_lock);
rcu_barrier_entrain(rdp);
raw_spin_unlock(&rdp->barrier_lock);
}
/**
@ -4020,6 +4039,8 @@ static void rcu_barrier_func(void *cpu_in)
void rcu_barrier(void)
{
uintptr_t cpu;
unsigned long flags;
unsigned long gseq;
struct rcu_data *rdp;
unsigned long s = rcu_seq_snap(&rcu_state.barrier_sequence);
@ -4038,6 +4059,7 @@ void rcu_barrier(void)
/* Mark the start of the barrier operation. */
rcu_seq_start(&rcu_state.barrier_sequence);
gseq = rcu_state.barrier_sequence;
rcu_barrier_trace(TPS("Inc1"), -1, rcu_state.barrier_sequence);
/*
@ -4058,19 +4080,30 @@ void rcu_barrier(void)
*/
for_each_possible_cpu(cpu) {
rdp = per_cpu_ptr(&rcu_data, cpu);
retry:
if (smp_load_acquire(&rdp->barrier_seq_snap) == gseq)
continue;
raw_spin_lock_irqsave(&rdp->barrier_lock, flags);
if (!rcu_segcblist_n_cbs(&rdp->cblist)) {
WRITE_ONCE(rdp->barrier_seq_snap, gseq);
raw_spin_unlock_irqrestore(&rdp->barrier_lock, flags);
rcu_barrier_trace(TPS("NQ"), cpu, rcu_state.barrier_sequence);
continue;
}
if (cpu_online(cpu)) {
rcu_barrier_trace(TPS("OnlineQ"), cpu, rcu_state.barrier_sequence);
smp_call_function_single(cpu, rcu_barrier_func, (void *)cpu, 1);
} else {
if (!rcu_rdp_cpu_online(rdp)) {
rcu_barrier_entrain(rdp);
WARN_ON_ONCE(READ_ONCE(rdp->barrier_seq_snap) != gseq);
raw_spin_unlock_irqrestore(&rdp->barrier_lock, flags);
rcu_barrier_trace(TPS("OfflineNoCBQ"), cpu, rcu_state.barrier_sequence);
local_irq_disable();
rcu_barrier_func((void *)cpu);
local_irq_enable();
continue;
}
raw_spin_unlock_irqrestore(&rdp->barrier_lock, flags);
if (smp_call_function_single(cpu, rcu_barrier_handler, (void *)cpu, 1)) {
schedule_timeout_uninterruptible(1);
goto retry;
}
WARN_ON_ONCE(READ_ONCE(rdp->barrier_seq_snap) != gseq);
rcu_barrier_trace(TPS("OnlineQ"), cpu, rcu_state.barrier_sequence);
}
cpus_read_unlock();
@ -4087,6 +4120,12 @@ void rcu_barrier(void)
/* Mark the end of the barrier operation. */
rcu_barrier_trace(TPS("Inc2"), -1, rcu_state.barrier_sequence);
rcu_seq_end(&rcu_state.barrier_sequence);
gseq = rcu_state.barrier_sequence;
for_each_possible_cpu(cpu) {
rdp = per_cpu_ptr(&rcu_data, cpu);
WRITE_ONCE(rdp->barrier_seq_snap, gseq);
}
/* Other rcu_barrier() invocations can now safely proceed. */
mutex_unlock(&rcu_state.barrier_mutex);
@ -4134,6 +4173,8 @@ rcu_boot_init_percpu_data(int cpu)
INIT_WORK(&rdp->strict_work, strict_work_handler);
WARN_ON_ONCE(rdp->dynticks_nesting != 1);
WARN_ON_ONCE(rcu_dynticks_in_eqs(rcu_dynticks_snap(rdp)));
raw_spin_lock_init(&rdp->barrier_lock);
rdp->barrier_seq_snap = rcu_state.barrier_sequence;
rdp->rcu_ofl_gp_seq = rcu_state.gp_seq;
rdp->rcu_ofl_gp_flags = RCU_GP_CLEANED;
rdp->rcu_onl_gp_seq = rcu_state.gp_seq;
@ -4284,8 +4325,10 @@ void rcu_cpu_starting(unsigned int cpu)
local_irq_save(flags);
arch_spin_lock(&rcu_state.ofl_lock);
rcu_dynticks_eqs_online();
raw_spin_lock(&rdp->barrier_lock);
raw_spin_lock_rcu_node(rnp);
WRITE_ONCE(rnp->qsmaskinitnext, rnp->qsmaskinitnext | mask);
raw_spin_unlock(&rdp->barrier_lock);
newcpu = !(rnp->expmaskinitnext & mask);
rnp->expmaskinitnext |= mask;
/* Allow lockless access for expedited grace periods. */
@ -4372,7 +4415,9 @@ void rcutree_migrate_callbacks(int cpu)
rcu_segcblist_empty(&rdp->cblist))
return; /* No callbacks to migrate. */
local_irq_save(flags);
raw_spin_lock_irqsave(&rdp->barrier_lock, flags);
WARN_ON_ONCE(rcu_rdp_cpu_online(rdp));
rcu_barrier_entrain(rdp);
my_rdp = this_cpu_ptr(&rcu_data);
my_rnp = my_rdp->mynode;
rcu_nocb_lock(my_rdp); /* irqs already disabled. */
@ -4382,10 +4427,10 @@ void rcutree_migrate_callbacks(int cpu)
needwake = rcu_advance_cbs(my_rnp, rdp) ||
rcu_advance_cbs(my_rnp, my_rdp);
rcu_segcblist_merge(&my_rdp->cblist, &rdp->cblist);
raw_spin_unlock(&rdp->barrier_lock); /* irqs remain disabled. */
needwake = needwake || rcu_advance_cbs(my_rnp, my_rdp);
rcu_segcblist_disable(&rdp->cblist);
WARN_ON_ONCE(rcu_segcblist_empty(&my_rdp->cblist) !=
!rcu_segcblist_n_cbs(&my_rdp->cblist));
WARN_ON_ONCE(rcu_segcblist_empty(&my_rdp->cblist) != !rcu_segcblist_n_cbs(&my_rdp->cblist));
if (rcu_rdp_is_offloaded(my_rdp)) {
raw_spin_unlock_rcu_node(my_rnp); /* irqs remain disabled. */
__call_rcu_nocb_wake(my_rdp, true, flags);

View File

@ -188,6 +188,8 @@ struct rcu_data {
bool rcu_forced_tick_exp; /* ... provide QS to expedited GP. */
/* 4) rcu_barrier(), OOM callbacks, and expediting. */
raw_spinlock_t barrier_lock; /* Protects ->barrier_seq_snap. */
unsigned long barrier_seq_snap; /* Snap of rcu_state.barrier_sequence. */
struct rcu_head barrier_head;
int exp_dynticks_snap; /* Double-check need for IPI. */