forked from Minki/linux
psi: eliminate kthread_worker from psi trigger scheduling mechanism
Each psi group requires a dedicated kthread_delayed_work and kthread_worker. Since no other work can be performed using psi_group's kthread_worker, the same result can be obtained using a task_struct and a timer directly. This makes psi triggering simpler by removing lists and locks involved with kthread_worker usage and eliminates the need for poll_scheduled atomic use in the hot path. Signed-off-by: Suren Baghdasaryan <surenb@google.com> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Link: https://lkml.kernel.org/r/20200528195442.190116-1-surenb@google.com
This commit is contained in:
parent
f4291df103
commit
461daba06b
@ -153,9 +153,10 @@ struct psi_group {
|
|||||||
unsigned long avg[NR_PSI_STATES - 1][3];
|
unsigned long avg[NR_PSI_STATES - 1][3];
|
||||||
|
|
||||||
/* Monitor work control */
|
/* Monitor work control */
|
||||||
atomic_t poll_scheduled;
|
struct task_struct __rcu *poll_task;
|
||||||
struct kthread_worker __rcu *poll_kworker;
|
struct timer_list poll_timer;
|
||||||
struct kthread_delayed_work poll_work;
|
wait_queue_head_t poll_wait;
|
||||||
|
atomic_t poll_wakeup;
|
||||||
|
|
||||||
/* Protects data used by the monitor */
|
/* Protects data used by the monitor */
|
||||||
struct mutex trigger_lock;
|
struct mutex trigger_lock;
|
||||||
|
@ -190,7 +190,6 @@ static void group_init(struct psi_group *group)
|
|||||||
INIT_DELAYED_WORK(&group->avgs_work, psi_avgs_work);
|
INIT_DELAYED_WORK(&group->avgs_work, psi_avgs_work);
|
||||||
mutex_init(&group->avgs_lock);
|
mutex_init(&group->avgs_lock);
|
||||||
/* Init trigger-related members */
|
/* Init trigger-related members */
|
||||||
atomic_set(&group->poll_scheduled, 0);
|
|
||||||
mutex_init(&group->trigger_lock);
|
mutex_init(&group->trigger_lock);
|
||||||
INIT_LIST_HEAD(&group->triggers);
|
INIT_LIST_HEAD(&group->triggers);
|
||||||
memset(group->nr_triggers, 0, sizeof(group->nr_triggers));
|
memset(group->nr_triggers, 0, sizeof(group->nr_triggers));
|
||||||
@ -199,7 +198,7 @@ static void group_init(struct psi_group *group)
|
|||||||
memset(group->polling_total, 0, sizeof(group->polling_total));
|
memset(group->polling_total, 0, sizeof(group->polling_total));
|
||||||
group->polling_next_update = ULLONG_MAX;
|
group->polling_next_update = ULLONG_MAX;
|
||||||
group->polling_until = 0;
|
group->polling_until = 0;
|
||||||
rcu_assign_pointer(group->poll_kworker, NULL);
|
rcu_assign_pointer(group->poll_task, NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
void __init psi_init(void)
|
void __init psi_init(void)
|
||||||
@ -547,47 +546,38 @@ static u64 update_triggers(struct psi_group *group, u64 now)
|
|||||||
return now + group->poll_min_period;
|
return now + group->poll_min_period;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/* Schedule polling if it's not already scheduled. */
|
||||||
* Schedule polling if it's not already scheduled. It's safe to call even from
|
|
||||||
* hotpath because even though kthread_queue_delayed_work takes worker->lock
|
|
||||||
* spinlock that spinlock is never contended due to poll_scheduled atomic
|
|
||||||
* preventing such competition.
|
|
||||||
*/
|
|
||||||
static void psi_schedule_poll_work(struct psi_group *group, unsigned long delay)
|
static void psi_schedule_poll_work(struct psi_group *group, unsigned long delay)
|
||||||
{
|
{
|
||||||
struct kthread_worker *kworker;
|
struct task_struct *task;
|
||||||
|
|
||||||
/* Do not reschedule if already scheduled */
|
/*
|
||||||
if (atomic_cmpxchg(&group->poll_scheduled, 0, 1) != 0)
|
* Do not reschedule if already scheduled.
|
||||||
|
* Possible race with a timer scheduled after this check but before
|
||||||
|
* mod_timer below can be tolerated because group->polling_next_update
|
||||||
|
* will keep updates on schedule.
|
||||||
|
*/
|
||||||
|
if (timer_pending(&group->poll_timer))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
rcu_read_lock();
|
rcu_read_lock();
|
||||||
|
|
||||||
kworker = rcu_dereference(group->poll_kworker);
|
task = rcu_dereference(group->poll_task);
|
||||||
/*
|
/*
|
||||||
* kworker might be NULL in case psi_trigger_destroy races with
|
* kworker might be NULL in case psi_trigger_destroy races with
|
||||||
* psi_task_change (hotpath) which can't use locks
|
* psi_task_change (hotpath) which can't use locks
|
||||||
*/
|
*/
|
||||||
if (likely(kworker))
|
if (likely(task))
|
||||||
kthread_queue_delayed_work(kworker, &group->poll_work, delay);
|
mod_timer(&group->poll_timer, jiffies + delay);
|
||||||
else
|
|
||||||
atomic_set(&group->poll_scheduled, 0);
|
|
||||||
|
|
||||||
rcu_read_unlock();
|
rcu_read_unlock();
|
||||||
}
|
}
|
||||||
|
|
||||||
static void psi_poll_work(struct kthread_work *work)
|
static void psi_poll_work(struct psi_group *group)
|
||||||
{
|
{
|
||||||
struct kthread_delayed_work *dwork;
|
|
||||||
struct psi_group *group;
|
|
||||||
u32 changed_states;
|
u32 changed_states;
|
||||||
u64 now;
|
u64 now;
|
||||||
|
|
||||||
dwork = container_of(work, struct kthread_delayed_work, work);
|
|
||||||
group = container_of(dwork, struct psi_group, poll_work);
|
|
||||||
|
|
||||||
atomic_set(&group->poll_scheduled, 0);
|
|
||||||
|
|
||||||
mutex_lock(&group->trigger_lock);
|
mutex_lock(&group->trigger_lock);
|
||||||
|
|
||||||
now = sched_clock();
|
now = sched_clock();
|
||||||
@ -623,6 +613,35 @@ out:
|
|||||||
mutex_unlock(&group->trigger_lock);
|
mutex_unlock(&group->trigger_lock);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int psi_poll_worker(void *data)
|
||||||
|
{
|
||||||
|
struct psi_group *group = (struct psi_group *)data;
|
||||||
|
struct sched_param param = {
|
||||||
|
.sched_priority = 1,
|
||||||
|
};
|
||||||
|
|
||||||
|
sched_setscheduler_nocheck(current, SCHED_FIFO, &param);
|
||||||
|
|
||||||
|
while (true) {
|
||||||
|
wait_event_interruptible(group->poll_wait,
|
||||||
|
atomic_cmpxchg(&group->poll_wakeup, 1, 0) ||
|
||||||
|
kthread_should_stop());
|
||||||
|
if (kthread_should_stop())
|
||||||
|
break;
|
||||||
|
|
||||||
|
psi_poll_work(group);
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void poll_timer_fn(struct timer_list *t)
|
||||||
|
{
|
||||||
|
struct psi_group *group = from_timer(group, t, poll_timer);
|
||||||
|
|
||||||
|
atomic_set(&group->poll_wakeup, 1);
|
||||||
|
wake_up_interruptible(&group->poll_wait);
|
||||||
|
}
|
||||||
|
|
||||||
static void record_times(struct psi_group_cpu *groupc, int cpu,
|
static void record_times(struct psi_group_cpu *groupc, int cpu,
|
||||||
bool memstall_tick)
|
bool memstall_tick)
|
||||||
{
|
{
|
||||||
@ -1099,22 +1118,20 @@ struct psi_trigger *psi_trigger_create(struct psi_group *group,
|
|||||||
|
|
||||||
mutex_lock(&group->trigger_lock);
|
mutex_lock(&group->trigger_lock);
|
||||||
|
|
||||||
if (!rcu_access_pointer(group->poll_kworker)) {
|
if (!rcu_access_pointer(group->poll_task)) {
|
||||||
struct sched_param param = {
|
struct task_struct *task;
|
||||||
.sched_priority = 1,
|
|
||||||
};
|
|
||||||
struct kthread_worker *kworker;
|
|
||||||
|
|
||||||
kworker = kthread_create_worker(0, "psimon");
|
task = kthread_create(psi_poll_worker, group, "psimon");
|
||||||
if (IS_ERR(kworker)) {
|
if (IS_ERR(task)) {
|
||||||
kfree(t);
|
kfree(t);
|
||||||
mutex_unlock(&group->trigger_lock);
|
mutex_unlock(&group->trigger_lock);
|
||||||
return ERR_CAST(kworker);
|
return ERR_CAST(task);
|
||||||
}
|
}
|
||||||
sched_setscheduler_nocheck(kworker->task, SCHED_FIFO, &param);
|
atomic_set(&group->poll_wakeup, 0);
|
||||||
kthread_init_delayed_work(&group->poll_work,
|
init_waitqueue_head(&group->poll_wait);
|
||||||
psi_poll_work);
|
wake_up_process(task);
|
||||||
rcu_assign_pointer(group->poll_kworker, kworker);
|
timer_setup(&group->poll_timer, poll_timer_fn, 0);
|
||||||
|
rcu_assign_pointer(group->poll_task, task);
|
||||||
}
|
}
|
||||||
|
|
||||||
list_add(&t->node, &group->triggers);
|
list_add(&t->node, &group->triggers);
|
||||||
@ -1132,7 +1149,7 @@ static void psi_trigger_destroy(struct kref *ref)
|
|||||||
{
|
{
|
||||||
struct psi_trigger *t = container_of(ref, struct psi_trigger, refcount);
|
struct psi_trigger *t = container_of(ref, struct psi_trigger, refcount);
|
||||||
struct psi_group *group = t->group;
|
struct psi_group *group = t->group;
|
||||||
struct kthread_worker *kworker_to_destroy = NULL;
|
struct task_struct *task_to_destroy = NULL;
|
||||||
|
|
||||||
if (static_branch_likely(&psi_disabled))
|
if (static_branch_likely(&psi_disabled))
|
||||||
return;
|
return;
|
||||||
@ -1158,13 +1175,13 @@ static void psi_trigger_destroy(struct kref *ref)
|
|||||||
period = min(period, div_u64(tmp->win.size,
|
period = min(period, div_u64(tmp->win.size,
|
||||||
UPDATES_PER_WINDOW));
|
UPDATES_PER_WINDOW));
|
||||||
group->poll_min_period = period;
|
group->poll_min_period = period;
|
||||||
/* Destroy poll_kworker when the last trigger is destroyed */
|
/* Destroy poll_task when the last trigger is destroyed */
|
||||||
if (group->poll_states == 0) {
|
if (group->poll_states == 0) {
|
||||||
group->polling_until = 0;
|
group->polling_until = 0;
|
||||||
kworker_to_destroy = rcu_dereference_protected(
|
task_to_destroy = rcu_dereference_protected(
|
||||||
group->poll_kworker,
|
group->poll_task,
|
||||||
lockdep_is_held(&group->trigger_lock));
|
lockdep_is_held(&group->trigger_lock));
|
||||||
rcu_assign_pointer(group->poll_kworker, NULL);
|
rcu_assign_pointer(group->poll_task, NULL);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1172,25 +1189,23 @@ static void psi_trigger_destroy(struct kref *ref)
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* Wait for both *trigger_ptr from psi_trigger_replace and
|
* Wait for both *trigger_ptr from psi_trigger_replace and
|
||||||
* poll_kworker RCUs to complete their read-side critical sections
|
* poll_task RCUs to complete their read-side critical sections
|
||||||
* before destroying the trigger and optionally the poll_kworker
|
* before destroying the trigger and optionally the poll_task
|
||||||
*/
|
*/
|
||||||
synchronize_rcu();
|
synchronize_rcu();
|
||||||
/*
|
/*
|
||||||
* Destroy the kworker after releasing trigger_lock to prevent a
|
* Destroy the kworker after releasing trigger_lock to prevent a
|
||||||
* deadlock while waiting for psi_poll_work to acquire trigger_lock
|
* deadlock while waiting for psi_poll_work to acquire trigger_lock
|
||||||
*/
|
*/
|
||||||
if (kworker_to_destroy) {
|
if (task_to_destroy) {
|
||||||
/*
|
/*
|
||||||
* After the RCU grace period has expired, the worker
|
* After the RCU grace period has expired, the worker
|
||||||
* can no longer be found through group->poll_kworker.
|
* can no longer be found through group->poll_task.
|
||||||
* But it might have been already scheduled before
|
* But it might have been already scheduled before
|
||||||
* that - deschedule it cleanly before destroying it.
|
* that - deschedule it cleanly before destroying it.
|
||||||
*/
|
*/
|
||||||
kthread_cancel_delayed_work_sync(&group->poll_work);
|
del_timer_sync(&group->poll_timer);
|
||||||
atomic_set(&group->poll_scheduled, 0);
|
kthread_stop(task_to_destroy);
|
||||||
|
|
||||||
kthread_destroy_worker(kworker_to_destroy);
|
|
||||||
}
|
}
|
||||||
kfree(t);
|
kfree(t);
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user