Merge branch 'for-5.15' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/wq

Pull workqueue updates from Tejun Heo:
 "There is a long-standing subtle destroy_workqueue() bug where a
  workqueue can be destroyed while internal work items used for flushing
  are still in flight. Lai fixed it by assigning a flush color to the
  internal work items so that they are correctly waited for during
  destruction.

  Other than that, all are minor cleanups"

* 'for-5.15' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/wq:
  workqueue: Remove unused WORK_NO_COLOR
  workqueue: Assign a color to barrier work items
  workqueue: Mark barrier work with WORK_STRUCT_INACTIVE
  workqueue: Change the code of calculating work_flags in insert_wq_barrier()
  workqueue: Change argument of pwq_dec_nr_in_flight()
  workqueue: Rename "delayed" (delayed by active management) to "inactive"
  workqueue: Replace deprecated CPU-hotplug functions.
  workqueue: Replace deprecated ida_simple_*() with ida_alloc()/ida_free()
  workqueue: Fix typo in comments
  workqueue: Fix possible memory leaks in wq_numa_init()
commit 86ac54e79f
Author: Linus Torvalds
Date:   2021-08-31 15:53:20 -07:00

3 changed files with 116 additions and 88 deletions
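
The fix is easiest to see in miniature: flush_workqueue() and destroy_workqueue() wait for pwq->nr_in_flight[color] to drop to zero for the colors being flushed, so a barrier work item that carries no color is invisible to that wait and can still be running when the workqueue is torn down. The toy sketch below is illustrative only (local names, not the kernel implementation) and just contrasts the old WORK_NO_COLOR accounting with the new colored barriers.

/* Toy model of per-color in-flight accounting (assumed simplification). */
#define TOY_NR_COLORS 16

struct toy_pwq {
	int nr_in_flight[TOY_NR_COLORS];	/* one counter per flush color */
};

/* Old scheme: the barrier carried WORK_NO_COLOR and touched no counter,
 * so a flush racing with destroy_workqueue() never waited for it. */
static void toy_queue_barrier_old(struct toy_pwq *pwq)
{
	(void)pwq;	/* nothing accounted: invisible to flushing */
}

/* New scheme: the barrier inherits its target's flush color, so the
 * normal nr_in_flight[] wait in flush/destroy also covers the barrier. */
static void toy_queue_barrier_new(struct toy_pwq *pwq, int target_color)
{
	pwq->nr_in_flight[target_color]++;
}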

include/linux/workqueue.h

@ -29,7 +29,7 @@ void delayed_work_timer_fn(struct timer_list *t);
enum {
WORK_STRUCT_PENDING_BIT = 0, /* work item is pending execution */
WORK_STRUCT_DELAYED_BIT = 1, /* work item is delayed */
WORK_STRUCT_INACTIVE_BIT = 1, /* work item is inactive */
WORK_STRUCT_PWQ_BIT = 2, /* data points to pwq */
WORK_STRUCT_LINKED_BIT = 3, /* next work is linked to this one */
#ifdef CONFIG_DEBUG_OBJECTS_WORK
@ -42,7 +42,7 @@ enum {
WORK_STRUCT_COLOR_BITS = 4,
WORK_STRUCT_PENDING = 1 << WORK_STRUCT_PENDING_BIT,
WORK_STRUCT_DELAYED = 1 << WORK_STRUCT_DELAYED_BIT,
WORK_STRUCT_INACTIVE = 1 << WORK_STRUCT_INACTIVE_BIT,
WORK_STRUCT_PWQ = 1 << WORK_STRUCT_PWQ_BIT,
WORK_STRUCT_LINKED = 1 << WORK_STRUCT_LINKED_BIT,
#ifdef CONFIG_DEBUG_OBJECTS_WORK
@ -51,19 +51,14 @@ enum {
WORK_STRUCT_STATIC = 0,
#endif
/*
* The last color is no color used for works which don't
* participate in workqueue flushing.
*/
WORK_NR_COLORS = (1 << WORK_STRUCT_COLOR_BITS) - 1,
WORK_NO_COLOR = WORK_NR_COLORS,
WORK_NR_COLORS = (1 << WORK_STRUCT_COLOR_BITS),
/* not bound to any CPU, prefer the local CPU */
WORK_CPU_UNBOUND = NR_CPUS,
/*
* Reserve 8 bits off of pwq pointer w/ debugobjects turned off.
* This makes pwqs aligned to 256 bytes and allows 15 workqueue
* This makes pwqs aligned to 256 bytes and allows 16 workqueue
* flush colors.
*/
WORK_STRUCT_FLAG_BITS = WORK_STRUCT_COLOR_SHIFT +
@ -324,7 +319,7 @@ enum {
* to execute and tries to keep idle cores idle to conserve power;
* however, for example, a per-cpu work item scheduled from an
* interrupt handler on an idle CPU will force the scheduler to
* excute the work item on that CPU breaking the idleness, which in
* execute the work item on that CPU breaking the idleness, which in
* turn may lead to more scheduling choices which are sub-optimal
* in terms of power consumption.
*
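
With WORK_NO_COLOR gone, every bit pattern of the color field is a usable flush color, which is why the comment above now says 16 rather than 15. A minimal restatement of the arithmetic, using locally defined toy constants rather than the kernel enum:

/* Toy restatement of the color-field arithmetic (assumed values). */
enum {
	TOY_COLOR_BITS = 4,			/* mirrors WORK_STRUCT_COLOR_BITS */
	TOY_NR_COLORS  = 1 << TOY_COLOR_BITS,	/* 16: no value reserved as "no color" */
};
_Static_assert(TOY_NR_COLORS == 16, "every 4-bit pattern is a usable flush color");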

kernel/workqueue.c

@ -205,9 +205,26 @@ struct pool_workqueue {
int refcnt; /* L: reference count */
int nr_in_flight[WORK_NR_COLORS];
/* L: nr of in_flight works */
/*
* nr_active management and WORK_STRUCT_INACTIVE:
*
* When pwq->nr_active >= max_active, new work item is queued to
* pwq->inactive_works instead of pool->worklist and marked with
* WORK_STRUCT_INACTIVE.
*
* All work items marked with WORK_STRUCT_INACTIVE do not participate
* in pwq->nr_active and all work items in pwq->inactive_works are
* marked with WORK_STRUCT_INACTIVE. But not all WORK_STRUCT_INACTIVE
* work items are in pwq->inactive_works. Some of them are ready to
* run in pool->worklist or worker->scheduled. Those work items are
* only struct wq_barrier, which is used for flush_work() and should
* not participate in pwq->nr_active. A non-barrier work item is
* marked with WORK_STRUCT_INACTIVE iff it is in pwq->inactive_works.
*/
int nr_active; /* L: nr of active works */
int max_active; /* L: max active works */
struct list_head delayed_works; /* L: delayed works */
struct list_head inactive_works; /* L: inactive works */
struct list_head pwqs_node; /* WR: node on wq->pwqs */
struct list_head mayday_node; /* MD: node on wq->maydays */
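
The comment above describes when a work item is parked rather than counted as active. A toy restatement of that queueing rule (illustrative only; names are local stand-ins, not the kernel's):

#include <stdbool.h>

struct toy_active_limits {
	int nr_active;		/* items currently counted as active */
	int max_active;		/* limit before new items are parked */
};

/* Returns true if a newly queued non-barrier item must be put on
 * inactive_works and marked WORK_STRUCT_INACTIVE instead of going
 * straight to the pool's worklist. */
static bool toy_queue_goes_inactive(struct toy_active_limits *pwq)
{
	if (pwq->nr_active < pwq->max_active) {
		pwq->nr_active++;	/* active item: counted immediately */
		return false;
	}
	return true;			/* over the limit: park it as inactive */
}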
@ -524,7 +541,7 @@ static inline void debug_work_deactivate(struct work_struct *work) { }
#endif
/**
* worker_pool_assign_id - allocate ID and assing it to @pool
* worker_pool_assign_id - allocate ID and assign it to @pool
* @pool: the pool pointer of interest
*
* Returns 0 if ID in [0, WORK_OFFQ_POOL_NONE) is allocated and assigned
@ -579,9 +596,9 @@ static unsigned int work_color_to_flags(int color)
return color << WORK_STRUCT_COLOR_SHIFT;
}
static int get_work_color(struct work_struct *work)
static int get_work_color(unsigned long work_data)
{
return (*work_data_bits(work) >> WORK_STRUCT_COLOR_SHIFT) &
return (work_data >> WORK_STRUCT_COLOR_SHIFT) &
((1 << WORK_STRUCT_COLOR_BITS) - 1);
}
@ -1136,7 +1153,7 @@ static void put_pwq_unlocked(struct pool_workqueue *pwq)
}
}
static void pwq_activate_delayed_work(struct work_struct *work)
static void pwq_activate_inactive_work(struct work_struct *work)
{
struct pool_workqueue *pwq = get_work_pwq(work);
@ -1144,22 +1161,22 @@ static void pwq_activate_delayed_work(struct work_struct *work)
if (list_empty(&pwq->pool->worklist))
pwq->pool->watchdog_ts = jiffies;
move_linked_works(work, &pwq->pool->worklist, NULL);
__clear_bit(WORK_STRUCT_DELAYED_BIT, work_data_bits(work));
__clear_bit(WORK_STRUCT_INACTIVE_BIT, work_data_bits(work));
pwq->nr_active++;
}
static void pwq_activate_first_delayed(struct pool_workqueue *pwq)
static void pwq_activate_first_inactive(struct pool_workqueue *pwq)
{
struct work_struct *work = list_first_entry(&pwq->delayed_works,
struct work_struct *work = list_first_entry(&pwq->inactive_works,
struct work_struct, entry);
pwq_activate_delayed_work(work);
pwq_activate_inactive_work(work);
}
/**
* pwq_dec_nr_in_flight - decrement pwq's nr_in_flight
* @pwq: pwq of interest
* @color: color of work which left the queue
* @work_data: work_data of work which left the queue
*
* A work either has completed or is removed from pending queue,
* decrement nr_in_flight of its pwq and handle workqueue flushing.
@ -1167,21 +1184,21 @@ static void pwq_activate_first_delayed(struct pool_workqueue *pwq)
* CONTEXT:
* raw_spin_lock_irq(pool->lock).
*/
static void pwq_dec_nr_in_flight(struct pool_workqueue *pwq, int color)
static void pwq_dec_nr_in_flight(struct pool_workqueue *pwq, unsigned long work_data)
{
/* uncolored work items don't participate in flushing or nr_active */
if (color == WORK_NO_COLOR)
goto out_put;
int color = get_work_color(work_data);
if (!(work_data & WORK_STRUCT_INACTIVE)) {
pwq->nr_active--;
if (!list_empty(&pwq->inactive_works)) {
/* one down, submit an inactive one */
if (pwq->nr_active < pwq->max_active)
pwq_activate_first_inactive(pwq);
}
}
pwq->nr_in_flight[color]--;
pwq->nr_active--;
if (!list_empty(&pwq->delayed_works)) {
/* one down, submit a delayed one */
if (pwq->nr_active < pwq->max_active)
pwq_activate_first_delayed(pwq);
}
/* is flush in progress and are we at the flushing tip? */
if (likely(pwq->flush_color != color))
goto out_put;
@ -1281,17 +1298,21 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork,
debug_work_deactivate(work);
/*
* A delayed work item cannot be grabbed directly because
* it might have linked NO_COLOR work items which, if left
* on the delayed_list, will confuse pwq->nr_active
* A cancelable inactive work item must be in the
* pwq->inactive_works since a queued barrier can't be
* canceled (see the comments in insert_wq_barrier()).
*
* An inactive work item cannot be grabbed directly because
* it might have linked barrier work items which, if left
* on the inactive_works list, will confuse pwq->nr_active
* management later on and cause stall. Make sure the work
* item is activated before grabbing.
*/
if (*work_data_bits(work) & WORK_STRUCT_DELAYED)
pwq_activate_delayed_work(work);
if (*work_data_bits(work) & WORK_STRUCT_INACTIVE)
pwq_activate_inactive_work(work);
list_del_init(&work->entry);
pwq_dec_nr_in_flight(pwq, get_work_color(work));
pwq_dec_nr_in_flight(pwq, *work_data_bits(work));
/* work->data points to pwq iff queued, point to pool */
set_work_pool_and_keep_pending(work, pool->id);
@ -1490,8 +1511,8 @@ retry:
if (list_empty(worklist))
pwq->pool->watchdog_ts = jiffies;
} else {
work_flags |= WORK_STRUCT_DELAYED;
worklist = &pwq->delayed_works;
work_flags |= WORK_STRUCT_INACTIVE;
worklist = &pwq->inactive_works;
}
debug_work_activate(work);
@ -1912,14 +1933,14 @@ static void worker_detach_from_pool(struct worker *worker)
*/
static struct worker *create_worker(struct worker_pool *pool)
{
struct worker *worker = NULL;
int id = -1;
struct worker *worker;
int id;
char id_buf[16];
/* ID is needed to determine kthread name */
id = ida_simple_get(&pool->worker_ida, 0, 0, GFP_KERNEL);
id = ida_alloc(&pool->worker_ida, GFP_KERNEL);
if (id < 0)
goto fail;
return NULL;
worker = alloc_worker(pool->node);
if (!worker)
@ -1954,8 +1975,7 @@ static struct worker *create_worker(struct worker_pool *pool)
return worker;
fail:
if (id >= 0)
ida_simple_remove(&pool->worker_ida, id);
ida_free(&pool->worker_ida, id);
kfree(worker);
return NULL;
}
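
The ida conversion in create_worker() above is mechanical: ida_simple_get(ida, 0, 0, gfp) becomes ida_alloc(ida, gfp) and ida_simple_remove() becomes ida_free(), with the same lowest-free-ID semantics. A minimal usage sketch of the modern API (toy example, not the workqueue code):

#include <linux/idr.h>
#include <linux/gfp.h>

static DEFINE_IDA(toy_ida);

static int toy_get_and_put_id(void)
{
	int id = ida_alloc(&toy_ida, GFP_KERNEL);	/* lowest free ID >= 0 */

	if (id < 0)
		return id;	/* -ENOMEM or -ENOSPC */

	/* ... use id ... */

	ida_free(&toy_ida, id);
	return 0;
}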
@ -2173,7 +2193,7 @@ __acquires(&pool->lock)
struct pool_workqueue *pwq = get_work_pwq(work);
struct worker_pool *pool = worker->pool;
bool cpu_intensive = pwq->wq->flags & WQ_CPU_INTENSIVE;
int work_color;
unsigned long work_data;
struct worker *collision;
#ifdef CONFIG_LOCKDEP
/*
@ -2209,7 +2229,8 @@ __acquires(&pool->lock)
worker->current_work = work;
worker->current_func = work->func;
worker->current_pwq = pwq;
work_color = get_work_color(work);
work_data = *work_data_bits(work);
worker->current_color = get_work_color(work_data);
/*
* Record wq name for cmdline and debug reporting, may get
@ -2315,7 +2336,8 @@ __acquires(&pool->lock)
worker->current_work = NULL;
worker->current_func = NULL;
worker->current_pwq = NULL;
pwq_dec_nr_in_flight(pwq, work_color);
worker->current_color = INT_MAX;
pwq_dec_nr_in_flight(pwq, work_data);
}
/**
@ -2378,7 +2400,7 @@ woke_up:
set_pf_worker(false);
set_task_comm(worker->task, "kworker/dying");
ida_simple_remove(&pool->worker_ida, worker->id);
ida_free(&pool->worker_ida, worker->id);
worker_detach_from_pool(worker);
kfree(worker);
return 0;
@ -2531,7 +2553,7 @@ repeat:
/*
* The above execution of rescued work items could
* have created more to rescue through
* pwq_activate_first_delayed() or chained
* pwq_activate_first_inactive() or chained
* queueing. Let's put @pwq back on mayday list so
* that such back-to-back work items, which may be
* being used to relieve memory pressure, don't
@ -2658,8 +2680,9 @@ static void insert_wq_barrier(struct pool_workqueue *pwq,
struct wq_barrier *barr,
struct work_struct *target, struct worker *worker)
{
unsigned int work_flags = 0;
unsigned int work_color;
struct list_head *head;
unsigned int linked = 0;
/*
* debugobject calls are safe here even with pool->lock locked
@ -2674,24 +2697,31 @@ static void insert_wq_barrier(struct pool_workqueue *pwq,
barr->task = current;
/* The barrier work item does not participate in pwq->nr_active. */
work_flags |= WORK_STRUCT_INACTIVE;
/*
* If @target is currently being executed, schedule the
* barrier to the worker; otherwise, put it after @target.
*/
if (worker)
if (worker) {
head = worker->scheduled.next;
else {
work_color = worker->current_color;
} else {
unsigned long *bits = work_data_bits(target);
head = target->entry.next;
/* there can already be other linked works, inherit and set */
linked = *bits & WORK_STRUCT_LINKED;
work_flags |= *bits & WORK_STRUCT_LINKED;
work_color = get_work_color(*bits);
__set_bit(WORK_STRUCT_LINKED_BIT, bits);
}
pwq->nr_in_flight[work_color]++;
work_flags |= work_color_to_flags(work_color);
debug_work_activate(&barr->work);
insert_work(pwq, &barr->work, head,
work_color_to_flags(WORK_NO_COLOR) | linked);
insert_work(pwq, &barr->work, head, work_flags);
}
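
For reference, the barrier's flags in the new insert_wq_barrier() are composed from three pieces: the INACTIVE bit (so the barrier never counts toward nr_active), any inherited LINKED bit from the target, and the flush color taken from worker->current_color or from the target's work_data. A condensed, self-contained restatement using stand-in constants (not the kernel's actual values):

/* Stand-ins for WORK_STRUCT_* constants; values are illustrative. */
#define TOY_STRUCT_INACTIVE	(1UL << 1)
#define TOY_STRUCT_LINKED	(1UL << 3)
#define TOY_COLOR_SHIFT		4

static unsigned long toy_barrier_flags(unsigned long target_data, int work_color)
{
	unsigned long flags = TOY_STRUCT_INACTIVE;	/* never counted in nr_active */

	flags |= target_data & TOY_STRUCT_LINKED;	/* keep an existing linked chain intact */
	flags |= (unsigned long)work_color << TOY_COLOR_SHIFT;	/* join the target's flush color */
	return flags;
}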
/**
@ -2957,7 +2987,7 @@ reflush:
bool drained;
raw_spin_lock_irq(&pwq->pool->lock);
drained = !pwq->nr_active && list_empty(&pwq->delayed_works);
drained = !pwq->nr_active && list_empty(&pwq->inactive_works);
raw_spin_unlock_irq(&pwq->pool->lock);
if (drained)
@ -3293,7 +3323,7 @@ int schedule_on_each_cpu(work_func_t func)
if (!works)
return -ENOMEM;
get_online_cpus();
cpus_read_lock();
for_each_online_cpu(cpu) {
struct work_struct *work = per_cpu_ptr(works, cpu);
@ -3305,7 +3335,7 @@ int schedule_on_each_cpu(work_func_t func)
for_each_online_cpu(cpu)
flush_work(per_cpu_ptr(works, cpu));
put_online_cpus();
cpus_read_unlock();
free_percpu(works);
return 0;
}
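
get_online_cpus()/put_online_cpus(), as used above, are the deprecated names for the CPU-hotplug read lock; cpus_read_lock()/cpus_read_unlock() are the current ones and the conversion is one-for-one. A minimal sketch of the pattern (toy example, not the workqueue code):

#include <linux/cpu.h>
#include <linux/cpumask.h>

static void toy_visit_online_cpus(void)
{
	int cpu;

	cpus_read_lock();	/* keep the set of online CPUs stable */
	for_each_online_cpu(cpu) {
		/* per-CPU work goes here */
	}
	cpus_read_unlock();
}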
@ -3713,7 +3743,7 @@ static void pwq_unbound_release_workfn(struct work_struct *work)
* @pwq: target pool_workqueue
*
* If @pwq isn't freezing, set @pwq->max_active to the associated
* workqueue's saved_max_active and activate delayed work items
* workqueue's saved_max_active and activate inactive work items
* accordingly. If @pwq is freezing, clear @pwq->max_active to zero.
*/
static void pwq_adjust_max_active(struct pool_workqueue *pwq)
@ -3742,9 +3772,9 @@ static void pwq_adjust_max_active(struct pool_workqueue *pwq)
pwq->max_active = wq->saved_max_active;
while (!list_empty(&pwq->delayed_works) &&
while (!list_empty(&pwq->inactive_works) &&
pwq->nr_active < pwq->max_active) {
pwq_activate_first_delayed(pwq);
pwq_activate_first_inactive(pwq);
kick = true;
}
@ -3763,7 +3793,7 @@ static void pwq_adjust_max_active(struct pool_workqueue *pwq)
raw_spin_unlock_irqrestore(&pwq->pool->lock, flags);
}
/* initialize newly alloced @pwq which is associated with @wq and @pool */
/* initialize newly allocated @pwq which is associated with @wq and @pool */
static void init_pwq(struct pool_workqueue *pwq, struct workqueue_struct *wq,
struct worker_pool *pool)
{
@ -3775,7 +3805,7 @@ static void init_pwq(struct pool_workqueue *pwq, struct workqueue_struct *wq,
pwq->wq = wq;
pwq->flush_color = -1;
pwq->refcnt = 1;
INIT_LIST_HEAD(&pwq->delayed_works);
INIT_LIST_HEAD(&pwq->inactive_works);
INIT_LIST_HEAD(&pwq->pwqs_node);
INIT_LIST_HEAD(&pwq->mayday_node);
INIT_WORK(&pwq->unbound_release_work, pwq_unbound_release_workfn);
@ -4016,14 +4046,14 @@ static void apply_wqattrs_commit(struct apply_wqattrs_ctx *ctx)
static void apply_wqattrs_lock(void)
{
/* CPUs should stay stable across pwq creations and installations */
get_online_cpus();
cpus_read_lock();
mutex_lock(&wq_pool_mutex);
}
static void apply_wqattrs_unlock(void)
{
mutex_unlock(&wq_pool_mutex);
put_online_cpus();
cpus_read_unlock();
}
static int apply_workqueue_attrs_locked(struct workqueue_struct *wq,
@ -4068,7 +4098,7 @@ static int apply_workqueue_attrs_locked(struct workqueue_struct *wq,
*
* Performs GFP_KERNEL allocations.
*
* Assumes caller has CPU hotplug read exclusion, i.e. get_online_cpus().
* Assumes caller has CPU hotplug read exclusion, i.e. cpus_read_lock().
*
* Return: 0 on success and -errno on failure.
*/
@ -4196,7 +4226,7 @@ static int alloc_and_link_pwqs(struct workqueue_struct *wq)
return 0;
}
get_online_cpus();
cpus_read_lock();
if (wq->flags & __WQ_ORDERED) {
ret = apply_workqueue_attrs(wq, ordered_wq_attrs[highpri]);
/* there should only be single pwq for ordering guarantee */
@ -4206,7 +4236,7 @@ static int alloc_and_link_pwqs(struct workqueue_struct *wq)
} else {
ret = apply_workqueue_attrs(wq, unbound_std_wq_attrs[highpri]);
}
put_online_cpus();
cpus_read_unlock();
return ret;
}
@ -4362,7 +4392,7 @@ static bool pwq_busy(struct pool_workqueue *pwq)
if ((pwq != pwq->wq->dfl_pwq) && (pwq->refcnt > 1))
return true;
if (pwq->nr_active || !list_empty(&pwq->delayed_works))
if (pwq->nr_active || !list_empty(&pwq->inactive_works))
return true;
return false;
@ -4558,7 +4588,7 @@ bool workqueue_congested(int cpu, struct workqueue_struct *wq)
else
pwq = unbound_pwq_by_node(wq, cpu_to_node(cpu));
ret = !list_empty(&pwq->delayed_works);
ret = !list_empty(&pwq->inactive_works);
preempt_enable();
rcu_read_unlock();
@ -4754,11 +4784,11 @@ static void show_pwq(struct pool_workqueue *pwq)
pr_cont("\n");
}
if (!list_empty(&pwq->delayed_works)) {
if (!list_empty(&pwq->inactive_works)) {
bool comma = false;
pr_info(" delayed:");
list_for_each_entry(work, &pwq->delayed_works, entry) {
pr_info(" inactive:");
list_for_each_entry(work, &pwq->inactive_works, entry) {
pr_cont_work(comma, work);
comma = !(*work_data_bits(work) & WORK_STRUCT_LINKED);
}
@ -4788,7 +4818,7 @@ void show_workqueue_state(void)
bool idle = true;
for_each_pwq(pwq, wq) {
if (pwq->nr_active || !list_empty(&pwq->delayed_works)) {
if (pwq->nr_active || !list_empty(&pwq->inactive_works)) {
idle = false;
break;
}
@ -4800,7 +4830,7 @@ void show_workqueue_state(void)
for_each_pwq(pwq, wq) {
raw_spin_lock_irqsave(&pwq->pool->lock, flags);
if (pwq->nr_active || !list_empty(&pwq->delayed_works))
if (pwq->nr_active || !list_empty(&pwq->inactive_works))
show_pwq(pwq);
raw_spin_unlock_irqrestore(&pwq->pool->lock, flags);
/*
@ -5168,10 +5198,10 @@ long work_on_cpu_safe(int cpu, long (*fn)(void *), void *arg)
{
long ret = -ENODEV;
get_online_cpus();
cpus_read_lock();
if (cpu_online(cpu))
ret = work_on_cpu(cpu, fn, arg);
put_online_cpus();
cpus_read_unlock();
return ret;
}
EXPORT_SYMBOL_GPL(work_on_cpu_safe);
@ -5183,7 +5213,7 @@ EXPORT_SYMBOL_GPL(work_on_cpu_safe);
* freeze_workqueues_begin - begin freezing workqueues
*
* Start freezing workqueues. After this function returns, all freezable
* workqueues will queue new works to their delayed_works list instead of
* workqueues will queue new works to their inactive_works list instead of
* pool->worklist.
*
* CONTEXT:
@ -5331,7 +5361,7 @@ static int workqueue_apply_unbound_cpumask(void)
* the affinity of all unbound workqueues. This function checks @cpumask
* and applies it to all unbound workqueues, updating all their pwqs.
*
* Retun: 0 - Success
* Return: 0 - Success
* -EINVAL - Invalid @cpumask
* -ENOMEM - Failed to allocate memory for attrs or pwqs.
*/
@ -5443,7 +5473,7 @@ static ssize_t wq_pool_ids_show(struct device *dev,
const char *delim = "";
int node, written = 0;
get_online_cpus();
cpus_read_lock();
rcu_read_lock();
for_each_node(node) {
written += scnprintf(buf + written, PAGE_SIZE - written,
@ -5453,7 +5483,7 @@ static ssize_t wq_pool_ids_show(struct device *dev,
}
written += scnprintf(buf + written, PAGE_SIZE - written, "\n");
rcu_read_unlock();
put_online_cpus();
cpus_read_unlock();
return written;
}
@ -5902,6 +5932,13 @@ static void __init wq_numa_init(void)
return;
}
for_each_possible_cpu(cpu) {
if (WARN_ON(cpu_to_node(cpu) == NUMA_NO_NODE)) {
pr_warn("workqueue: NUMA node mapping not available for cpu%d, disabling NUMA support\n", cpu);
return;
}
}
wq_update_unbound_numa_attrs_buf = alloc_workqueue_attrs();
BUG_ON(!wq_update_unbound_numa_attrs_buf);
@ -5919,11 +5956,6 @@ static void __init wq_numa_init(void)
for_each_possible_cpu(cpu) {
node = cpu_to_node(cpu);
if (WARN_ON(node == NUMA_NO_NODE)) {
pr_warn("workqueue: NUMA node mapping not available for cpu%d, disabling NUMA support\n", cpu);
/* happens iff arch is bonkers, let's just proceed */
return;
}
cpumask_set_cpu(cpu, tbl[node]);
}

kernel/workqueue_internal.h

@ -30,7 +30,8 @@ struct worker {
struct work_struct *current_work; /* L: work being processed */
work_func_t current_func; /* L: current_work's fn */
struct pool_workqueue *current_pwq; /* L: current_work's pwq */
unsigned int current_color; /* L: current_work's color */
struct list_head scheduled; /* L: scheduled works */
/* 64 bytes boundary on 64bit, 32 on 32bit */