workqueue: Add pwq->stats[] and a monitoring script

Currently, the only way to peer into workqueue operations is through
tracing. While possible, it isn't easy or convenient to monitor
per-workqueue behaviors over time this way. Let's add pwq->stats[] that
track relevant events and a drgn monitoring script -
tools/workqueue/wq_monitor.py.

It's arguable whether this needs to be configurable. However, it currently
only has several counters and the runtime overhead shouldn't be noticeable
given that they're on pwq's which are per-cpu on per-cpu workqueues and
per-numa-node on unbound ones. Let's keep it simple for the time being.

v2: Patch reordered to earlier with fewer fields. Field will be added back
    gradually. Help message improved.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Lai Jiangshan <jiangshanlai@gmail.com>
This commit is contained in:
Tejun Heo 2023-05-17 17:02:08 -10:00
parent 854f5cc5b7
commit 725e8ec59c
3 changed files with 205 additions and 1 deletions

View File

@ -348,6 +348,37 @@ Guidelines
level of locality in wq operations and work item execution.
Monitoring
==========
Use tools/workqueue/wq_monitor.py to monitor workqueue operations: ::
$ tools/workqueue/wq_monitor.py events
total infl CMwake mayday rescued
events 18545 0 5 - -
events_highpri 8 0 0 - -
events_long 3 0 0 - -
events_unbound 38306 0 - - -
events_freezable 0 0 0 - -
events_power_efficient 29598 0 0 - -
events_freezable_power_ 10 0 0 - -
sock_diag_events 0 0 0 - -
total infl CMwake mayday rescued
events 18548 0 5 - -
events_highpri 8 0 0 - -
events_long 3 0 0 - -
events_unbound 38322 0 - - -
events_freezable 0 0 0 - -
events_power_efficient 29603 0 0 - -
events_freezable_power_ 10 0 0 - -
sock_diag_events 0 0 0 - -
...
See the command's help message for more info.
Debugging
=========
@ -387,6 +418,7 @@ the stack trace of the offending worker thread. ::
The work item's function should be trivially visible in the stack
trace.
Non-reentrance Conditions
=========================

View File

@ -199,6 +199,20 @@ struct worker_pool {
struct rcu_head rcu;
};
/*
* Per-pool_workqueue statistics. These can be monitored using
* tools/workqueue/wq_monitor.py.
*/
enum pool_workqueue_stats {
PWQ_STAT_STARTED, /* work items started execution */
PWQ_STAT_COMPLETED, /* work items completed execution */
PWQ_STAT_CM_WAKEUP, /* concurrency-management worker wakeups */
PWQ_STAT_MAYDAY, /* maydays to rescuer */
PWQ_STAT_RESCUED, /* linked work items executed by rescuer */
PWQ_NR_STATS,
};
/*
* The per-pool workqueue. While queued, the lower WORK_STRUCT_FLAG_BITS
* of work_struct->data are used for flags and the remaining high bits
@ -236,6 +250,8 @@ struct pool_workqueue {
struct list_head pwqs_node; /* WR: node on wq->pwqs */
struct list_head mayday_node; /* MD: node on wq->maydays */
u64 stats[PWQ_NR_STATS];
/*
* Release of unbound pwq is punted to system_wq. See put_pwq()
* and pwq_unbound_release_workfn() for details. pool_workqueue
@ -929,8 +945,10 @@ void wq_worker_sleeping(struct task_struct *task)
}
pool->nr_running--;
if (need_more_worker(pool))
if (need_more_worker(pool)) {
worker->current_pwq->stats[PWQ_STAT_CM_WAKEUP]++;
wake_up_worker(pool);
}
raw_spin_unlock_irq(&pool->lock);
}
@ -2165,6 +2183,7 @@ static void send_mayday(struct work_struct *work)
get_pwq(pwq);
list_add_tail(&pwq->mayday_node, &wq->maydays);
wake_up_process(wq->rescuer->task);
pwq->stats[PWQ_STAT_MAYDAY]++;
}
}
@ -2403,6 +2422,7 @@ __acquires(&pool->lock)
* workqueues), so hiding them isn't a problem.
*/
lockdep_invariant_state(true);
pwq->stats[PWQ_STAT_STARTED]++;
trace_workqueue_execute_start(work);
worker->current_func(work);
/*
@ -2410,6 +2430,7 @@ __acquires(&pool->lock)
* point will only record its address.
*/
trace_workqueue_execute_end(work, worker->current_func);
pwq->stats[PWQ_STAT_COMPLETED]++;
lock_map_release(&lockdep_map);
lock_map_release(&pwq->wq->lockdep_map);
@ -2653,6 +2674,7 @@ repeat:
if (first)
pool->watchdog_ts = jiffies;
move_linked_works(work, scheduled, &n);
pwq->stats[PWQ_STAT_RESCUED]++;
}
first = false;
}

View File

@ -0,0 +1,150 @@
#!/usr/bin/env drgn
#
# Copyright (C) 2023 Tejun Heo <tj@kernel.org>
# Copyright (C) 2023 Meta Platforms, Inc. and affiliates.
desc = """
This is a drgn script to monitor workqueues. For more info on drgn, visit
https://github.com/osandov/drgn.
total Total number of work items executed by the workqueue.
infl The number of currently in-flight work items.
CMwake The number of concurrency-management wake-ups while executing a
work item of the workqueue.
mayday The number of times the rescuer was requested while waiting for
new worker creation.
rescued The number of work items executed by the rescuer.
"""
import sys
import signal
import os
import re
import time
import json
import drgn
from drgn.helpers.linux.list import list_for_each_entry,list_empty
from drgn.helpers.linux.cpumask import for_each_possible_cpu
import argparse
parser = argparse.ArgumentParser(description=desc,
formatter_class=argparse.RawTextHelpFormatter)
parser.add_argument('workqueue', metavar='REGEX', nargs='*',
help='Target workqueue name patterns (all if empty)')
parser.add_argument('-i', '--interval', metavar='SECS', type=float, default=1,
help='Monitoring interval (0 to print once and exit)')
parser.add_argument('-j', '--json', action='store_true',
help='Output in json')
args = parser.parse_args()
def err(s):
print(s, file=sys.stderr, flush=True)
sys.exit(1)
workqueues = prog['workqueues']
WQ_UNBOUND = prog['WQ_UNBOUND']
WQ_MEM_RECLAIM = prog['WQ_MEM_RECLAIM']
PWQ_STAT_STARTED = prog['PWQ_STAT_STARTED'] # work items started execution
PWQ_STAT_COMPLETED = prog['PWQ_STAT_COMPLETED'] # work items completed execution
PWQ_STAT_CM_WAKEUP = prog['PWQ_STAT_CM_WAKEUP'] # concurrency-management worker wakeups
PWQ_STAT_MAYDAY = prog['PWQ_STAT_MAYDAY'] # maydays to rescuer
PWQ_STAT_RESCUED = prog['PWQ_STAT_RESCUED'] # linked work items executed by rescuer
PWQ_NR_STATS = prog['PWQ_NR_STATS']
class WqStats:
def __init__(self, wq):
self.name = wq.name.string_().decode()
self.unbound = wq.flags & WQ_UNBOUND != 0
self.mem_reclaim = wq.flags & WQ_MEM_RECLAIM != 0
self.stats = [0] * PWQ_NR_STATS
for pwq in list_for_each_entry('struct pool_workqueue', wq.pwqs.address_of_(), 'pwqs_node'):
for i in range(PWQ_NR_STATS):
self.stats[i] += int(pwq.stats[i])
def dict(self, now):
return { 'timestamp' : now,
'name' : self.name,
'unbound' : self.unbound,
'mem_reclaim' : self.mem_reclaim,
'started' : self.stats[PWQ_STAT_STARTED],
'completed' : self.stats[PWQ_STAT_COMPLETED],
'cm_wakeup' : self.stats[PWQ_STAT_CM_WAKEUP],
'mayday' : self.stats[PWQ_STAT_MAYDAY],
'rescued' : self.stats[PWQ_STAT_RESCUED], }
def table_header_str():
return f'{"":>24} {"total":>8} {"infl":>5} {"CMwake":>7} {"mayday":>7} {"rescued":>7}'
def table_row_str(self):
cm_wakeup = '-'
mayday = '-'
rescued = '-'
if not self.unbound:
cm_wakeup = str(self.stats[PWQ_STAT_CM_WAKEUP])
if self.mem_reclaim:
mayday = str(self.stats[PWQ_STAT_MAYDAY])
rescued = str(self.stats[PWQ_STAT_RESCUED])
out = f'{self.name[-24:]:24} ' \
f'{self.stats[PWQ_STAT_STARTED]:8} ' \
f'{max(self.stats[PWQ_STAT_STARTED] - self.stats[PWQ_STAT_COMPLETED], 0):5} ' \
f'{cm_wakeup:>7} ' \
f'{mayday:>7} ' \
f'{rescued:>7} '
return out.rstrip(':')
exit_req = False
def sigint_handler(signr, frame):
global exit_req
exit_req = True
def main():
# handle args
table_fmt = not args.json
interval = args.interval
re_str = None
if args.workqueue:
for r in args.workqueue:
if re_str is None:
re_str = r
else:
re_str += '|' + r
filter_re = re.compile(re_str) if re_str else None
# monitoring loop
signal.signal(signal.SIGINT, sigint_handler)
while not exit_req:
now = time.time()
if table_fmt:
print()
print(WqStats.table_header_str())
for wq in list_for_each_entry('struct workqueue_struct', workqueues.address_of_(), 'list'):
stats = WqStats(wq)
if filter_re and not filter_re.search(stats.name):
continue
if table_fmt:
print(stats.table_row_str())
else:
print(stats.dict(now))
if interval == 0:
break
time.sleep(interval)
if __name__ == "__main__":
main()