sched/topology: Warn when NUMA diameter > 2
NUMA topologies where the shortest path between some two nodes requires three or more hops (i.e. diameter > 2) end up being misrepresented in the scheduler topology structures. This is currently detected when booting a kernel with CONFIG_SCHED_DEBUG=y + sched_debug on the cmdline, although this will only yield a warning about sched_group spans not matching sched_domain spans: ERROR: groups don't span domain->span Add an explicit warning for that case, triggered regardless of CONFIG_SCHED_DEBUG, and decorate it with an appropriate comment. The topology described in the comment can be booted up on QEMU by appending the following to your usual QEMU incantation: -smp cores=4 \ -numa node,cpus=0,nodeid=0 -numa node,cpus=1,nodeid=1, \ -numa node,cpus=2,nodeid=2, -numa node,cpus=3,nodeid=3, \ -numa dist,src=0,dst=1,val=20, -numa dist,src=0,dst=2,val=30, \ -numa dist,src=0,dst=3,val=40, -numa dist,src=1,dst=2,val=20, \ -numa dist,src=1,dst=3,val=30, -numa dist,src=2,dst=3,val=20 A somewhat more realistic topology (6-node mesh) with the same affliction can be conjured with: -smp cores=6 \ -numa node,cpus=0,nodeid=0 -numa node,cpus=1,nodeid=1, \ -numa node,cpus=2,nodeid=2, -numa node,cpus=3,nodeid=3, \ -numa node,cpus=4,nodeid=4, -numa node,cpus=5,nodeid=5, \ -numa dist,src=0,dst=1,val=20, -numa dist,src=0,dst=2,val=30, \ -numa dist,src=0,dst=3,val=40, -numa dist,src=0,dst=4,val=30, \ -numa dist,src=0,dst=5,val=20, \ -numa dist,src=1,dst=2,val=20, -numa dist,src=1,dst=3,val=30, \ -numa dist,src=1,dst=4,val=20, -numa dist,src=1,dst=5,val=30, \ -numa dist,src=2,dst=3,val=20, -numa dist,src=2,dst=4,val=30, \ -numa dist,src=2,dst=5,val=40, \ -numa dist,src=3,dst=4,val=20, -numa dist,src=3,dst=5,val=30, \ -numa dist,src=4,dst=5,val=20 Signed-off-by: Valentin Schneider <valentin.schneider@arm.com> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Acked-by: Mel Gorman <mgorman@techsingularity.net> Link: https://lore.kernel.org/lkml/jhjtux5edo2.mognet@arm.com
This commit is contained in:
parent
406100f3da
commit
b5b217346d
@ -675,6 +675,7 @@ cpu_attach_domain(struct sched_domain *sd, struct root_domain *rd, int cpu)
|
|||||||
{
|
{
|
||||||
struct rq *rq = cpu_rq(cpu);
|
struct rq *rq = cpu_rq(cpu);
|
||||||
struct sched_domain *tmp;
|
struct sched_domain *tmp;
|
||||||
|
int numa_distance = 0;
|
||||||
|
|
||||||
/* Remove the sched domains which do not contribute to scheduling. */
|
/* Remove the sched domains which do not contribute to scheduling. */
|
||||||
for (tmp = sd; tmp; ) {
|
for (tmp = sd; tmp; ) {
|
||||||
@ -706,6 +707,38 @@ cpu_attach_domain(struct sched_domain *sd, struct root_domain *rd, int cpu)
|
|||||||
sd->child = NULL;
|
sd->child = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
for (tmp = sd; tmp; tmp = tmp->parent)
|
||||||
|
numa_distance += !!(tmp->flags & SD_NUMA);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* FIXME: Diameter >=3 is misrepresented.
|
||||||
|
*
|
||||||
|
* Smallest diameter=3 topology is:
|
||||||
|
*
|
||||||
|
* node 0 1 2 3
|
||||||
|
* 0: 10 20 30 40
|
||||||
|
* 1: 20 10 20 30
|
||||||
|
* 2: 30 20 10 20
|
||||||
|
* 3: 40 30 20 10
|
||||||
|
*
|
||||||
|
* 0 --- 1 --- 2 --- 3
|
||||||
|
*
|
||||||
|
* NUMA-3 0-3 N/A N/A 0-3
|
||||||
|
* groups: {0-2},{1-3} {1-3},{0-2}
|
||||||
|
*
|
||||||
|
* NUMA-2 0-2 0-3 0-3 1-3
|
||||||
|
* groups: {0-1},{1-3} {0-2},{2-3} {1-3},{0-1} {2-3},{0-2}
|
||||||
|
*
|
||||||
|
* NUMA-1 0-1 0-2 1-3 2-3
|
||||||
|
* groups: {0},{1} {1},{2},{0} {2},{3},{1} {3},{2}
|
||||||
|
*
|
||||||
|
* NUMA-0 0 1 2 3
|
||||||
|
*
|
||||||
|
* The NUMA-2 groups for nodes 0 and 3 are obviously buggered, as the
|
||||||
|
* group span isn't a subset of the domain span.
|
||||||
|
*/
|
||||||
|
WARN_ONCE(numa_distance > 2, "Shortest NUMA path spans too many nodes\n");
|
||||||
|
|
||||||
sched_domain_debug(sd, cpu);
|
sched_domain_debug(sd, cpu);
|
||||||
|
|
||||||
rq_attach_root(rq, rd);
|
rq_attach_root(rq, rd);
|
||||||
|
Loading…
Reference in New Issue
Block a user