sched/topology: Skip updating masks for non-online nodes
The scheduler currently expects NUMA node distances to be stable from init onwards, and as a consequence builds the related data structures once-and-for-all at init (see sched_init_numa()). Unfortunately, on some architectures node distance is unreliable for offline nodes and may very well change upon onlining.

Skip over offline nodes during sched_init_numa(). Track nodes that have been onlined at least once, and trigger a build of a node's NUMA masks when it is first onlined post-init.

Reported-by: Geetika Moolchandani <Geetika.Moolchandani1@ibm.com>
Signed-off-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Signed-off-by: Valentin Schneider <valentin.schneider@arm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20210818074333.48645-1-srikar@linux.vnet.ibm.com
parent 746f5ea9c4
commit 0083242c93
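To make the commit's approach concrete before the diff itself, here is a small, self-contained userspace model of the idea (not kernel code): per-distance-level node masks are built at init only for nodes that are online, and a node's masks are filled in the first time it is brought up. The node count, the distance table, and the use of node-ID bitmasks in place of cpumasks are assumptions made up for the example; the two setter functions loosely mirror __sched_domains_numa_masks_set() and sched_domains_numa_masks_set() from the diff below.

/*
 * Illustrative userspace model of the patch's approach -- NOT kernel code.
 * Node count, distance table and node-ID bitmasks instead of cpumasks are
 * assumptions invented for this example.
 */
#include <stdbool.h>
#include <stdio.h>

#define NR_NODES	4
#define NR_LEVELS	3

/* Symmetric distance table; 10 is the local distance. Nodes 2 and 3 start offline. */
static const int dist[NR_NODES][NR_NODES] = {
	{ 10, 20, 40, 40 },
	{ 20, 10, 40, 40 },
	{ 40, 40, 10, 20 },
	{ 40, 40, 20, 10 },
};
static const int level_dist[NR_LEVELS] = { 10, 20, 40 };

static bool online[NR_NODES] = { true, true, false, false };
static bool onlined[NR_NODES];			/* models sched_numa_onlined_nodes */
static unsigned int mask[NR_LEVELS][NR_NODES];	/* bit k set: node k within level_dist[i] of node j */

/* Models the sched_init_numa() change: skip nodes that are offline at init. */
static void init_masks(void)
{
	for (int i = 0; i < NR_LEVELS; i++)
		for (int j = 0; j < NR_NODES; j++) {
			if (!online[j])
				continue;	/* deferred until the node is brought up */
			for (int k = 0; k < NR_NODES; k++)
				if (online[k] && dist[j][k] <= level_dist[i])
					mask[i][j] |= 1u << k;
		}
	for (int j = 0; j < NR_NODES; j++)
		onlined[j] = online[j];	/* remember which nodes were built at init */
}

/* Models __sched_domains_numa_masks_set(): build the node's own masks on first onlining. */
static void masks_set_node(int node)
{
	if (onlined[node])
		return;
	onlined[node] = true;

	for (int i = 0; i < NR_LEVELS; i++)
		for (int j = 0; j < NR_NODES; j++) {
			if (!online[j] || j == node)
				continue;
			if (dist[j][node] > level_dist[i])
				continue;
			/* pull the remote node into the new node's mask */
			mask[i][node] |= mask[0][j];
		}
}

/* Models sched_domains_numa_masks_set(): also add the new node to remote nodes' masks. */
static void masks_set(int node)
{
	masks_set_node(node);

	for (int i = 0; i < NR_LEVELS; i++)
		for (int j = 0; j < NR_NODES; j++)
			if (online[j] && dist[j][node] <= level_dist[i])
				mask[i][j] |= 1u << node;
}

int main(void)
{
	init_masks();

	online[2] = true;	/* node 2 comes online for the first time */
	masks_set(2);

	for (int i = 0; i < NR_LEVELS; i++)
		for (int j = 0; j < NR_NODES; j++)
			printf("level %d (<=%d) node %d: mask 0x%x\n",
			       i, level_dist[i], j, mask[i][j]);
	return 0;
}

With nodes 0 and 1 online at init, node 2's masks start empty; after masks_set(2) they are populated, and nodes 0 and 1 gain node 2 in their outermost (distance <= 40) masks, which mirrors what the patch does with cpumasks at CPU hotplug time.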
kernel/sched/topology.c

@@ -1482,6 +1482,8 @@ int sched_max_numa_distance;
 static int			*sched_domains_numa_distance;
 static struct cpumask		***sched_domains_numa_masks;
 int __read_mostly		node_reclaim_distance = RECLAIM_DISTANCE;
+
+static unsigned long __read_mostly *sched_numa_onlined_nodes;
 #endif
 
 /*
@@ -1833,6 +1835,16 @@ void sched_init_numa(void)
 			sched_domains_numa_masks[i][j] = mask;
 
 			for_each_node(k) {
+				/*
+				 * Distance information can be unreliable for
+				 * offline nodes, defer building the node
+				 * masks to its bringup.
+				 * This relies on all unique distance values
+				 * still being visible at init time.
+				 */
+				if (!node_online(j))
+					continue;
+
 				if (sched_debug() && (node_distance(j, k) != node_distance(k, j)))
 					sched_numa_warn("Node-distance not symmetric");
 
@@ -1886,6 +1898,53 @@ void sched_init_numa(void)
 	sched_max_numa_distance = sched_domains_numa_distance[nr_levels - 1];
 
 	init_numa_topology_type();
+
+	sched_numa_onlined_nodes = bitmap_alloc(nr_node_ids, GFP_KERNEL);
+	if (!sched_numa_onlined_nodes)
+		return;
+
+	bitmap_zero(sched_numa_onlined_nodes, nr_node_ids);
+	for_each_online_node(i)
+		bitmap_set(sched_numa_onlined_nodes, i, 1);
+}
+
+static void __sched_domains_numa_masks_set(unsigned int node)
+{
+	int i, j;
+
+	/*
+	 * NUMA masks are not built for offline nodes in sched_init_numa().
+	 * Thus, when a CPU of a never-onlined-before node gets plugged in,
+	 * adding that new CPU to the right NUMA masks is not sufficient: the
+	 * masks of that CPU's node must also be updated.
+	 */
+	if (test_bit(node, sched_numa_onlined_nodes))
+		return;
+
+	bitmap_set(sched_numa_onlined_nodes, node, 1);
+
+	for (i = 0; i < sched_domains_numa_levels; i++) {
+		for (j = 0; j < nr_node_ids; j++) {
+			if (!node_online(j) || node == j)
+				continue;
+
+			if (node_distance(j, node) > sched_domains_numa_distance[i])
+				continue;
+
+			/* Add remote nodes in our masks */
+			cpumask_or(sched_domains_numa_masks[i][node],
+				   sched_domains_numa_masks[i][node],
+				   sched_domains_numa_masks[0][j]);
+		}
+	}
+
+	/*
+	 * A new node has been brought up, potentially changing the topology
+	 * classification.
+	 *
+	 * Note that this is racy vs any use of sched_numa_topology_type :/
+	 */
+	init_numa_topology_type();
 }
 
 void sched_domains_numa_masks_set(unsigned int cpu)
@@ -1893,8 +1952,14 @@ void sched_domains_numa_masks_set(unsigned int cpu)
 	int node = cpu_to_node(cpu);
 	int i, j;
 
+	__sched_domains_numa_masks_set(node);
+
 	for (i = 0; i < sched_domains_numa_levels; i++) {
 		for (j = 0; j < nr_node_ids; j++) {
+			if (!node_online(j))
+				continue;
+
+			/* Set ourselves in the remote node's masks */
 			if (node_distance(j, node) <= sched_domains_numa_distance[i])
 				cpumask_set_cpu(cpu, sched_domains_numa_masks[i][j]);
 		}