From 7a0f308337d11fd5caa9f845c6d08cc5d6067988 Mon Sep 17 00:00:00 2001
From: Mel Gorman
Date: Mon, 7 Oct 2013 11:29:01 +0100
Subject: [PATCH] sched/numa: Resist moving tasks towards nodes with fewer
 hinting faults

Just as "sched: Favour moving tasks towards the preferred node" favours
moving tasks towards nodes with a higher number of recorded NUMA hinting
faults, this patch resists moving tasks towards nodes with lower faults.

Signed-off-by: Mel Gorman
Reviewed-by: Rik van Riel
Cc: Andrea Arcangeli
Cc: Johannes Weiner
Cc: Srikar Dronamraju
Signed-off-by: Peter Zijlstra
Link: http://lkml.kernel.org/r/1381141781-10992-24-git-send-email-mgorman@suse.de
Signed-off-by: Ingo Molnar
---
 kernel/sched/fair.c     | 33 +++++++++++++++++++++++++++++++++
 kernel/sched/features.h |  8 ++++++++
 2 files changed, 41 insertions(+)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 6ffddca687fe..89431248d33d 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4107,12 +4107,43 @@ static bool migrate_improves_locality(struct task_struct *p, struct lb_env *env)
 	return false;
 }
 
+
+static bool migrate_degrades_locality(struct task_struct *p, struct lb_env *env)
+{
+	int src_nid, dst_nid;
+
+	if (!sched_feat(NUMA) || !sched_feat(NUMA_RESIST_LOWER))
+		return false;
+
+	if (!p->numa_faults || !(env->sd->flags & SD_NUMA))
+		return false;
+
+	src_nid = cpu_to_node(env->src_cpu);
+	dst_nid = cpu_to_node(env->dst_cpu);
+
+	if (src_nid == dst_nid ||
+	    p->numa_migrate_seq >= sysctl_numa_balancing_settle_count)
+		return false;
+
+	if (p->numa_faults[dst_nid] < p->numa_faults[src_nid])
+		return true;
+
+	return false;
+}
+
 #else
 static inline bool migrate_improves_locality(struct task_struct *p,
 					     struct lb_env *env)
 {
 	return false;
 }
+
+static inline bool migrate_degrades_locality(struct task_struct *p,
+					     struct lb_env *env)
+{
+	return false;
+}
 #endif
 
 /*
@@ -4177,6 +4208,8 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
 	 * 3) too many balance attempts have failed.
 	 */
 	tsk_cache_hot = task_hot(p, rq_clock_task(env->src_rq), env->sd);
+	if (!tsk_cache_hot)
+		tsk_cache_hot = migrate_degrades_locality(p, env);
 
 	if (migrate_improves_locality(p, env)) {
 #ifdef CONFIG_SCHEDSTATS
diff --git a/kernel/sched/features.h b/kernel/sched/features.h
index d9278ce2c4b4..5716929a2e3a 100644
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -74,4 +74,12 @@ SCHED_FEAT(NUMA, false)
  * balancing.
  */
 SCHED_FEAT(NUMA_FAVOUR_HIGHER, true)
+
+/*
+ * NUMA_RESIST_LOWER will resist moving tasks towards nodes where a
+ * lower number of hinting faults have been recorded. As this has
+ * the potential to prevent a task ever migrating to a new node
+ * due to CPU overload it is disabled by default.
+ */
+SCHED_FEAT(NUMA_RESIST_LOWER, false)
 #endif
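
For illustration only, and not part of the patch above: a minimal userspace
C sketch modelling the heuristic this patch adds. The fault counts, node
ids and the main() harness are assumptions made up for the example; in the
kernel the counts live in p->numa_faults[] and both checks are additionally
gated by sched_feat(), SD_NUMA, a same-node test and p->numa_migrate_seq.

/*
 * Userspace sketch (not kernel code) of the paired locality checks that
 * now bracket task_hot(): NUMA_FAVOUR_HIGHER pulls a task towards a node
 * with more recorded hinting faults, while NUMA_RESIST_LOWER treats a
 * move towards a node with fewer faults like moving a cache-hot task.
 */
#include <stdbool.h>
#include <stdio.h>

#define NR_NODES 2

/* Per-node NUMA hinting fault counts for one task (assumed values). */
static unsigned long numa_faults[NR_NODES] = { 120, 40 };

static bool migrate_improves_locality(int src_nid, int dst_nid)
{
	return numa_faults[dst_nid] > numa_faults[src_nid];
}

static bool migrate_degrades_locality(int src_nid, int dst_nid)
{
	return numa_faults[dst_nid] < numa_faults[src_nid];
}

int main(void)
{
	int src = 0, dst = 1;

	/*
	 * Mirrors the can_migrate_task() change: a locality-degrading
	 * move is resisted the same way a cache-hot task is.
	 */
	bool tsk_cache_hot = migrate_degrades_locality(src, dst);

	printf("move node %d -> node %d: improves=%d degrades=%d\n",
	       src, dst,
	       (int)migrate_improves_locality(src, dst),
	       (int)tsk_cache_hot);
	return 0;
}

As a usage note: with CONFIG_SCHED_DEBUG enabled, the feature can be
flipped at runtime through the scheduler feature interface, e.g.
"echo NUMA_RESIST_LOWER > /sys/kernel/debug/sched_features" to enable it
and "echo NO_NUMA_RESIST_LOWER > /sys/kernel/debug/sched_features" to
disable it again; it defaults to off for the reason given in the
features.h comment.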