From 42eeb3b5731de338a84a6d84e6318aeaa642a5b8 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior
Date: Fri, 8 Nov 2024 11:39:31 +0100
Subject: [PATCH 1/5] scftorture: Avoid additional div operation.

Replace "scfp->cpu % nr_cpu_ids" with "cpu", which has already been
computed earlier.

Tested-by: Paul E. McKenney
Signed-off-by: Sebastian Andrzej Siewior
Reviewed-by: Boqun Feng
Tested-by: Boqun Feng
Signed-off-by: Paul E. McKenney
---
 kernel/scftorture.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/scftorture.c b/kernel/scftorture.c
index 44e83a646264..455cbff35a1a 100644
--- a/kernel/scftorture.c
+++ b/kernel/scftorture.c
@@ -463,7 +463,7 @@ static int scftorture_invoker(void *arg)
 
 	// Make sure that the CPU is affinitized appropriately during testing.
 	curcpu = raw_smp_processor_id();
-	WARN_ONCE(curcpu != scfp->cpu % nr_cpu_ids,
+	WARN_ONCE(curcpu != cpu,
 		  "%s: Wanted CPU %d, running on %d, nr_cpu_ids = %d\n",
 		  __func__, scfp->cpu, curcpu, nr_cpu_ids);

From 43082cd579fbeea2ed90982f1c875bbdb2bcad2e Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior
Date: Fri, 8 Nov 2024 11:39:32 +0100
Subject: [PATCH 2/5] scftorture: Wait until scf_cleanup_handler() completes.

smp_call_function() needs to be invoked with the wait flag set so that
it does not return before scf_cleanup_handler() has completed on all
CPUs. This ensures that all SMP function calls queued earlier have
finished at this point.

Tested-by: Paul E. McKenney
Signed-off-by: Sebastian Andrzej Siewior
Reviewed-by: Boqun Feng
Tested-by: Boqun Feng
Signed-off-by: Paul E. McKenney
---
 kernel/scftorture.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/scftorture.c b/kernel/scftorture.c
index 455cbff35a1a..654702f75c54 100644
--- a/kernel/scftorture.c
+++ b/kernel/scftorture.c
@@ -523,7 +523,7 @@ static void scf_torture_cleanup(void)
 			torture_stop_kthread("scftorture_invoker", scf_stats_p[i].task);
 		else
 			goto end;
-	smp_call_function(scf_cleanup_handler, NULL, 0);
+	smp_call_function(scf_cleanup_handler, NULL, 1);
 	torture_stop_kthread(scf_torture_stats, scf_torture_stats_task);
 	scf_torture_stats_print();  // -After- the stats thread is stopped!
 	kfree(scf_stats_p);  // -After- the last stats print has completed!
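As background for the wait-flag change in patch 2: the third argument of
smp_call_function() selects whether the caller blocks until the handler has
run on all other CPUs. Below is a minimal userspace sketch of the same idea,
not kernel code; dispatch(), cleanup_handler(), and shared_state are
illustrative names, with dispatch() standing in for smp_call_function() and
pthread_join() modeling wait == 1.

#include <pthread.h>
#include <stdlib.h>

static int *shared_state;	/* stands in for state torn down after the call */

static void *cleanup_handler(void *arg)
{
	*shared_state = 0;	/* the handler still touches shared state */
	return NULL;
}

/* dispatch() models smp_call_function(fn, NULL, wait). */
static void dispatch(void *(*fn)(void *), int wait)
{
	pthread_t t;

	pthread_create(&t, NULL, fn, NULL);
	if (wait)
		pthread_join(t, NULL);	/* wait set: handler has finished */
	else
		pthread_detach(t);	/* wait clear: handler may still run */
}

int main(void)
{
	shared_state = malloc(sizeof(*shared_state));
	dispatch(cleanup_handler, 1);	/* wait == 1, as the patch now does */
	free(shared_state);		/* safe only because we waited */
	return 0;
}

With wait == 0, the free() could race with the still-running handler, which
is the class of problem the patch closes during module cleanup.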
From 64bdaf963c3ac04a67c8491bea2d0ecfc7d5da96 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior
Date: Fri, 8 Nov 2024 11:39:33 +0100
Subject: [PATCH 3/5] scftorture: Move memory allocation outside of
 preempt_disable region.

Memory allocations cannot happen within preempt-disabled regions on
PREEMPT_RT because the locking structures underneath the allocator are
sleeping locks. Move the memory allocation outside of the
preempt-disabled section. Keep GFP_ATOMIC for the allocation so that it
still behaves like an "emergency allocation".

Tested-by: Paul E. McKenney
Signed-off-by: Sebastian Andrzej Siewior
Reviewed-by: Boqun Feng
Tested-by: Boqun Feng
Signed-off-by: Paul E. McKenney
---
 kernel/scftorture.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/kernel/scftorture.c b/kernel/scftorture.c
index 654702f75c54..e3c60f6dd547 100644
--- a/kernel/scftorture.c
+++ b/kernel/scftorture.c
@@ -320,10 +320,6 @@ static void scftorture_invoke_one(struct scf_statistics *scfp, struct torture_ra
 	struct scf_check *scfcp = NULL;
 	struct scf_selector *scfsp = scf_sel_rand(trsp);
 
-	if (use_cpus_read_lock)
-		cpus_read_lock();
-	else
-		preempt_disable();
 	if (scfsp->scfs_prim == SCF_PRIM_SINGLE || scfsp->scfs_wait) {
 		scfcp = kmalloc(sizeof(*scfcp), GFP_ATOMIC);
 		if (!scfcp) {
@@ -337,6 +333,10 @@ static void scftorture_invoke_one(struct scf_statistics *scfp, struct torture_ra
 			scfcp->scfc_rpc = false;
 		}
 	}
+	if (use_cpus_read_lock)
+		cpus_read_lock();
+	else
+		preempt_disable();
 	switch (scfsp->scfs_prim) {
 	case SCF_PRIM_RESCHED:
 		if (IS_BUILTIN(CONFIG_SCF_TORTURE_TEST)) {
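The reordering in patch 3 follows a general PREEMPT_RT rule: anything that
may take a sleeping lock (such as the allocator) must happen before
preemption is disabled. A small userspace sketch of the pattern follows;
critical_enter()/critical_exit() are illustrative stand-ins for
preempt_disable()/preempt_enable(), and struct check loosely models
struct scf_check.

#include <pthread.h>
#include <stdlib.h>

/* Illustrative stand-in for the preempt-disabled section. */
static pthread_mutex_t critical = PTHREAD_MUTEX_INITIALIZER;

struct check {
	int scfc_in;
};

static void critical_enter(void) { pthread_mutex_lock(&critical); }
static void critical_exit(void)  { pthread_mutex_unlock(&critical); }

static void invoke_one(void)
{
	/* The possibly-sleeping allocation happens first, as in the patch... */
	struct check *c = malloc(sizeof(*c));

	/* ...and only then do we enter the no-sleep section. */
	critical_enter();
	if (c)
		c->scfc_in = 1;	/* use the memory inside the section */
	critical_exit();
	free(c);
}

int main(void)
{
	invoke_one();
	return 0;
}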
From 4788c861ad7e97d611e9b94596c896f5d0d072a6 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior
Date: Fri, 8 Nov 2024 11:39:34 +0100
Subject: [PATCH 4/5] scftorture: Use a lock-less list to free memory.

scf_handler() is used as an SMP function call. This function is always
invoked in IRQ context, even with forced threading enabled. It frees
memory, which is not allowed on PREEMPT_RT because the locking
underneath uses sleeping locks.

Add a per-CPU scf_free_pool to which each SMP function call adds the
memory to be freed. This memory is then freed by scftorture_invoker()
on each iteration. On the majority of invocations the number of items
is less than five; if the invoker thread sleeps or gets delayed, the
number exceeded 350 but did not reach 400 in testing. These were
spikes, not the average. A bulk free of 64 pointers at once should
improve the give-back if the list grows; the list size averages ~1.3
items per invocation. Having one global scf_free_pool with one cleaning
thread let the list grow to over 10,000 items with 32 CPUs (again,
spikes, not the average), especially if the CPU went to sleep. The
per-CPU approach looks like a good compromise.

Reported-by: "Paul E. McKenney"
Closes: https://lore.kernel.org/lkml/41619255-cdc2-4573-a360-7794fc3614f7@paulmck-laptop/
Tested-by: Paul E. McKenney
Signed-off-by: Sebastian Andrzej Siewior
Reviewed-by: Boqun Feng
Tested-by: Boqun Feng
Signed-off-by: Paul E. McKenney
---
 kernel/scftorture.c | 40 ++++++++++++++++++++++++++++++++++++----
 1 file changed, 36 insertions(+), 4 deletions(-)

diff --git a/kernel/scftorture.c b/kernel/scftorture.c
index e3c60f6dd547..eeafd3fc1682 100644
--- a/kernel/scftorture.c
+++ b/kernel/scftorture.c
@@ -97,6 +97,7 @@ struct scf_statistics {
 static struct scf_statistics *scf_stats_p;
 static struct task_struct *scf_torture_stats_task;
 static DEFINE_PER_CPU(long long, scf_invoked_count);
+static DEFINE_PER_CPU(struct llist_head, scf_free_pool);
 
 // Data for random primitive selection
 #define SCF_PRIM_RESCHED	0
@@ -133,6 +134,7 @@ struct scf_check {
 	bool scfc_wait;
 	bool scfc_rpc;
 	struct completion scfc_completion;
+	struct llist_node scf_node;
 };
 
 // Use to wait for all threads to start.
@@ -148,6 +150,31 @@ static DEFINE_TORTURE_RANDOM_PERCPU(scf_torture_rand);
 
 extern void resched_cpu(int cpu); // An alternative IPI vector.
 
+static void scf_add_to_free_list(struct scf_check *scfcp)
+{
+	struct llist_head *pool;
+	unsigned int cpu;
+
+	cpu = raw_smp_processor_id() % nthreads;
+	pool = &per_cpu(scf_free_pool, cpu);
+	llist_add(&scfcp->scf_node, pool);
+}
+
+static void scf_cleanup_free_list(unsigned int cpu)
+{
+	struct llist_head *pool;
+	struct llist_node *node;
+	struct scf_check *scfcp;
+
+	pool = &per_cpu(scf_free_pool, cpu);
+	node = llist_del_all(pool);
+	while (node) {
+		scfcp = llist_entry(node, struct scf_check, scf_node);
+		node = node->next;
+		kfree(scfcp);
+	}
+}
+
 // Print torture statistics.  Caller must ensure serialization.
 static void scf_torture_stats_print(void)
 {
@@ -296,7 +323,7 @@ out:
 		if (scfcp->scfc_rpc)
 			complete(&scfcp->scfc_completion);
 	} else {
-		kfree(scfcp);
+		scf_add_to_free_list(scfcp);
 	}
 }
 
@@ -363,7 +390,7 @@ static void scftorture_invoke_one(struct scf_statistics *scfp, struct torture_ra
 				scfp->n_single_wait_ofl++;
 			else
 				scfp->n_single_ofl++;
-			kfree(scfcp);
+			scf_add_to_free_list(scfcp);
 			scfcp = NULL;
 		}
 		break;
@@ -391,7 +418,7 @@ static void scftorture_invoke_one(struct scf_statistics *scfp, struct torture_ra
 				preempt_disable();
 		} else {
 			scfp->n_single_rpc_ofl++;
-			kfree(scfcp);
+			scf_add_to_free_list(scfcp);
 			scfcp = NULL;
 		}
 		break;
@@ -428,7 +455,7 @@ static void scftorture_invoke_one(struct scf_statistics *scfp, struct torture_ra
 			pr_warn("%s: Memory-ordering failure, scfs_prim: %d.\n", __func__, scfsp->scfs_prim);
 			atomic_inc(&n_mb_out_errs); // Leak rather than trash!
 		} else {
-			kfree(scfcp);
+			scf_add_to_free_list(scfcp);
 		}
 		barrier(); // Prevent race-reduction compiler optimizations.
 	}
@@ -479,6 +506,8 @@ static int scftorture_invoker(void *arg)
 	VERBOSE_SCFTORTOUT("scftorture_invoker %d started", scfp->cpu);
 
 	do {
+		scf_cleanup_free_list(cpu);
+
 		scftorture_invoke_one(scfp, &rand);
 		while (cpu_is_offline(cpu) && !torture_must_stop()) {
 			schedule_timeout_interruptible(HZ / 5);
@@ -529,6 +558,9 @@ static void scf_torture_cleanup(void)
 	kfree(scf_stats_p);  // -After- the last stats print has completed!
 	scf_stats_p = NULL;
 
+	for (i = 0; i < nr_cpu_ids; i++)
+		scf_cleanup_free_list(i);
+
 	if (atomic_read(&n_errs) || atomic_read(&n_mb_in_errs) || atomic_read(&n_mb_out_errs))
 		scftorture_print_module_parms("End of test: FAILURE");
 	else if (torture_onoff_failures())
		scftorture_print_module_parms("End of test: LOST_HOTPLUG");

From f946cae86d088d02a2f9c0ae0bf8a80359d3f454 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior
Date: Tue, 12 Nov 2024 17:20:23 +0100
Subject: [PATCH 5/5] scftorture: Handle NULL argument passed to
 scf_add_to_free_list().

Dan reported that after the rework, the newly introduced
scf_add_to_free_list() may be passed a NULL pointer. It replaced
kfree(), which tolerates a NULL pointer, but scf_add_to_free_list()
does not. Make scf_add_to_free_list() handle a NULL pointer.

Reported-by: Dan Carpenter
Closes: https://lore.kernel.org/all/2375aa2c-3248-4ffa-b9b0-f0a24c50f237@stanley.mountain
Fixes: 4788c861ad7e9 ("scftorture: Use a lock-less list to free memory.")
Signed-off-by: Sebastian Andrzej Siewior
Signed-off-by: Paul E. McKenney
---
 kernel/scftorture.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/kernel/scftorture.c b/kernel/scftorture.c
index eeafd3fc1682..d86d2d9c4624 100644
--- a/kernel/scftorture.c
+++ b/kernel/scftorture.c
@@ -155,6 +155,8 @@ static void scf_add_to_free_list(struct scf_check *scfcp)
 	struct llist_head *pool;
 	unsigned int cpu;
 
+	if (!scfcp)
+		return;
 	cpu = raw_smp_processor_id() % nthreads;
 	pool = &per_cpu(scf_free_pool, cpu);
 	llist_add(&scfcp->scf_node, pool);
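The llist_add()/llist_del_all() pattern used by patches 4 and 5 can be
sketched in userspace with C11 atomics: producers push nodes with a CAS
loop, and the consumer detaches the whole list with a single atomic
exchange, then frees the nodes outside the hot path. The NULL check mirrors
patch 5. All names below are illustrative; this is not the kernel llist API.

#include <stdatomic.h>
#include <stddef.h>
#include <stdlib.h>

struct node {
	struct node *next;
};

static _Atomic(struct node *) pool_head;	/* models one scf_free_pool */

/* Models scf_add_to_free_list() / llist_add(): lock-less LIFO push. */
static void add_to_free_list(struct node *n)
{
	struct node *old;

	if (!n)			/* tolerate NULL, as patch 5 makes it do */
		return;
	old = atomic_load(&pool_head);
	do {
		n->next = old;	/* link in front of the current head */
	} while (!atomic_compare_exchange_weak(&pool_head, &old, n));
}

/* Models scf_cleanup_free_list() / llist_del_all(): detach, then free. */
static void cleanup_free_list(void)
{
	struct node *n = atomic_exchange(&pool_head, NULL);

	while (n) {
		struct node *next = n->next;

		free(n);
		n = next;
	}
}

int main(void)
{
	for (int i = 0; i < 3; i++)
		add_to_free_list(malloc(sizeof(struct node)));
	add_to_free_list(NULL);		/* safe thanks to the NULL check */
	cleanup_free_list();
	return 0;
}

Detaching the entire list in one exchange is what makes the consumer cheap:
the invoker thread never contends with producers node by node, matching the
series' goal of keeping frees off the IRQ-context path.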