From 820a4f88ee4636433a46fcf14054036d0f71e798 Mon Sep 17 00:00:00 2001 From: David Vernet Date: Fri, 22 Apr 2022 10:33:49 -0700 Subject: [PATCH 01/13] cgroup: Add new test_cpu.c test suite in cgroup selftests The cgroup selftests suite currently contains tests that validate various aspects of cgroup, such as validating the expected behavior for memory controllers, the expected behavior of cgroup.procs, etc. There are no tests that validate the expected behavior of the cgroup cpu controller. This patch therefore adds a new test_cpu.c file that will contain cpu controller testcases. The file currently only contains a single testcase that validates creating nested cgroups with cgroup.subtree_control including cpu. Future patches will add more sophisticated testcases that validate functional aspects of the cpu controller. Signed-off-by: David Vernet Signed-off-by: Tejun Heo --- tools/testing/selftests/cgroup/.gitignore | 1 + tools/testing/selftests/cgroup/Makefile | 2 + tools/testing/selftests/cgroup/test_cpu.c | 110 ++++++++++++++++++++++ 3 files changed, 113 insertions(+) create mode 100644 tools/testing/selftests/cgroup/test_cpu.c diff --git a/tools/testing/selftests/cgroup/.gitignore b/tools/testing/selftests/cgroup/.gitignore index be9643ef6285..306ee1b01e72 100644 --- a/tools/testing/selftests/cgroup/.gitignore +++ b/tools/testing/selftests/cgroup/.gitignore @@ -4,3 +4,4 @@ test_core test_freezer test_kmem test_kill +test_cpu diff --git a/tools/testing/selftests/cgroup/Makefile b/tools/testing/selftests/cgroup/Makefile index 745fe25fa0b9..478217cc1371 100644 --- a/tools/testing/selftests/cgroup/Makefile +++ b/tools/testing/selftests/cgroup/Makefile @@ -10,6 +10,7 @@ TEST_GEN_PROGS += test_kmem TEST_GEN_PROGS += test_core TEST_GEN_PROGS += test_freezer TEST_GEN_PROGS += test_kill +TEST_GEN_PROGS += test_cpu LOCAL_HDRS += $(selfdir)/clone3/clone3_selftests.h $(selfdir)/pidfd/pidfd.h @@ -20,3 +21,4 @@ $(OUTPUT)/test_kmem: cgroup_util.c $(OUTPUT)/test_core: cgroup_util.c $(OUTPUT)/test_freezer: cgroup_util.c $(OUTPUT)/test_kill: cgroup_util.c +$(OUTPUT)/test_cpu: cgroup_util.c diff --git a/tools/testing/selftests/cgroup/test_cpu.c b/tools/testing/selftests/cgroup/test_cpu.c new file mode 100644 index 000000000000..a724bff00d07 --- /dev/null +++ b/tools/testing/selftests/cgroup/test_cpu.c @@ -0,0 +1,110 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE +#include +#include + +#include "../kselftest.h" +#include "cgroup_util.h" + +/* + * This test creates two nested cgroups with and without enabling + * the cpu controller. + */ +static int test_cpucg_subtree_control(const char *root) +{ + char *parent = NULL, *child = NULL, *parent2 = NULL, *child2 = NULL; + int ret = KSFT_FAIL; + + // Create two nested cgroups with the cpu controller enabled. + parent = cg_name(root, "cpucg_test_0"); + if (!parent) + goto cleanup; + + if (cg_create(parent)) + goto cleanup; + + if (cg_write(parent, "cgroup.subtree_control", "+cpu")) + goto cleanup; + + child = cg_name(parent, "cpucg_test_child"); + if (!child) + goto cleanup; + + if (cg_create(child)) + goto cleanup; + + if (cg_read_strstr(child, "cgroup.controllers", "cpu")) + goto cleanup; + + // Create two nested cgroups without enabling the cpu controller. + parent2 = cg_name(root, "cpucg_test_1"); + if (!parent2) + goto cleanup; + + if (cg_create(parent2)) + goto cleanup; + + child2 = cg_name(parent2, "cpucg_test_child"); + if (!child2) + goto cleanup; + + if (cg_create(child2)) + goto cleanup; + + if (!cg_read_strstr(child2, "cgroup.controllers", "cpu")) + goto cleanup; + + ret = KSFT_PASS; + +cleanup: + cg_destroy(child); + free(child); + cg_destroy(child2); + free(child2); + cg_destroy(parent); + free(parent); + cg_destroy(parent2); + free(parent2); + + return ret; +} + +#define T(x) { x, #x } +struct cpucg_test { + int (*fn)(const char *root); + const char *name; +} tests[] = { + T(test_cpucg_subtree_control), +}; +#undef T + +int main(int argc, char *argv[]) +{ + char root[PATH_MAX]; + int i, ret = EXIT_SUCCESS; + + if (cg_find_unified_root(root, sizeof(root))) + ksft_exit_skip("cgroup v2 isn't mounted\n"); + + if (cg_read_strstr(root, "cgroup.subtree_control", "cpu")) + if (cg_write(root, "cgroup.subtree_control", "+cpu")) + ksft_exit_skip("Failed to set cpu controller\n"); + + for (i = 0; i < ARRAY_SIZE(tests); i++) { + switch (tests[i].fn(root)) { + case KSFT_PASS: + ksft_test_result_pass("%s\n", tests[i].name); + break; + case KSFT_SKIP: + ksft_test_result_skip("%s\n", tests[i].name); + break; + default: + ret = EXIT_FAILURE; + ksft_test_result_fail("%s\n", tests[i].name); + break; + } + } + + return ret; +} From 3c879a1bb88792c05c2dd6a957423838b2830d73 Mon Sep 17 00:00:00 2001 From: David Vernet Date: Fri, 22 Apr 2022 10:33:51 -0700 Subject: [PATCH 02/13] cgroup: Add test_cpucg_stats() testcase to cgroup cpu selftests test_cpu.c includes testcases that validate the cgroup cpu controller. This patch adds a new testcase called test_cpucg_stats() that verifies the expected behavior of the cpu.stat interface. In doing so, we define a new hog_cpus_timed() function which takes a cpu_hog_func_param struct that configures how many CPUs it uses, and how long it runs. Future patches will also spawn threads that hog CPUs, so this function will eventually serve those use-cases as well. Signed-off-by: David Vernet Signed-off-by: Tejun Heo --- tools/testing/selftests/cgroup/cgroup_util.h | 3 + tools/testing/selftests/cgroup/test_cpu.c | 128 +++++++++++++++++++ 2 files changed, 131 insertions(+) diff --git a/tools/testing/selftests/cgroup/cgroup_util.h b/tools/testing/selftests/cgroup/cgroup_util.h index 628738532ac9..973774fa7bee 100644 --- a/tools/testing/selftests/cgroup/cgroup_util.h +++ b/tools/testing/selftests/cgroup/cgroup_util.h @@ -8,6 +8,9 @@ #define MB(x) (x << 20) +#define USEC_PER_SEC 1000000L +#define NSEC_PER_SEC 1000000000L + /* * Checks if two given values differ by less than err% of their sum. */ diff --git a/tools/testing/selftests/cgroup/test_cpu.c b/tools/testing/selftests/cgroup/test_cpu.c index a724bff00d07..3bd61964a262 100644 --- a/tools/testing/selftests/cgroup/test_cpu.c +++ b/tools/testing/selftests/cgroup/test_cpu.c @@ -2,11 +2,19 @@ #define _GNU_SOURCE #include +#include +#include #include +#include #include "../kselftest.h" #include "cgroup_util.h" +struct cpu_hog_func_param { + int nprocs; + struct timespec ts; +}; + /* * This test creates two nested cgroups with and without enabling * the cpu controller. @@ -70,12 +78,132 @@ cleanup: return ret; } +static void *hog_cpu_thread_func(void *arg) +{ + while (1) + ; + + return NULL; +} + +static struct timespec +timespec_sub(const struct timespec *lhs, const struct timespec *rhs) +{ + struct timespec zero = { + .tv_sec = 0, + .tv_nsec = 0, + }; + struct timespec ret; + + if (lhs->tv_sec < rhs->tv_sec) + return zero; + + ret.tv_sec = lhs->tv_sec - rhs->tv_sec; + + if (lhs->tv_nsec < rhs->tv_nsec) { + if (ret.tv_sec == 0) + return zero; + + ret.tv_sec--; + ret.tv_nsec = NSEC_PER_SEC - rhs->tv_nsec + lhs->tv_nsec; + } else + ret.tv_nsec = lhs->tv_nsec - rhs->tv_nsec; + + return ret; +} + +static int hog_cpus_timed(const char *cgroup, void *arg) +{ + const struct cpu_hog_func_param *param = + (struct cpu_hog_func_param *)arg; + struct timespec ts_run = param->ts; + struct timespec ts_remaining = ts_run; + int i, ret; + + for (i = 0; i < param->nprocs; i++) { + pthread_t tid; + + ret = pthread_create(&tid, NULL, &hog_cpu_thread_func, NULL); + if (ret != 0) + return ret; + } + + while (ts_remaining.tv_sec > 0 || ts_remaining.tv_nsec > 0) { + struct timespec ts_total; + + ret = nanosleep(&ts_remaining, NULL); + if (ret && errno != EINTR) + return ret; + + ret = clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts_total); + if (ret != 0) + return ret; + + ts_remaining = timespec_sub(&ts_run, &ts_total); + } + + return 0; +} + +/* + * Creates a cpu cgroup, burns a CPU for a few quanta, and verifies that + * cpu.stat shows the expected output. + */ +static int test_cpucg_stats(const char *root) +{ + int ret = KSFT_FAIL; + long usage_usec, user_usec, system_usec; + long usage_seconds = 2; + long expected_usage_usec = usage_seconds * USEC_PER_SEC; + char *cpucg; + + cpucg = cg_name(root, "cpucg_test"); + if (!cpucg) + goto cleanup; + + if (cg_create(cpucg)) + goto cleanup; + + usage_usec = cg_read_key_long(cpucg, "cpu.stat", "usage_usec"); + user_usec = cg_read_key_long(cpucg, "cpu.stat", "user_usec"); + system_usec = cg_read_key_long(cpucg, "cpu.stat", "system_usec"); + if (usage_usec != 0 || user_usec != 0 || system_usec != 0) + goto cleanup; + + struct cpu_hog_func_param param = { + .nprocs = 1, + .ts = { + .tv_sec = usage_seconds, + .tv_nsec = 0, + }, + }; + if (cg_run(cpucg, hog_cpus_timed, (void *)¶m)) + goto cleanup; + + usage_usec = cg_read_key_long(cpucg, "cpu.stat", "usage_usec"); + user_usec = cg_read_key_long(cpucg, "cpu.stat", "user_usec"); + if (user_usec <= 0) + goto cleanup; + + if (!values_close(usage_usec, expected_usage_usec, 1)) + goto cleanup; + + ret = KSFT_PASS; + +cleanup: + cg_destroy(cpucg); + free(cpucg); + + return ret; +} + #define T(x) { x, #x } struct cpucg_test { int (*fn)(const char *root); const char *name; } tests[] = { T(test_cpucg_subtree_control), + T(test_cpucg_stats), }; #undef T From 6376b22cd0a3455a534b6921b816ffab68ddc48f Mon Sep 17 00:00:00 2001 From: David Vernet Date: Fri, 22 Apr 2022 10:33:52 -0700 Subject: [PATCH 03/13] cgroup: Add test_cpucg_weight_overprovisioned() testcase test_cpu.c includes testcases that validate the cgroup cpu controller. This patch adds a new testcase called test_cpucg_weight_overprovisioned() that verifies the expected behavior of creating multiple processes with different cpu.weight, on a system that is overprovisioned. So as to avoid code duplication, this patch also updates cpu_hog_func_param to take a new hog_clock_type enum which informs how time is counted in hog_cpus_timed() (either process time or wall clock time). Signed-off-by: David Vernet Signed-off-by: Tejun Heo --- tools/testing/selftests/cgroup/cgroup_util.c | 12 ++ tools/testing/selftests/cgroup/cgroup_util.h | 1 + tools/testing/selftests/cgroup/test_cpu.c | 135 ++++++++++++++++++- 3 files changed, 145 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/cgroup/cgroup_util.c b/tools/testing/selftests/cgroup/cgroup_util.c index dbaa7aabbb4a..4297d580e3f8 100644 --- a/tools/testing/selftests/cgroup/cgroup_util.c +++ b/tools/testing/selftests/cgroup/cgroup_util.c @@ -190,6 +190,18 @@ int cg_write(const char *cgroup, const char *control, char *buf) return -1; } +int cg_write_numeric(const char *cgroup, const char *control, long value) +{ + char buf[64]; + int ret; + + ret = sprintf(buf, "%lu", value); + if (ret < 0) + return ret; + + return cg_write(cgroup, control, buf); +} + int cg_find_unified_root(char *root, size_t len) { char buf[10 * PAGE_SIZE]; diff --git a/tools/testing/selftests/cgroup/cgroup_util.h b/tools/testing/selftests/cgroup/cgroup_util.h index 973774fa7bee..2ee2119281d7 100644 --- a/tools/testing/selftests/cgroup/cgroup_util.h +++ b/tools/testing/selftests/cgroup/cgroup_util.h @@ -35,6 +35,7 @@ extern long cg_read_long(const char *cgroup, const char *control); long cg_read_key_long(const char *cgroup, const char *control, const char *key); extern long cg_read_lc(const char *cgroup, const char *control); extern int cg_write(const char *cgroup, const char *control, char *buf); +int cg_write_numeric(const char *cgroup, const char *control, long value); extern int cg_run(const char *cgroup, int (*fn)(const char *cgroup, void *arg), void *arg); diff --git a/tools/testing/selftests/cgroup/test_cpu.c b/tools/testing/selftests/cgroup/test_cpu.c index 3bd61964a262..8d901c06c79d 100644 --- a/tools/testing/selftests/cgroup/test_cpu.c +++ b/tools/testing/selftests/cgroup/test_cpu.c @@ -2,6 +2,8 @@ #define _GNU_SOURCE #include +#include +#include #include #include #include @@ -10,9 +12,17 @@ #include "../kselftest.h" #include "cgroup_util.h" +enum hog_clock_type { + // Count elapsed time using the CLOCK_PROCESS_CPUTIME_ID clock. + CPU_HOG_CLOCK_PROCESS, + // Count elapsed time using system wallclock time. + CPU_HOG_CLOCK_WALL, +}; + struct cpu_hog_func_param { int nprocs; struct timespec ts; + enum hog_clock_type clock_type; }; /* @@ -118,8 +128,13 @@ static int hog_cpus_timed(const char *cgroup, void *arg) (struct cpu_hog_func_param *)arg; struct timespec ts_run = param->ts; struct timespec ts_remaining = ts_run; + struct timespec ts_start; int i, ret; + ret = clock_gettime(CLOCK_MONOTONIC, &ts_start); + if (ret != 0) + return ret; + for (i = 0; i < param->nprocs; i++) { pthread_t tid; @@ -135,9 +150,19 @@ static int hog_cpus_timed(const char *cgroup, void *arg) if (ret && errno != EINTR) return ret; - ret = clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts_total); - if (ret != 0) - return ret; + if (param->clock_type == CPU_HOG_CLOCK_PROCESS) { + ret = clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts_total); + if (ret != 0) + return ret; + } else { + struct timespec ts_current; + + ret = clock_gettime(CLOCK_MONOTONIC, &ts_current); + if (ret != 0) + return ret; + + ts_total = timespec_sub(&ts_current, &ts_start); + } ts_remaining = timespec_sub(&ts_run, &ts_total); } @@ -176,6 +201,7 @@ static int test_cpucg_stats(const char *root) .tv_sec = usage_seconds, .tv_nsec = 0, }, + .clock_type = CPU_HOG_CLOCK_PROCESS, }; if (cg_run(cpucg, hog_cpus_timed, (void *)¶m)) goto cleanup; @@ -197,6 +223,108 @@ cleanup: return ret; } +/* + * First, this test creates the following hierarchy: + * A + * A/B cpu.weight = 50 + * A/C cpu.weight = 100 + * A/D cpu.weight = 150 + * + * A separate process is then created for each child cgroup which spawns as + * many threads as there are cores, and hogs each CPU as much as possible + * for some time interval. + * + * Once all of the children have exited, we verify that each child cgroup + * was given proportional runtime as informed by their cpu.weight. + */ +static int test_cpucg_weight_overprovisioned(const char *root) +{ + struct child { + char *cgroup; + pid_t pid; + long usage; + }; + int ret = KSFT_FAIL, i; + char *parent = NULL; + struct child children[3] = {NULL}; + long usage_seconds = 10; + + parent = cg_name(root, "cpucg_test_0"); + if (!parent) + goto cleanup; + + if (cg_create(parent)) + goto cleanup; + + if (cg_write(parent, "cgroup.subtree_control", "+cpu")) + goto cleanup; + + for (i = 0; i < ARRAY_SIZE(children); i++) { + children[i].cgroup = cg_name_indexed(parent, "cpucg_child", i); + if (!children[i].cgroup) + goto cleanup; + + if (cg_create(children[i].cgroup)) + goto cleanup; + + if (cg_write_numeric(children[i].cgroup, "cpu.weight", + 50 * (i + 1))) + goto cleanup; + } + + for (i = 0; i < ARRAY_SIZE(children); i++) { + struct cpu_hog_func_param param = { + .nprocs = get_nprocs(), + .ts = { + .tv_sec = usage_seconds, + .tv_nsec = 0, + }, + .clock_type = CPU_HOG_CLOCK_WALL, + }; + pid_t pid = cg_run_nowait(children[i].cgroup, hog_cpus_timed, + (void *)¶m); + if (pid <= 0) + goto cleanup; + children[i].pid = pid; + } + + for (i = 0; i < ARRAY_SIZE(children); i++) { + int retcode; + + waitpid(children[i].pid, &retcode, 0); + if (!WIFEXITED(retcode)) + goto cleanup; + if (WEXITSTATUS(retcode)) + goto cleanup; + } + + for (i = 0; i < ARRAY_SIZE(children); i++) + children[i].usage = cg_read_key_long(children[i].cgroup, + "cpu.stat", "usage_usec"); + + for (i = 0; i < ARRAY_SIZE(children) - 1; i++) { + long delta; + + if (children[i + 1].usage <= children[i].usage) + goto cleanup; + + delta = children[i + 1].usage - children[i].usage; + if (!values_close(delta, children[0].usage, 35)) + goto cleanup; + } + + ret = KSFT_PASS; +cleanup: + for (i = 0; i < ARRAY_SIZE(children); i++) { + cg_destroy(children[i].cgroup); + free(children[i].cgroup); + } + cg_destroy(parent); + free(parent); + + return ret; +} + #define T(x) { x, #x } struct cpucg_test { int (*fn)(const char *root); @@ -204,6 +332,7 @@ struct cpucg_test { } tests[] = { T(test_cpucg_subtree_control), T(test_cpucg_stats), + T(test_cpucg_weight_overprovisioned), }; #undef T From 4ab93063c83a2478863158799b027e9489ad4a40 Mon Sep 17 00:00:00 2001 From: David Vernet Date: Fri, 22 Apr 2022 10:33:53 -0700 Subject: [PATCH 04/13] cgroup: Add test_cpucg_weight_underprovisioned() testcase test_cpu.c includes testcases that validate the cgroup cpu controller. This patch adds a new testcase called test_cpucg_weight_underprovisioned() that verifies that processes with different cpu.weight that are all running on an underprovisioned system, still get roughly the same amount of cpu time. Because test_cpucg_weight_underprovisioned() is very similar to test_cpucg_weight_overprovisioned(), this patch also pulls the common logic into a separate helper function that is invoked from both testcases, and which uses function pointers to invoke the unique portions of the testcases. Signed-off-by: David Vernet Signed-off-by: Tejun Heo --- tools/testing/selftests/cgroup/test_cpu.c | 163 ++++++++++++++++------ 1 file changed, 121 insertions(+), 42 deletions(-) diff --git a/tools/testing/selftests/cgroup/test_cpu.c b/tools/testing/selftests/cgroup/test_cpu.c index 8d901c06c79d..64f9ce91c992 100644 --- a/tools/testing/selftests/cgroup/test_cpu.c +++ b/tools/testing/selftests/cgroup/test_cpu.c @@ -19,6 +19,12 @@ enum hog_clock_type { CPU_HOG_CLOCK_WALL, }; +struct cpu_hogger { + char *cgroup; + pid_t pid; + long usage; +}; + struct cpu_hog_func_param { int nprocs; struct timespec ts; @@ -223,31 +229,15 @@ cleanup: return ret; } -/* - * First, this test creates the following hierarchy: - * A - * A/B cpu.weight = 50 - * A/C cpu.weight = 100 - * A/D cpu.weight = 150 - * - * A separate process is then created for each child cgroup which spawns as - * many threads as there are cores, and hogs each CPU as much as possible - * for some time interval. - * - * Once all of the children have exited, we verify that each child cgroup - * was given proportional runtime as informed by their cpu.weight. - */ -static int test_cpucg_weight_overprovisioned(const char *root) +static int +run_cpucg_weight_test( + const char *root, + pid_t (*spawn_child)(const struct cpu_hogger *child), + int (*validate)(const struct cpu_hogger *children, int num_children)) { - struct child { - char *cgroup; - pid_t pid; - long usage; - }; int ret = KSFT_FAIL, i; char *parent = NULL; - struct child children[3] = {NULL}; - long usage_seconds = 10; + struct cpu_hogger children[3] = {NULL}; parent = cg_name(root, "cpucg_test_0"); if (!parent) @@ -273,16 +263,7 @@ static int test_cpucg_weight_overprovisioned(const char *root) } for (i = 0; i < ARRAY_SIZE(children); i++) { - struct cpu_hog_func_param param = { - .nprocs = get_nprocs(), - .ts = { - .tv_sec = usage_seconds, - .tv_nsec = 0, - }, - .clock_type = CPU_HOG_CLOCK_WALL, - }; - pid_t pid = cg_run_nowait(children[i].cgroup, hog_cpus_timed, - (void *)¶m); + pid_t pid = spawn_child(&children[i]); if (pid <= 0) goto cleanup; children[i].pid = pid; @@ -302,16 +283,8 @@ static int test_cpucg_weight_overprovisioned(const char *root) children[i].usage = cg_read_key_long(children[i].cgroup, "cpu.stat", "usage_usec"); - for (i = 0; i < ARRAY_SIZE(children) - 1; i++) { - long delta; - - if (children[i + 1].usage <= children[i].usage) - goto cleanup; - - delta = children[i + 1].usage - children[i].usage; - if (!values_close(delta, children[0].usage, 35)) - goto cleanup; - } + if (validate(children, ARRAY_SIZE(children))) + goto cleanup; ret = KSFT_PASS; cleanup: @@ -325,6 +298,111 @@ cleanup: return ret; } +static pid_t weight_hog_ncpus(const struct cpu_hogger *child, int ncpus) +{ + long usage_seconds = 10; + struct cpu_hog_func_param param = { + .nprocs = ncpus, + .ts = { + .tv_sec = usage_seconds, + .tv_nsec = 0, + }, + .clock_type = CPU_HOG_CLOCK_WALL, + }; + return cg_run_nowait(child->cgroup, hog_cpus_timed, (void *)¶m); +} + +static pid_t weight_hog_all_cpus(const struct cpu_hogger *child) +{ + return weight_hog_ncpus(child, get_nprocs()); +} + +static int +overprovision_validate(const struct cpu_hogger *children, int num_children) +{ + int ret = KSFT_FAIL, i; + + for (i = 0; i < num_children - 1; i++) { + long delta; + + if (children[i + 1].usage <= children[i].usage) + goto cleanup; + + delta = children[i + 1].usage - children[i].usage; + if (!values_close(delta, children[0].usage, 35)) + goto cleanup; + } + + ret = KSFT_PASS; +cleanup: + return ret; +} + +/* + * First, this test creates the following hierarchy: + * A + * A/B cpu.weight = 50 + * A/C cpu.weight = 100 + * A/D cpu.weight = 150 + * + * A separate process is then created for each child cgroup which spawns as + * many threads as there are cores, and hogs each CPU as much as possible + * for some time interval. + * + * Once all of the children have exited, we verify that each child cgroup + * was given proportional runtime as informed by their cpu.weight. + */ +static int test_cpucg_weight_overprovisioned(const char *root) +{ + return run_cpucg_weight_test(root, weight_hog_all_cpus, + overprovision_validate); +} + +static pid_t weight_hog_one_cpu(const struct cpu_hogger *child) +{ + return weight_hog_ncpus(child, 1); +} + +static int +underprovision_validate(const struct cpu_hogger *children, int num_children) +{ + int ret = KSFT_FAIL, i; + + for (i = 0; i < num_children - 1; i++) { + if (!values_close(children[i + 1].usage, children[0].usage, 15)) + goto cleanup; + } + + ret = KSFT_PASS; +cleanup: + return ret; +} + +/* + * First, this test creates the following hierarchy: + * A + * A/B cpu.weight = 50 + * A/C cpu.weight = 100 + * A/D cpu.weight = 150 + * + * A separate process is then created for each child cgroup which spawns a + * single thread that hogs a CPU. The testcase is only run on systems that + * have at least one core per-thread in the child processes. + * + * Once all of the children have exited, we verify that each child cgroup + * had roughly the same runtime despite having different cpu.weight. + */ +static int test_cpucg_weight_underprovisioned(const char *root) +{ + // Only run the test if there are enough cores to avoid overprovisioning + // the system. + if (get_nprocs() < 4) + return KSFT_SKIP; + + return run_cpucg_weight_test(root, weight_hog_one_cpu, + underprovision_validate); +} + #define T(x) { x, #x } struct cpucg_test { int (*fn)(const char *root); @@ -333,6 +411,7 @@ struct cpucg_test { T(test_cpucg_subtree_control), T(test_cpucg_stats), T(test_cpucg_weight_overprovisioned), + T(test_cpucg_weight_underprovisioned), }; #undef T From b76ee4f576ebfbab3891e51a531ec7ad7ef10a7a Mon Sep 17 00:00:00 2001 From: David Vernet Date: Sat, 23 Apr 2022 05:30:50 -0700 Subject: [PATCH 05/13] cgroup: Adding test_cpucg_nested_weight_overprovisioned() testcase The cgroup cpu controller tests in tools/testing/selftests/cgroup/test_cpu.c have some testcases that validate the expected behavior of setting cpu.weight on cgroups, and then hogging CPUs. What is still missing from the suite is a testcase that validates nested cgroups. This patch adds test_cpucg_nested_weight_overprovisioned(), which validates that a parent's cpu.weight will override its children if they overcommit a host, and properly protect any sibling groups of that parent. Signed-off-by: David Vernet Signed-off-by: Tejun Heo --- tools/testing/selftests/cgroup/test_cpu.c | 122 ++++++++++++++++++++++ 1 file changed, 122 insertions(+) diff --git a/tools/testing/selftests/cgroup/test_cpu.c b/tools/testing/selftests/cgroup/test_cpu.c index 64f9ce91c992..fc90b4d0feb9 100644 --- a/tools/testing/selftests/cgroup/test_cpu.c +++ b/tools/testing/selftests/cgroup/test_cpu.c @@ -403,6 +403,127 @@ static int test_cpucg_weight_underprovisioned(const char *root) underprovision_validate); } +/* + * First, this test creates the following hierarchy: + * A + * A/B cpu.weight = 1000 + * A/C cpu.weight = 1000 + * A/C/D cpu.weight = 5000 + * A/C/E cpu.weight = 5000 + * + * A separate process is then created for each leaf, which spawn nproc threads + * that burn a CPU for a few seconds. + * + * Once all of those processes have exited, we verify that each of the leaf + * cgroups have roughly the same usage from cpu.stat. + */ +static int +test_cpucg_nested_weight_overprovisioned(const char *root) +{ + int ret = KSFT_FAIL, i; + char *parent = NULL, *child = NULL; + struct cpu_hogger leaf[3] = {NULL}; + long nested_leaf_usage, child_usage; + int nprocs = get_nprocs(); + + parent = cg_name(root, "cpucg_test"); + child = cg_name(parent, "cpucg_child"); + if (!parent || !child) + goto cleanup; + + if (cg_create(parent)) + goto cleanup; + if (cg_write(parent, "cgroup.subtree_control", "+cpu")) + goto cleanup; + + if (cg_create(child)) + goto cleanup; + if (cg_write(child, "cgroup.subtree_control", "+cpu")) + goto cleanup; + if (cg_write(child, "cpu.weight", "1000")) + goto cleanup; + + for (i = 0; i < ARRAY_SIZE(leaf); i++) { + const char *ancestor; + long weight; + + if (i == 0) { + ancestor = parent; + weight = 1000; + } else { + ancestor = child; + weight = 5000; + } + leaf[i].cgroup = cg_name_indexed(ancestor, "cpucg_leaf", i); + if (!leaf[i].cgroup) + goto cleanup; + + if (cg_create(leaf[i].cgroup)) + goto cleanup; + + if (cg_write_numeric(leaf[i].cgroup, "cpu.weight", weight)) + goto cleanup; + } + + for (i = 0; i < ARRAY_SIZE(leaf); i++) { + pid_t pid; + struct cpu_hog_func_param param = { + .nprocs = nprocs, + .ts = { + .tv_sec = 10, + .tv_nsec = 0, + }, + .clock_type = CPU_HOG_CLOCK_WALL, + }; + + pid = cg_run_nowait(leaf[i].cgroup, hog_cpus_timed, + (void *)¶m); + if (pid <= 0) + goto cleanup; + leaf[i].pid = pid; + } + + for (i = 0; i < ARRAY_SIZE(leaf); i++) { + int retcode; + + waitpid(leaf[i].pid, &retcode, 0); + if (!WIFEXITED(retcode)) + goto cleanup; + if (WEXITSTATUS(retcode)) + goto cleanup; + } + + for (i = 0; i < ARRAY_SIZE(leaf); i++) { + leaf[i].usage = cg_read_key_long(leaf[i].cgroup, + "cpu.stat", "usage_usec"); + if (leaf[i].usage <= 0) + goto cleanup; + } + + nested_leaf_usage = leaf[1].usage + leaf[2].usage; + if (!values_close(leaf[0].usage, nested_leaf_usage, 15)) + goto cleanup; + + child_usage = cg_read_key_long(child, "cpu.stat", "usage_usec"); + if (child_usage <= 0) + goto cleanup; + if (!values_close(child_usage, nested_leaf_usage, 1)) + goto cleanup; + + ret = KSFT_PASS; +cleanup: + for (i = 0; i < ARRAY_SIZE(leaf); i++) { + cg_destroy(leaf[i].cgroup); + free(leaf[i].cgroup); + } + cg_destroy(child); + free(child); + cg_destroy(parent); + free(parent); + + return ret; +} + #define T(x) { x, #x } struct cpucg_test { int (*fn)(const char *root); @@ -412,6 +533,7 @@ struct cpucg_test { T(test_cpucg_stats), T(test_cpucg_weight_overprovisioned), T(test_cpucg_weight_underprovisioned), + T(test_cpucg_nested_weight_overprovisioned), }; #undef T From 89ca0efa8468f230df965257d0c03fc3664b4331 Mon Sep 17 00:00:00 2001 From: David Vernet Date: Sat, 23 Apr 2022 05:30:51 -0700 Subject: [PATCH 06/13] cgroup: Add test_cpucg_nested_weight_underprovisioned() testcase The cgroup cpu controller test suite currently contains a testcase called test_cpucg_nested_weight_underprovisioned() which verifies the expected behavior of cpu.weight when applied to nested cgroups. That first testcase validated the expected behavior when the processes in the leaf cgroups overcommitted the system. This patch adds a complementary test_cpucg_nested_weight_underprovisioned() testcase which validates behavior when those leaf cgroups undercommit the system. Signed-off-by: David Vernet Signed-off-by: Tejun Heo --- tools/testing/selftests/cgroup/test_cpu.c | 73 ++++++++++++++++++----- 1 file changed, 57 insertions(+), 16 deletions(-) diff --git a/tools/testing/selftests/cgroup/test_cpu.c b/tools/testing/selftests/cgroup/test_cpu.c index fc90b4d0feb9..de6289814c23 100644 --- a/tools/testing/selftests/cgroup/test_cpu.c +++ b/tools/testing/selftests/cgroup/test_cpu.c @@ -403,22 +403,8 @@ static int test_cpucg_weight_underprovisioned(const char *root) underprovision_validate); } -/* - * First, this test creates the following hierarchy: - * A - * A/B cpu.weight = 1000 - * A/C cpu.weight = 1000 - * A/C/D cpu.weight = 5000 - * A/C/E cpu.weight = 5000 - * - * A separate process is then created for each leaf, which spawn nproc threads - * that burn a CPU for a few seconds. - * - * Once all of those processes have exited, we verify that each of the leaf - * cgroups have roughly the same usage from cpu.stat. - */ static int -test_cpucg_nested_weight_overprovisioned(const char *root) +run_cpucg_nested_weight_test(const char *root, bool overprovisioned) { int ret = KSFT_FAIL, i; char *parent = NULL, *child = NULL; @@ -426,6 +412,16 @@ test_cpucg_nested_weight_overprovisioned(const char *root) long nested_leaf_usage, child_usage; int nprocs = get_nprocs(); + if (!overprovisioned) { + if (nprocs < 4) + /* + * Only run the test if there are enough cores to avoid overprovisioning + * the system. + */ + return KSFT_SKIP; + nprocs /= 4; + } + parent = cg_name(root, "cpucg_test"); child = cg_name(parent, "cpucg_child"); if (!parent || !child) @@ -501,9 +497,13 @@ test_cpucg_nested_weight_overprovisioned(const char *root) } nested_leaf_usage = leaf[1].usage + leaf[2].usage; - if (!values_close(leaf[0].usage, nested_leaf_usage, 15)) + if (overprovisioned) { + if (!values_close(leaf[0].usage, nested_leaf_usage, 15)) + goto cleanup; + } else if (!values_close(leaf[0].usage * 2, nested_leaf_usage, 15)) goto cleanup; + child_usage = cg_read_key_long(child, "cpu.stat", "usage_usec"); if (child_usage <= 0) goto cleanup; @@ -524,6 +524,46 @@ cleanup: return ret; } +/* + * First, this test creates the following hierarchy: + * A + * A/B cpu.weight = 1000 + * A/C cpu.weight = 1000 + * A/C/D cpu.weight = 5000 + * A/C/E cpu.weight = 5000 + * + * A separate process is then created for each leaf, which spawn nproc threads + * that burn a CPU for a few seconds. + * + * Once all of those processes have exited, we verify that each of the leaf + * cgroups have roughly the same usage from cpu.stat. + */ +static int +test_cpucg_nested_weight_overprovisioned(const char *root) +{ + return run_cpucg_nested_weight_test(root, true); +} + +/* + * First, this test creates the following hierarchy: + * A + * A/B cpu.weight = 1000 + * A/C cpu.weight = 1000 + * A/C/D cpu.weight = 5000 + * A/C/E cpu.weight = 5000 + * + * A separate process is then created for each leaf, which nproc / 4 threads + * that burns a CPU for a few seconds. + * + * Once all of those processes have exited, we verify that each of the leaf + * cgroups have roughly the same usage from cpu.stat. + */ +static int +test_cpucg_nested_weight_underprovisioned(const char *root) +{ + return run_cpucg_nested_weight_test(root, false); +} + #define T(x) { x, #x } struct cpucg_test { int (*fn)(const char *root); @@ -534,6 +574,7 @@ struct cpucg_test { T(test_cpucg_weight_overprovisioned), T(test_cpucg_weight_underprovisioned), T(test_cpucg_nested_weight_overprovisioned), + T(test_cpucg_nested_weight_underprovisioned), }; #undef T From 889ab8113ef1386c57d64da106b850e752949f07 Mon Sep 17 00:00:00 2001 From: David Vernet Date: Sat, 23 Apr 2022 05:30:52 -0700 Subject: [PATCH 07/13] cgroup: Add test_cpucg_max() testcase The cgroup cpu controller test suite has a number of testcases that validate the expected behavior of the cpu.weight knob, but none for cpu.max. This testcase fixes that by adding a testcase for cpu.max as well. Signed-off-by: David Vernet Signed-off-by: Tejun Heo --- tools/testing/selftests/cgroup/test_cpu.c | 54 +++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/tools/testing/selftests/cgroup/test_cpu.c b/tools/testing/selftests/cgroup/test_cpu.c index de6289814c23..715922c15c78 100644 --- a/tools/testing/selftests/cgroup/test_cpu.c +++ b/tools/testing/selftests/cgroup/test_cpu.c @@ -564,6 +564,59 @@ test_cpucg_nested_weight_underprovisioned(const char *root) return run_cpucg_nested_weight_test(root, false); } +/* + * This test creates a cgroup with some maximum value within a period, and + * verifies that a process in the cgroup is not overscheduled. + */ +static int test_cpucg_max(const char *root) +{ + int ret = KSFT_FAIL; + long usage_usec, user_usec; + long usage_seconds = 1; + long expected_usage_usec = usage_seconds * USEC_PER_SEC; + char *cpucg; + + cpucg = cg_name(root, "cpucg_test"); + if (!cpucg) + goto cleanup; + + if (cg_create(cpucg)) + goto cleanup; + + if (cg_write(cpucg, "cpu.max", "1000")) + goto cleanup; + + struct cpu_hog_func_param param = { + .nprocs = 1, + .ts = { + .tv_sec = usage_seconds, + .tv_nsec = 0, + }, + .clock_type = CPU_HOG_CLOCK_WALL, + }; + if (cg_run(cpucg, hog_cpus_timed, (void *)¶m)) + goto cleanup; + + usage_usec = cg_read_key_long(cpucg, "cpu.stat", "usage_usec"); + user_usec = cg_read_key_long(cpucg, "cpu.stat", "user_usec"); + if (user_usec <= 0) + goto cleanup; + + if (user_usec >= expected_usage_usec) + goto cleanup; + + if (values_close(usage_usec, expected_usage_usec, 95)) + goto cleanup; + + ret = KSFT_PASS; + +cleanup: + cg_destroy(cpucg); + free(cpucg); + + return ret; +} + #define T(x) { x, #x } struct cpucg_test { int (*fn)(const char *root); @@ -575,6 +628,7 @@ struct cpucg_test { T(test_cpucg_weight_underprovisioned), T(test_cpucg_nested_weight_overprovisioned), T(test_cpucg_nested_weight_underprovisioned), + T(test_cpucg_max), }; #undef T From a79906570f9646ae174dd0899ea54cc2eeffd788 Mon Sep 17 00:00:00 2001 From: David Vernet Date: Sat, 23 Apr 2022 05:30:53 -0700 Subject: [PATCH 08/13] cgroup: Add test_cpucg_max_nested() testcase The cgroup cpu controller selftests have a test_cpucg_max() testcase that validates the behavior of the cpu.max knob. Let's also add a testcase that verifies that the behavior works correctly when set on a nested cgroup. Signed-off-by: David Vernet Signed-off-by: Tejun Heo --- tools/testing/selftests/cgroup/test_cpu.c | 63 +++++++++++++++++++++++ 1 file changed, 63 insertions(+) diff --git a/tools/testing/selftests/cgroup/test_cpu.c b/tools/testing/selftests/cgroup/test_cpu.c index 715922c15c78..24020a2c68dc 100644 --- a/tools/testing/selftests/cgroup/test_cpu.c +++ b/tools/testing/selftests/cgroup/test_cpu.c @@ -617,6 +617,68 @@ cleanup: return ret; } +/* + * This test verifies that a process inside of a nested cgroup whose parent + * group has a cpu.max value set, is properly throttled. + */ +static int test_cpucg_max_nested(const char *root) +{ + int ret = KSFT_FAIL; + long usage_usec, user_usec; + long usage_seconds = 1; + long expected_usage_usec = usage_seconds * USEC_PER_SEC; + char *parent, *child; + + parent = cg_name(root, "cpucg_parent"); + child = cg_name(parent, "cpucg_child"); + if (!parent || !child) + goto cleanup; + + if (cg_create(parent)) + goto cleanup; + + if (cg_write(parent, "cgroup.subtree_control", "+cpu")) + goto cleanup; + + if (cg_create(child)) + goto cleanup; + + if (cg_write(parent, "cpu.max", "1000")) + goto cleanup; + + struct cpu_hog_func_param param = { + .nprocs = 1, + .ts = { + .tv_sec = usage_seconds, + .tv_nsec = 0, + }, + .clock_type = CPU_HOG_CLOCK_WALL, + }; + if (cg_run(child, hog_cpus_timed, (void *)¶m)) + goto cleanup; + + usage_usec = cg_read_key_long(child, "cpu.stat", "usage_usec"); + user_usec = cg_read_key_long(child, "cpu.stat", "user_usec"); + if (user_usec <= 0) + goto cleanup; + + if (user_usec >= expected_usage_usec) + goto cleanup; + + if (values_close(usage_usec, expected_usage_usec, 95)) + goto cleanup; + + ret = KSFT_PASS; + +cleanup: + cg_destroy(child); + free(child); + cg_destroy(parent); + free(parent); + + return ret; +} + #define T(x) { x, #x } struct cpucg_test { int (*fn)(const char *root); @@ -629,6 +691,7 @@ struct cpucg_test { T(test_cpucg_nested_weight_overprovisioned), T(test_cpucg_nested_weight_underprovisioned), T(test_cpucg_max), + T(test_cpucg_max_nested), }; #undef T From 5c26993c31f0f726aa1f90158f17ec95069b7cb2 Mon Sep 17 00:00:00 2001 From: David Vernet Date: Sat, 23 Apr 2022 05:30:54 -0700 Subject: [PATCH 09/13] cgroup: Add config file to cgroup selftest suite Most of the test suites in tools/testing/selftests contain a config file that specifies which kernel config options need to be present in order for the test suite to be able to run and perform meaningful validation. There is no config file for the tools/testing/selftests/cgroup test suite, so this patch adds one. Signed-off-by: David Vernet Signed-off-by: Tejun Heo --- tools/testing/selftests/cgroup/config | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 tools/testing/selftests/cgroup/config diff --git a/tools/testing/selftests/cgroup/config b/tools/testing/selftests/cgroup/config new file mode 100644 index 000000000000..84fe884fad86 --- /dev/null +++ b/tools/testing/selftests/cgroup/config @@ -0,0 +1,8 @@ +CONFIG_CGROUPS=y +CONFIG_CGROUP_CPUACCT=y +CONFIG_CGROUP_FREEZER=y +CONFIG_CGROUP_SCHED=y +CONFIG_MEMCG=y +CONFIG_MEMCG_KMEM=y +CONFIG_MEMCG_SWAP=y +CONFIG_PAGE_COUNTER=y From 54de76c0123915e7533ce352de30a1f2d80fe81f Mon Sep 17 00:00:00 2001 From: Phil Auld Date: Thu, 12 May 2022 10:34:39 -0400 Subject: [PATCH 10/13] kselftest/cgroup: fix test_stress.sh to use OUTPUT dir Running cgroup kselftest with O= fails to run the with_stress test due to hardcoded ./test_core. Find test_core binary using the OUTPUT directory. Fixes: 1a99fcc035fb ("selftests: cgroup: Run test_core under interfering stress") Signed-off-by: Phil Auld Signed-off-by: Tejun Heo --- tools/testing/selftests/cgroup/test_stress.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/cgroup/test_stress.sh b/tools/testing/selftests/cgroup/test_stress.sh index 15d9d5896394..109c044f715f 100755 --- a/tools/testing/selftests/cgroup/test_stress.sh +++ b/tools/testing/selftests/cgroup/test_stress.sh @@ -1,4 +1,4 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -./with_stress.sh -s subsys -s fork ./test_core +./with_stress.sh -s subsys -s fork ${OUTPUT}/test_core From 213adc63dfbcdff9a0c19ec1f2681fda9c05cf6d Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Fri, 13 May 2022 15:09:28 -0400 Subject: [PATCH 11/13] kseltest/cgroup: Make test_stress.sh work if run interactively Commit 54de76c01239 ("kselftest/cgroup: fix test_stress.sh to use OUTPUT dir") changes the test_core command path from . to $OUTPUT. However, variable OUTPUT may not be defined if the command is run interactively. Fix that by using ${OUTPUT:-.} to cover both cases. Signed-off-by: Waiman Long Signed-off-by: Tejun Heo --- tools/testing/selftests/cgroup/test_stress.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/cgroup/test_stress.sh b/tools/testing/selftests/cgroup/test_stress.sh index 109c044f715f..3c9c4554d5f6 100755 --- a/tools/testing/selftests/cgroup/test_stress.sh +++ b/tools/testing/selftests/cgroup/test_stress.sh @@ -1,4 +1,4 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -./with_stress.sh -s subsys -s fork ${OUTPUT}/test_core +./with_stress.sh -s subsys -s fork ${OUTPUT:-.}/test_core From 29ed17389c4dcd09c5be8d88ddf6f4f60241567d Mon Sep 17 00:00:00 2001 From: Xiu Jianfeng Date: Tue, 17 May 2022 19:25:23 +0800 Subject: [PATCH 12/13] cgroup: Make cgroup_debug static Make cgroup_debug static since it's only used in cgroup.c Signed-off-by: Xiu Jianfeng Signed-off-by: Tejun Heo --- kernel/cgroup/cgroup-internal.h | 1 - kernel/cgroup/cgroup.c | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h index 6e36e854b512..5da09c74228d 100644 --- a/kernel/cgroup/cgroup-internal.h +++ b/kernel/cgroup/cgroup-internal.h @@ -12,7 +12,6 @@ #define TRACE_CGROUP_PATH_LEN 1024 extern spinlock_t trace_cgroup_path_lock; extern char trace_cgroup_path[TRACE_CGROUP_PATH_LEN]; -extern bool cgroup_debug; extern void __init enable_debug_cgroup(void); /* diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index adb820e98f24..a97fd051430b 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -96,7 +96,7 @@ EXPORT_SYMBOL_GPL(css_set_lock); DEFINE_SPINLOCK(trace_cgroup_path_lock); char trace_cgroup_path[TRACE_CGROUP_PATH_LEN]; -bool cgroup_debug __read_mostly; +static bool cgroup_debug __read_mostly; /* * Protects cgroup_idr and css_idr so that IDs can be released without From b154a017c92011d8f71ce804583e5f9c3d90bb9a Mon Sep 17 00:00:00 2001 From: Shida Zhang Date: Wed, 18 May 2022 09:36:47 +0800 Subject: [PATCH 13/13] cgroup: remove the superfluous judgment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove the superfluous judgment since the function is never called for a root cgroup, as suggested by Tejun. Suggested-by: Tejun Heo Signed-off-by: Shida Zhang Reviewed-by: Michal Koutný Signed-off-by: Tejun Heo --- kernel/cgroup/cgroup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index a97fd051430b..1779ccddb734 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -5685,7 +5685,7 @@ static int cgroup_destroy_locked(struct cgroup *cgrp) css_clear_dir(&cgrp->self); kernfs_remove(cgrp->kn); - if (parent && cgroup_is_threaded(cgrp)) + if (cgroup_is_threaded(cgrp)) parent->nr_threaded_children--; spin_lock_irq(&css_set_lock);