forked from Minki/linux
cgroup: Make !percpu threadgroup_rwsem operations optional
3942a9bd7b
("locking, rcu, cgroup: Avoid synchronize_sched() in
__cgroup_procs_write()") disabled percpu operations on threadgroup_rwsem
because the implied synchronize_rcu() on write locking was pushing up the
latencies too much for android which constantly moves processes between
cgroups.
This makes the hotter paths - fork and exit - slower as they're always
forced into the slow path. There is no reason to force this on everyone
especially given that the more common static usage pattern can now completely
avoid write-locking the rwsem. Write-locking is elided when turning on and
off controllers on empty sub-trees and CLONE_INTO_CGROUP enables seeding a
cgroup without grabbing the rwsem.
Restore the default percpu operations and introduce the mount option
"favordynmods" and config option CGROUP_FAVOR_DYNMODS for users who need
lower latencies for the dynamic operations.
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Christian Brauner <brauner@kernel.org>
Cc: Michal Koutný <mkoutny@suse.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: John Stultz <john.stultz@linaro.org>
Cc: Dmitry Shmidt <dimitrysh@google.com>
Cc: Oleg Nesterov <oleg@redhat.com>
This commit is contained in:
parent
30312730bd
commit
6a010a49b6
@ -184,6 +184,14 @@ cgroup v2 currently supports the following mount options.
|
||||
ignored on non-init namespace mounts. Please refer to the
|
||||
Delegation section for details.
|
||||
|
||||
[no]favordynmods
|
||||
Reduce the latencies of dynamic cgroup modifications such as
|
||||
task migrations and controller on/offs at the cost of making
|
||||
hot path operations such as forks and exits more expensive.
|
||||
The static usage pattern of creating a cgroup, enabling
|
||||
controllers, and then seeding it with CLONE_INTO_CGROUP is
|
||||
not affected by this option.
|
||||
|
||||
memory_[no]localevents
|
||||
Only populate memory.events with data for the current cgroup,
|
||||
and not any subtrees. This is legacy behaviour, the default
|
||||
|
@ -88,20 +88,33 @@ enum {
|
||||
*/
|
||||
CGRP_ROOT_NS_DELEGATE = (1 << 3),
|
||||
|
||||
/*
|
||||
* Reduce latencies on dynamic cgroup modifications such as task
|
||||
* migrations and controller on/offs by disabling percpu operation on
|
||||
* cgroup_threadgroup_rwsem. This forces hot path operations such as
|
||||
* forks and exits into the slow path, making them more expensive.
|
||||
*
|
||||
* The static usage pattern of creating a cgroup, enabling controllers,
|
||||
* and then seeding it with CLONE_INTO_CGROUP doesn't require write
|
||||
* locking cgroup_threadgroup_rwsem and thus doesn't benefit from
|
||||
* favordynmods.
|
||||
*/
|
||||
CGRP_ROOT_FAVOR_DYNMODS = (1 << 4),
|
||||
|
||||
/*
|
||||
* Enable cpuset controller in v1 cgroup to use v2 behavior.
|
||||
*/
|
||||
CGRP_ROOT_CPUSET_V2_MODE = (1 << 4),
|
||||
CGRP_ROOT_CPUSET_V2_MODE = (1 << 16),
|
||||
|
||||
/*
|
||||
* Enable legacy local memory.events.
|
||||
*/
|
||||
CGRP_ROOT_MEMORY_LOCAL_EVENTS = (1 << 5),
|
||||
CGRP_ROOT_MEMORY_LOCAL_EVENTS = (1 << 17),
|
||||
|
||||
/*
|
||||
* Enable recursive subtree protection
|
||||
*/
|
||||
CGRP_ROOT_MEMORY_RECURSIVE_PROT = (1 << 6),
|
||||
CGRP_ROOT_MEMORY_RECURSIVE_PROT = (1 << 18),
|
||||
};
|
||||
|
||||
/* cftype->flags */
|
||||
|
10
init/Kconfig
10
init/Kconfig
@ -936,6 +936,16 @@ if CGROUPS
|
||||
config PAGE_COUNTER
|
||||
bool
|
||||
|
||||
config CGROUP_FAVOR_DYNMODS
|
||||
bool "Favor dynamic modification latency reduction by default"
|
||||
help
|
||||
This option enables the "favordynmods" mount option by default
|
||||
which reduces the latencies of dynamic cgroup modifications such
|
||||
as task migrations and controller on/offs at the cost of making
|
||||
hot path operations such as forks and exits more expensive.
|
||||
|
||||
Say N if unsure.
|
||||
|
||||
config MEMCG
|
||||
bool "Memory controller"
|
||||
select PAGE_COUNTER
|
||||
|
@ -233,6 +233,7 @@ void cgroup_kn_unlock(struct kernfs_node *kn);
|
||||
int cgroup_path_ns_locked(struct cgroup *cgrp, char *buf, size_t buflen,
|
||||
struct cgroup_namespace *ns);
|
||||
|
||||
void cgroup_favor_dynmods(struct cgroup_root *root, bool favor);
|
||||
void cgroup_free_root(struct cgroup_root *root);
|
||||
void init_cgroup_root(struct cgroup_fs_context *ctx);
|
||||
int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask);
|
||||
|
@ -875,6 +875,8 @@ static int cgroup1_show_options(struct seq_file *seq, struct kernfs_root *kf_roo
|
||||
seq_puts(seq, ",xattr");
|
||||
if (root->flags & CGRP_ROOT_CPUSET_V2_MODE)
|
||||
seq_puts(seq, ",cpuset_v2_mode");
|
||||
if (root->flags & CGRP_ROOT_FAVOR_DYNMODS)
|
||||
seq_puts(seq, ",favordynmods");
|
||||
|
||||
spin_lock(&release_agent_path_lock);
|
||||
if (strlen(root->release_agent_path))
|
||||
@ -898,6 +900,8 @@ enum cgroup1_param {
|
||||
Opt_noprefix,
|
||||
Opt_release_agent,
|
||||
Opt_xattr,
|
||||
Opt_favordynmods,
|
||||
Opt_nofavordynmods,
|
||||
};
|
||||
|
||||
const struct fs_parameter_spec cgroup1_fs_parameters[] = {
|
||||
@ -909,6 +913,8 @@ const struct fs_parameter_spec cgroup1_fs_parameters[] = {
|
||||
fsparam_flag ("noprefix", Opt_noprefix),
|
||||
fsparam_string("release_agent", Opt_release_agent),
|
||||
fsparam_flag ("xattr", Opt_xattr),
|
||||
fsparam_flag ("favordynmods", Opt_favordynmods),
|
||||
fsparam_flag ("nofavordynmods", Opt_nofavordynmods),
|
||||
{}
|
||||
};
|
||||
|
||||
@ -960,6 +966,12 @@ int cgroup1_parse_param(struct fs_context *fc, struct fs_parameter *param)
|
||||
case Opt_xattr:
|
||||
ctx->flags |= CGRP_ROOT_XATTR;
|
||||
break;
|
||||
case Opt_favordynmods:
|
||||
ctx->flags |= CGRP_ROOT_FAVOR_DYNMODS;
|
||||
break;
|
||||
case Opt_nofavordynmods:
|
||||
ctx->flags &= ~CGRP_ROOT_FAVOR_DYNMODS;
|
||||
break;
|
||||
case Opt_release_agent:
|
||||
/* Specifying two release agents is forbidden */
|
||||
if (ctx->release_agent)
|
||||
@ -1211,8 +1223,11 @@ static int cgroup1_root_to_use(struct fs_context *fc)
|
||||
init_cgroup_root(ctx);
|
||||
|
||||
ret = cgroup_setup_root(root, ctx->subsys_mask);
|
||||
if (ret)
|
||||
if (!ret)
|
||||
cgroup_favor_dynmods(root, ctx->flags & CGRP_ROOT_FAVOR_DYNMODS);
|
||||
else
|
||||
cgroup_free_root(root);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -1305,6 +1305,20 @@ struct cgroup_root *cgroup_root_from_kf(struct kernfs_root *kf_root)
|
||||
return root_cgrp->root;
|
||||
}
|
||||
|
||||
/**
 * cgroup_favor_dynmods - switch CGRP_ROOT_FAVOR_DYNMODS on or off for a root
 * @root: cgroup root whose flag is being updated
 * @favor: %true to favor dynamic modifications, %false to stop favoring them
 *
 * Brings @root->flags in line with @favor. The flag is only changed when the
 * requested state differs from the current one, so calling this repeatedly
 * with the same @favor is a no-op and every rcu_sync_enter() issued here is
 * matched by exactly one rcu_sync_exit().
 */
void cgroup_favor_dynmods(struct cgroup_root *root, bool favor)
|
||||
{
|
||||
/* current state of the flag on this root, before any change */
bool favoring = root->flags & CGRP_ROOT_FAVOR_DYNMODS;
|
||||
|
||||
/* see the comment above CGRP_ROOT_FAVOR_DYNMODS definition */
|
||||
if (favor && !favoring) {
|
||||
/* off -> on: enter the rcu_sync state of cgroup_threadgroup_rwsem, then record it */
rcu_sync_enter(&cgroup_threadgroup_rwsem.rss);
|
||||
root->flags |= CGRP_ROOT_FAVOR_DYNMODS;
|
||||
} else if (!favor && favoring) {
|
||||
/* on -> off: drop the reference taken by the matching enter, then clear the flag */
rcu_sync_exit(&cgroup_threadgroup_rwsem.rss);
|
||||
root->flags &= ~CGRP_ROOT_FAVOR_DYNMODS;
|
||||
}
|
||||
}
|
||||
|
||||
static int cgroup_init_root_id(struct cgroup_root *root)
|
||||
{
|
||||
int id;
|
||||
@ -1365,6 +1379,7 @@ static void cgroup_destroy_root(struct cgroup_root *root)
|
||||
cgroup_root_count--;
|
||||
}
|
||||
|
||||
cgroup_favor_dynmods(root, false);
|
||||
cgroup_exit_root_id(root);
|
||||
|
||||
mutex_unlock(&cgroup_mutex);
|
||||
@ -1858,6 +1873,7 @@ int cgroup_show_path(struct seq_file *sf, struct kernfs_node *kf_node,
|
||||
|
||||
enum cgroup2_param {
|
||||
Opt_nsdelegate, Opt_nonsdelegate,
|
||||
Opt_favordynmods, Opt_nofavordynmods,
|
||||
Opt_memory_localevents, Opt_memory_nolocalevents,
|
||||
Opt_memory_recursiveprot, Opt_memory_norecursiveprot,
|
||||
nr__cgroup2_params
|
||||
@ -1866,6 +1882,8 @@ enum cgroup2_param {
|
||||
static const struct fs_parameter_spec cgroup2_fs_parameters[] = {
|
||||
fsparam_flag("nsdelegate", Opt_nsdelegate),
|
||||
fsparam_flag("nonsdelegate", Opt_nonsdelegate),
|
||||
fsparam_flag("favordynmods", Opt_favordynmods),
|
||||
fsparam_flag("nofavordynmods", Opt_nofavordynmods),
|
||||
fsparam_flag("memory_localevents", Opt_memory_localevents),
|
||||
fsparam_flag("memory_nolocalevents", Opt_memory_nolocalevents),
|
||||
fsparam_flag("memory_recursiveprot", Opt_memory_recursiveprot),
|
||||
@ -1890,6 +1908,12 @@ static int cgroup2_parse_param(struct fs_context *fc, struct fs_parameter *param
|
||||
case Opt_nonsdelegate:
|
||||
ctx->flags &= ~CGRP_ROOT_NS_DELEGATE;
|
||||
return 0;
|
||||
case Opt_favordynmods:
|
||||
ctx->flags |= CGRP_ROOT_FAVOR_DYNMODS;
|
||||
return 0;
|
||||
case Opt_nofavordynmods:
|
||||
ctx->flags &= ~CGRP_ROOT_FAVOR_DYNMODS;
|
||||
return 0;
|
||||
case Opt_memory_localevents:
|
||||
ctx->flags |= CGRP_ROOT_MEMORY_LOCAL_EVENTS;
|
||||
return 0;
|
||||
@ -1914,6 +1938,9 @@ static void apply_cgroup_root_flags(unsigned int root_flags)
|
||||
else
|
||||
cgrp_dfl_root.flags &= ~CGRP_ROOT_NS_DELEGATE;
|
||||
|
||||
cgroup_favor_dynmods(&cgrp_dfl_root,
|
||||
root_flags & CGRP_ROOT_FAVOR_DYNMODS);
|
||||
|
||||
if (root_flags & CGRP_ROOT_MEMORY_LOCAL_EVENTS)
|
||||
cgrp_dfl_root.flags |= CGRP_ROOT_MEMORY_LOCAL_EVENTS;
|
||||
else
|
||||
@ -1930,6 +1957,8 @@ static int cgroup_show_options(struct seq_file *seq, struct kernfs_root *kf_root
|
||||
{
|
||||
if (cgrp_dfl_root.flags & CGRP_ROOT_NS_DELEGATE)
|
||||
seq_puts(seq, ",nsdelegate");
|
||||
if (cgrp_dfl_root.flags & CGRP_ROOT_FAVOR_DYNMODS)
|
||||
seq_puts(seq, ",favordynmods");
|
||||
if (cgrp_dfl_root.flags & CGRP_ROOT_MEMORY_LOCAL_EVENTS)
|
||||
seq_puts(seq, ",memory_localevents");
|
||||
if (cgrp_dfl_root.flags & CGRP_ROOT_MEMORY_RECURSIVE_PROT)
|
||||
@ -1980,7 +2009,8 @@ void init_cgroup_root(struct cgroup_fs_context *ctx)
|
||||
cgrp->root = root;
|
||||
init_cgroup_housekeeping(cgrp);
|
||||
|
||||
root->flags = ctx->flags;
|
||||
/* DYNMODS must be modified through cgroup_favor_dynmods() */
|
||||
root->flags = ctx->flags & ~CGRP_ROOT_FAVOR_DYNMODS;
|
||||
if (ctx->release_agent)
|
||||
strscpy(root->release_agent_path, ctx->release_agent, PATH_MAX);
|
||||
if (ctx->name)
|
||||
@ -2202,6 +2232,10 @@ static int cgroup_init_fs_context(struct fs_context *fc)
|
||||
put_user_ns(fc->user_ns);
|
||||
fc->user_ns = get_user_ns(ctx->ns->user_ns);
|
||||
fc->global = true;
|
||||
|
||||
#ifdef CONFIG_CGROUP_FAVOR_DYNMODS
|
||||
ctx->flags |= CGRP_ROOT_FAVOR_DYNMODS;
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -5854,12 +5888,6 @@ int __init cgroup_init(void)
|
||||
|
||||
cgroup_rstat_boot();
|
||||
|
||||
/*
|
||||
* The latency of the synchronize_rcu() is too high for cgroups,
|
||||
* avoid it at the cost of forcing all readers into the slow path.
|
||||
*/
|
||||
rcu_sync_enter_start(&cgroup_threadgroup_rwsem.rss);
|
||||
|
||||
get_user_ns(init_cgroup_ns.user_ns);
|
||||
|
||||
mutex_lock(&cgroup_mutex);
|
||||
@ -6771,6 +6799,7 @@ static ssize_t features_show(struct kobject *kobj, struct kobj_attribute *attr,
|
||||
{
|
||||
return snprintf(buf, PAGE_SIZE,
|
||||
"nsdelegate\n"
|
||||
"favordynmods\n"
|
||||
"memory_localevents\n"
|
||||
"memory_recursiveprot\n");
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user