mirror of https://github.com/torvalds/linux.git
Merge tag 'cgroup-for-5.20' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup

Pull cgroup updates from Tejun Heo:
 "Several core optimizations:

   - threadgroup_rwsem write locking is skipped when configuring
     controllers in empty subtrees. Combined with CLONE_INTO_CGROUP,
     this allows the common static usage pattern to not grab
     threadgroup_rwsem at all (glibc still doesn't seem ready for
     CLONE_INTO_CGROUP unfortunately).

   - threadgroup_rwsem used to be put into non-percpu mode by default
     due to latency concerns in specific use cases. There's no reason
     for everyone else to pay for it. Make the behavior optional.

   - psi no longer allocates memory when disabled.

  ... along with some code cleanups"

* tag 'cgroup-for-5.20' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup:
  cgroup: Skip subtree root in cgroup_update_dfl_csses()
  cgroup: remove "no" prefixed mount options
  cgroup: Make !percpu threadgroup_rwsem operations optional
  cgroup: Add "no" prefixed mount options
  cgroup: Elide write-locking threadgroup_rwsem when updating csses on an empty subtree
  cgroup.c: remove redundant check for mixable cgroup in cgroup_migrate_vet_dst
  cgroup.c: add helper __cset_cgroup_from_root to cleanup duplicated codes
  psi: dont alloc memory for psi by default
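The "static usage pattern" referred to above can be illustrated with clone3() and CLONE_INTO_CGROUP: the child is created directly inside a pre-configured cgroup, so no later cgroup.procs write (and hence no threadgroup_rwsem write lock) is needed. This is only a sketch; the cgroup path is an arbitrary example, error handling is minimal, and the raw syscall is used because glibc does not wrap clone3() (CLONE_INTO_CGROUP needs Linux 5.7 or later).

#define _GNU_SOURCE
#include <fcntl.h>
#include <signal.h>
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <linux/sched.h>        /* struct clone_args, CLONE_INTO_CGROUP */

int main(void)
{
        /* the target cgroup must already exist with controllers enabled */
        int cgfd = open("/sys/fs/cgroup/mygroup", O_RDONLY | O_DIRECTORY);
        if (cgfd < 0) {
                perror("open cgroup directory");
                return 1;
        }

        struct clone_args args;
        memset(&args, 0, sizeof(args));
        args.flags = CLONE_INTO_CGROUP;
        args.exit_signal = SIGCHLD;
        args.cgroup = cgfd;             /* fd of the destination cgroup */

        long pid = syscall(SYS_clone3, &args, sizeof(args));
        if (pid < 0) {
                perror("clone3");
                return 1;
        }
        if (pid == 0) {
                /* child: already a member of the target cgroup */
                execlp("sleep", "sleep", "10", (char *)NULL);
                _exit(127);
        }
        return 0;                       /* parent */
}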
commit b6bb70f9ab
@@ -184,6 +184,14 @@ cgroup v2 currently supports the following mount options.
         ignored on non-init namespace mounts. Please refer to the
         Delegation section for details.
 
+  favordynmods
+        Reduce the latencies of dynamic cgroup modifications such as
+        task migrations and controller on/offs at the cost of making
+        hot path operations such as forks and exits more expensive.
+        The static usage pattern of creating a cgroup, enabling
+        controllers, and then seeding it with CLONE_INTO_CGROUP is
+        not affected by this option.
+
   memory_localevents
         Only populate memory.events with data for the current cgroup,
         and not any subtrees. This is legacy behaviour, the default
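As a usage illustration of the option documented above, the sketch below mounts the cgroup2 hierarchy with favordynmods enabled. The mount point and the choice of a fresh mount are assumptions for the example; systems that already have cgroup2 mounted would normally apply the flag via remount or build with CONFIG_CGROUP_FAVOR_DYNMODS=y.

#include <stdio.h>
#include <sys/mount.h>

int main(void)
{
        /*
         * Mount the cgroup2 hierarchy with favordynmods enabled,
         * roughly equivalent to:
         *     mount -t cgroup2 -o favordynmods none /sys/fs/cgroup
         */
        if (mount("none", "/sys/fs/cgroup", "cgroup2", 0, "favordynmods") < 0) {
                perror("mount cgroup2");
                return 1;
        }
        return 0;
}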
@@ -88,20 +88,33 @@ enum {
          */
         CGRP_ROOT_NS_DELEGATE = (1 << 3),
 
+        /*
+         * Reduce latencies on dynamic cgroup modifications such as task
+         * migrations and controller on/offs by disabling percpu operation on
+         * cgroup_threadgroup_rwsem. This makes hot path operations such as
+         * forks and exits into the slow path and more expensive.
+         *
+         * The static usage pattern of creating a cgroup, enabling controllers,
+         * and then seeding it with CLONE_INTO_CGROUP doesn't require write
+         * locking cgroup_threadgroup_rwsem and thus doesn't benefit from
+         * favordynmod.
+         */
+        CGRP_ROOT_FAVOR_DYNMODS = (1 << 4),
+
         /*
          * Enable cpuset controller in v1 cgroup to use v2 behavior.
          */
-        CGRP_ROOT_CPUSET_V2_MODE = (1 << 4),
+        CGRP_ROOT_CPUSET_V2_MODE = (1 << 16),
 
         /*
          * Enable legacy local memory.events.
          */
-        CGRP_ROOT_MEMORY_LOCAL_EVENTS = (1 << 5),
+        CGRP_ROOT_MEMORY_LOCAL_EVENTS = (1 << 17),
 
         /*
          * Enable recursive subtree protection
          */
-        CGRP_ROOT_MEMORY_RECURSIVE_PROT = (1 << 6),
+        CGRP_ROOT_MEMORY_RECURSIVE_PROT = (1 << 18),
 };
 
 /* cftype->flags */
@@ -480,7 +493,7 @@ struct cgroup {
         struct work_struct release_agent_work;
 
         /* used to track pressure stalls */
-        struct psi_group psi;
+        struct psi_group *psi;
 
         /* used to store eBPF programs */
         struct cgroup_bpf bpf;
@@ -674,7 +674,7 @@ static inline void pr_cont_cgroup_path(struct cgroup *cgrp)
 
 static inline struct psi_group *cgroup_psi(struct cgroup *cgrp)
 {
-        return &cgrp->psi;
+        return cgrp->psi;
 }
 
 bool cgroup_psi_enabled(void);
init/Kconfig
@@ -945,6 +945,16 @@ if CGROUPS
 config PAGE_COUNTER
         bool
 
+config CGROUP_FAVOR_DYNMODS
+        bool "Favor dynamic modification latency reduction by default"
+        help
+          This option enables the "favordynmods" mount option by default
+          which reduces the latencies of dynamic cgroup modifications such
+          as task migrations and controller on/offs at the cost of making
+          hot path operations such as forks and exits more expensive.
+
+          Say N if unsure.
+
 config MEMCG
         bool "Memory controller"
         select PAGE_COUNTER
@@ -233,6 +233,7 @@ void cgroup_kn_unlock(struct kernfs_node *kn);
 int cgroup_path_ns_locked(struct cgroup *cgrp, char *buf, size_t buflen,
                           struct cgroup_namespace *ns);
 
+void cgroup_favor_dynmods(struct cgroup_root *root, bool favor);
 void cgroup_free_root(struct cgroup_root *root);
 void init_cgroup_root(struct cgroup_fs_context *ctx);
 int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask);
@@ -875,6 +875,8 @@ static int cgroup1_show_options(struct seq_file *seq, struct kernfs_root *kf_root)
                 seq_puts(seq, ",xattr");
         if (root->flags & CGRP_ROOT_CPUSET_V2_MODE)
                 seq_puts(seq, ",cpuset_v2_mode");
+        if (root->flags & CGRP_ROOT_FAVOR_DYNMODS)
+                seq_puts(seq, ",favordynmods");
 
         spin_lock(&release_agent_path_lock);
         if (strlen(root->release_agent_path))
@@ -898,6 +900,8 @@ enum cgroup1_param {
         Opt_noprefix,
         Opt_release_agent,
         Opt_xattr,
+        Opt_favordynmods,
+        Opt_nofavordynmods,
 };
 
 const struct fs_parameter_spec cgroup1_fs_parameters[] = {
@@ -909,6 +913,8 @@ const struct fs_parameter_spec cgroup1_fs_parameters[] = {
         fsparam_flag  ("noprefix", Opt_noprefix),
         fsparam_string("release_agent", Opt_release_agent),
         fsparam_flag  ("xattr", Opt_xattr),
+        fsparam_flag  ("favordynmods", Opt_favordynmods),
+        fsparam_flag  ("nofavordynmods", Opt_nofavordynmods),
         {}
 };
 
@@ -960,6 +966,12 @@ int cgroup1_parse_param(struct fs_context *fc, struct fs_parameter *param)
         case Opt_xattr:
                 ctx->flags |= CGRP_ROOT_XATTR;
                 break;
+        case Opt_favordynmods:
+                ctx->flags |= CGRP_ROOT_FAVOR_DYNMODS;
+                break;
+        case Opt_nofavordynmods:
+                ctx->flags &= ~CGRP_ROOT_FAVOR_DYNMODS;
+                break;
         case Opt_release_agent:
                 /* Specifying two release agents is forbidden */
                 if (ctx->release_agent)
@@ -1211,8 +1223,11 @@ static int cgroup1_root_to_use(struct fs_context *fc)
         init_cgroup_root(ctx);
 
         ret = cgroup_setup_root(root, ctx->subsys_mask);
-        if (ret)
+        if (!ret)
+                cgroup_favor_dynmods(root, ctx->flags & CGRP_ROOT_FAVOR_DYNMODS);
+        else
                 cgroup_free_root(root);
+
         return ret;
 }
 
@@ -279,8 +279,6 @@ bool cgroup_ssid_enabled(int ssid)
  *
  * - When mounting an existing superblock, mount options should match.
  *
- * - Remount is disallowed.
- *
  * - rename(2) is disallowed.
  *
  * - "tasks" is removed. Everything should be at process granularity. Use
@@ -1309,6 +1307,20 @@ struct cgroup_root *cgroup_root_from_kf(struct kernfs_root *kf_root)
         return root_cgrp->root;
 }
 
+void cgroup_favor_dynmods(struct cgroup_root *root, bool favor)
+{
+        bool favoring = root->flags & CGRP_ROOT_FAVOR_DYNMODS;
+
+        /* see the comment above CGRP_ROOT_FAVOR_DYNMODS definition */
+        if (favor && !favoring) {
+                rcu_sync_enter(&cgroup_threadgroup_rwsem.rss);
+                root->flags |= CGRP_ROOT_FAVOR_DYNMODS;
+        } else if (!favor && favoring) {
+                rcu_sync_exit(&cgroup_threadgroup_rwsem.rss);
+                root->flags &= ~CGRP_ROOT_FAVOR_DYNMODS;
+        }
+}
+
 static int cgroup_init_root_id(struct cgroup_root *root)
 {
         int id;
@@ -1369,6 +1381,7 @@ static void cgroup_destroy_root(struct cgroup_root *root)
                 cgroup_root_count--;
         }
 
+        cgroup_favor_dynmods(root, false);
         cgroup_exit_root_id(root);
 
         mutex_unlock(&cgroup_mutex);
@@ -1378,6 +1391,31 @@ static void cgroup_destroy_root(struct cgroup_root *root)
         cgroup_free_root(root);
 }
 
+static inline struct cgroup *__cset_cgroup_from_root(struct css_set *cset,
+                                            struct cgroup_root *root)
+{
+        struct cgroup *res_cgroup = NULL;
+
+        if (cset == &init_css_set) {
+                res_cgroup = &root->cgrp;
+        } else if (root == &cgrp_dfl_root) {
+                res_cgroup = cset->dfl_cgrp;
+        } else {
+                struct cgrp_cset_link *link;
+
+                list_for_each_entry(link, &cset->cgrp_links, cgrp_link) {
+                        struct cgroup *c = link->cgrp;
+
+                        if (c->root == root) {
+                                res_cgroup = c;
+                                break;
+                        }
+                }
+        }
+
+        return res_cgroup;
+}
+
 /*
  * look up cgroup associated with current task's cgroup namespace on the
  * specified hierarchy
@@ -1393,22 +1431,8 @@ current_cgns_cgroup_from_root(struct cgroup_root *root)
         rcu_read_lock();
 
         cset = current->nsproxy->cgroup_ns->root_cset;
-        if (cset == &init_css_set) {
-                res = &root->cgrp;
-        } else if (root == &cgrp_dfl_root) {
-                res = cset->dfl_cgrp;
-        } else {
-                struct cgrp_cset_link *link;
-
-                list_for_each_entry(link, &cset->cgrp_links, cgrp_link) {
-                        struct cgroup *c = link->cgrp;
-
-                        if (c->root == root) {
-                                res = c;
-                                break;
-                        }
-                }
-        }
+        res = __cset_cgroup_from_root(cset, root);
+
         rcu_read_unlock();
 
         BUG_ON(!res);
@@ -1424,22 +1448,7 @@ static struct cgroup *cset_cgroup_from_root(struct css_set *cset,
         lockdep_assert_held(&cgroup_mutex);
         lockdep_assert_held(&css_set_lock);
 
-        if (cset == &init_css_set) {
-                res = &root->cgrp;
-        } else if (root == &cgrp_dfl_root) {
-                res = cset->dfl_cgrp;
-        } else {
-                struct cgrp_cset_link *link;
-
-                list_for_each_entry(link, &cset->cgrp_links, cgrp_link) {
-                        struct cgroup *c = link->cgrp;
-
-                        if (c->root == root) {
-                                res = c;
-                                break;
-                        }
-                }
-        }
+        res = __cset_cgroup_from_root(cset, root);
 
         BUG_ON(!res);
         return res;
@@ -1866,6 +1875,7 @@ int cgroup_show_path(struct seq_file *sf, struct kernfs_node *kf_node,
 
 enum cgroup2_param {
         Opt_nsdelegate,
+        Opt_favordynmods,
         Opt_memory_localevents,
         Opt_memory_recursiveprot,
         nr__cgroup2_params
@@ -1873,6 +1883,7 @@ enum cgroup2_param {
 
 static const struct fs_parameter_spec cgroup2_fs_parameters[] = {
         fsparam_flag("nsdelegate", Opt_nsdelegate),
+        fsparam_flag("favordynmods", Opt_favordynmods),
         fsparam_flag("memory_localevents", Opt_memory_localevents),
         fsparam_flag("memory_recursiveprot", Opt_memory_recursiveprot),
         {}
@@ -1892,6 +1903,9 @@ static int cgroup2_parse_param(struct fs_context *fc, struct fs_parameter *param)
         case Opt_nsdelegate:
                 ctx->flags |= CGRP_ROOT_NS_DELEGATE;
                 return 0;
+        case Opt_favordynmods:
+                ctx->flags |= CGRP_ROOT_FAVOR_DYNMODS;
+                return 0;
         case Opt_memory_localevents:
                 ctx->flags |= CGRP_ROOT_MEMORY_LOCAL_EVENTS;
                 return 0;
@@ -1910,6 +1924,9 @@ static void apply_cgroup_root_flags(unsigned int root_flags)
         else
                 cgrp_dfl_root.flags &= ~CGRP_ROOT_NS_DELEGATE;
 
+        cgroup_favor_dynmods(&cgrp_dfl_root,
+                             root_flags & CGRP_ROOT_FAVOR_DYNMODS);
+
         if (root_flags & CGRP_ROOT_MEMORY_LOCAL_EVENTS)
                 cgrp_dfl_root.flags |= CGRP_ROOT_MEMORY_LOCAL_EVENTS;
         else
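Since apply_cgroup_root_flags() above runs when the default hierarchy is remounted, favordynmods can also be toggled at runtime. A hedged sketch, assuming cgroup2 is already mounted at /sys/fs/cgroup and the legacy mount(2) remount path is used:

#include <stdio.h>
#include <sys/mount.h>

int main(void)
{
        /*
         * Toggle favordynmods on an already-mounted cgroup2 hierarchy;
         * the remount path reaches apply_cgroup_root_flags(), which
         * calls cgroup_favor_dynmods() on the default root. Roughly:
         *     mount -o remount,favordynmods /sys/fs/cgroup
         */
        if (mount(NULL, "/sys/fs/cgroup", NULL, MS_REMOUNT, "favordynmods") < 0) {
                perror("remount cgroup2");
                return 1;
        }
        return 0;
}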
@@ -1926,6 +1943,8 @@ static int cgroup_show_options(struct seq_file *seq, struct kernfs_root *kf_root)
 {
         if (cgrp_dfl_root.flags & CGRP_ROOT_NS_DELEGATE)
                 seq_puts(seq, ",nsdelegate");
+        if (cgrp_dfl_root.flags & CGRP_ROOT_FAVOR_DYNMODS)
+                seq_puts(seq, ",favordynmods");
         if (cgrp_dfl_root.flags & CGRP_ROOT_MEMORY_LOCAL_EVENTS)
                 seq_puts(seq, ",memory_localevents");
         if (cgrp_dfl_root.flags & CGRP_ROOT_MEMORY_RECURSIVE_PROT)
@@ -1976,7 +1995,8 @@ void init_cgroup_root(struct cgroup_fs_context *ctx)
         cgrp->root = root;
         init_cgroup_housekeeping(cgrp);
 
-        root->flags = ctx->flags;
+        /* DYNMODS must be modified through cgroup_favor_dynmods() */
+        root->flags = ctx->flags & ~CGRP_ROOT_FAVOR_DYNMODS;
         if (ctx->release_agent)
                 strscpy(root->release_agent_path, ctx->release_agent, PATH_MAX);
         if (ctx->name)
@@ -2198,6 +2218,10 @@ static int cgroup_init_fs_context(struct fs_context *fc)
         put_user_ns(fc->user_ns);
         fc->user_ns = get_user_ns(ctx->ns->user_ns);
         fc->global = true;
+
+#ifdef CONFIG_CGROUP_FAVOR_DYNMODS
+        ctx->flags |= CGRP_ROOT_FAVOR_DYNMODS;
+#endif
         return 0;
 }
 
@@ -2572,10 +2596,6 @@ int cgroup_migrate_vet_dst(struct cgroup *dst_cgrp)
         if (!cgroup_is_valid_domain(dst_cgrp->dom_cgrp))
                 return -EOPNOTSUPP;
 
-        /* mixables don't care */
-        if (cgroup_is_mixable(dst_cgrp))
-                return 0;
-
         /*
          * If @dst_cgrp is already or can become a thread root or is
          * threaded, it doesn't matter.
@@ -2949,22 +2969,40 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp)
         struct cgroup_subsys_state *d_css;
         struct cgroup *dsct;
         struct css_set *src_cset;
+        bool has_tasks;
         int ret;
 
         lockdep_assert_held(&cgroup_mutex);
 
-        percpu_down_write(&cgroup_threadgroup_rwsem);
-
         /* look up all csses currently attached to @cgrp's subtree */
         spin_lock_irq(&css_set_lock);
         cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) {
                 struct cgrp_cset_link *link;
 
+                /*
+                 * As cgroup_update_dfl_csses() is only called by
+                 * cgroup_apply_control(). The csses associated with the
+                 * given cgrp will not be affected by changes made to
+                 * its subtree_control file. We can skip them.
+                 */
+                if (dsct == cgrp)
+                        continue;
+
                 list_for_each_entry(link, &dsct->cset_links, cset_link)
                         cgroup_migrate_add_src(link->cset, dsct, &mgctx);
         }
         spin_unlock_irq(&css_set_lock);
 
+        /*
+         * We need to write-lock threadgroup_rwsem while migrating tasks.
+         * However, if there are no source csets for @cgrp, changing its
+         * controllers isn't gonna produce any task migrations and the
+         * write-locking can be skipped safely.
+         */
+        has_tasks = !list_empty(&mgctx.preloaded_src_csets);
+        if (has_tasks)
+                percpu_down_write(&cgroup_threadgroup_rwsem);
+
         /* NULL dst indicates self on default hierarchy */
         ret = cgroup_migrate_prepare_dst(&mgctx);
         if (ret)
@@ -2984,6 +3022,7 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp)
         ret = cgroup_migrate_execute(&mgctx);
 out_finish:
         cgroup_migrate_finish(&mgctx);
-        percpu_up_write(&cgroup_threadgroup_rwsem);
+        if (has_tasks)
+                percpu_up_write(&cgroup_threadgroup_rwsem);
         return ret;
 }
@@ -3618,21 +3657,21 @@ static int cpu_stat_show(struct seq_file *seq, void *v)
 static int cgroup_io_pressure_show(struct seq_file *seq, void *v)
 {
         struct cgroup *cgrp = seq_css(seq)->cgroup;
-        struct psi_group *psi = cgroup_ino(cgrp) == 1 ? &psi_system : &cgrp->psi;
+        struct psi_group *psi = cgroup_ino(cgrp) == 1 ? &psi_system : cgrp->psi;
 
         return psi_show(seq, psi, PSI_IO);
 }
 static int cgroup_memory_pressure_show(struct seq_file *seq, void *v)
 {
         struct cgroup *cgrp = seq_css(seq)->cgroup;
-        struct psi_group *psi = cgroup_ino(cgrp) == 1 ? &psi_system : &cgrp->psi;
+        struct psi_group *psi = cgroup_ino(cgrp) == 1 ? &psi_system : cgrp->psi;
 
         return psi_show(seq, psi, PSI_MEM);
 }
 static int cgroup_cpu_pressure_show(struct seq_file *seq, void *v)
 {
         struct cgroup *cgrp = seq_css(seq)->cgroup;
-        struct psi_group *psi = cgroup_ino(cgrp) == 1 ? &psi_system : &cgrp->psi;
+        struct psi_group *psi = cgroup_ino(cgrp) == 1 ? &psi_system : cgrp->psi;
 
         return psi_show(seq, psi, PSI_CPU);
 }
@@ -3658,7 +3697,7 @@ static ssize_t cgroup_pressure_write(struct kernfs_open_file *of, char *buf,
                 return -EBUSY;
         }
 
-        psi = cgroup_ino(cgrp) == 1 ? &psi_system : &cgrp->psi;
+        psi = cgroup_ino(cgrp) == 1 ? &psi_system : cgrp->psi;
         new = psi_trigger_create(psi, buf, nbytes, res);
         if (IS_ERR(new)) {
                 cgroup_put(cgrp);
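cgroup_pressure_write() above is the kernel side of the PSI trigger interface: userspace writes a threshold specification to one of the per-cgroup pressure files and then polls the same fd for notifications. A sketch following the interface described in Documentation/accounting/psi.rst; the cgroup path and thresholds are arbitrary examples.

#include <fcntl.h>
#include <poll.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
        /*
         * Arm a PSI trigger: notify when "some" memory stall time exceeds
         * 150ms within a 1s window, then wait for POLLPRI on the same fd.
         */
        int fd = open("/sys/fs/cgroup/mygroup/memory.pressure",
                      O_RDWR | O_NONBLOCK);
        if (fd < 0) {
                perror("open memory.pressure");
                return 1;
        }

        const char trig[] = "some 150000 1000000";
        if (write(fd, trig, strlen(trig) + 1) < 0) {
                perror("write trigger");
                return 1;
        }

        struct pollfd pfd = { .fd = fd, .events = POLLPRI };
        if (poll(&pfd, 1, -1) > 0 && (pfd.revents & POLLPRI))
                printf("memory pressure threshold crossed\n");

        close(fd);
        return 0;
}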
@@ -5851,12 +5890,6 @@ int __init cgroup_init(void)
 
         cgroup_rstat_boot();
 
-        /*
-         * The latency of the synchronize_rcu() is too high for cgroups,
-         * avoid it at the cost of forcing all readers into the slow path.
-         */
-        rcu_sync_enter_start(&cgroup_threadgroup_rwsem.rss);
-
         get_user_ns(init_cgroup_ns.user_ns);
 
         mutex_lock(&cgroup_mutex);
@@ -6768,6 +6801,7 @@ static ssize_t features_show(struct kobject *kobj, struct kobj_attribute *attr,
 {
         return snprintf(buf, PAGE_SIZE,
                         "nsdelegate\n"
+                        "favordynmods\n"
                         "memory_localevents\n"
                         "memory_recursiveprot\n");
 }
@@ -957,10 +957,16 @@ int psi_cgroup_alloc(struct cgroup *cgroup)
         if (static_branch_likely(&psi_disabled))
                 return 0;
 
-        cgroup->psi.pcpu = alloc_percpu(struct psi_group_cpu);
-        if (!cgroup->psi.pcpu)
+        cgroup->psi = kmalloc(sizeof(struct psi_group), GFP_KERNEL);
+        if (!cgroup->psi)
                 return -ENOMEM;
-        group_init(&cgroup->psi);
+
+        cgroup->psi->pcpu = alloc_percpu(struct psi_group_cpu);
+        if (!cgroup->psi->pcpu) {
+                kfree(cgroup->psi);
+                return -ENOMEM;
+        }
+        group_init(cgroup->psi);
         return 0;
 }
 
@@ -969,10 +975,11 @@ void psi_cgroup_free(struct cgroup *cgroup)
         if (static_branch_likely(&psi_disabled))
                 return;
 
-        cancel_delayed_work_sync(&cgroup->psi.avgs_work);
-        free_percpu(cgroup->psi.pcpu);
+        cancel_delayed_work_sync(&cgroup->psi->avgs_work);
+        free_percpu(cgroup->psi->pcpu);
         /* All triggers must be removed by now */
-        WARN_ONCE(cgroup->psi.poll_states, "psi: trigger leak\n");
+        WARN_ONCE(cgroup->psi->poll_states, "psi: trigger leak\n");
+        kfree(cgroup->psi);
 }
 
 /**