mirror of
https://github.com/torvalds/linux.git
synced 2024-11-11 06:31:49 +00:00
Task Control Groups: add cgroup_clone() interface
Add support for cgroup_clone(), a way to create new cgroups intended to be used for systems such as namespace unsharing. A new subsystem callback, post_clone(), is added to allow subsystems to automatically configure cloned cgroups. Signed-off-by: Paul Menage <menage@google.com> Cc: Serge E. Hallyn <serue@us.ibm.com> Cc: "Eric W. Biederman" <ebiederm@xmission.com> Cc: Dave Hansen <haveblue@us.ibm.com> Cc: Balbir Singh <balbir@in.ibm.com> Cc: Paul Jackson <pj@sgi.com> Cc: Kirill Korotaev <dev@openvz.org> Cc: Herbert Poetzl <herbert@13thfloor.at> Cc: Srivatsa Vaddagiri <vatsa@in.ibm.com> Cc: Cedric Le Goater <clg@fr.ibm.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
parent
b4f48b6363
commit
697f416108
@ -504,6 +504,13 @@ include/linux/cgroup.h for details). Note that although this
|
|||||||
method can return an error code, the error code is currently not
|
method can return an error code, the error code is currently not
|
||||||
always handled well.
|
always handled well.
|
||||||
|
|
||||||
|
void post_clone(struct cgroup_subsys *ss, struct cgroup *cont)
|
||||||
|
|
||||||
|
Called at the end of cgroup_clone() to do any paramater
|
||||||
|
initialization which might be required before a task could attach. For
|
||||||
|
example in cpusets, no task may attach before 'cpus' and 'mems' are set
|
||||||
|
up.
|
||||||
|
|
||||||
void bind(struct cgroup_subsys *ss, struct cgroup *root)
|
void bind(struct cgroup_subsys *ss, struct cgroup *root)
|
||||||
LL=callback_mutex
|
LL=callback_mutex
|
||||||
|
|
||||||
|
@ -182,6 +182,7 @@ struct cgroup_subsys {
|
|||||||
void (*exit)(struct cgroup_subsys *ss, struct task_struct *task);
|
void (*exit)(struct cgroup_subsys *ss, struct task_struct *task);
|
||||||
int (*populate)(struct cgroup_subsys *ss,
|
int (*populate)(struct cgroup_subsys *ss,
|
||||||
struct cgroup *cont);
|
struct cgroup *cont);
|
||||||
|
void (*post_clone)(struct cgroup_subsys *ss, struct cgroup *cont);
|
||||||
void (*bind)(struct cgroup_subsys *ss, struct cgroup *root);
|
void (*bind)(struct cgroup_subsys *ss, struct cgroup *root);
|
||||||
int subsys_id;
|
int subsys_id;
|
||||||
int active;
|
int active;
|
||||||
@ -221,6 +222,8 @@ static inline struct cgroup* task_cgroup(struct task_struct *task,
|
|||||||
|
|
||||||
int cgroup_path(const struct cgroup *cont, char *buf, int buflen);
|
int cgroup_path(const struct cgroup *cont, char *buf, int buflen);
|
||||||
|
|
||||||
|
int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *ss);
|
||||||
|
|
||||||
#else /* !CONFIG_CGROUPS */
|
#else /* !CONFIG_CGROUPS */
|
||||||
|
|
||||||
static inline int cgroup_init_early(void) { return 0; }
|
static inline int cgroup_init_early(void) { return 0; }
|
||||||
|
135
kernel/cgroup.c
135
kernel/cgroup.c
@ -1708,3 +1708,138 @@ void cgroup_exit(struct task_struct *tsk, int run_callbacks)
|
|||||||
tsk->cgroups = init_task.cgroups;
|
tsk->cgroups = init_task.cgroups;
|
||||||
task_unlock(tsk);
|
task_unlock(tsk);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* cgroup_clone - duplicate the current cgroup in the hierarchy
|
||||||
|
* that the given subsystem is attached to, and move this task into
|
||||||
|
* the new child
|
||||||
|
*/
|
||||||
|
int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys)
|
||||||
|
{
|
||||||
|
struct dentry *dentry;
|
||||||
|
int ret = 0;
|
||||||
|
char nodename[MAX_CGROUP_TYPE_NAMELEN];
|
||||||
|
struct cgroup *parent, *child;
|
||||||
|
struct inode *inode;
|
||||||
|
struct css_set *cg;
|
||||||
|
struct cgroupfs_root *root;
|
||||||
|
struct cgroup_subsys *ss;
|
||||||
|
|
||||||
|
/* We shouldn't be called by an unregistered subsystem */
|
||||||
|
BUG_ON(!subsys->active);
|
||||||
|
|
||||||
|
/* First figure out what hierarchy and cgroup we're dealing
|
||||||
|
* with, and pin them so we can drop cgroup_mutex */
|
||||||
|
mutex_lock(&cgroup_mutex);
|
||||||
|
again:
|
||||||
|
root = subsys->root;
|
||||||
|
if (root == &rootnode) {
|
||||||
|
printk(KERN_INFO
|
||||||
|
"Not cloning cgroup for unused subsystem %s\n",
|
||||||
|
subsys->name);
|
||||||
|
mutex_unlock(&cgroup_mutex);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
cg = &tsk->cgroups;
|
||||||
|
parent = task_cgroup(tsk, subsys->subsys_id);
|
||||||
|
|
||||||
|
snprintf(nodename, MAX_CGROUP_TYPE_NAMELEN, "node_%d", tsk->pid);
|
||||||
|
|
||||||
|
/* Pin the hierarchy */
|
||||||
|
atomic_inc(&parent->root->sb->s_active);
|
||||||
|
|
||||||
|
mutex_unlock(&cgroup_mutex);
|
||||||
|
|
||||||
|
/* Now do the VFS work to create a cgroup */
|
||||||
|
inode = parent->dentry->d_inode;
|
||||||
|
|
||||||
|
/* Hold the parent directory mutex across this operation to
|
||||||
|
* stop anyone else deleting the new cgroup */
|
||||||
|
mutex_lock(&inode->i_mutex);
|
||||||
|
dentry = lookup_one_len(nodename, parent->dentry, strlen(nodename));
|
||||||
|
if (IS_ERR(dentry)) {
|
||||||
|
printk(KERN_INFO
|
||||||
|
"Couldn't allocate dentry for %s: %ld\n", nodename,
|
||||||
|
PTR_ERR(dentry));
|
||||||
|
ret = PTR_ERR(dentry);
|
||||||
|
goto out_release;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Create the cgroup directory, which also creates the cgroup */
|
||||||
|
ret = vfs_mkdir(inode, dentry, S_IFDIR | 0755);
|
||||||
|
child = __d_cont(dentry);
|
||||||
|
dput(dentry);
|
||||||
|
if (ret) {
|
||||||
|
printk(KERN_INFO
|
||||||
|
"Failed to create cgroup %s: %d\n", nodename,
|
||||||
|
ret);
|
||||||
|
goto out_release;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!child) {
|
||||||
|
printk(KERN_INFO
|
||||||
|
"Couldn't find new cgroup %s\n", nodename);
|
||||||
|
ret = -ENOMEM;
|
||||||
|
goto out_release;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* The cgroup now exists. Retake cgroup_mutex and check
|
||||||
|
* that we're still in the same state that we thought we
|
||||||
|
* were. */
|
||||||
|
mutex_lock(&cgroup_mutex);
|
||||||
|
if ((root != subsys->root) ||
|
||||||
|
(parent != task_cgroup(tsk, subsys->subsys_id))) {
|
||||||
|
/* Aargh, we raced ... */
|
||||||
|
mutex_unlock(&inode->i_mutex);
|
||||||
|
|
||||||
|
deactivate_super(parent->root->sb);
|
||||||
|
/* The cgroup is still accessible in the VFS, but
|
||||||
|
* we're not going to try to rmdir() it at this
|
||||||
|
* point. */
|
||||||
|
printk(KERN_INFO
|
||||||
|
"Race in cgroup_clone() - leaking cgroup %s\n",
|
||||||
|
nodename);
|
||||||
|
goto again;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* do any required auto-setup */
|
||||||
|
for_each_subsys(root, ss) {
|
||||||
|
if (ss->post_clone)
|
||||||
|
ss->post_clone(ss, child);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* All seems fine. Finish by moving the task into the new cgroup */
|
||||||
|
ret = attach_task(child, tsk);
|
||||||
|
mutex_unlock(&cgroup_mutex);
|
||||||
|
|
||||||
|
out_release:
|
||||||
|
mutex_unlock(&inode->i_mutex);
|
||||||
|
deactivate_super(parent->root->sb);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* See if "cont" is a descendant of the current task's cgroup in
|
||||||
|
* the appropriate hierarchy
|
||||||
|
*
|
||||||
|
* If we are sending in dummytop, then presumably we are creating
|
||||||
|
* the top cgroup in the subsystem.
|
||||||
|
*
|
||||||
|
* Called only by the ns (nsproxy) cgroup.
|
||||||
|
*/
|
||||||
|
int cgroup_is_descendant(const struct cgroup *cont)
|
||||||
|
{
|
||||||
|
int ret;
|
||||||
|
struct cgroup *target;
|
||||||
|
int subsys_id;
|
||||||
|
|
||||||
|
if (cont == dummytop)
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
get_first_subsys(cont, NULL, &subsys_id);
|
||||||
|
target = task_cgroup(current, subsys_id);
|
||||||
|
while (cont != target && cont!= cont->top_cgroup)
|
||||||
|
cont = cont->parent;
|
||||||
|
ret = (cont == target);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user