Merge branch 'for-3.15-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup
Pull cgroup fixes from Tejun Heo: "During recent restructuring, device_cgroup unified config input check and enforcement logic; unfortunately, it turned out to share too much. Aristeu's patches fix the breakage and are marked for -stable backport. The other two patches are fallout from the kernfs conversion. The blkcg change is temporary and will go away once kernfs internal locking gets simplified (patches pending)." * 'for-3.15-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup: blkcg: use trylock on blkcg_pol_mutex in blkcg_reset_stats(); device_cgroup: check if exception removal is allowed; device_cgroup: fix the comment format for recently added functions; device_cgroup: rework device access check and exception checking; cgroup: fix the retry path of cgroup_mount()
This commit is contained in:
commit
26a41cd1ee
@ -451,7 +451,20 @@ static int blkcg_reset_stats(struct cgroup_subsys_state *css,
|
|||||||
struct blkcg_gq *blkg;
|
struct blkcg_gq *blkg;
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
mutex_lock(&blkcg_pol_mutex);
|
/*
|
||||||
|
* XXX: We invoke cgroup_add/rm_cftypes() under blkcg_pol_mutex
|
||||||
|
* which ends up putting cgroup's internal cgroup_tree_mutex under
|
||||||
|
* it; however, cgroup_tree_mutex is nested above cgroup file
|
||||||
|
* active protection and grabbing blkcg_pol_mutex from a cgroup
|
||||||
|
* file operation creates a possible circular dependency. cgroup
|
||||||
|
* internal locking is planned to go through further simplification
|
||||||
|
* and this issue should go away soon. For now, let's trylock
|
||||||
|
* blkcg_pol_mutex and restart the write on failure.
|
||||||
|
*
|
||||||
|
* http://lkml.kernel.org/g/5363C04B.4010400@oracle.com
|
||||||
|
*/
|
||||||
|
if (!mutex_trylock(&blkcg_pol_mutex))
|
||||||
|
return restart_syscall();
|
||||||
spin_lock_irq(&blkcg->lock);
|
spin_lock_irq(&blkcg->lock);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -1495,7 +1495,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
|
|||||||
*/
|
*/
|
||||||
if (!use_task_css_set_links)
|
if (!use_task_css_set_links)
|
||||||
cgroup_enable_task_cg_lists();
|
cgroup_enable_task_cg_lists();
|
||||||
retry:
|
|
||||||
mutex_lock(&cgroup_tree_mutex);
|
mutex_lock(&cgroup_tree_mutex);
|
||||||
mutex_lock(&cgroup_mutex);
|
mutex_lock(&cgroup_mutex);
|
||||||
|
|
||||||
@ -1503,7 +1503,7 @@ retry:
|
|||||||
ret = parse_cgroupfs_options(data, &opts);
|
ret = parse_cgroupfs_options(data, &opts);
|
||||||
if (ret)
|
if (ret)
|
||||||
goto out_unlock;
|
goto out_unlock;
|
||||||
|
retry:
|
||||||
/* look for a matching existing root */
|
/* look for a matching existing root */
|
||||||
if (!opts.subsys_mask && !opts.none && !opts.name) {
|
if (!opts.subsys_mask && !opts.none && !opts.name) {
|
||||||
cgrp_dfl_root_visible = true;
|
cgrp_dfl_root_visible = true;
|
||||||
@ -1562,9 +1562,9 @@ retry:
|
|||||||
if (!atomic_inc_not_zero(&root->cgrp.refcnt)) {
|
if (!atomic_inc_not_zero(&root->cgrp.refcnt)) {
|
||||||
mutex_unlock(&cgroup_mutex);
|
mutex_unlock(&cgroup_mutex);
|
||||||
mutex_unlock(&cgroup_tree_mutex);
|
mutex_unlock(&cgroup_tree_mutex);
|
||||||
kfree(opts.release_agent);
|
|
||||||
kfree(opts.name);
|
|
||||||
msleep(10);
|
msleep(10);
|
||||||
|
mutex_lock(&cgroup_tree_mutex);
|
||||||
|
mutex_lock(&cgroup_mutex);
|
||||||
goto retry;
|
goto retry;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -306,57 +306,138 @@ static int devcgroup_seq_show(struct seq_file *m, void *v)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* may_access - verifies if a new exception is part of what is allowed
|
* match_exception - iterates the exception list trying to find a complete match
|
||||||
* by a dev cgroup based on the default policy +
|
* @exceptions: list of exceptions
|
||||||
* exceptions. This is used to make sure a child cgroup
|
* @type: device type (DEV_BLOCK or DEV_CHAR)
|
||||||
* won't have more privileges than its parent or to
|
* @major: device file major number, ~0 to match all
|
||||||
* verify if a certain access is allowed.
|
* @minor: device file minor number, ~0 to match all
|
||||||
* @dev_cgroup: dev cgroup to be tested against
|
* @access: permission mask (ACC_READ, ACC_WRITE, ACC_MKNOD)
|
||||||
* @refex: new exception
|
*
|
||||||
* @behavior: behavior of the exception
|
* It is considered a complete match if an exception is found that will
|
||||||
|
* contain the entire range of provided parameters.
|
||||||
|
*
|
||||||
|
* Return: true in case it matches an exception completely
|
||||||
*/
|
*/
|
||||||
static bool may_access(struct dev_cgroup *dev_cgroup,
|
static bool match_exception(struct list_head *exceptions, short type,
|
||||||
struct dev_exception_item *refex,
|
u32 major, u32 minor, short access)
|
||||||
enum devcg_behavior behavior)
|
|
||||||
{
|
{
|
||||||
struct dev_exception_item *ex;
|
struct dev_exception_item *ex;
|
||||||
|
|
||||||
|
list_for_each_entry_rcu(ex, exceptions, list) {
|
||||||
|
if ((type & DEV_BLOCK) && !(ex->type & DEV_BLOCK))
|
||||||
|
continue;
|
||||||
|
if ((type & DEV_CHAR) && !(ex->type & DEV_CHAR))
|
||||||
|
continue;
|
||||||
|
if (ex->major != ~0 && ex->major != major)
|
||||||
|
continue;
|
||||||
|
if (ex->minor != ~0 && ex->minor != minor)
|
||||||
|
continue;
|
||||||
|
/* provided access cannot have more than the exception rule */
|
||||||
|
if (access & (~ex->access))
|
||||||
|
continue;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* match_exception_partial - iterates the exception list trying to find a partial match
|
||||||
|
* @exceptions: list of exceptions
|
||||||
|
* @type: device type (DEV_BLOCK or DEV_CHAR)
|
||||||
|
* @major: device file major number, ~0 to match all
|
||||||
|
* @minor: device file minor number, ~0 to match all
|
||||||
|
* @access: permission mask (ACC_READ, ACC_WRITE, ACC_MKNOD)
|
||||||
|
*
|
||||||
|
* It is considered a partial match if an exception's range is found to
|
||||||
|
* contain *any* of the devices specified by provided parameters. This is
|
||||||
|
* used to make sure no extra access is being granted that is forbidden by
|
||||||
|
* any of the exception list.
|
||||||
|
*
|
||||||
|
* Return: true in case the provided range partially matches an exception
|
||||||
|
*/
|
||||||
|
static bool match_exception_partial(struct list_head *exceptions, short type,
|
||||||
|
u32 major, u32 minor, short access)
|
||||||
|
{
|
||||||
|
struct dev_exception_item *ex;
|
||||||
|
|
||||||
|
list_for_each_entry_rcu(ex, exceptions, list) {
|
||||||
|
if ((type & DEV_BLOCK) && !(ex->type & DEV_BLOCK))
|
||||||
|
continue;
|
||||||
|
if ((type & DEV_CHAR) && !(ex->type & DEV_CHAR))
|
||||||
|
continue;
|
||||||
|
/*
|
||||||
|
* We must be sure that both the exception and the provided
|
||||||
|
* range aren't masking all devices
|
||||||
|
*/
|
||||||
|
if (ex->major != ~0 && major != ~0 && ex->major != major)
|
||||||
|
continue;
|
||||||
|
if (ex->minor != ~0 && minor != ~0 && ex->minor != minor)
|
||||||
|
continue;
|
||||||
|
/*
|
||||||
|
* In order to make sure the provided range isn't matching
|
||||||
|
* an exception, all its access bits shouldn't match the
|
||||||
|
* exception's access bits
|
||||||
|
*/
|
||||||
|
if (!(access & ex->access))
|
||||||
|
continue;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* verify_new_ex - verifies if a new exception is allowed by parent cgroup's permissions
|
||||||
|
* @dev_cgroup: dev cgroup to be tested against
|
||||||
|
* @refex: new exception
|
||||||
|
* @behavior: behavior of the exception's dev_cgroup
|
||||||
|
*
|
||||||
|
* This is used to make sure a child cgroup won't have more privileges
|
||||||
|
* than its parent
|
||||||
|
*/
|
||||||
|
static bool verify_new_ex(struct dev_cgroup *dev_cgroup,
|
||||||
|
struct dev_exception_item *refex,
|
||||||
|
enum devcg_behavior behavior)
|
||||||
|
{
|
||||||
bool match = false;
|
bool match = false;
|
||||||
|
|
||||||
rcu_lockdep_assert(rcu_read_lock_held() ||
|
rcu_lockdep_assert(rcu_read_lock_held() ||
|
||||||
lockdep_is_held(&devcgroup_mutex),
|
lockdep_is_held(&devcgroup_mutex),
|
||||||
"device_cgroup::may_access() called without proper synchronization");
|
"device_cgroup:verify_new_ex called without proper synchronization");
|
||||||
|
|
||||||
list_for_each_entry_rcu(ex, &dev_cgroup->exceptions, list) {
|
|
||||||
if ((refex->type & DEV_BLOCK) && !(ex->type & DEV_BLOCK))
|
|
||||||
continue;
|
|
||||||
if ((refex->type & DEV_CHAR) && !(ex->type & DEV_CHAR))
|
|
||||||
continue;
|
|
||||||
if (ex->major != ~0 && ex->major != refex->major)
|
|
||||||
continue;
|
|
||||||
if (ex->minor != ~0 && ex->minor != refex->minor)
|
|
||||||
continue;
|
|
||||||
if (refex->access & (~ex->access))
|
|
||||||
continue;
|
|
||||||
match = true;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (dev_cgroup->behavior == DEVCG_DEFAULT_ALLOW) {
|
if (dev_cgroup->behavior == DEVCG_DEFAULT_ALLOW) {
|
||||||
if (behavior == DEVCG_DEFAULT_ALLOW) {
|
if (behavior == DEVCG_DEFAULT_ALLOW) {
|
||||||
/* the exception will deny access to certain devices */
|
/*
|
||||||
|
* new exception in the child doesn't matter, only
|
||||||
|
* adding extra restrictions
|
||||||
|
*/
|
||||||
return true;
|
return true;
|
||||||
} else {
|
} else {
|
||||||
/* the exception will allow access to certain devices */
|
/*
|
||||||
|
* new exception in the child will add more devices
|
||||||
|
* that can be accessed, so it can't match any of
|
||||||
|
* parent's exceptions, even slightly
|
||||||
|
*/
|
||||||
|
match = match_exception_partial(&dev_cgroup->exceptions,
|
||||||
|
refex->type,
|
||||||
|
refex->major,
|
||||||
|
refex->minor,
|
||||||
|
refex->access);
|
||||||
|
|
||||||
if (match)
|
if (match)
|
||||||
/*
|
|
||||||
* a new exception allowing access shouldn't
|
|
||||||
* match an parent's exception
|
|
||||||
*/
|
|
||||||
return false;
|
return false;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
/* only behavior == DEVCG_DEFAULT_DENY allowed here */
|
/*
|
||||||
|
* Only behavior == DEVCG_DEFAULT_DENY allowed here, therefore
|
||||||
|
* the new exception will add access to more devices and must
|
||||||
|
* be contained completely in a parent's exception to be
|
||||||
|
* allowed
|
||||||
|
*/
|
||||||
|
match = match_exception(&dev_cgroup->exceptions, refex->type,
|
||||||
|
refex->major, refex->minor,
|
||||||
|
refex->access);
|
||||||
|
|
||||||
if (match)
|
if (match)
|
||||||
/* parent has an exception that matches the proposed */
|
/* parent has an exception that matches the proposed */
|
||||||
return true;
|
return true;
|
||||||
@ -378,7 +459,38 @@ static int parent_has_perm(struct dev_cgroup *childcg,
|
|||||||
|
|
||||||
if (!parent)
|
if (!parent)
|
||||||
return 1;
|
return 1;
|
||||||
return may_access(parent, ex, childcg->behavior);
|
return verify_new_ex(parent, ex, childcg->behavior);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* parent_allows_removal - verify if it's ok to remove an exception
|
||||||
|
* @childcg: child cgroup from where the exception will be removed
|
||||||
|
* @ex: exception being removed
|
||||||
|
*
|
||||||
|
* When removing an exception in cgroups with default ALLOW policy, it must
|
||||||
|
* be checked if removing it will give the child cgroup more access than the
|
||||||
|
* parent.
|
||||||
|
*
|
||||||
|
* Return: true if it's ok to remove exception, false otherwise
|
||||||
|
*/
|
||||||
|
static bool parent_allows_removal(struct dev_cgroup *childcg,
|
||||||
|
struct dev_exception_item *ex)
|
||||||
|
{
|
||||||
|
struct dev_cgroup *parent = css_to_devcgroup(css_parent(&childcg->css));
|
||||||
|
|
||||||
|
if (!parent)
|
||||||
|
return true;
|
||||||
|
|
||||||
|
/* It's always allowed to remove access to devices */
|
||||||
|
if (childcg->behavior == DEVCG_DEFAULT_DENY)
|
||||||
|
return true;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Make sure you're not removing part or a whole exception existing in
|
||||||
|
* the parent cgroup
|
||||||
|
*/
|
||||||
|
return !match_exception_partial(&parent->exceptions, ex->type,
|
||||||
|
ex->major, ex->minor, ex->access);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -616,17 +728,21 @@ static int devcgroup_update_access(struct dev_cgroup *devcgroup,
|
|||||||
|
|
||||||
switch (filetype) {
|
switch (filetype) {
|
||||||
case DEVCG_ALLOW:
|
case DEVCG_ALLOW:
|
||||||
if (!parent_has_perm(devcgroup, &ex))
|
|
||||||
return -EPERM;
|
|
||||||
/*
|
/*
|
||||||
* If the default policy is to allow by default, try to remove
|
* If the default policy is to allow by default, try to remove
|
||||||
* an matching exception instead. And be silent about it: we
|
* an matching exception instead. And be silent about it: we
|
||||||
* don't want to break compatibility
|
* don't want to break compatibility
|
||||||
*/
|
*/
|
||||||
if (devcgroup->behavior == DEVCG_DEFAULT_ALLOW) {
|
if (devcgroup->behavior == DEVCG_DEFAULT_ALLOW) {
|
||||||
|
/* Check if the parent allows removing it first */
|
||||||
|
if (!parent_allows_removal(devcgroup, &ex))
|
||||||
|
return -EPERM;
|
||||||
dev_exception_rm(devcgroup, &ex);
|
dev_exception_rm(devcgroup, &ex);
|
||||||
return 0;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!parent_has_perm(devcgroup, &ex))
|
||||||
|
return -EPERM;
|
||||||
rc = dev_exception_add(devcgroup, &ex);
|
rc = dev_exception_add(devcgroup, &ex);
|
||||||
break;
|
break;
|
||||||
case DEVCG_DENY:
|
case DEVCG_DENY:
|
||||||
@ -704,18 +820,18 @@ static int __devcgroup_check_permission(short type, u32 major, u32 minor,
|
|||||||
short access)
|
short access)
|
||||||
{
|
{
|
||||||
struct dev_cgroup *dev_cgroup;
|
struct dev_cgroup *dev_cgroup;
|
||||||
struct dev_exception_item ex;
|
bool rc;
|
||||||
int rc;
|
|
||||||
|
|
||||||
memset(&ex, 0, sizeof(ex));
|
|
||||||
ex.type = type;
|
|
||||||
ex.major = major;
|
|
||||||
ex.minor = minor;
|
|
||||||
ex.access = access;
|
|
||||||
|
|
||||||
rcu_read_lock();
|
rcu_read_lock();
|
||||||
dev_cgroup = task_devcgroup(current);
|
dev_cgroup = task_devcgroup(current);
|
||||||
rc = may_access(dev_cgroup, &ex, dev_cgroup->behavior);
|
if (dev_cgroup->behavior == DEVCG_DEFAULT_ALLOW)
|
||||||
|
/* Can't match any of the exceptions, even partially */
|
||||||
|
rc = !match_exception_partial(&dev_cgroup->exceptions,
|
||||||
|
type, major, minor, access);
|
||||||
|
else
|
||||||
|
/* Need to match completely one exception to be allowed */
|
||||||
|
rc = match_exception(&dev_cgroup->exceptions, type, major,
|
||||||
|
minor, access);
|
||||||
rcu_read_unlock();
|
rcu_read_unlock();
|
||||||
|
|
||||||
if (!rc)
|
if (!rc)
|
||||||
|
Loading…
Reference in New Issue
Block a user