forked from Minki/linux
Merge branch 'for-5.15' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup
Pull cgroup updates from Tejun Heo: "Two cpuset behavior changes: - cpuset on cgroup2 is changed to enable memory migration based on nodemask by default. - A notification is generated when cpuset partition state changes. All other patches are minor fixes and cleanups" * 'for-5.15' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup: cgroup: Avoid compiler warnings with no subsystems cgroup/cpuset: Avoid memory migration when nodemasks match cgroup/cpuset: Enable memory migration for cpuset v2 cgroup/cpuset: Enable event notification when partition state changes cgroup: cgroup-v1: clean up kernel-doc notation cgroup: Replace deprecated CPU-hotplug functions. cgroup/cpuset: Fix violation of cpuset locking rule cgroup/cpuset: Fix a partition bug with hotplug cgroup/cpuset: Miscellaneous code cleanup cgroup: remove cgroup_mount from comments
This commit is contained in:
commit
69dc8010b8
@ -2056,6 +2056,17 @@ Cpuset Interface Files
|
||||
The value of "cpuset.mems" stays constant until the next update
|
||||
and won't be affected by any memory nodes hotplug events.
|
||||
|
||||
Setting a non-empty value to "cpuset.mems" causes memory of
|
||||
tasks within the cgroup to be migrated to the designated nodes if
|
||||
they are currently using memory outside of the designated nodes.
|
||||
|
||||
There is a cost for this memory migration. The migration
|
||||
may not be complete and some memory pages may be left behind.
|
||||
So it is recommended that "cpuset.mems" should be set properly
|
||||
before spawning new tasks into the cpuset. Even if there is
|
||||
a need to change "cpuset.mems" with active tasks, it shouldn't
|
||||
be done frequently.
|
||||
|
||||
cpuset.mems.effective
|
||||
A read-only multiple values file which exists on all
|
||||
cpuset-enabled cgroups.
|
||||
|
@ -50,6 +50,8 @@ bool cgroup1_ssid_disabled(int ssid)
|
||||
* cgroup_attach_task_all - attach task 'tsk' to all cgroups of task 'from'
|
||||
* @from: attach to all cgroups of a given task
|
||||
* @tsk: the task to be attached
|
||||
*
|
||||
* Return: %0 on success or a negative errno code on failure
|
||||
*/
|
||||
int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk)
|
||||
{
|
||||
@ -80,7 +82,7 @@ int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk)
|
||||
EXPORT_SYMBOL_GPL(cgroup_attach_task_all);
|
||||
|
||||
/**
|
||||
* cgroup_trasnsfer_tasks - move tasks from one cgroup to another
|
||||
* cgroup_transfer_tasks - move tasks from one cgroup to another
|
||||
* @to: cgroup to which the tasks will be moved
|
||||
* @from: cgroup in which the tasks currently reside
|
||||
*
|
||||
@ -89,6 +91,8 @@ EXPORT_SYMBOL_GPL(cgroup_attach_task_all);
|
||||
* is guaranteed to be either visible in the source cgroup after the
|
||||
* parent's migration is complete or put into the target cgroup. No task
|
||||
* can slip out of migration through forking.
|
||||
*
|
||||
* Return: %0 on success or a negative errno code on failure
|
||||
*/
|
||||
int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from)
|
||||
{
|
||||
@ -682,6 +686,8 @@ int proc_cgroupstats_show(struct seq_file *m, void *v)
|
||||
*
|
||||
* Build and fill cgroupstats so that taskstats can export it to user
|
||||
* space.
|
||||
*
|
||||
* Return: %0 on success or a negative errno code on failure
|
||||
*/
|
||||
int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry)
|
||||
{
|
||||
|
@ -67,6 +67,14 @@
|
||||
/* let's not notify more than 100 times per second */
|
||||
#define CGROUP_FILE_NOTIFY_MIN_INTV DIV_ROUND_UP(HZ, 100)
|
||||
|
||||
/*
|
||||
* To avoid confusing the compiler (and generating warnings) with code
|
||||
* that attempts to access what would be a 0-element array (i.e. sized
|
||||
* to a potentially empty array when CGROUP_SUBSYS_COUNT == 0), this
|
||||
* constant expression can be added.
|
||||
*/
|
||||
#define CGROUP_HAS_SUBSYS_CONFIG (CGROUP_SUBSYS_COUNT > 0)
|
||||
|
||||
/*
|
||||
* cgroup_mutex is the master lock. Any modification to cgroup or its
|
||||
* hierarchy must be performed while holding it.
|
||||
@ -248,7 +256,7 @@ static int cgroup_addrm_files(struct cgroup_subsys_state *css,
|
||||
*/
|
||||
bool cgroup_ssid_enabled(int ssid)
|
||||
{
|
||||
if (CGROUP_SUBSYS_COUNT == 0)
|
||||
if (!CGROUP_HAS_SUBSYS_CONFIG)
|
||||
return false;
|
||||
|
||||
return static_key_enabled(cgroup_subsys_enabled_key[ssid]);
|
||||
@ -472,7 +480,7 @@ static u16 cgroup_ss_mask(struct cgroup *cgrp)
|
||||
static struct cgroup_subsys_state *cgroup_css(struct cgroup *cgrp,
|
||||
struct cgroup_subsys *ss)
|
||||
{
|
||||
if (ss)
|
||||
if (CGROUP_HAS_SUBSYS_CONFIG && ss)
|
||||
return rcu_dereference_check(cgrp->subsys[ss->id],
|
||||
lockdep_is_held(&cgroup_mutex));
|
||||
else
|
||||
@ -550,6 +558,9 @@ struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgrp,
|
||||
{
|
||||
struct cgroup_subsys_state *css;
|
||||
|
||||
if (!CGROUP_HAS_SUBSYS_CONFIG)
|
||||
return NULL;
|
||||
|
||||
do {
|
||||
css = cgroup_css(cgrp, ss);
|
||||
|
||||
@ -577,6 +588,9 @@ struct cgroup_subsys_state *cgroup_get_e_css(struct cgroup *cgrp,
|
||||
{
|
||||
struct cgroup_subsys_state *css;
|
||||
|
||||
if (!CGROUP_HAS_SUBSYS_CONFIG)
|
||||
return NULL;
|
||||
|
||||
rcu_read_lock();
|
||||
|
||||
do {
|
||||
@ -647,7 +661,7 @@ struct cgroup_subsys_state *of_css(struct kernfs_open_file *of)
|
||||
* the matching css from the cgroup's subsys table is guaranteed to
|
||||
* be and stay valid until the enclosing operation is complete.
|
||||
*/
|
||||
if (cft->ss)
|
||||
if (CGROUP_HAS_SUBSYS_CONFIG && cft->ss)
|
||||
return rcu_dereference_raw(cgrp->subsys[cft->ss->id]);
|
||||
else
|
||||
return &cgrp->self;
|
||||
@ -695,7 +709,7 @@ EXPORT_SYMBOL_GPL(of_css);
|
||||
*/
|
||||
#define do_each_subsys_mask(ss, ssid, ss_mask) do { \
|
||||
unsigned long __ss_mask = (ss_mask); \
|
||||
if (!CGROUP_SUBSYS_COUNT) { /* to avoid spurious gcc warning */ \
|
||||
if (!CGROUP_HAS_SUBSYS_CONFIG) { \
|
||||
(ssid) = 0; \
|
||||
break; \
|
||||
} \
|
||||
@ -2169,7 +2183,6 @@ static void cgroup_kill_sb(struct super_block *sb)
|
||||
/*
|
||||
* If @root doesn't have any children, start killing it.
|
||||
* This prevents new mounts by disabling percpu_ref_tryget_live().
|
||||
* cgroup_mount() may wait for @root's release.
|
||||
*
|
||||
* And don't kill the default root.
|
||||
*/
|
||||
@ -2373,7 +2386,7 @@ struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset,
|
||||
struct css_set *cset = tset->cur_cset;
|
||||
struct task_struct *task = tset->cur_task;
|
||||
|
||||
while (&cset->mg_node != tset->csets) {
|
||||
while (CGROUP_HAS_SUBSYS_CONFIG && &cset->mg_node != tset->csets) {
|
||||
if (!task)
|
||||
task = list_first_entry(&cset->mg_tasks,
|
||||
struct task_struct, cg_list);
|
||||
@ -4644,7 +4657,7 @@ void css_task_iter_start(struct cgroup_subsys_state *css, unsigned int flags,
|
||||
it->ss = css->ss;
|
||||
it->flags = flags;
|
||||
|
||||
if (it->ss)
|
||||
if (CGROUP_HAS_SUBSYS_CONFIG && it->ss)
|
||||
it->cset_pos = &css->cgroup->e_csets[css->ss->id];
|
||||
else
|
||||
it->cset_pos = &css->cgroup->cset_links;
|
||||
|
@ -160,6 +160,9 @@ struct cpuset {
|
||||
*/
|
||||
int use_parent_ecpus;
|
||||
int child_ecpus_count;
|
||||
|
||||
/* Handle for cpuset.cpus.partition */
|
||||
struct cgroup_file partition_file;
|
||||
};
|
||||
|
||||
/*
|
||||
@ -263,6 +266,16 @@ static inline int is_partition_root(const struct cpuset *cs)
|
||||
return cs->partition_root_state > 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Send a notification event whenever partition_root_state changes.
|
||||
*/
|
||||
static inline void notify_partition_change(struct cpuset *cs,
|
||||
int old_prs, int new_prs)
|
||||
{
|
||||
if (old_prs != new_prs)
|
||||
cgroup_file_notify(&cs->partition_file);
|
||||
}
|
||||
|
||||
static struct cpuset top_cpuset = {
|
||||
.flags = ((1 << CS_ONLINE) | (1 << CS_CPU_EXCLUSIVE) |
|
||||
(1 << CS_MEM_EXCLUSIVE)),
|
||||
@ -992,7 +1005,7 @@ partition_and_rebuild_sched_domains(int ndoms_new, cpumask_var_t doms_new[],
|
||||
* 'cpus' is removed, then call this routine to rebuild the
|
||||
* scheduler's dynamic sched domains.
|
||||
*
|
||||
* Call with cpuset_mutex held. Takes get_online_cpus().
|
||||
* Call with cpuset_mutex held. Takes cpus_read_lock().
|
||||
*/
|
||||
static void rebuild_sched_domains_locked(void)
|
||||
{
|
||||
@ -1053,11 +1066,11 @@ static void rebuild_sched_domains_locked(void)
|
||||
|
||||
void rebuild_sched_domains(void)
|
||||
{
|
||||
get_online_cpus();
|
||||
cpus_read_lock();
|
||||
percpu_down_write(&cpuset_rwsem);
|
||||
rebuild_sched_domains_locked();
|
||||
percpu_up_write(&cpuset_rwsem);
|
||||
put_online_cpus();
|
||||
cpus_read_unlock();
|
||||
}
|
||||
|
||||
/**
|
||||
@ -1127,7 +1140,7 @@ enum subparts_cmd {
|
||||
* cpus_allowed can be granted or an error code will be returned.
|
||||
*
|
||||
* For partcmd_disable, the cpuset is being transformed from a partition
|
||||
* root back to a non-partition root. any CPUs in cpus_allowed that are in
|
||||
* root back to a non-partition root. Any CPUs in cpus_allowed that are in
|
||||
* parent's subparts_cpus will be taken away from that cpumask and put back
|
||||
* into parent's effective_cpus. 0 should always be returned.
|
||||
*
|
||||
@ -1161,6 +1174,7 @@ static int update_parent_subparts_cpumask(struct cpuset *cpuset, int cmd,
|
||||
struct cpuset *parent = parent_cs(cpuset);
|
||||
int adding; /* Moving cpus from effective_cpus to subparts_cpus */
|
||||
int deleting; /* Moving cpus from subparts_cpus to effective_cpus */
|
||||
int old_prs, new_prs;
|
||||
bool part_error = false; /* Partition error? */
|
||||
|
||||
percpu_rwsem_assert_held(&cpuset_rwsem);
|
||||
@ -1196,6 +1210,7 @@ static int update_parent_subparts_cpumask(struct cpuset *cpuset, int cmd,
|
||||
* A cpumask update cannot make parent's effective_cpus become empty.
|
||||
*/
|
||||
adding = deleting = false;
|
||||
old_prs = new_prs = cpuset->partition_root_state;
|
||||
if (cmd == partcmd_enable) {
|
||||
cpumask_copy(tmp->addmask, cpuset->cpus_allowed);
|
||||
adding = true;
|
||||
@ -1238,7 +1253,7 @@ static int update_parent_subparts_cpumask(struct cpuset *cpuset, int cmd,
|
||||
/*
|
||||
* partcmd_update w/o newmask:
|
||||
*
|
||||
* addmask = cpus_allowed & parent->effectiveb_cpus
|
||||
* addmask = cpus_allowed & parent->effective_cpus
|
||||
*
|
||||
* Note that parent's subparts_cpus may have been
|
||||
* pre-shrunk in case there is a change in the cpu list.
|
||||
@ -1260,11 +1275,11 @@ static int update_parent_subparts_cpumask(struct cpuset *cpuset, int cmd,
|
||||
switch (cpuset->partition_root_state) {
|
||||
case PRS_ENABLED:
|
||||
if (part_error)
|
||||
cpuset->partition_root_state = PRS_ERROR;
|
||||
new_prs = PRS_ERROR;
|
||||
break;
|
||||
case PRS_ERROR:
|
||||
if (!part_error)
|
||||
cpuset->partition_root_state = PRS_ENABLED;
|
||||
new_prs = PRS_ENABLED;
|
||||
break;
|
||||
}
|
||||
/*
|
||||
@ -1273,10 +1288,10 @@ static int update_parent_subparts_cpumask(struct cpuset *cpuset, int cmd,
|
||||
part_error = (prev_prs == PRS_ERROR);
|
||||
}
|
||||
|
||||
if (!part_error && (cpuset->partition_root_state == PRS_ERROR))
|
||||
if (!part_error && (new_prs == PRS_ERROR))
|
||||
return 0; /* Nothing need to be done */
|
||||
|
||||
if (cpuset->partition_root_state == PRS_ERROR) {
|
||||
if (new_prs == PRS_ERROR) {
|
||||
/*
|
||||
* Remove all its cpus from parent's subparts_cpus.
|
||||
*/
|
||||
@ -1285,7 +1300,7 @@ static int update_parent_subparts_cpumask(struct cpuset *cpuset, int cmd,
|
||||
parent->subparts_cpus);
|
||||
}
|
||||
|
||||
if (!adding && !deleting)
|
||||
if (!adding && !deleting && (new_prs == old_prs))
|
||||
return 0;
|
||||
|
||||
/*
|
||||
@ -1312,7 +1327,12 @@ static int update_parent_subparts_cpumask(struct cpuset *cpuset, int cmd,
|
||||
}
|
||||
|
||||
parent->nr_subparts_cpus = cpumask_weight(parent->subparts_cpus);
|
||||
|
||||
if (old_prs != new_prs)
|
||||
cpuset->partition_root_state = new_prs;
|
||||
|
||||
spin_unlock_irq(&callback_lock);
|
||||
notify_partition_change(cpuset, old_prs, new_prs);
|
||||
|
||||
return cmd == partcmd_update;
|
||||
}
|
||||
@ -1334,6 +1354,7 @@ static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp)
|
||||
struct cpuset *cp;
|
||||
struct cgroup_subsys_state *pos_css;
|
||||
bool need_rebuild_sched_domains = false;
|
||||
int old_prs, new_prs;
|
||||
|
||||
rcu_read_lock();
|
||||
cpuset_for_each_descendant_pre(cp, pos_css, cs) {
|
||||
@ -1373,17 +1394,18 @@ static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp)
|
||||
* update_tasks_cpumask() again for tasks in the parent
|
||||
* cpuset if the parent's subparts_cpus changes.
|
||||
*/
|
||||
if ((cp != cs) && cp->partition_root_state) {
|
||||
old_prs = new_prs = cp->partition_root_state;
|
||||
if ((cp != cs) && old_prs) {
|
||||
switch (parent->partition_root_state) {
|
||||
case PRS_DISABLED:
|
||||
/*
|
||||
* If parent is not a partition root or an
|
||||
* invalid partition root, clear the state
|
||||
* state and the CS_CPU_EXCLUSIVE flag.
|
||||
* invalid partition root, clear its state
|
||||
* and its CS_CPU_EXCLUSIVE flag.
|
||||
*/
|
||||
WARN_ON_ONCE(cp->partition_root_state
|
||||
!= PRS_ERROR);
|
||||
cp->partition_root_state = 0;
|
||||
new_prs = PRS_DISABLED;
|
||||
|
||||
/*
|
||||
* clear_bit() is an atomic operation and
|
||||
@ -1404,11 +1426,7 @@ static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp)
|
||||
/*
|
||||
* When parent is invalid, it has to be too.
|
||||
*/
|
||||
cp->partition_root_state = PRS_ERROR;
|
||||
if (cp->nr_subparts_cpus) {
|
||||
cp->nr_subparts_cpus = 0;
|
||||
cpumask_clear(cp->subparts_cpus);
|
||||
}
|
||||
new_prs = PRS_ERROR;
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -1420,8 +1438,7 @@ static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp)
|
||||
spin_lock_irq(&callback_lock);
|
||||
|
||||
cpumask_copy(cp->effective_cpus, tmp->new_cpus);
|
||||
if (cp->nr_subparts_cpus &&
|
||||
(cp->partition_root_state != PRS_ENABLED)) {
|
||||
if (cp->nr_subparts_cpus && (new_prs != PRS_ENABLED)) {
|
||||
cp->nr_subparts_cpus = 0;
|
||||
cpumask_clear(cp->subparts_cpus);
|
||||
} else if (cp->nr_subparts_cpus) {
|
||||
@ -1448,7 +1465,12 @@ static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp)
|
||||
= cpumask_weight(cp->subparts_cpus);
|
||||
}
|
||||
}
|
||||
|
||||
if (new_prs != old_prs)
|
||||
cp->partition_root_state = new_prs;
|
||||
|
||||
spin_unlock_irq(&callback_lock);
|
||||
notify_partition_change(cp, old_prs, new_prs);
|
||||
|
||||
WARN_ON(!is_in_v2_mode() &&
|
||||
!cpumask_equal(cp->cpus_allowed, cp->effective_cpus));
|
||||
@ -1625,6 +1647,11 @@ static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from,
|
||||
{
|
||||
struct cpuset_migrate_mm_work *mwork;
|
||||
|
||||
if (nodes_equal(*from, *to)) {
|
||||
mmput(mm);
|
||||
return;
|
||||
}
|
||||
|
||||
mwork = kzalloc(sizeof(*mwork), GFP_KERNEL);
|
||||
if (mwork) {
|
||||
mwork->mm = mm;
|
||||
@ -1950,34 +1977,32 @@ out:
|
||||
|
||||
/*
|
||||
* update_prstate - update partition_root_state
|
||||
* cs: the cpuset to update
|
||||
* val: 0 - disabled, 1 - enabled
|
||||
* cs: the cpuset to update
|
||||
* new_prs: new partition root state
|
||||
*
|
||||
* Call with cpuset_mutex held.
|
||||
*/
|
||||
static int update_prstate(struct cpuset *cs, int val)
|
||||
static int update_prstate(struct cpuset *cs, int new_prs)
|
||||
{
|
||||
int err;
|
||||
int err, old_prs = cs->partition_root_state;
|
||||
struct cpuset *parent = parent_cs(cs);
|
||||
struct tmpmasks tmp;
|
||||
struct tmpmasks tmpmask;
|
||||
|
||||
if ((val != 0) && (val != 1))
|
||||
return -EINVAL;
|
||||
if (val == cs->partition_root_state)
|
||||
if (old_prs == new_prs)
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* Cannot force a partial or invalid partition root to a full
|
||||
* partition root.
|
||||
*/
|
||||
if (val && cs->partition_root_state)
|
||||
if (new_prs && (old_prs == PRS_ERROR))
|
||||
return -EINVAL;
|
||||
|
||||
if (alloc_cpumasks(NULL, &tmp))
|
||||
if (alloc_cpumasks(NULL, &tmpmask))
|
||||
return -ENOMEM;
|
||||
|
||||
err = -EINVAL;
|
||||
if (!cs->partition_root_state) {
|
||||
if (!old_prs) {
|
||||
/*
|
||||
* Turning on partition root requires setting the
|
||||
* CS_CPU_EXCLUSIVE bit implicitly as well and cpus_allowed
|
||||
@ -1991,31 +2016,27 @@ static int update_prstate(struct cpuset *cs, int val)
|
||||
goto out;
|
||||
|
||||
err = update_parent_subparts_cpumask(cs, partcmd_enable,
|
||||
NULL, &tmp);
|
||||
NULL, &tmpmask);
|
||||
if (err) {
|
||||
update_flag(CS_CPU_EXCLUSIVE, cs, 0);
|
||||
goto out;
|
||||
}
|
||||
cs->partition_root_state = PRS_ENABLED;
|
||||
} else {
|
||||
/*
|
||||
* Turning off partition root will clear the
|
||||
* CS_CPU_EXCLUSIVE bit.
|
||||
*/
|
||||
if (cs->partition_root_state == PRS_ERROR) {
|
||||
cs->partition_root_state = 0;
|
||||
if (old_prs == PRS_ERROR) {
|
||||
update_flag(CS_CPU_EXCLUSIVE, cs, 0);
|
||||
err = 0;
|
||||
goto out;
|
||||
}
|
||||
|
||||
err = update_parent_subparts_cpumask(cs, partcmd_disable,
|
||||
NULL, &tmp);
|
||||
NULL, &tmpmask);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
cs->partition_root_state = 0;
|
||||
|
||||
/* Turning off CS_CPU_EXCLUSIVE will not return error */
|
||||
update_flag(CS_CPU_EXCLUSIVE, cs, 0);
|
||||
}
|
||||
@ -2028,11 +2049,18 @@ static int update_prstate(struct cpuset *cs, int val)
|
||||
update_tasks_cpumask(parent);
|
||||
|
||||
if (parent->child_ecpus_count)
|
||||
update_sibling_cpumasks(parent, cs, &tmp);
|
||||
update_sibling_cpumasks(parent, cs, &tmpmask);
|
||||
|
||||
rebuild_sched_domains_locked();
|
||||
out:
|
||||
free_cpumasks(NULL, &tmp);
|
||||
if (!err) {
|
||||
spin_lock_irq(&callback_lock);
|
||||
cs->partition_root_state = new_prs;
|
||||
spin_unlock_irq(&callback_lock);
|
||||
notify_partition_change(cs, old_prs, new_prs);
|
||||
}
|
||||
|
||||
free_cpumasks(NULL, &tmpmask);
|
||||
return err;
|
||||
}
|
||||
|
||||
@ -2293,7 +2321,7 @@ static int cpuset_write_u64(struct cgroup_subsys_state *css, struct cftype *cft,
|
||||
cpuset_filetype_t type = cft->private;
|
||||
int retval = 0;
|
||||
|
||||
get_online_cpus();
|
||||
cpus_read_lock();
|
||||
percpu_down_write(&cpuset_rwsem);
|
||||
if (!is_cpuset_online(cs)) {
|
||||
retval = -ENODEV;
|
||||
@ -2331,7 +2359,7 @@ static int cpuset_write_u64(struct cgroup_subsys_state *css, struct cftype *cft,
|
||||
}
|
||||
out_unlock:
|
||||
percpu_up_write(&cpuset_rwsem);
|
||||
put_online_cpus();
|
||||
cpus_read_unlock();
|
||||
return retval;
|
||||
}
|
||||
|
||||
@ -2342,7 +2370,7 @@ static int cpuset_write_s64(struct cgroup_subsys_state *css, struct cftype *cft,
|
||||
cpuset_filetype_t type = cft->private;
|
||||
int retval = -ENODEV;
|
||||
|
||||
get_online_cpus();
|
||||
cpus_read_lock();
|
||||
percpu_down_write(&cpuset_rwsem);
|
||||
if (!is_cpuset_online(cs))
|
||||
goto out_unlock;
|
||||
@ -2357,7 +2385,7 @@ static int cpuset_write_s64(struct cgroup_subsys_state *css, struct cftype *cft,
|
||||
}
|
||||
out_unlock:
|
||||
percpu_up_write(&cpuset_rwsem);
|
||||
put_online_cpus();
|
||||
cpus_read_unlock();
|
||||
return retval;
|
||||
}
|
||||
|
||||
@ -2396,7 +2424,7 @@ static ssize_t cpuset_write_resmask(struct kernfs_open_file *of,
|
||||
kernfs_break_active_protection(of->kn);
|
||||
flush_work(&cpuset_hotplug_work);
|
||||
|
||||
get_online_cpus();
|
||||
cpus_read_lock();
|
||||
percpu_down_write(&cpuset_rwsem);
|
||||
if (!is_cpuset_online(cs))
|
||||
goto out_unlock;
|
||||
@ -2422,7 +2450,7 @@ static ssize_t cpuset_write_resmask(struct kernfs_open_file *of,
|
||||
free_cpuset(trialcs);
|
||||
out_unlock:
|
||||
percpu_up_write(&cpuset_rwsem);
|
||||
put_online_cpus();
|
||||
cpus_read_unlock();
|
||||
kernfs_unbreak_active_protection(of->kn);
|
||||
css_put(&cs->css);
|
||||
flush_workqueue(cpuset_migrate_mm_wq);
|
||||
@ -2553,7 +2581,7 @@ static ssize_t sched_partition_write(struct kernfs_open_file *of, char *buf,
|
||||
return -EINVAL;
|
||||
|
||||
css_get(&cs->css);
|
||||
get_online_cpus();
|
||||
cpus_read_lock();
|
||||
percpu_down_write(&cpuset_rwsem);
|
||||
if (!is_cpuset_online(cs))
|
||||
goto out_unlock;
|
||||
@ -2561,7 +2589,7 @@ static ssize_t sched_partition_write(struct kernfs_open_file *of, char *buf,
|
||||
retval = update_prstate(cs, val);
|
||||
out_unlock:
|
||||
percpu_up_write(&cpuset_rwsem);
|
||||
put_online_cpus();
|
||||
cpus_read_unlock();
|
||||
css_put(&cs->css);
|
||||
return retval ?: nbytes;
|
||||
}
|
||||
@ -2713,6 +2741,7 @@ static struct cftype dfl_files[] = {
|
||||
.write = sched_partition_write,
|
||||
.private = FILE_PARTITION_ROOT,
|
||||
.flags = CFTYPE_NOT_ON_ROOT,
|
||||
.file_offset = offsetof(struct cpuset, partition_file),
|
||||
},
|
||||
|
||||
{
|
||||
@ -2748,12 +2777,16 @@ cpuset_css_alloc(struct cgroup_subsys_state *parent_css)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
}
|
||||
|
||||
set_bit(CS_SCHED_LOAD_BALANCE, &cs->flags);
|
||||
__set_bit(CS_SCHED_LOAD_BALANCE, &cs->flags);
|
||||
nodes_clear(cs->mems_allowed);
|
||||
nodes_clear(cs->effective_mems);
|
||||
fmeter_init(&cs->fmeter);
|
||||
cs->relax_domain_level = -1;
|
||||
|
||||
/* Set CS_MEMORY_MIGRATE for default hierarchy */
|
||||
if (cgroup_subsys_on_dfl(cpuset_cgrp_subsys))
|
||||
__set_bit(CS_MEMORY_MIGRATE, &cs->flags);
|
||||
|
||||
return &cs->css;
|
||||
}
|
||||
|
||||
@ -2767,7 +2800,7 @@ static int cpuset_css_online(struct cgroup_subsys_state *css)
|
||||
if (!parent)
|
||||
return 0;
|
||||
|
||||
get_online_cpus();
|
||||
cpus_read_lock();
|
||||
percpu_down_write(&cpuset_rwsem);
|
||||
|
||||
set_bit(CS_ONLINE, &cs->flags);
|
||||
@ -2820,7 +2853,7 @@ static int cpuset_css_online(struct cgroup_subsys_state *css)
|
||||
spin_unlock_irq(&callback_lock);
|
||||
out_unlock:
|
||||
percpu_up_write(&cpuset_rwsem);
|
||||
put_online_cpus();
|
||||
cpus_read_unlock();
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -2839,7 +2872,7 @@ static void cpuset_css_offline(struct cgroup_subsys_state *css)
|
||||
{
|
||||
struct cpuset *cs = css_cs(css);
|
||||
|
||||
get_online_cpus();
|
||||
cpus_read_lock();
|
||||
percpu_down_write(&cpuset_rwsem);
|
||||
|
||||
if (is_partition_root(cs))
|
||||
@ -2860,7 +2893,7 @@ static void cpuset_css_offline(struct cgroup_subsys_state *css)
|
||||
clear_bit(CS_ONLINE, &cs->flags);
|
||||
|
||||
percpu_up_write(&cpuset_rwsem);
|
||||
put_online_cpus();
|
||||
cpus_read_unlock();
|
||||
}
|
||||
|
||||
static void cpuset_css_free(struct cgroup_subsys_state *css)
|
||||
@ -3071,7 +3104,7 @@ retry:
|
||||
goto retry;
|
||||
}
|
||||
|
||||
parent = parent_cs(cs);
|
||||
parent = parent_cs(cs);
|
||||
compute_effective_cpumask(&new_cpus, cs, parent);
|
||||
nodes_and(new_mems, cs->mems_allowed, parent->effective_mems);
|
||||
|
||||
@ -3093,8 +3126,10 @@ retry:
|
||||
if (is_partition_root(cs) && (cpumask_empty(&new_cpus) ||
|
||||
(parent->partition_root_state == PRS_ERROR))) {
|
||||
if (cs->nr_subparts_cpus) {
|
||||
spin_lock_irq(&callback_lock);
|
||||
cs->nr_subparts_cpus = 0;
|
||||
cpumask_clear(cs->subparts_cpus);
|
||||
spin_unlock_irq(&callback_lock);
|
||||
compute_effective_cpumask(&new_cpus, cs, parent);
|
||||
}
|
||||
|
||||
@ -3106,9 +3141,17 @@ retry:
|
||||
*/
|
||||
if ((parent->partition_root_state == PRS_ERROR) ||
|
||||
cpumask_empty(&new_cpus)) {
|
||||
int old_prs;
|
||||
|
||||
update_parent_subparts_cpumask(cs, partcmd_disable,
|
||||
NULL, tmp);
|
||||
cs->partition_root_state = PRS_ERROR;
|
||||
old_prs = cs->partition_root_state;
|
||||
if (old_prs != PRS_ERROR) {
|
||||
spin_lock_irq(&callback_lock);
|
||||
cs->partition_root_state = PRS_ERROR;
|
||||
spin_unlock_irq(&callback_lock);
|
||||
notify_partition_change(cs, old_prs, PRS_ERROR);
|
||||
}
|
||||
}
|
||||
cpuset_force_rebuild();
|
||||
}
|
||||
@ -3179,6 +3222,13 @@ static void cpuset_hotplug_workfn(struct work_struct *work)
|
||||
cpus_updated = !cpumask_equal(top_cpuset.effective_cpus, &new_cpus);
|
||||
mems_updated = !nodes_equal(top_cpuset.effective_mems, new_mems);
|
||||
|
||||
/*
|
||||
* In the rare case that hotplug removes all the cpus in subparts_cpus,
|
||||
* we assume that cpus are updated.
|
||||
*/
|
||||
if (!cpus_updated && top_cpuset.nr_subparts_cpus)
|
||||
cpus_updated = true;
|
||||
|
||||
/* synchronize cpus_allowed to cpu_active_mask */
|
||||
if (cpus_updated) {
|
||||
spin_lock_irq(&callback_lock);
|
||||
|
Loading…
Reference in New Issue
Block a user