[PATCH] Dynamic sched domains: cpuset changes
Adds the core update_cpu_domains code and updated cpusets documentation.

Signed-off-by: Dinakar Guniguntala <dino@in.ibm.com>
Acked-by: Paul Jackson <pj@sgi.com>
Acked-by: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
commit 85d7b94981
parent 1a20ff27ef
--- a/Documentation/cpusets.txt
+++ b/Documentation/cpusets.txt
@@ -51,6 +51,14 @@ mems_allowed vector.
 
 If a cpuset is cpu or mem exclusive, no other cpuset, other than a direct
 ancestor or descendent, may share any of the same CPUs or Memory Nodes.
+A cpuset that is cpu exclusive has a sched domain associated with it.
+The sched domain consists of all cpus in the current cpuset that are not
+part of any exclusive child cpusets.
+This ensures that the scheduler load balancing code only balances
+against the cpus that are in the sched domain as defined above and not
+all of the cpus in the system.  This removes any overhead due to
+load balancing code trying to pull tasks outside of the cpu exclusive
+cpuset only to be prevented by the tasks' cpus_allowed mask.
 
 User level code may create and destroy cpusets by name in the cpuset
 virtual file system, manage the attributes and permissions of these
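
For illustration only (not part of the patch): the sched domain rule added
above, modelled as a standalone C program with plain unsigned masks standing
in for cpumask_t. The cpuset layout and cpu numbers are invented for the
example.

	#include <stdio.h>

	int main(void)
	{
		/* 8-cpu system: a cpu_exclusive cpuset on cpus 4-7 that
		 * itself has a cpu_exclusive child on cpus 6-7 */
		unsigned cpuset = 0xF0;		/* cpus 4-7 */
		unsigned child  = 0xC0;		/* cpus 6-7 */

		/* the cpuset's sched domain spans its cpus minus those
		 * of any exclusive children: here cpus 4-5 */
		unsigned domain = cpuset & ~child;

		printf("sched domain span: 0x%02x\n", domain);	/* 0x30 */
		return 0;
	}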
@@ -84,6 +92,9 @@ This can be especially valuable on:
       and a database), or
     * NUMA systems running large HPC applications with demanding
       performance characteristics.
+    * Also cpu_exclusive cpusets are useful for servers running orthogonal
+      workloads such as RT applications requiring low latency and HPC
+      applications that are throughput sensitive
 
 These subsets, or "soft partitions" must be able to be dynamically
 adjusted, as the job mix changes, without impacting other concurrently
@@ -125,6 +136,8 @@ Cpusets extends these two mechanisms as follows:
  - A cpuset may be marked exclusive, which ensures that no other
    cpuset (except direct ancestors and descendents) may contain
    any overlapping CPUs or Memory Nodes.
+   Also a cpu_exclusive cpuset would be associated with a sched
+   domain.
  - You can list all the tasks (by pid) attached to any cpuset.
 
 The implementation of cpusets requires a few, simple hooks
@@ -136,6 +149,9 @@ into the rest of the kernel, none in performance critical paths:
    allowed in that tasks cpuset.
  - in sched.c migrate_all_tasks(), to keep migrating tasks within
    the CPUs allowed by their cpuset, if possible.
+ - in sched.c, a new API partition_sched_domains for handling
+   sched domain changes associated with cpu_exclusive cpusets
+   and related changes in both sched.c and arch/ia64/kernel/domain.c
  - in the mbind and set_mempolicy system calls, to mask the requested
    Memory Nodes by what's allowed in that tasks cpuset.
  - in page_alloc, to restrict memory to allowed nodes.
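
For orientation, a minimal sketch of how the new partition_sched_domains
hook is driven; this mirrors the call site this patch adds to
kernel/cpuset.c below, and the span values here are placeholders:

	cpumask_t span1 = CPU_MASK_ALL;		/* first partition          */
	cpumask_t span2 = CPU_MASK_NONE;	/* second partition, if any */

	/* cpu hotplug must be excluded while sched domains are rebuilt */
	lock_cpu_hotplug();
	partition_sched_domains(&span1, &span2);
	unlock_cpu_hotplug();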
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -595,10 +595,62 @@ static int validate_change(const struct cpuset *cur, const struct cpuset *trial)
 	return 0;
 }
 
+/*
+ * For a given cpuset cur, partition the system as follows
+ * a. All cpus in the parent cpuset's cpus_allowed that are not part of any
+ *    exclusive child cpusets
+ * b. All cpus in the current cpuset's cpus_allowed that are not part of any
+ *    exclusive child cpusets
+ * Build these two partitions by calling partition_sched_domains
+ *
+ * Call with cpuset_sem held.  May nest a call to the
+ * lock_cpu_hotplug()/unlock_cpu_hotplug() pair.
+ */
+static void update_cpu_domains(struct cpuset *cur)
+{
+	struct cpuset *c, *par = cur->parent;
+	cpumask_t pspan, cspan;
+
+	if (par == NULL || cpus_empty(cur->cpus_allowed))
+		return;
+
+	/*
+	 * Get all cpus from parent's cpus_allowed not part of exclusive
+	 * children
+	 */
+	pspan = par->cpus_allowed;
+	list_for_each_entry(c, &par->children, sibling) {
+		if (is_cpu_exclusive(c))
+			cpus_andnot(pspan, pspan, c->cpus_allowed);
+	}
+	if (is_removed(cur) || !is_cpu_exclusive(cur)) {
+		cpus_or(pspan, pspan, cur->cpus_allowed);
+		if (cpus_equal(pspan, cur->cpus_allowed))
+			return;
+		cspan = CPU_MASK_NONE;
+	} else {
+		if (cpus_empty(pspan))
+			return;
+		cspan = cur->cpus_allowed;
+		/*
+		 * Get all cpus from current cpuset's cpus_allowed not part
+		 * of exclusive children
+		 */
+		list_for_each_entry(c, &cur->children, sibling) {
+			if (is_cpu_exclusive(c))
+				cpus_andnot(cspan, cspan, c->cpus_allowed);
+		}
+	}
+
+	lock_cpu_hotplug();
+	partition_sched_domains(&pspan, &cspan);
+	unlock_cpu_hotplug();
+}
+
 static int update_cpumask(struct cpuset *cs, char *buf)
 {
 	struct cpuset trialcs;
-	int retval;
+	int retval, cpus_unchanged;
 
 	trialcs = *cs;
 	retval = cpulist_parse(buf, trialcs.cpus_allowed);
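
To make the partitioning concrete, a standalone model (illustration only;
plain unsigned masks stand in for cpumask_t, and the hierarchy is invented):
the parent spans cpus 0-7, cur is cpu_exclusive on cpus 4-7, and cur has one
cpu_exclusive child on cpus 6-7.

	#include <stdio.h>

	int main(void)
	{
		unsigned parent = 0xFF;	/* parent cpuset: cpus 0-7         */
		unsigned cur    = 0xF0;	/* cur, cpu_exclusive: cpus 4-7    */
		unsigned child  = 0xC0;	/* cur's exclusive child: cpus 6-7 */

		/* pspan: parent's cpus minus the parent's exclusive
		 * children (here cur is the only one) -> cpus 0-3 */
		unsigned pspan = parent & ~cur;

		/* cspan: cur's cpus minus cur's exclusive children
		 * -> cpus 4-5 */
		unsigned cspan = cur & ~child;

		/* partition_sched_domains(&pspan, &cspan) would now build
		 * two independent balancing domains from these spans */
		printf("pspan 0x%02x cspan 0x%02x\n", pspan, cspan);
		return 0;
	}

Note the other branch of update_cpu_domains(): when cur is being removed or
is no longer cpu_exclusive, its cpus are folded back into pspan (the
cpus_or() call) and an empty cspan is passed, collapsing the two domains
back into one.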
@@ -608,9 +660,13 @@ static int update_cpumask(struct cpuset *cs, char *buf)
 	if (cpus_empty(trialcs.cpus_allowed))
 		return -ENOSPC;
 	retval = validate_change(cs, &trialcs);
-	if (retval == 0)
-		cs->cpus_allowed = trialcs.cpus_allowed;
-	return retval;
+	if (retval < 0)
+		return retval;
+	cpus_unchanged = cpus_equal(cs->cpus_allowed, trialcs.cpus_allowed);
+	cs->cpus_allowed = trialcs.cpus_allowed;
+	if (is_cpu_exclusive(cs) && !cpus_unchanged)
+		update_cpu_domains(cs);
+	return 0;
 }
 
 static int update_nodemask(struct cpuset *cs, char *buf)
@@ -646,7 +702,7 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, char *buf)
 {
 	int turning_on;
 	struct cpuset trialcs;
-	int err;
+	int err, cpu_exclusive_changed;
 
 	turning_on = (simple_strtoul(buf, NULL, 10) != 0);
 
@@ -657,13 +713,18 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, char *buf)
 		clear_bit(bit, &trialcs.flags);
 
 	err = validate_change(cs, &trialcs);
-	if (err == 0) {
-		if (turning_on)
-			set_bit(bit, &cs->flags);
-		else
-			clear_bit(bit, &cs->flags);
-	}
-	return err;
+	if (err < 0)
+		return err;
+	cpu_exclusive_changed =
+		(is_cpu_exclusive(cs) != is_cpu_exclusive(&trialcs));
+	if (turning_on)
+		set_bit(bit, &cs->flags);
+	else
+		clear_bit(bit, &cs->flags);
+
+	if (cpu_exclusive_changed)
+		update_cpu_domains(cs);
+	return 0;
 }
 
 static int attach_task(struct cpuset *cs, char *buf)
@@ -1309,12 +1370,14 @@ static int cpuset_rmdir(struct inode *unused_dir, struct dentry *dentry)
 		up(&cpuset_sem);
 		return -EBUSY;
 	}
-	spin_lock(&cs->dentry->d_lock);
 	parent = cs->parent;
 	set_bit(CS_REMOVED, &cs->flags);
+	if (is_cpu_exclusive(cs))
+		update_cpu_domains(cs);
 	list_del(&cs->sibling);	/* delete my sibling from parent->children */
 	if (list_empty(&parent->children))
 		check_for_release(parent);
+	spin_lock(&cs->dentry->d_lock);
 	d = dget(cs->dentry);
 	cs->dentry = NULL;
 	spin_unlock(&d->d_lock);
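
In this last hunk the spin_lock(&cs->dentry->d_lock) acquisition moves below
the new update_cpu_domains() call, presumably because that path can sleep in
lock_cpu_hotplug() and so must not run under a spinlock.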