memory cgroup enhancements: force_empty interface for dropping all account in empty cgroup

This patch adds an interface "memory.force_empty".  Any write to this file
will drop all charges in this cgroup if there is no task under.

%echo 1 > /....../memory.force_empty

will drop all charges of memory cgroup if cgroup's tasks is empty.

This is useful to invoke rmdir() against memory cgroup successfully.

Tested and worked well on x86_64/fake-NUMA system.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: Pavel Emelianov <xemul@openvz.org>
Cc: Paul Menage <menage@google.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: Kirill Korotaev <dev@sw.ru>
Cc: Herbert Poetzl <herbert@13thfloor.at>
Cc: David Rientjes <rientjes@google.com>
Cc: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
KAMEZAWA Hiroyuki 2008-02-07 00:14:16 -08:00 committed by Linus Torvalds
parent 417eead304
commit cc8475822f

View File

@ -471,6 +471,7 @@ void mem_cgroup_uncharge(struct page_cgroup *pc)
page = pc->page; page = pc->page;
/* /*
* get page->cgroup and clear it under lock. * get page->cgroup and clear it under lock.
* force_empty can drop page->cgroup without checking refcnt.
*/ */
if (clear_page_cgroup(page, pc) == pc) { if (clear_page_cgroup(page, pc) == pc) {
mem = pc->mem_cgroup; mem = pc->mem_cgroup;
@ -480,13 +481,6 @@ void mem_cgroup_uncharge(struct page_cgroup *pc)
list_del_init(&pc->lru); list_del_init(&pc->lru);
spin_unlock_irqrestore(&mem->lru_lock, flags); spin_unlock_irqrestore(&mem->lru_lock, flags);
kfree(pc); kfree(pc);
} else {
/*
* Note:This will be removed when force-empty patch is
* applied. just show warning here.
*/
printk(KERN_ERR "Race in mem_cgroup_uncharge() ?");
dump_stack();
} }
} }
} }
@ -534,6 +528,76 @@ retry:
return; return;
} }
/*
* This routine traverse page_cgroup in given list and drop them all.
* This routine ignores page_cgroup->ref_cnt.
* *And* this routine doesn't reclaim page itself, just removes page_cgroup.
*/
#define FORCE_UNCHARGE_BATCH (128)
static void
mem_cgroup_force_empty_list(struct mem_cgroup *mem, struct list_head *list)
{
struct page_cgroup *pc;
struct page *page;
int count;
unsigned long flags;
retry:
count = FORCE_UNCHARGE_BATCH;
spin_lock_irqsave(&mem->lru_lock, flags);
while (--count && !list_empty(list)) {
pc = list_entry(list->prev, struct page_cgroup, lru);
page = pc->page;
/* Avoid race with charge */
atomic_set(&pc->ref_cnt, 0);
if (clear_page_cgroup(page, pc) == pc) {
css_put(&mem->css);
res_counter_uncharge(&mem->res, PAGE_SIZE);
list_del_init(&pc->lru);
kfree(pc);
} else /* being uncharged ? ...do relax */
break;
}
spin_unlock_irqrestore(&mem->lru_lock, flags);
if (!list_empty(list)) {
cond_resched();
goto retry;
}
return;
}
/*
* make mem_cgroup's charge to be 0 if there is no task.
* This enables deleting this mem_cgroup.
*/
int mem_cgroup_force_empty(struct mem_cgroup *mem)
{
int ret = -EBUSY;
css_get(&mem->css);
/*
* page reclaim code (kswapd etc..) will move pages between
` * active_list <-> inactive_list while we don't take a lock.
* So, we have to do loop here until all lists are empty.
*/
while (!(list_empty(&mem->active_list) &&
list_empty(&mem->inactive_list))) {
if (atomic_read(&mem->css.cgroup->count) > 0)
goto out;
/* drop all page_cgroup in active_list */
mem_cgroup_force_empty_list(mem, &mem->active_list);
/* drop all page_cgroup in inactive_list */
mem_cgroup_force_empty_list(mem, &mem->inactive_list);
}
ret = 0;
out:
css_put(&mem->css);
return ret;
}
int mem_cgroup_write_strategy(char *buf, unsigned long long *tmp) int mem_cgroup_write_strategy(char *buf, unsigned long long *tmp)
{ {
*tmp = memparse(buf, &buf); *tmp = memparse(buf, &buf);
@ -619,6 +683,33 @@ static ssize_t mem_control_type_read(struct cgroup *cont,
ppos, buf, s - buf); ppos, buf, s - buf);
} }
static ssize_t mem_force_empty_write(struct cgroup *cont,
struct cftype *cft, struct file *file,
const char __user *userbuf,
size_t nbytes, loff_t *ppos)
{
struct mem_cgroup *mem = mem_cgroup_from_cont(cont);
int ret;
ret = mem_cgroup_force_empty(mem);
if (!ret)
ret = nbytes;
return ret;
}
/*
* Note: This should be removed if cgroup supports write-only file.
*/
static ssize_t mem_force_empty_read(struct cgroup *cont,
struct cftype *cft,
struct file *file, char __user *userbuf,
size_t nbytes, loff_t *ppos)
{
return -EINVAL;
}
static struct cftype mem_cgroup_files[] = { static struct cftype mem_cgroup_files[] = {
{ {
.name = "usage_in_bytes", .name = "usage_in_bytes",
@ -641,6 +732,11 @@ static struct cftype mem_cgroup_files[] = {
.write = mem_control_type_write, .write = mem_control_type_write,
.read = mem_control_type_read, .read = mem_control_type_read,
}, },
{
.name = "force_empty",
.write = mem_force_empty_write,
.read = mem_force_empty_read,
},
}; };
static struct mem_cgroup init_mem_cgroup; static struct mem_cgroup init_mem_cgroup;