memcgroup: fix hang with shmem/tmpfs

The memcgroup regime relies upon a cgroup reclaiming pages from itself within add_to_page_cache: which may involve some waiting. Whereas shmem and tmpfs rely upon using add_to_page_cache while holding a spinlock: when it cannot wait. The consequence is that when a cgroup reaches its limit, shmem_getpage just hangs - unless there is outside memory pressure too, neither kswapd nor radix_tree_preload get it out of the retry loop. In most cases we can mem_cgroup_cache_charge the page waitably first, to attach the page_cgroup in advance, so add_to_page_cache will do no more than increment a count; then mem_cgroup_uncharge_page after (in both success and failure cases) to balance the books again. And where there used to be a congestion_wait for kswapd (recently made redundant by radix_tree_preload), use mem_cgroup_cache_charge with NULL page to go through a cycle of allocation and freeing, without accounting to any particular page, and without updating the statistics vector. This brings the cgroup below its limit so the next try usually succeeds. Signed-off-by: Hugh Dickins <hugh@veritas.com> Cc: Balbir Singh <balbir@linux.vnet.ibm.com> Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Cc: Mel Gorman <mel@csn.ul.ie> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2024-11-10 14:11:52 +00:00 · 2008-02-07 00:14:22 -08:00 · 2008-02-07 00:14:22 -08:00 · 82369553d6
commit 82369553d6
parent 3be91277e7
2 changed files with 48 additions and 17 deletions
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@ -329,10 +329,12 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
 	 * with it
 	 */
 retry:
 	if (page) {
 		lock_page_cgroup(page);
 		pc = page_get_page_cgroup(page);
 		/*
-	 * The page_cgroup exists and the page has already been accounted
+		 * The page_cgroup exists and
 		 * the page has already been accounted.
 		 */
 		if (pc) {
 			if (unlikely(!atomic_inc_not_zero(&pc->ref_cnt))) {
@ -346,6 +348,7 @@ retry:
 			}
 		}
 		unlock_page_cgroup(page);
 	}
 	pc = kzalloc(sizeof(struct page_cgroup), gfp_mask);
 	if (pc == NULL)
@ -404,7 +407,7 @@ retry:
 	if (ctype == MEM_CGROUP_CHARGE_TYPE_CACHE)
 		pc->flags |= PAGE_CGROUP_FLAG_CACHE;
-	if (page_cgroup_assign_new_page_cgroup(page, pc)) {
+	if (!page || page_cgroup_assign_new_page_cgroup(page, pc)) {
 		/*
 		 * Another charge has been added to this page already.
 		 * We take lock_page_cgroup(page) again and read
@ -413,6 +416,8 @@ retry:
 		res_counter_uncharge(&mem->res, PAGE_SIZE);
 		css_put(&mem->css);
 		kfree(pc);
 		if (!page)
 			goto done;
 		goto retry;
 	}
--- a/mm/shmem.c
+++ b/mm/shmem.c
@ -912,9 +912,13 @@ found:
 	error = 1;
 	if (!inode)
 		goto out;
-	error = radix_tree_preload(GFP_KERNEL);
+	/* Precharge page while we can wait, compensate afterwards */
 	error = mem_cgroup_cache_charge(page, current->mm, GFP_KERNEL);
 	if (error)
 		goto out;
 	error = radix_tree_preload(GFP_KERNEL);
 	if (error)
 		goto uncharge;
 	error = 1;
 	spin_lock(&info->lock);
@ -947,6 +951,8 @@ found:
 		shmem_swp_unmap(ptr);
 	spin_unlock(&info->lock);
 	radix_tree_preload_end();
 uncharge:
 	mem_cgroup_uncharge_page(page);
 out:
 	unlock_page(page);
 	page_cache_release(page);
@ -1308,6 +1314,13 @@ repeat:
 			spin_unlock(&info->lock);
 			unlock_page(swappage);
 			page_cache_release(swappage);
 			if (error == -ENOMEM) {
 				/* allow reclaim from this memory cgroup */
 				error = mem_cgroup_cache_charge(NULL,
 					current->mm, gfp & ~__GFP_HIGHMEM);
 				if (error)
 					goto failed;
 			}
 			goto repeat;
 		}
 	} else if (sgp == SGP_READ && !filepage) {
@ -1353,6 +1366,17 @@ repeat:
 				goto failed;
 			}
 			/* Precharge page while we can wait, compensate after */
 			error = mem_cgroup_cache_charge(filepage, current->mm,
 							gfp & ~__GFP_HIGHMEM);
 			if (error) {
 				page_cache_release(filepage);
 				shmem_unacct_blocks(info->flags, 1);
 				shmem_free_blocks(inode, 1);
 				filepage = NULL;
 				goto failed;
 			}
 			spin_lock(&info->lock);
 			entry = shmem_swp_alloc(info, idx, sgp);
 			if (IS_ERR(entry))
@ -1364,6 +1388,7 @@ repeat:
 			if (error || swap.val || 0 != add_to_page_cache_lru(
 					filepage, mapping, idx, GFP_NOWAIT)) {
 				spin_unlock(&info->lock);
 				mem_cgroup_uncharge_page(filepage);
 				page_cache_release(filepage);
 				shmem_unacct_blocks(info->flags, 1);
 				shmem_free_blocks(inode, 1);
@ -1372,6 +1397,7 @@ repeat:
 					goto failed;
 				goto repeat;
 			}
 			mem_cgroup_uncharge_page(filepage);
 			info->flags |= SHMEM_PAGEIN;
 		}