mm/mempolicy.c: merge alloc_hugepage_vma to alloc_pages_vma

The previous commit ("mm/thp: Allocate transparent hugepages on local node") introduced alloc_hugepage_vma() to mm/mempolicy.c to perform a special policy for THP allocations. The function has the same interface as alloc_pages_vma(), shares a lot of boilerplate code and a long comment. This patch merges the hugepage special case into alloc_pages_vma. The extra if condition should be a cheap enough price to pay. We also prevent a (however unlikely) race with parallel mems_allowed update, which could make hugepage allocation restart only within the fallback call to alloc_pages_vma() and not reconsider the special rule in alloc_hugepage_vma(). Also by making sure mpol_cond_put(pol) is always called before actual allocation attempt, we can use a single exit path within the function. Also update the comment for missing node parameter and obsolete reference to mm_sem. Signed-off-by: Vlastimil Babka <vbabka@suse.cz> Cc: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> Cc: Vlastimil Babka <vbabka@suse.cz> Cc: David Rientjes <rientjes@google.com> Cc: Andrea Arcangeli <aarcange@redhat.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2024-12-29 06:12:08 +00:00 · 2015-02-11 15:27:15 -08:00 · 2015-02-11 15:27:15 -08:00 · be97a41b29
commit be97a41b29
parent 077fcf116c
2 changed files with 39 additions and 91 deletions
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@ -334,22 +334,22 @@ alloc_pages(gfp_t gfp_mask, unsigned int order)
 }
 extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order,
 			struct vm_area_struct *vma, unsigned long addr,
-			int node);
+			int node, bool hugepage);
-extern struct page *alloc_hugepage_vma(gfp_t gfp, struct vm_area_struct *vma,
+#define alloc_hugepage_vma(gfp_mask, vma, addr, order)	\
-				       unsigned long addr, int order);
+	alloc_pages_vma(gfp_mask, order, vma, addr, numa_node_id(), true)
 #else
 #define alloc_pages(gfp_mask, order) \
 		alloc_pages_node(numa_node_id(), gfp_mask, order)
-#define alloc_pages_vma(gfp_mask, order, vma, addr, node)	\
+#define alloc_pages_vma(gfp_mask, order, vma, addr, node, false)\
 	alloc_pages(gfp_mask, order)
 #define alloc_hugepage_vma(gfp_mask, vma, addr, order)	\
 	alloc_pages(gfp_mask, order)
 #endif
 #define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0)
 #define alloc_page_vma(gfp_mask, vma, addr)			\
-	alloc_pages_vma(gfp_mask, 0, vma, addr, numa_node_id())
+	alloc_pages_vma(gfp_mask, 0, vma, addr, numa_node_id(), false)
 #define alloc_page_vma_node(gfp_mask, vma, addr, node)		\
-	alloc_pages_vma(gfp_mask, 0, vma, addr, node)
+	alloc_pages_vma(gfp_mask, 0, vma, addr, node, false)
 extern struct page *alloc_kmem_pages(gfp_t gfp_mask, unsigned int order);
 extern struct page *alloc_kmem_pages_node(int nid, gfp_t gfp_mask,
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@ -1988,120 +1988,68 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
 *	@order:Order of the GFP allocation.
 * 	@vma:  Pointer to VMA or NULL if not available.
 *	@addr: Virtual Address of the allocation. Must be inside the VMA.
 *	@node: Which node to prefer for allocation (modulo policy).
 *	@hugepage: for hugepages try only the preferred node if possible
 *
 * 	This function allocates a page from the kernel page pool and applies
 *	a NUMA policy associated with the VMA or the current process.
 *	When VMA is not NULL caller must hold down_read on the mmap_sem of the
 *	mm_struct of the VMA to prevent it from going away. Should be used for
- *	all allocations for pages that will be mapped into
+ *	all allocations for pages that will be mapped into user space. Returns
- * 	user space. Returns NULL when no page can be allocated.
+ *	NULL when no page can be allocated.
 *
 *	Should be called with the mmap_sem of the vma held.
 */
 struct page *
 alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
-		unsigned long addr, int node)
+		unsigned long addr, int node, bool hugepage)
 {
 	struct mempolicy *pol;
 	struct page *page;
 	unsigned int cpuset_mems_cookie;
 	struct zonelist *zl;
 	nodemask_t *nmask;
 retry_cpuset:
 	pol = get_vma_policy(vma, addr);
 	cpuset_mems_cookie = read_mems_allowed_begin();
-	if (unlikely(pol->mode == MPOL_INTERLEAVE)) {
+	if (unlikely(IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && hugepage &&
 					pol->mode != MPOL_INTERLEAVE)) {
 		/*
 		 * For hugepage allocation and non-interleave policy which
 		 * allows the current node, we only try to allocate from the
 		 * current node and don't fall back to other nodes, as the
 		 * cost of remote accesses would likely offset THP benefits.
 		 *
 		 * If the policy is interleave, or does not allow the current
 		 * node in its nodemask, we allocate the standard way.
 		 */
 		nmask = policy_nodemask(gfp, pol);
 		if (!nmask || node_isset(node, *nmask)) {
 			mpol_cond_put(pol);
 			page = alloc_pages_exact_node(node, gfp, order);
 			goto out;
 		}
 	}
 	if (pol->mode == MPOL_INTERLEAVE) {
 		unsigned nid;
 		nid = interleave_nid(pol, vma, addr, PAGE_SHIFT + order);
 		mpol_cond_put(pol);
 		page = alloc_page_interleave(gfp, order, nid);
-		if (unlikely(!page && read_mems_allowed_retry(cpuset_mems_cookie)))
+		goto out;
 			goto retry_cpuset;
 		return page;
 	}
-	page = __alloc_pages_nodemask(gfp, order,
+
-				      policy_zonelist(gfp, pol, node),
+	nmask = policy_nodemask(gfp, pol);
-				      policy_nodemask(gfp, pol));
+	zl = policy_zonelist(gfp, pol, node);
 	mpol_cond_put(pol);
 	page = __alloc_pages_nodemask(gfp, order, zl, nmask);
 out:
 	if (unlikely(!page && read_mems_allowed_retry(cpuset_mems_cookie)))
 		goto retry_cpuset;
 	return page;
 }
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 /**
 * alloc_hugepage_vma: Allocate a hugepage for a VMA
 * @gfp:
 *   %GFP_USER	  user allocation.
 *   %GFP_KERNEL  kernel allocations,
 *   %GFP_HIGHMEM highmem/user allocations,
 *   %GFP_FS	  allocation should not call back into a file system.
 *   %GFP_ATOMIC  don't sleep.
 *
 * @vma:   Pointer to VMA or NULL if not available.
 * @addr:  Virtual Address of the allocation. Must be inside the VMA.
 * @order: Order of the hugepage for gfp allocation.
 *
 * This function allocates a huge page from the kernel page pool and applies
 * a NUMA policy associated with the VMA or the current process.
 * For policy other than %MPOL_INTERLEAVE, we make sure we allocate hugepage
 * only from the current node if the current node is part of the node mask.
 * If we can't allocate a hugepage we fail the allocation and don't try to fall back
 * to other nodes in the node mask. If the current node is not part of node mask
 * or if the NUMA policy is MPOL_INTERLEAVE we use the allocator that can
 * fall back to nodes in the policy node mask.
 *
 * When VMA is not NULL caller must hold down_read on the mmap_sem of the
 * mm_struct of the VMA to prevent it from going away. Should be used for
 * all allocations for pages that will be mapped into
 * user space. Returns NULL when no page can be allocated.
 *
 * Should be called with vma->vm_mm->mmap_sem held.
 *
 */
 struct page *alloc_hugepage_vma(gfp_t gfp, struct vm_area_struct *vma,
 				unsigned long addr, int order)
 {
 	struct page *page;
 	nodemask_t *nmask;
 	struct mempolicy *pol;
 	int node = numa_node_id();
 	unsigned int cpuset_mems_cookie;
 retry_cpuset:
 	pol = get_vma_policy(vma, addr);
 	cpuset_mems_cookie = read_mems_allowed_begin();
 	/*
 	 * For interleave policy, we don't worry about
 	 * current node. Otherwise if current node is
 	 * in nodemask, try to allocate hugepage from
 	 * the current node. Don't fall back to other nodes
 	 * for THP.
 	 */
 	if (unlikely(pol->mode == MPOL_INTERLEAVE))
 		goto alloc_with_fallback;
 	nmask = policy_nodemask(gfp, pol);
 	if (!nmask || node_isset(node, *nmask)) {
 		mpol_cond_put(pol);
 		page = alloc_pages_exact_node(node, gfp, order);
 		if (unlikely(!page &&
 			     read_mems_allowed_retry(cpuset_mems_cookie)))
 			goto retry_cpuset;
 		return page;
 	}
 alloc_with_fallback:
 	mpol_cond_put(pol);
 	/*
 	 * if current node is not part of node mask, try
 	 * the allocation from any node, and we can do retry
 	 * in that case.
 	 */
 	return alloc_pages_vma(gfp, order, vma, addr, node);
 }
 #endif
 /**
 * 	alloc_pages_current - Allocate pages.
 *