mm/hugetlb: add support for mempolicy MPOL_PREFERRED_MANY

Implement the missing huge page allocation functionality while obeying the
preferred node semantics.  This is similar to the implementation for
general page allocation, as it uses a fallback mechanism to try multiple
preferred nodes first, and then all other nodes.
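The fallback is simply two passes over the same allocation call; a hedged sketch of the pattern (names mirror the hugetlb hunks in this patch, first pass skipped for other policies):

/*
 * Sketch only: the two-pass pattern applied in hugetlb below.
 * Pass 1 honours the user's preferred nodemask; pass 2 retries with
 * nodemask == NULL, i.e. all allowed nodes.
 */
struct page *page = NULL;

if (mpol_is_preferred_many(mpol)) {
	/* Pass 1: preferred nodes only. */
	page = dequeue_huge_page_nodemask(h, gfp_mask, nid, nodemask);
	/* Arrange for pass 2 to consider every node. */
	nodemask = NULL;
}
if (!page)	/* Pass 2: all nodes (the only pass for other policies). */
	page = dequeue_huge_page_nodemask(h, gfp_mask, nid, nodemask);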

To avoid adding too many "#ifdef CONFIG_NUMA" checks, add a helper function
in mempolicy.h to check whether a mempolicy is MPOL_PREFERRED_MANY.
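
The helper pair (reproduced here for illustration, taken from the mempolicy.h hunks below) lets callers test the policy unconditionally; the !CONFIG_NUMA stub constant-folds to false:

#ifdef CONFIG_NUMA
static inline bool mpol_is_preferred_many(struct mempolicy *pol)
{
	return (pol->mode == MPOL_PREFERRED_MANY);
}
#else
static inline bool mpol_is_preferred_many(struct mempolicy *pol)
{
	return false;	/* compiles away, so callers need no #ifdef */
}
#endif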

[akpm@linux-foundation.org: fix compiling issue when merging with other hugetlb patch]
[Thanks to 0day bot for catching the !CONFIG_NUMA compiling issue]
[mhocko@suse.com: suggest to remove the #ifdef CONFIG_NUMA check]
[ben.widawsky@intel.com: add helpers to avoid ifdefs]
  Link: https://lore.kernel.org/r/20200630212517.308045-12-ben.widawsky@intel.com
  Link: https://lkml.kernel.org/r/1627970362-61305-4-git-send-email-feng.tang@intel.com
  Link: https://lkml.kernel.org/r/20210809024430.GA46432@shbuild999.sh.intel.com
[nathan@kernel.org: initialize page to NULL in alloc_buddy_huge_page_with_mpol()]
  Link: https://lkml.kernel.org/r/20210810200632.3812797-1-nathan@kernel.org

Link: https://lore.kernel.org/r/20200630212517.308045-12-ben.widawsky@intel.com
Link: https://lkml.kernel.org/r/1627970362-61305-4-git-send-email-feng.tang@intel.com
Link: https://lkml.kernel.org/r/20210809024430.GA46432@shbuild999.sh.intel.com
Signed-off-by: Ben Widawsky <ben.widawsky@intel.com>
Signed-off-by: Feng Tang <feng.tang@intel.com>
Signed-off-by: Nathan Chancellor <nathan@kernel.org>
Co-developed-by: Feng Tang <feng.tang@intel.com>
Suggested-by: Michal Hocko <mhocko@suse.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Author: Ben Widawsky, 2021-09-02 15:00:13 -07:00, committed by Linus Torvalds
commit cfcaa66f80, parent 4c54d94908
2 changed files with 37 additions and 5 deletions

diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h

@@ -186,6 +186,12 @@ extern void mpol_put_task_policy(struct task_struct *);
 
 extern bool numa_demotion_enabled;
 
+static inline bool mpol_is_preferred_many(struct mempolicy *pol)
+{
+	return (pol->mode == MPOL_PREFERRED_MANY);
+}
+
 #else
 
 struct mempolicy {};
@@ -296,5 +302,11 @@ static inline nodemask_t *policy_nodemask_current(gfp_t gfp)
 }
 
 #define numa_demotion_enabled	false
+
+static inline bool mpol_is_preferred_many(struct mempolicy *pol)
+{
+	return false;
+}
+
 #endif /* CONFIG_NUMA */
 #endif

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c

@@ -1145,7 +1145,7 @@ static struct page *dequeue_huge_page_vma(struct hstate *h,
 				unsigned long address, int avoid_reserve,
 				long chg)
 {
-	struct page *page;
+	struct page *page = NULL;
 	struct mempolicy *mpol;
 	gfp_t gfp_mask;
 	nodemask_t *nodemask;
@@ -1166,7 +1166,17 @@ static struct page *dequeue_huge_page_vma(struct hstate *h,
 
 	gfp_mask = htlb_alloc_mask(h);
 	nid = huge_node(vma, address, gfp_mask, &mpol, &nodemask);
-	page = dequeue_huge_page_nodemask(h, gfp_mask, nid, nodemask);
+
+	if (mpol_is_preferred_many(mpol)) {
+		page = dequeue_huge_page_nodemask(h, gfp_mask, nid, nodemask);
+
+		/* Fallback to all nodes if page==NULL */
+		nodemask = NULL;
+	}
+
+	if (!page)
+		page = dequeue_huge_page_nodemask(h, gfp_mask, nid, nodemask);
+
 	if (page && !avoid_reserve && vma_has_reserves(vma, chg)) {
 		SetHPageRestoreReserve(page);
 		h->resv_huge_pages--;
@@ -2142,16 +2152,26 @@ static
 struct page *alloc_buddy_huge_page_with_mpol(struct hstate *h,
 		struct vm_area_struct *vma, unsigned long addr)
 {
-	struct page *page;
+	struct page *page = NULL;
 	struct mempolicy *mpol;
 	gfp_t gfp_mask = htlb_alloc_mask(h);
 	int nid;
 	nodemask_t *nodemask;
 
 	nid = huge_node(vma, addr, gfp_mask, &mpol, &nodemask);
-	page = alloc_surplus_huge_page(h, gfp_mask, nid, nodemask, false);
-	mpol_cond_put(mpol);
+	if (mpol_is_preferred_many(mpol)) {
+		gfp_t gfp = gfp_mask | __GFP_NOWARN;
+
+		gfp &= ~(__GFP_DIRECT_RECLAIM | __GFP_NOFAIL);
+		page = alloc_surplus_huge_page(h, gfp, nid, nodemask, false);
+
+		/* Fallback to all nodes if page==NULL */
+		nodemask = NULL;
+	}
 
+	if (!page)
+		page = alloc_surplus_huge_page(h, gfp_mask, nid, nodemask, false);
+	mpol_cond_put(mpol);
 	return page;
 }
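
A note on the alloc_buddy_huge_page_with_mpol() hunk above: the first, preferred-nodes-only attempt deliberately trims its gfp mask so a miss is cheap and quiet, since a second pass over all nodes follows immediately. A hedged restatement of the same flags:

/*
 * Sketch (reasoning only, flags as in the hunk above): the first pass
 * must not stall in direct reclaim or insist on success, because the
 * all-nodes fallback runs right after it.
 */
gfp_t gfp = gfp_mask | __GFP_NOWARN;		/* failure here is expected and silent */
gfp &= ~(__GFP_DIRECT_RECLAIM | __GFP_NOFAIL);	/* fail fast, never block or loop */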