thp: madvise(MADV_NOHUGEPAGE)

Add madvise MADV_NOHUGEPAGE to mark regions that are not important to be
hugepage backed.  Return -EINVAL if the vma is not of an anonymous type,
or the feature isn't built into the kernel.  Never silently return
success.

Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
Andrea Arcangeli 2011-01-13 15:47:17 -08:00 committed by Linus Torvalds
parent 1ddd6db43a
commit a664b2d855
5 changed files with 46 additions and 21 deletions

View File

@ -52,10 +52,12 @@ extern pmd_t *page_check_address_pmd(struct page *page,
#define HPAGE_PMD_SIZE HPAGE_SIZE
#define transparent_hugepage_enabled(__vma) \
(transparent_hugepage_flags & (1<<TRANSPARENT_HUGEPAGE_FLAG) || \
((transparent_hugepage_flags & \
(1<<TRANSPARENT_HUGEPAGE_FLAG) || \
(transparent_hugepage_flags & \
(1<<TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG) && \
(__vma)->vm_flags & VM_HUGEPAGE))
((__vma)->vm_flags & VM_HUGEPAGE))) && \
!((__vma)->vm_flags & VM_NOHUGEPAGE))
#define transparent_hugepage_defrag(__vma) \
((transparent_hugepage_flags & \
(1<<TRANSPARENT_HUGEPAGE_DEFRAG_FLAG)) || \
@ -103,7 +105,7 @@ extern void __split_huge_page_pmd(struct mm_struct *mm, pmd_t *pmd);
#if HPAGE_PMD_ORDER > MAX_ORDER
#error "hugepages can't be allocated by the buddy allocator"
#endif
extern int hugepage_madvise(unsigned long *vm_flags);
extern int hugepage_madvise(unsigned long *vm_flags, int advice);
extern void __vma_adjust_trans_huge(struct vm_area_struct *vma,
unsigned long start,
unsigned long end,
@ -141,7 +143,7 @@ static inline int split_huge_page(struct page *page)
do { } while (0)
#define wait_split_huge_page(__anon_vma, __pmd) \
do { } while (0)
static inline int hugepage_madvise(unsigned long *vm_flags)
static inline int hugepage_madvise(unsigned long *vm_flags, int advice)
{
BUG();
return 0;

View File

@ -38,9 +38,10 @@ static inline void khugepaged_exit(struct mm_struct *mm)
static inline int khugepaged_enter(struct vm_area_struct *vma)
{
if (!test_bit(MMF_VM_HUGEPAGE, &vma->vm_mm->flags))
if (khugepaged_always() ||
if ((khugepaged_always() ||
(khugepaged_req_madv() &&
vma->vm_flags & VM_HUGEPAGE))
vma->vm_flags & VM_HUGEPAGE)) &&
!(vma->vm_flags & VM_NOHUGEPAGE))
if (__khugepaged_enter(vma->vm_mm))
return -ENOMEM;
return 0;

View File

@ -83,6 +83,7 @@ extern unsigned int kobjsize(const void *objp);
#define VM_GROWSUP 0x00000200
#else
#define VM_GROWSUP 0x00000000
#define VM_NOHUGEPAGE 0x00000200 /* MADV_NOHUGEPAGE marked this vma */
#endif
#define VM_PFNMAP 0x00000400 /* Page-ranges managed without "struct page", just pure PFN */
#define VM_DENYWRITE 0x00000800 /* ETXTBSY on write attempts.. */

View File

@ -16,6 +16,7 @@
#include <linux/kthread.h>
#include <linux/khugepaged.h>
#include <linux/freezer.h>
#include <linux/mman.h>
#include <asm/tlb.h>
#include <asm/pgalloc.h>
#include "internal.h"
@ -1388,18 +1389,36 @@ out:
return ret;
}
int hugepage_madvise(unsigned long *vm_flags)
int hugepage_madvise(unsigned long *vm_flags, int advice)
{
switch (advice) {
case MADV_HUGEPAGE:
/*
* Be somewhat over-protective like KSM for now!
*/
if (*vm_flags & (VM_HUGEPAGE | VM_SHARED | VM_MAYSHARE |
if (*vm_flags & (VM_HUGEPAGE |
VM_SHARED | VM_MAYSHARE |
VM_PFNMAP | VM_IO | VM_DONTEXPAND |
VM_RESERVED | VM_HUGETLB | VM_INSERTPAGE |
VM_MIXEDMAP | VM_SAO))
return -EINVAL;
*vm_flags &= ~VM_NOHUGEPAGE;
*vm_flags |= VM_HUGEPAGE;
break;
case MADV_NOHUGEPAGE:
/*
* Be somewhat over-protective like KSM for now!
*/
if (*vm_flags & (VM_NOHUGEPAGE |
VM_SHARED | VM_MAYSHARE |
VM_PFNMAP | VM_IO | VM_DONTEXPAND |
VM_RESERVED | VM_HUGETLB | VM_INSERTPAGE |
VM_MIXEDMAP | VM_SAO))
return -EINVAL;
*vm_flags &= ~VM_HUGEPAGE;
*vm_flags |= VM_NOHUGEPAGE;
break;
}
return 0;
}

View File

@ -72,7 +72,8 @@ static long madvise_behavior(struct vm_area_struct * vma,
goto out;
break;
case MADV_HUGEPAGE:
error = hugepage_madvise(&new_flags);
case MADV_NOHUGEPAGE:
error = hugepage_madvise(&new_flags, behavior);
if (error)
goto out;
break;
@ -290,6 +291,7 @@ madvise_behavior_valid(int behavior)
#endif
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
case MADV_HUGEPAGE:
case MADV_NOHUGEPAGE:
#endif
return 1;