mm, thp: fix mapped pages avoiding unevictable list on mlock
When a transparent hugepage is mapped and it is included in an mlock()
range, follow_page() incorrectly avoids setting the page's mlock bit and
moving it to the unevictable lru.

This is evident if you try to mlock(), munlock(), and then mlock() a
range again. Currently:

	#define MAP_SIZE	(4 << 30)	/* 4GB */

	void *ptr = mmap(NULL, MAP_SIZE, PROT_READ | PROT_WRITE,
			 MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
	mlock(ptr, MAP_SIZE);

		$ grep -E "Unevictable|Inactive\(anon" /proc/meminfo
		Inactive(anon):     6304 kB
		Unevictable:     4213924 kB

	munlock(ptr, MAP_SIZE);

		Inactive(anon):  4186252 kB
		Unevictable:       19652 kB

	mlock(ptr, MAP_SIZE);

		Inactive(anon):  4198556 kB
		Unevictable:       21684 kB

Notice that less than 2MB was added to the unevictable list; this is
because these pages in the range are not transparent hugepages since the
4GB range was allocated with mmap() and has no specific alignment. If
posix_memalign() were used instead, unevictable would not have grown at
all on the second mlock().

The fix is to call mlock_vma_page() so that the mlock bit is set and the
page is added to the unevictable list. With this patch:

	mlock(ptr, MAP_SIZE);

		Inactive(anon):     4056 kB
		Unevictable:     4213940 kB

	munlock(ptr, MAP_SIZE);

		Inactive(anon):  4198268 kB
		Unevictable:       19636 kB

	mlock(ptr, MAP_SIZE);

		Inactive(anon):     4008 kB
		Unevictable:     4213940 kB

Signed-off-by: David Rientjes <rientjes@google.com>
Acked-by: Hugh Dickins <hughd@google.com>
Reviewed-by: Andrea Arcangeli <aarcange@redhat.com>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michel Lespinasse <walken@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
		
							parent
							
								
									e90bdb7f52
								
							
						
					
					
						commit
						b676b293fb
					
				| @ -11,7 +11,7 @@ extern int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm, | ||||
| extern int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, | ||||
| 			       unsigned long address, pmd_t *pmd, | ||||
| 			       pmd_t orig_pmd); | ||||
| extern struct page *follow_trans_huge_pmd(struct mm_struct *mm, | ||||
| extern struct page *follow_trans_huge_pmd(struct vm_area_struct *vma, | ||||
| 					  unsigned long addr, | ||||
| 					  pmd_t *pmd, | ||||
| 					  unsigned int flags); | ||||
|  | ||||
| @ -971,11 +971,12 @@ out_unlock: | ||||
| 	return ret; | ||||
| } | ||||
| 
 | ||||
| struct page *follow_trans_huge_pmd(struct mm_struct *mm, | ||||
| struct page *follow_trans_huge_pmd(struct vm_area_struct *vma, | ||||
| 				   unsigned long addr, | ||||
| 				   pmd_t *pmd, | ||||
| 				   unsigned int flags) | ||||
| { | ||||
| 	struct mm_struct *mm = vma->vm_mm; | ||||
| 	struct page *page = NULL; | ||||
| 
 | ||||
| 	assert_spin_locked(&mm->page_table_lock); | ||||
| @ -998,6 +999,14 @@ struct page *follow_trans_huge_pmd(struct mm_struct *mm, | ||||
| 		_pmd = pmd_mkyoung(pmd_mkdirty(*pmd)); | ||||
| 		set_pmd_at(mm, addr & HPAGE_PMD_MASK, pmd, _pmd); | ||||
| 	} | ||||
| 	if ((flags & FOLL_MLOCK) && (vma->vm_flags & VM_LOCKED)) { | ||||
| 		if (page->mapping && trylock_page(page)) { | ||||
| 			lru_add_drain(); | ||||
| 			if (page->mapping) | ||||
| 				mlock_vma_page(page); | ||||
| 			unlock_page(page); | ||||
| 		} | ||||
| 	} | ||||
| 	page += (addr & ~HPAGE_PMD_MASK) >> PAGE_SHIFT; | ||||
| 	VM_BUG_ON(!PageCompound(page)); | ||||
| 	if (flags & FOLL_GET) | ||||
|  | ||||
| @ -1528,7 +1528,7 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address, | ||||
| 				spin_unlock(&mm->page_table_lock); | ||||
| 				wait_split_huge_page(vma->anon_vma, pmd); | ||||
| 			} else { | ||||
| 				page = follow_trans_huge_pmd(mm, address, | ||||
| 				page = follow_trans_huge_pmd(vma, address, | ||||
| 							     pmd, flags); | ||||
| 				spin_unlock(&mm->page_table_lock); | ||||
| 				goto out; | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user