userfaultfd: wp: support swap and page migration

For both swap and page migration, we use bit 2 of the entry to
identify whether the entry is uffd write-protected.  It plays a role
similar to the existing soft-dirty bit in swap entries, but it only
keeps the uffd-wp tracking for a specific PTE/PMD.

One special thing here: when recovering the uffd-wp bit from a
swap/migration entry back into the PTE, we must also make sure the
_PAGE_RW bit is cleared; otherwise, even with _PAGE_UFFD_WP set, the
write cannot be trapped at all.

Previously, change_pte_range() did nothing for uffd if the PTE was a
swap entry.  That can lead to data mismatch if the page to be
write-protected is swapped out while the UFFDIO_WRITEPROTECT is sent.
This patch applies/removes the uffd-wp bit for swap entries as well.

Signed-off-by: Peter Xu <peterx@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Bobby Powers <bobbypowers@gmail.com>
Cc: Brian Geffon <bgeffon@google.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Denis Plotnikov <dplotnikov@virtuozzo.com>
Cc: "Dr . David Alan Gilbert" <dgilbert@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Jerome Glisse <jglisse@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: "Kirill A . Shutemov" <kirill@shutemov.name>
Cc: Martin Cracauer <cracauer@cons.org>
Cc: Marty McFadden <mcfadden8@llnl.gov>
Cc: Maya Gokhale <gokhale2@llnl.gov>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Mike Rapoport <rppt@linux.vnet.ibm.com>
Cc: Pavel Emelyanov <xemul@openvz.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Shaohua Li <shli@fb.com>
Link: http://lkml.kernel.org/r/20200220163112.11409-11-peterx@redhat.com
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
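
As a quick illustration of the scheme described above, a minimal
user-space sketch of the bit dance (a toy model, not the kernel's
pte_t: only the bit-2 swap-entry marker comes from the changelog, while
the _PAGE_RW/_PAGE_UFFD_WP positions and helper bodies below are
stand-ins for the real arch accessors):

	/*
	 * Toy model of the bit dance in this patch -- NOT the kernel's
	 * pte_t.  Only the bit-2 swap-entry marker is named in the
	 * changelog; the other bit positions are placeholders.
	 */
	#include <assert.h>
	#include <stdint.h>

	typedef uint64_t pte_t;

	#define _PAGE_RW          (1ULL << 1)   /* hw write permission (placeholder) */
	#define _PAGE_SWP_UFFD_WP (1ULL << 2)   /* uffd-wp marker in a swap entry */
	#define _PAGE_UFFD_WP     (1ULL << 10)  /* uffd-wp marker in a present PTE (placeholder) */

	/* Swap-side helpers, mirroring pte_swp_mkuffd_wp()/pte_swp_uffd_wp(). */
	static pte_t pte_swp_mkuffd_wp(pte_t pte) { return pte | _PAGE_SWP_UFFD_WP; }
	static int pte_swp_uffd_wp(pte_t pte) { return !!(pte & _PAGE_SWP_UFFD_WP); }

	/*
	 * What do_swap_page() does when the page comes back in: carry the
	 * marker over AND drop write permission, because _PAGE_UFFD_WP
	 * alone does not make the hardware fault on the next write.
	 */
	static pte_t restore_uffd_wp(pte_t swp_pte, pte_t new_pte)
	{
		if (pte_swp_uffd_wp(swp_pte)) {
			new_pte |= _PAGE_UFFD_WP;	/* pte_mkuffd_wp() */
			new_pte &= ~_PAGE_RW;		/* pte_wrprotect() */
		}
		return new_pte;
	}

	int main(void)
	{
		pte_t swp = pte_swp_mkuffd_wp(0);		/* swapped out while wp'ed */
		pte_t pte = restore_uffd_wp(swp, _PAGE_RW);	/* swapped back in */

		assert(pte & _PAGE_UFFD_WP);	/* tracking survived the swap cycle */
		assert(!(pte & _PAGE_RW));	/* the next write traps into uffd */
		return 0;
	}

The second assert is why the do_swap_page() hunk below pairs
pte_mkuffd_wp() with pte_wrprotect(): the software bit alone never
generates a fault.
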
commit f45ec5ff16
parent 2e3d5dc508

diff --git a/include/linux/swapops.h b/include/linux/swapops.h
@@ -68,6 +68,8 @@ static inline swp_entry_t pte_to_swp_entry(pte_t pte)
 
 	if (pte_swp_soft_dirty(pte))
 		pte = pte_swp_clear_soft_dirty(pte);
+	if (pte_swp_uffd_wp(pte))
+		pte = pte_swp_clear_uffd_wp(pte);
 	arch_entry = __pte_to_swp_entry(pte);
 	return swp_entry(__swp_type(arch_entry), __swp_offset(arch_entry));
 }
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
@@ -2297,6 +2297,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
 		write = is_write_migration_entry(entry);
 		young = false;
 		soft_dirty = pmd_swp_soft_dirty(old_pmd);
+		uffd_wp = pmd_swp_uffd_wp(old_pmd);
 	} else {
 		page = pmd_page(old_pmd);
 		if (pmd_dirty(old_pmd))
@@ -2329,6 +2330,8 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
 			entry = swp_entry_to_pte(swp_entry);
 			if (soft_dirty)
 				entry = pte_swp_mksoft_dirty(entry);
+			if (uffd_wp)
+				entry = pte_swp_mkuffd_wp(entry);
 		} else {
 			entry = mk_pte(page + i, READ_ONCE(vma->vm_page_prot));
 			entry = maybe_mkwrite(entry, vma);
diff --git a/mm/memory.c b/mm/memory.c
@@ -733,6 +733,8 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 				pte = swp_entry_to_pte(entry);
 				if (pte_swp_soft_dirty(*src_pte))
 					pte = pte_swp_mksoft_dirty(pte);
+				if (pte_swp_uffd_wp(*src_pte))
+					pte = pte_swp_mkuffd_wp(pte);
 				set_pte_at(src_mm, addr, src_pte, pte);
 			}
 		} else if (is_device_private_entry(entry)) {
@@ -762,6 +764,8 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 			    is_cow_mapping(vm_flags)) {
 				make_device_private_entry_read(&entry);
 				pte = swp_entry_to_pte(entry);
+				if (pte_swp_uffd_wp(*src_pte))
+					pte = pte_swp_mkuffd_wp(pte);
 				set_pte_at(src_mm, addr, src_pte, pte);
 			}
 		}
@@ -3098,6 +3102,10 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
 	flush_icache_page(vma, page);
 	if (pte_swp_soft_dirty(vmf->orig_pte))
 		pte = pte_mksoft_dirty(pte);
+	if (pte_swp_uffd_wp(vmf->orig_pte)) {
+		pte = pte_mkuffd_wp(pte);
+		pte = pte_wrprotect(pte);
+	}
 	set_pte_at(vma->vm_mm, vmf->address, vmf->pte, pte);
 	arch_do_swap_page(vma->vm_mm, vma, vmf->address, pte, vmf->orig_pte);
 	vmf->orig_pte = pte;
diff --git a/mm/migrate.c b/mm/migrate.c
@@ -243,11 +243,15 @@ static bool remove_migration_pte(struct page *page, struct vm_area_struct *vma,
 		entry = pte_to_swp_entry(*pvmw.pte);
 		if (is_write_migration_entry(entry))
 			pte = maybe_mkwrite(pte, vma);
+		else if (pte_swp_uffd_wp(*pvmw.pte))
+			pte = pte_mkuffd_wp(pte);
 
 		if (unlikely(is_zone_device_page(new))) {
 			if (is_device_private_page(new)) {
 				entry = make_device_private_entry(new, pte_write(pte));
 				pte = swp_entry_to_pte(entry);
+				if (pte_swp_uffd_wp(*pvmw.pte))
+					pte = pte_mkuffd_wp(pte);
 			}
 		}
 
@@ -2338,6 +2342,8 @@ again:
 			swp_pte = swp_entry_to_pte(entry);
 			if (pte_soft_dirty(pte))
 				swp_pte = pte_swp_mksoft_dirty(swp_pte);
+			if (pte_uffd_wp(pte))
+				swp_pte = pte_swp_mkuffd_wp(swp_pte);
 			set_pte_at(mm, addr, ptep, swp_pte);
 
 			/*
diff --git a/mm/mprotect.c b/mm/mprotect.c
@@ -139,11 +139,11 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 			}
 			ptep_modify_prot_commit(vma, addr, pte, oldpte, ptent);
 			pages++;
-		} else if (IS_ENABLED(CONFIG_MIGRATION)) {
+		} else if (is_swap_pte(oldpte)) {
 			swp_entry_t entry = pte_to_swp_entry(oldpte);
+			pte_t newpte;
 
 			if (is_write_migration_entry(entry)) {
-				pte_t newpte;
 				/*
 				 * A protection check is difficult so
 				 * just be safe and disable write
@@ -152,22 +152,28 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 				newpte = swp_entry_to_pte(entry);
 				if (pte_swp_soft_dirty(oldpte))
 					newpte = pte_swp_mksoft_dirty(newpte);
-				set_pte_at(vma->vm_mm, addr, pte, newpte);
-
-				pages++;
-			}
-
-			if (is_write_device_private_entry(entry)) {
-				pte_t newpte;
-
+				if (pte_swp_uffd_wp(oldpte))
+					newpte = pte_swp_mkuffd_wp(newpte);
+			} else if (is_write_device_private_entry(entry)) {
 				/*
 				 * We do not preserve soft-dirtiness. See
 				 * copy_one_pte() for explanation.
 				 */
 				make_device_private_entry_read(&entry);
 				newpte = swp_entry_to_pte(entry);
-				set_pte_at(vma->vm_mm, addr, pte, newpte);
+				if (pte_swp_uffd_wp(oldpte))
+					newpte = pte_swp_mkuffd_wp(newpte);
+			} else {
+				newpte = oldpte;
+			}
+
+			if (uffd_wp)
+				newpte = pte_swp_mkuffd_wp(newpte);
+			else if (uffd_wp_resolve)
+				newpte = pte_swp_clear_uffd_wp(newpte);
 
-				pages++;
+			if (!pte_same(oldpte, newpte)) {
+				set_pte_at(vma->vm_mm, addr, pte, newpte);
+				pages++;
 			}
 		}
diff --git a/mm/rmap.c b/mm/rmap.c
@@ -1502,6 +1502,8 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 			swp_pte = swp_entry_to_pte(entry);
 			if (pte_soft_dirty(pteval))
 				swp_pte = pte_swp_mksoft_dirty(swp_pte);
+			if (pte_uffd_wp(pteval))
+				swp_pte = pte_swp_mkuffd_wp(swp_pte);
 			set_pte_at(mm, pvmw.address, pvmw.pte, swp_pte);
 			/*
 			 * No need to invalidate here it will synchronize on
@@ -1601,6 +1603,8 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 			swp_pte = swp_entry_to_pte(entry);
 			if (pte_soft_dirty(pteval))
 				swp_pte = pte_swp_mksoft_dirty(swp_pte);
+			if (pte_uffd_wp(pteval))
+				swp_pte = pte_swp_mkuffd_wp(swp_pte);
 			set_pte_at(mm, address, pvmw.pte, swp_pte);
 			/*
 			 * No need to invalidate here it will synchronize on
@@ -1667,6 +1671,8 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 			swp_pte = swp_entry_to_pte(entry);
 			if (pte_soft_dirty(pteval))
 				swp_pte = pte_swp_mksoft_dirty(swp_pte);
+			if (pte_uffd_wp(pteval))
+				swp_pte = pte_swp_mkuffd_wp(swp_pte);
 			set_pte_at(mm, address, pvmw.pte, swp_pte);
 			/* Invalidate as we cleared the pte */
 			mmu_notifier_invalidate_range(mm, address,