mm/mempolicy: use vma iterator & maple state instead of vma linked list

Reworked the way mbind_range() finds the first VMA to reuse the maple
state and limit the number of tree walks needed.

Note, this drops the VM_BUG_ON(!vma) call, which would catch a start
address higher than the last VMA.  The code was written in a way that
allowed no VMA updates to occur and still return success.  There should be
no functional change to this scenario with the new code.

Link: https://lkml.kernel.org/r/20220906194824.2110408-57-Liam.Howlett@oracle.com
Signed-off-by: Liam R. Howlett <Liam.Howlett@Oracle.com>
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Tested-by: Yu Zhao <yuzhao@google.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: David Howells <dhowells@redhat.com>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: SeongJae Park <sj@kernel.org>
Cc: Sven Schnelle <svens@linux.ibm.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
This commit is contained in:
Liam R. Howlett 2022-09-06 19:49:02 +00:00 committed by Andrew Morton
parent ba0aff8ea6
commit 66850be55e

View File

@ -381,9 +381,10 @@ void mpol_rebind_task(struct task_struct *tsk, const nodemask_t *new)
void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new)
{
struct vm_area_struct *vma;
VMA_ITERATOR(vmi, mm, 0);
mmap_write_lock(mm);
for (vma = mm->mmap; vma; vma = vma->vm_next)
for_each_vma(vmi, vma)
mpol_rebind_policy(vma->vm_policy, new);
mmap_write_unlock(mm);
}
@ -654,7 +655,7 @@ static unsigned long change_prot_numa(struct vm_area_struct *vma,
static int queue_pages_test_walk(unsigned long start, unsigned long end,
struct mm_walk *walk)
{
struct vm_area_struct *vma = walk->vma;
struct vm_area_struct *next, *vma = walk->vma;
struct queue_pages *qp = walk->private;
unsigned long endvma = vma->vm_end;
unsigned long flags = qp->flags;
@ -669,9 +670,10 @@ static int queue_pages_test_walk(unsigned long start, unsigned long end,
/* hole at head side of range */
return -EFAULT;
}
next = find_vma(vma->vm_mm, vma->vm_end);
if (!(flags & MPOL_MF_DISCONTIG_OK) &&
((vma->vm_end < qp->end) &&
(!vma->vm_next || vma->vm_end < vma->vm_next->vm_start)))
(!next || vma->vm_end < next->vm_start)))
/* hole at middle or tail of range */
return -EFAULT;
@ -785,26 +787,24 @@ static int vma_replace_policy(struct vm_area_struct *vma,
static int mbind_range(struct mm_struct *mm, unsigned long start,
unsigned long end, struct mempolicy *new_pol)
{
MA_STATE(mas, &mm->mm_mt, start - 1, start - 1);
struct vm_area_struct *prev;
struct vm_area_struct *vma;
int err = 0;
pgoff_t pgoff;
unsigned long vmstart;
unsigned long vmend;
vma = find_vma(mm, start);
VM_BUG_ON(!vma);
prev = mas_find_rev(&mas, 0);
if (prev && (start < prev->vm_end))
vma = prev;
else
vma = mas_next(&mas, end - 1);
prev = vma->vm_prev;
if (start > vma->vm_start)
prev = vma;
for (; vma && vma->vm_start < end; prev = vma, vma = vma->vm_next) {
vmstart = max(start, vma->vm_start);
vmend = min(end, vma->vm_end);
for (; vma; vma = mas_next(&mas, end - 1)) {
unsigned long vmstart = max(start, vma->vm_start);
unsigned long vmend = min(end, vma->vm_end);
if (mpol_equal(vma_policy(vma), new_pol))
continue;
goto next;
pgoff = vma->vm_pgoff +
((vmstart - vma->vm_start) >> PAGE_SHIFT);
@ -813,6 +813,8 @@ static int mbind_range(struct mm_struct *mm, unsigned long start,
new_pol, vma->vm_userfaultfd_ctx,
anon_vma_name(vma));
if (prev) {
/* vma_merge() invalidated the mas */
mas_pause(&mas);
vma = prev;
goto replace;
}
@ -820,19 +822,25 @@ static int mbind_range(struct mm_struct *mm, unsigned long start,
err = split_vma(vma->vm_mm, vma, vmstart, 1);
if (err)
goto out;
/* split_vma() invalidated the mas */
mas_pause(&mas);
}
if (vma->vm_end != vmend) {
err = split_vma(vma->vm_mm, vma, vmend, 0);
if (err)
goto out;
/* split_vma() invalidated the mas */
mas_pause(&mas);
}
replace:
replace:
err = vma_replace_policy(vma, new_pol);
if (err)
goto out;
next:
prev = vma;
}
out:
out:
return err;
}
@ -1049,6 +1057,7 @@ static int migrate_to_node(struct mm_struct *mm, int source, int dest,
int flags)
{
nodemask_t nmask;
struct vm_area_struct *vma;
LIST_HEAD(pagelist);
int err = 0;
struct migration_target_control mtc = {
@ -1064,8 +1073,9 @@ static int migrate_to_node(struct mm_struct *mm, int source, int dest,
* need migration. Between passing in the full user address
* space range and MPOL_MF_DISCONTIG_OK, this call can not fail.
*/
vma = find_vma(mm, 0);
VM_BUG_ON(!(flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)));
queue_pages_range(mm, mm->mmap->vm_start, mm->task_size, &nmask,
queue_pages_range(mm, vma->vm_start, mm->task_size, &nmask,
flags | MPOL_MF_DISCONTIG_OK, &pagelist);
if (!list_empty(&pagelist)) {
@ -1195,14 +1205,13 @@ static struct page *new_page(struct page *page, unsigned long start)
struct folio *dst, *src = page_folio(page);
struct vm_area_struct *vma;
unsigned long address;
VMA_ITERATOR(vmi, current->mm, start);
gfp_t gfp = GFP_HIGHUSER_MOVABLE | __GFP_RETRY_MAYFAIL;
vma = find_vma(current->mm, start);
while (vma) {
for_each_vma(vmi, vma) {
address = page_address_in_vma(page, vma);
if (address != -EFAULT)
break;
vma = vma->vm_next;
}
if (folio_test_hugetlb(src))
@ -1480,6 +1489,7 @@ SYSCALL_DEFINE4(set_mempolicy_home_node, unsigned long, start, unsigned long, le
unsigned long vmend;
unsigned long end;
int err = -ENOENT;
VMA_ITERATOR(vmi, mm, start);
start = untagged_addr(start);
if (start & ~PAGE_MASK)
@ -1505,9 +1515,7 @@ SYSCALL_DEFINE4(set_mempolicy_home_node, unsigned long, start, unsigned long, le
if (end == start)
return 0;
mmap_write_lock(mm);
vma = find_vma(mm, start);
for (; vma && vma->vm_start < end; vma = vma->vm_next) {
for_each_vma_range(vmi, vma, end) {
vmstart = max(start, vma->vm_start);
vmend = min(end, vma->vm_end);
new = mpol_dup(vma_policy(vma));