- Alistair Popple has a series which addresses a race which causes page
  refcounting errors in ZONE_DEVICE pages.

- Peter Xu fixes some userfaultfd test harness instability.

- Various other patches in MM, mainly fixes.
-----BEGIN PGP SIGNATURE-----

iHUEABYKAB0WIQTTMBEPP41GrTpTJgfdBJ7gKXxAjgUCY0j6igAKCRDdBJ7gKXxA
jnGxAP99bV39ZtOsoY4OHdZlWU16BUjKuf/cb3bZlC2G849vEwD+OKlij86SG20j
MGJQ6TfULJ8f1dnQDd6wvDfl3FMl7Qc=
=tbdp
-----END PGP SIGNATURE-----

Merge tag 'mm-stable-2022-10-13' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm

Pull more MM updates from Andrew Morton:

 - fix a race which causes page refcounting errors in ZONE_DEVICE pages
   (Alistair Popple)

 - fix userfaultfd test harness instability (Peter Xu)

 - various other patches in MM, mainly fixes

* tag 'mm-stable-2022-10-13' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (29 commits)
  highmem: fix kmap_to_page() for kmap_local_page() addresses
  mm/page_alloc: fix incorrect PGFREE and PGALLOC for high-order page
  mm/selftest: uffd: explain the write missing fault check
  mm/hugetlb: use hugetlb_pte_stable in migration race check
  mm/hugetlb: fix race condition of uffd missing/minor handling
  zram: always expose rw_page
  LoongArch: update local TLB if PTE entry exists
  mm: use update_mmu_tlb() on the second thread
  kasan: fix array-bounds warnings in tests
  hmm-tests: add test for migrate_device_range()
  nouveau/dmem: evict device private memory during release
  nouveau/dmem: refactor nouveau_dmem_fault_copy_one()
  mm/migrate_device.c: add migrate_device_range()
  mm/migrate_device.c: refactor migrate_vma and migrate_deivce_coherent_page()
  mm/memremap.c: take a pgmap reference on page allocation
  mm: free device private pages have zero refcount
  mm/memory.c: fix race when faulting a device private page
  mm/damon: use damon_sz_region() in appropriate place
  mm/damon: move sz_damon_region to damon_sz_region
  lib/test_meminit: add checks for the allocation functions
  ...
commit 5e714bf171
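
One theme of this pull is the new device-page migration helpers: migrate_device_range(), migrate_device_pages() and migrate_device_finalize() let a driver evict every device-private page in a PFN range without walking CPU page tables, which is how the nouveau and hmm-test hunks below tear down a memory chunk on release. A rough sketch of that pattern follows; mydev_chunk and mydev_copy_to_host() are stand-ins for driver-specific pieces, not real APIs, and error handling is elided (the real drivers use __GFP_NOFAIL because there is nothing sensible to do if the data cannot be copied back):

  /* Sketch only: chunk layout and copy helper are hypothetical. */
  static void mydev_evict_chunk(struct mydev_chunk *chunk)
  {
  	unsigned long npages = range_len(&chunk->pagemap.range) >> PAGE_SHIFT;
  	unsigned long *src_pfns, *dst_pfns;
  	unsigned long i;

  	src_pfns = kcalloc(npages, sizeof(*src_pfns), GFP_KERNEL);
  	dst_pfns = kcalloc(npages, sizeof(*dst_pfns), GFP_KERNEL);

  	/* Lock and isolate every device-private page in the chunk. */
  	migrate_device_range(src_pfns, chunk->pagemap.range.start >> PAGE_SHIFT,
  			     npages);

  	for (i = 0; i < npages; i++) {
  		struct page *dpage;

  		if (!(src_pfns[i] & MIGRATE_PFN_MIGRATE))
  			continue;
  		/* Allocate system memory and copy the device data back. */
  		dpage = alloc_page(GFP_HIGHUSER | __GFP_NOFAIL);
  		dst_pfns[i] = migrate_pfn(page_to_pfn(dpage));
  		mydev_copy_to_host(chunk, migrate_pfn_to_page(src_pfns[i]), dpage);
  	}

  	/* Point CPU mappings at the new pages, then drop the isolation refs. */
  	migrate_device_pages(src_pfns, dst_pfns, npages);
  	migrate_device_finalize(src_pfns, dst_pfns, npages);
  	kfree(src_pfns);
  	kfree(dst_pfns);
  }
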
@@ -412,6 +412,9 @@ static inline void update_mmu_cache(struct vm_area_struct *vma,
 	__update_tlb(vma, address, ptep);
 }
 
+#define __HAVE_ARCH_UPDATE_MMU_TLB
+#define update_mmu_tlb	update_mmu_cache
+
 static inline void update_mmu_cache_pmd(struct vm_area_struct *vma,
 			unsigned long address, pmd_t *pmdp)
 {
@@ -508,10 +508,10 @@ unsigned long kvmppc_h_svm_init_start(struct kvm *kvm)
 static int __kvmppc_svm_page_out(struct vm_area_struct *vma,
 		unsigned long start,
 		unsigned long end, unsigned long page_shift,
-		struct kvm *kvm, unsigned long gpa)
+		struct kvm *kvm, unsigned long gpa, struct page *fault_page)
 {
 	unsigned long src_pfn, dst_pfn = 0;
-	struct migrate_vma mig;
+	struct migrate_vma mig = { 0 };
 	struct page *dpage, *spage;
 	struct kvmppc_uvmem_page_pvt *pvt;
 	unsigned long pfn;
@@ -525,6 +525,7 @@ static int __kvmppc_svm_page_out(struct vm_area_struct *vma,
 	mig.dst = &dst_pfn;
 	mig.pgmap_owner = &kvmppc_uvmem_pgmap;
 	mig.flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE;
+	mig.fault_page = fault_page;
 
 	/* The requested page is already paged-out, nothing to do */
 	if (!kvmppc_gfn_is_uvmem_pfn(gpa >> page_shift, kvm, NULL))
@@ -580,12 +581,14 @@ out_finalize:
 static inline int kvmppc_svm_page_out(struct vm_area_struct *vma,
 		unsigned long start, unsigned long end,
 		unsigned long page_shift,
-		struct kvm *kvm, unsigned long gpa)
+		struct kvm *kvm, unsigned long gpa,
+		struct page *fault_page)
 {
 	int ret;
 
 	mutex_lock(&kvm->arch.uvmem_lock);
-	ret = __kvmppc_svm_page_out(vma, start, end, page_shift, kvm, gpa);
+	ret = __kvmppc_svm_page_out(vma, start, end, page_shift, kvm, gpa,
+				    fault_page);
 	mutex_unlock(&kvm->arch.uvmem_lock);
 
 	return ret;
@@ -634,7 +637,7 @@ void kvmppc_uvmem_drop_pages(const struct kvm_memory_slot *slot,
 			pvt->remove_gfn = true;
 
 			if (__kvmppc_svm_page_out(vma, addr, addr + PAGE_SIZE,
-						  PAGE_SHIFT, kvm, pvt->gpa))
+						  PAGE_SHIFT, kvm, pvt->gpa, NULL))
 				pr_err("Can't page out gpa:0x%lx addr:0x%lx\n",
 				       pvt->gpa, addr);
 		} else {
@@ -715,7 +718,7 @@ static struct page *kvmppc_uvmem_get_page(unsigned long gpa, struct kvm *kvm)
 
 	dpage = pfn_to_page(uvmem_pfn);
 	dpage->zone_device_data = pvt;
-	lock_page(dpage);
+	zone_device_page_init(dpage);
 	return dpage;
 out_clear:
 	spin_lock(&kvmppc_uvmem_bitmap_lock);
@@ -736,7 +739,7 @@ static int kvmppc_svm_page_in(struct vm_area_struct *vma,
 		bool pagein)
 {
 	unsigned long src_pfn, dst_pfn = 0;
-	struct migrate_vma mig;
+	struct migrate_vma mig = { 0 };
 	struct page *spage;
 	unsigned long pfn;
 	struct page *dpage;
@@ -994,7 +997,7 @@ static vm_fault_t kvmppc_uvmem_migrate_to_ram(struct vm_fault *vmf)
 
 	if (kvmppc_svm_page_out(vmf->vma, vmf->address,
 				vmf->address + PAGE_SIZE, PAGE_SHIFT,
-				pvt->kvm, pvt->gpa))
+				pvt->kvm, pvt->gpa, vmf->page))
 		return VM_FAULT_SIGBUS;
 	else
 		return 0;
@@ -1065,7 +1068,7 @@ kvmppc_h_svm_page_out(struct kvm *kvm, unsigned long gpa,
 	if (!vma || vma->vm_start > start || vma->vm_end < end)
 		goto out;
 
-	if (!kvmppc_svm_page_out(vma, start, end, page_shift, kvm, gpa))
+	if (!kvmppc_svm_page_out(vma, start, end, page_shift, kvm, gpa, NULL))
 		ret = H_SUCCESS;
 out:
 	mmap_read_unlock(kvm->mm);
@@ -52,9 +52,6 @@ static unsigned int num_devices = 1;
 static size_t huge_class_size;
 
 static const struct block_device_operations zram_devops;
-#ifdef CONFIG_ZRAM_WRITEBACK
-static const struct block_device_operations zram_wb_devops;
-#endif
 
 static void zram_free_page(struct zram *zram, size_t index);
 static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
@@ -546,17 +543,6 @@ static ssize_t backing_dev_store(struct device *dev,
 	zram->backing_dev = backing_dev;
 	zram->bitmap = bitmap;
 	zram->nr_pages = nr_pages;
-	/*
-	 * With writeback feature, zram does asynchronous IO so it's no longer
-	 * synchronous device so let's remove synchronous io flag. Othewise,
-	 * upper layer(e.g., swap) could wait IO completion rather than
-	 * (submit and return), which will cause system sluggish.
-	 * Furthermore, when the IO function returns(e.g., swap_readpage),
-	 * upper layer expects IO was done so it could deallocate the page
-	 * freely but in fact, IO is going on so finally could cause
-	 * use-after-free when the IO is really done.
-	 */
-	zram->disk->fops = &zram_wb_devops;
 	up_write(&zram->init_lock);
 
 	pr_info("setup backing device %s\n", file_name);
@@ -1270,6 +1256,9 @@ static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index,
 		struct bio_vec bvec;
 
 		zram_slot_unlock(zram, index);
+		/* A null bio means rw_page was used, we must fallback to bio */
+		if (!bio)
+			return -EOPNOTSUPP;
 
 		bvec.bv_page = page;
 		bvec.bv_len = PAGE_SIZE;
@@ -1856,15 +1845,6 @@ static const struct block_device_operations zram_devops = {
 	.owner = THIS_MODULE
 };
 
-#ifdef CONFIG_ZRAM_WRITEBACK
-static const struct block_device_operations zram_wb_devops = {
-	.open = zram_open,
-	.submit_bio = zram_submit_bio,
-	.swap_slot_free_notify = zram_slot_free_notify,
-	.owner = THIS_MODULE
-};
-#endif
-
 static DEVICE_ATTR_WO(compact);
 static DEVICE_ATTR_RW(disksize);
 static DEVICE_ATTR_RO(initstate);
@ -223,7 +223,7 @@ svm_migrate_get_vram_page(struct svm_range *prange, unsigned long pfn)
|
||||
page = pfn_to_page(pfn);
|
||||
svm_range_bo_ref(prange->svm_bo);
|
||||
page->zone_device_data = prange->svm_bo;
|
||||
lock_page(page);
|
||||
zone_device_page_init(page);
|
||||
}
|
||||
|
||||
static void
|
||||
@ -410,7 +410,7 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
|
||||
uint64_t npages = (end - start) >> PAGE_SHIFT;
|
||||
struct kfd_process_device *pdd;
|
||||
struct dma_fence *mfence = NULL;
|
||||
struct migrate_vma migrate;
|
||||
struct migrate_vma migrate = { 0 };
|
||||
unsigned long cpages = 0;
|
||||
dma_addr_t *scratch;
|
||||
void *buf;
|
||||
@ -666,7 +666,7 @@ out_oom:
|
||||
static long
|
||||
svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
|
||||
struct vm_area_struct *vma, uint64_t start, uint64_t end,
|
||||
uint32_t trigger)
|
||||
uint32_t trigger, struct page *fault_page)
|
||||
{
|
||||
struct kfd_process *p = container_of(prange->svms, struct kfd_process, svms);
|
||||
uint64_t npages = (end - start) >> PAGE_SHIFT;
|
||||
@ -674,7 +674,7 @@ svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
|
||||
unsigned long cpages = 0;
|
||||
struct kfd_process_device *pdd;
|
||||
struct dma_fence *mfence = NULL;
|
||||
struct migrate_vma migrate;
|
||||
struct migrate_vma migrate = { 0 };
|
||||
dma_addr_t *scratch;
|
||||
void *buf;
|
||||
int r = -ENOMEM;
|
||||
@ -697,6 +697,7 @@ svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
|
||||
|
||||
migrate.src = buf;
|
||||
migrate.dst = migrate.src + npages;
|
||||
migrate.fault_page = fault_page;
|
||||
scratch = (dma_addr_t *)(migrate.dst + npages);
|
||||
|
||||
kfd_smi_event_migration_start(adev->kfd.dev, p->lead_thread->pid,
|
||||
@ -764,7 +765,7 @@ out:
|
||||
* 0 - OK, otherwise error code
|
||||
*/
|
||||
int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm,
|
||||
uint32_t trigger)
|
||||
uint32_t trigger, struct page *fault_page)
|
||||
{
|
||||
struct amdgpu_device *adev;
|
||||
struct vm_area_struct *vma;
|
||||
@ -805,7 +806,8 @@ int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm,
|
||||
}
|
||||
|
||||
next = min(vma->vm_end, end);
|
||||
r = svm_migrate_vma_to_ram(adev, prange, vma, addr, next, trigger);
|
||||
r = svm_migrate_vma_to_ram(adev, prange, vma, addr, next, trigger,
|
||||
fault_page);
|
||||
if (r < 0) {
|
||||
pr_debug("failed %ld to migrate prange %p\n", r, prange);
|
||||
break;
|
||||
@ -849,7 +851,7 @@ svm_migrate_vram_to_vram(struct svm_range *prange, uint32_t best_loc,
|
||||
pr_debug("from gpu 0x%x to gpu 0x%x\n", prange->actual_loc, best_loc);
|
||||
|
||||
do {
|
||||
r = svm_migrate_vram_to_ram(prange, mm, trigger);
|
||||
r = svm_migrate_vram_to_ram(prange, mm, trigger, NULL);
|
||||
if (r)
|
||||
return r;
|
||||
} while (prange->actual_loc && --retries);
|
||||
@ -950,7 +952,8 @@ static vm_fault_t svm_migrate_to_ram(struct vm_fault *vmf)
|
||||
}
|
||||
|
||||
r = svm_migrate_vram_to_ram(prange, vmf->vma->vm_mm,
|
||||
KFD_MIGRATE_TRIGGER_PAGEFAULT_CPU);
|
||||
KFD_MIGRATE_TRIGGER_PAGEFAULT_CPU,
|
||||
vmf->page);
|
||||
if (r)
|
||||
pr_debug("failed %d migrate svms 0x%p range 0x%p [0x%lx 0x%lx]\n",
|
||||
r, prange->svms, prange, prange->start, prange->last);
|
||||
|
@ -43,7 +43,7 @@ enum MIGRATION_COPY_DIR {
|
||||
int svm_migrate_to_vram(struct svm_range *prange, uint32_t best_loc,
|
||||
struct mm_struct *mm, uint32_t trigger);
|
||||
int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm,
|
||||
uint32_t trigger);
|
||||
uint32_t trigger, struct page *fault_page);
|
||||
unsigned long
|
||||
svm_migrate_addr_to_pfn(struct amdgpu_device *adev, unsigned long addr);
|
||||
|
||||
|
@ -2913,13 +2913,15 @@ retry_write_locked:
|
||||
*/
|
||||
if (prange->actual_loc)
|
||||
r = svm_migrate_vram_to_ram(prange, mm,
|
||||
KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU);
|
||||
KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU,
|
||||
NULL);
|
||||
else
|
||||
r = 0;
|
||||
}
|
||||
} else {
|
||||
r = svm_migrate_vram_to_ram(prange, mm,
|
||||
KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU);
|
||||
KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU,
|
||||
NULL);
|
||||
}
|
||||
if (r) {
|
||||
pr_debug("failed %d to migrate svms %p [0x%lx 0x%lx]\n",
|
||||
@ -3278,7 +3280,8 @@ svm_range_trigger_migration(struct mm_struct *mm, struct svm_range *prange,
|
||||
return 0;
|
||||
|
||||
if (!best_loc) {
|
||||
r = svm_migrate_vram_to_ram(prange, mm, KFD_MIGRATE_TRIGGER_PREFETCH);
|
||||
r = svm_migrate_vram_to_ram(prange, mm,
|
||||
KFD_MIGRATE_TRIGGER_PREFETCH, NULL);
|
||||
*migrated = !r;
|
||||
return r;
|
||||
}
|
||||
@ -3339,7 +3342,7 @@ static void svm_range_evict_svm_bo_worker(struct work_struct *work)
|
||||
mutex_lock(&prange->migrate_mutex);
|
||||
do {
|
||||
r = svm_migrate_vram_to_ram(prange, mm,
|
||||
KFD_MIGRATE_TRIGGER_TTM_EVICTION);
|
||||
KFD_MIGRATE_TRIGGER_TTM_EVICTION, NULL);
|
||||
} while (!r && prange->actual_loc && --retries);
|
||||
|
||||
if (!r && prange->actual_loc)
|
||||
|
@ -139,44 +139,24 @@ static void nouveau_dmem_fence_done(struct nouveau_fence **fence)
|
||||
}
|
||||
}
|
||||
|
||||
static vm_fault_t nouveau_dmem_fault_copy_one(struct nouveau_drm *drm,
|
||||
struct vm_fault *vmf, struct migrate_vma *args,
|
||||
dma_addr_t *dma_addr)
|
||||
static int nouveau_dmem_copy_one(struct nouveau_drm *drm, struct page *spage,
|
||||
struct page *dpage, dma_addr_t *dma_addr)
|
||||
{
|
||||
struct device *dev = drm->dev->dev;
|
||||
struct page *dpage, *spage;
|
||||
struct nouveau_svmm *svmm;
|
||||
|
||||
spage = migrate_pfn_to_page(args->src[0]);
|
||||
if (!spage || !(args->src[0] & MIGRATE_PFN_MIGRATE))
|
||||
return 0;
|
||||
|
||||
dpage = alloc_page_vma(GFP_HIGHUSER, vmf->vma, vmf->address);
|
||||
if (!dpage)
|
||||
return VM_FAULT_SIGBUS;
|
||||
lock_page(dpage);
|
||||
|
||||
*dma_addr = dma_map_page(dev, dpage, 0, PAGE_SIZE, DMA_BIDIRECTIONAL);
|
||||
if (dma_mapping_error(dev, *dma_addr))
|
||||
goto error_free_page;
|
||||
return -EIO;
|
||||
|
||||
svmm = spage->zone_device_data;
|
||||
mutex_lock(&svmm->mutex);
|
||||
nouveau_svmm_invalidate(svmm, args->start, args->end);
|
||||
if (drm->dmem->migrate.copy_func(drm, 1, NOUVEAU_APER_HOST, *dma_addr,
|
||||
NOUVEAU_APER_VRAM, nouveau_dmem_page_addr(spage)))
|
||||
goto error_dma_unmap;
|
||||
mutex_unlock(&svmm->mutex);
|
||||
NOUVEAU_APER_VRAM, nouveau_dmem_page_addr(spage))) {
|
||||
dma_unmap_page(dev, *dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
args->dst[0] = migrate_pfn(page_to_pfn(dpage));
|
||||
return 0;
|
||||
|
||||
error_dma_unmap:
|
||||
mutex_unlock(&svmm->mutex);
|
||||
dma_unmap_page(dev, *dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
|
||||
error_free_page:
|
||||
__free_page(dpage);
|
||||
return VM_FAULT_SIGBUS;
|
||||
}
|
||||
|
||||
static vm_fault_t nouveau_dmem_migrate_to_ram(struct vm_fault *vmf)
|
||||
@ -184,9 +164,11 @@ static vm_fault_t nouveau_dmem_migrate_to_ram(struct vm_fault *vmf)
|
||||
struct nouveau_drm *drm = page_to_drm(vmf->page);
|
||||
struct nouveau_dmem *dmem = drm->dmem;
|
||||
struct nouveau_fence *fence;
|
||||
struct nouveau_svmm *svmm;
|
||||
struct page *spage, *dpage;
|
||||
unsigned long src = 0, dst = 0;
|
||||
dma_addr_t dma_addr = 0;
|
||||
vm_fault_t ret;
|
||||
vm_fault_t ret = 0;
|
||||
struct migrate_vma args = {
|
||||
.vma = vmf->vma,
|
||||
.start = vmf->address,
|
||||
@ -207,10 +189,26 @@ static vm_fault_t nouveau_dmem_migrate_to_ram(struct vm_fault *vmf)
|
||||
if (!args.cpages)
|
||||
return 0;
|
||||
|
||||
ret = nouveau_dmem_fault_copy_one(drm, vmf, &args, &dma_addr);
|
||||
if (ret || dst == 0)
|
||||
spage = migrate_pfn_to_page(src);
|
||||
if (!spage || !(src & MIGRATE_PFN_MIGRATE))
|
||||
goto done;
|
||||
|
||||
dpage = alloc_page_vma(GFP_HIGHUSER, vmf->vma, vmf->address);
|
||||
if (!dpage)
|
||||
goto done;
|
||||
|
||||
dst = migrate_pfn(page_to_pfn(dpage));
|
||||
|
||||
svmm = spage->zone_device_data;
|
||||
mutex_lock(&svmm->mutex);
|
||||
nouveau_svmm_invalidate(svmm, args.start, args.end);
|
||||
ret = nouveau_dmem_copy_one(drm, spage, dpage, &dma_addr);
|
||||
mutex_unlock(&svmm->mutex);
|
||||
if (ret) {
|
||||
ret = VM_FAULT_SIGBUS;
|
||||
goto done;
|
||||
}
|
||||
|
||||
nouveau_fence_new(dmem->migrate.chan, false, &fence);
|
||||
migrate_vma_pages(&args);
|
||||
nouveau_dmem_fence_done(&fence);
|
||||
@ -326,7 +324,7 @@ nouveau_dmem_page_alloc_locked(struct nouveau_drm *drm)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
lock_page(page);
|
||||
zone_device_page_init(page);
|
||||
return page;
|
||||
}
|
||||
|
||||
@ -369,6 +367,52 @@ nouveau_dmem_suspend(struct nouveau_drm *drm)
|
||||
mutex_unlock(&drm->dmem->mutex);
|
||||
}
|
||||
|
||||
/*
|
||||
* Evict all pages mapping a chunk.
|
||||
*/
|
||||
static void
|
||||
nouveau_dmem_evict_chunk(struct nouveau_dmem_chunk *chunk)
|
||||
{
|
||||
unsigned long i, npages = range_len(&chunk->pagemap.range) >> PAGE_SHIFT;
|
||||
unsigned long *src_pfns, *dst_pfns;
|
||||
dma_addr_t *dma_addrs;
|
||||
struct nouveau_fence *fence;
|
||||
|
||||
src_pfns = kcalloc(npages, sizeof(*src_pfns), GFP_KERNEL);
|
||||
dst_pfns = kcalloc(npages, sizeof(*dst_pfns), GFP_KERNEL);
|
||||
dma_addrs = kcalloc(npages, sizeof(*dma_addrs), GFP_KERNEL);
|
||||
|
||||
migrate_device_range(src_pfns, chunk->pagemap.range.start >> PAGE_SHIFT,
|
||||
npages);
|
||||
|
||||
for (i = 0; i < npages; i++) {
|
||||
if (src_pfns[i] & MIGRATE_PFN_MIGRATE) {
|
||||
struct page *dpage;
|
||||
|
||||
/*
|
||||
* _GFP_NOFAIL because the GPU is going away and there
|
||||
* is nothing sensible we can do if we can't copy the
|
||||
* data back.
|
||||
*/
|
||||
dpage = alloc_page(GFP_HIGHUSER | __GFP_NOFAIL);
|
||||
dst_pfns[i] = migrate_pfn(page_to_pfn(dpage));
|
||||
nouveau_dmem_copy_one(chunk->drm,
|
||||
migrate_pfn_to_page(src_pfns[i]), dpage,
|
||||
&dma_addrs[i]);
|
||||
}
|
||||
}
|
||||
|
||||
nouveau_fence_new(chunk->drm->dmem->migrate.chan, false, &fence);
|
||||
migrate_device_pages(src_pfns, dst_pfns, npages);
|
||||
nouveau_dmem_fence_done(&fence);
|
||||
migrate_device_finalize(src_pfns, dst_pfns, npages);
|
||||
kfree(src_pfns);
|
||||
kfree(dst_pfns);
|
||||
for (i = 0; i < npages; i++)
|
||||
dma_unmap_page(chunk->drm->dev->dev, dma_addrs[i], PAGE_SIZE, DMA_BIDIRECTIONAL);
|
||||
kfree(dma_addrs);
|
||||
}
|
||||
|
||||
void
|
||||
nouveau_dmem_fini(struct nouveau_drm *drm)
|
||||
{
|
||||
@ -380,8 +424,10 @@ nouveau_dmem_fini(struct nouveau_drm *drm)
|
||||
mutex_lock(&drm->dmem->mutex);
|
||||
|
||||
list_for_each_entry_safe(chunk, tmp, &drm->dmem->chunks, list) {
|
||||
nouveau_dmem_evict_chunk(chunk);
|
||||
nouveau_bo_unpin(chunk->bo);
|
||||
nouveau_bo_ref(NULL, &chunk->bo);
|
||||
WARN_ON(chunk->callocated);
|
||||
list_del(&chunk->list);
|
||||
memunmap_pages(&chunk->pagemap);
|
||||
release_mem_region(chunk->pagemap.range.start,
|
||||
|
@@ -363,13 +363,14 @@ static struct page *ext4_read_merkle_tree_page(struct inode *inode,
 					       pgoff_t index,
 					       unsigned long num_ra_pages)
 {
-	DEFINE_READAHEAD(ractl, NULL, NULL, inode->i_mapping, index);
 	struct page *page;
 
 	index += ext4_verity_metadata_pos(inode) >> PAGE_SHIFT;
 
 	page = find_get_page_flags(inode->i_mapping, index, FGP_ACCESSED);
 	if (!page || !PageUptodate(page)) {
+		DEFINE_READAHEAD(ractl, NULL, NULL, inode->i_mapping, index);
+
 		if (page)
 			put_page(page);
 		else if (num_ra_pages > 1)
@@ -258,13 +258,14 @@ static struct page *f2fs_read_merkle_tree_page(struct inode *inode,
 					       pgoff_t index,
 					       unsigned long num_ra_pages)
 {
-	DEFINE_READAHEAD(ractl, NULL, NULL, inode->i_mapping, index);
 	struct page *page;
 
 	index += f2fs_verity_metadata_pos(inode) >> PAGE_SHIFT;
 
 	page = find_get_page_flags(inode->i_mapping, index, FGP_ACCESSED);
 	if (!page || !PageUptodate(page)) {
+		DEFINE_READAHEAD(ractl, NULL, NULL, inode->i_mapping, index);
+
 		if (page)
 			put_page(page);
 		else if (num_ra_pages > 1)
@@ -484,6 +484,12 @@ static inline struct damon_region *damon_first_region(struct damon_target *t)
 	return list_first_entry(&t->regions_list, struct damon_region, list);
 }
 
+static inline unsigned long damon_sz_region(struct damon_region *r)
+{
+	return r->ar.end - r->ar.start;
+}
+
+
 #define damon_for_each_region(r, t) \
 	list_for_each_entry(r, &t->regions_list, list)
 
@@ -187,6 +187,7 @@ static inline bool folio_is_device_coherent(const struct folio *folio)
 }
 
 #ifdef CONFIG_ZONE_DEVICE
+void zone_device_page_init(struct page *page);
 void *memremap_pages(struct dev_pagemap *pgmap, int nid);
 void memunmap_pages(struct dev_pagemap *pgmap);
 void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap);
@@ -62,6 +62,8 @@ extern const char *migrate_reason_names[MR_TYPES];
 #ifdef CONFIG_MIGRATION
 
 extern void putback_movable_pages(struct list_head *l);
+int migrate_folio_extra(struct address_space *mapping, struct folio *dst,
+		struct folio *src, enum migrate_mode mode, int extra_count);
 int migrate_folio(struct address_space *mapping, struct folio *dst,
 		struct folio *src, enum migrate_mode mode);
 extern int migrate_pages(struct list_head *l, new_page_t new, free_page_t free,
@@ -197,11 +199,24 @@ struct migrate_vma {
 	 */
 	void *pgmap_owner;
 	unsigned long flags;
+
+	/*
+	 * Set to vmf->page if this is being called to migrate a page as part of
+	 * a migrate_to_ram() callback.
+	 */
+	struct page *fault_page;
 };
 
 int migrate_vma_setup(struct migrate_vma *args);
 void migrate_vma_pages(struct migrate_vma *migrate);
 void migrate_vma_finalize(struct migrate_vma *migrate);
+int migrate_device_range(unsigned long *src_pfns, unsigned long start,
+			unsigned long npages);
+void migrate_device_pages(unsigned long *src_pfns, unsigned long *dst_pfns,
+			unsigned long npages);
+void migrate_device_finalize(unsigned long *src_pfns,
+			unsigned long *dst_pfns, unsigned long npages);
 
 #endif /* CONFIG_MIGRATION */
 
 #endif /* _LINUX_MIGRATE_H */
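
The new fault_page field is meant to be set from a pgmap's migrate_to_ram() callback: do_swap_page() now holds an extra reference on the faulting page (see the mm/memory.c hunk further down), and migrate_vma_setup()/migrate_vma_unmap() account for that reference instead of treating the page as pinned. A rough sketch of the callback side, modelled on the lib/test_hmm.c change in this series; everything except the migrate_vma API itself is a placeholder:

  /* Sketch only: mydev_migrate_to_ram() stands in for a driver callback. */
  static vm_fault_t mydev_migrate_to_ram(struct vm_fault *vmf)
  {
  	unsigned long src_pfn = 0, dst_pfn = 0;
  	struct migrate_vma args = { 0 };

  	args.vma = vmf->vma;
  	args.start = vmf->address;
  	args.end = vmf->address + PAGE_SIZE;
  	args.src = &src_pfn;
  	args.dst = &dst_pfn;
  	args.pgmap_owner = vmf->page->pgmap->owner;
  	args.flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE;
  	args.fault_page = vmf->page;	/* the page being faulted in */

  	if (migrate_vma_setup(&args))
  		return VM_FAULT_SIGBUS;

  	/* Allocate a system page, copy the device data, fill dst_pfn... */

  	migrate_vma_pages(&args);
  	migrate_vma_finalize(&args);
  	return 0;
  }
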
@@ -870,8 +870,6 @@ struct task_struct {
 	struct mm_struct		*mm;
 	struct mm_struct		*active_mm;
 
-	/* Per-thread vma caching: */
-
 #ifdef SPLIT_RSS_COUNTING
 	struct task_rss_stat		rss_stat;
 #endif
129
lib/test_hmm.c
129
lib/test_hmm.c
@ -100,6 +100,7 @@ struct dmirror {
|
||||
struct dmirror_chunk {
|
||||
struct dev_pagemap pagemap;
|
||||
struct dmirror_device *mdevice;
|
||||
bool remove;
|
||||
};
|
||||
|
||||
/*
|
||||
@ -192,11 +193,15 @@ static int dmirror_fops_release(struct inode *inode, struct file *filp)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct dmirror_chunk *dmirror_page_to_chunk(struct page *page)
|
||||
{
|
||||
return container_of(page->pgmap, struct dmirror_chunk, pagemap);
|
||||
}
|
||||
|
||||
static struct dmirror_device *dmirror_page_to_device(struct page *page)
|
||||
|
||||
{
|
||||
return container_of(page->pgmap, struct dmirror_chunk,
|
||||
pagemap)->mdevice;
|
||||
return dmirror_page_to_chunk(page)->mdevice;
|
||||
}
|
||||
|
||||
static int dmirror_do_fault(struct dmirror *dmirror, struct hmm_range *range)
|
||||
@ -627,8 +632,8 @@ static struct page *dmirror_devmem_alloc_page(struct dmirror_device *mdevice)
|
||||
goto error;
|
||||
}
|
||||
|
||||
zone_device_page_init(dpage);
|
||||
dpage->zone_device_data = rpage;
|
||||
lock_page(dpage);
|
||||
return dpage;
|
||||
|
||||
error:
|
||||
@ -907,7 +912,7 @@ static int dmirror_migrate_to_system(struct dmirror *dmirror,
|
||||
struct vm_area_struct *vma;
|
||||
unsigned long src_pfns[64] = { 0 };
|
||||
unsigned long dst_pfns[64] = { 0 };
|
||||
struct migrate_vma args;
|
||||
struct migrate_vma args = { 0 };
|
||||
unsigned long next;
|
||||
int ret;
|
||||
|
||||
@ -968,7 +973,7 @@ static int dmirror_migrate_to_device(struct dmirror *dmirror,
|
||||
unsigned long src_pfns[64] = { 0 };
|
||||
unsigned long dst_pfns[64] = { 0 };
|
||||
struct dmirror_bounce bounce;
|
||||
struct migrate_vma args;
|
||||
struct migrate_vma args = { 0 };
|
||||
unsigned long next;
|
||||
int ret;
|
||||
|
||||
@ -1218,6 +1223,85 @@ static int dmirror_snapshot(struct dmirror *dmirror,
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void dmirror_device_evict_chunk(struct dmirror_chunk *chunk)
|
||||
{
|
||||
unsigned long start_pfn = chunk->pagemap.range.start >> PAGE_SHIFT;
|
||||
unsigned long end_pfn = chunk->pagemap.range.end >> PAGE_SHIFT;
|
||||
unsigned long npages = end_pfn - start_pfn + 1;
|
||||
unsigned long i;
|
||||
unsigned long *src_pfns;
|
||||
unsigned long *dst_pfns;
|
||||
|
||||
src_pfns = kcalloc(npages, sizeof(*src_pfns), GFP_KERNEL);
|
||||
dst_pfns = kcalloc(npages, sizeof(*dst_pfns), GFP_KERNEL);
|
||||
|
||||
migrate_device_range(src_pfns, start_pfn, npages);
|
||||
for (i = 0; i < npages; i++) {
|
||||
struct page *dpage, *spage;
|
||||
|
||||
spage = migrate_pfn_to_page(src_pfns[i]);
|
||||
if (!spage || !(src_pfns[i] & MIGRATE_PFN_MIGRATE))
|
||||
continue;
|
||||
|
||||
if (WARN_ON(!is_device_private_page(spage) &&
|
||||
!is_device_coherent_page(spage)))
|
||||
continue;
|
||||
spage = BACKING_PAGE(spage);
|
||||
dpage = alloc_page(GFP_HIGHUSER_MOVABLE | __GFP_NOFAIL);
|
||||
lock_page(dpage);
|
||||
copy_highpage(dpage, spage);
|
||||
dst_pfns[i] = migrate_pfn(page_to_pfn(dpage));
|
||||
if (src_pfns[i] & MIGRATE_PFN_WRITE)
|
||||
dst_pfns[i] |= MIGRATE_PFN_WRITE;
|
||||
}
|
||||
migrate_device_pages(src_pfns, dst_pfns, npages);
|
||||
migrate_device_finalize(src_pfns, dst_pfns, npages);
|
||||
kfree(src_pfns);
|
||||
kfree(dst_pfns);
|
||||
}
|
||||
|
||||
/* Removes free pages from the free list so they can't be re-allocated */
|
||||
static void dmirror_remove_free_pages(struct dmirror_chunk *devmem)
|
||||
{
|
||||
struct dmirror_device *mdevice = devmem->mdevice;
|
||||
struct page *page;
|
||||
|
||||
for (page = mdevice->free_pages; page; page = page->zone_device_data)
|
||||
if (dmirror_page_to_chunk(page) == devmem)
|
||||
mdevice->free_pages = page->zone_device_data;
|
||||
}
|
||||
|
||||
static void dmirror_device_remove_chunks(struct dmirror_device *mdevice)
|
||||
{
|
||||
unsigned int i;
|
||||
|
||||
mutex_lock(&mdevice->devmem_lock);
|
||||
if (mdevice->devmem_chunks) {
|
||||
for (i = 0; i < mdevice->devmem_count; i++) {
|
||||
struct dmirror_chunk *devmem =
|
||||
mdevice->devmem_chunks[i];
|
||||
|
||||
spin_lock(&mdevice->lock);
|
||||
devmem->remove = true;
|
||||
dmirror_remove_free_pages(devmem);
|
||||
spin_unlock(&mdevice->lock);
|
||||
|
||||
dmirror_device_evict_chunk(devmem);
|
||||
memunmap_pages(&devmem->pagemap);
|
||||
if (devmem->pagemap.type == MEMORY_DEVICE_PRIVATE)
|
||||
release_mem_region(devmem->pagemap.range.start,
|
||||
range_len(&devmem->pagemap.range));
|
||||
kfree(devmem);
|
||||
}
|
||||
mdevice->devmem_count = 0;
|
||||
mdevice->devmem_capacity = 0;
|
||||
mdevice->free_pages = NULL;
|
||||
kfree(mdevice->devmem_chunks);
|
||||
mdevice->devmem_chunks = NULL;
|
||||
}
|
||||
mutex_unlock(&mdevice->devmem_lock);
|
||||
}
|
||||
|
||||
static long dmirror_fops_unlocked_ioctl(struct file *filp,
|
||||
unsigned int command,
|
||||
unsigned long arg)
|
||||
@ -1272,6 +1356,11 @@ static long dmirror_fops_unlocked_ioctl(struct file *filp,
|
||||
ret = dmirror_snapshot(dmirror, &cmd);
|
||||
break;
|
||||
|
||||
case HMM_DMIRROR_RELEASE:
|
||||
dmirror_device_remove_chunks(dmirror->mdevice);
|
||||
ret = 0;
|
||||
break;
|
||||
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
@ -1326,15 +1415,19 @@ static void dmirror_devmem_free(struct page *page)
|
||||
|
||||
mdevice = dmirror_page_to_device(page);
|
||||
spin_lock(&mdevice->lock);
|
||||
mdevice->cfree++;
|
||||
page->zone_device_data = mdevice->free_pages;
|
||||
mdevice->free_pages = page;
|
||||
|
||||
/* Return page to our allocator if not freeing the chunk */
|
||||
if (!dmirror_page_to_chunk(page)->remove) {
|
||||
mdevice->cfree++;
|
||||
page->zone_device_data = mdevice->free_pages;
|
||||
mdevice->free_pages = page;
|
||||
}
|
||||
spin_unlock(&mdevice->lock);
|
||||
}
|
||||
|
||||
static vm_fault_t dmirror_devmem_fault(struct vm_fault *vmf)
|
||||
{
|
||||
struct migrate_vma args;
|
||||
struct migrate_vma args = { 0 };
|
||||
unsigned long src_pfns = 0;
|
||||
unsigned long dst_pfns = 0;
|
||||
struct page *rpage;
|
||||
@ -1357,6 +1450,7 @@ static vm_fault_t dmirror_devmem_fault(struct vm_fault *vmf)
|
||||
args.dst = &dst_pfns;
|
||||
args.pgmap_owner = dmirror->mdevice;
|
||||
args.flags = dmirror_select_device(dmirror);
|
||||
args.fault_page = vmf->page;
|
||||
|
||||
if (migrate_vma_setup(&args))
|
||||
return VM_FAULT_SIGBUS;
|
||||
@ -1407,22 +1501,7 @@ static int dmirror_device_init(struct dmirror_device *mdevice, int id)
|
||||
|
||||
static void dmirror_device_remove(struct dmirror_device *mdevice)
|
||||
{
|
||||
unsigned int i;
|
||||
|
||||
if (mdevice->devmem_chunks) {
|
||||
for (i = 0; i < mdevice->devmem_count; i++) {
|
||||
struct dmirror_chunk *devmem =
|
||||
mdevice->devmem_chunks[i];
|
||||
|
||||
memunmap_pages(&devmem->pagemap);
|
||||
if (devmem->pagemap.type == MEMORY_DEVICE_PRIVATE)
|
||||
release_mem_region(devmem->pagemap.range.start,
|
||||
range_len(&devmem->pagemap.range));
|
||||
kfree(devmem);
|
||||
}
|
||||
kfree(mdevice->devmem_chunks);
|
||||
}
|
||||
|
||||
dmirror_device_remove_chunks(mdevice);
|
||||
cdev_device_del(&mdevice->cdevice, &mdevice->device);
|
||||
}
|
||||
|
||||
|
@ -36,6 +36,7 @@ struct hmm_dmirror_cmd {
|
||||
#define HMM_DMIRROR_SNAPSHOT _IOWR('H', 0x04, struct hmm_dmirror_cmd)
|
||||
#define HMM_DMIRROR_EXCLUSIVE _IOWR('H', 0x05, struct hmm_dmirror_cmd)
|
||||
#define HMM_DMIRROR_CHECK_EXCLUSIVE _IOWR('H', 0x06, struct hmm_dmirror_cmd)
|
||||
#define HMM_DMIRROR_RELEASE _IOWR('H', 0x07, struct hmm_dmirror_cmd)
|
||||
|
||||
/*
|
||||
* Values returned in hmm_dmirror_cmd.ptr for HMM_DMIRROR_SNAPSHOT.
|
||||
|
@ -67,17 +67,24 @@ static int __init do_alloc_pages_order(int order, int *total_failures)
|
||||
size_t size = PAGE_SIZE << order;
|
||||
|
||||
page = alloc_pages(GFP_KERNEL, order);
|
||||
if (!page)
|
||||
goto err;
|
||||
buf = page_address(page);
|
||||
fill_with_garbage(buf, size);
|
||||
__free_pages(page, order);
|
||||
|
||||
page = alloc_pages(GFP_KERNEL, order);
|
||||
if (!page)
|
||||
goto err;
|
||||
buf = page_address(page);
|
||||
if (count_nonzero_bytes(buf, size))
|
||||
(*total_failures)++;
|
||||
fill_with_garbage(buf, size);
|
||||
__free_pages(page, order);
|
||||
return 1;
|
||||
err:
|
||||
(*total_failures)++;
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Test the page allocator by calling alloc_pages with different orders. */
|
||||
@ -100,15 +107,22 @@ static int __init do_kmalloc_size(size_t size, int *total_failures)
|
||||
void *buf;
|
||||
|
||||
buf = kmalloc(size, GFP_KERNEL);
|
||||
if (!buf)
|
||||
goto err;
|
||||
fill_with_garbage(buf, size);
|
||||
kfree(buf);
|
||||
|
||||
buf = kmalloc(size, GFP_KERNEL);
|
||||
if (!buf)
|
||||
goto err;
|
||||
if (count_nonzero_bytes(buf, size))
|
||||
(*total_failures)++;
|
||||
fill_with_garbage(buf, size);
|
||||
kfree(buf);
|
||||
return 1;
|
||||
err:
|
||||
(*total_failures)++;
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Test vmalloc() with given parameters. */
|
||||
@ -117,15 +131,22 @@ static int __init do_vmalloc_size(size_t size, int *total_failures)
|
||||
void *buf;
|
||||
|
||||
buf = vmalloc(size);
|
||||
if (!buf)
|
||||
goto err;
|
||||
fill_with_garbage(buf, size);
|
||||
vfree(buf);
|
||||
|
||||
buf = vmalloc(size);
|
||||
if (!buf)
|
||||
goto err;
|
||||
if (count_nonzero_bytes(buf, size))
|
||||
(*total_failures)++;
|
||||
fill_with_garbage(buf, size);
|
||||
vfree(buf);
|
||||
return 1;
|
||||
err:
|
||||
(*total_failures)++;
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Test kmalloc()/vmalloc() by allocating objects of different sizes. */
|
||||
|
@@ -1847,7 +1847,6 @@ static unsigned long fast_find_migrateblock(struct compact_control *cc)
 					pfn = cc->zone->zone_start_pfn;
 					cc->fast_search_fail = 0;
 					found_block = true;
-					set_pageblock_skip(freepage);
 					break;
 				}
 			}
@ -491,7 +491,7 @@ static unsigned long damon_region_sz_limit(struct damon_ctx *ctx)
|
||||
|
||||
damon_for_each_target(t, ctx) {
|
||||
damon_for_each_region(r, t)
|
||||
sz += r->ar.end - r->ar.start;
|
||||
sz += damon_sz_region(r);
|
||||
}
|
||||
|
||||
if (ctx->attrs.min_nr_regions)
|
||||
@ -674,7 +674,7 @@ static bool __damos_valid_target(struct damon_region *r, struct damos *s)
|
||||
{
|
||||
unsigned long sz;
|
||||
|
||||
sz = r->ar.end - r->ar.start;
|
||||
sz = damon_sz_region(r);
|
||||
return s->pattern.min_sz_region <= sz &&
|
||||
sz <= s->pattern.max_sz_region &&
|
||||
s->pattern.min_nr_accesses <= r->nr_accesses &&
|
||||
@ -702,7 +702,7 @@ static void damon_do_apply_schemes(struct damon_ctx *c,
|
||||
|
||||
damon_for_each_scheme(s, c) {
|
||||
struct damos_quota *quota = &s->quota;
|
||||
unsigned long sz = r->ar.end - r->ar.start;
|
||||
unsigned long sz = damon_sz_region(r);
|
||||
struct timespec64 begin, end;
|
||||
unsigned long sz_applied = 0;
|
||||
|
||||
@ -731,14 +731,14 @@ static void damon_do_apply_schemes(struct damon_ctx *c,
|
||||
sz = ALIGN_DOWN(quota->charge_addr_from -
|
||||
r->ar.start, DAMON_MIN_REGION);
|
||||
if (!sz) {
|
||||
if (r->ar.end - r->ar.start <=
|
||||
DAMON_MIN_REGION)
|
||||
if (damon_sz_region(r) <=
|
||||
DAMON_MIN_REGION)
|
||||
continue;
|
||||
sz = DAMON_MIN_REGION;
|
||||
}
|
||||
damon_split_region_at(t, r, sz);
|
||||
r = damon_next_region(r);
|
||||
sz = r->ar.end - r->ar.start;
|
||||
sz = damon_sz_region(r);
|
||||
}
|
||||
quota->charge_target_from = NULL;
|
||||
quota->charge_addr_from = 0;
|
||||
@ -843,8 +843,7 @@ static void kdamond_apply_schemes(struct damon_ctx *c)
|
||||
continue;
|
||||
score = c->ops.get_scheme_score(
|
||||
c, t, r, s);
|
||||
quota->histogram[score] +=
|
||||
r->ar.end - r->ar.start;
|
||||
quota->histogram[score] += damon_sz_region(r);
|
||||
if (score > max_score)
|
||||
max_score = score;
|
||||
}
|
||||
@ -865,18 +864,13 @@ static void kdamond_apply_schemes(struct damon_ctx *c)
|
||||
}
|
||||
}
|
||||
|
||||
static inline unsigned long sz_damon_region(struct damon_region *r)
|
||||
{
|
||||
return r->ar.end - r->ar.start;
|
||||
}
|
||||
|
||||
/*
|
||||
* Merge two adjacent regions into one region
|
||||
*/
|
||||
static void damon_merge_two_regions(struct damon_target *t,
|
||||
struct damon_region *l, struct damon_region *r)
|
||||
{
|
||||
unsigned long sz_l = sz_damon_region(l), sz_r = sz_damon_region(r);
|
||||
unsigned long sz_l = damon_sz_region(l), sz_r = damon_sz_region(r);
|
||||
|
||||
l->nr_accesses = (l->nr_accesses * sz_l + r->nr_accesses * sz_r) /
|
||||
(sz_l + sz_r);
|
||||
@ -905,7 +899,7 @@ static void damon_merge_regions_of(struct damon_target *t, unsigned int thres,
|
||||
|
||||
if (prev && prev->ar.end == r->ar.start &&
|
||||
abs(prev->nr_accesses - r->nr_accesses) <= thres &&
|
||||
sz_damon_region(prev) + sz_damon_region(r) <= sz_limit)
|
||||
damon_sz_region(prev) + damon_sz_region(r) <= sz_limit)
|
||||
damon_merge_two_regions(t, prev, r);
|
||||
else
|
||||
prev = r;
|
||||
@ -963,7 +957,7 @@ static void damon_split_regions_of(struct damon_target *t, int nr_subs)
|
||||
int i;
|
||||
|
||||
damon_for_each_region_safe(r, next, t) {
|
||||
sz_region = r->ar.end - r->ar.start;
|
||||
sz_region = damon_sz_region(r);
|
||||
|
||||
for (i = 0; i < nr_subs - 1 &&
|
||||
sz_region > 2 * DAMON_MIN_REGION; i++) {
|
||||
|
@ -72,7 +72,7 @@ static int damon_va_evenly_split_region(struct damon_target *t,
|
||||
return -EINVAL;
|
||||
|
||||
orig_end = r->ar.end;
|
||||
sz_orig = r->ar.end - r->ar.start;
|
||||
sz_orig = damon_sz_region(r);
|
||||
sz_piece = ALIGN_DOWN(sz_orig / nr_pieces, DAMON_MIN_REGION);
|
||||
|
||||
if (!sz_piece)
|
||||
@ -618,7 +618,7 @@ static unsigned long damos_madvise(struct damon_target *target,
|
||||
{
|
||||
struct mm_struct *mm;
|
||||
unsigned long start = PAGE_ALIGN(r->ar.start);
|
||||
unsigned long len = PAGE_ALIGN(r->ar.end - r->ar.start);
|
||||
unsigned long len = PAGE_ALIGN(damon_sz_region(r));
|
||||
unsigned long applied;
|
||||
|
||||
mm = damon_get_mm(target);
|
||||
|
43
mm/highmem.c
43
mm/highmem.c
@ -30,6 +30,17 @@
|
||||
#include <asm/tlbflush.h>
|
||||
#include <linux/vmalloc.h>
|
||||
|
||||
#ifdef CONFIG_KMAP_LOCAL
|
||||
static inline int kmap_local_calc_idx(int idx)
|
||||
{
|
||||
return idx + KM_MAX_IDX * smp_processor_id();
|
||||
}
|
||||
|
||||
#ifndef arch_kmap_local_map_idx
|
||||
#define arch_kmap_local_map_idx(idx, pfn) kmap_local_calc_idx(idx)
|
||||
#endif
|
||||
#endif /* CONFIG_KMAP_LOCAL */
|
||||
|
||||
/*
|
||||
* Virtual_count is not a pure "count".
|
||||
* 0 means that it is not mapped, and has not been mapped
|
||||
@ -142,12 +153,29 @@ pte_t *pkmap_page_table;
|
||||
|
||||
struct page *__kmap_to_page(void *vaddr)
|
||||
{
|
||||
unsigned long base = (unsigned long) vaddr & PAGE_MASK;
|
||||
struct kmap_ctrl *kctrl = ¤t->kmap_ctrl;
|
||||
unsigned long addr = (unsigned long)vaddr;
|
||||
int i;
|
||||
|
||||
if (addr >= PKMAP_ADDR(0) && addr < PKMAP_ADDR(LAST_PKMAP)) {
|
||||
int i = PKMAP_NR(addr);
|
||||
/* kmap() mappings */
|
||||
if (WARN_ON_ONCE(addr >= PKMAP_ADDR(0) &&
|
||||
addr < PKMAP_ADDR(LAST_PKMAP)))
|
||||
return pte_page(pkmap_page_table[PKMAP_NR(addr)]);
|
||||
|
||||
return pte_page(pkmap_page_table[i]);
|
||||
/* kmap_local_page() mappings */
|
||||
if (WARN_ON_ONCE(base >= __fix_to_virt(FIX_KMAP_END) &&
|
||||
base < __fix_to_virt(FIX_KMAP_BEGIN))) {
|
||||
for (i = 0; i < kctrl->idx; i++) {
|
||||
unsigned long base_addr;
|
||||
int idx;
|
||||
|
||||
idx = arch_kmap_local_map_idx(i, pte_pfn(pteval));
|
||||
base_addr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
|
||||
|
||||
if (base_addr == base)
|
||||
return pte_page(kctrl->pteval[i]);
|
||||
}
|
||||
}
|
||||
|
||||
return virt_to_page(vaddr);
|
||||
@ -462,10 +490,6 @@ static inline void kmap_local_idx_pop(void)
|
||||
# define arch_kmap_local_post_unmap(vaddr) do { } while (0)
|
||||
#endif
|
||||
|
||||
#ifndef arch_kmap_local_map_idx
|
||||
#define arch_kmap_local_map_idx(idx, pfn) kmap_local_calc_idx(idx)
|
||||
#endif
|
||||
|
||||
#ifndef arch_kmap_local_unmap_idx
|
||||
#define arch_kmap_local_unmap_idx(idx, vaddr) kmap_local_calc_idx(idx)
|
||||
#endif
|
||||
@ -494,11 +518,6 @@ static inline bool kmap_high_unmap_local(unsigned long vaddr)
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline int kmap_local_calc_idx(int idx)
|
||||
{
|
||||
return idx + KM_MAX_IDX * smp_processor_id();
|
||||
}
|
||||
|
||||
static pte_t *__kmap_pte;
|
||||
|
||||
static pte_t *kmap_get_pte(unsigned long vaddr, int idx)
|
||||
|
72
mm/hugetlb.c
72
mm/hugetlb.c
@ -5096,6 +5096,7 @@ static void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct
|
||||
* unmapped and its refcount is dropped, so just clear pte here.
|
||||
*/
|
||||
if (unlikely(!pte_present(pte))) {
|
||||
#ifdef CONFIG_PTE_MARKER_UFFD_WP
|
||||
/*
|
||||
* If the pte was wr-protected by uffd-wp in any of the
|
||||
* swap forms, meanwhile the caller does not want to
|
||||
@ -5107,6 +5108,7 @@ static void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct
|
||||
set_huge_pte_at(mm, address, ptep,
|
||||
make_pte_marker(PTE_MARKER_UFFD_WP));
|
||||
else
|
||||
#endif
|
||||
huge_pte_clear(mm, address, ptep, sz);
|
||||
spin_unlock(ptl);
|
||||
continue;
|
||||
@ -5135,11 +5137,13 @@ static void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct
|
||||
tlb_remove_huge_tlb_entry(h, tlb, ptep, address);
|
||||
if (huge_pte_dirty(pte))
|
||||
set_page_dirty(page);
|
||||
#ifdef CONFIG_PTE_MARKER_UFFD_WP
|
||||
/* Leave a uffd-wp pte marker if needed */
|
||||
if (huge_pte_uffd_wp(pte) &&
|
||||
!(zap_flags & ZAP_FLAG_DROP_MARKER))
|
||||
set_huge_pte_at(mm, address, ptep,
|
||||
make_pte_marker(PTE_MARKER_UFFD_WP));
|
||||
#endif
|
||||
hugetlb_count_sub(pages_per_huge_page(h), mm);
|
||||
page_remove_rmap(page, vma, true);
|
||||
|
||||
@ -5531,6 +5535,23 @@ static inline vm_fault_t hugetlb_handle_userfault(struct vm_area_struct *vma,
|
||||
return handle_userfault(&vmf, reason);
|
||||
}
|
||||
|
||||
/*
|
||||
* Recheck pte with pgtable lock. Returns true if pte didn't change, or
|
||||
* false if pte changed or is changing.
|
||||
*/
|
||||
static bool hugetlb_pte_stable(struct hstate *h, struct mm_struct *mm,
|
||||
pte_t *ptep, pte_t old_pte)
|
||||
{
|
||||
spinlock_t *ptl;
|
||||
bool same;
|
||||
|
||||
ptl = huge_pte_lock(h, mm, ptep);
|
||||
same = pte_same(huge_ptep_get(ptep), old_pte);
|
||||
spin_unlock(ptl);
|
||||
|
||||
return same;
|
||||
}
|
||||
|
||||
static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
|
||||
struct vm_area_struct *vma,
|
||||
struct address_space *mapping, pgoff_t idx,
|
||||
@ -5571,10 +5592,33 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
|
||||
if (idx >= size)
|
||||
goto out;
|
||||
/* Check for page in userfault range */
|
||||
if (userfaultfd_missing(vma))
|
||||
return hugetlb_handle_userfault(vma, mapping, idx,
|
||||
flags, haddr, address,
|
||||
VM_UFFD_MISSING);
|
||||
if (userfaultfd_missing(vma)) {
|
||||
/*
|
||||
* Since hugetlb_no_page() was examining pte
|
||||
* without pgtable lock, we need to re-test under
|
||||
* lock because the pte may not be stable and could
|
||||
* have changed from under us. Try to detect
|
||||
* either changed or during-changing ptes and retry
|
||||
* properly when needed.
|
||||
*
|
||||
* Note that userfaultfd is actually fine with
|
||||
* false positives (e.g. caused by pte changed),
|
||||
* but not wrong logical events (e.g. caused by
|
||||
* reading a pte during changing). The latter can
|
||||
* confuse the userspace, so the strictness is very
|
||||
* much preferred. E.g., MISSING event should
|
||||
* never happen on the page after UFFDIO_COPY has
|
||||
* correctly installed the page and returned.
|
||||
*/
|
||||
if (!hugetlb_pte_stable(h, mm, ptep, old_pte)) {
|
||||
ret = 0;
|
||||
goto out;
|
||||
}
|
||||
|
||||
return hugetlb_handle_userfault(vma, mapping, idx, flags,
|
||||
haddr, address,
|
||||
VM_UFFD_MISSING);
|
||||
}
|
||||
|
||||
page = alloc_huge_page(vma, haddr, 0);
|
||||
if (IS_ERR(page)) {
|
||||
@ -5590,11 +5634,10 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
|
||||
* here. Before returning error, get ptl and make
|
||||
* sure there really is no pte entry.
|
||||
*/
|
||||
ptl = huge_pte_lock(h, mm, ptep);
|
||||
ret = 0;
|
||||
if (huge_pte_none(huge_ptep_get(ptep)))
|
||||
if (hugetlb_pte_stable(h, mm, ptep, old_pte))
|
||||
ret = vmf_error(PTR_ERR(page));
|
||||
spin_unlock(ptl);
|
||||
else
|
||||
ret = 0;
|
||||
goto out;
|
||||
}
|
||||
clear_huge_page(page, address, pages_per_huge_page(h));
|
||||
@ -5640,9 +5683,14 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
|
||||
if (userfaultfd_minor(vma)) {
|
||||
unlock_page(page);
|
||||
put_page(page);
|
||||
return hugetlb_handle_userfault(vma, mapping, idx,
|
||||
flags, haddr, address,
|
||||
VM_UFFD_MINOR);
|
||||
/* See comment in userfaultfd_missing() block above */
|
||||
if (!hugetlb_pte_stable(h, mm, ptep, old_pte)) {
|
||||
ret = 0;
|
||||
goto out;
|
||||
}
|
||||
return hugetlb_handle_userfault(vma, mapping, idx, flags,
|
||||
haddr, address,
|
||||
VM_UFFD_MINOR);
|
||||
}
|
||||
}
|
||||
|
||||
@ -6804,7 +6852,7 @@ void hugetlb_vma_lock_release(struct kref *kref)
|
||||
kfree(vma_lock);
|
||||
}
|
||||
|
||||
void __hugetlb_vma_unlock_write_put(struct hugetlb_vma_lock *vma_lock)
|
||||
static void __hugetlb_vma_unlock_write_put(struct hugetlb_vma_lock *vma_lock)
|
||||
{
|
||||
struct vm_area_struct *vma = vma_lock->vma;
|
||||
|
||||
|
@@ -295,6 +295,9 @@ static void krealloc_more_oob_helper(struct kunit *test,
 	ptr2 = krealloc(ptr1, size2, GFP_KERNEL);
 	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr2);
 
+	/* Suppress -Warray-bounds warnings. */
+	OPTIMIZER_HIDE_VAR(ptr2);
+
 	/* All offsets up to size2 must be accessible. */
 	ptr2[size1 - 1] = 'x';
 	ptr2[size1] = 'x';
@@ -327,6 +330,9 @@ static void krealloc_less_oob_helper(struct kunit *test,
 	ptr2 = krealloc(ptr1, size2, GFP_KERNEL);
 	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr2);
 
+	/* Suppress -Warray-bounds warnings. */
+	OPTIMIZER_HIDE_VAR(ptr2);
+
 	/* Must be accessible for all modes. */
 	ptr2[size2 - 1] = 'x';
 
@@ -540,13 +546,14 @@ static void kmalloc_memmove_invalid_size(struct kunit *test)
 {
 	char *ptr;
 	size_t size = 64;
-	volatile size_t invalid_size = size;
+	size_t invalid_size = size;
 
 	ptr = kmalloc(size, GFP_KERNEL);
 	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
 
 	memset((char *)ptr, 0, 64);
 	OPTIMIZER_HIDE_VAR(ptr);
+	OPTIMIZER_HIDE_VAR(invalid_size);
 	KUNIT_EXPECT_KASAN_FAIL(test,
 		memmove((char *)ptr, (char *)ptr + 4, invalid_size));
 	kfree(ptr);
mm/memory.c (20 lines changed)

@@ -1393,10 +1393,12 @@ zap_install_uffd_wp_if_needed(struct vm_area_struct *vma,
 			      unsigned long addr, pte_t *pte,
 			      struct zap_details *details, pte_t pteval)
 {
+#ifdef CONFIG_PTE_MARKER_UFFD_WP
 	if (zap_drop_file_uffd_wp(details))
 		return;
 
 	pte_install_uffd_wp_if_needed(vma, addr, pte, pteval);
+#endif
 }
 
 static unsigned long zap_pte_range(struct mmu_gather *tlb,
@@ -3748,7 +3750,21 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
 		ret = remove_device_exclusive_entry(vmf);
 	} else if (is_device_private_entry(entry)) {
 		vmf->page = pfn_swap_entry_to_page(entry);
-		ret = vmf->page->pgmap->ops->migrate_to_ram(vmf);
+		vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd,
+				vmf->address, &vmf->ptl);
+		if (unlikely(!pte_same(*vmf->pte, vmf->orig_pte))) {
+			spin_unlock(vmf->ptl);
+			goto out;
+		}
+
+		/*
+		 * Get a page reference while we know the page can't be
+		 * freed.
+		 */
+		get_page(vmf->page);
+		pte_unmap_unlock(vmf->pte, vmf->ptl);
+		vmf->page->pgmap->ops->migrate_to_ram(vmf);
+		put_page(vmf->page);
 	} else if (is_hwpoison_entry(entry)) {
 		ret = VM_FAULT_HWPOISON;
 	} else if (is_swapin_error_entry(entry)) {
@@ -4118,7 +4134,7 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
 	vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address,
 			&vmf->ptl);
 	if (!pte_none(*vmf->pte)) {
-		update_mmu_cache(vma, vmf->address, vmf->pte);
+		update_mmu_tlb(vma, vmf->address, vmf->pte);
 		goto release;
 	}
 
@ -138,8 +138,11 @@ void memunmap_pages(struct dev_pagemap *pgmap)
|
||||
int i;
|
||||
|
||||
percpu_ref_kill(&pgmap->ref);
|
||||
for (i = 0; i < pgmap->nr_range; i++)
|
||||
percpu_ref_put_many(&pgmap->ref, pfn_len(pgmap, i));
|
||||
if (pgmap->type != MEMORY_DEVICE_PRIVATE &&
|
||||
pgmap->type != MEMORY_DEVICE_COHERENT)
|
||||
for (i = 0; i < pgmap->nr_range; i++)
|
||||
percpu_ref_put_many(&pgmap->ref, pfn_len(pgmap, i));
|
||||
|
||||
wait_for_completion(&pgmap->done);
|
||||
|
||||
for (i = 0; i < pgmap->nr_range; i++)
|
||||
@ -264,7 +267,9 @@ static int pagemap_range(struct dev_pagemap *pgmap, struct mhp_params *params,
|
||||
memmap_init_zone_device(&NODE_DATA(nid)->node_zones[ZONE_DEVICE],
|
||||
PHYS_PFN(range->start),
|
||||
PHYS_PFN(range_len(range)), pgmap);
|
||||
percpu_ref_get_many(&pgmap->ref, pfn_len(pgmap, range_id));
|
||||
if (pgmap->type != MEMORY_DEVICE_PRIVATE &&
|
||||
pgmap->type != MEMORY_DEVICE_COHERENT)
|
||||
percpu_ref_get_many(&pgmap->ref, pfn_len(pgmap, range_id));
|
||||
return 0;
|
||||
|
||||
err_add_memory:
|
||||
@ -502,12 +507,29 @@ void free_zone_device_page(struct page *page)
|
||||
page->mapping = NULL;
|
||||
page->pgmap->ops->page_free(page);
|
||||
|
||||
/*
|
||||
* Reset the page count to 1 to prepare for handing out the page again.
|
||||
*/
|
||||
set_page_count(page, 1);
|
||||
if (page->pgmap->type != MEMORY_DEVICE_PRIVATE &&
|
||||
page->pgmap->type != MEMORY_DEVICE_COHERENT)
|
||||
/*
|
||||
* Reset the page count to 1 to prepare for handing out the page
|
||||
* again.
|
||||
*/
|
||||
set_page_count(page, 1);
|
||||
else
|
||||
put_dev_pagemap(page->pgmap);
|
||||
}
|
||||
|
||||
void zone_device_page_init(struct page *page)
|
||||
{
|
||||
/*
|
||||
* Drivers shouldn't be allocating pages after calling
|
||||
* memunmap_pages().
|
||||
*/
|
||||
WARN_ON_ONCE(!percpu_ref_tryget_live(&page->pgmap->ref));
|
||||
set_page_count(page, 1);
|
||||
lock_page(page);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(zone_device_page_init);
|
||||
|
||||
#ifdef CONFIG_FS_DAX
|
||||
bool __put_devmap_managed_page_refs(struct page *page, int refs)
|
||||
{
|
||||
|
34
mm/migrate.c
34
mm/migrate.c
@ -625,6 +625,25 @@ EXPORT_SYMBOL(folio_migrate_copy);
|
||||
* Migration functions
|
||||
***********************************************************/
|
||||
|
||||
int migrate_folio_extra(struct address_space *mapping, struct folio *dst,
|
||||
struct folio *src, enum migrate_mode mode, int extra_count)
|
||||
{
|
||||
int rc;
|
||||
|
||||
BUG_ON(folio_test_writeback(src)); /* Writeback must be complete */
|
||||
|
||||
rc = folio_migrate_mapping(mapping, dst, src, extra_count);
|
||||
|
||||
if (rc != MIGRATEPAGE_SUCCESS)
|
||||
return rc;
|
||||
|
||||
if (mode != MIGRATE_SYNC_NO_COPY)
|
||||
folio_migrate_copy(dst, src);
|
||||
else
|
||||
folio_migrate_flags(dst, src);
|
||||
return MIGRATEPAGE_SUCCESS;
|
||||
}
|
||||
|
||||
/**
|
||||
* migrate_folio() - Simple folio migration.
|
||||
* @mapping: The address_space containing the folio.
|
||||
@ -640,20 +659,7 @@ EXPORT_SYMBOL(folio_migrate_copy);
|
||||
int migrate_folio(struct address_space *mapping, struct folio *dst,
|
||||
struct folio *src, enum migrate_mode mode)
|
||||
{
|
||||
int rc;
|
||||
|
||||
BUG_ON(folio_test_writeback(src)); /* Writeback must be complete */
|
||||
|
||||
rc = folio_migrate_mapping(mapping, dst, src, 0);
|
||||
|
||||
if (rc != MIGRATEPAGE_SUCCESS)
|
||||
return rc;
|
||||
|
||||
if (mode != MIGRATE_SYNC_NO_COPY)
|
||||
folio_migrate_copy(dst, src);
|
||||
else
|
||||
folio_migrate_flags(dst, src);
|
||||
return MIGRATEPAGE_SUCCESS;
|
||||
return migrate_folio_extra(mapping, dst, src, mode, 0);
|
||||
}
|
||||
EXPORT_SYMBOL(migrate_folio);
|
||||
|
||||
|
@ -325,14 +325,14 @@ static void migrate_vma_collect(struct migrate_vma *migrate)
|
||||
* folio_migrate_mapping(), except that here we allow migration of a
|
||||
* ZONE_DEVICE page.
|
||||
*/
|
||||
static bool migrate_vma_check_page(struct page *page)
|
||||
static bool migrate_vma_check_page(struct page *page, struct page *fault_page)
|
||||
{
|
||||
/*
|
||||
* One extra ref because caller holds an extra reference, either from
|
||||
* isolate_lru_page() for a regular page, or migrate_vma_collect() for
|
||||
* a device page.
|
||||
*/
|
||||
int extra = 1;
|
||||
int extra = 1 + (page == fault_page);
|
||||
|
||||
/*
|
||||
* FIXME support THP (transparent huge page), it is bit more complex to
|
||||
@ -357,26 +357,20 @@ static bool migrate_vma_check_page(struct page *page)
|
||||
}
|
||||
|
||||
/*
|
||||
* migrate_vma_unmap() - replace page mapping with special migration pte entry
|
||||
* @migrate: migrate struct containing all migration information
|
||||
*
|
||||
* Isolate pages from the LRU and replace mappings (CPU page table pte) with a
|
||||
* special migration pte entry and check if it has been pinned. Pinned pages are
|
||||
* restored because we cannot migrate them.
|
||||
*
|
||||
* This is the last step before we call the device driver callback to allocate
|
||||
* destination memory and copy contents of original page over to new page.
|
||||
* Unmaps pages for migration. Returns number of unmapped pages.
|
||||
*/
|
||||
static void migrate_vma_unmap(struct migrate_vma *migrate)
|
||||
static unsigned long migrate_device_unmap(unsigned long *src_pfns,
|
||||
unsigned long npages,
|
||||
struct page *fault_page)
|
||||
{
|
||||
const unsigned long npages = migrate->npages;
|
||||
unsigned long i, restore = 0;
|
||||
bool allow_drain = true;
|
||||
unsigned long unmapped = 0;
|
||||
|
||||
lru_add_drain();
|
||||
|
||||
for (i = 0; i < npages; i++) {
|
||||
struct page *page = migrate_pfn_to_page(migrate->src[i]);
|
||||
struct page *page = migrate_pfn_to_page(src_pfns[i]);
|
||||
struct folio *folio;
|
||||
|
||||
if (!page)
|
||||
@ -391,8 +385,7 @@ static void migrate_vma_unmap(struct migrate_vma *migrate)
|
||||
}
|
||||
|
||||
if (isolate_lru_page(page)) {
|
||||
migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
|
||||
migrate->cpages--;
|
||||
src_pfns[i] &= ~MIGRATE_PFN_MIGRATE;
|
||||
restore++;
|
||||
continue;
|
||||
}
|
||||
@ -405,34 +398,55 @@ static void migrate_vma_unmap(struct migrate_vma *migrate)
|
||||
if (folio_mapped(folio))
|
||||
try_to_migrate(folio, 0);
|
||||
|
||||
if (page_mapped(page) || !migrate_vma_check_page(page)) {
|
||||
if (page_mapped(page) ||
|
||||
!migrate_vma_check_page(page, fault_page)) {
|
||||
if (!is_zone_device_page(page)) {
|
||||
get_page(page);
|
||||
putback_lru_page(page);
|
||||
}
|
||||
|
||||
migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
|
||||
migrate->cpages--;
|
||||
src_pfns[i] &= ~MIGRATE_PFN_MIGRATE;
|
||||
restore++;
|
||||
continue;
|
||||
}
|
||||
|
||||
unmapped++;
|
||||
}
|
||||
|
||||
for (i = 0; i < npages && restore; i++) {
|
||||
struct page *page = migrate_pfn_to_page(migrate->src[i]);
|
||||
struct page *page = migrate_pfn_to_page(src_pfns[i]);
|
||||
struct folio *folio;
|
||||
|
||||
if (!page || (migrate->src[i] & MIGRATE_PFN_MIGRATE))
|
||||
if (!page || (src_pfns[i] & MIGRATE_PFN_MIGRATE))
|
||||
continue;
|
||||
|
||||
folio = page_folio(page);
|
||||
remove_migration_ptes(folio, folio, false);
|
||||
|
||||
migrate->src[i] = 0;
|
||||
src_pfns[i] = 0;
|
||||
folio_unlock(folio);
|
||||
folio_put(folio);
|
||||
restore--;
|
||||
}
|
||||
|
||||
return unmapped;
|
||||
}
|
||||
|
||||
/*
|
||||
* migrate_vma_unmap() - replace page mapping with special migration pte entry
|
||||
* @migrate: migrate struct containing all migration information
|
||||
*
|
||||
* Isolate pages from the LRU and replace mappings (CPU page table pte) with a
|
||||
* special migration pte entry and check if it has been pinned. Pinned pages are
|
||||
* restored because we cannot migrate them.
|
||||
*
|
||||
* This is the last step before we call the device driver callback to allocate
|
||||
* destination memory and copy contents of original page over to new page.
|
||||
*/
|
||||
static void migrate_vma_unmap(struct migrate_vma *migrate)
|
||||
{
|
||||
migrate->cpages = migrate_device_unmap(migrate->src, migrate->npages,
|
||||
migrate->fault_page);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -517,6 +531,8 @@ int migrate_vma_setup(struct migrate_vma *args)
return -EINVAL;
if (!args->src || !args->dst)
return -EINVAL;
if (args->fault_page && !is_device_private_page(args->fault_page))
return -EINVAL;

memset(args->src, 0, sizeof(*args->src) * nr_pages);
args->cpages = 0;
@ -677,42 +693,38 @@ abort:
*src &= ~MIGRATE_PFN_MIGRATE;
}

/**
* migrate_vma_pages() - migrate meta-data from src page to dst page
* @migrate: migrate struct containing all migration information
*
* This migrates struct page meta-data from source struct page to destination
* struct page. This effectively finishes the migration from source page to the
* destination page.
*/
void migrate_vma_pages(struct migrate_vma *migrate)
static void __migrate_device_pages(unsigned long *src_pfns,
unsigned long *dst_pfns, unsigned long npages,
struct migrate_vma *migrate)
{
const unsigned long npages = migrate->npages;
const unsigned long start = migrate->start;
struct mmu_notifier_range range;
unsigned long addr, i;
unsigned long i;
bool notified = false;

for (i = 0, addr = start; i < npages; addr += PAGE_SIZE, i++) {
struct page *newpage = migrate_pfn_to_page(migrate->dst[i]);
struct page *page = migrate_pfn_to_page(migrate->src[i]);
for (i = 0; i < npages; i++) {
struct page *newpage = migrate_pfn_to_page(dst_pfns[i]);
struct page *page = migrate_pfn_to_page(src_pfns[i]);
struct address_space *mapping;
int r;

if (!newpage) {
migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
src_pfns[i] &= ~MIGRATE_PFN_MIGRATE;
continue;
}

if (!page) {
unsigned long addr;

if (!(src_pfns[i] & MIGRATE_PFN_MIGRATE))
continue;

/*
* The only time there is no vma is when called from
* migrate_device_coherent_page(). However this isn't
* called if the page could not be unmapped.
*/
VM_BUG_ON(!migrate->vma);
if (!(migrate->src[i] & MIGRATE_PFN_MIGRATE))
continue;
VM_BUG_ON(!migrate);
addr = migrate->start + i*PAGE_SIZE;
if (!notified) {
notified = true;

@ -723,7 +735,7 @@ void migrate_vma_pages(struct migrate_vma *migrate)
mmu_notifier_invalidate_range_start(&range);
}
migrate_vma_insert_page(migrate, addr, newpage,
&migrate->src[i]);
&src_pfns[i]);
continue;
}

@ -736,21 +748,26 @@ void migrate_vma_pages(struct migrate_vma *migrate)
* device private or coherent memory.
*/
if (mapping) {
migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
src_pfns[i] &= ~MIGRATE_PFN_MIGRATE;
continue;
}
} else if (is_zone_device_page(newpage)) {
/*
* Other types of ZONE_DEVICE page are not supported.
*/
migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
src_pfns[i] &= ~MIGRATE_PFN_MIGRATE;
continue;
}

r = migrate_folio(mapping, page_folio(newpage),
page_folio(page), MIGRATE_SYNC_NO_COPY);
if (migrate && migrate->fault_page == page)
r = migrate_folio_extra(mapping, page_folio(newpage),
page_folio(page),
MIGRATE_SYNC_NO_COPY, 1);
else
r = migrate_folio(mapping, page_folio(newpage),
page_folio(page), MIGRATE_SYNC_NO_COPY);
if (r != MIGRATEPAGE_SUCCESS)
migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
src_pfns[i] &= ~MIGRATE_PFN_MIGRATE;
}

/*
@ -761,28 +778,56 @@ void migrate_vma_pages(struct migrate_vma *migrate)
if (notified)
mmu_notifier_invalidate_range_only_end(&range);
}
EXPORT_SYMBOL(migrate_vma_pages);

/**
* migrate_vma_finalize() - restore CPU page table entry
* migrate_device_pages() - migrate meta-data from src page to dst page
* @src_pfns: src_pfns returned from migrate_device_range()
* @dst_pfns: array of pfns allocated by the driver to migrate memory to
* @npages: number of pages in the range
*
* Equivalent to migrate_vma_pages(). This is called to migrate struct page
* meta-data from source struct page to destination.
*/
void migrate_device_pages(unsigned long *src_pfns, unsigned long *dst_pfns,
unsigned long npages)
{
__migrate_device_pages(src_pfns, dst_pfns, npages, NULL);
}
EXPORT_SYMBOL(migrate_device_pages);

/**
* migrate_vma_pages() - migrate meta-data from src page to dst page
* @migrate: migrate struct containing all migration information
*
* This replaces the special migration pte entry with either a mapping to the
* new page if migration was successful for that page, or to the original page
* otherwise.
*
* This also unlocks the pages and puts them back on the lru, or drops the extra
* refcount, for device pages.
* This migrates struct page meta-data from source struct page to destination
* struct page. This effectively finishes the migration from source page to the
* destination page.
*/
void migrate_vma_finalize(struct migrate_vma *migrate)
void migrate_vma_pages(struct migrate_vma *migrate)
{
__migrate_device_pages(migrate->src, migrate->dst, migrate->npages, migrate);
}
EXPORT_SYMBOL(migrate_vma_pages);

/*
* migrate_device_finalize() - complete page migration
* @src_pfns: src_pfns returned from migrate_device_range()
* @dst_pfns: array of pfns allocated by the driver to migrate memory to
* @npages: number of pages in the range
*
* Completes migration of the page by removing special migration entries.
* Drivers must ensure copying of page data is complete and visible to the CPU
* before calling this.
*/
void migrate_device_finalize(unsigned long *src_pfns,
unsigned long *dst_pfns, unsigned long npages)
{
const unsigned long npages = migrate->npages;
unsigned long i;

for (i = 0; i < npages; i++) {
struct folio *dst, *src;
struct page *newpage = migrate_pfn_to_page(migrate->dst[i]);
struct page *page = migrate_pfn_to_page(migrate->src[i]);
struct page *newpage = migrate_pfn_to_page(dst_pfns[i]);
struct page *page = migrate_pfn_to_page(src_pfns[i]);

if (!page) {
if (newpage) {
@ -792,7 +837,7 @@ void migrate_vma_finalize(struct migrate_vma *migrate)
continue;
}

if (!(migrate->src[i] & MIGRATE_PFN_MIGRATE) || !newpage) {
if (!(src_pfns[i] & MIGRATE_PFN_MIGRATE) || !newpage) {
if (newpage) {
unlock_page(newpage);
put_page(newpage);
@ -819,8 +864,72 @@ void migrate_vma_finalize(struct migrate_vma *migrate)
}
}
}
EXPORT_SYMBOL(migrate_device_finalize);

/**
* migrate_vma_finalize() - restore CPU page table entry
* @migrate: migrate struct containing all migration information
*
* This replaces the special migration pte entry with either a mapping to the
* new page if migration was successful for that page, or to the original page
* otherwise.
*
* This also unlocks the pages and puts them back on the lru, or drops the extra
* refcount, for device pages.
*/
void migrate_vma_finalize(struct migrate_vma *migrate)
{
migrate_device_finalize(migrate->src, migrate->dst, migrate->npages);
}
EXPORT_SYMBOL(migrate_vma_finalize);
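
For context, the wrappers above keep the existing migrate_vma_*() entry points while routing them through the shared __migrate_device_pages()/migrate_device_finalize() helpers. A minimal sketch of a driver CPU fault handler driving that API, including the new fault_page field, might look as follows; the example_*() symbols are placeholders rather than anything added by this commit, and the whole block is only an illustration of the call sequence.

/* Sketch of a migrate_to_ram() style CPU fault handler for device private
 * memory, assuming a single PAGE_SIZE fault. Placeholder helpers are marked. */
#include <linux/migrate.h>
#include <linux/mm.h>

static void *example_pgmap_owner;	/* placeholder: the driver's dev_pagemap owner */
static void example_copy_from_device(struct page *dst, struct page *src);	/* placeholder */

static vm_fault_t example_migrate_to_ram(struct vm_fault *vmf)
{
	unsigned long src_pfn = 0, dst_pfn = 0;
	struct migrate_vma args = {
		.vma		= vmf->vma,
		.start		= vmf->address,
		.end		= vmf->address + PAGE_SIZE,
		.src		= &src_pfn,
		.dst		= &dst_pfn,
		.pgmap_owner	= example_pgmap_owner,
		.flags		= MIGRATE_VMA_SELECT_DEVICE_PRIVATE,
		.fault_page	= vmf->page,	/* new in this series */
	};
	struct page *dpage;

	if (migrate_vma_setup(&args))
		return VM_FAULT_SIGBUS;

	if (src_pfn & MIGRATE_PFN_MIGRATE) {
		dpage = alloc_page_vma(GFP_HIGHUSER, vmf->vma, vmf->address);
		if (dpage) {
			lock_page(dpage);
			/* placeholder: DMA the data out of device memory */
			example_copy_from_device(dpage, migrate_pfn_to_page(src_pfn));
			dst_pfn = migrate_pfn(page_to_pfn(dpage));
		}
	}

	/* Finish (or back out) the migration; dst_pfn == 0 restores the page. */
	migrate_vma_pages(&args);
	migrate_vma_finalize(&args);
	return 0;
}

Passing vmf->page as fault_page is what lets __migrate_device_pages() pick the migrate_folio_extra() path for the page the CPU is actually faulting on, as seen in the hunk above.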

/**
* migrate_device_range() - migrate device private pfns to normal memory.
* @src_pfns: array large enough to hold migrating source device private pfns.
* @start: starting pfn in the range to migrate.
* @npages: number of pages to migrate.
*
* migrate_device_range() is similar in concept to migrate_vma_setup() except that
* instead of looking up pages based on virtual address mappings a range of
* device pfns that should be migrated to system memory is used instead.
*
* This is useful when a driver needs to free device memory but doesn't know the
* virtual mappings of every page that may be in device memory. For example this
* is often the case when a driver is being unloaded or unbound from a device.
*
* Like migrate_vma_setup() this function will take a reference and lock any
* migrating pages that aren't free before unmapping them. Drivers may then
* allocate destination pages and start copying data from the device to CPU
* memory before calling migrate_device_pages().
*/
int migrate_device_range(unsigned long *src_pfns, unsigned long start,
unsigned long npages)
{
unsigned long i, pfn;

for (pfn = start, i = 0; i < npages; pfn++, i++) {
struct page *page = pfn_to_page(pfn);

if (!get_page_unless_zero(page)) {
src_pfns[i] = 0;
continue;
}

if (!trylock_page(page)) {
src_pfns[i] = 0;
put_page(page);
continue;
}

src_pfns[i] = migrate_pfn(pfn) | MIGRATE_PFN_MIGRATE;
}

migrate_device_unmap(src_pfns, npages, NULL);

return 0;
}
EXPORT_SYMBOL(migrate_device_range);
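
Taken together with migrate_device_pages() and migrate_device_finalize() above, this gives drivers a teardown path that needs no VMA or virtual addresses. A rough sketch of the eviction loop the kernel-doc describes follows; the copy helper and GFP flags are assumptions, and only the migrate_device_*() calls are the new API.

/* Sketch: evict npages of device private memory starting at dev_start_pfn
 * back to system RAM, e.g. on driver unbind. example_copy_to_system() is a
 * placeholder for the driver's DMA copy. */
#include <linux/migrate.h>
#include <linux/mm.h>
#include <linux/slab.h>

static void example_copy_to_system(struct page *dst, struct page *src);	/* placeholder */

static int example_evict_range(unsigned long dev_start_pfn, unsigned long npages)
{
	unsigned long *src_pfns, *dst_pfns;
	unsigned long i;

	src_pfns = kvcalloc(npages, sizeof(*src_pfns), GFP_KERNEL);
	dst_pfns = kvcalloc(npages, sizeof(*dst_pfns), GFP_KERNEL);
	if (!src_pfns || !dst_pfns) {
		kvfree(src_pfns);
		kvfree(dst_pfns);
		return -ENOMEM;
	}

	/* Reference, lock and unmap any device pages still in use. */
	migrate_device_range(src_pfns, dev_start_pfn, npages);

	for (i = 0; i < npages; i++) {
		struct page *dpage, *spage = migrate_pfn_to_page(src_pfns[i]);

		if (!spage || !(src_pfns[i] & MIGRATE_PFN_MIGRATE))
			continue;

		dpage = alloc_page(GFP_HIGHUSER_MOVABLE);
		if (!dpage)
			continue;	/* this entry is simply restored */

		lock_page(dpage);
		example_copy_to_system(dpage, spage);	/* placeholder */
		dst_pfns[i] = migrate_pfn(page_to_pfn(dpage));
	}

	migrate_device_pages(src_pfns, dst_pfns, npages);
	migrate_device_finalize(src_pfns, dst_pfns, npages);
	kvfree(src_pfns);
	kvfree(dst_pfns);
	return 0;
}

Leaving dst_pfns[i] empty makes __migrate_device_pages() clear MIGRATE_PFN_MIGRATE for that entry, and migrate_device_finalize() then restores the corresponding source page.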

/*
* Migrate a device coherent page back to normal memory. The caller should have
* a reference on page which will be copied to the new page if migration is
@ -829,25 +938,19 @@ EXPORT_SYMBOL(migrate_vma_finalize);
int migrate_device_coherent_page(struct page *page)
{
unsigned long src_pfn, dst_pfn = 0;
struct migrate_vma args;
struct page *dpage;

WARN_ON_ONCE(PageCompound(page));

lock_page(page);
src_pfn = migrate_pfn(page_to_pfn(page)) | MIGRATE_PFN_MIGRATE;
args.src = &src_pfn;
args.dst = &dst_pfn;
args.cpages = 1;
args.npages = 1;
args.vma = NULL;

/*
* We don't have a VMA and don't need to walk the page tables to find
* the source page. So call migrate_vma_unmap() directly to unmap the
* page as migrate_vma_setup() will fail if args.vma == NULL.
*/
migrate_vma_unmap(&args);
migrate_device_unmap(&src_pfn, 1, NULL);
if (!(src_pfn & MIGRATE_PFN_MIGRATE))
return -EBUSY;

@ -857,10 +960,10 @@ int migrate_device_coherent_page(struct page *page)
dst_pfn = migrate_pfn(page_to_pfn(dpage));
}

migrate_vma_pages(&args);
migrate_device_pages(&src_pfn, &dst_pfn, 1);
if (src_pfn & MIGRATE_PFN_MIGRATE)
copy_highpage(dpage, page);
migrate_vma_finalize(&args);
migrate_device_finalize(&src_pfn, &dst_pfn, 1);

if (src_pfn & MIGRATE_PFN_MIGRATE)
return 0;

mm/mmap.c
@ -2673,7 +2673,7 @@ cannot_expand:
if (!arch_validate_flags(vma->vm_flags)) {
error = -EINVAL;
if (file)
goto unmap_and_free_vma;
goto close_and_free_vma;
else
goto free_vma;
}
@ -2742,6 +2742,9 @@ expanded:
validate_mm(mm);
return addr;

close_and_free_vma:
if (vma->vm_ops && vma->vm_ops->close)
vma->vm_ops->close(vma);
unmap_and_free_vma:
fput(vma->vm_file);
vma->vm_file = NULL;
@ -2942,17 +2945,18 @@ static int do_brk_flags(struct ma_state *mas, struct vm_area_struct *vma,
if (vma &&
(!vma->anon_vma || list_is_singular(&vma->anon_vma_chain)) &&
((vma->vm_flags & ~VM_SOFTDIRTY) == flags)) {
mas->index = vma->vm_start;
mas->last = addr + len - 1;
vma_adjust_trans_huge(vma, addr, addr + len, 0);
mas_set_range(mas, vma->vm_start, addr + len - 1);
if (mas_preallocate(mas, vma, GFP_KERNEL))
return -ENOMEM;

vma_adjust_trans_huge(vma, vma->vm_start, addr + len, 0);
if (vma->anon_vma) {
anon_vma_lock_write(vma->anon_vma);
anon_vma_interval_tree_pre_update_vma(vma);
}
vma->vm_end = addr + len;
vma->vm_flags |= VM_SOFTDIRTY;
if (mas_store_gfp(mas, vma, GFP_KERNEL))
goto mas_expand_failed;
mas_store_prealloc(mas, vma);

if (vma->anon_vma) {
anon_vma_interval_tree_post_update_vma(vma);
@ -2993,13 +2997,6 @@ mas_store_fail:
vma_alloc_fail:
vm_unacct_memory(len >> PAGE_SHIFT);
return -ENOMEM;

mas_expand_failed:
if (vma->anon_vma) {
anon_vma_interval_tree_post_update_vma(vma);
anon_vma_unlock_write(vma->anon_vma);
}
return -ENOMEM;
}

int vm_brk_flags(unsigned long addr, unsigned long request, unsigned long flags)
@ -3240,6 +3237,11 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
out_vma_link:
if (new_vma->vm_ops && new_vma->vm_ops->close)
new_vma->vm_ops->close(new_vma);

if (new_vma->vm_file)
fput(new_vma->vm_file);

unlink_anon_vmas(new_vma);
out_free_mempol:
mpol_put(vma_policy(new_vma));
out_free_vma:

mm/mmu_gather.c
@ -1,6 +1,7 @@
#include <linux/gfp.h>
#include <linux/highmem.h>
#include <linux/kernel.h>
#include <linux/kmsan-checks.h>
#include <linux/mmdebug.h>
#include <linux/mm_types.h>
#include <linux/mm_inline.h>
@ -265,6 +266,15 @@ void tlb_flush_mmu(struct mmu_gather *tlb)
static void __tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
bool fullmm)
{
/*
* struct mmu_gather contains 7 1-bit fields packed into a 32-bit
* unsigned int value. The remaining 25 bits remain uninitialized
* and are never used, but KMSAN updates the origin for them in
* zap_pXX_range() in mm/memory.c, thus creating very long origin
* chains. This is technically correct, but consumes too much memory.
* Unpoisoning the whole structure will prevent creating such chains.
*/
kmsan_unpoison_memory(tlb, sizeof(*tlb));
tlb->mm = mm;
tlb->fullmm = fullmm;

mm/mprotect.c
@ -267,6 +267,7 @@ static unsigned long change_pte_range(struct mmu_gather *tlb,
} else {
/* It must be an none page, or what else?.. */
WARN_ON_ONCE(!pte_none(oldpte));
#ifdef CONFIG_PTE_MARKER_UFFD_WP
if (unlikely(uffd_wp && !vma_is_anonymous(vma))) {
/*
* For file-backed mem, we need to be able to
@ -278,6 +279,7 @@ static unsigned long change_pte_range(struct mmu_gather *tlb,
make_pte_marker(PTE_MARKER_UFFD_WP));
pages++;
}
#endif
}
} while (pte++, addr += PAGE_SIZE, addr != end);
arch_leave_lazy_mmu_mode();

mm/page_alloc.c
@ -3446,7 +3446,7 @@ static void free_unref_page_commit(struct zone *zone, struct per_cpu_pages *pcp,
int pindex;
bool free_high;

__count_vm_event(PGFREE);
__count_vm_events(PGFREE, 1 << order);
pindex = order_to_pindex(migratetype, order);
list_add(&page->pcp_list, &pcp->lists[pindex]);
pcp->count += 1 << order;
@ -3803,7 +3803,7 @@ static struct page *rmqueue_pcplist(struct zone *preferred_zone,
pcp_spin_unlock_irqrestore(pcp, flags);
pcp_trylock_finish(UP_flags);
if (page) {
__count_zid_vm_events(PGALLOC, page_zonenum(page), 1);
__count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order);
zone_statistics(preferred_zone, zone, 1);
}
return page;
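
The effect of the two one-line fixes above: freeing or allocating a high-order page through the per-cpu page lists now bumps PGFREE/PGALLOC by the number of base pages rather than by one event, e.g. 1 << 2 = 4 for an order-2 page, consistent with the way pcp->count is already adjusted by 1 << order in the same function.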
@ -6823,6 +6823,14 @@ static void __ref __init_zone_device_page(struct page *page, unsigned long pfn,
set_pageblock_migratetype(page, MIGRATE_MOVABLE);
cond_resched();
}

/*
* ZONE_DEVICE pages are released directly to the driver page allocator
* which will set the page count to 1 when allocating the page.
*/
if (pgmap->type == MEMORY_DEVICE_PRIVATE ||
pgmap->type == MEMORY_DEVICE_COHERENT)
set_page_count(page, 0);
}
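
With this hunk, MEMORY_DEVICE_PRIVATE and MEMORY_DEVICE_COHERENT pages start out, and sit while free, at a zero refcount, so the "driver page allocator" the comment refers to has to set the count when it hands a page out. A hypothetical sketch of that step only; the free-list lookup is a placeholder, and the set_page_count()/lock_page() pair reflects what the comment describes rather than any one driver.

/* Sketch: hand out one ZONE_DEVICE private page from a driver-private free
 * list now that such pages sit at refcount zero while free. */
#include <linux/mm.h>
#include <linux/pagemap.h>

static struct page *example_pick_free_device_page(void);	/* placeholder */

static struct page *example_alloc_device_page(void)
{
	struct page *page = example_pick_free_device_page();

	if (!page)
		return NULL;

	/* Free ZONE_DEVICE private/coherent pages now have refcount == 0. */
	set_page_count(page, 1);
	lock_page(page);
	return page;
}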

/*

tools/testing/selftests/vm/hmm-tests.c
@ -1054,6 +1054,55 @@ TEST_F(hmm, migrate_fault)
hmm_buffer_free(buffer);
}

TEST_F(hmm, migrate_release)
{
struct hmm_buffer *buffer;
unsigned long npages;
unsigned long size;
unsigned long i;
int *ptr;
int ret;

npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
ASSERT_NE(npages, 0);
size = npages << self->page_shift;

buffer = malloc(sizeof(*buffer));
ASSERT_NE(buffer, NULL);

buffer->fd = -1;
buffer->size = size;
buffer->mirror = malloc(size);
ASSERT_NE(buffer->mirror, NULL);

buffer->ptr = mmap(NULL, size, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS, buffer->fd, 0);
ASSERT_NE(buffer->ptr, MAP_FAILED);

/* Initialize buffer in system memory. */
for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
ptr[i] = i;

/* Migrate memory to device. */
ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
ASSERT_EQ(ret, 0);
ASSERT_EQ(buffer->cpages, npages);

/* Check what the device read. */
for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
ASSERT_EQ(ptr[i], i);

/* Release device memory. */
ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_RELEASE, buffer, npages);
ASSERT_EQ(ret, 0);

/* Fault pages back to system memory and check them. */
for (i = 0, ptr = buffer->ptr; i < size / (2 * sizeof(*ptr)); ++i)
ASSERT_EQ(ptr[i], i);

hmm_buffer_free(buffer);
}

/*
* Migrate anonymous shared memory to device private memory.
*/

tools/testing/selftests/vm/userfaultfd.c
@ -774,7 +774,27 @@ static void uffd_handle_page_fault(struct uffd_msg *msg,
continue_range(uffd, msg->arg.pagefault.address, page_size);
stats->minor_faults++;
} else {
/* Missing page faults */
/*
* Missing page faults.
*
* Here we force a write check for each of the missing mode
* faults. It's guaranteed because the only threads that
* will trigger uffd faults are the locking threads, and
* their first instruction to touch the missing page will
* always be pthread_mutex_lock().
*
* Note that here we relied on an NPTL glibc impl detail to
* always read the lock type at the entry of the lock op
* (pthread_mutex_t.__data.__type, offset 0x10) before
* doing any locking operations to guarantee that. It's
* actually not good to rely on this impl detail because
* logically a pthread-compatible lib can implement the
* locks without types and we can fail when linking with
* them. However since we used to find bugs with this
* strict check we still keep it around. Hopefully this
* could be a good hint when it fails again. If one day
* it'll break on some other impl of glibc we'll revisit.
*/
if (msg->arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WRITE)
err("unexpected write fault");