fsdax: factor out helpers to simplify the dax fault code

The dax page fault code is too long and a bit difficult to read. And it
is hard to understand when we trying to add new features. Some of the
PTE/PMD codes have similar logic. So, factor out helper functions to
simplify the code.

Signed-off-by: Shiyang Ruan <ruansy.fnst@fujitsu.com>
Reviewed-by: Ritesh Harjani <riteshh@linux.ibm.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
[hch: minor cleanups]
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
This commit is contained in:
Shiyang Ruan 2021-08-10 18:33:14 -07:00 committed by Darrick J. Wong
parent b74b1293e6
commit 55f81639a7

153
fs/dax.c
View File

@ -1255,6 +1255,53 @@ static bool dax_fault_is_synchronous(unsigned long flags,
&& (iomap->flags & IOMAP_F_DIRTY);
}
/*
* When handling a synchronous page fault and the inode need a fsync, we can
* insert the PTE/PMD into page tables only after that fsync happened. Skip
* insertion for now and return the pfn so that caller can insert it after the
* fsync is done.
*/
static vm_fault_t dax_fault_synchronous_pfnp(pfn_t *pfnp, pfn_t pfn)
{
if (WARN_ON_ONCE(!pfnp))
return VM_FAULT_SIGBUS;
*pfnp = pfn;
return VM_FAULT_NEEDDSYNC;
}
static vm_fault_t dax_fault_cow_page(struct vm_fault *vmf, struct iomap *iomap,
loff_t pos)
{
sector_t sector = dax_iomap_sector(iomap, pos);
unsigned long vaddr = vmf->address;
vm_fault_t ret;
int error = 0;
switch (iomap->type) {
case IOMAP_HOLE:
case IOMAP_UNWRITTEN:
clear_user_highpage(vmf->cow_page, vaddr);
break;
case IOMAP_MAPPED:
error = copy_cow_page_dax(iomap->bdev, iomap->dax_dev, sector,
vmf->cow_page, vaddr);
break;
default:
WARN_ON_ONCE(1);
error = -EIO;
break;
}
if (error)
return dax_fault_return(error);
__SetPageUptodate(vmf->cow_page);
ret = finish_fault(vmf);
if (!ret)
return VM_FAULT_DONE_COW;
return ret;
}
static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
int *iomap_errp, const struct iomap_ops *ops)
{
@ -1323,30 +1370,7 @@ static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
}
if (vmf->cow_page) {
sector_t sector = dax_iomap_sector(&iomap, pos);
switch (iomap.type) {
case IOMAP_HOLE:
case IOMAP_UNWRITTEN:
clear_user_highpage(vmf->cow_page, vaddr);
break;
case IOMAP_MAPPED:
error = copy_cow_page_dax(iomap.bdev, iomap.dax_dev,
sector, vmf->cow_page, vaddr);
break;
default:
WARN_ON_ONCE(1);
error = -EIO;
break;
}
if (error)
goto error_finish_iomap;
__SetPageUptodate(vmf->cow_page);
ret = finish_fault(vmf);
if (!ret)
ret = VM_FAULT_DONE_COW;
ret = dax_fault_cow_page(vmf, &iomap, pos);
goto finish_iomap;
}
@ -1366,19 +1390,8 @@ static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
entry = dax_insert_entry(&xas, mapping, vmf, entry, pfn,
0, write && !sync);
/*
* If we are doing synchronous page fault and inode needs fsync,
* we can insert PTE into page tables only after that happens.
* Skip insertion for now and return the pfn so that caller can
* insert it after fsync is done.
*/
if (sync) {
if (WARN_ON_ONCE(!pfnp)) {
error = -EIO;
goto error_finish_iomap;
}
*pfnp = pfn;
ret = VM_FAULT_NEEDDSYNC | major;
ret = dax_fault_synchronous_pfnp(pfnp, pfn);
goto finish_iomap;
}
trace_dax_insert_mapping(inode, vmf, entry);
@ -1477,13 +1490,45 @@ fallback:
return VM_FAULT_FALLBACK;
}
static bool dax_fault_check_fallback(struct vm_fault *vmf, struct xa_state *xas,
pgoff_t max_pgoff)
{
unsigned long pmd_addr = vmf->address & PMD_MASK;
bool write = vmf->flags & FAULT_FLAG_WRITE;
/*
* Make sure that the faulting address's PMD offset (color) matches
* the PMD offset from the start of the file. This is necessary so
* that a PMD range in the page table overlaps exactly with a PMD
* range in the page cache.
*/
if ((vmf->pgoff & PG_PMD_COLOUR) !=
((vmf->address >> PAGE_SHIFT) & PG_PMD_COLOUR))
return true;
/* Fall back to PTEs if we're going to COW */
if (write && !(vmf->vma->vm_flags & VM_SHARED))
return true;
/* If the PMD would extend outside the VMA */
if (pmd_addr < vmf->vma->vm_start)
return true;
if ((pmd_addr + PMD_SIZE) > vmf->vma->vm_end)
return true;
/* If the PMD would extend beyond the file size */
if ((xas->xa_index | PG_PMD_COLOUR) >= max_pgoff)
return true;
return false;
}
static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
const struct iomap_ops *ops)
{
struct vm_area_struct *vma = vmf->vma;
struct address_space *mapping = vma->vm_file->f_mapping;
XA_STATE_ORDER(xas, &mapping->i_pages, vmf->pgoff, PMD_ORDER);
unsigned long pmd_addr = vmf->address & PMD_MASK;
bool write = vmf->flags & FAULT_FLAG_WRITE;
bool sync;
unsigned int iomap_flags = (write ? IOMAP_WRITE : 0) | IOMAP_FAULT;
@ -1506,33 +1551,12 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
trace_dax_pmd_fault(inode, vmf, max_pgoff, 0);
/*
* Make sure that the faulting address's PMD offset (color) matches
* the PMD offset from the start of the file. This is necessary so
* that a PMD range in the page table overlaps exactly with a PMD
* range in the page cache.
*/
if ((vmf->pgoff & PG_PMD_COLOUR) !=
((vmf->address >> PAGE_SHIFT) & PG_PMD_COLOUR))
goto fallback;
/* Fall back to PTEs if we're going to COW */
if (write && !(vma->vm_flags & VM_SHARED))
goto fallback;
/* If the PMD would extend outside the VMA */
if (pmd_addr < vma->vm_start)
goto fallback;
if ((pmd_addr + PMD_SIZE) > vma->vm_end)
goto fallback;
if (xas.xa_index >= max_pgoff) {
result = VM_FAULT_SIGBUS;
goto out;
}
/* If the PMD would extend beyond the file size */
if ((xas.xa_index | PG_PMD_COLOUR) >= max_pgoff)
if (dax_fault_check_fallback(vmf, &xas, max_pgoff))
goto fallback;
/*
@ -1584,17 +1608,8 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
entry = dax_insert_entry(&xas, mapping, vmf, entry, pfn,
DAX_PMD, write && !sync);
/*
* If we are doing synchronous page fault and inode needs fsync,
* we can insert PMD into page tables only after that happens.
* Skip insertion for now and return the pfn so that caller can
* insert it after fsync is done.
*/
if (sync) {
if (WARN_ON_ONCE(!pfnp))
goto finish_iomap;
*pfnp = pfn;
result = VM_FAULT_NEEDDSYNC;
result = dax_fault_synchronous_pfnp(pfnp, pfn);
goto finish_iomap;
}