NFS: Fix another fsync() issue after a server reboot

Currently, when the writeback code detects a server reboot, it redirties
any pages that were not committed to disk, and it sets the flag
NFS_CONTEXT_RESEND_WRITES in the nfs_open_context of the file descriptor
that dirtied the file. While this allows the file descriptor in question
to redrive its own writes, it violates the fsync() requirement that we
should be synchronising all writes to disk.
While the problem is infrequent, we do see corner cases where an
untimely server reboot causes the fsync() call to abandon its attempt to
sync data to disk and causing data corruption issues due to missed error
conditions or similar.

In order to tighted up the client's ability to deal with this situation
without introducing livelocks, add a counter that records the number of
times pages are redirtied due to a server reboot-like condition, and use
that in fsync() to redrive the sync to disk.

Fixes: 2197e9b06c ("NFS: Fix up fsync() when the server rebooted")
Cc: stable@vger.kernel.org
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
This commit is contained in:
Trond Myklebust 2022-08-13 08:22:25 -04:00
parent 2067231a9e
commit 67f4b5dc49
4 changed files with 12 additions and 11 deletions

View File

@ -221,8 +221,10 @@ nfs_file_fsync_commit(struct file *file, int datasync)
int
nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
struct nfs_open_context *ctx = nfs_file_open_context(file);
struct inode *inode = file_inode(file);
struct nfs_inode *nfsi = NFS_I(inode);
long save_nredirtied = atomic_long_read(&nfsi->redirtied_pages);
long nredirtied;
int ret;
trace_nfs_fsync_enter(inode);
@ -237,15 +239,10 @@ nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
ret = pnfs_sync_inode(inode, !!datasync);
if (ret != 0)
break;
if (!test_and_clear_bit(NFS_CONTEXT_RESEND_WRITES, &ctx->flags))
nredirtied = atomic_long_read(&nfsi->redirtied_pages);
if (nredirtied == save_nredirtied)
break;
/*
* If nfs_file_fsync_commit detected a server reboot, then
* resend all dirty pages that might have been covered by
* the NFS_CONTEXT_RESEND_WRITES flag
*/
start = 0;
end = LLONG_MAX;
save_nredirtied = nredirtied;
}
trace_nfs_fsync_exit(inode, ret);

View File

@ -426,6 +426,7 @@ nfs_ilookup(struct super_block *sb, struct nfs_fattr *fattr, struct nfs_fh *fh)
static void nfs_inode_init_regular(struct nfs_inode *nfsi)
{
atomic_long_set(&nfsi->nrequests, 0);
atomic_long_set(&nfsi->redirtied_pages, 0);
INIT_LIST_HEAD(&nfsi->commit_info.list);
atomic_long_set(&nfsi->commit_info.ncommit, 0);
atomic_set(&nfsi->commit_info.rpcs_out, 0);

View File

@ -1420,10 +1420,12 @@ static void nfs_initiate_write(struct nfs_pgio_header *hdr,
*/
static void nfs_redirty_request(struct nfs_page *req)
{
struct nfs_inode *nfsi = NFS_I(page_file_mapping(req->wb_page)->host);
/* Bump the transmission count */
req->wb_nio++;
nfs_mark_request_dirty(req);
set_bit(NFS_CONTEXT_RESEND_WRITES, &nfs_req_openctx(req)->flags);
atomic_long_inc(&nfsi->redirtied_pages);
nfs_end_page_writeback(req);
nfs_release_request(req);
}
@ -1904,7 +1906,7 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data)
/* We have a mismatch. Write the page again */
dprintk_cont(" mismatch\n");
nfs_mark_request_dirty(req);
set_bit(NFS_CONTEXT_RESEND_WRITES, &nfs_req_openctx(req)->flags);
atomic_long_inc(&NFS_I(data->inode)->redirtied_pages);
next:
nfs_unlock_and_release_request(req);
/* Latency breaker */

View File

@ -182,6 +182,7 @@ struct nfs_inode {
/* Regular file */
struct {
atomic_long_t nrequests;
atomic_long_t redirtied_pages;
struct nfs_mds_commit_info commit_info;
struct mutex commit_mutex;
};