From 1c634f94c3da39115270d35b3075af970810a927 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Wed, 13 Nov 2019 14:09:28 -0600 Subject: [PATCH] gfs2: Do proper error checking for go_sync family of glops functions Before this patch, function do_xmote would try to sync out the glock dirty data by calling the appropriate glops function XXX_go_sync(), but it did not check the return code. If the sync was not possible due to an I/O error or any other failure, do_xmote would continue on and call go_inval and release the glock to other cluster nodes. When those nodes go to replay the journal, they may already be holding glocks for the journal records that should have been synced, but were not due to the ignored error. This patch introduces proper error code checking to the go_sync family of glops functions. Signed-off-by: Bob Peterson Reviewed-by: Andreas Gruenbacher --- fs/gfs2/glock.c | 16 ++++++++++++++-- fs/gfs2/glops.c | 30 +++++++++++++++++++----------- fs/gfs2/incore.h | 2 +- 3 files changed, 34 insertions(+), 14 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 73cb5bcc37a7..0bfa58e5a64e 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -602,8 +602,20 @@ __acquires(&gl->gl_lockref.lock) (lck_flags & (LM_FLAG_TRY|LM_FLAG_TRY_1CB))) clear_bit(GLF_BLOCKING, &gl->gl_flags); spin_unlock(&gl->gl_lockref.lock); - if (glops->go_sync) - glops->go_sync(gl); + if (glops->go_sync) { + ret = glops->go_sync(gl); + /* If we had a problem syncing (due to io errors or whatever), + * we should not invalidate the metadata or tell dlm to + * release the glock to other nodes. + */ + if (ret) { + if (cmpxchg(&sdp->sd_log_error, 0, ret)) { + fs_err(sdp, "Error %d syncing glock \n", ret); + gfs2_dump_glock(NULL, gl, true); + } + return; + } + } if (test_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags)) { /* * The call to go_sync should have cleared out the ail list. 
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index bbbcae8d853c..9e9c7a4b8c66 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -82,10 +82,11 @@ static void __gfs2_ail_flush(struct gfs2_glock *gl, bool fsync, } -static void gfs2_ail_empty_gl(struct gfs2_glock *gl) +static int gfs2_ail_empty_gl(struct gfs2_glock *gl) { struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; struct gfs2_trans tr; + int ret; memset(&tr, 0, sizeof(tr)); INIT_LIST_HEAD(&tr.tr_buf); @@ -116,7 +117,7 @@ static void gfs2_ail_empty_gl(struct gfs2_glock *gl) goto flush; if (log_in_flight) log_flush_wait(sdp); - return; + return 0; } /* A shortened, inline version of gfs2_trans_begin() @@ -124,8 +125,9 @@ static void gfs2_ail_empty_gl(struct gfs2_glock *gl) * on the stack */ tr.tr_reserved = 1 + gfs2_struct2blk(sdp, tr.tr_revokes); tr.tr_ip = _RET_IP_; - if (gfs2_log_reserve(sdp, tr.tr_reserved) < 0) - return; + ret = gfs2_log_reserve(sdp, tr.tr_reserved); + if (ret < 0) + return ret; WARN_ON_ONCE(current->journal_info); current->journal_info = &tr; @@ -135,6 +137,7 @@ static void gfs2_ail_empty_gl(struct gfs2_glock *gl) flush: gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL | GFS2_LFC_AIL_EMPTY_GL); + return 0; } void gfs2_ail_flush(struct gfs2_glock *gl, bool fsync) @@ -168,7 +171,7 @@ void gfs2_ail_flush(struct gfs2_glock *gl, bool fsync) * return to caller to demote/unlock the glock until I/O is complete. 
*/ -static void rgrp_go_sync(struct gfs2_glock *gl) +static int rgrp_go_sync(struct gfs2_glock *gl) { struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; struct address_space *mapping = &sdp->sd_aspace; @@ -176,21 +179,24 @@ static void rgrp_go_sync(struct gfs2_glock *gl) int error; if (!test_and_clear_bit(GLF_DIRTY, &gl->gl_flags)) - return; + return 0; GLOCK_BUG_ON(gl, gl->gl_state != LM_ST_EXCLUSIVE); gfs2_log_flush(sdp, gl, GFS2_LOG_HEAD_FLUSH_NORMAL | GFS2_LFC_RGRP_GO_SYNC); filemap_fdatawrite_range(mapping, gl->gl_vm.start, gl->gl_vm.end); error = filemap_fdatawait_range(mapping, gl->gl_vm.start, gl->gl_vm.end); + WARN_ON_ONCE(error); mapping_set_error(mapping, error); - gfs2_ail_empty_gl(gl); + if (!error) + error = gfs2_ail_empty_gl(gl); spin_lock(&gl->gl_lockref.lock); rgd = gl->gl_object; if (rgd) gfs2_free_clones(rgd); spin_unlock(&gl->gl_lockref.lock); + return error; } /** @@ -257,12 +263,12 @@ static void gfs2_clear_glop_pending(struct gfs2_inode *ip) * */ -static void inode_go_sync(struct gfs2_glock *gl) +static int inode_go_sync(struct gfs2_glock *gl) { struct gfs2_inode *ip = gfs2_glock2inode(gl); int isreg = ip && S_ISREG(ip->i_inode.i_mode); struct address_space *metamapping = gfs2_glock2aspace(gl); - int error; + int error = 0; if (isreg) { if (test_and_clear_bit(GIF_SW_PAGED, &ip->i_flags)) @@ -295,6 +301,7 @@ static void inode_go_sync(struct gfs2_glock *gl) out: gfs2_clear_glop_pending(ip); + return error; } /** @@ -515,7 +522,7 @@ static void inode_go_dump(struct seq_file *seq, struct gfs2_glock *gl, * */ -static void freeze_go_sync(struct gfs2_glock *gl) +static int freeze_go_sync(struct gfs2_glock *gl) { int error = 0; struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; @@ -529,7 +536,7 @@ static void freeze_go_sync(struct gfs2_glock *gl) error); if (gfs2_withdrawn(sdp)) { atomic_set(&sdp->sd_freeze_state, SFS_UNFROZEN); - return; + return 0; } gfs2_assert_withdraw(sdp, 0); } @@ -537,6 +544,7 @@ static void freeze_go_sync(struct gfs2_glock *gl) 
gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_FREEZE | GFS2_LFC_FREEZE_GO_SYNC); } + return 0; } /** diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 8cd564bcf5e6..04549a8cae7e 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -234,7 +234,7 @@ struct lm_lockname { struct gfs2_glock_operations { - void (*go_sync) (struct gfs2_glock *gl); + int (*go_sync) (struct gfs2_glock *gl); int (*go_xmote_bh) (struct gfs2_glock *gl, struct gfs2_holder *gh); void (*go_inval) (struct gfs2_glock *gl, int flags); int (*go_demote_ok) (const struct gfs2_glock *gl);