linux/fs/gfs2/glops.c
Steven Whitehouse d8348de06f GFS2: Fix deadlock on journal flush
This patch fixes a deadlock when the journal is flushed and there
are dirty inodes other than the one which caused the journal flush.
Originally the journal flushing code was trying to obtain the
transaction glock while running the flush code for an inode glock.
We no longer require the transaction glock at this point in time
since we know that any attempt to get the transaction glock from
another node will result in a journal flush. So if we are flushing
the journal, we can be sure that the transaction lock is still
cached from when the transaction was started.

By inlining a version of gfs2_trans_begin() (minus the bit which
gets the transaction glock) we can avoid the deadlock problems
caused if there is a demote request queued up on the transaction
glock.

In addition I've also moved the umount rwsem so that it covers
the glock workqueue, since it all demotions are done by this
workqueue now. That fixes a bug on umount which I came across
while fixing the original problem.

Reported-by: David Teigland <teigland@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
2009-03-24 11:21:18 +00:00

450 lines
10 KiB
C

/*
* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
* Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License version 2.
*/
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/completion.h>
#include <linux/buffer_head.h>
#include <linux/gfs2_ondisk.h>
#include <linux/bio.h>
#include "gfs2.h"
#include "incore.h"
#include "bmap.h"
#include "glock.h"
#include "glops.h"
#include "inode.h"
#include "log.h"
#include "meta_io.h"
#include "recovery.h"
#include "rgrp.h"
#include "util.h"
#include "trans.h"
/**
* ail_empty_gl - remove all buffers for a given lock from the AIL
* @gl: the glock
*
* None of the buffers should be dirty, locked, or pinned.
*/
static void gfs2_ail_empty_gl(struct gfs2_glock *gl)
{
struct gfs2_sbd *sdp = gl->gl_sbd;
struct list_head *head = &gl->gl_ail_list;
struct gfs2_bufdata *bd;
struct buffer_head *bh;
struct gfs2_trans tr;
memset(&tr, 0, sizeof(tr));
tr.tr_revokes = atomic_read(&gl->gl_ail_count);
if (!tr.tr_revokes)
return;
/* A shortened, inline version of gfs2_trans_begin() */
tr.tr_reserved = 1 + gfs2_struct2blk(sdp, tr.tr_revokes, sizeof(u64));
tr.tr_ip = (unsigned long)__builtin_return_address(0);
INIT_LIST_HEAD(&tr.tr_list_buf);
gfs2_log_reserve(sdp, tr.tr_reserved);
BUG_ON(current->journal_info);
current->journal_info = &tr;
gfs2_log_lock(sdp);
while (!list_empty(head)) {
bd = list_entry(head->next, struct gfs2_bufdata,
bd_ail_gl_list);
bh = bd->bd_bh;
gfs2_remove_from_ail(bd);
bd->bd_bh = NULL;
bh->b_private = NULL;
bd->bd_blkno = bh->b_blocknr;
gfs2_assert_withdraw(sdp, !buffer_busy(bh));
gfs2_trans_add_revoke(sdp, bd);
}
gfs2_assert_withdraw(sdp, !atomic_read(&gl->gl_ail_count));
gfs2_log_unlock(sdp);
gfs2_trans_end(sdp);
gfs2_log_flush(sdp, NULL);
}
/**
* gfs2_pte_inval - Sync and invalidate all PTEs associated with a glock
* @gl: the glock
*
*/
static void gfs2_pte_inval(struct gfs2_glock *gl)
{
struct gfs2_inode *ip;
struct inode *inode;
ip = gl->gl_object;
inode = &ip->i_inode;
if (!ip || !S_ISREG(inode->i_mode))
return;
unmap_shared_mapping_range(inode->i_mapping, 0, 0);
if (test_bit(GIF_SW_PAGED, &ip->i_flags))
set_bit(GLF_DIRTY, &gl->gl_flags);
}
/**
* meta_go_sync - sync out the metadata for this glock
* @gl: the glock
*
* Called when demoting or unlocking an EX glock. We must flush
* to disk all dirty buffers/pages relating to this glock, and must not
* not return to caller to demote/unlock the glock until I/O is complete.
*/
static void meta_go_sync(struct gfs2_glock *gl)
{
if (gl->gl_state != LM_ST_EXCLUSIVE)
return;
if (test_and_clear_bit(GLF_DIRTY, &gl->gl_flags)) {
gfs2_log_flush(gl->gl_sbd, gl);
gfs2_meta_sync(gl);
gfs2_ail_empty_gl(gl);
}
}
/**
* meta_go_inval - invalidate the metadata for this glock
* @gl: the glock
* @flags:
*
*/
static void meta_go_inval(struct gfs2_glock *gl, int flags)
{
if (!(flags & DIO_METADATA))
return;
gfs2_meta_inval(gl);
if (gl->gl_object == GFS2_I(gl->gl_sbd->sd_rindex))
gl->gl_sbd->sd_rindex_uptodate = 0;
else if (gl->gl_ops == &gfs2_rgrp_glops && gl->gl_object) {
struct gfs2_rgrpd *rgd = (struct gfs2_rgrpd *)gl->gl_object;
rgd->rd_flags &= ~GFS2_RDF_UPTODATE;
}
}
/**
* inode_go_sync - Sync the dirty data and/or metadata for an inode glock
* @gl: the glock protecting the inode
*
*/
static void inode_go_sync(struct gfs2_glock *gl)
{
struct gfs2_inode *ip = gl->gl_object;
struct address_space *metamapping = gl->gl_aspace->i_mapping;
int error;
if (gl->gl_state != LM_ST_UNLOCKED)
gfs2_pte_inval(gl);
if (gl->gl_state != LM_ST_EXCLUSIVE)
return;
if (ip && !S_ISREG(ip->i_inode.i_mode))
ip = NULL;
if (test_bit(GLF_DIRTY, &gl->gl_flags)) {
gfs2_log_flush(gl->gl_sbd, gl);
filemap_fdatawrite(metamapping);
if (ip) {
struct address_space *mapping = ip->i_inode.i_mapping;
filemap_fdatawrite(mapping);
error = filemap_fdatawait(mapping);
mapping_set_error(mapping, error);
}
error = filemap_fdatawait(metamapping);
mapping_set_error(metamapping, error);
clear_bit(GLF_DIRTY, &gl->gl_flags);
gfs2_ail_empty_gl(gl);
}
}
/**
* inode_go_inval - prepare a inode glock to be released
* @gl: the glock
* @flags:
*
*/
static void inode_go_inval(struct gfs2_glock *gl, int flags)
{
struct gfs2_inode *ip = gl->gl_object;
int meta = (flags & DIO_METADATA);
if (meta) {
gfs2_meta_inval(gl);
if (ip)
set_bit(GIF_INVALID, &ip->i_flags);
}
if (ip && S_ISREG(ip->i_inode.i_mode))
truncate_inode_pages(ip->i_inode.i_mapping, 0);
}
/**
* inode_go_demote_ok - Check to see if it's ok to unlock an inode glock
* @gl: the glock
*
* Returns: 1 if it's ok
*/
static int inode_go_demote_ok(const struct gfs2_glock *gl)
{
struct gfs2_sbd *sdp = gl->gl_sbd;
if (sdp->sd_jindex == gl->gl_object || sdp->sd_rindex == gl->gl_object)
return 0;
return 1;
}
/**
* inode_go_lock - operation done after an inode lock is locked by a process
* @gl: the glock
* @flags:
*
* Returns: errno
*/
static int inode_go_lock(struct gfs2_holder *gh)
{
struct gfs2_glock *gl = gh->gh_gl;
struct gfs2_sbd *sdp = gl->gl_sbd;
struct gfs2_inode *ip = gl->gl_object;
int error = 0;
if (!ip || (gh->gh_flags & GL_SKIP))
return 0;
if (test_bit(GIF_INVALID, &ip->i_flags)) {
error = gfs2_inode_refresh(ip);
if (error)
return error;
}
if ((ip->i_diskflags & GFS2_DIF_TRUNC_IN_PROG) &&
(gl->gl_state == LM_ST_EXCLUSIVE) &&
(gh->gh_state == LM_ST_EXCLUSIVE)) {
spin_lock(&sdp->sd_trunc_lock);
if (list_empty(&ip->i_trunc_list))
list_add(&sdp->sd_trunc_list, &ip->i_trunc_list);
spin_unlock(&sdp->sd_trunc_lock);
wake_up(&sdp->sd_quota_wait);
return 1;
}
return error;
}
/**
* inode_go_dump - print information about an inode
* @seq: The iterator
* @ip: the inode
*
* Returns: 0 on success, -ENOBUFS when we run out of space
*/
static int inode_go_dump(struct seq_file *seq, const struct gfs2_glock *gl)
{
const struct gfs2_inode *ip = gl->gl_object;
if (ip == NULL)
return 0;
gfs2_print_dbg(seq, " I: n:%llu/%llu t:%u f:0x%02lx d:0x%08x s:%llu/%llu\n",
(unsigned long long)ip->i_no_formal_ino,
(unsigned long long)ip->i_no_addr,
IF2DT(ip->i_inode.i_mode), ip->i_flags,
(unsigned int)ip->i_diskflags,
(unsigned long long)ip->i_inode.i_size,
(unsigned long long)ip->i_disksize);
return 0;
}
/**
* rgrp_go_demote_ok - Check to see if it's ok to unlock a RG's glock
* @gl: the glock
*
* Returns: 1 if it's ok
*/
static int rgrp_go_demote_ok(const struct gfs2_glock *gl)
{
return !gl->gl_aspace->i_mapping->nrpages;
}
/**
* rgrp_go_lock - operation done after an rgrp lock is locked by
* a first holder on this node.
* @gl: the glock
* @flags:
*
* Returns: errno
*/
static int rgrp_go_lock(struct gfs2_holder *gh)
{
return gfs2_rgrp_bh_get(gh->gh_gl->gl_object);
}
/**
* rgrp_go_unlock - operation done before an rgrp lock is unlocked by
* a last holder on this node.
* @gl: the glock
* @flags:
*
*/
static void rgrp_go_unlock(struct gfs2_holder *gh)
{
gfs2_rgrp_bh_put(gh->gh_gl->gl_object);
}
/**
* rgrp_go_dump - print out an rgrp
* @seq: The iterator
* @gl: The glock in question
*
*/
static int rgrp_go_dump(struct seq_file *seq, const struct gfs2_glock *gl)
{
const struct gfs2_rgrpd *rgd = gl->gl_object;
if (rgd == NULL)
return 0;
gfs2_print_dbg(seq, " R: n:%llu f:%02x b:%u/%u i:%u\n",
(unsigned long long)rgd->rd_addr, rgd->rd_flags,
rgd->rd_free, rgd->rd_free_clone, rgd->rd_dinodes);
return 0;
}
/**
* trans_go_sync - promote/demote the transaction glock
* @gl: the glock
* @state: the requested state
* @flags:
*
*/
static void trans_go_sync(struct gfs2_glock *gl)
{
struct gfs2_sbd *sdp = gl->gl_sbd;
if (gl->gl_state != LM_ST_UNLOCKED &&
test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) {
gfs2_meta_syncfs(sdp);
gfs2_log_shutdown(sdp);
}
}
/**
* trans_go_xmote_bh - After promoting/demoting the transaction glock
* @gl: the glock
*
*/
static int trans_go_xmote_bh(struct gfs2_glock *gl, struct gfs2_holder *gh)
{
struct gfs2_sbd *sdp = gl->gl_sbd;
struct gfs2_inode *ip = GFS2_I(sdp->sd_jdesc->jd_inode);
struct gfs2_glock *j_gl = ip->i_gl;
struct gfs2_log_header_host head;
int error;
if (test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) {
j_gl->gl_ops->go_inval(j_gl, DIO_METADATA);
error = gfs2_find_jhead(sdp->sd_jdesc, &head);
if (error)
gfs2_consist(sdp);
if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT))
gfs2_consist(sdp);
/* Initialize some head of the log stuff */
if (!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) {
sdp->sd_log_sequence = head.lh_sequence + 1;
gfs2_log_pointers_init(sdp, head.lh_blkno);
}
}
return 0;
}
/**
* trans_go_demote_ok
* @gl: the glock
*
* Always returns 0
*/
static int trans_go_demote_ok(const struct gfs2_glock *gl)
{
return 0;
}
const struct gfs2_glock_operations gfs2_meta_glops = {
.go_xmote_th = meta_go_sync,
.go_type = LM_TYPE_META,
};
const struct gfs2_glock_operations gfs2_inode_glops = {
.go_xmote_th = inode_go_sync,
.go_inval = inode_go_inval,
.go_demote_ok = inode_go_demote_ok,
.go_lock = inode_go_lock,
.go_dump = inode_go_dump,
.go_type = LM_TYPE_INODE,
.go_min_hold_time = HZ / 5,
};
const struct gfs2_glock_operations gfs2_rgrp_glops = {
.go_xmote_th = meta_go_sync,
.go_inval = meta_go_inval,
.go_demote_ok = rgrp_go_demote_ok,
.go_lock = rgrp_go_lock,
.go_unlock = rgrp_go_unlock,
.go_dump = rgrp_go_dump,
.go_type = LM_TYPE_RGRP,
.go_min_hold_time = HZ / 5,
};
const struct gfs2_glock_operations gfs2_trans_glops = {
.go_xmote_th = trans_go_sync,
.go_xmote_bh = trans_go_xmote_bh,
.go_demote_ok = trans_go_demote_ok,
.go_type = LM_TYPE_NONDISK,
};
const struct gfs2_glock_operations gfs2_iopen_glops = {
.go_type = LM_TYPE_IOPEN,
};
const struct gfs2_glock_operations gfs2_flock_glops = {
.go_type = LM_TYPE_FLOCK,
};
const struct gfs2_glock_operations gfs2_nondisk_glops = {
.go_type = LM_TYPE_NONDISK,
};
const struct gfs2_glock_operations gfs2_quota_glops = {
.go_type = LM_TYPE_QUOTA,
};
const struct gfs2_glock_operations gfs2_journal_glops = {
.go_type = LM_TYPE_JOURNAL,
};