xfs: fix shutdown hang on invalid inode during create
When the new inode verify in xfs_iread() fails, the create transaction is aborted and a shutdown occurs. The subsequent unmount then hangs in xfs_wait_buftarg() on a buffer that has an elevated hold count. Debug showed that it was an AGI buffer getting stuck: [ 22.576147] XFS (vdb): buffer 0x2/0x1, hold 0x2 stuck [ 22.976213] XFS (vdb): buffer 0x2/0x1, hold 0x2 stuck [ 23.376206] XFS (vdb): buffer 0x2/0x1, hold 0x2 stuck [ 23.776325] XFS (vdb): buffer 0x2/0x1, hold 0x2 stuck The trace of this buffer leading up to the shutdown (trimmed for brevity) looks like: xfs_buf_init: bno 0x2 nblks 0x1 hold 1 caller xfs_buf_get_map xfs_buf_get: bno 0x2 len 0x200 hold 1 caller xfs_buf_read_map xfs_buf_read: bno 0x2 len 0x200 hold 1 caller xfs_trans_read_buf_map xfs_buf_iorequest: bno 0x2 nblks 0x1 hold 1 caller _xfs_buf_read xfs_buf_hold: bno 0x2 nblks 0x1 hold 1 caller xfs_buf_iorequest xfs_buf_rele: bno 0x2 nblks 0x1 hold 2 caller xfs_buf_iorequest xfs_buf_iowait: bno 0x2 nblks 0x1 hold 1 caller _xfs_buf_read xfs_buf_ioerror: bno 0x2 len 0x200 hold 1 caller xfs_buf_bio_end_io xfs_buf_iodone: bno 0x2 nblks 0x1 hold 1 caller _xfs_buf_ioend xfs_buf_iowait_done: bno 0x2 nblks 0x1 hold 1 caller _xfs_buf_read xfs_buf_hold: bno 0x2 nblks 0x1 hold 1 caller xfs_buf_item_init xfs_trans_read_buf: bno 0x2 len 0x200 hold 2 recur 0 refcount 1 xfs_trans_brelse: bno 0x2 len 0x200 hold 2 recur 0 refcount 1 xfs_buf_item_relse: bno 0x2 nblks 0x1 hold 2 caller xfs_trans_brelse xfs_buf_rele: bno 0x2 nblks 0x1 hold 2 caller xfs_buf_item_relse xfs_buf_unlock: bno 0x2 nblks 0x1 hold 1 caller xfs_trans_brelse xfs_buf_rele: bno 0x2 nblks 0x1 hold 1 caller xfs_trans_brelse xfs_buf_trylock: bno 0x2 nblks 0x1 hold 2 caller _xfs_buf_find xfs_buf_find: bno 0x2 len 0x200 hold 2 caller xfs_buf_get_map xfs_buf_get: bno 0x2 len 0x200 hold 2 caller xfs_buf_read_map xfs_buf_read: bno 0x2 len 0x200 hold 2 caller xfs_trans_read_buf_map xfs_buf_hold: bno 0x2 nblks 0x1 hold 2 caller xfs_buf_item_init xfs_trans_read_buf: bno 0x2 len 0x200 hold 3 recur 0 refcount 1 xfs_trans_log_buf: bno 0x2 len 0x200 hold 3 recur 0 refcount 1 xfs_buf_item_unlock: bno 0x2 len 0x200 hold 3 flags DIRTY liflags ABORTED xfs_buf_unlock: bno 0x2 nblks 0x1 hold 3 caller xfs_buf_item_unlock xfs_buf_rele: bno 0x2 nblks 0x1 hold 3 caller xfs_buf_item_unlock And that is the AGI buffer from cold cache read into memory to transaction abort. You can see at transaction abort the bli is dirty and only has a single reference. The item is not pinned, and it's not in the AIL. Hence the only reference to it is this transaction. The problem is that the xfs_buf_item_unlock() call is dropping the last reference to the xfs_buf_log_item attached to the buffer (which holds a reference to the buffer), but it is not freeing the xfs_buf_log_item. Hence nothing will ever release the buffer, and the unmount hangs waiting for this reference to go away. The fix is simple - xfs_buf_item_unlock needs to detect the last reference going away in this case and free the xfs_buf_log_item to release the reference it holds on the buffer. Signed-off-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Ben Myers <bpm@sgi.com> Signed-off-by: Ben Myers <bpm@sgi.com>
This commit is contained in:
parent
f2a459565b
commit
9f87832a82
@ -1505,6 +1505,8 @@ restart:
|
||||
while (!list_empty(&btp->bt_lru)) {
|
||||
bp = list_first_entry(&btp->bt_lru, struct xfs_buf, b_lru);
|
||||
if (atomic_read(&bp->b_hold) > 1) {
|
||||
trace_xfs_buf_wait_buftarg(bp, _RET_IP_);
|
||||
list_move_tail(&bp->b_lru, &btp->bt_lru);
|
||||
spin_unlock(&btp->bt_lru_lock);
|
||||
delay(100);
|
||||
goto restart;
|
||||
|
@ -652,7 +652,10 @@ xfs_buf_item_unlock(
|
||||
|
||||
/*
|
||||
* If the buf item isn't tracking any data, free it, otherwise drop the
|
||||
* reference we hold to it.
|
||||
* reference we hold to it. If we are aborting the transaction, this may
|
||||
* be the only reference to the buf item, so we free it anyway
|
||||
* regardless of whether it is dirty or not. A dirty abort implies a
|
||||
* shutdown, anyway.
|
||||
*/
|
||||
clean = 1;
|
||||
for (i = 0; i < bip->bli_format_count; i++) {
|
||||
@ -664,7 +667,12 @@ xfs_buf_item_unlock(
|
||||
}
|
||||
if (clean)
|
||||
xfs_buf_item_relse(bp);
|
||||
else
|
||||
else if (aborted) {
|
||||
if (atomic_dec_and_test(&bip->bli_refcount)) {
|
||||
ASSERT(XFS_FORCED_SHUTDOWN(lip->li_mountp));
|
||||
xfs_buf_item_relse(bp);
|
||||
}
|
||||
} else
|
||||
atomic_dec(&bip->bli_refcount);
|
||||
|
||||
if (!hold)
|
||||
|
@ -341,6 +341,7 @@ DEFINE_BUF_EVENT(xfs_buf_item_relse);
|
||||
DEFINE_BUF_EVENT(xfs_buf_item_iodone);
|
||||
DEFINE_BUF_EVENT(xfs_buf_item_iodone_async);
|
||||
DEFINE_BUF_EVENT(xfs_buf_error_relse);
|
||||
DEFINE_BUF_EVENT(xfs_buf_wait_buftarg);
|
||||
DEFINE_BUF_EVENT(xfs_trans_read_buf_io);
|
||||
DEFINE_BUF_EVENT(xfs_trans_read_buf_shut);
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user