xfs: fix locking in xfs_iget_cache_hit
The locking in xfs_iget_cache_hit currently has numerous problems: - we clear the reclaim tag without i_flags_lock which protects modifications to it - we call inode_init_always which can sleep with pag_ici_lock held (this is oss.sgi.com BZ #819) - we acquire and drop i_flags_lock a lot and thus provide no consistency between the various flags we set/clear under it This patch fixes all that with a major revamp of the locking in the function. The new version acquires i_flags_lock early and only drops it once we need to call into inode_init_always or before calling xfs_ilock. This patch fixes a bug seen in the wild where we race modifying the reclaim tag. Signed-off-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Felix Blyakher <felixb@sgi.com> Reviewed-by: Eric Sandeen <sandeen@sandeen.net> Signed-off-by: Felix Blyakher <felixb@sgi.com>
This commit is contained in:
parent
79dd43bb85
commit
a022fe0970
@ -708,6 +708,16 @@ xfs_reclaim_inode(
|
||||
return 0;
|
||||
}
|
||||
|
||||
void
|
||||
__xfs_inode_set_reclaim_tag(
|
||||
struct xfs_perag *pag,
|
||||
struct xfs_inode *ip)
|
||||
{
|
||||
radix_tree_tag_set(&pag->pag_ici_root,
|
||||
XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino),
|
||||
XFS_ICI_RECLAIM_TAG);
|
||||
}
|
||||
|
||||
/*
|
||||
* We set the inode flag atomically with the radix tree tag.
|
||||
* Once we get tag lookups on the radix tree, this inode flag
|
||||
@ -722,8 +732,7 @@ xfs_inode_set_reclaim_tag(
|
||||
|
||||
read_lock(&pag->pag_ici_lock);
|
||||
spin_lock(&ip->i_flags_lock);
|
||||
radix_tree_tag_set(&pag->pag_ici_root,
|
||||
XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG);
|
||||
__xfs_inode_set_reclaim_tag(pag, ip);
|
||||
__xfs_iflags_set(ip, XFS_IRECLAIMABLE);
|
||||
spin_unlock(&ip->i_flags_lock);
|
||||
read_unlock(&pag->pag_ici_lock);
|
||||
|
@ -48,6 +48,7 @@ int xfs_reclaim_inode(struct xfs_inode *ip, int locked, int sync_mode);
|
||||
int xfs_reclaim_inodes(struct xfs_mount *mp, int mode);
|
||||
|
||||
void xfs_inode_set_reclaim_tag(struct xfs_inode *ip);
|
||||
void __xfs_inode_set_reclaim_tag(struct xfs_perag *pag, struct xfs_inode *ip);
|
||||
void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag,
|
||||
struct xfs_inode *ip);
|
||||
|
||||
|
@ -134,80 +134,82 @@ xfs_iget_cache_hit(
|
||||
int flags,
|
||||
int lock_flags) __releases(pag->pag_ici_lock)
|
||||
{
|
||||
struct inode *inode = VFS_I(ip);
|
||||
struct xfs_mount *mp = ip->i_mount;
|
||||
int error = EAGAIN;
|
||||
int error;
|
||||
|
||||
spin_lock(&ip->i_flags_lock);
|
||||
|
||||
/*
|
||||
* If INEW is set this inode is being set up
|
||||
* If IRECLAIM is set this inode is being torn down
|
||||
* Pause and try again.
|
||||
* If we are racing with another cache hit that is currently
|
||||
* instantiating this inode or currently recycling it out of
|
||||
* reclaimabe state, wait for the initialisation to complete
|
||||
* before continuing.
|
||||
*
|
||||
* XXX(hch): eventually we should do something equivalent to
|
||||
* wait_on_inode to wait for these flags to be cleared
|
||||
* instead of polling for it.
|
||||
*/
|
||||
if (xfs_iflags_test(ip, (XFS_INEW|XFS_IRECLAIM))) {
|
||||
if (ip->i_flags & (XFS_INEW|XFS_IRECLAIM)) {
|
||||
XFS_STATS_INC(xs_ig_frecycle);
|
||||
error = EAGAIN;
|
||||
goto out_error;
|
||||
}
|
||||
|
||||
/* If IRECLAIMABLE is set, we've torn down the vfs inode part */
|
||||
if (xfs_iflags_test(ip, XFS_IRECLAIMABLE)) {
|
||||
|
||||
/*
|
||||
* If lookup is racing with unlink, then we should return an
|
||||
* error immediately so we don't remove it from the reclaim
|
||||
* list and potentially leak the inode.
|
||||
*/
|
||||
if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) {
|
||||
error = ENOENT;
|
||||
goto out_error;
|
||||
}
|
||||
/*
|
||||
* If lookup is racing with unlink return an error immediately.
|
||||
*/
|
||||
if (ip->i_d.di_mode == 0 && !(flags & XFS_IGET_CREATE)) {
|
||||
error = ENOENT;
|
||||
goto out_error;
|
||||
}
|
||||
|
||||
/*
|
||||
* If IRECLAIMABLE is set, we've torn down the VFS inode already.
|
||||
* Need to carefully get it back into useable state.
|
||||
*/
|
||||
if (ip->i_flags & XFS_IRECLAIMABLE) {
|
||||
xfs_itrace_exit_tag(ip, "xfs_iget.alloc");
|
||||
|
||||
/*
|
||||
* We need to re-initialise the VFS inode as it has been
|
||||
* 'freed' by the VFS. Do this here so we can deal with
|
||||
* errors cleanly, then tag it so it can be set up correctly
|
||||
* later.
|
||||
* We need to set XFS_INEW atomically with clearing the
|
||||
* reclaimable tag so that we do have an indicator of the
|
||||
* inode still being initialized.
|
||||
*/
|
||||
if (!inode_init_always(mp->m_super, VFS_I(ip))) {
|
||||
error = ENOMEM;
|
||||
ip->i_flags |= XFS_INEW;
|
||||
ip->i_flags &= ~XFS_IRECLAIMABLE;
|
||||
__xfs_inode_clear_reclaim_tag(mp, pag, ip);
|
||||
|
||||
spin_unlock(&ip->i_flags_lock);
|
||||
read_unlock(&pag->pag_ici_lock);
|
||||
|
||||
error = -inode_init_always(mp->m_super, inode);
|
||||
if (error) {
|
||||
/*
|
||||
* Re-initializing the inode failed, and we are in deep
|
||||
* trouble. Try to re-add it to the reclaim list.
|
||||
*/
|
||||
read_lock(&pag->pag_ici_lock);
|
||||
spin_lock(&ip->i_flags_lock);
|
||||
|
||||
ip->i_flags &= ~XFS_INEW;
|
||||
ip->i_flags |= XFS_IRECLAIMABLE;
|
||||
__xfs_inode_set_reclaim_tag(pag, ip);
|
||||
goto out_error;
|
||||
}
|
||||
inode->i_state = I_LOCK|I_NEW;
|
||||
} else {
|
||||
/* If the VFS inode is being torn down, pause and try again. */
|
||||
if (!igrab(inode)) {
|
||||
error = EAGAIN;
|
||||
goto out_error;
|
||||
}
|
||||
|
||||
/*
|
||||
* We must set the XFS_INEW flag before clearing the
|
||||
* XFS_IRECLAIMABLE flag so that if a racing lookup does
|
||||
* not find the XFS_IRECLAIMABLE above but has the igrab()
|
||||
* below succeed we can safely check XFS_INEW to detect
|
||||
* that this inode is still being initialised.
|
||||
*/
|
||||
xfs_iflags_set(ip, XFS_INEW);
|
||||
xfs_iflags_clear(ip, XFS_IRECLAIMABLE);
|
||||
|
||||
/* clear the radix tree reclaim flag as well. */
|
||||
__xfs_inode_clear_reclaim_tag(mp, pag, ip);
|
||||
} else if (!igrab(VFS_I(ip))) {
|
||||
/* If the VFS inode is being torn down, pause and try again. */
|
||||
XFS_STATS_INC(xs_ig_frecycle);
|
||||
goto out_error;
|
||||
} else if (xfs_iflags_test(ip, XFS_INEW)) {
|
||||
/*
|
||||
* We are racing with another cache hit that is
|
||||
* currently recycling this inode out of the XFS_IRECLAIMABLE
|
||||
* state. Wait for the initialisation to complete before
|
||||
* continuing.
|
||||
*/
|
||||
wait_on_inode(VFS_I(ip));
|
||||
/* We've got a live one. */
|
||||
spin_unlock(&ip->i_flags_lock);
|
||||
read_unlock(&pag->pag_ici_lock);
|
||||
}
|
||||
|
||||
if (ip->i_d.di_mode == 0 && !(flags & XFS_IGET_CREATE)) {
|
||||
error = ENOENT;
|
||||
iput(VFS_I(ip));
|
||||
goto out_error;
|
||||
}
|
||||
|
||||
/* We've got a live one. */
|
||||
read_unlock(&pag->pag_ici_lock);
|
||||
|
||||
if (lock_flags != 0)
|
||||
xfs_ilock(ip, lock_flags);
|
||||
|
||||
@ -217,6 +219,7 @@ xfs_iget_cache_hit(
|
||||
return 0;
|
||||
|
||||
out_error:
|
||||
spin_unlock(&ip->i_flags_lock);
|
||||
read_unlock(&pag->pag_ici_lock);
|
||||
return error;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user