xfs: replace i_flock with a sleeping bitlock

We almost never block on i_flock, the exception is synchronous inode
flushing.  Instead of bloating the inode with a 16/24-byte completion
that we abuse as a semaphore just implement it as a bitlock that uses
a bit waitqueue for the rare sleeping path.  This primarily is a
tradeoff between a much smaller inode and a faster non-blocking
path vs faster wakeups, and we are much better off with the former.

A small downside is that we will lose lockdep checking for i_flock, but
given that it's always taken inside the ilock that should be acceptable.

Note that for example the inode writeback locking is implemented in a
very similar way.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Alex Elder <aelder@sgi.com>
Signed-off-by: Ben Myers <bpm@sgi.com>
This commit is contained in:
Christoph Hellwig 2011-12-18 20:00:09 +00:00 committed by Ben Myers
parent 49e4c70e52
commit 474fce0675
6 changed files with 76 additions and 46 deletions

View File

@ -77,7 +77,7 @@ xfs_inode_alloc(
ASSERT(atomic_read(&ip->i_pincount) == 0);
ASSERT(!spin_is_locked(&ip->i_flags_lock));
ASSERT(completion_done(&ip->i_flush));
ASSERT(!xfs_isiflocked(ip));
ASSERT(ip->i_ino == 0);
mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);
@ -150,7 +150,7 @@ xfs_inode_free(
/* asserts to verify all state is correct here */
ASSERT(atomic_read(&ip->i_pincount) == 0);
ASSERT(!spin_is_locked(&ip->i_flags_lock));
ASSERT(completion_done(&ip->i_flush));
ASSERT(!xfs_isiflocked(ip));
/*
* Because we use RCU freeing we need to ensure the inode always
@ -713,3 +713,19 @@ xfs_isilocked(
return 0;
}
#endif
void
__xfs_iflock(
struct xfs_inode *ip)
{
wait_queue_head_t *wq = bit_waitqueue(&ip->i_flags, __XFS_IFLOCK_BIT);
DEFINE_WAIT_BIT(wait, &ip->i_flags, __XFS_IFLOCK_BIT);
do {
prepare_to_wait_exclusive(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
if (xfs_isiflocked(ip))
io_schedule();
} while (!xfs_iflock_nowait(ip));
finish_wait(wq, &wait.wait);
}

View File

@ -2396,7 +2396,7 @@ xfs_iflush(
XFS_STATS_INC(xs_iflush_count);
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
ASSERT(!completion_done(&ip->i_flush));
ASSERT(xfs_isiflocked(ip));
ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
ip->i_d.di_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK));
@ -2512,7 +2512,7 @@ xfs_iflush_int(
#endif
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
ASSERT(!completion_done(&ip->i_flush));
ASSERT(xfs_isiflocked(ip));
ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
ip->i_d.di_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK));

View File

@ -237,7 +237,6 @@ typedef struct xfs_inode {
struct xfs_inode_log_item *i_itemp; /* logging information */
mrlock_t i_lock; /* inode lock */
mrlock_t i_iolock; /* inode IO lock */
struct completion i_flush; /* inode flush completion q */
atomic_t i_pincount; /* inode pin count */
wait_queue_head_t i_ipin_wait; /* inode pinning wait queue */
spinlock_t i_flags_lock; /* inode i_flags lock */
@ -324,6 +323,19 @@ xfs_iflags_test_and_clear(xfs_inode_t *ip, unsigned short flags)
return ret;
}
static inline int
xfs_iflags_test_and_set(xfs_inode_t *ip, unsigned short flags)
{
int ret;
spin_lock(&ip->i_flags_lock);
ret = ip->i_flags & flags;
if (!ret)
ip->i_flags |= flags;
spin_unlock(&ip->i_flags_lock);
return ret;
}
/*
* Project quota id helpers (previously projid was 16bit only
* and using two 16bit values to hold new 32bit projid was chosen
@ -343,36 +355,18 @@ xfs_set_projid(struct xfs_inode *ip,
ip->i_d.di_projid_lo = (__uint16_t) (projid & 0xffff);
}
/*
* Manage the i_flush queue embedded in the inode. This completion
* queue synchronizes processes attempting to flush the in-core
* inode back to disk.
*/
static inline void xfs_iflock(xfs_inode_t *ip)
{
wait_for_completion(&ip->i_flush);
}
static inline int xfs_iflock_nowait(xfs_inode_t *ip)
{
return try_wait_for_completion(&ip->i_flush);
}
static inline void xfs_ifunlock(xfs_inode_t *ip)
{
complete(&ip->i_flush);
}
/*
* In-core inode flags.
*/
#define XFS_IRECLAIM 0x0001 /* started reclaiming this inode */
#define XFS_ISTALE 0x0002 /* inode has been staled */
#define XFS_IRECLAIMABLE 0x0004 /* inode can be reclaimed */
#define XFS_INEW 0x0008 /* inode has just been allocated */
#define XFS_IFILESTREAM 0x0010 /* inode is in a filestream directory */
#define XFS_ITRUNCATED 0x0020 /* truncated down so flush-on-close */
#define XFS_IDIRTY_RELEASE 0x0040 /* dirty release already seen */
#define XFS_IRECLAIM (1 << 0) /* started reclaiming this inode */
#define XFS_ISTALE (1 << 1) /* inode has been staled */
#define XFS_IRECLAIMABLE (1 << 2) /* inode can be reclaimed */
#define XFS_INEW (1 << 3) /* inode has just been allocated */
#define XFS_IFILESTREAM (1 << 4) /* inode is in a filestream dir. */
#define XFS_ITRUNCATED (1 << 5) /* truncated down so flush-on-close */
#define XFS_IDIRTY_RELEASE (1 << 6) /* dirty release already seen */
#define __XFS_IFLOCK_BIT 7 /* inode is being flushed right now */
#define XFS_IFLOCK (1 << __XFS_IFLOCK_BIT)
/*
* Per-lifetime flags need to be reset when re-using a reclaimable inode during
@ -384,6 +378,34 @@ static inline void xfs_ifunlock(xfs_inode_t *ip)
XFS_IDIRTY_RELEASE | XFS_ITRUNCATED | \
XFS_IFILESTREAM);
/*
* Synchronize processes attempting to flush the in-core inode back to disk.
*/
extern void __xfs_iflock(struct xfs_inode *ip);
static inline int xfs_iflock_nowait(struct xfs_inode *ip)
{
return !xfs_iflags_test_and_set(ip, XFS_IFLOCK);
}
static inline void xfs_iflock(struct xfs_inode *ip)
{
if (!xfs_iflock_nowait(ip))
__xfs_iflock(ip);
}
static inline void xfs_ifunlock(struct xfs_inode *ip)
{
xfs_iflags_clear(ip, XFS_IFLOCK);
wake_up_bit(&ip->i_flags, __XFS_IFLOCK_BIT);
}
static inline int xfs_isiflocked(struct xfs_inode *ip)
{
return xfs_iflags_test(ip, XFS_IFLOCK);
}
/*
* Flags for inode locking.
* Bit ranges: 1<<1 - 1<<16-1 -- iolock/ilock modes (bitfield)

View File

@ -717,7 +717,7 @@ xfs_inode_item_pushbuf(
* If a flush is not in progress anymore, chances are that the
* inode was taken off the AIL. So, just get out.
*/
if (completion_done(&ip->i_flush) ||
if (!xfs_isiflocked(ip) ||
!(lip->li_flags & XFS_LI_IN_AIL)) {
xfs_iunlock(ip, XFS_ILOCK_SHARED);
return true;
@ -750,7 +750,7 @@ xfs_inode_item_push(
struct xfs_inode *ip = iip->ili_inode;
ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED));
ASSERT(!completion_done(&ip->i_flush));
ASSERT(xfs_isiflocked(ip));
/*
* Since we were able to lock the inode's flush lock and

View File

@ -829,13 +829,6 @@ xfs_fs_inode_init_once(
atomic_set(&ip->i_pincount, 0);
spin_lock_init(&ip->i_flags_lock);
init_waitqueue_head(&ip->i_ipin_wait);
/*
* Because we want to use a counting completion, complete
* the flush completion once to allow a single access to
* the flush completion without blocking.
*/
init_completion(&ip->i_flush);
complete(&ip->i_flush);
mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
"xfsino", ip->i_ino);

View File

@ -707,14 +707,13 @@ xfs_reclaim_inode_grab(
return 1;
/*
* do some unlocked checks first to avoid unnecessary lock traffic.
* The first is a flush lock check, the second is a already in reclaim
* check. Only do these checks if we are not going to block on locks.
* If we are asked for non-blocking operation, do unlocked checks to
* see if the inode already is being flushed or in reclaim to avoid
* lock traffic.
*/
if ((flags & SYNC_TRYLOCK) &&
(!ip->i_flush.done || __xfs_iflags_test(ip, XFS_IRECLAIM))) {
__xfs_iflags_test(ip, XFS_IFLOCK | XFS_IRECLAIM))
return 1;
}
/*
* The radix tree lock here protects a thread in xfs_iget from racing