forked from Minki/linux
xfs: introduce mmap/truncate lock
Right now we cannot serialise mmap against truncate or hole punch sanely. ->page_mkwrite is not able to take locks that the read IO path normally takes (i.e. the inode iolock) because that could result in lock inversions (read - iolock - page fault - page_mkwrite - iolock) and so we cannot use an IO path lock to serialise page write faults against truncate operations. Instead, introduce a new lock that is used *only* in the ->page_mkwrite path that is the equivalent of the iolock. The lock ordering in a page fault is i_mmaplock -> page lock -> i_ilock, and so in truncate we can i_iolock -> i_mmaplock and so lock out new write faults during the process of truncation. Because i_mmap_lock is outside the page lock, we can hold it across all the same operations we hold the i_iolock for. The only difference is that we never hold the i_mmaplock in the normal IO path and so do not ever have the possibility that we can page fault inside it. Hence there are no recursion issues on the i_mmap_lock and so we can use it to serialise page fault IO against inode modification operations that affect the IO path. This patch introduces the i_mmaplock infrastructure, lockdep annotations and initialisation/destruction code. Use of the new lock will be in subsequent patches. Signed-off-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Brian Foster <bfoster@redhat.com> Signed-off-by: Dave Chinner <david@fromorbit.com>
This commit is contained in:
parent
c517d838eb
commit
653c60b633
@ -117,24 +117,34 @@ xfs_ilock_attr_map_shared(
|
||||
}
|
||||
|
||||
/*
|
||||
* The xfs inode contains 2 locks: a multi-reader lock called the
|
||||
* i_iolock and a multi-reader lock called the i_lock. This routine
|
||||
* allows either or both of the locks to be obtained.
|
||||
* The xfs inode contains 3 multi-reader locks: the i_iolock the i_mmap_lock and
|
||||
* the i_lock. This routine allows various combinations of the locks to be
|
||||
* obtained.
|
||||
*
|
||||
* The 2 locks should always be ordered so that the IO lock is
|
||||
* obtained first in order to prevent deadlock.
|
||||
* The 3 locks should always be ordered so that the IO lock is obtained first,
|
||||
* the mmap lock second and the ilock last in order to prevent deadlock.
|
||||
*
|
||||
* ip -- the inode being locked
|
||||
* lock_flags -- this parameter indicates the inode's locks
|
||||
* to be locked. It can be:
|
||||
* XFS_IOLOCK_SHARED,
|
||||
* XFS_IOLOCK_EXCL,
|
||||
* XFS_ILOCK_SHARED,
|
||||
* XFS_ILOCK_EXCL,
|
||||
* XFS_IOLOCK_SHARED | XFS_ILOCK_SHARED,
|
||||
* XFS_IOLOCK_SHARED | XFS_ILOCK_EXCL,
|
||||
* XFS_IOLOCK_EXCL | XFS_ILOCK_SHARED,
|
||||
* XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL
|
||||
* Basic locking order:
|
||||
*
|
||||
* i_iolock -> i_mmap_lock -> page_lock -> i_ilock
|
||||
*
|
||||
* mmap_sem locking order:
|
||||
*
|
||||
* i_iolock -> page lock -> mmap_sem
|
||||
* mmap_sem -> i_mmap_lock -> page_lock
|
||||
*
|
||||
* The difference in mmap_sem locking order mean that we cannot hold the
|
||||
* i_mmap_lock over syscall based read(2)/write(2) based IO. These IO paths can
|
||||
* fault in pages during copy in/out (for buffered IO) or require the mmap_sem
|
||||
* in get_user_pages() to map the user pages into the kernel address space for
|
||||
* direct IO. Similarly the i_iolock cannot be taken inside a page fault because
|
||||
* page faults already hold the mmap_sem.
|
||||
*
|
||||
* Hence to serialise fully against both syscall and mmap based IO, we need to
|
||||
* take both the i_iolock and the i_mmap_lock. These locks should *only* be both
|
||||
* taken in places where we need to invalidate the page cache in a race
|
||||
* free manner (e.g. truncate, hole punch and other extent manipulation
|
||||
* functions).
|
||||
*/
|
||||
void
|
||||
xfs_ilock(
|
||||
@ -150,6 +160,8 @@ xfs_ilock(
|
||||
*/
|
||||
ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) !=
|
||||
(XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
|
||||
ASSERT((lock_flags & (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)) !=
|
||||
(XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL));
|
||||
ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
|
||||
(XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
|
||||
ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);
|
||||
@ -159,6 +171,11 @@ xfs_ilock(
|
||||
else if (lock_flags & XFS_IOLOCK_SHARED)
|
||||
mraccess_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags));
|
||||
|
||||
if (lock_flags & XFS_MMAPLOCK_EXCL)
|
||||
mrupdate_nested(&ip->i_mmaplock, XFS_MMAPLOCK_DEP(lock_flags));
|
||||
else if (lock_flags & XFS_MMAPLOCK_SHARED)
|
||||
mraccess_nested(&ip->i_mmaplock, XFS_MMAPLOCK_DEP(lock_flags));
|
||||
|
||||
if (lock_flags & XFS_ILOCK_EXCL)
|
||||
mrupdate_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags));
|
||||
else if (lock_flags & XFS_ILOCK_SHARED)
|
||||
@ -191,6 +208,8 @@ xfs_ilock_nowait(
|
||||
*/
|
||||
ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) !=
|
||||
(XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
|
||||
ASSERT((lock_flags & (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)) !=
|
||||
(XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL));
|
||||
ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
|
||||
(XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
|
||||
ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);
|
||||
@ -202,21 +221,35 @@ xfs_ilock_nowait(
|
||||
if (!mrtryaccess(&ip->i_iolock))
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (lock_flags & XFS_MMAPLOCK_EXCL) {
|
||||
if (!mrtryupdate(&ip->i_mmaplock))
|
||||
goto out_undo_iolock;
|
||||
} else if (lock_flags & XFS_MMAPLOCK_SHARED) {
|
||||
if (!mrtryaccess(&ip->i_mmaplock))
|
||||
goto out_undo_iolock;
|
||||
}
|
||||
|
||||
if (lock_flags & XFS_ILOCK_EXCL) {
|
||||
if (!mrtryupdate(&ip->i_lock))
|
||||
goto out_undo_iolock;
|
||||
goto out_undo_mmaplock;
|
||||
} else if (lock_flags & XFS_ILOCK_SHARED) {
|
||||
if (!mrtryaccess(&ip->i_lock))
|
||||
goto out_undo_iolock;
|
||||
goto out_undo_mmaplock;
|
||||
}
|
||||
return 1;
|
||||
|
||||
out_undo_iolock:
|
||||
out_undo_mmaplock:
|
||||
if (lock_flags & XFS_MMAPLOCK_EXCL)
|
||||
mrunlock_excl(&ip->i_mmaplock);
|
||||
else if (lock_flags & XFS_MMAPLOCK_SHARED)
|
||||
mrunlock_shared(&ip->i_mmaplock);
|
||||
out_undo_iolock:
|
||||
if (lock_flags & XFS_IOLOCK_EXCL)
|
||||
mrunlock_excl(&ip->i_iolock);
|
||||
else if (lock_flags & XFS_IOLOCK_SHARED)
|
||||
mrunlock_shared(&ip->i_iolock);
|
||||
out:
|
||||
out:
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -244,6 +277,8 @@ xfs_iunlock(
|
||||
*/
|
||||
ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) !=
|
||||
(XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
|
||||
ASSERT((lock_flags & (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)) !=
|
||||
(XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL));
|
||||
ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
|
||||
(XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
|
||||
ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);
|
||||
@ -254,6 +289,11 @@ xfs_iunlock(
|
||||
else if (lock_flags & XFS_IOLOCK_SHARED)
|
||||
mrunlock_shared(&ip->i_iolock);
|
||||
|
||||
if (lock_flags & XFS_MMAPLOCK_EXCL)
|
||||
mrunlock_excl(&ip->i_mmaplock);
|
||||
else if (lock_flags & XFS_MMAPLOCK_SHARED)
|
||||
mrunlock_shared(&ip->i_mmaplock);
|
||||
|
||||
if (lock_flags & XFS_ILOCK_EXCL)
|
||||
mrunlock_excl(&ip->i_lock);
|
||||
else if (lock_flags & XFS_ILOCK_SHARED)
|
||||
@ -271,11 +311,14 @@ xfs_ilock_demote(
|
||||
xfs_inode_t *ip,
|
||||
uint lock_flags)
|
||||
{
|
||||
ASSERT(lock_flags & (XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL));
|
||||
ASSERT((lock_flags & ~(XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL)) == 0);
|
||||
ASSERT(lock_flags & (XFS_IOLOCK_EXCL|XFS_MMAPLOCK_EXCL|XFS_ILOCK_EXCL));
|
||||
ASSERT((lock_flags &
|
||||
~(XFS_IOLOCK_EXCL|XFS_MMAPLOCK_EXCL|XFS_ILOCK_EXCL)) == 0);
|
||||
|
||||
if (lock_flags & XFS_ILOCK_EXCL)
|
||||
mrdemote(&ip->i_lock);
|
||||
if (lock_flags & XFS_MMAPLOCK_EXCL)
|
||||
mrdemote(&ip->i_mmaplock);
|
||||
if (lock_flags & XFS_IOLOCK_EXCL)
|
||||
mrdemote(&ip->i_iolock);
|
||||
|
||||
@ -294,6 +337,12 @@ xfs_isilocked(
|
||||
return rwsem_is_locked(&ip->i_lock.mr_lock);
|
||||
}
|
||||
|
||||
if (lock_flags & (XFS_MMAPLOCK_EXCL|XFS_MMAPLOCK_SHARED)) {
|
||||
if (!(lock_flags & XFS_MMAPLOCK_SHARED))
|
||||
return !!ip->i_mmaplock.mr_writer;
|
||||
return rwsem_is_locked(&ip->i_mmaplock.mr_lock);
|
||||
}
|
||||
|
||||
if (lock_flags & (XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED)) {
|
||||
if (!(lock_flags & XFS_IOLOCK_SHARED))
|
||||
return !!ip->i_iolock.mr_writer;
|
||||
@ -314,14 +363,27 @@ int xfs_lock_delays;
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Bump the subclass so xfs_lock_inodes() acquires each lock with
|
||||
* a different value
|
||||
* Bump the subclass so xfs_lock_inodes() acquires each lock with a different
|
||||
* value. This shouldn't be called for page fault locking, but we also need to
|
||||
* ensure we don't overrun the number of lockdep subclasses for the iolock or
|
||||
* mmaplock as that is limited to 12 by the mmap lock lockdep annotations.
|
||||
*/
|
||||
static inline int
|
||||
xfs_lock_inumorder(int lock_mode, int subclass)
|
||||
{
|
||||
if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL))
|
||||
if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) {
|
||||
ASSERT(subclass + XFS_LOCK_INUMORDER <
|
||||
(1 << (XFS_MMAPLOCK_SHIFT - XFS_IOLOCK_SHIFT)));
|
||||
lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_IOLOCK_SHIFT;
|
||||
}
|
||||
|
||||
if (lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) {
|
||||
ASSERT(subclass + XFS_LOCK_INUMORDER <
|
||||
(1 << (XFS_ILOCK_SHIFT - XFS_MMAPLOCK_SHIFT)));
|
||||
lock_mode |= (subclass + XFS_LOCK_INUMORDER) <<
|
||||
XFS_MMAPLOCK_SHIFT;
|
||||
}
|
||||
|
||||
if (lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL))
|
||||
lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_ILOCK_SHIFT;
|
||||
|
||||
@ -440,10 +502,10 @@ again:
|
||||
}
|
||||
|
||||
/*
|
||||
* xfs_lock_two_inodes() can only be used to lock one type of lock
|
||||
* at a time - the iolock or the ilock, but not both at once. If
|
||||
* we lock both at once, lockdep will report false positives saying
|
||||
* we have violated locking orders.
|
||||
* xfs_lock_two_inodes() can only be used to lock one type of lock at a time -
|
||||
* the iolock, the mmaplock or the ilock, but not more than one at a time. If we
|
||||
* lock more than one at a time, lockdep will report false positives saying we
|
||||
* have violated locking orders.
|
||||
*/
|
||||
void
|
||||
xfs_lock_two_inodes(
|
||||
@ -455,8 +517,12 @@ xfs_lock_two_inodes(
|
||||
int attempts = 0;
|
||||
xfs_log_item_t *lp;
|
||||
|
||||
if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL))
|
||||
ASSERT((lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)) == 0);
|
||||
if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) {
|
||||
ASSERT(!(lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)));
|
||||
ASSERT(!(lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)));
|
||||
} else if (lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL))
|
||||
ASSERT(!(lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)));
|
||||
|
||||
ASSERT(ip0->i_ino != ip1->i_ino);
|
||||
|
||||
if (ip0->i_ino > ip1->i_ino) {
|
||||
|
@ -56,6 +56,7 @@ typedef struct xfs_inode {
|
||||
struct xfs_inode_log_item *i_itemp; /* logging information */
|
||||
mrlock_t i_lock; /* inode lock */
|
||||
mrlock_t i_iolock; /* inode IO lock */
|
||||
mrlock_t i_mmaplock; /* inode mmap IO lock */
|
||||
atomic_t i_pincount; /* inode pin count */
|
||||
spinlock_t i_flags_lock; /* inode i_flags lock */
|
||||
/* Miscellaneous state. */
|
||||
@ -263,15 +264,20 @@ static inline int xfs_isiflocked(struct xfs_inode *ip)
|
||||
#define XFS_IOLOCK_SHARED (1<<1)
|
||||
#define XFS_ILOCK_EXCL (1<<2)
|
||||
#define XFS_ILOCK_SHARED (1<<3)
|
||||
#define XFS_MMAPLOCK_EXCL (1<<4)
|
||||
#define XFS_MMAPLOCK_SHARED (1<<5)
|
||||
|
||||
#define XFS_LOCK_MASK (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED \
|
||||
| XFS_ILOCK_EXCL | XFS_ILOCK_SHARED)
|
||||
| XFS_ILOCK_EXCL | XFS_ILOCK_SHARED \
|
||||
| XFS_MMAPLOCK_EXCL | XFS_MMAPLOCK_SHARED)
|
||||
|
||||
#define XFS_LOCK_FLAGS \
|
||||
{ XFS_IOLOCK_EXCL, "IOLOCK_EXCL" }, \
|
||||
{ XFS_IOLOCK_SHARED, "IOLOCK_SHARED" }, \
|
||||
{ XFS_ILOCK_EXCL, "ILOCK_EXCL" }, \
|
||||
{ XFS_ILOCK_SHARED, "ILOCK_SHARED" }
|
||||
{ XFS_ILOCK_SHARED, "ILOCK_SHARED" }, \
|
||||
{ XFS_MMAPLOCK_EXCL, "MMAPLOCK_EXCL" }, \
|
||||
{ XFS_MMAPLOCK_SHARED, "MMAPLOCK_SHARED" }
|
||||
|
||||
|
||||
/*
|
||||
@ -302,17 +308,26 @@ static inline int xfs_isiflocked(struct xfs_inode *ip)
|
||||
#define XFS_IOLOCK_SHIFT 16
|
||||
#define XFS_IOLOCK_PARENT (XFS_LOCK_PARENT << XFS_IOLOCK_SHIFT)
|
||||
|
||||
#define XFS_MMAPLOCK_SHIFT 20
|
||||
|
||||
#define XFS_ILOCK_SHIFT 24
|
||||
#define XFS_ILOCK_PARENT (XFS_LOCK_PARENT << XFS_ILOCK_SHIFT)
|
||||
#define XFS_ILOCK_RTBITMAP (XFS_LOCK_RTBITMAP << XFS_ILOCK_SHIFT)
|
||||
#define XFS_ILOCK_RTSUM (XFS_LOCK_RTSUM << XFS_ILOCK_SHIFT)
|
||||
|
||||
#define XFS_IOLOCK_DEP_MASK 0x00ff0000
|
||||
#define XFS_IOLOCK_DEP_MASK 0x000f0000
|
||||
#define XFS_MMAPLOCK_DEP_MASK 0x00f00000
|
||||
#define XFS_ILOCK_DEP_MASK 0xff000000
|
||||
#define XFS_LOCK_DEP_MASK (XFS_IOLOCK_DEP_MASK | XFS_ILOCK_DEP_MASK)
|
||||
#define XFS_LOCK_DEP_MASK (XFS_IOLOCK_DEP_MASK | \
|
||||
XFS_MMAPLOCK_DEP_MASK | \
|
||||
XFS_ILOCK_DEP_MASK)
|
||||
|
||||
#define XFS_IOLOCK_DEP(flags) (((flags) & XFS_IOLOCK_DEP_MASK) >> XFS_IOLOCK_SHIFT)
|
||||
#define XFS_ILOCK_DEP(flags) (((flags) & XFS_ILOCK_DEP_MASK) >> XFS_ILOCK_SHIFT)
|
||||
#define XFS_IOLOCK_DEP(flags) (((flags) & XFS_IOLOCK_DEP_MASK) \
|
||||
>> XFS_IOLOCK_SHIFT)
|
||||
#define XFS_MMAPLOCK_DEP(flags) (((flags) & XFS_MMAPLOCK_DEP_MASK) \
|
||||
>> XFS_MMAPLOCK_SHIFT)
|
||||
#define XFS_ILOCK_DEP(flags) (((flags) & XFS_ILOCK_DEP_MASK) \
|
||||
>> XFS_ILOCK_SHIFT)
|
||||
|
||||
/*
|
||||
* For multiple groups support: if S_ISGID bit is set in the parent
|
||||
|
@ -986,6 +986,8 @@ xfs_fs_inode_init_once(
|
||||
atomic_set(&ip->i_pincount, 0);
|
||||
spin_lock_init(&ip->i_flags_lock);
|
||||
|
||||
mrlock_init(&ip->i_mmaplock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
|
||||
"xfsino", ip->i_ino);
|
||||
mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
|
||||
"xfsino", ip->i_ino);
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user