Merge branch 'xfs-misc-fixes-for-4.3-2' into for-next

Dave Chinner 2015-08-20 09:28:45 +10:00
commit aa493382cb
15 changed files with 233 additions and 104 deletions

View File

@@ -139,6 +139,8 @@ xfs_attr_get(
 	args.value = value;
 	args.valuelen = *valuelenp;
+	/* Entirely possible to look up a name which doesn't exist */
+	args.op_flags = XFS_DA_OP_OKNOENT;
 
 	lock_mode = xfs_ilock_attr_map_shared(ip);
 	if (!xfs_inode_hasattr(ip))
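For readers unfamiliar with the flag, its effect in this path, glossing the added comment above (the exact definition lives in xfs_da_btree.h; this is a sketch, not a quote of the header):

	/*
	 * With XFS_DA_OP_OKNOENT set, the da-btree lookup path treats a
	 * missing name as an expected outcome: xfs_attr_get() quietly
	 * returns -ENOENT to the caller rather than the lookup being
	 * treated as an error condition.
	 */
	args.op_flags = XFS_DA_OP_OKNOENT;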

View File

@@ -1822,6 +1822,7 @@ xfs_da3_path_shift(
 	struct xfs_da_args	*args;
 	struct xfs_da_node_entry *btree;
 	struct xfs_da3_icnode_hdr nodehdr;
+	struct xfs_buf		*bp;
 	xfs_dablk_t		blkno = 0;
 	int			level;
 	int			error;
@@ -1866,20 +1867,24 @@ xfs_da3_path_shift(
 	 */
 	for (blk++, level++; level < path->active; blk++, level++) {
 		/*
-		 * Release the old block.
-		 * (if it's dirty, trans won't actually let go)
+		 * Read the next child block into a local buffer.
+		 */
+		error = xfs_da3_node_read(args->trans, dp, blkno, -1, &bp,
+					  args->whichfork);
+		if (error)
+			return error;
+
+		/*
+		 * Release the old block (if it's dirty, the trans doesn't
+		 * actually let go) and swap the local buffer into the path
+		 * structure. This ensures failure of the above read doesn't set
+		 * a NULL buffer in an active slot in the path.
 		 */
 		if (release)
 			xfs_trans_brelse(args->trans, blk->bp);
-
-		/*
-		 * Read the next child block.
-		 */
 		blk->blkno = blkno;
-		error = xfs_da3_node_read(args->trans, dp, blkno, -1,
-					  &blk->bp, args->whichfork);
-		if (error)
-			return error;
+		blk->bp = bp;
 		info = blk->bp->b_addr;
 		ASSERT(info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC) ||
 		       info->magic == cpu_to_be16(XFS_DA3_NODE_MAGIC) ||
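In miniature, the reordering this hunk makes (a paraphrase of the diff above, not additional code):

	/*
	 * Before: release blk->bp, then read the child into &blk->bp; if
	 * the read fails, an active path slot is left holding a NULL
	 * buffer.
	 *
	 * After: read the child into a local bp first; only once the read
	 * has succeeded is the old buffer released and bp swapped into
	 * blk->bp, so a read failure leaves the path untouched.
	 */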

View File

@@ -680,8 +680,15 @@ typedef struct xfs_attr_leaf_name_remote {
 typedef struct xfs_attr_leafblock {
 	xfs_attr_leaf_hdr_t	hdr;	/* constant-structure header block */
 	xfs_attr_leaf_entry_t	entries[1];	/* sorted on key, not name */
-	xfs_attr_leaf_name_local_t namelist;	/* grows from bottom of buf */
-	xfs_attr_leaf_name_remote_t valuelist;	/* grows from bottom of buf */
+	/*
+	 * The rest of the block contains the following structures after the
+	 * leaf entries, growing from the bottom up. The variables are never
+	 * referenced and defining them can actually make gcc optimize away
+	 * accesses to the 'entries' array above index 0 so don't do that.
+	 *
+	 * xfs_attr_leaf_name_local_t namelist;
+	 * xfs_attr_leaf_name_remote_t valuelist;
+	 */
 } xfs_attr_leafblock_t;
 
 /*
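For context on how callers reach the name data now that the members are gone: each leaf entry records the byte offset of its name structure in nameidx, and the block is addressed from its start rather than through struct members. A sketch of that access pattern, modelled on the xfs_attr3_leaf_name()/xfs_attr3_leaf_entryp() accessors this header is understood to provide (treat the exact names and shapes as illustrative):

	static inline char *
	xfs_attr3_leaf_name(xfs_attr_leafblock_t *leafp, int idx)
	{
		/*
		 * Derive the entries pointer via a helper rather than
		 * indexing leafp->entries[] directly, so the compiler never
		 * sees an out-of-bounds index on the [1] array.
		 */
		struct xfs_attr_leaf_entry *entries = xfs_attr3_leaf_entryp(leafp);

		/*
		 * Name data grows from the end of the block; nameidx is a
		 * byte offset from the start of the block.
		 */
		return &((char *)leafp)[be16_to_cpu(entries[idx].nameidx)];
	}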

View File

@@ -362,6 +362,7 @@ xfs_dir_lookup(
 	struct xfs_da_args *args;
 	int		rval;
 	int		v;		/* type-checking value */
+	int		lock_mode;
 
 	ASSERT(S_ISDIR(dp->i_d.di_mode));
 	XFS_STATS_INC(xs_dir_lookup);
@@ -387,6 +388,7 @@ xfs_dir_lookup(
 	if (ci_name)
 		args->op_flags |= XFS_DA_OP_CILOOKUP;
 
+	lock_mode = xfs_ilock_data_map_shared(dp);
 	if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
 		rval = xfs_dir2_sf_lookup(args);
 		goto out_check_rval;
@@ -419,6 +421,7 @@ out_check_rval:
 		}
 	}
 out_free:
+	xfs_iunlock(dp, lock_mode);
 	kmem_free(args);
 	return rval;
 }

View File

@@ -252,7 +252,8 @@ xfs_dir3_data_reada_verify(
 		return;
 	case cpu_to_be32(XFS_DIR2_DATA_MAGIC):
 	case cpu_to_be32(XFS_DIR3_DATA_MAGIC):
-		xfs_dir3_data_verify(bp);
+		bp->b_ops = &xfs_dir3_data_buf_ops;
+		bp->b_ops->verify_read(bp);
 		return;
 	default:
 		xfs_buf_ioerror(bp, -EFSCORRUPTED);

View File

@@ -186,9 +186,6 @@ xfs_mount_validate_sb(
 	if (xfs_sb_version_hassparseinodes(sbp)) {
 		uint32_t	align;
 
-		xfs_alert(mp,
-	"EXPERIMENTAL sparse inode feature enabled. Use at your own risk!");
-
 		align = XFS_INODES_PER_CHUNK * sbp->sb_inodesize
 				>> sbp->sb_blocklog;
 		if (sbp->sb_inoalignmt != align) {

View File

@@ -171,6 +171,7 @@ xfs_dir2_block_getdents(
 	int		wantoff;	/* starting block offset */
 	xfs_off_t	cook;
 	struct xfs_da_geometry *geo = args->geo;
+	int		lock_mode;
 
 	/*
 	 * If the block number in the offset is out of range, we're done.
@@ -178,7 +179,9 @@ xfs_dir2_block_getdents(
 	if (xfs_dir2_dataptr_to_db(geo, ctx->pos) > geo->datablk)
 		return 0;
 
+	lock_mode = xfs_ilock_data_map_shared(dp);
 	error = xfs_dir3_block_read(NULL, dp, &bp);
+	xfs_iunlock(dp, lock_mode);
 	if (error)
 		return error;
@@ -529,9 +532,12 @@ xfs_dir2_leaf_getdents(
 		 * current buffer, need to get another one.
 		 */
 		if (!bp || ptr >= (char *)bp->b_addr + geo->blksize) {
+			int	lock_mode;
 
+			lock_mode = xfs_ilock_data_map_shared(dp);
 			error = xfs_dir2_leaf_readbuf(args, bufsize, map_info,
 						      &curoff, &bp);
+			xfs_iunlock(dp, lock_mode);
 			if (error || !map_info->map_valid)
 				break;
@@ -653,7 +659,6 @@ xfs_readdir(
 	struct xfs_da_args args = { NULL };
 	int		rval;
 	int		v;
-	uint		lock_mode;
 
 	trace_xfs_readdir(dp);
@@ -666,7 +671,7 @@ xfs_readdir(
 	args.dp = dp;
 	args.geo = dp->i_mount->m_dir_geo;
 
-	lock_mode = xfs_ilock_data_map_shared(dp);
+	xfs_ilock(dp, XFS_IOLOCK_SHARED);
 	if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
 		rval = xfs_dir2_sf_getdents(&args, ctx);
 	else if ((rval = xfs_dir2_isblock(&args, &v)))
@@ -675,7 +680,7 @@ xfs_readdir(
 		rval = xfs_dir2_block_getdents(&args, ctx);
 	else
 		rval = xfs_dir2_leaf_getdents(&args, ctx, bufsize);
-	xfs_iunlock(dp, lock_mode);
+	xfs_iunlock(dp, XFS_IOLOCK_SHARED);
 
 	return rval;
 }

View File

@@ -251,7 +251,7 @@ xfs_qm_init_dquot_blk(
 		d->dd_diskdq.d_id = cpu_to_be32(curid);
 		d->dd_diskdq.d_flags = type;
 		if (xfs_sb_version_hascrc(&mp->m_sb)) {
-			uuid_copy(&d->dd_uuid, &mp->m_sb.sb_uuid);
+			uuid_copy(&d->dd_uuid, &mp->m_sb.sb_meta_uuid);
 			xfs_update_cksum((char *)d, sizeof(struct xfs_dqblk),
 					 XFS_DQUOT_CRC_OFF);
 		}

View File

@@ -317,24 +317,33 @@ xfs_file_read_iter(
 		return -EIO;
 
 	/*
-	 * Locking is a bit tricky here. If we take an exclusive lock
-	 * for direct IO, we effectively serialise all new concurrent
-	 * read IO to this file and block it behind IO that is currently in
-	 * progress because IO in progress holds the IO lock shared. We only
-	 * need to hold the lock exclusive to blow away the page cache, so
-	 * only take lock exclusively if the page cache needs invalidation.
-	 * This allows the normal direct IO case of no page cache pages to
-	 * proceeed concurrently without serialisation.
+	 * Locking is a bit tricky here. If we take an exclusive lock for direct
+	 * IO, we effectively serialise all new concurrent read IO to this file
+	 * and block it behind IO that is currently in progress because IO in
+	 * progress holds the IO lock shared. We only need to hold the lock
+	 * exclusive to blow away the page cache, so only take lock exclusively
+	 * if the page cache needs invalidation. This allows the normal direct
+	 * IO case of no page cache pages to proceed concurrently without
+	 * serialisation.
 	 */
 	xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
 	if ((ioflags & XFS_IO_ISDIRECT) && inode->i_mapping->nrpages) {
 		xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
 		xfs_rw_ilock(ip, XFS_IOLOCK_EXCL);
 
+		/*
+		 * The generic dio code only flushes the range of the particular
+		 * I/O. Because we take an exclusive lock here, this whole
+		 * sequence is considerably more expensive for us. This has a
+		 * noticeable performance impact for any file with cached pages,
+		 * even when outside of the range of the particular I/O.
+		 *
+		 * Hence, amortize the cost of the lock against a full file
+		 * flush and reduce the chances of repeated iolock cycles going
+		 * forward.
+		 */
 		if (inode->i_mapping->nrpages) {
-			ret = filemap_write_and_wait_range(
-							VFS_I(ip)->i_mapping,
-							pos, pos + size - 1);
+			ret = filemap_write_and_wait(VFS_I(ip)->i_mapping);
 			if (ret) {
 				xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL);
 				return ret;
@@ -345,9 +354,7 @@ xfs_file_read_iter(
 			 * we fail to invalidate a page, but this should never
 			 * happen on XFS. Warn if it does fail.
 			 */
-			ret = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping,
-					pos >> PAGE_CACHE_SHIFT,
-					(pos + size - 1) >> PAGE_CACHE_SHIFT);
+			ret = invalidate_inode_pages2(VFS_I(ip)->i_mapping);
 			WARN_ON_ONCE(ret);
 			ret = 0;
 		}
@@ -733,19 +740,19 @@ xfs_file_dio_aio_write(
 	pos = iocb->ki_pos;
 	end = pos + count - 1;
 
+	/*
+	 * See xfs_file_read_iter() for why we do a full-file flush here.
+	 */
 	if (mapping->nrpages) {
-		ret = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
-						   pos, end);
+		ret = filemap_write_and_wait(VFS_I(ip)->i_mapping);
 		if (ret)
 			goto out;
 
 		/*
-		 * Invalidate whole pages. This can return an error if
-		 * we fail to invalidate a page, but this should never
-		 * happen on XFS. Warn if it does fail.
+		 * Invalidate whole pages. This can return an error if we fail
+		 * to invalidate a page, but this should never happen on XFS.
+		 * Warn if it does fail.
 		 */
-		ret = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping,
-						pos >> PAGE_CACHE_SHIFT,
-						end >> PAGE_CACHE_SHIFT);
+		ret = invalidate_inode_pages2(VFS_I(ip)->i_mapping);
 		WARN_ON_ONCE(ret);
 		ret = 0;
 	}
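The two cache-flush shapes the new comments contrast, pulled side by side from the hunks above (mapping, pos and size as in xfs_file_read_iter; PAGE_CACHE_SHIFT converts byte offsets to page indices):

	/*
	 * Per-range, as the generic direct IO code does:
	 *	filemap_write_and_wait_range(mapping, pos, pos + size - 1);
	 *	invalidate_inode_pages2_range(mapping, pos >> PAGE_CACHE_SHIFT,
	 *				      (pos + size - 1) >> PAGE_CACHE_SHIFT);
	 *
	 * Whole file, as XFS now does under the exclusive iolock, amortising
	 * the lock cycle against a full flush:
	 *	filemap_write_and_wait(mapping);
	 *	invalidate_inode_pages2(mapping);
	 */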

View File

@@ -250,7 +250,7 @@ xfs_growfs_data_private(
 		agf->agf_freeblks = cpu_to_be32(tmpsize);
 		agf->agf_longest = cpu_to_be32(tmpsize);
 		if (xfs_sb_version_hascrc(&mp->m_sb))
-			uuid_copy(&agf->agf_uuid, &mp->m_sb.sb_uuid);
+			uuid_copy(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid);
 
 		error = xfs_bwrite(bp);
 		xfs_buf_relse(bp);
@@ -273,7 +273,7 @@ xfs_growfs_data_private(
 		if (xfs_sb_version_hascrc(&mp->m_sb)) {
 			agfl->agfl_magicnum = cpu_to_be32(XFS_AGFL_MAGIC);
 			agfl->agfl_seqno = cpu_to_be32(agno);
-			uuid_copy(&agfl->agfl_uuid, &mp->m_sb.sb_uuid);
+			uuid_copy(&agfl->agfl_uuid, &mp->m_sb.sb_meta_uuid);
 		}
 
 		agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, bp);
@@ -309,7 +309,7 @@ xfs_growfs_data_private(
 		agi->agi_newino = cpu_to_be32(NULLAGINO);
 		agi->agi_dirino = cpu_to_be32(NULLAGINO);
 		if (xfs_sb_version_hascrc(&mp->m_sb))
-			uuid_copy(&agi->agi_uuid, &mp->m_sb.sb_uuid);
+			uuid_copy(&agi->agi_uuid, &mp->m_sb.sb_meta_uuid);
 		if (xfs_sb_version_hasfinobt(&mp->m_sb)) {
 			agi->agi_free_root = cpu_to_be32(XFS_FIBT_BLOCK(mp));
 			agi->agi_free_level = cpu_to_be32(1);

View File

@@ -164,7 +164,7 @@ xfs_ilock(
 	       (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL));
 	ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
 	       (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
-	ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);
+	ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_SUBCLASS_MASK)) == 0);
 
 	if (lock_flags & XFS_IOLOCK_EXCL)
 		mrupdate_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags));
@@ -212,7 +212,7 @@ xfs_ilock_nowait(
 	       (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL));
 	ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
 	       (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
-	ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);
+	ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_SUBCLASS_MASK)) == 0);
 
 	if (lock_flags & XFS_IOLOCK_EXCL) {
 		if (!mrtryupdate(&ip->i_iolock))
@@ -281,7 +281,7 @@ xfs_iunlock(
 	       (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL));
 	ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
 	       (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
-	ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);
+	ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_SUBCLASS_MASK)) == 0);
 	ASSERT(lock_flags != 0);
 
 	if (lock_flags & XFS_IOLOCK_EXCL)
@@ -362,32 +362,52 @@ int xfs_lots_retries;
 int xfs_lock_delays;
 #endif
 
+#ifdef CONFIG_LOCKDEP
+static bool
+xfs_lockdep_subclass_ok(
+	int subclass)
+{
+	return subclass < MAX_LOCKDEP_SUBCLASSES;
+}
+#else
+#define xfs_lockdep_subclass_ok(subclass)	(true)
+#endif
+
 /*
  * Bump the subclass so xfs_lock_inodes() acquires each lock with a different
- * value. This shouldn't be called for page fault locking, but we also need to
- * ensure we don't overrun the number of lockdep subclasses for the iolock or
- * mmaplock as that is limited to 12 by the mmap lock lockdep annotations.
+ * value. This can be called for any type of inode lock combination, including
+ * parent locking. Care must be taken to ensure we don't overrun the subclass
+ * storage fields in the class mask we build.
  */
 static inline int
 xfs_lock_inumorder(int lock_mode, int subclass)
 {
+	int	class = 0;
+
+	ASSERT(!(lock_mode & (XFS_ILOCK_PARENT | XFS_ILOCK_RTBITMAP |
+			      XFS_ILOCK_RTSUM)));
+	ASSERT(xfs_lockdep_subclass_ok(subclass));
+
 	if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) {
-		ASSERT(subclass + XFS_LOCK_INUMORDER <
-			(1 << (XFS_MMAPLOCK_SHIFT - XFS_IOLOCK_SHIFT)));
-		lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_IOLOCK_SHIFT;
+		ASSERT(subclass <= XFS_IOLOCK_MAX_SUBCLASS);
+		ASSERT(xfs_lockdep_subclass_ok(subclass +
+						XFS_IOLOCK_PARENT_VAL));
+		class += subclass << XFS_IOLOCK_SHIFT;
+		if (lock_mode & XFS_IOLOCK_PARENT)
+			class += XFS_IOLOCK_PARENT_VAL << XFS_IOLOCK_SHIFT;
 	}
 
 	if (lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) {
-		ASSERT(subclass + XFS_LOCK_INUMORDER <
-			(1 << (XFS_ILOCK_SHIFT - XFS_MMAPLOCK_SHIFT)));
-		lock_mode |= (subclass + XFS_LOCK_INUMORDER) <<
-							XFS_MMAPLOCK_SHIFT;
+		ASSERT(subclass <= XFS_MMAPLOCK_MAX_SUBCLASS);
+		class += subclass << XFS_MMAPLOCK_SHIFT;
 	}
 
-	if (lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL))
-		lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_ILOCK_SHIFT;
+	if (lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)) {
+		ASSERT(subclass <= XFS_ILOCK_MAX_SUBCLASS);
+		class += subclass << XFS_ILOCK_SHIFT;
+	}
 
-	return lock_mode;
+	return (lock_mode & ~XFS_LOCK_SUBCLASS_MASK) | class;
 }
 
 /*
@@ -399,6 +419,11 @@ xfs_lock_inumorder(int lock_mode, int subclass)
  * transaction (such as truncate). This can result in deadlock since the long
  * running trans might need to wait for the inode we just locked in order to
  * push the tail and free space in the log.
+ *
+ * xfs_lock_inodes() can only be used to lock one type of lock at a time -
+ * the iolock, the mmaplock or the ilock, but not more than one at a time. If we
+ * lock more than one at a time, lockdep will report false positives saying we
+ * have violated locking orders.
  */
 void
 xfs_lock_inodes(
@@ -409,8 +434,29 @@ xfs_lock_inodes(
 	int		attempts = 0, i, j, try_lock;
 	xfs_log_item_t	*lp;
 
-	/* currently supports between 2 and 5 inodes */
+	/*
+	 * Currently supports between 2 and 5 inodes with exclusive locking. We
+	 * support an arbitrary depth of locking here, but absolute limits on
+	 * inodes depend on the type of locking and the limits placed by
+	 * lockdep annotations in xfs_lock_inumorder. These are all checked by
+	 * the asserts.
+	 */
 	ASSERT(ips && inodes >= 2 && inodes <= 5);
+	ASSERT(lock_mode & (XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL |
+			    XFS_ILOCK_EXCL));
+	ASSERT(!(lock_mode & (XFS_IOLOCK_SHARED | XFS_MMAPLOCK_SHARED |
+			      XFS_ILOCK_SHARED)));
+	ASSERT(!(lock_mode & XFS_IOLOCK_EXCL) ||
+		inodes <= XFS_IOLOCK_MAX_SUBCLASS + 1);
+	ASSERT(!(lock_mode & XFS_MMAPLOCK_EXCL) ||
+		inodes <= XFS_MMAPLOCK_MAX_SUBCLASS + 1);
+	ASSERT(!(lock_mode & XFS_ILOCK_EXCL) ||
+		inodes <= XFS_ILOCK_MAX_SUBCLASS + 1);
+
+	if (lock_mode & XFS_IOLOCK_EXCL) {
+		ASSERT(!(lock_mode & (XFS_MMAPLOCK_EXCL | XFS_ILOCK_EXCL)));
+	} else if (lock_mode & XFS_MMAPLOCK_EXCL)
+		ASSERT(!(lock_mode & XFS_ILOCK_EXCL));
 
 	try_lock = 0;
 	i = 0;
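Worked through with the new xfs_inode.h values shown later in this commit (the arithmetic is illustrative): when xfs_lock_inodes() locks four inodes with XFS_ILOCK_EXCL, inode i is locked via xfs_lock_inumorder(XFS_ILOCK_EXCL, i), so:

	/*
	 * i = 0:	class = 0 << XFS_ILOCK_SHIFT = 0x00000000
	 * i = 1:	class = 1 << 24              = 0x01000000
	 * i = 2:	class = 2 << 24              = 0x02000000
	 * i = 3:	class = 3 << 24              = 0x03000000
	 *
	 * Each inode gets a distinct ILOCK lockdep subclass (0-3), all
	 * within XFS_ILOCK_MAX_SUBCLASS (4), so lockdep accepts the nested
	 * exclusive acquisitions taken in inode-number order.
	 */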
@@ -629,30 +675,29 @@ xfs_lookup(
 {
 	xfs_ino_t		inum;
 	int			error;
-	uint			lock_mode;
 
 	trace_xfs_lookup(dp, name);
 
 	if (XFS_FORCED_SHUTDOWN(dp->i_mount))
 		return -EIO;
 
-	lock_mode = xfs_ilock_data_map_shared(dp);
+	xfs_ilock(dp, XFS_IOLOCK_SHARED);
 	error = xfs_dir_lookup(NULL, dp, name, &inum, ci_name);
-	xfs_iunlock(dp, lock_mode);
-
 	if (error)
-		goto out;
+		goto out_unlock;
 
 	error = xfs_iget(dp->i_mount, NULL, inum, 0, 0, ipp);
 	if (error)
 		goto out_free_name;
 
+	xfs_iunlock(dp, XFS_IOLOCK_SHARED);
 	return 0;
 
 out_free_name:
 	if (ci_name)
 		kmem_free(ci_name->name);
-out:
+out_unlock:
+	xfs_iunlock(dp, XFS_IOLOCK_SHARED);
 	*ipp = NULL;
 	return error;
 }
@@ -787,7 +832,7 @@ xfs_ialloc(
 	if (ip->i_d.di_version == 3) {
 		ASSERT(ip->i_d.di_ino == ino);
-		ASSERT(uuid_equal(&ip->i_d.di_uuid, &mp->m_sb.sb_uuid));
+		ASSERT(uuid_equal(&ip->i_d.di_uuid, &mp->m_sb.sb_meta_uuid));
 		ip->i_d.di_crc = 0;
 		ip->i_d.di_changecount = 1;
 		ip->i_d.di_lsn = 0;
@@ -1149,7 +1194,8 @@ xfs_create(
 		goto out_trans_cancel;
 
-	xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);
+	xfs_ilock(dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL |
+		      XFS_IOLOCK_PARENT | XFS_ILOCK_PARENT);
 	unlock_dp_on_error = true;
 
 	xfs_bmap_init(&free_list, &first_block);
@@ -1185,7 +1231,7 @@ xfs_create(
 	 * the transaction cancel unlocking dp so don't do it explicitly in the
 	 * error path.
 	 */
-	xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
+	xfs_trans_ijoin(tp, dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
 	unlock_dp_on_error = false;
 
 	error = xfs_dir_createname(tp, dp, name, ip->i_ino,
@@ -1258,7 +1304,7 @@ xfs_create(
 	xfs_qm_dqrele(pdqp);
 
 	if (unlock_dp_on_error)
-		xfs_iunlock(dp, XFS_ILOCK_EXCL);
+		xfs_iunlock(dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
 	return error;
 }
@@ -1403,10 +1449,11 @@ xfs_link(
 	if (error)
 		goto error_return;
 
+	xfs_ilock(tdp, XFS_IOLOCK_EXCL | XFS_IOLOCK_PARENT);
 	xfs_lock_two_inodes(sip, tdp, XFS_ILOCK_EXCL);
 
 	xfs_trans_ijoin(tp, sip, XFS_ILOCK_EXCL);
-	xfs_trans_ijoin(tp, tdp, XFS_ILOCK_EXCL);
+	xfs_trans_ijoin(tp, tdp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
 
 	/*
 	 * If we are using project inheritance, we only allow hard link
@@ -2510,9 +2557,10 @@ xfs_remove(
 		goto out_trans_cancel;
 	}
 
+	xfs_ilock(dp, XFS_IOLOCK_EXCL | XFS_IOLOCK_PARENT);
 	xfs_lock_two_inodes(dp, ip, XFS_ILOCK_EXCL);
 
-	xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
+	xfs_trans_ijoin(tp, dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
 	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
 
 	/*
@@ -2893,6 +2941,12 @@ xfs_rename(
 	 * whether the target directory is the same as the source
 	 * directory, we can lock from 2 to 4 inodes.
 	 */
+	if (!new_parent)
+		xfs_ilock(src_dp, XFS_IOLOCK_EXCL | XFS_IOLOCK_PARENT);
+	else
+		xfs_lock_two_inodes(src_dp, target_dp,
+				    XFS_IOLOCK_EXCL | XFS_IOLOCK_PARENT);
+
 	xfs_lock_inodes(inodes, num_inodes, XFS_ILOCK_EXCL);
 
 	/*
@@ -2900,9 +2954,9 @@ xfs_rename(
 	 * we can rely on either trans_commit or trans_cancel to unlock
 	 * them.
	 */
-	xfs_trans_ijoin(tp, src_dp, XFS_ILOCK_EXCL);
+	xfs_trans_ijoin(tp, src_dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
 	if (new_parent)
-		xfs_trans_ijoin(tp, target_dp, XFS_ILOCK_EXCL);
+		xfs_trans_ijoin(tp, target_dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
 	xfs_trans_ijoin(tp, src_ip, XFS_ILOCK_EXCL);
 	if (target_ip)
 		xfs_trans_ijoin(tp, target_ip, XFS_ILOCK_EXCL);

View File

@@ -284,9 +284,9 @@ static inline int xfs_isiflocked(struct xfs_inode *ip)
  * Flags for lockdep annotations.
  *
  * XFS_LOCK_PARENT - for directory operations that require locking a
- * parent directory inode and a child entry inode. The parent gets locked
- * with this flag so it gets a lockdep subclass of 1 and the child entry
- * lock will have a lockdep subclass of 0.
+ * parent directory inode and a child entry inode. IOLOCK requires nesting,
+ * MMAPLOCK does not support this class, ILOCK requires a single subclass
+ * to differentiate parent from child.
 *
 * XFS_LOCK_RTBITMAP/XFS_LOCK_RTSUM - the realtime device bitmap and summary
 * inodes do not participate in the normal lock order, and thus have their
@@ -295,30 +295,63 @@ static inline int xfs_isiflocked(struct xfs_inode *ip)
  * XFS_LOCK_INUMORDER - for locking several inodes at the same time
  * with xfs_lock_inodes(). This flag is used as the starting subclass
  * and each subsequent lock acquired will increment the subclass by one.
- * So the first lock acquired will have a lockdep subclass of 4, the
- * second lock will have a lockdep subclass of 5, and so on. It is
- * the responsibility of the class builder to shift this to the correct
- * portion of the lock_mode lockdep mask.
+ * However, MAX_LOCKDEP_SUBCLASSES == 8, which means we are greatly
+ * limited in the subclasses we can represent via nesting. We need at least
+ * 5 inodes nest depth for the ILOCK through rename, and we also have to support
+ * XFS_ILOCK_PARENT, which gives 6 subclasses. Then we have XFS_ILOCK_RTBITMAP
+ * and XFS_ILOCK_RTSUM, which are another 2 unique subclasses, so that's all
+ * 8 subclasses supported by lockdep.
+ *
+ * This also means we have to number the sub-classes in the lowest bits of
+ * the mask we keep, and we have to ensure we never exceed 3 bits of lockdep
+ * mask and we can't use bit-masking to build the subclasses. What a mess.
+ *
+ * Bit layout:
+ *
+ * Bit		Lock Region
+ * 16-19	XFS_IOLOCK_SHIFT dependencies
+ * 20-23	XFS_MMAPLOCK_SHIFT dependencies
+ * 24-31	XFS_ILOCK_SHIFT dependencies
+ *
+ * IOLOCK values
+ *
+ * 0-3		subclass value
+ * 4-7		PARENT subclass values
+ *
+ * MMAPLOCK values
+ *
+ * 0-3		subclass value
+ * 4-7		unused
+ *
+ * ILOCK values
+ *
+ * 0-4		subclass values
+ * 5		PARENT subclass (not nestable)
+ * 6		RTBITMAP subclass (not nestable)
+ * 7		RTSUM subclass (not nestable)
+ *
 */
-#define XFS_LOCK_PARENT		1
-#define XFS_LOCK_RTBITMAP	2
-#define XFS_LOCK_RTSUM		3
-#define XFS_LOCK_INUMORDER	4
-
-#define XFS_IOLOCK_SHIFT	16
-#define XFS_IOLOCK_PARENT	(XFS_LOCK_PARENT << XFS_IOLOCK_SHIFT)
-
-#define XFS_MMAPLOCK_SHIFT	20
-
-#define XFS_ILOCK_SHIFT		24
-#define XFS_ILOCK_PARENT	(XFS_LOCK_PARENT << XFS_ILOCK_SHIFT)
-#define XFS_ILOCK_RTBITMAP	(XFS_LOCK_RTBITMAP << XFS_ILOCK_SHIFT)
-#define XFS_ILOCK_RTSUM		(XFS_LOCK_RTSUM << XFS_ILOCK_SHIFT)
-
-#define XFS_IOLOCK_DEP_MASK	0x000f0000
-#define XFS_MMAPLOCK_DEP_MASK	0x00f00000
-#define XFS_ILOCK_DEP_MASK	0xff000000
-#define XFS_LOCK_DEP_MASK	(XFS_IOLOCK_DEP_MASK | \
+#define XFS_IOLOCK_SHIFT		16
+#define XFS_IOLOCK_PARENT_VAL		4
+#define XFS_IOLOCK_MAX_SUBCLASS		(XFS_IOLOCK_PARENT_VAL - 1)
+#define XFS_IOLOCK_DEP_MASK		0x000f0000
+#define XFS_IOLOCK_PARENT		(XFS_IOLOCK_PARENT_VAL << XFS_IOLOCK_SHIFT)
+
+#define XFS_MMAPLOCK_SHIFT		20
+#define XFS_MMAPLOCK_NUMORDER		0
+#define XFS_MMAPLOCK_MAX_SUBCLASS	3
+#define XFS_MMAPLOCK_DEP_MASK		0x00f00000
+
+#define XFS_ILOCK_SHIFT			24
+#define XFS_ILOCK_PARENT_VAL		5
+#define XFS_ILOCK_MAX_SUBCLASS		(XFS_ILOCK_PARENT_VAL - 1)
+#define XFS_ILOCK_RTBITMAP_VAL		6
+#define XFS_ILOCK_RTSUM_VAL		7
+#define XFS_ILOCK_DEP_MASK		0xff000000
+#define XFS_ILOCK_PARENT		(XFS_ILOCK_PARENT_VAL << XFS_ILOCK_SHIFT)
+#define XFS_ILOCK_RTBITMAP		(XFS_ILOCK_RTBITMAP_VAL << XFS_ILOCK_SHIFT)
+#define XFS_ILOCK_RTSUM			(XFS_ILOCK_RTSUM_VAL << XFS_ILOCK_SHIFT)
+
+#define XFS_LOCK_SUBCLASS_MASK	(XFS_IOLOCK_DEP_MASK | \
				 XFS_MMAPLOCK_DEP_MASK | \
				 XFS_ILOCK_DEP_MASK)
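To make the bit layout concrete, decoding the parent annotation under these definitions (illustrative arithmetic, not part of the patch):

	/*
	 * XFS_ILOCK_PARENT = XFS_ILOCK_PARENT_VAL << XFS_ILOCK_SHIFT
	 *                  = 5 << 24 = 0x05000000  (within XFS_ILOCK_DEP_MASK)
	 *
	 * So xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT) hands lockdep
	 * ILOCK subclass 5 for the parent directory, while a child locked
	 * with plain XFS_ILOCK_EXCL uses subclass 0 - two distinct classes,
	 * so the nested acquisition is not reported as recursive locking.
	 */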

View File

@@ -1895,15 +1895,25 @@ xlog_recover_get_buf_lsn(
 		 */
 		goto recover_immediately;
 	case XFS_SB_MAGIC:
+		/*
+		 * superblock uuids are magic. We may or may not have a
+		 * sb_meta_uuid on disk, but it will be set in the in-core
+		 * superblock. We set the uuid pointer for verification
+		 * according to the superblock feature mask to ensure we check
+		 * the relevant UUID in the superblock.
+		 */
 		lsn = be64_to_cpu(((struct xfs_dsb *)blk)->sb_lsn);
-		uuid = &((struct xfs_dsb *)blk)->sb_uuid;
+		if (xfs_sb_version_hasmetauuid(&mp->m_sb))
+			uuid = &((struct xfs_dsb *)blk)->sb_meta_uuid;
+		else
+			uuid = &((struct xfs_dsb *)blk)->sb_uuid;
 		break;
 	default:
 		break;
 	}
 
 	if (lsn != (xfs_lsn_t)-1) {
-		if (!uuid_equal(&mp->m_sb.sb_uuid, uuid))
+		if (!uuid_equal(&mp->m_sb.sb_meta_uuid, uuid))
 			goto recover_immediately;
 		return lsn;
 	}

View File

@@ -1528,6 +1528,10 @@ xfs_fs_fill_super(
 		}
 	}
 
+	if (xfs_sb_version_hassparseinodes(&mp->m_sb))
+		xfs_alert(mp,
+	"EXPERIMENTAL sparse inode feature enabled. Use at your own risk!");
+
 	error = xfs_mountfs(mp);
 	if (error)
 		goto out_filestream_unmount;

View File

@@ -240,7 +240,8 @@ xfs_symlink(
 	if (error)
 		goto out_trans_cancel;
 
-	xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);
+	xfs_ilock(dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL |
+		      XFS_IOLOCK_PARENT | XFS_ILOCK_PARENT);
 	unlock_dp_on_error = true;
 
 	/*
@@ -288,7 +289,7 @@ xfs_symlink(
 	 * the transaction cancel unlocking dp so don't do it explicitly in the
 	 * error path.
 	 */
-	xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
+	xfs_trans_ijoin(tp, dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
 	unlock_dp_on_error = false;
 
 	/*
@@ -421,7 +422,7 @@ out_release_inode:
 	xfs_qm_dqrele(pdqp);
 
 	if (unlock_dp_on_error)
-		xfs_iunlock(dp, XFS_ILOCK_EXCL);
+		xfs_iunlock(dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
 	return error;
 }
} }