forked from Minki/linux
5885ebda87
A new fsync vs power fail test in xfstests indicated that XFS can have unreliable data consistency when doing extending truncates that require block zeroing. The blocks beyond EOF get zeroed in memory, but we never force those changes to disk before we run the transaction that extends the file size and exposes those blocks to userspace. This can result in the blocks not being correctly zeroed after a crash. Because in-memory behaviour is correct, tools like fsx don't pick up any coherency problems - it's not until the filesystem is shutdown or the system crashes after writing the truncate transaction to the journal but before the zeroed data in the page cache is flushed that the issue is exposed. Fix this by also flushing the dirty data in memory region between the old size and new size when we've found blocks that need zeroing in the truncate process. Reported-by: Liu Bo <bo.li.liu@oracle.com> cc: <stable@vger.kernel.org> Signed-off-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Brian Foster <bfoster@redhat.com> Signed-off-by: Dave Chinner <david@fromorbit.com>
420 lines
12 KiB
C
420 lines
12 KiB
C
/*
|
|
* Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
|
|
* All Rights Reserved.
|
|
*
|
|
* This program is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU General Public License as
|
|
* published by the Free Software Foundation.
|
|
*
|
|
* This program is distributed in the hope that it would be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with this program; if not, write the Free Software Foundation,
|
|
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
|
*/
|
|
#ifndef __XFS_INODE_H__
|
|
#define __XFS_INODE_H__
|
|
|
|
#include "xfs_inode_buf.h"
|
|
#include "xfs_inode_fork.h"
|
|
|
|
/*
 * Kernel only inode definitions.
 *
 * Forward declarations of structure tags, so they can be named through
 * pointers without pulling in their full definitions.
 */
struct xfs_dinode;
struct xfs_inode;
struct xfs_buf;
struct xfs_bmap_free;
struct xfs_bmbt_irec;
struct xfs_inode_log_item;
struct xfs_mount;
struct xfs_trans;
struct xfs_dquot;
|
|
|
|
typedef struct xfs_inode {
|
|
/* Inode linking and identification information. */
|
|
struct xfs_mount *i_mount; /* fs mount struct ptr */
|
|
struct xfs_dquot *i_udquot; /* user dquot */
|
|
struct xfs_dquot *i_gdquot; /* group dquot */
|
|
struct xfs_dquot *i_pdquot; /* project dquot */
|
|
|
|
/* Inode location stuff */
|
|
xfs_ino_t i_ino; /* inode number (agno/agino)*/
|
|
struct xfs_imap i_imap; /* location for xfs_imap() */
|
|
|
|
/* Extent information. */
|
|
xfs_ifork_t *i_afp; /* attribute fork pointer */
|
|
xfs_ifork_t i_df; /* data fork */
|
|
|
|
/* operations vectors */
|
|
const struct xfs_dir_ops *d_ops; /* directory ops vector */
|
|
|
|
/* Transaction and locking information. */
|
|
struct xfs_inode_log_item *i_itemp; /* logging information */
|
|
mrlock_t i_lock; /* inode lock */
|
|
mrlock_t i_iolock; /* inode IO lock */
|
|
atomic_t i_pincount; /* inode pin count */
|
|
spinlock_t i_flags_lock; /* inode i_flags lock */
|
|
/* Miscellaneous state. */
|
|
unsigned long i_flags; /* see defined flags below */
|
|
unsigned int i_delayed_blks; /* count of delay alloc blks */
|
|
|
|
xfs_icdinode_t i_d; /* most of ondisk inode */
|
|
|
|
/* VFS inode */
|
|
struct inode i_vnode; /* embedded VFS inode */
|
|
} xfs_inode_t;
|
|
|
|
/* Convert from vfs inode to xfs inode */
|
|
static inline struct xfs_inode *XFS_I(struct inode *inode)
|
|
{
|
|
return container_of(inode, struct xfs_inode, i_vnode);
|
|
}
|
|
|
|
/* convert from xfs inode to vfs inode */
|
|
static inline struct inode *VFS_I(struct xfs_inode *ip)
|
|
{
|
|
return &ip->i_vnode;
|
|
}
|
|
|
|
/*
|
|
* For regular files we only update the on-disk filesize when actually
|
|
* writing data back to disk. Until then only the copy in the VFS inode
|
|
* is uptodate.
|
|
*/
|
|
static inline xfs_fsize_t XFS_ISIZE(struct xfs_inode *ip)
|
|
{
|
|
if (S_ISREG(ip->i_d.di_mode))
|
|
return i_size_read(VFS_I(ip));
|
|
return ip->i_d.di_size;
|
|
}
|
|
|
|
/*
|
|
* If this I/O goes past the on-disk inode size update it unless it would
|
|
* be past the current in-core inode size.
|
|
*/
|
|
static inline xfs_fsize_t
|
|
xfs_new_eof(struct xfs_inode *ip, xfs_fsize_t new_size)
|
|
{
|
|
xfs_fsize_t i_size = i_size_read(VFS_I(ip));
|
|
|
|
if (new_size > i_size || new_size < 0)
|
|
new_size = i_size;
|
|
return new_size > ip->i_d.di_size ? new_size : 0;
|
|
}
|
|
|
|
/*
|
|
* i_flags helper functions
|
|
*/
|
|
static inline void
|
|
__xfs_iflags_set(xfs_inode_t *ip, unsigned short flags)
|
|
{
|
|
ip->i_flags |= flags;
|
|
}
|
|
|
|
static inline void
|
|
xfs_iflags_set(xfs_inode_t *ip, unsigned short flags)
|
|
{
|
|
spin_lock(&ip->i_flags_lock);
|
|
__xfs_iflags_set(ip, flags);
|
|
spin_unlock(&ip->i_flags_lock);
|
|
}
|
|
|
|
static inline void
|
|
xfs_iflags_clear(xfs_inode_t *ip, unsigned short flags)
|
|
{
|
|
spin_lock(&ip->i_flags_lock);
|
|
ip->i_flags &= ~flags;
|
|
spin_unlock(&ip->i_flags_lock);
|
|
}
|
|
|
|
static inline int
|
|
__xfs_iflags_test(xfs_inode_t *ip, unsigned short flags)
|
|
{
|
|
return (ip->i_flags & flags);
|
|
}
|
|
|
|
static inline int
|
|
xfs_iflags_test(xfs_inode_t *ip, unsigned short flags)
|
|
{
|
|
int ret;
|
|
spin_lock(&ip->i_flags_lock);
|
|
ret = __xfs_iflags_test(ip, flags);
|
|
spin_unlock(&ip->i_flags_lock);
|
|
return ret;
|
|
}
|
|
|
|
static inline int
|
|
xfs_iflags_test_and_clear(xfs_inode_t *ip, unsigned short flags)
|
|
{
|
|
int ret;
|
|
|
|
spin_lock(&ip->i_flags_lock);
|
|
ret = ip->i_flags & flags;
|
|
if (ret)
|
|
ip->i_flags &= ~flags;
|
|
spin_unlock(&ip->i_flags_lock);
|
|
return ret;
|
|
}
|
|
|
|
static inline int
|
|
xfs_iflags_test_and_set(xfs_inode_t *ip, unsigned short flags)
|
|
{
|
|
int ret;
|
|
|
|
spin_lock(&ip->i_flags_lock);
|
|
ret = ip->i_flags & flags;
|
|
if (!ret)
|
|
ip->i_flags |= flags;
|
|
spin_unlock(&ip->i_flags_lock);
|
|
return ret;
|
|
}
|
|
|
|
/*
|
|
* Project quota id helpers (previously projid was 16bit only
|
|
* and using two 16bit values to hold new 32bit projid was chosen
|
|
* to retain compatibility with "old" filesystems).
|
|
*/
|
|
static inline prid_t
|
|
xfs_get_projid(struct xfs_inode *ip)
|
|
{
|
|
return (prid_t)ip->i_d.di_projid_hi << 16 | ip->i_d.di_projid_lo;
|
|
}
|
|
|
|
static inline void
|
|
xfs_set_projid(struct xfs_inode *ip,
|
|
prid_t projid)
|
|
{
|
|
ip->i_d.di_projid_hi = (__uint16_t) (projid >> 16);
|
|
ip->i_d.di_projid_lo = (__uint16_t) (projid & 0xffff);
|
|
}
|
|
|
|
static inline prid_t
|
|
xfs_get_initial_prid(struct xfs_inode *dp)
|
|
{
|
|
if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)
|
|
return xfs_get_projid(dp);
|
|
|
|
return XFS_PROJID_DEFAULT;
|
|
}
|
|
|
|
/*
 * In-core inode flags.
 *
 * Bit 4 is not assigned in this list.  XFS_IFLOCK and XFS_IPINNED also
 * export their bit numbers (__XFS_*_BIT) in addition to their masks.
 */
#define XFS_IRECLAIM		(1 << 0) /* started reclaiming this inode */
#define XFS_ISTALE		(1 << 1) /* inode has been staled */
#define XFS_IRECLAIMABLE	(1 << 2) /* inode can be reclaimed */
#define XFS_INEW		(1 << 3) /* inode has just been allocated */
#define XFS_ITRUNCATED		(1 << 5) /* truncated down so flush-on-close */
#define XFS_IDIRTY_RELEASE	(1 << 6) /* dirty release already seen */
#define __XFS_IFLOCK_BIT	7	 /* inode is being flushed right now */
#define XFS_IFLOCK		(1 << __XFS_IFLOCK_BIT)
#define __XFS_IPINNED_BIT	8	 /* wakeup key for zero pin count */
#define XFS_IPINNED		(1 << __XFS_IPINNED_BIT)
#define XFS_IDONTCACHE		(1 << 9) /* don't cache the inode long term */
|
|
|
|
/*
 * Per-lifetime flags need to be reset when re-using a reclaimable inode during
 * inode lookup. This prevents unintended behaviour on the new inode from
 * occurring.
 */
#define XFS_IRECLAIM_RESET_FLAGS	\
	(XFS_IRECLAIMABLE | XFS_IRECLAIM | \
	 XFS_IDIRTY_RELEASE | XFS_ITRUNCATED)
|
|
|
|
/*
 * Synchronize processes attempting to flush the in-core inode back to disk.
 */

/* Called by xfs_iflock() when xfs_iflock_nowait() fails to take the flush lock. */
extern void __xfs_iflock(struct xfs_inode *ip);
|
|
|
|
static inline int xfs_iflock_nowait(struct xfs_inode *ip)
|
|
{
|
|
return !xfs_iflags_test_and_set(ip, XFS_IFLOCK);
|
|
}
|
|
|
|
/*
 * Take the flush lock: try the non-blocking path first and fall back to
 * __xfs_iflock() when XFS_IFLOCK is already set.
 */
static inline void xfs_iflock(struct xfs_inode *ip)
{
	if (!xfs_iflock_nowait(ip))
		__xfs_iflock(ip);
}
|
|
|
|
static inline void xfs_ifunlock(struct xfs_inode *ip)
|
|
{
|
|
xfs_iflags_clear(ip, XFS_IFLOCK);
|
|
smp_mb();
|
|
wake_up_bit(&ip->i_flags, __XFS_IFLOCK_BIT);
|
|
}
|
|
|
|
static inline int xfs_isiflocked(struct xfs_inode *ip)
|
|
{
|
|
return xfs_iflags_test(ip, XFS_IFLOCK);
|
|
}
|
|
|
|
/*
 * Flags for inode locking.
 * Bit ranges:	1<<0  - 1<<16-1 -- iolock/ilock modes (bitfield)
 *		1<<16 - 1<<32-1 -- lockdep annotation (integers)
 */
#define	XFS_IOLOCK_EXCL		(1<<0)
#define	XFS_IOLOCK_SHARED	(1<<1)
#define	XFS_ILOCK_EXCL		(1<<2)
#define	XFS_ILOCK_SHARED	(1<<3)

/* All lock mode bits defined above. */
#define XFS_LOCK_MASK		(XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED \
				| XFS_ILOCK_EXCL | XFS_ILOCK_SHARED)

/* { flag, name } initializer pairs, one per lock mode bit. */
#define XFS_LOCK_FLAGS \
	{ XFS_IOLOCK_EXCL,	"IOLOCK_EXCL" }, \
	{ XFS_IOLOCK_SHARED,	"IOLOCK_SHARED" }, \
	{ XFS_ILOCK_EXCL,	"ILOCK_EXCL" }, \
	{ XFS_ILOCK_SHARED,	"ILOCK_SHARED" }
|
|
|
|
|
|
/*
 * Flags for lockdep annotations.
 *
 * XFS_LOCK_PARENT - for directory operations that require locking a
 * parent directory inode and a child entry inode.  The parent gets locked
 * with this flag so it gets a lockdep subclass of 1 and the child entry
 * lock will have a lockdep subclass of 0.
 *
 * XFS_LOCK_RTBITMAP/XFS_LOCK_RTSUM - the realtime device bitmap and summary
 * inodes do not participate in the normal lock order, and thus have their
 * own subclasses.
 *
 * XFS_LOCK_INUMORDER - for locking several inodes at the same time
 * with xfs_lock_inodes().  This flag is used as the starting subclass
 * and each subsequent lock acquired will increment the subclass by one.
 * So the first lock acquired will have a lockdep subclass of 4, the
 * second lock will have a lockdep subclass of 5, and so on.  It is
 * the responsibility of the class builder to shift this to the correct
 * portion of the lock_mode lockdep mask.
 */
#define XFS_LOCK_PARENT		1
#define XFS_LOCK_RTBITMAP	2
#define XFS_LOCK_RTSUM		3
#define XFS_LOCK_INUMORDER	4

/* iolock subclass lives in bits 16-23 of the lock flags */
#define XFS_IOLOCK_SHIFT	16
#define	XFS_IOLOCK_PARENT	(XFS_LOCK_PARENT << XFS_IOLOCK_SHIFT)

/* ilock subclass lives in bits 24-31 of the lock flags */
#define XFS_ILOCK_SHIFT		24
#define	XFS_ILOCK_PARENT	(XFS_LOCK_PARENT << XFS_ILOCK_SHIFT)
#define	XFS_ILOCK_RTBITMAP	(XFS_LOCK_RTBITMAP << XFS_ILOCK_SHIFT)
#define	XFS_ILOCK_RTSUM		(XFS_LOCK_RTSUM << XFS_ILOCK_SHIFT)

#define XFS_IOLOCK_DEP_MASK	0x00ff0000
#define XFS_ILOCK_DEP_MASK	0xff000000
#define XFS_LOCK_DEP_MASK	(XFS_IOLOCK_DEP_MASK | XFS_ILOCK_DEP_MASK)

/* Extract the iolock / ilock subclass number from a lock flags word. */
#define XFS_IOLOCK_DEP(flags)	(((flags) & XFS_IOLOCK_DEP_MASK) >> XFS_IOLOCK_SHIFT)
#define XFS_ILOCK_DEP(flags)	(((flags) & XFS_ILOCK_DEP_MASK) >> XFS_ILOCK_SHIFT)
|
|
|
|
/*
 * For multiple groups support: if S_ISGID bit is set in the parent
 * directory, group of new file is set to that of the parent, and
 * new subdirectory gets S_ISGID bit from parent.
 *
 * Evaluates to true when the mount has XFS_MOUNT_GRPID set or the parent
 * inode @pip has S_ISGID in its mode.
 */
#define XFS_INHERIT_GID(pip)	\
	(((pip)->i_mount->m_flags & XFS_MOUNT_GRPID) || \
	 ((pip)->i_d.di_mode & S_ISGID))
|
|
|
|
int xfs_release(struct xfs_inode *ip);
|
|
void xfs_inactive(struct xfs_inode *ip);
|
|
int xfs_lookup(struct xfs_inode *dp, struct xfs_name *name,
|
|
struct xfs_inode **ipp, struct xfs_name *ci_name);
|
|
int xfs_create(struct xfs_inode *dp, struct xfs_name *name,
|
|
umode_t mode, xfs_dev_t rdev, struct xfs_inode **ipp);
|
|
int xfs_create_tmpfile(struct xfs_inode *dp, struct dentry *dentry,
|
|
umode_t mode, struct xfs_inode **ipp);
|
|
int xfs_remove(struct xfs_inode *dp, struct xfs_name *name,
|
|
struct xfs_inode *ip);
|
|
int xfs_link(struct xfs_inode *tdp, struct xfs_inode *sip,
|
|
struct xfs_name *target_name);
|
|
int xfs_rename(struct xfs_inode *src_dp, struct xfs_name *src_name,
|
|
struct xfs_inode *src_ip, struct xfs_inode *target_dp,
|
|
struct xfs_name *target_name,
|
|
struct xfs_inode *target_ip, unsigned int flags);
|
|
|
|
void xfs_ilock(xfs_inode_t *, uint);
|
|
int xfs_ilock_nowait(xfs_inode_t *, uint);
|
|
void xfs_iunlock(xfs_inode_t *, uint);
|
|
void xfs_ilock_demote(xfs_inode_t *, uint);
|
|
int xfs_isilocked(xfs_inode_t *, uint);
|
|
uint xfs_ilock_data_map_shared(struct xfs_inode *);
|
|
uint xfs_ilock_attr_map_shared(struct xfs_inode *);
|
|
int xfs_ialloc(struct xfs_trans *, xfs_inode_t *, umode_t,
|
|
xfs_nlink_t, xfs_dev_t, prid_t, int,
|
|
struct xfs_buf **, xfs_inode_t **);
|
|
|
|
uint xfs_ip2xflags(struct xfs_inode *);
|
|
uint xfs_dic2xflags(struct xfs_dinode *);
|
|
int xfs_ifree(struct xfs_trans *, xfs_inode_t *,
|
|
struct xfs_bmap_free *);
|
|
int xfs_itruncate_extents(struct xfs_trans **, struct xfs_inode *,
|
|
int, xfs_fsize_t);
|
|
int xfs_iunlink(struct xfs_trans *, xfs_inode_t *);
|
|
|
|
void xfs_iext_realloc(xfs_inode_t *, int, int);
|
|
|
|
void xfs_iunpin_wait(xfs_inode_t *);
|
|
/*
 * Current value of the inode's pin count, as an unsigned int.  The
 * argument is parenthesized so that any pointer expression may be passed.
 */
#define xfs_ipincount(ip)	((unsigned int) atomic_read(&(ip)->i_pincount))
|
|
|
|
int xfs_iflush(struct xfs_inode *, struct xfs_buf **);
|
|
void xfs_lock_inodes(xfs_inode_t **, int, uint);
|
|
void xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint);
|
|
|
|
xfs_extlen_t xfs_get_extsz_hint(struct xfs_inode *ip);
|
|
|
|
int xfs_dir_ialloc(struct xfs_trans **, struct xfs_inode *, umode_t,
|
|
xfs_nlink_t, xfs_dev_t, prid_t, int,
|
|
struct xfs_inode **, int *);
|
|
int xfs_droplink(struct xfs_trans *, struct xfs_inode *);
|
|
int xfs_bumplink(struct xfs_trans *, struct xfs_inode *);
|
|
|
|
/* from xfs_file.c */

/*
 * Flag bits accepted by xfs_update_prealloc_flags().  Values are distinct
 * single bits starting at bit 1; bit 0 is not used.
 */
enum xfs_prealloc_flags {
	XFS_PREALLOC_SET	= (1 << 1),
	XFS_PREALLOC_CLEAR	= (1 << 2),
	XFS_PREALLOC_SYNC	= (1 << 3),
	XFS_PREALLOC_INVISIBLE	= (1 << 4),
};
|
|
|
|
int xfs_update_prealloc_flags(struct xfs_inode *ip,
|
|
enum xfs_prealloc_flags flags);
|
|
int xfs_zero_eof(struct xfs_inode *ip, xfs_off_t offset,
|
|
xfs_fsize_t isize, bool *did_zeroing);
|
|
int xfs_iozero(struct xfs_inode *ip, loff_t pos, size_t count);
|
|
|
|
|
|
/*
 * Take an additional reference on the VFS inode embedded in @ip.  Asserts
 * that the inode already has a non-zero i_count, then calls ihold() and
 * emits the xfs_ihold trace event.  @ip is evaluated more than once.
 */
#define IHOLD(ip) \
do { \
	ASSERT(atomic_read(&VFS_I(ip)->i_count) > 0); \
	ihold(VFS_I(ip)); \
	trace_xfs_ihold(ip, _THIS_IP_); \
} while (0)
|
|
|
|
/*
 * Drop a reference on the VFS inode embedded in @ip: emits the xfs_irele
 * trace event first, then calls iput().  @ip is evaluated more than once.
 */
#define IRELE(ip) \
do { \
	trace_xfs_irele(ip, _THIS_IP_); \
	iput(VFS_I(ip)); \
} while (0)
|
|
|
|
/* Declaration only; the zone pointer itself is defined outside this header. */
extern struct kmem_zone *xfs_inode_zone;
|
|
|
|
/*
 * Flags for read/write calls
 */
#define XFS_IO_ISDIRECT	0x00001		/* bypass page cache */
#define XFS_IO_INVIS	0x00002		/* don't update inode timestamps */

/* { flag, name } initializer pairs, one per I/O flag. */
#define XFS_IO_FLAGS \
	{ XFS_IO_ISDIRECT,	"DIRECT" }, \
	{ XFS_IO_INVIS,		"INVIS"}
|
|
|
|
#endif /* __XFS_INODE_H__ */
|