xfs: only free posteof blocks on first close

Certain workloads fragment files on XFS very badly, such as a software
package that creates a number of threads, each of which repeatedly run
the sequence: open a file, perform a synchronous write, and close the
file, which defeats the speculative preallocation mechanism.  We work
around this problem by only deleting posteof blocks the /first/ time a
file is closed to preserve the behavior that unpacking a tarball lays
out files one after the other with no gaps.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
[hch: rebased, updated comment, renamed the flag]
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Chandan Babu R <chandanbabu@kernel.org>
This commit is contained in:
Darrick J. Wong 2024-08-13 09:39:39 +02:00 committed by Chandan Babu R
parent 816e3599ca
commit f1204d9645
2 changed files with 13 additions and 23 deletions

View File

@ -1204,15 +1204,21 @@ xfs_file_release(
* exposed to that problem.
*/
if (xfs_iflags_test_and_clear(ip, XFS_ITRUNCATED)) {
xfs_iflags_clear(ip, XFS_IDIRTY_RELEASE);
xfs_iflags_clear(ip, XFS_EOFBLOCKS_RELEASED);
if (ip->i_delayed_blks > 0)
filemap_flush(inode->i_mapping);
}
/*
* XFS aggressively preallocates post-EOF space to generate contiguous
* allocations for writers that append to the end of the file and we
* try to free these when an open file context is released.
* allocations for writers that append to the end of the file.
*
* To support workloads that close and reopen the file frequently, these
* preallocations usually persist after a close unless it is the first
* close for the inode. This is a tradeoff to generate tightly packed
* data layouts for unpacking tarballs or similar archives that write
* one file after another without going back to it while keeping the
* preallocation for files that have recurring open/write/close cycles.
*
* There is no point in freeing blocks here for open but unlinked files
* as they will be taken care of by the inactivation path soon.
@ -1230,25 +1236,9 @@ xfs_file_release(
(file->f_mode & FMODE_WRITE) &&
xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) {
if (xfs_can_free_eofblocks(ip) &&
!xfs_iflags_test(ip, XFS_IDIRTY_RELEASE)) {
/*
* Check if the inode is being opened, written and
* closed frequently and we have delayed allocation
* blocks outstanding (e.g. streaming writes from the
* NFS server), truncating the blocks past EOF will
* cause fragmentation to occur.
*
* In this case don't do the truncation, but we have to
* be careful how we detect this case. Blocks beyond EOF
* show up as i_delayed_blks even when the inode is
* clean, so we need to truncate them away first before
* checking for a dirty release. Hence on the first
* dirty close we will still remove the speculative
* allocation, but after that we will leave it in place.
*/
!xfs_iflags_test(ip, XFS_EOFBLOCKS_RELEASED)) {
xfs_free_eofblocks(ip);
if (ip->i_delayed_blks)
xfs_iflags_set(ip, XFS_IDIRTY_RELEASE);
xfs_iflags_set(ip, XFS_EOFBLOCKS_RELEASED);
}
xfs_iunlock(ip, XFS_IOLOCK_EXCL);
}

View File

@ -336,7 +336,7 @@ static inline bool xfs_inode_has_bigrtalloc(struct xfs_inode *ip)
#define XFS_INEW (1 << 3) /* inode has just been allocated */
#define XFS_IPRESERVE_DM_FIELDS (1 << 4) /* has legacy DMAPI fields set */
#define XFS_ITRUNCATED (1 << 5) /* truncated down so flush-on-close */
#define XFS_IDIRTY_RELEASE (1 << 6) /* dirty release already seen */
#define XFS_EOFBLOCKS_RELEASED (1 << 6) /* eofblocks were freed in ->release */
#define XFS_IFLUSHING (1 << 7) /* inode is being flushed */
#define __XFS_IPINNED_BIT 8 /* wakeup key for zero pin count */
#define XFS_IPINNED (1 << __XFS_IPINNED_BIT)
@ -383,7 +383,7 @@ static inline bool xfs_inode_has_bigrtalloc(struct xfs_inode *ip)
*/
#define XFS_IRECLAIM_RESET_FLAGS \
(XFS_IRECLAIMABLE | XFS_IRECLAIM | \
XFS_IDIRTY_RELEASE | XFS_ITRUNCATED | XFS_NEED_INACTIVE | \
XFS_EOFBLOCKS_RELEASED | XFS_ITRUNCATED | XFS_NEED_INACTIVE | \
XFS_INACTIVATING | XFS_IQUOTAUNCHECKED)
/*