linux/fs/xfs/xfs_iunlink_item.c
Dave Chinner 784eb7d8dd xfs: add in-memory iunlink log item
Now that we have a clean operation to update the di_next_unlinked
field of inode cluster buffers, we can easily defer this operation
to transaction commit time so we can order the inode cluster buffer
locking consistently.

To do this, we introduce a new in-memory log item to track the
unlinked list item modification that we are going to make. This
follows the same observations as the in-memory double linked list
used to track unlinked inodes in that the inodes on the list are
pinned in memory and cannot go away, and hence we can simply
reference them for the duration of the transaction without needing
to take active references or pin them or look them up.

This allows us to pass the xfs_inode to the transaction commit code
along with the modification to be made, and then order the logged
modifications via the ->iop_sort and ->iop_precommit operations
for the new log item type. As this is an in-memory log item, it
doesn't have formatting, CIL or AIL operational hooks - it exists
purely to run the inode unlink modifications and is then removed
from the transaction item list and freed once the precommit
operation has run.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
2022-07-14 11:47:42 +10:00

181 lines
4.6 KiB
C

// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (c) 2020-2022, Red Hat, Inc.
* All Rights Reserved.
*/
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_trans.h"
#include "xfs_trans_priv.h"
#include "xfs_ag.h"
#include "xfs_iunlink_item.h"
#include "xfs_trace.h"
#include "xfs_error.h"
struct kmem_cache *xfs_iunlink_cache;
static inline struct xfs_iunlink_item *IUL_ITEM(struct xfs_log_item *lip)
{
return container_of(lip, struct xfs_iunlink_item, item);
}
static void
xfs_iunlink_item_release(
struct xfs_log_item *lip)
{
struct xfs_iunlink_item *iup = IUL_ITEM(lip);
xfs_perag_put(iup->pag);
kmem_cache_free(xfs_iunlink_cache, IUL_ITEM(lip));
}
static uint64_t
xfs_iunlink_item_sort(
struct xfs_log_item *lip)
{
return IUL_ITEM(lip)->ip->i_ino;
}
/*
* Look up the inode cluster buffer and log the on-disk unlinked inode change
* we need to make.
*/
static int
xfs_iunlink_log_dinode(
struct xfs_trans *tp,
struct xfs_iunlink_item *iup)
{
struct xfs_mount *mp = tp->t_mountp;
struct xfs_inode *ip = iup->ip;
struct xfs_dinode *dip;
struct xfs_buf *ibp;
int offset;
int error;
error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &ibp);
if (error)
return error;
/*
* Don't log the unlinked field on stale buffers as this may be the
* transaction that frees the inode cluster and relogging the buffer
* here will incorrectly remove the stale state.
*/
if (ibp->b_flags & XBF_STALE)
goto out;
dip = xfs_buf_offset(ibp, ip->i_imap.im_boffset);
/* Make sure the old pointer isn't garbage. */
if (be32_to_cpu(dip->di_next_unlinked) != iup->old_agino) {
xfs_inode_verifier_error(ip, -EFSCORRUPTED, __func__, dip,
sizeof(*dip), __this_address);
error = -EFSCORRUPTED;
goto out;
}
trace_xfs_iunlink_update_dinode(mp, iup->pag->pag_agno,
XFS_INO_TO_AGINO(mp, ip->i_ino),
be32_to_cpu(dip->di_next_unlinked), iup->next_agino);
dip->di_next_unlinked = cpu_to_be32(iup->next_agino);
offset = ip->i_imap.im_boffset +
offsetof(struct xfs_dinode, di_next_unlinked);
xfs_dinode_calc_crc(mp, dip);
xfs_trans_inode_buf(tp, ibp);
xfs_trans_log_buf(tp, ibp, offset, offset + sizeof(xfs_agino_t) - 1);
return 0;
out:
xfs_trans_brelse(tp, ibp);
return error;
}
/*
* On precommit, we grab the inode cluster buffer for the inode number we were
* passed, then update the next unlinked field for that inode in the buffer and
* log the buffer. This ensures that the inode cluster buffer was logged in the
* correct order w.r.t. other inode cluster buffers. We can then remove the
* iunlink item from the transaction and release it as it is has now served it's
* purpose.
*/
static int
xfs_iunlink_item_precommit(
struct xfs_trans *tp,
struct xfs_log_item *lip)
{
struct xfs_iunlink_item *iup = IUL_ITEM(lip);
int error;
error = xfs_iunlink_log_dinode(tp, iup);
list_del(&lip->li_trans);
xfs_iunlink_item_release(lip);
return error;
}
static const struct xfs_item_ops xfs_iunlink_item_ops = {
.iop_release = xfs_iunlink_item_release,
.iop_sort = xfs_iunlink_item_sort,
.iop_precommit = xfs_iunlink_item_precommit,
};
/*
* Initialize the inode log item for a newly allocated (in-core) inode.
*
* Inode extents can only reside within an AG. Hence specify the starting
* block for the inode chunk by offset within an AG as well as the
* length of the allocated extent.
*
* This joins the item to the transaction and marks it dirty so
* that we don't need a separate call to do this, nor does the
* caller need to know anything about the iunlink item.
*/
int
xfs_iunlink_log_inode(
struct xfs_trans *tp,
struct xfs_inode *ip,
struct xfs_perag *pag,
xfs_agino_t next_agino)
{
struct xfs_mount *mp = tp->t_mountp;
struct xfs_iunlink_item *iup;
ASSERT(xfs_verify_agino_or_null(pag, next_agino));
ASSERT(xfs_verify_agino_or_null(pag, ip->i_next_unlinked));
/*
* Since we're updating a linked list, we should never find that the
* current pointer is the same as the new value, unless we're
* terminating the list.
*/
if (ip->i_next_unlinked == next_agino) {
if (next_agino != NULLAGINO)
return -EFSCORRUPTED;
return 0;
}
iup = kmem_cache_zalloc(xfs_iunlink_cache, GFP_KERNEL | __GFP_NOFAIL);
xfs_log_item_init(mp, &iup->item, XFS_LI_IUNLINK,
&xfs_iunlink_item_ops);
iup->ip = ip;
iup->next_agino = next_agino;
iup->old_agino = ip->i_next_unlinked;
atomic_inc(&pag->pag_ref);
iup->pag = pag;
xfs_trans_add_item(tp, &iup->item);
tp->t_flags |= XFS_TRANS_DIRTY;
set_bit(XFS_LI_DIRTY, &iup->item.li_flags);
return 0;
}