diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c index 0805ade2d300..1aa32bfdf0cc 100644 --- a/fs/xfs/libxfs/xfs_defer.c +++ b/fs/xfs/libxfs/xfs_defer.c @@ -186,7 +186,7 @@ static const struct xfs_defer_op_type *defer_op_types[] = { [XFS_DEFER_OPS_TYPE_AGFL_FREE] = &xfs_agfl_free_defer_type, }; -static void +static bool xfs_defer_create_intent( struct xfs_trans *tp, struct xfs_defer_pending *dfp, @@ -197,6 +197,7 @@ xfs_defer_create_intent( if (!dfp->dfp_intent) dfp->dfp_intent = ops->create_intent(tp, &dfp->dfp_work, dfp->dfp_count, sort); + return dfp->dfp_intent != NULL; } /* @@ -204,16 +205,18 @@ xfs_defer_create_intent( * associated extents, then add the entire intake list to the end of * the pending list. */ -STATIC void +static bool xfs_defer_create_intents( struct xfs_trans *tp) { struct xfs_defer_pending *dfp; + bool ret = false; list_for_each_entry(dfp, &tp->t_dfops, dfp_list) { trace_xfs_defer_create_intent(tp->t_mountp, dfp); - xfs_defer_create_intent(tp, dfp, true); + ret |= xfs_defer_create_intent(tp, dfp, true); } + return ret; } /* Abort all the intents that were committed. */ @@ -487,7 +490,7 @@ int xfs_defer_finish_noroll( struct xfs_trans **tp) { - struct xfs_defer_pending *dfp; + struct xfs_defer_pending *dfp = NULL; int error = 0; LIST_HEAD(dop_pending); @@ -506,17 +509,20 @@ xfs_defer_finish_noroll( * of time that any one intent item can stick around in memory, * pinning the log tail. */ - xfs_defer_create_intents(*tp); + bool has_intents = xfs_defer_create_intents(*tp); + list_splice_init(&(*tp)->t_dfops, &dop_pending); - error = xfs_defer_trans_roll(tp); - if (error) - goto out_shutdown; + if (has_intents || dfp) { + error = xfs_defer_trans_roll(tp); + if (error) + goto out_shutdown; - /* Possibly relog intent items to keep the log moving. */ - error = xfs_defer_relog(tp, &dop_pending); - if (error) - goto out_shutdown; + /* Relog intent items to keep the log moving. */ + error = xfs_defer_relog(tp, &dop_pending); + if (error) + goto out_shutdown; + } dfp = list_first_entry(&dop_pending, struct xfs_defer_pending, dfp_list); diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c index 9aee4a1e2fe9..1a4cdf550f6d 100644 --- a/fs/xfs/libxfs/xfs_inode_fork.c +++ b/fs/xfs/libxfs/xfs_inode_fork.c @@ -36,7 +36,7 @@ xfs_init_local_fork( int64_t size) { struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); - int mem_size = size, real_size = 0; + int mem_size = size; bool zero_terminate; /* @@ -50,8 +50,7 @@ xfs_init_local_fork( mem_size++; if (size) { - real_size = roundup(mem_size, 4); - ifp->if_u1.if_data = kmem_alloc(real_size, KM_NOFS); + ifp->if_u1.if_data = kmem_alloc(mem_size, KM_NOFS); memcpy(ifp->if_u1.if_data, data, size); if (zero_terminate) ifp->if_u1.if_data[size] = '\0'; @@ -497,12 +496,7 @@ xfs_idata_realloc( return; } - /* - * For inline data, the underlying buffer must be a multiple of 4 bytes - * in size so that it can be logged and stay on word boundaries. - * We enforce that here. - */ - ifp->if_u1.if_data = krealloc(ifp->if_u1.if_data, roundup(new_size, 4), + ifp->if_u1.if_data = krealloc(ifp->if_u1.if_data, new_size, GFP_NOFS | __GFP_NOFAIL); ifp->if_bytes = new_size; } diff --git a/fs/xfs/libxfs/xfs_shared.h b/fs/xfs/libxfs/xfs_shared.h index 25c4cab58851..c4381388c0c1 100644 --- a/fs/xfs/libxfs/xfs_shared.h +++ b/fs/xfs/libxfs/xfs_shared.h @@ -54,13 +54,23 @@ void xfs_log_get_max_trans_res(struct xfs_mount *mp, /* * Values for t_flags. */ -#define XFS_TRANS_DIRTY 0x01 /* something needs to be logged */ -#define XFS_TRANS_SB_DIRTY 0x02 /* superblock is modified */ -#define XFS_TRANS_PERM_LOG_RES 0x04 /* xact took a permanent log res */ -#define XFS_TRANS_SYNC 0x08 /* make commit synchronous */ -#define XFS_TRANS_RESERVE 0x20 /* OK to use reserved data blocks */ -#define XFS_TRANS_NO_WRITECOUNT 0x40 /* do not elevate SB writecount */ -#define XFS_TRANS_RES_FDBLKS 0x80 /* reserve newly freed blocks */ +/* Transaction needs to be logged */ +#define XFS_TRANS_DIRTY (1u << 0) +/* Superblock is dirty and needs to be logged */ +#define XFS_TRANS_SB_DIRTY (1u << 1) +/* Transaction took a permanent log reservation */ +#define XFS_TRANS_PERM_LOG_RES (1u << 2) +/* Synchronous transaction commit needed */ +#define XFS_TRANS_SYNC (1u << 3) +/* Transaction can use reserve block pool */ +#define XFS_TRANS_RESERVE (1u << 4) +/* Transaction should avoid VFS level superblock write accounting */ +#define XFS_TRANS_NO_WRITECOUNT (1u << 5) +/* Transaction has freed blocks returned to it's reservation */ +#define XFS_TRANS_RES_FDBLKS (1u << 6) +/* Transaction contains an intent done log item */ +#define XFS_TRANS_HAS_INTENT_DONE (1u << 7) + /* * LOWMODE is used by the allocator to activate the lowspace algorithm - when * free space is running low the extent allocator may choose to allocate an diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c index 593ac29cffc7..51f66e982484 100644 --- a/fs/xfs/xfs_bmap_item.c +++ b/fs/xfs/xfs_bmap_item.c @@ -39,6 +39,7 @@ STATIC void xfs_bui_item_free( struct xfs_bui_log_item *buip) { + kmem_free(buip->bui_item.li_lv_shadow); kmem_cache_free(xfs_bui_cache, buip); } @@ -54,10 +55,11 @@ xfs_bui_release( struct xfs_bui_log_item *buip) { ASSERT(atomic_read(&buip->bui_refcount) > 0); - if (atomic_dec_and_test(&buip->bui_refcount)) { - xfs_trans_ail_delete(&buip->bui_item, SHUTDOWN_LOG_IO_ERROR); - xfs_bui_item_free(buip); - } + if (!atomic_dec_and_test(&buip->bui_refcount)) + return; + + xfs_trans_ail_delete(&buip->bui_item, 0); + xfs_bui_item_free(buip); } @@ -198,14 +200,24 @@ xfs_bud_item_release( struct xfs_bud_log_item *budp = BUD_ITEM(lip); xfs_bui_release(budp->bud_buip); + kmem_free(budp->bud_item.li_lv_shadow); kmem_cache_free(xfs_bud_cache, budp); } +static struct xfs_log_item * +xfs_bud_item_intent( + struct xfs_log_item *lip) +{ + return &BUD_ITEM(lip)->bud_buip->bui_item; +} + static const struct xfs_item_ops xfs_bud_item_ops = { - .flags = XFS_ITEM_RELEASE_WHEN_COMMITTED, + .flags = XFS_ITEM_RELEASE_WHEN_COMMITTED | + XFS_ITEM_INTENT_DONE, .iop_size = xfs_bud_item_size, .iop_format = xfs_bud_item_format, .iop_release = xfs_bud_item_release, + .iop_intent = xfs_bud_item_intent, }; static struct xfs_bud_log_item * @@ -254,7 +266,7 @@ xfs_trans_log_finish_bmap_update( * 1.) releases the BUI and frees the BUD * 2.) shuts down the filesystem */ - tp->t_flags |= XFS_TRANS_DIRTY; + tp->t_flags |= XFS_TRANS_DIRTY | XFS_TRANS_HAS_INTENT_DONE; set_bit(XFS_LI_DIRTY, &budp->bud_item.li_flags); return error; @@ -586,6 +598,7 @@ xfs_bui_item_relog( } static const struct xfs_item_ops xfs_bui_item_ops = { + .flags = XFS_ITEM_INTENT, .iop_size = xfs_bui_item_size, .iop_format = xfs_bui_item_format, .iop_unpin = xfs_bui_item_unpin, diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c index 0e50f2c9348e..765be054dffe 100644 --- a/fs/xfs/xfs_extfree_item.c +++ b/fs/xfs/xfs_extfree_item.c @@ -58,10 +58,11 @@ xfs_efi_release( struct xfs_efi_log_item *efip) { ASSERT(atomic_read(&efip->efi_refcount) > 0); - if (atomic_dec_and_test(&efip->efi_refcount)) { - xfs_trans_ail_delete(&efip->efi_item, SHUTDOWN_LOG_IO_ERROR); - xfs_efi_item_free(efip); - } + if (!atomic_dec_and_test(&efip->efi_refcount)) + return; + + xfs_trans_ail_delete(&efip->efi_item, 0); + xfs_efi_item_free(efip); } /* @@ -306,11 +307,20 @@ xfs_efd_item_release( xfs_efd_item_free(efdp); } +static struct xfs_log_item * +xfs_efd_item_intent( + struct xfs_log_item *lip) +{ + return &EFD_ITEM(lip)->efd_efip->efi_item; +} + static const struct xfs_item_ops xfs_efd_item_ops = { - .flags = XFS_ITEM_RELEASE_WHEN_COMMITTED, + .flags = XFS_ITEM_RELEASE_WHEN_COMMITTED | + XFS_ITEM_INTENT_DONE, .iop_size = xfs_efd_item_size, .iop_format = xfs_efd_item_format, .iop_release = xfs_efd_item_release, + .iop_intent = xfs_efd_item_intent, }; /* @@ -380,7 +390,7 @@ xfs_trans_free_extent( * 1.) releases the EFI and frees the EFD * 2.) shuts down the filesystem */ - tp->t_flags |= XFS_TRANS_DIRTY; + tp->t_flags |= XFS_TRANS_DIRTY | XFS_TRANS_HAS_INTENT_DONE; set_bit(XFS_LI_DIRTY, &efdp->efd_item.li_flags); next_extent = efdp->efd_next_extent; @@ -688,6 +698,7 @@ xfs_efi_item_relog( } static const struct xfs_item_ops xfs_efi_item_ops = { + .flags = XFS_ITEM_INTENT, .iop_size = xfs_efi_item_size, .iop_format = xfs_efi_item_format, .iop_unpin = xfs_efi_item_unpin, diff --git a/fs/xfs/xfs_icreate_item.c b/fs/xfs/xfs_icreate_item.c index 508e184e3b8f..b05314d48176 100644 --- a/fs/xfs/xfs_icreate_item.c +++ b/fs/xfs/xfs_icreate_item.c @@ -63,6 +63,7 @@ STATIC void xfs_icreate_item_release( struct xfs_log_item *lip) { + kmem_free(ICR_ITEM(lip)->ic_item.li_lv_shadow); kmem_cache_free(xfs_icreate_cache, ICR_ITEM(lip)); } diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 00733a18ccdc..721def0639fd 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -71,7 +71,7 @@ xfs_inode_item_data_fork_size( case XFS_DINODE_FMT_LOCAL: if ((iip->ili_fields & XFS_ILOG_DDATA) && ip->i_df.if_bytes > 0) { - *nbytes += roundup(ip->i_df.if_bytes, 4); + *nbytes += xlog_calc_iovec_len(ip->i_df.if_bytes); *nvecs += 1; } break; @@ -112,7 +112,7 @@ xfs_inode_item_attr_fork_size( case XFS_DINODE_FMT_LOCAL: if ((iip->ili_fields & XFS_ILOG_ADATA) && ip->i_afp->if_bytes > 0) { - *nbytes += roundup(ip->i_afp->if_bytes, 4); + *nbytes += xlog_calc_iovec_len(ip->i_afp->if_bytes); *nvecs += 1; } break; @@ -204,17 +204,12 @@ xfs_inode_item_format_data_fork( ~(XFS_ILOG_DEXT | XFS_ILOG_DBROOT | XFS_ILOG_DEV); if ((iip->ili_fields & XFS_ILOG_DDATA) && ip->i_df.if_bytes > 0) { - /* - * Round i_bytes up to a word boundary. - * The underlying memory is guaranteed - * to be there by xfs_idata_realloc(). - */ - data_bytes = roundup(ip->i_df.if_bytes, 4); ASSERT(ip->i_df.if_u1.if_data != NULL); ASSERT(ip->i_disk_size > 0); xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_ILOCAL, - ip->i_df.if_u1.if_data, data_bytes); - ilf->ilf_dsize = (unsigned)data_bytes; + ip->i_df.if_u1.if_data, + ip->i_df.if_bytes); + ilf->ilf_dsize = (unsigned)ip->i_df.if_bytes; ilf->ilf_size++; } else { iip->ili_fields &= ~XFS_ILOG_DDATA; @@ -288,17 +283,11 @@ xfs_inode_item_format_attr_fork( if ((iip->ili_fields & XFS_ILOG_ADATA) && ip->i_afp->if_bytes > 0) { - /* - * Round i_bytes up to a word boundary. - * The underlying memory is guaranteed - * to be there by xfs_idata_realloc(). - */ - data_bytes = roundup(ip->i_afp->if_bytes, 4); ASSERT(ip->i_afp->if_u1.if_data != NULL); xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_IATTR_LOCAL, ip->i_afp->if_u1.if_data, - data_bytes); - ilf->ilf_asize = (unsigned)data_bytes; + ip->i_afp->if_bytes); + ilf->ilf_asize = (unsigned)ip->i_afp->if_bytes; ilf->ilf_size++; } else { iip->ili_fields &= ~XFS_ILOG_ADATA; diff --git a/fs/xfs/xfs_inode_item_recover.c b/fs/xfs/xfs_inode_item_recover.c index 6d44f5fd6d7e..d28ffaebd067 100644 --- a/fs/xfs/xfs_inode_item_recover.c +++ b/fs/xfs/xfs_inode_item_recover.c @@ -462,7 +462,7 @@ xlog_recover_inode_commit_pass2( ASSERT(in_f->ilf_size <= 4); ASSERT((in_f->ilf_size == 3) || (fields & XFS_ILOG_AFORK)); ASSERT(!(fields & XFS_ILOG_DFORK) || - (len == in_f->ilf_dsize)); + (len == xlog_calc_iovec_len(in_f->ilf_dsize))); switch (fields & XFS_ILOG_DFORK) { case XFS_ILOG_DDATA: @@ -497,7 +497,7 @@ xlog_recover_inode_commit_pass2( } len = item->ri_buf[attr_index].i_len; src = item->ri_buf[attr_index].i_addr; - ASSERT(len == in_f->ilf_asize); + ASSERT(len == xlog_calc_iovec_len(in_f->ilf_asize)); switch (in_f->ilf_fields & XFS_ILOG_AFORK) { case XFS_ILOG_ADATA: diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h index 8dafe8f771c7..3a4f6a4e4eb7 100644 --- a/fs/xfs/xfs_log.h +++ b/fs/xfs/xfs_log.h @@ -21,23 +21,59 @@ struct xfs_log_vec { #define XFS_LOG_VEC_ORDERED (-1) +/* + * Calculate the log iovec length for a given user buffer length. Intended to be + * used by ->iop_size implementations when sizing buffers of arbitrary + * alignments. + */ +static inline int +xlog_calc_iovec_len(int len) +{ + return roundup(len, sizeof(uint32_t)); +} + void *xlog_prepare_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec **vecp, uint type); static inline void -xlog_finish_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec *vec, int len) +xlog_finish_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec *vec, + int data_len) { struct xlog_op_header *oph = vec->i_addr; + int len; - /* opheader tracks payload length, logvec tracks region length */ + /* + * Always round up the length to the correct alignment so callers don't + * need to know anything about this log vec layout requirement. This + * means we have to zero the area the data to be written does not cover. + * This is complicated by fact the payload region is offset into the + * logvec region by the opheader that tracks the payload. + */ + len = xlog_calc_iovec_len(data_len); + if (len - data_len != 0) { + char *buf = vec->i_addr + sizeof(struct xlog_op_header); + + memset(buf + data_len, 0, len - data_len); + } + + /* + * The opheader tracks aligned payload length, whilst the logvec tracks + * the overall region length. + */ oph->oh_len = cpu_to_be32(len); len += sizeof(struct xlog_op_header); lv->lv_buf_len += len; lv->lv_bytes += len; vec->i_len = len; + + /* Catch buffer overruns */ + ASSERT((void *)lv->lv_buf + lv->lv_bytes <= (void *)lv + lv->lv_size); } +/* + * Copy the amount of data requested by the caller into a new log iovec. + */ static inline void * xlog_copy_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec **vecp, uint type, void *data, int len) diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index e5ab62f08c19..70f718d76ceb 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c @@ -47,6 +47,38 @@ xlog_cil_ticket_alloc( return tic; } +/* + * Check if the current log item was first committed in this sequence. + * We can't rely on just the log item being in the CIL, we have to check + * the recorded commit sequence number. + * + * Note: for this to be used in a non-racy manner, it has to be called with + * CIL flushing locked out. As a result, it should only be used during the + * transaction commit process when deciding what to format into the item. + */ +static bool +xlog_item_in_current_chkpt( + struct xfs_cil *cil, + struct xfs_log_item *lip) +{ + if (list_empty(&lip->li_cil)) + return false; + + /* + * li_seq is written on the first commit of a log item to record the + * first checkpoint it is written to. Hence if it is different to the + * current sequence, we're in a new checkpoint. + */ + return lip->li_seq == READ_ONCE(cil->xc_current_sequence); +} + +bool +xfs_log_item_in_current_chkpt( + struct xfs_log_item *lip) +{ + return xlog_item_in_current_chkpt(lip->li_log->l_cilp, lip); +} + /* * Unavoidable forward declaration - xlog_cil_push_work() calls * xlog_cil_ctx_alloc() itself. @@ -444,7 +476,8 @@ insert: static void xlog_cil_insert_items( struct xlog *log, - struct xfs_trans *tp) + struct xfs_trans *tp, + uint32_t released_space) { struct xfs_cil *cil = log->l_cilp; struct xfs_cil_ctx *ctx = cil->xc_ctx; @@ -493,7 +526,9 @@ xlog_cil_insert_items( ASSERT(tp->t_ticket->t_curr_res >= len); } tp->t_ticket->t_curr_res -= len; + tp->t_ticket->t_curr_res += released_space; ctx->space_used += len; + ctx->space_used -= released_space; /* * If we've overrun the reservation, dump the tx details before we move @@ -934,6 +969,65 @@ xlog_cil_build_trans_hdr( tic->t_curr_res -= lvhdr->lv_bytes; } +/* + * Pull all the log vectors off the items in the CIL, and remove the items from + * the CIL. We don't need the CIL lock here because it's only needed on the + * transaction commit side which is currently locked out by the flush lock. + * + * If a log item is marked with a whiteout, we do not need to write it to the + * journal and so we just move them to the whiteout list for the caller to + * dispose of appropriately. + */ +static void +xlog_cil_build_lv_chain( + struct xfs_cil *cil, + struct xfs_cil_ctx *ctx, + struct list_head *whiteouts, + uint32_t *num_iovecs, + uint32_t *num_bytes) +{ + struct xfs_log_vec *lv = NULL; + + while (!list_empty(&cil->xc_cil)) { + struct xfs_log_item *item; + + item = list_first_entry(&cil->xc_cil, + struct xfs_log_item, li_cil); + + if (test_bit(XFS_LI_WHITEOUT, &item->li_flags)) { + list_move(&item->li_cil, whiteouts); + trace_xfs_cil_whiteout_skip(item); + continue; + } + + list_del_init(&item->li_cil); + if (!ctx->lv_chain) + ctx->lv_chain = item->li_lv; + else + lv->lv_next = item->li_lv; + lv = item->li_lv; + item->li_lv = NULL; + *num_iovecs += lv->lv_niovecs; + + /* we don't write ordered log vectors */ + if (lv->lv_buf_len != XFS_LOG_VEC_ORDERED) + *num_bytes += lv->lv_bytes; + } +} + +static void +xlog_cil_cleanup_whiteouts( + struct list_head *whiteouts) +{ + while (!list_empty(whiteouts)) { + struct xfs_log_item *item = list_first_entry(whiteouts, + struct xfs_log_item, li_cil); + list_del_init(&item->li_cil); + trace_xfs_cil_whiteout_unpin(item); + item->li_ops->iop_unpin(item, 1); + } +} + /* * Push the Committed Item List to the log. * @@ -956,7 +1050,6 @@ xlog_cil_push_work( container_of(work, struct xfs_cil_ctx, push_work); struct xfs_cil *cil = ctx->cil; struct xlog *log = cil->xc_log; - struct xfs_log_vec *lv; struct xfs_cil_ctx *new_ctx; int num_iovecs = 0; int num_bytes = 0; @@ -965,6 +1058,7 @@ xlog_cil_push_work( struct xfs_log_vec lvhdr = { NULL }; xfs_csn_t push_seq; bool push_commit_stable; + LIST_HEAD (whiteouts); new_ctx = xlog_cil_ctx_alloc(); new_ctx->ticket = xlog_cil_ticket_alloc(log); @@ -1033,31 +1127,7 @@ xlog_cil_push_work( list_add(&ctx->committing, &cil->xc_committing); spin_unlock(&cil->xc_push_lock); - /* - * Pull all the log vectors off the items in the CIL, and remove the - * items from the CIL. We don't need the CIL lock here because it's only - * needed on the transaction commit side which is currently locked out - * by the flush lock. - */ - lv = NULL; - while (!list_empty(&cil->xc_cil)) { - struct xfs_log_item *item; - - item = list_first_entry(&cil->xc_cil, - struct xfs_log_item, li_cil); - list_del_init(&item->li_cil); - if (!ctx->lv_chain) - ctx->lv_chain = item->li_lv; - else - lv->lv_next = item->li_lv; - lv = item->li_lv; - item->li_lv = NULL; - num_iovecs += lv->lv_niovecs; - - /* we don't write ordered log vectors */ - if (lv->lv_buf_len != XFS_LOG_VEC_ORDERED) - num_bytes += lv->lv_bytes; - } + xlog_cil_build_lv_chain(cil, ctx, &whiteouts, &num_iovecs, &num_bytes); /* * Switch the contexts so we can drop the context lock and move out @@ -1160,6 +1230,7 @@ xlog_cil_push_work( /* Not safe to reference ctx now! */ spin_unlock(&log->l_icloglock); + xlog_cil_cleanup_whiteouts(&whiteouts); return; out_skip: @@ -1171,6 +1242,7 @@ out_skip: out_abort_free_ticket: xfs_log_ticket_ungrant(log, ctx->ticket); ASSERT(xlog_is_shutdown(log)); + xlog_cil_cleanup_whiteouts(&whiteouts); if (!ctx->commit_iclog) { xlog_cil_committed(ctx); return; @@ -1319,6 +1391,43 @@ xlog_cil_empty( return empty; } +/* + * If there are intent done items in this transaction and the related intent was + * committed in the current (same) CIL checkpoint, we don't need to write either + * the intent or intent done item to the journal as the change will be + * journalled atomically within this checkpoint. As we cannot remove items from + * the CIL here, mark the related intent with a whiteout so that the CIL push + * can remove it rather than writing it to the journal. Then remove the intent + * done item from the current transaction and release it so it doesn't get put + * into the CIL at all. + */ +static uint32_t +xlog_cil_process_intents( + struct xfs_cil *cil, + struct xfs_trans *tp) +{ + struct xfs_log_item *lip, *ilip, *next; + uint32_t len = 0; + + list_for_each_entry_safe(lip, next, &tp->t_items, li_trans) { + if (!(lip->li_ops->flags & XFS_ITEM_INTENT_DONE)) + continue; + + ilip = lip->li_ops->iop_intent(lip); + if (!ilip || !xlog_item_in_current_chkpt(cil, ilip)) + continue; + set_bit(XFS_LI_WHITEOUT, &ilip->li_flags); + trace_xfs_cil_whiteout_mark(ilip); + len += ilip->li_lv->lv_bytes; + kmem_free(ilip->li_lv); + ilip->li_lv = NULL; + + xfs_trans_del_item(lip); + lip->li_ops->iop_release(lip); + } + return len; +} + /* * Commit a transaction with the given vector to the Committed Item List. * @@ -1341,6 +1450,7 @@ xlog_cil_commit( { struct xfs_cil *cil = log->l_cilp; struct xfs_log_item *lip, *next; + uint32_t released_space = 0; /* * Do all necessary memory allocation before we lock the CIL. @@ -1352,7 +1462,10 @@ xlog_cil_commit( /* lock out background commit */ down_read(&cil->xc_ctx_lock); - xlog_cil_insert_items(log, tp); + if (tp->t_flags & XFS_TRANS_HAS_INTENT_DONE) + released_space = xlog_cil_process_intents(cil, tp); + + xlog_cil_insert_items(log, tp, released_space); if (regrant && !xlog_is_shutdown(log)) xfs_log_ticket_regrant(log, tp->t_ticket); @@ -1508,32 +1621,6 @@ out_shutdown: return 0; } -/* - * Check if the current log item was first committed in this sequence. - * We can't rely on just the log item being in the CIL, we have to check - * the recorded commit sequence number. - * - * Note: for this to be used in a non-racy manner, it has to be called with - * CIL flushing locked out. As a result, it should only be used during the - * transaction commit process when deciding what to format into the item. - */ -bool -xfs_log_item_in_current_chkpt( - struct xfs_log_item *lip) -{ - struct xfs_cil *cil = lip->li_log->l_cilp; - - if (list_empty(&lip->li_cil)) - return false; - - /* - * li_seq is written on the first commit of a log item to record the - * first checkpoint it is written to. Hence if it is different to the - * current sequence, we're in a new checkpoint. - */ - return lip->li_seq == READ_ONCE(cil->xc_current_sequence); -} - /* * Perform initial CIL structure initialisation. */ diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c index 0d868c93144d..7e97bf19793d 100644 --- a/fs/xfs/xfs_refcount_item.c +++ b/fs/xfs/xfs_refcount_item.c @@ -35,6 +35,7 @@ STATIC void xfs_cui_item_free( struct xfs_cui_log_item *cuip) { + kmem_free(cuip->cui_item.li_lv_shadow); if (cuip->cui_format.cui_nextents > XFS_CUI_MAX_FAST_EXTENTS) kmem_free(cuip); else @@ -53,10 +54,11 @@ xfs_cui_release( struct xfs_cui_log_item *cuip) { ASSERT(atomic_read(&cuip->cui_refcount) > 0); - if (atomic_dec_and_test(&cuip->cui_refcount)) { - xfs_trans_ail_delete(&cuip->cui_item, SHUTDOWN_LOG_IO_ERROR); - xfs_cui_item_free(cuip); - } + if (!atomic_dec_and_test(&cuip->cui_refcount)) + return; + + xfs_trans_ail_delete(&cuip->cui_item, 0); + xfs_cui_item_free(cuip); } @@ -204,14 +206,24 @@ xfs_cud_item_release( struct xfs_cud_log_item *cudp = CUD_ITEM(lip); xfs_cui_release(cudp->cud_cuip); + kmem_free(cudp->cud_item.li_lv_shadow); kmem_cache_free(xfs_cud_cache, cudp); } +static struct xfs_log_item * +xfs_cud_item_intent( + struct xfs_log_item *lip) +{ + return &CUD_ITEM(lip)->cud_cuip->cui_item; +} + static const struct xfs_item_ops xfs_cud_item_ops = { - .flags = XFS_ITEM_RELEASE_WHEN_COMMITTED, + .flags = XFS_ITEM_RELEASE_WHEN_COMMITTED | + XFS_ITEM_INTENT_DONE, .iop_size = xfs_cud_item_size, .iop_format = xfs_cud_item_format, .iop_release = xfs_cud_item_release, + .iop_intent = xfs_cud_item_intent, }; static struct xfs_cud_log_item * @@ -259,7 +271,7 @@ xfs_trans_log_finish_refcount_update( * 1.) releases the CUI and frees the CUD * 2.) shuts down the filesystem */ - tp->t_flags |= XFS_TRANS_DIRTY; + tp->t_flags |= XFS_TRANS_DIRTY | XFS_TRANS_HAS_INTENT_DONE; set_bit(XFS_LI_DIRTY, &cudp->cud_item.li_flags); return error; @@ -600,6 +612,7 @@ xfs_cui_item_relog( } static const struct xfs_item_ops xfs_cui_item_ops = { + .flags = XFS_ITEM_INTENT, .iop_size = xfs_cui_item_size, .iop_format = xfs_cui_item_format, .iop_unpin = xfs_cui_item_unpin, diff --git a/fs/xfs/xfs_rmap_item.c b/fs/xfs/xfs_rmap_item.c index a22b2d19ef91..fef92e02f3bb 100644 --- a/fs/xfs/xfs_rmap_item.c +++ b/fs/xfs/xfs_rmap_item.c @@ -35,6 +35,7 @@ STATIC void xfs_rui_item_free( struct xfs_rui_log_item *ruip) { + kmem_free(ruip->rui_item.li_lv_shadow); if (ruip->rui_format.rui_nextents > XFS_RUI_MAX_FAST_EXTENTS) kmem_free(ruip); else @@ -53,10 +54,11 @@ xfs_rui_release( struct xfs_rui_log_item *ruip) { ASSERT(atomic_read(&ruip->rui_refcount) > 0); - if (atomic_dec_and_test(&ruip->rui_refcount)) { - xfs_trans_ail_delete(&ruip->rui_item, SHUTDOWN_LOG_IO_ERROR); - xfs_rui_item_free(ruip); - } + if (!atomic_dec_and_test(&ruip->rui_refcount)) + return; + + xfs_trans_ail_delete(&ruip->rui_item, 0); + xfs_rui_item_free(ruip); } STATIC void @@ -227,14 +229,24 @@ xfs_rud_item_release( struct xfs_rud_log_item *rudp = RUD_ITEM(lip); xfs_rui_release(rudp->rud_ruip); + kmem_free(rudp->rud_item.li_lv_shadow); kmem_cache_free(xfs_rud_cache, rudp); } +static struct xfs_log_item * +xfs_rud_item_intent( + struct xfs_log_item *lip) +{ + return &RUD_ITEM(lip)->rud_ruip->rui_item; +} + static const struct xfs_item_ops xfs_rud_item_ops = { - .flags = XFS_ITEM_RELEASE_WHEN_COMMITTED, + .flags = XFS_ITEM_RELEASE_WHEN_COMMITTED | + XFS_ITEM_INTENT_DONE, .iop_size = xfs_rud_item_size, .iop_format = xfs_rud_item_format, .iop_release = xfs_rud_item_release, + .iop_intent = xfs_rud_item_intent, }; static struct xfs_rud_log_item * @@ -327,7 +339,7 @@ xfs_trans_log_finish_rmap_update( * 1.) releases the RUI and frees the RUD * 2.) shuts down the filesystem */ - tp->t_flags |= XFS_TRANS_DIRTY; + tp->t_flags |= XFS_TRANS_DIRTY | XFS_TRANS_HAS_INTENT_DONE; set_bit(XFS_LI_DIRTY, &rudp->rud_item.li_flags); return error; @@ -630,6 +642,7 @@ xfs_rui_item_relog( } static const struct xfs_item_ops xfs_rui_item_ops = { + .flags = XFS_ITEM_INTENT, .iop_size = xfs_rui_item_size, .iop_format = xfs_rui_item_format, .iop_unpin = xfs_rui_item_unpin, diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index e1197f9ad97e..75934e3c3f55 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -1332,6 +1332,9 @@ DEFINE_LOG_ITEM_EVENT(xfs_ail_push); DEFINE_LOG_ITEM_EVENT(xfs_ail_pinned); DEFINE_LOG_ITEM_EVENT(xfs_ail_locked); DEFINE_LOG_ITEM_EVENT(xfs_ail_flushing); +DEFINE_LOG_ITEM_EVENT(xfs_cil_whiteout_mark); +DEFINE_LOG_ITEM_EVENT(xfs_cil_whiteout_skip); +DEFINE_LOG_ITEM_EVENT(xfs_cil_whiteout_unpin); DECLARE_EVENT_CLASS(xfs_ail_class, TP_PROTO(struct xfs_log_item *lip, xfs_lsn_t old_lsn, xfs_lsn_t new_lsn), diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index 87e940b5366e..9561f193e7e1 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -55,13 +55,15 @@ struct xfs_log_item { #define XFS_LI_IN_AIL 0 #define XFS_LI_ABORTED 1 #define XFS_LI_FAILED 2 -#define XFS_LI_DIRTY 3 /* log item dirty in transaction */ +#define XFS_LI_DIRTY 3 +#define XFS_LI_WHITEOUT 4 #define XFS_LI_FLAGS \ { (1u << XFS_LI_IN_AIL), "IN_AIL" }, \ { (1u << XFS_LI_ABORTED), "ABORTED" }, \ { (1u << XFS_LI_FAILED), "FAILED" }, \ - { (1u << XFS_LI_DIRTY), "DIRTY" } + { (1u << XFS_LI_DIRTY), "DIRTY" }, \ + { (1u << XFS_LI_WHITEOUT), "WHITEOUT" } struct xfs_item_ops { unsigned flags; @@ -78,30 +80,32 @@ struct xfs_item_ops { bool (*iop_match)(struct xfs_log_item *item, uint64_t id); struct xfs_log_item *(*iop_relog)(struct xfs_log_item *intent, struct xfs_trans *tp); + struct xfs_log_item *(*iop_intent)(struct xfs_log_item *intent_done); }; -/* Is this log item a deferred action intent? */ +/* + * Log item ops flags + */ +/* + * Release the log item when the journal commits instead of inserting into the + * AIL for writeback tracking and/or log tail pinning. + */ +#define XFS_ITEM_RELEASE_WHEN_COMMITTED (1 << 0) +#define XFS_ITEM_INTENT (1 << 1) +#define XFS_ITEM_INTENT_DONE (1 << 2) + static inline bool xlog_item_is_intent(struct xfs_log_item *lip) { - return lip->li_ops->iop_recover != NULL && - lip->li_ops->iop_match != NULL; + return lip->li_ops->flags & XFS_ITEM_INTENT; } -/* Is this a log intent-done item? */ static inline bool xlog_item_is_intent_done(struct xfs_log_item *lip) { - return lip->li_ops->iop_unpin == NULL && - lip->li_ops->iop_push == NULL; + return lip->li_ops->flags & XFS_ITEM_INTENT_DONE; } -/* - * Release the log item as soon as committed. This is for items just logging - * intents that never need to be written back in place. - */ -#define XFS_ITEM_RELEASE_WHEN_COMMITTED (1 << 0) - void xfs_log_item_init(struct xfs_mount *mp, struct xfs_log_item *item, int type, const struct xfs_item_ops *ops);