forked from Minki/linux
3fd129b63f
One unfortunate quirk of the reference count and reverse mapping btrees is that they can expand in size when blocks are written to *other* allocation groups if, say, one large extent becomes a lot of tiny extents. Since we don't want to start throwing errors in the middle of CoWing, we need to reserve some blocks to handle future expansion. The transaction block reservation counters aren't sufficient here because we have to have a reserve of blocks in every AG, not just somewhere in the filesystem. Therefore, create two per-AG block reservation pools. One feeds the AGFL so that rmapbt expansion always succeeds, and the other feeds all other metadata so that refcountbt expansion never fails. Use the count of how many reserved blocks we need to have on hand to create a virtual reservation in the AG. Through selective clamping of the maximum length of allocation requests and of the length of the longest free extent, we can make it look like there's less free space in the AG unless the reservation owner is asking for blocks. In other words, play some accounting tricks in-core to make sure that we always have blocks available. On the plus side, there's nothing to clean up if we crash, which is in contrast to the strategy that the rough draft used (actually removing extents from the freespace btrees). Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> Reviewed-by: Dave Chinner <dchinner@redhat.com> Signed-off-by: Dave Chinner <david@fromorbit.com>
240 lines
6.1 KiB
C
240 lines
6.1 KiB
C
/*
|
|
* Copyright (c) 2000,2005 Silicon Graphics, Inc.
|
|
* All Rights Reserved.
|
|
*
|
|
* This program is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU General Public License as
|
|
* published by the Free Software Foundation.
|
|
*
|
|
* This program is distributed in the hope that it would be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with this program; if not, write the Free Software Foundation,
|
|
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
|
*/
|
|
#include "xfs.h"
|
|
#include "xfs_fs.h"
|
|
#include "xfs_shared.h"
|
|
#include "xfs_format.h"
|
|
#include "xfs_log_format.h"
|
|
#include "xfs_trans_resv.h"
|
|
#include "xfs_bit.h"
|
|
#include "xfs_mount.h"
|
|
#include "xfs_defer.h"
|
|
#include "xfs_trans.h"
|
|
#include "xfs_trans_priv.h"
|
|
#include "xfs_extfree_item.h"
|
|
#include "xfs_alloc.h"
|
|
#include "xfs_bmap.h"
|
|
#include "xfs_trace.h"
|
|
|
|
/*
|
|
* This routine is called to allocate an "extent free done"
|
|
* log item that will hold nextents worth of extents. The
|
|
* caller must use all nextents extents, because we are not
|
|
* flexible about this at all.
|
|
*/
|
|
struct xfs_efd_log_item *
|
|
xfs_trans_get_efd(struct xfs_trans *tp,
|
|
struct xfs_efi_log_item *efip,
|
|
uint nextents)
|
|
{
|
|
struct xfs_efd_log_item *efdp;
|
|
|
|
ASSERT(tp != NULL);
|
|
ASSERT(nextents > 0);
|
|
|
|
efdp = xfs_efd_init(tp->t_mountp, efip, nextents);
|
|
ASSERT(efdp != NULL);
|
|
|
|
/*
|
|
* Get a log_item_desc to point at the new item.
|
|
*/
|
|
xfs_trans_add_item(tp, &efdp->efd_item);
|
|
return efdp;
|
|
}
|
|
|
|
/*
|
|
* Free an extent and log it to the EFD. Note that the transaction is marked
|
|
* dirty regardless of whether the extent free succeeds or fails to support the
|
|
* EFI/EFD lifecycle rules.
|
|
*/
|
|
int
|
|
xfs_trans_free_extent(
|
|
struct xfs_trans *tp,
|
|
struct xfs_efd_log_item *efdp,
|
|
xfs_fsblock_t start_block,
|
|
xfs_extlen_t ext_len,
|
|
struct xfs_owner_info *oinfo)
|
|
{
|
|
struct xfs_mount *mp = tp->t_mountp;
|
|
uint next_extent;
|
|
xfs_agnumber_t agno = XFS_FSB_TO_AGNO(mp, start_block);
|
|
xfs_agblock_t agbno = XFS_FSB_TO_AGBNO(mp, start_block);
|
|
struct xfs_extent *extp;
|
|
int error;
|
|
|
|
trace_xfs_bmap_free_deferred(tp->t_mountp, agno, 0, agbno, ext_len);
|
|
|
|
error = xfs_free_extent(tp, start_block, ext_len, oinfo,
|
|
XFS_AG_RESV_NONE);
|
|
|
|
/*
|
|
* Mark the transaction dirty, even on error. This ensures the
|
|
* transaction is aborted, which:
|
|
*
|
|
* 1.) releases the EFI and frees the EFD
|
|
* 2.) shuts down the filesystem
|
|
*/
|
|
tp->t_flags |= XFS_TRANS_DIRTY;
|
|
efdp->efd_item.li_desc->lid_flags |= XFS_LID_DIRTY;
|
|
|
|
next_extent = efdp->efd_next_extent;
|
|
ASSERT(next_extent < efdp->efd_format.efd_nextents);
|
|
extp = &(efdp->efd_format.efd_extents[next_extent]);
|
|
extp->ext_start = start_block;
|
|
extp->ext_len = ext_len;
|
|
efdp->efd_next_extent++;
|
|
|
|
return error;
|
|
}
|
|
|
|
/* Sort bmap items by AG. */
|
|
static int
|
|
xfs_extent_free_diff_items(
|
|
void *priv,
|
|
struct list_head *a,
|
|
struct list_head *b)
|
|
{
|
|
struct xfs_mount *mp = priv;
|
|
struct xfs_extent_free_item *ra;
|
|
struct xfs_extent_free_item *rb;
|
|
|
|
ra = container_of(a, struct xfs_extent_free_item, xefi_list);
|
|
rb = container_of(b, struct xfs_extent_free_item, xefi_list);
|
|
return XFS_FSB_TO_AGNO(mp, ra->xefi_startblock) -
|
|
XFS_FSB_TO_AGNO(mp, rb->xefi_startblock);
|
|
}
|
|
|
|
/* Get an EFI. */
|
|
STATIC void *
|
|
xfs_extent_free_create_intent(
|
|
struct xfs_trans *tp,
|
|
unsigned int count)
|
|
{
|
|
struct xfs_efi_log_item *efip;
|
|
|
|
ASSERT(tp != NULL);
|
|
ASSERT(count > 0);
|
|
|
|
efip = xfs_efi_init(tp->t_mountp, count);
|
|
ASSERT(efip != NULL);
|
|
|
|
/*
|
|
* Get a log_item_desc to point at the new item.
|
|
*/
|
|
xfs_trans_add_item(tp, &efip->efi_item);
|
|
return efip;
|
|
}
|
|
|
|
/* Log a free extent to the intent item. */
|
|
STATIC void
|
|
xfs_extent_free_log_item(
|
|
struct xfs_trans *tp,
|
|
void *intent,
|
|
struct list_head *item)
|
|
{
|
|
struct xfs_efi_log_item *efip = intent;
|
|
struct xfs_extent_free_item *free;
|
|
uint next_extent;
|
|
struct xfs_extent *extp;
|
|
|
|
free = container_of(item, struct xfs_extent_free_item, xefi_list);
|
|
|
|
tp->t_flags |= XFS_TRANS_DIRTY;
|
|
efip->efi_item.li_desc->lid_flags |= XFS_LID_DIRTY;
|
|
|
|
/*
|
|
* atomic_inc_return gives us the value after the increment;
|
|
* we want to use it as an array index so we need to subtract 1 from
|
|
* it.
|
|
*/
|
|
next_extent = atomic_inc_return(&efip->efi_next_extent) - 1;
|
|
ASSERT(next_extent < efip->efi_format.efi_nextents);
|
|
extp = &efip->efi_format.efi_extents[next_extent];
|
|
extp->ext_start = free->xefi_startblock;
|
|
extp->ext_len = free->xefi_blockcount;
|
|
}
|
|
|
|
/* Get an EFD so we can process all the free extents. */
|
|
STATIC void *
|
|
xfs_extent_free_create_done(
|
|
struct xfs_trans *tp,
|
|
void *intent,
|
|
unsigned int count)
|
|
{
|
|
return xfs_trans_get_efd(tp, intent, count);
|
|
}
|
|
|
|
/* Process a free extent. */
|
|
STATIC int
|
|
xfs_extent_free_finish_item(
|
|
struct xfs_trans *tp,
|
|
struct xfs_defer_ops *dop,
|
|
struct list_head *item,
|
|
void *done_item,
|
|
void **state)
|
|
{
|
|
struct xfs_extent_free_item *free;
|
|
int error;
|
|
|
|
free = container_of(item, struct xfs_extent_free_item, xefi_list);
|
|
error = xfs_trans_free_extent(tp, done_item,
|
|
free->xefi_startblock,
|
|
free->xefi_blockcount,
|
|
&free->xefi_oinfo);
|
|
kmem_free(free);
|
|
return error;
|
|
}
|
|
|
|
/* Abort all pending EFIs. */
|
|
STATIC void
|
|
xfs_extent_free_abort_intent(
|
|
void *intent)
|
|
{
|
|
xfs_efi_release(intent);
|
|
}
|
|
|
|
/* Cancel a free extent. */
|
|
STATIC void
|
|
xfs_extent_free_cancel_item(
|
|
struct list_head *item)
|
|
{
|
|
struct xfs_extent_free_item *free;
|
|
|
|
free = container_of(item, struct xfs_extent_free_item, xefi_list);
|
|
kmem_free(free);
|
|
}
|
|
|
|
static const struct xfs_defer_op_type xfs_extent_free_defer_type = {
|
|
.type = XFS_DEFER_OPS_TYPE_FREE,
|
|
.max_items = XFS_EFI_MAX_FAST_EXTENTS,
|
|
.diff_items = xfs_extent_free_diff_items,
|
|
.create_intent = xfs_extent_free_create_intent,
|
|
.abort_intent = xfs_extent_free_abort_intent,
|
|
.log_item = xfs_extent_free_log_item,
|
|
.create_done = xfs_extent_free_create_done,
|
|
.finish_item = xfs_extent_free_finish_item,
|
|
.cancel_item = xfs_extent_free_cancel_item,
|
|
};
|
|
|
|
/* Register the deferred op type. */
|
|
void
|
|
xfs_extent_free_init_defer_op(void)
|
|
{
|
|
xfs_defer_init_op_type(&xfs_extent_free_defer_type);
|
|
}
|