Merge branch 'xfs-4.8-buf-fixes' into for-next

commit bbfeb6141f

fs/xfs/xfs_buf.c
@@ -79,6 +79,47 @@ xfs_buf_vmap_len(
    return (bp->b_page_count * PAGE_SIZE) - bp->b_offset;
}

/*
 * Bump the I/O in flight count on the buftarg if we haven't yet done so for
 * this buffer. The count is incremented once per buffer (per hold cycle)
 * because the corresponding decrement is deferred to buffer release. Buffers
 * can undergo I/O multiple times in a hold-release cycle and per buffer I/O
 * tracking adds unnecessary overhead. This is used for synchronization purposes
 * with unmount (see xfs_wait_buftarg()), so all we really need is a count of
 * in-flight buffers.
 *
 * Buffers that are never released (e.g., superblock, iclog buffers) must set
 * the XBF_NO_IOACCT flag before I/O submission. Otherwise, the buftarg count
 * never reaches zero and unmount hangs indefinitely.
 */
static inline void
xfs_buf_ioacct_inc(
    struct xfs_buf  *bp)
{
    if (bp->b_flags & (XBF_NO_IOACCT|_XBF_IN_FLIGHT))
        return;

    ASSERT(bp->b_flags & XBF_ASYNC);
    bp->b_flags |= _XBF_IN_FLIGHT;
    percpu_counter_inc(&bp->b_target->bt_io_count);
}

/*
 * Clear the in-flight state on a buffer about to be released to the LRU or
 * freed and unaccount from the buftarg.
 */
static inline void
xfs_buf_ioacct_dec(
    struct xfs_buf  *bp)
{
    if (!(bp->b_flags & _XBF_IN_FLIGHT))
        return;

    ASSERT(bp->b_flags & XBF_ASYNC);
    bp->b_flags &= ~_XBF_IN_FLIGHT;
    percpu_counter_dec(&bp->b_target->bt_io_count);
}

/*
 * When we mark a buffer stale, we remove the buffer from the LRU and clear the
 * b_lru_ref count so that the buffer is freed immediately when the buffer
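The two ioacct helpers added above form a per-buffer latch: a buffer contributes to bt_io_count at most once per hold cycle, and not at all if it opted out via XBF_NO_IOACCT. A minimal userspace sketch of the same idea, assuming illustrative names (buf, buftarg, ioacct_inc/dec) and a plain C11 atomic standing in for the kernel's per-CPU counter:

#include <stdatomic.h>
#include <stdio.h>

#define BUF_NO_IOACCT  (1u << 0)        /* opt out of accounting entirely */
#define BUF_IN_FLIGHT  (1u << 1)        /* already counted this hold cycle */

struct buftarg { atomic_int io_count; };        /* stand-in for bt_io_count */
struct buf     { unsigned flags; struct buftarg *target; };

/* Count the buffer at most once, and never if it opted out. */
static void ioacct_inc(struct buf *bp)
{
    if (bp->flags & (BUF_NO_IOACCT | BUF_IN_FLIGHT))
        return;
    bp->flags |= BUF_IN_FLIGHT;
    atomic_fetch_add(&bp->target->io_count, 1);
}

/* Undo the accounting exactly once, at release (or stale) time. */
static void ioacct_dec(struct buf *bp)
{
    if (!(bp->flags & BUF_IN_FLIGHT))
        return;
    bp->flags &= ~BUF_IN_FLIGHT;
    atomic_fetch_sub(&bp->target->io_count, 1);
}

int main(void)
{
    struct buftarg tgt;
    struct buf bp = { .flags = 0, .target = &tgt };

    atomic_init(&tgt.io_count, 0);
    ioacct_inc(&bp);        /* first submission: counted */
    ioacct_inc(&bp);        /* resubmission, same hold cycle: no-op */
    ioacct_dec(&bp);        /* release: back to zero */
    printf("in flight: %d\n", atomic_load(&tgt.io_count));
    return 0;
}

The flag test ahead of each counter update is what keeps resubmissions within a single hold cycle from inflating the count, which is the property xfs_wait_buftarg() relies on.
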
@@ -102,6 +143,14 @@ xfs_buf_stale(
     */
    bp->b_flags &= ~_XBF_DELWRI_Q;

    /*
     * Once the buffer is marked stale and unlocked, a subsequent lookup
     * could reset b_flags. There is no guarantee that the buffer is
     * unaccounted (released to LRU) before that occurs. Drop in-flight
     * status now to preserve accounting consistency.
     */
    xfs_buf_ioacct_dec(bp);

    spin_lock(&bp->b_lock);
    atomic_set(&bp->b_lru_ref, 0);
    if (!(bp->b_state & XFS_BSTATE_DISPOSE) &&
@@ -815,7 +864,8 @@ xfs_buf_get_uncached(
    struct xfs_buf  *bp;
    DEFINE_SINGLE_BUF_MAP(map, XFS_BUF_DADDR_NULL, numblks);

    bp = _xfs_buf_alloc(target, &map, 1, 0);
    /* flags might contain irrelevant bits, pass only what we care about */
    bp = _xfs_buf_alloc(target, &map, 1, flags & XBF_NO_IOACCT);
    if (unlikely(bp == NULL))
        goto fail;

@@ -866,63 +916,85 @@ xfs_buf_hold(
}

/*
 * Releases a hold on the specified buffer. If the
 * the hold count is 1, calls xfs_buf_free.
 * Release a hold on the specified buffer. If the hold count is 1, the buffer is
 * placed on LRU or freed (depending on b_lru_ref).
 */
void
xfs_buf_rele(
    xfs_buf_t   *bp)
{
    struct xfs_perag    *pag = bp->b_pag;
    bool                release;
    bool                freebuf = false;

    trace_xfs_buf_rele(bp, _RET_IP_);

    if (!pag) {
        ASSERT(list_empty(&bp->b_lru));
        ASSERT(RB_EMPTY_NODE(&bp->b_rbnode));
        if (atomic_dec_and_test(&bp->b_hold))
        if (atomic_dec_and_test(&bp->b_hold)) {
            xfs_buf_ioacct_dec(bp);
            xfs_buf_free(bp);
        }
        return;
    }

    ASSERT(!RB_EMPTY_NODE(&bp->b_rbnode));

    ASSERT(atomic_read(&bp->b_hold) > 0);
    if (atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock)) {
        spin_lock(&bp->b_lock);
        if (!(bp->b_flags & XBF_STALE) && atomic_read(&bp->b_lru_ref)) {
            /*
             * If the buffer is added to the LRU take a new
             * reference to the buffer for the LRU and clear the
             * (now stale) dispose list state flag
             */
            if (list_lru_add(&bp->b_target->bt_lru, &bp->b_lru)) {
                bp->b_state &= ~XFS_BSTATE_DISPOSE;
                atomic_inc(&bp->b_hold);
            }
            spin_unlock(&bp->b_lock);
            spin_unlock(&pag->pag_buf_lock);
        } else {
            /*
             * most of the time buffers will already be removed from
             * the LRU, so optimise that case by checking for the
             * XFS_BSTATE_DISPOSE flag indicating the last list the
             * buffer was on was the disposal list
             */
            if (!(bp->b_state & XFS_BSTATE_DISPOSE)) {
                list_lru_del(&bp->b_target->bt_lru, &bp->b_lru);
            } else {
                ASSERT(list_empty(&bp->b_lru));
            }
            spin_unlock(&bp->b_lock);

            ASSERT(!(bp->b_flags & _XBF_DELWRI_Q));
            rb_erase(&bp->b_rbnode, &pag->pag_buf_tree);
            spin_unlock(&pag->pag_buf_lock);
            xfs_perag_put(pag);
            xfs_buf_free(bp);
        }
    release = atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock);
    spin_lock(&bp->b_lock);
    if (!release) {
        /*
         * Drop the in-flight state if the buffer is already on the LRU
         * and it holds the only reference. This is racy because we
         * haven't acquired the pag lock, but the use of _XBF_IN_FLIGHT
         * ensures the decrement occurs only once per-buf.
         */
        if ((atomic_read(&bp->b_hold) == 1) && !list_empty(&bp->b_lru))
            xfs_buf_ioacct_dec(bp);
        goto out_unlock;
    }

    /* the last reference has been dropped ... */
    xfs_buf_ioacct_dec(bp);
    if (!(bp->b_flags & XBF_STALE) && atomic_read(&bp->b_lru_ref)) {
        /*
         * If the buffer is added to the LRU take a new reference to the
         * buffer for the LRU and clear the (now stale) dispose list
         * state flag
         */
        if (list_lru_add(&bp->b_target->bt_lru, &bp->b_lru)) {
            bp->b_state &= ~XFS_BSTATE_DISPOSE;
            atomic_inc(&bp->b_hold);
        }
        spin_unlock(&pag->pag_buf_lock);
    } else {
        /*
         * most of the time buffers will already be removed from the
         * LRU, so optimise that case by checking for the
         * XFS_BSTATE_DISPOSE flag indicating the last list the buffer
         * was on was the disposal list
         */
        if (!(bp->b_state & XFS_BSTATE_DISPOSE)) {
            list_lru_del(&bp->b_target->bt_lru, &bp->b_lru);
        } else {
            ASSERT(list_empty(&bp->b_lru));
        }

        ASSERT(!(bp->b_flags & _XBF_DELWRI_Q));
        rb_erase(&bp->b_rbnode, &pag->pag_buf_tree);
        spin_unlock(&pag->pag_buf_lock);
        xfs_perag_put(pag);
        freebuf = true;
    }

out_unlock:
    spin_unlock(&bp->b_lock);

    if (freebuf)
        xfs_buf_free(bp);
}
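The reworked xfs_buf_rele() above handles both the final-reference and the not-final-reference paths under b_lock, and defers the actual free to after all locks are dropped via the freebuf flag. A rough userspace sketch of that shape, assuming a single mutex in place of the pag_buf_lock/b_lock pair and a toy obj type (none of these names are the kernel API):

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

/* Toy object mirroring the shape of the reworked release path:
 * decide under the lock, but defer the actual free until after unlock. */
struct obj {
    atomic_int      hold;
    pthread_mutex_t lock;
};

static void obj_rele(struct obj *o)
{
    bool release, freeobj = false;

    /* Drop the hold; only the caller dropping the last one may free. */
    release = (atomic_fetch_sub(&o->hold, 1) == 1);

    pthread_mutex_lock(&o->lock);
    if (!release)
        goto out_unlock;

    /* Last reference: mark for freeing, but never free under the lock. */
    freeobj = true;

out_unlock:
    pthread_mutex_unlock(&o->lock);

    if (freeobj) {
        pthread_mutex_destroy(&o->lock);
        free(o);
        puts("freed");
    }
}

int main(void)
{
    struct obj *o = malloc(sizeof(*o));

    if (!o)
        return 1;
    atomic_init(&o->hold, 2);
    pthread_mutex_init(&o->lock, NULL);

    obj_rele(o);    /* not the last hold: nothing freed */
    obj_rele(o);    /* last hold: freed only after the lock is dropped */
    return 0;
}

Build with -pthread. The kernel's atomic_dec_and_lock() additionally takes the per-AG lock only when the count actually reaches zero, which the sketch does not attempt to model.
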
@@ -1341,6 +1413,7 @@ xfs_buf_submit(
     * xfs_buf_ioend too early.
     */
    atomic_set(&bp->b_io_remaining, 1);
    xfs_buf_ioacct_inc(bp);
    _xfs_buf_ioapply(bp);

    /*
@@ -1526,13 +1599,19 @@ xfs_wait_buftarg(
    int loop = 0;

    /*
     * We need to flush the buffer workqueue to ensure that all IO
     * completion processing is 100% done. Just waiting on buffer locks is
     * not sufficient for async IO as the reference count held over IO is
     * not released until after the buffer lock is dropped. Hence we need to
     * ensure here that all reference counts have been dropped before we
     * start walking the LRU list.
     * First wait on the buftarg I/O count for all in-flight buffers to be
     * released. This is critical as new buffers do not make the LRU until
     * they are released.
     *
     * Next, flush the buffer workqueue to ensure all completion processing
     * has finished. Just waiting on buffer locks is not sufficient for
     * async IO as the reference count held over IO is not released until
     * after the buffer lock is dropped. Hence we need to ensure here that
     * all reference counts have been dropped before we start walking the
     * LRU list.
     */
    while (percpu_counter_sum(&btp->bt_io_count))
        delay(100);
    drain_workqueue(btp->bt_mount->m_buf_workqueue);

    /* loop until there is nothing left on the lru list. */
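The new quiesce order in xfs_wait_buftarg() is: first poll bt_io_count down to zero, then drain completion work, then walk the LRU. A small userspace model of that ordering, assuming an atomic counter and one worker thread standing in for an async completion (illustrative names only):

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <unistd.h>

static atomic_int io_count;             /* stand-in for bt_io_count */

/* Simulated async I/O: completes after a while, then "releases" the buffer,
 * which is the point at which the in-flight count is dropped. */
static void *io_completion(void *arg)
{
    (void)arg;
    usleep(200 * 1000);
    atomic_fetch_sub(&io_count, 1);
    return NULL;
}

int main(void)
{
    pthread_t t;

    atomic_store(&io_count, 1);         /* one buffer in flight */
    pthread_create(&t, NULL, io_completion, NULL);

    /* Step 1: wait for every in-flight buffer to be released, mirroring
     * the new first step of xfs_wait_buftarg(). */
    while (atomic_load(&io_count) != 0)
        usleep(100 * 1000);             /* rough analogue of delay(100) */

    /* Step 2: only now drain remaining completion work and walk the LRU;
     * nothing new can show up on it once the count is zero. */
    pthread_join(t, NULL);
    puts("buftarg quiesced");
    return 0;
}

Build with -pthread.
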
@@ -1629,6 +1708,8 @@ xfs_free_buftarg(
    struct xfs_buftarg  *btp)
{
    unregister_shrinker(&btp->bt_shrinker);
    ASSERT(percpu_counter_sum(&btp->bt_io_count) == 0);
    percpu_counter_destroy(&btp->bt_io_count);
    list_lru_destroy(&btp->bt_lru);

    if (mp->m_flags & XFS_MOUNT_BARRIER)
@@ -1693,6 +1774,9 @@ xfs_alloc_buftarg(
    if (list_lru_init(&btp->bt_lru))
        goto error;

    if (percpu_counter_init(&btp->bt_io_count, 0, GFP_KERNEL))
        goto error;

    btp->bt_shrinker.count_objects = xfs_buftarg_shrink_count;
    btp->bt_shrinker.scan_objects = xfs_buftarg_shrink_scan;
    btp->bt_shrinker.seeks = DEFAULT_SEEKS;
@@ -1834,7 +1918,7 @@ xfs_buf_delwri_submit_buffers(
         * side. We need to move the buffer onto the io_list
         * at this point so the caller can still access it.
         */
        bp->b_flags &= ~(_XBF_DELWRI_Q | XBF_ASYNC | XBF_WRITE_FAIL);
        bp->b_flags &= ~(_XBF_DELWRI_Q | XBF_WRITE_FAIL);
        bp->b_flags |= XBF_WRITE | XBF_ASYNC;
        if (wait_list) {
            xfs_buf_hold(bp);

fs/xfs/xfs_buf.h
@@ -43,6 +43,7 @@ typedef enum {
#define XBF_READ (1 << 0) /* buffer intended for reading from device */
#define XBF_WRITE (1 << 1) /* buffer intended for writing to device */
#define XBF_READ_AHEAD (1 << 2) /* asynchronous read-ahead */
#define XBF_NO_IOACCT (1 << 3) /* bypass I/O accounting (non-LRU bufs) */
#define XBF_ASYNC (1 << 4) /* initiator will not wait for completion */
#define XBF_DONE (1 << 5) /* all pages in the buffer uptodate */
#define XBF_STALE (1 << 6) /* buffer has been staled, do not find it */
@@ -62,6 +63,7 @@ typedef enum {
#define _XBF_KMEM (1 << 21)/* backed by heap memory */
#define _XBF_DELWRI_Q (1 << 22)/* buffer on a delwri queue */
#define _XBF_COMPOUND (1 << 23)/* compound buffer */
#define _XBF_IN_FLIGHT (1 << 25) /* I/O in flight, for accounting purposes */

typedef unsigned int xfs_buf_flags_t;

@@ -81,7 +83,8 @@ typedef unsigned int xfs_buf_flags_t;
    { _XBF_PAGES, "PAGES" }, \
    { _XBF_KMEM, "KMEM" }, \
    { _XBF_DELWRI_Q, "DELWRI_Q" }, \
    { _XBF_COMPOUND, "COMPOUND" }
    { _XBF_COMPOUND, "COMPOUND" }, \
    { _XBF_IN_FLIGHT, "IN_FLIGHT" }

/*
@@ -115,6 +118,8 @@ typedef struct xfs_buftarg {
    /* LRU control structures */
    struct shrinker bt_shrinker;
    struct list_lru bt_lru;

    struct percpu_counter bt_io_count;
} xfs_buftarg_t;

struct xfs_buf;

fs/xfs/xfs_buf_item.c
@@ -1081,6 +1081,8 @@ xfs_buf_iodone_callback_error(
    trace_xfs_buf_item_iodone_async(bp, _RET_IP_);
    ASSERT(bp->b_iodone != NULL);

    cfg = xfs_error_get_cfg(mp, XFS_ERR_METADATA, bp->b_error);

    /*
     * If the write was asynchronous then no one will be looking for the
     * error. If this is the first failure of this type, clear the error
@@ -1088,13 +1090,12 @@ xfs_buf_iodone_callback_error(
     * async write failure at least once, but we also need to set the buffer
     * up to behave correctly now for repeated failures.
     */
    if (!(bp->b_flags & (XBF_STALE|XBF_WRITE_FAIL)) ||
    if (!(bp->b_flags & (XBF_STALE | XBF_WRITE_FAIL)) ||
         bp->b_last_error != bp->b_error) {
        bp->b_flags |= (XBF_WRITE | XBF_ASYNC |
                        XBF_DONE | XBF_WRITE_FAIL);
        bp->b_flags |= (XBF_WRITE | XBF_DONE | XBF_WRITE_FAIL);
        bp->b_last_error = bp->b_error;
        bp->b_retries = 0;
        bp->b_first_retry_time = jiffies;
        if (cfg->retry_timeout && !bp->b_first_retry_time)
            bp->b_first_retry_time = jiffies;

        xfs_buf_ioerror(bp, 0);
        xfs_buf_submit(bp);
@@ -1105,7 +1106,6 @@ xfs_buf_iodone_callback_error(
     * Repeated failure on an async write. Take action according to the
     * error configuration we have been set up to use.
     */
    cfg = xfs_error_get_cfg(mp, XFS_ERR_METADATA, bp->b_error);

    if (cfg->max_retries != XFS_ERR_RETRY_FOREVER &&
        ++bp->b_retries > cfg->max_retries)
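The comments in this hunk describe the retry policy: the first async write failure of a given error class resets the per-buffer retry state and resubmits, while repeated failures of the same class are capped by the configured limits. A simplified, self-contained model of that decision; the struct and field names here are illustrative, not the kernel's xfs_error_cfg API:

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

#define RETRY_FOREVER  (-1)     /* counterpart of XFS_ERR_RETRY_FOREVER */

struct err_cfg {
    int max_retries;            /* per error class, e.g. for EIO */
};

struct wbuf {
    int last_error;             /* last error class seen on this buffer */
    int retries;                /* repeated failures of that class so far */
};

/* Decide whether a failed async write should be resubmitted. */
static bool handle_write_error(struct wbuf *bp, int error, const struct err_cfg *cfg)
{
    if (bp->last_error != error) {
        /* First failure of this class: reset state and retry once. */
        bp->last_error = error;
        bp->retries = 0;
        return true;
    }

    /* Repeated failure: retry only while under the configured cap. */
    if (cfg->max_retries != RETRY_FOREVER && ++bp->retries > cfg->max_retries)
        return false;
    return true;
}

int main(void)
{
    struct err_cfg cfg = { .max_retries = 2 };
    struct wbuf bp = { 0 };

    for (int i = 0; i < 5; i++)
        printf("attempt %d: %s\n", i,
               handle_write_error(&bp, EIO, &cfg) ? "retry" : "give up");
    return 0;
}

The real code also honours a retry timeout and fails the buffer permanently when the filesystem is unmounting; the sketch only models the retry-count cap.
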
fs/xfs/xfs_log.c
@@ -1415,7 +1415,7 @@ xlog_alloc_log(
     */
    error = -ENOMEM;
    bp = xfs_buf_alloc(mp->m_logdev_targp, XFS_BUF_DADDR_NULL,
               BTOBB(log->l_iclog_size), 0);
               BTOBB(log->l_iclog_size), XBF_NO_IOACCT);
    if (!bp)
        goto out_free_log;

@@ -1454,7 +1454,8 @@ xlog_alloc_log(
        prev_iclog = iclog;

        bp = xfs_buf_get_uncached(mp->m_logdev_targp,
                        BTOBB(log->l_iclog_size), 0);
                        BTOBB(log->l_iclog_size),
                        XBF_NO_IOACCT);
        if (!bp)
            goto out_free_iclog;

fs/xfs/xfs_mount.c
@@ -272,13 +272,15 @@ xfs_readsb(
    buf_ops = NULL;

    /*
     * Allocate a (locked) buffer to hold the superblock.
     * This will be kept around at all times to optimize
     * access to the superblock.
     * Allocate a (locked) buffer to hold the superblock. This will be kept
     * around at all times to optimize access to the superblock. Therefore,
     * set XBF_NO_IOACCT to make sure it doesn't hold the buftarg count
     * elevated.
     */
reread:
    error = xfs_buf_read_uncached(mp->m_ddev_targp, XFS_SB_DADDR,
                   BTOBB(sector_size), 0, &bp, buf_ops);
                   BTOBB(sector_size), XBF_NO_IOACCT, &bp,
                   buf_ops);
    if (error) {
        if (loud)
            xfs_warn(mp, "SB validate failed with error %d.", error);

fs/xfs/xfs_sysfs.c
@@ -634,6 +634,9 @@ xfs_error_get_cfg(
{
    struct xfs_error_cfg *cfg;

    if (error < 0)
        error = -error;

    switch (error) {
    case EIO:
        cfg = &mp->m_error_cfg[error_class][XFS_ERR_EIO];