xfs: stagger the starting AG of scrub iscans to reduce contention

Online directory and parent repairs on parent-pointer equipped
filesystems have shown that starting a large number of parallel iscans
causes a lot of AGI buffer contention.  Try to reduce this by making
iscans wrap around the end of the filesystem, and by using a rotor to
stagger where each scanner begins.  Surprisingly, this boosts
CPU utilization (on the author's test machines) from effectively
single-threaded to 160%.  Not great, but see the next patch.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
This commit is contained in:
Darrick J. Wong 2024-02-22 12:30:46 -08:00
parent 4e98cc905c
commit c473a3320b
3 changed files with 90 additions and 13 deletions

View File

@ -170,10 +170,24 @@ xchk_iscan_move_cursor(
{
struct xfs_scrub *sc = iscan->sc;
struct xfs_mount *mp = sc->mp;
xfs_ino_t cursor, visited;
BUILD_BUG_ON(XFS_MAXINUMBER == NULLFSINO);
/*
* Special-case ino == 0 here so that we never set visited_ino to
* NULLFSINO when wrapping around EOFS, for that will let through all
* live updates.
*/
cursor = XFS_AGINO_TO_INO(mp, agno, agino);
if (cursor == 0)
visited = XFS_MAXINUMBER;
else
visited = cursor - 1;
mutex_lock(&iscan->lock);
iscan->cursor_ino = XFS_AGINO_TO_INO(mp, agno, agino);
iscan->__visited_ino = iscan->cursor_ino - 1;
iscan->cursor_ino = cursor;
iscan->__visited_ino = visited;
trace_xchk_iscan_move_cursor(iscan);
mutex_unlock(&iscan->lock);
}
@ -257,12 +271,13 @@ xchk_iscan_advance(
* Did not find any more inodes in this AG, move on to the next
* AG.
*/
xchk_iscan_move_cursor(iscan, ++agno, 0);
agno = (agno + 1) % mp->m_sb.sb_agcount;
xchk_iscan_move_cursor(iscan, agno, 0);
xfs_trans_brelse(sc->tp, agi_bp);
xfs_perag_put(pag);
trace_xchk_iscan_advance_ag(iscan);
} while (agno < mp->m_sb.sb_agcount);
} while (iscan->cursor_ino != iscan->scan_start_ino);
xchk_iscan_finish(iscan);
return 0;
@ -420,6 +435,23 @@ xchk_iscan_teardown(
mutex_destroy(&iscan->lock);
}
/*
 * Choose the inode number at which a new inode scan should begin.
 *
 * A function-local static counter is bumped atomically on every call, so
 * each caller (regardless of which mount it passes in) draws a distinct
 * ticket.  The ticket is folded into the range [1, agcount] and subtracted
 * from the AG count, which means successive callers are handed the last AG,
 * then the one before it, and so on down to AG 0 before the sequence repeats.
 *
 * Returns the inode number corresponding to agino 0 of the selected AG.
 */
static inline xfs_ino_t
xchk_iscan_rotor(
	struct xfs_mount	*mp)
{
	static atomic_t		agi_rotor;
	unsigned int		agcount = mp->m_sb.sb_agcount;
	unsigned int		ticket;
	unsigned int		steps_back;
	unsigned int		start_agno;

	/* Zero-based sequence number of this caller. */
	ticket = atomic_inc_return(&agi_rotor) - 1;

	/*
	 * The rotor walks *backwards* through the AGs.  Bias the wrapped
	 * ticket by one so that taking it away from the AG count always
	 * yields a valid AG number (agcount - 1 down to 0).
	 */
	steps_back = (ticket % agcount) + 1;
	start_agno = agcount - steps_back;

	return XFS_AGINO_TO_INO(mp, start_agno, 0);
}
/*
* Set ourselves up to start an inode scan. If the @iget_timeout and
* @iget_retry_delay parameters are set, the scan will try to iget each inode
@ -434,15 +466,20 @@ xchk_iscan_start(
unsigned int iget_retry_delay,
struct xchk_iscan *iscan)
{
xfs_ino_t start_ino;
start_ino = xchk_iscan_rotor(sc->mp);
iscan->sc = sc;
clear_bit(XCHK_ISCAN_OPSTATE_ABORTED, &iscan->__opstate);
iscan->iget_timeout = iget_timeout;
iscan->iget_retry_delay = iget_retry_delay;
iscan->__visited_ino = 0;
iscan->cursor_ino = 0;
iscan->__visited_ino = start_ino;
iscan->cursor_ino = start_ino;
iscan->scan_start_ino = start_ino;
mutex_init(&iscan->lock);
trace_xchk_iscan_start(iscan);
trace_xchk_iscan_start(iscan, start_ino);
}
/*
@ -471,15 +508,45 @@ xchk_iscan_want_live_update(
struct xchk_iscan *iscan,
xfs_ino_t ino)
{
bool ret;
bool ret = false;
if (xchk_iscan_aborted(iscan))
return false;
mutex_lock(&iscan->lock);
trace_xchk_iscan_want_live_update(iscan, ino);
ret = iscan->__visited_ino >= ino;
mutex_unlock(&iscan->lock);
trace_xchk_iscan_want_live_update(iscan, ino);
/* Scan is finished, caller should receive all updates. */
if (iscan->__visited_ino == NULLFSINO) {
ret = true;
goto unlock;
}
/*
* The visited cursor hasn't yet wrapped around the end of the FS. If
* @ino is inside the starred range, the caller should receive updates:
*
* 0 ------------ S ************ V ------------ EOFS
*/
if (iscan->scan_start_ino <= iscan->__visited_ino) {
if (ino >= iscan->scan_start_ino &&
ino <= iscan->__visited_ino)
ret = true;
goto unlock;
}
/*
* The visited cursor wrapped around the end of the FS. If @ino is
* inside the starred range, the caller should receive updates:
*
* 0 ************ V ------------ S ************ EOFS
*/
if (ino >= iscan->scan_start_ino || ino <= iscan->__visited_ino)
ret = true;
unlock:
mutex_unlock(&iscan->lock);
return ret;
}

View File

@ -12,6 +12,13 @@ struct xchk_iscan {
/* Lock to protect the scan cursor. */
struct mutex lock;
/*
* This is the first inode in the inumber address space that we
* examined. When the scan wraps around back to here, the scan is
* finished.
*/
xfs_ino_t scan_start_ino;
/* This is the inode that will be examined next. */
xfs_ino_t cursor_ino;

View File

@ -1173,25 +1173,27 @@ DEFINE_EVENT(xchk_iscan_class, name, \
DEFINE_ISCAN_EVENT(xchk_iscan_move_cursor);
DEFINE_ISCAN_EVENT(xchk_iscan_visit);
DEFINE_ISCAN_EVENT(xchk_iscan_advance_ag);
DEFINE_ISCAN_EVENT(xchk_iscan_start);
DECLARE_EVENT_CLASS(xchk_iscan_ino_class,
TP_PROTO(struct xchk_iscan *iscan, xfs_ino_t ino),
TP_ARGS(iscan, ino),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(xfs_ino_t, startino)
__field(xfs_ino_t, cursor)
__field(xfs_ino_t, visited)
__field(xfs_ino_t, ino)
),
TP_fast_assign(
__entry->dev = iscan->sc->mp->m_super->s_dev;
__entry->startino = iscan->scan_start_ino;
__entry->cursor = iscan->cursor_ino;
__entry->visited = iscan->__visited_ino;
__entry->ino = ino;
),
TP_printk("dev %d:%d iscan cursor 0x%llx visited 0x%llx ino 0x%llx",
TP_printk("dev %d:%d iscan start 0x%llx cursor 0x%llx visited 0x%llx ino 0x%llx",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->startino,
__entry->cursor,
__entry->visited,
__entry->ino)
@ -1201,6 +1203,7 @@ DEFINE_EVENT(xchk_iscan_ino_class, name, \
TP_PROTO(struct xchk_iscan *iscan, xfs_ino_t ino), \
TP_ARGS(iscan, ino))
DEFINE_ISCAN_INO_EVENT(xchk_iscan_want_live_update);
DEFINE_ISCAN_INO_EVENT(xchk_iscan_start);
TRACE_EVENT(xchk_iscan_iget,
TP_PROTO(struct xchk_iscan *iscan, int error),