xfs: debug mode log record crc error injection
XFS now uses CRC verification over a limited section of the log to detect torn writes prior to a crash. This is difficult to test directly due to the timing and hardware requirements to cause a short write. Add a mechanism to inject CRC errors into log records to facilitate testing torn write detection during log recovery. This mechanism is dangerous and can result in filesystem corruption. Thus, it is only available in DEBUG mode for testing/development purposes. Set a non-zero value to the following sysfs entry to enable error injection: /sys/fs/xfs/<dev>/log/log_badcrc_factor Once enabled, XFS intentionally writes an invalid CRC to a log record at some random point in the future based on the provided frequency. The filesystem immediately shuts down once the record has been written to the physical log to prevent metadata writeback (e.g., AIL insertion) once the log write completes. This helps reasonably simulate a torn write to the log as the affected record must be safe to discard. The next mount after the intentional shutdown requires log recovery and should detect and recover from the torn write. Note again that this _will_ result in data loss or worse. For testing and development purposes only! Signed-off-by: Brian Foster <bfoster@redhat.com> Reviewed-by: Dave Chinner <dchinner@redhat.com> Signed-off-by: Dave Chinner <david@fromorbit.com>
This commit is contained in:
parent
7088c4136f
commit
609adfc2ed
@ -1188,10 +1188,16 @@ xlog_iodone(xfs_buf_t *bp)
|
||||
int aborted = 0;
|
||||
|
||||
/*
|
||||
* Race to shutdown the filesystem if we see an error.
|
||||
* Race to shutdown the filesystem if we see an error or the iclog is in
|
||||
* IOABORT state. The IOABORT state is only set in DEBUG mode to inject
|
||||
* CRC errors into log recovery.
|
||||
*/
|
||||
if (XFS_TEST_ERROR(bp->b_error, l->l_mp,
|
||||
XFS_ERRTAG_IODONE_IOERR, XFS_RANDOM_IODONE_IOERR)) {
|
||||
if (XFS_TEST_ERROR(bp->b_error, l->l_mp, XFS_ERRTAG_IODONE_IOERR,
|
||||
XFS_RANDOM_IODONE_IOERR) ||
|
||||
iclog->ic_state & XLOG_STATE_IOABORT) {
|
||||
if (iclog->ic_state & XLOG_STATE_IOABORT)
|
||||
iclog->ic_state &= ~XLOG_STATE_IOABORT;
|
||||
|
||||
xfs_buf_ioerror_alert(bp, __func__);
|
||||
xfs_buf_stale(bp);
|
||||
xfs_force_shutdown(l->l_mp, SHUTDOWN_LOG_IO_ERROR);
|
||||
@ -1838,6 +1844,23 @@ xlog_sync(
|
||||
/* calculate the checksum */
|
||||
iclog->ic_header.h_crc = xlog_cksum(log, &iclog->ic_header,
|
||||
iclog->ic_datap, size);
|
||||
#ifdef DEBUG
|
||||
/*
|
||||
* Intentionally corrupt the log record CRC based on the error injection
|
||||
* frequency, if defined. This facilitates testing log recovery in the
|
||||
* event of torn writes. Hence, set the IOABORT state to abort the log
|
||||
* write on I/O completion and shutdown the fs. The subsequent mount
|
||||
* detects the bad CRC and attempts to recover.
|
||||
*/
|
||||
if (log->l_badcrc_factor &&
|
||||
(prandom_u32() % log->l_badcrc_factor == 0)) {
|
||||
iclog->ic_header.h_crc &= 0xAAAAAAAA;
|
||||
iclog->ic_state |= XLOG_STATE_IOABORT;
|
||||
xfs_warn(log->l_mp,
|
||||
"Intentionally corrupted log record at LSN 0x%llx. Shutdown imminent.",
|
||||
be64_to_cpu(iclog->ic_header.h_lsn));
|
||||
}
|
||||
#endif
|
||||
|
||||
bp->b_io_length = BTOBB(count);
|
||||
bp->b_fspriv = iclog;
|
||||
@ -2791,11 +2814,19 @@ xlog_state_do_callback(
|
||||
}
|
||||
} while (!ioerrors && loopdidcallbacks);
|
||||
|
||||
/*
|
||||
* make one last gasp attempt to see if iclogs are being left in
|
||||
* limbo..
|
||||
*/
|
||||
#ifdef DEBUG
|
||||
/*
|
||||
* Make one last gasp attempt to see if iclogs are being left in limbo.
|
||||
* If the above loop finds an iclog earlier than the current iclog and
|
||||
* in one of the syncing states, the current iclog is put into
|
||||
* DO_CALLBACK and the callbacks are deferred to the completion of the
|
||||
* earlier iclog. Walk the iclogs in order and make sure that no iclog
|
||||
* is in DO_CALLBACK unless an earlier iclog is in one of the syncing
|
||||
* states.
|
||||
*
|
||||
* Note that SYNCING|IOABORT is a valid state so we cannot just check
|
||||
* for ic_state == SYNCING.
|
||||
*/
|
||||
if (funcdidcallbacks) {
|
||||
first_iclog = iclog = log->l_iclog;
|
||||
do {
|
||||
@ -2810,7 +2841,7 @@ xlog_state_do_callback(
|
||||
* IOERROR - give up hope all ye who enter here
|
||||
*/
|
||||
if (iclog->ic_state == XLOG_STATE_WANT_SYNC ||
|
||||
iclog->ic_state == XLOG_STATE_SYNCING ||
|
||||
iclog->ic_state & XLOG_STATE_SYNCING ||
|
||||
iclog->ic_state == XLOG_STATE_DONE_SYNC ||
|
||||
iclog->ic_state == XLOG_STATE_IOERROR )
|
||||
break;
|
||||
|
@ -62,6 +62,7 @@ static inline uint xlog_get_client_id(__be32 i)
|
||||
#define XLOG_STATE_CALLBACK 0x0020 /* Callback functions now */
|
||||
#define XLOG_STATE_DIRTY 0x0040 /* Dirty IC log, not ready for ACTIVE status*/
|
||||
#define XLOG_STATE_IOERROR 0x0080 /* IO error happened in sync'ing log */
|
||||
#define XLOG_STATE_IOABORT 0x0100 /* force abort on I/O completion (debug) */
|
||||
#define XLOG_STATE_ALL 0x7FFF /* All possible valid flags */
|
||||
#define XLOG_STATE_NOTUSED 0x8000 /* This IC log not being used */
|
||||
|
||||
@ -410,6 +411,8 @@ struct xlog {
|
||||
/* The following fields are used for debugging; need to hold icloglock */
|
||||
#ifdef DEBUG
|
||||
void *l_iclog_bak[XLOG_MAX_ICLOGS];
|
||||
/* log record crc error injection factor */
|
||||
uint32_t l_badcrc_factor;
|
||||
#endif
|
||||
|
||||
};
|
||||
|
@ -255,11 +255,47 @@ write_grant_head_show(
|
||||
}
|
||||
XFS_SYSFS_ATTR_RO(write_grant_head);
|
||||
|
||||
#ifdef DEBUG
|
||||
STATIC ssize_t
|
||||
log_badcrc_factor_store(
|
||||
struct kobject *kobject,
|
||||
const char *buf,
|
||||
size_t count)
|
||||
{
|
||||
struct xlog *log = to_xlog(kobject);
|
||||
int ret;
|
||||
uint32_t val;
|
||||
|
||||
ret = kstrtouint(buf, 0, &val);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
log->l_badcrc_factor = val;
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
STATIC ssize_t
|
||||
log_badcrc_factor_show(
|
||||
struct kobject *kobject,
|
||||
char *buf)
|
||||
{
|
||||
struct xlog *log = to_xlog(kobject);
|
||||
|
||||
return snprintf(buf, PAGE_SIZE, "%d\n", log->l_badcrc_factor);
|
||||
}
|
||||
|
||||
XFS_SYSFS_ATTR_RW(log_badcrc_factor);
|
||||
#endif /* DEBUG */
|
||||
|
||||
static struct attribute *xfs_log_attrs[] = {
|
||||
ATTR_LIST(log_head_lsn),
|
||||
ATTR_LIST(log_tail_lsn),
|
||||
ATTR_LIST(reserve_grant_head),
|
||||
ATTR_LIST(write_grant_head),
|
||||
#ifdef DEBUG
|
||||
ATTR_LIST(log_badcrc_factor),
|
||||
#endif
|
||||
NULL,
|
||||
};
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user