scsi: core: Avoid leaving shost->last_reset with stale value if EH does not run
The changes to issue the abort from the scmd->abort_work instead of the EH
thread introduced a problem if eh_deadline is used. If aborting the
command(s) is successful, and there are never any scmds added to the
shost->eh_cmd_q, there is no code path which will reset the ->last_reset
value back to zero.
The effect of this is that after a successful abort with no EH thread
activity, a subsequent timeout, perhaps a long time later, might
immediately be considered past a user-set eh_deadline time, and the host
will be reset with no attempt at recovery.
Fix this by resetting ->last_reset back to zero in scmd_eh_abort_handler()
when it is determined that the EH thread will not run and so will not reset it itself.
Thanks to Gopinath Marappan for investigating this problem.
Link: https://lore.kernel.org/r/20211029194311.17504-2-emilne@redhat.com
Fixes: e494f6a728 ("[SCSI] improved eh timeout handler")
Cc: stable@vger.kernel.org
Signed-off-by: Ewan D. Milne <emilne@redhat.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Committed by: Martin K. Petersen
Parent: 5f7cf82c1d
Commit: 5ae17501bc
@@ -387,6 +387,7 @@ struct Scsi_Host *scsi_host_alloc(struct scsi_host_template *sht, int privsize)
|
|||||||
shost->shost_state = SHOST_CREATED;
|
shost->shost_state = SHOST_CREATED;
|
||||||
INIT_LIST_HEAD(&shost->__devices);
|
INIT_LIST_HEAD(&shost->__devices);
|
||||||
INIT_LIST_HEAD(&shost->__targets);
|
INIT_LIST_HEAD(&shost->__targets);
|
||||||
|
INIT_LIST_HEAD(&shost->eh_abort_list);
|
||||||
INIT_LIST_HEAD(&shost->eh_cmd_q);
|
INIT_LIST_HEAD(&shost->eh_cmd_q);
|
||||||
INIT_LIST_HEAD(&shost->starved_list);
|
INIT_LIST_HEAD(&shost->starved_list);
|
||||||
init_waitqueue_head(&shost->host_wait);
|
init_waitqueue_head(&shost->host_wait);
|
||||||
|
|||||||
@@ -133,6 +133,23 @@ static bool scsi_eh_should_retry_cmd(struct scsi_cmnd *cmd)
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * scsi_eh_complete_abort - finish abort bookkeeping for a command
 * @scmd: command to unlink from whichever list its eh_entry is on
 * @shost: host whose eh_abort_list, eh_cmd_q and last_reset are examined
 *
 * Runs under shost->host_lock with interrupt state saved and restored.
 * After unlinking @scmd, ->last_reset is cleared only when both
 * eh_abort_list and eh_cmd_q are empty and eh_deadline is in use (!= -1),
 * so that a later timeout is not measured against a stale ->last_reset.
 */
static void scsi_eh_complete_abort(struct scsi_cmnd *scmd, struct Scsi_Host *shost)
|
||||||
|
{
|
||||||
|
unsigned long flags;
|
||||||
|
|
||||||
|
spin_lock_irqsave(shost->host_lock, flags);
|
||||||
|
list_del_init(&scmd->eh_entry);
|
||||||
|
/*
|
||||||
|
* If the abort succeeds, and there is no further
|
||||||
|
* EH action, clear the ->last_reset time.
|
||||||
|
*/
|
||||||
|
if (list_empty(&shost->eh_abort_list) &&
|
||||||
|
list_empty(&shost->eh_cmd_q))
|
||||||
|
if (shost->eh_deadline != -1)
|
||||||
|
shost->last_reset = 0;
|
||||||
|
spin_unlock_irqrestore(shost->host_lock, flags);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* scmd_eh_abort_handler - Handle command aborts
|
* scmd_eh_abort_handler - Handle command aborts
|
||||||
* @work: command to be aborted.
|
* @work: command to be aborted.
|
||||||
@@ -150,6 +167,7 @@ scmd_eh_abort_handler(struct work_struct *work)
|
|||||||
container_of(work, struct scsi_cmnd, abort_work.work);
|
container_of(work, struct scsi_cmnd, abort_work.work);
|
||||||
struct scsi_device *sdev = scmd->device;
|
struct scsi_device *sdev = scmd->device;
|
||||||
enum scsi_disposition rtn;
|
enum scsi_disposition rtn;
|
||||||
|
unsigned long flags;
|
||||||
|
|
||||||
if (scsi_host_eh_past_deadline(sdev->host)) {
|
if (scsi_host_eh_past_deadline(sdev->host)) {
|
||||||
SCSI_LOG_ERROR_RECOVERY(3,
|
SCSI_LOG_ERROR_RECOVERY(3,
|
||||||
@@ -173,12 +191,14 @@ scmd_eh_abort_handler(struct work_struct *work)
|
|||||||
SCSI_LOG_ERROR_RECOVERY(3,
|
SCSI_LOG_ERROR_RECOVERY(3,
|
||||||
scmd_printk(KERN_WARNING, scmd,
|
scmd_printk(KERN_WARNING, scmd,
|
||||||
"retry aborted command\n"));
|
"retry aborted command\n"));
|
||||||
|
scsi_eh_complete_abort(scmd, sdev->host);
|
||||||
scsi_queue_insert(scmd, SCSI_MLQUEUE_EH_RETRY);
|
scsi_queue_insert(scmd, SCSI_MLQUEUE_EH_RETRY);
|
||||||
return;
|
return;
|
||||||
} else {
|
} else {
|
||||||
SCSI_LOG_ERROR_RECOVERY(3,
|
SCSI_LOG_ERROR_RECOVERY(3,
|
||||||
scmd_printk(KERN_WARNING, scmd,
|
scmd_printk(KERN_WARNING, scmd,
|
||||||
"finish aborted command\n"));
|
"finish aborted command\n"));
|
||||||
|
scsi_eh_complete_abort(scmd, sdev->host);
|
||||||
scsi_finish_command(scmd);
|
scsi_finish_command(scmd);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@@ -191,6 +211,9 @@ scmd_eh_abort_handler(struct work_struct *work)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
spin_lock_irqsave(sdev->host->host_lock, flags);
|
||||||
|
list_del_init(&scmd->eh_entry);
|
||||||
|
spin_unlock_irqrestore(sdev->host->host_lock, flags);
|
||||||
scsi_eh_scmd_add(scmd);
|
scsi_eh_scmd_add(scmd);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -221,6 +244,8 @@ scsi_abort_command(struct scsi_cmnd *scmd)
|
|||||||
spin_lock_irqsave(shost->host_lock, flags);
|
spin_lock_irqsave(shost->host_lock, flags);
|
||||||
if (shost->eh_deadline != -1 && !shost->last_reset)
|
if (shost->eh_deadline != -1 && !shost->last_reset)
|
||||||
shost->last_reset = jiffies;
|
shost->last_reset = jiffies;
|
||||||
|
BUG_ON(!list_empty(&scmd->eh_entry));
|
||||||
|
list_add_tail(&scmd->eh_entry, &shost->eh_abort_list);
|
||||||
spin_unlock_irqrestore(shost->host_lock, flags);
|
spin_unlock_irqrestore(shost->host_lock, flags);
|
||||||
|
|
||||||
scmd->eh_eflags |= SCSI_EH_ABORT_SCHEDULED;
|
scmd->eh_eflags |= SCSI_EH_ABORT_SCHEDULED;
|
||||||
|
|||||||
@@ -1143,6 +1143,7 @@ void scsi_init_command(struct scsi_device *dev, struct scsi_cmnd *cmd)
|
|||||||
cmd->sense_buffer = buf;
|
cmd->sense_buffer = buf;
|
||||||
cmd->prot_sdb = prot;
|
cmd->prot_sdb = prot;
|
||||||
cmd->flags = flags;
|
cmd->flags = flags;
|
||||||
|
INIT_LIST_HEAD(&cmd->eh_entry);
|
||||||
INIT_DELAYED_WORK(&cmd->abort_work, scmd_eh_abort_handler);
|
INIT_DELAYED_WORK(&cmd->abort_work, scmd_eh_abort_handler);
|
||||||
cmd->jiffies_at_alloc = jiffies_at_alloc;
|
cmd->jiffies_at_alloc = jiffies_at_alloc;
|
||||||
cmd->retries = retries;
|
cmd->retries = retries;
|
||||||
|
|||||||
@@ -73,7 +73,7 @@ enum scsi_cmnd_submitter {
|
|||||||
struct scsi_cmnd {
|
struct scsi_cmnd {
|
||||||
struct scsi_request req;
|
struct scsi_request req;
|
||||||
struct scsi_device *device;
|
struct scsi_device *device;
|
||||||
struct list_head eh_entry; /* entry for the host eh_cmd_q */
|
struct list_head eh_entry; /* entry for the host eh_abort_list/eh_cmd_q */
|
||||||
struct delayed_work abort_work;
|
struct delayed_work abort_work;
|
||||||
|
|
||||||
struct rcu_head rcu;
|
struct rcu_head rcu;
|
||||||
|
|||||||
@@ -551,6 +551,7 @@ struct Scsi_Host {
|
|||||||
|
|
||||||
struct mutex scan_mutex;/* serialize scanning activity */
|
struct mutex scan_mutex;/* serialize scanning activity */
|
||||||
|
|
||||||
|
struct list_head eh_abort_list;
|
||||||
struct list_head eh_cmd_q;
|
struct list_head eh_cmd_q;
|
||||||
struct task_struct * ehandler; /* Error recovery thread. */
|
struct task_struct * ehandler; /* Error recovery thread. */
|
||||||
struct completion * eh_action; /* Wait for specific actions on the
|
struct completion * eh_action; /* Wait for specific actions on the
|
||||||
|
|||||||
Reference in New Issue
Block a user