[PATCH] libata-eh-fw: update ata_scsi_error() for new EH

Update ata_scsi_error() for new EH.  ata_scsi_error() is responsible
for claiming timed out qcs and invoking ->error_handler in safe and
synchronized manner.  As the state of the controller is unknown if a
qc has timed out, the port is frozen in such cases.

Note that ata_scsi_timed_out() isn't used for new EH.  This is because
a timed out qc cannot be claimed by EH without freezing the port and
freezing the port in ata_scsi_timed_out() results in unnecessary
abortion of other active qcs.  ata_scsi_timed_out() can be removed
once all drivers are converted to new EH.

While at it, add 'TODO: kill' comments to old EH functions.

Signed-off-by: Tejun Heo <htejun@gmail.com>
This commit is contained in:
Tejun Heo 2006-05-15 20:58:12 +09:00
parent dafadcde8d
commit ad9e276244
2 changed files with 134 additions and 5 deletions

View File

@ -44,6 +44,8 @@
#include "libata.h"
static void __ata_port_freeze(struct ata_port *ap);
/**
* ata_scsi_timed_out - SCSI layer time out callback
* @cmd: timed out SCSI command
@ -55,6 +57,8 @@
* from finishing it by setting EH_SCHEDULED and return
* EH_NOT_HANDLED.
*
* TODO: kill this function once old EH is gone.
*
* LOCKING:
* Called from timer context
*
@ -67,10 +71,16 @@ enum scsi_eh_timer_return ata_scsi_timed_out(struct scsi_cmnd *cmd)
struct ata_port *ap = ata_shost_to_port(host);
unsigned long flags;
struct ata_queued_cmd *qc;
enum scsi_eh_timer_return ret = EH_HANDLED;
enum scsi_eh_timer_return ret;
DPRINTK("ENTER\n");
if (ap->ops->error_handler) {
ret = EH_NOT_HANDLED;
goto out;
}
ret = EH_HANDLED;
spin_lock_irqsave(&ap->host_set->lock, flags);
qc = ata_qc_from_tag(ap, ap->active_tag);
if (qc) {
@ -81,6 +91,7 @@ enum scsi_eh_timer_return ata_scsi_timed_out(struct scsi_cmnd *cmd)
}
spin_unlock_irqrestore(&ap->host_set->lock, flags);
out:
DPRINTK("EXIT, ret=%d\n", ret);
return ret;
}
@ -100,21 +111,132 @@ enum scsi_eh_timer_return ata_scsi_timed_out(struct scsi_cmnd *cmd)
void ata_scsi_error(struct Scsi_Host *host)
{
struct ata_port *ap = ata_shost_to_port(host);
spinlock_t *hs_lock = &ap->host_set->lock;
int i, repeat_cnt = ATA_EH_MAX_REPEAT;
unsigned long flags;
DPRINTK("ENTER\n");
/* synchronize with IRQ handler and port task */
spin_unlock_wait(&ap->host_set->lock);
/* synchronize with port task */
ata_port_flush_task(ap);
WARN_ON(ata_qc_from_tag(ap, ap->active_tag) == NULL);
/* synchronize with host_set lock and sort out timeouts */
ap->ops->eng_timeout(ap);
/* For new EH, all qcs are finished in one of three ways -
* normal completion, error completion, and SCSI timeout.
* Both cmpletions can race against SCSI timeout. When normal
* completion wins, the qc never reaches EH. When error
* completion wins, the qc has ATA_QCFLAG_FAILED set.
*
* When SCSI timeout wins, things are a bit more complex.
* Normal or error completion can occur after the timeout but
* before this point. In such cases, both types of
* completions are honored. A scmd is determined to have
* timed out iff its associated qc is active and not failed.
*/
if (ap->ops->error_handler) {
struct scsi_cmnd *scmd, *tmp;
int nr_timedout = 0;
spin_lock_irqsave(hs_lock, flags);
list_for_each_entry_safe(scmd, tmp, &host->eh_cmd_q, eh_entry) {
struct ata_queued_cmd *qc;
for (i = 0; i < ATA_MAX_QUEUE; i++) {
qc = __ata_qc_from_tag(ap, i);
if (qc->flags & ATA_QCFLAG_ACTIVE &&
qc->scsicmd == scmd)
break;
}
if (i < ATA_MAX_QUEUE) {
/* the scmd has an associated qc */
if (!(qc->flags & ATA_QCFLAG_FAILED)) {
/* which hasn't failed yet, timeout */
qc->err_mask |= AC_ERR_TIMEOUT;
qc->flags |= ATA_QCFLAG_FAILED;
nr_timedout++;
}
} else {
/* Normal completion occurred after
* SCSI timeout but before this point.
* Successfully complete it.
*/
scmd->retries = scmd->allowed;
scsi_eh_finish_cmd(scmd, &ap->eh_done_q);
}
}
/* If we have timed out qcs. They belong to EH from
* this point but the state of the controller is
* unknown. Freeze the port to make sure the IRQ
* handler doesn't diddle with those qcs. This must
* be done atomically w.r.t. setting QCFLAG_FAILED.
*/
if (nr_timedout)
__ata_port_freeze(ap);
spin_unlock_irqrestore(hs_lock, flags);
} else
spin_unlock_wait(hs_lock);
repeat:
/* invoke error handler */
if (ap->ops->error_handler) {
/* clear EH pending */
spin_lock_irqsave(hs_lock, flags);
ap->flags &= ~ATA_FLAG_EH_PENDING;
spin_unlock_irqrestore(hs_lock, flags);
/* invoke EH */
ap->ops->error_handler(ap);
/* Exception might have happend after ->error_handler
* recovered the port but before this point. Repeat
* EH in such case.
*/
spin_lock_irqsave(hs_lock, flags);
if (ap->flags & ATA_FLAG_EH_PENDING) {
if (--repeat_cnt) {
ata_port_printk(ap, KERN_INFO,
"EH pending after completion, "
"repeating EH (cnt=%d)\n", repeat_cnt);
spin_unlock_irqrestore(hs_lock, flags);
goto repeat;
}
ata_port_printk(ap, KERN_ERR, "EH pending after %d "
"tries, giving up\n", ATA_EH_MAX_REPEAT);
}
/* Clear host_eh_scheduled while holding hs_lock such
* that if exception occurs after this point but
* before EH completion, SCSI midlayer will
* re-initiate EH.
*/
host->host_eh_scheduled = 0;
spin_unlock_irqrestore(hs_lock, flags);
} else {
WARN_ON(ata_qc_from_tag(ap, ap->active_tag) == NULL);
ap->ops->eng_timeout(ap);
}
/* finish or retry handled scmd's and clean up */
WARN_ON(host->host_failed || !list_empty(&host->eh_cmd_q));
scsi_eh_flush_done_q(&ap->eh_done_q);
/* clean up */
spin_lock_irqsave(hs_lock, flags);
if (ap->flags & ATA_FLAG_RECOVERED)
ata_port_printk(ap, KERN_INFO, "EH complete\n");
ap->flags &= ~ATA_FLAG_RECOVERED;
spin_unlock_irqrestore(hs_lock, flags);
DPRINTK("EXIT\n");
}
@ -133,6 +255,8 @@ void ata_scsi_error(struct Scsi_Host *host)
* an interrupt was not delivered to the driver, even though the
* transaction completed successfully.
*
* TODO: kill this function once old EH is gone.
*
* LOCKING:
* Inherited from SCSI layer (none, can sleep)
*/
@ -198,6 +322,8 @@ static void ata_qc_timeout(struct ata_queued_cmd *qc)
* an interrupt was not delivered to the driver, even though the
* transaction completed successfully.
*
* TODO: kill this function once old EH is gone.
*
* LOCKING:
* Inherited from SCSI layer (none, can sleep)
*/

View File

@ -225,6 +225,9 @@ enum {
ATA_PORT_PRIMARY = (1 << 0),
ATA_PORT_SECONDARY = (1 << 1),
/* max repeat if error condition is still set after ->error_handler */
ATA_EH_MAX_REPEAT = 5,
/* how hard are we gonna try to probe/recover devices */
ATA_PROBE_MAX_TRIES = 3,
};