linux/drivers/s390/scsi/zfcp_erp.c

2850 lines
78 KiB
C
Raw Normal View History

/*
* This file is part of the zfcp device driver for
* FCP adapters for IBM System z9 and zSeries.
*
* (C) Copyright IBM Corp. 2002, 2006
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#define ZFCP_LOG_AREA ZFCP_LOG_AREA_ERP
#include "zfcp_ext.h"
static int zfcp_erp_adapter_reopen_internal(struct zfcp_adapter *, int, u8,
void *);
static int zfcp_erp_port_forced_reopen_internal(struct zfcp_port *, int, u8,
void *);
static int zfcp_erp_port_reopen_internal(struct zfcp_port *, int, u8, void *);
static int zfcp_erp_unit_reopen_internal(struct zfcp_unit *, int, u8, void *);
static int zfcp_erp_port_reopen_all_internal(struct zfcp_adapter *, int, u8,
void *);
static int zfcp_erp_unit_reopen_all_internal(struct zfcp_port *, int, u8,
void *);
static void zfcp_erp_adapter_block(struct zfcp_adapter *, int);
static void zfcp_erp_adapter_unblock(struct zfcp_adapter *);
static void zfcp_erp_port_block(struct zfcp_port *, int);
static void zfcp_erp_port_unblock(struct zfcp_port *);
static void zfcp_erp_unit_block(struct zfcp_unit *, int);
static void zfcp_erp_unit_unblock(struct zfcp_unit *);
static int zfcp_erp_thread(void *);
static int zfcp_erp_strategy(struct zfcp_erp_action *);
static int zfcp_erp_strategy_do_action(struct zfcp_erp_action *);
static int zfcp_erp_strategy_memwait(struct zfcp_erp_action *);
static int zfcp_erp_strategy_check_target(struct zfcp_erp_action *, int);
static int zfcp_erp_strategy_check_unit(struct zfcp_unit *, int);
static int zfcp_erp_strategy_check_port(struct zfcp_port *, int);
static int zfcp_erp_strategy_check_adapter(struct zfcp_adapter *, int);
static int zfcp_erp_strategy_statechange(int, u32, struct zfcp_adapter *,
struct zfcp_port *,
struct zfcp_unit *, int);
static int zfcp_erp_strategy_statechange_detected(atomic_t *, u32);
static int zfcp_erp_strategy_followup_actions(int, struct zfcp_adapter *,
struct zfcp_port *,
struct zfcp_unit *, int);
static int zfcp_erp_strategy_check_queues(struct zfcp_adapter *);
static int zfcp_erp_strategy_check_action(struct zfcp_erp_action *, int);
static int zfcp_erp_adapter_strategy(struct zfcp_erp_action *);
static int zfcp_erp_adapter_strategy_generic(struct zfcp_erp_action *, int);
static int zfcp_erp_adapter_strategy_close(struct zfcp_erp_action *);
static int zfcp_erp_adapter_strategy_open(struct zfcp_erp_action *);
static int zfcp_erp_adapter_strategy_open_qdio(struct zfcp_erp_action *);
static int zfcp_erp_adapter_strategy_open_fsf(struct zfcp_erp_action *);
static int zfcp_erp_adapter_strategy_open_fsf_xconfig(struct zfcp_erp_action *);
static int zfcp_erp_adapter_strategy_open_fsf_xport(struct zfcp_erp_action *);
static int zfcp_erp_adapter_strategy_open_fsf_statusread(
struct zfcp_erp_action *);
static int zfcp_erp_port_forced_strategy(struct zfcp_erp_action *);
static int zfcp_erp_port_forced_strategy_close(struct zfcp_erp_action *);
static int zfcp_erp_port_strategy(struct zfcp_erp_action *);
static int zfcp_erp_port_strategy_clearstati(struct zfcp_port *);
static int zfcp_erp_port_strategy_close(struct zfcp_erp_action *);
static int zfcp_erp_port_strategy_open(struct zfcp_erp_action *);
static int zfcp_erp_port_strategy_open_nameserver(struct zfcp_erp_action *);
static int zfcp_erp_port_strategy_open_nameserver_wakeup(
struct zfcp_erp_action *);
static int zfcp_erp_port_strategy_open_common(struct zfcp_erp_action *);
static int zfcp_erp_port_strategy_open_common_lookup(struct zfcp_erp_action *);
static int zfcp_erp_port_strategy_open_port(struct zfcp_erp_action *);
static int zfcp_erp_unit_strategy(struct zfcp_erp_action *);
static int zfcp_erp_unit_strategy_clearstati(struct zfcp_unit *);
static int zfcp_erp_unit_strategy_close(struct zfcp_erp_action *);
static int zfcp_erp_unit_strategy_open(struct zfcp_erp_action *);
static void zfcp_erp_action_dismiss_adapter(struct zfcp_adapter *);
static void zfcp_erp_action_dismiss_port(struct zfcp_port *);
static void zfcp_erp_action_dismiss_unit(struct zfcp_unit *);
static void zfcp_erp_action_dismiss(struct zfcp_erp_action *);
static int zfcp_erp_action_enqueue(int, struct zfcp_adapter *,
struct zfcp_port *, struct zfcp_unit *,
u8 id, void *ref);
static int zfcp_erp_action_dequeue(struct zfcp_erp_action *);
static void zfcp_erp_action_cleanup(int, struct zfcp_adapter *,
struct zfcp_port *, struct zfcp_unit *,
int);
static void zfcp_erp_action_ready(struct zfcp_erp_action *);
static int zfcp_erp_action_exists(struct zfcp_erp_action *);
static void zfcp_erp_action_to_ready(struct zfcp_erp_action *);
static void zfcp_erp_action_to_running(struct zfcp_erp_action *);
static void zfcp_erp_memwait_handler(unsigned long);
/**
* zfcp_close_fsf - stop FSF operations for an adapter
*
* Dismiss and cleanup all pending fsf_reqs (this wakes up all initiators of
* requests waiting for completion; especially this returns SCSI commands
* with error state).
*/
static void zfcp_close_fsf(struct zfcp_adapter *adapter)
{
/* close queues to ensure that buffers are not accessed by adapter */
zfcp_qdio_close(adapter);
zfcp_fsf_req_dismiss_all(adapter);
/* reset FSF request sequence number */
adapter->fsf_req_seq_no = 0;
/* all ports and units are closed */
zfcp_erp_modify_adapter_status(adapter, 24, NULL,
ZFCP_STATUS_COMMON_OPEN, ZFCP_CLEAR);
}
/**
* zfcp_fsf_request_timeout_handler - called if a request timed out
* @data: pointer to adapter for handler function
*
* This function needs to be called if requests (ELS, Generic Service,
* or SCSI commands) exceed a certain time limit. The assumption is
* that after the time limit the adapter get stuck. So we trigger a reopen of
* the adapter.
*/
static void zfcp_fsf_request_timeout_handler(unsigned long data)
{
struct zfcp_adapter *adapter = (struct zfcp_adapter *) data;
zfcp_erp_adapter_reopen(adapter, ZFCP_STATUS_COMMON_ERP_FAILED, 62,
NULL);
}
void zfcp_fsf_start_timer(struct zfcp_fsf_req *fsf_req, unsigned long timeout)
{
fsf_req->timer.function = zfcp_fsf_request_timeout_handler;
fsf_req->timer.data = (unsigned long) fsf_req->adapter;
fsf_req->timer.expires = jiffies + timeout;
add_timer(&fsf_req->timer);
}
/*
* function:
*
* purpose: called if an adapter failed,
* initiates adapter recovery which is done
* asynchronously
*
* returns: 0 - initiated action successfully
* <0 - failed to initiate action
*/
static int zfcp_erp_adapter_reopen_internal(struct zfcp_adapter *adapter,
int clear_mask, u8 id, void *ref)
{
int retval;
ZFCP_LOG_DEBUG("reopen adapter %s\n",
zfcp_get_busid_by_adapter(adapter));
zfcp_erp_adapter_block(adapter, clear_mask);
if (atomic_test_mask(ZFCP_STATUS_COMMON_ERP_FAILED, &adapter->status)) {
ZFCP_LOG_DEBUG("skipped reopen of failed adapter %s\n",
zfcp_get_busid_by_adapter(adapter));
/* ensure propagation of failed status to new devices */
zfcp_erp_adapter_failed(adapter, 13, NULL);
retval = -EIO;
goto out;
}
retval = zfcp_erp_action_enqueue(ZFCP_ERP_ACTION_REOPEN_ADAPTER,
adapter, NULL, NULL, id, ref);
out:
return retval;
}
/*
* function:
*
* purpose: Wrappper for zfcp_erp_adapter_reopen_internal
* used to ensure the correct locking
*
* returns: 0 - initiated action successfully
* <0 - failed to initiate action
*/
int zfcp_erp_adapter_reopen(struct zfcp_adapter *adapter, int clear_mask,
u8 id, void *ref)
{
int retval;
unsigned long flags;
read_lock_irqsave(&zfcp_data.config_lock, flags);
write_lock(&adapter->erp_lock);
retval = zfcp_erp_adapter_reopen_internal(adapter, clear_mask, id, ref);
write_unlock(&adapter->erp_lock);
read_unlock_irqrestore(&zfcp_data.config_lock, flags);
return retval;
}
int zfcp_erp_adapter_shutdown(struct zfcp_adapter *adapter, int clear_mask,
u8 id, void *ref)
{
int retval;
retval = zfcp_erp_adapter_reopen(adapter,
ZFCP_STATUS_COMMON_RUNNING |
ZFCP_STATUS_COMMON_ERP_FAILED |
clear_mask, id, ref);
return retval;
}
int zfcp_erp_port_shutdown(struct zfcp_port *port, int clear_mask, u8 id,
void *ref)
{
int retval;
retval = zfcp_erp_port_reopen(port,
ZFCP_STATUS_COMMON_RUNNING |
ZFCP_STATUS_COMMON_ERP_FAILED |
clear_mask, id, ref);
return retval;
}
int zfcp_erp_unit_shutdown(struct zfcp_unit *unit, int clear_mask, u8 id,
void *ref)
{
int retval;
retval = zfcp_erp_unit_reopen(unit,
ZFCP_STATUS_COMMON_RUNNING |
ZFCP_STATUS_COMMON_ERP_FAILED |
clear_mask, id, ref);
return retval;
}
/*
* function:
*
* purpose: called if a port failed to be opened normally
* initiates Forced Reopen recovery which is done
* asynchronously
*
* returns: 0 - initiated action successfully
* <0 - failed to initiate action
*/
static int zfcp_erp_port_forced_reopen_internal(struct zfcp_port *port,
int clear_mask, u8 id,
void *ref)
{
int retval;
ZFCP_LOG_DEBUG("forced reopen of port 0x%016Lx on adapter %s\n",
port->wwpn, zfcp_get_busid_by_port(port));
zfcp_erp_port_block(port, clear_mask);
if (atomic_test_mask(ZFCP_STATUS_COMMON_ERP_FAILED, &port->status)) {
ZFCP_LOG_DEBUG("skipped forced reopen of failed port 0x%016Lx "
"on adapter %s\n", port->wwpn,
zfcp_get_busid_by_port(port));
retval = -EIO;
goto out;
}
retval = zfcp_erp_action_enqueue(ZFCP_ERP_ACTION_REOPEN_PORT_FORCED,
port->adapter, port, NULL, id, ref);
out:
return retval;
}
/*
* function:
*
* purpose: Wrappper for zfcp_erp_port_forced_reopen_internal
* used to ensure the correct locking
*
* returns: 0 - initiated action successfully
* <0 - failed to initiate action
*/
int zfcp_erp_port_forced_reopen(struct zfcp_port *port, int clear_mask, u8 id,
void *ref)
{
int retval;
unsigned long flags;
struct zfcp_adapter *adapter;
adapter = port->adapter;
read_lock_irqsave(&zfcp_data.config_lock, flags);
write_lock(&adapter->erp_lock);
retval = zfcp_erp_port_forced_reopen_internal(port, clear_mask, id,
ref);
write_unlock(&adapter->erp_lock);
read_unlock_irqrestore(&zfcp_data.config_lock, flags);
return retval;
}
/*
* function:
*
* purpose: called if a port is to be opened
* initiates Reopen recovery which is done
* asynchronously
*
* returns: 0 - initiated action successfully
* <0 - failed to initiate action
*/
static int zfcp_erp_port_reopen_internal(struct zfcp_port *port, int clear_mask,
u8 id, void *ref)
{
int retval;
ZFCP_LOG_DEBUG("reopen of port 0x%016Lx on adapter %s\n",
port->wwpn, zfcp_get_busid_by_port(port));
zfcp_erp_port_block(port, clear_mask);
if (atomic_test_mask(ZFCP_STATUS_COMMON_ERP_FAILED, &port->status)) {
ZFCP_LOG_DEBUG("skipped reopen of failed port 0x%016Lx "
"on adapter %s\n", port->wwpn,
zfcp_get_busid_by_port(port));
/* ensure propagation of failed status to new devices */
zfcp_erp_port_failed(port, 14, NULL);
retval = -EIO;
goto out;
}
retval = zfcp_erp_action_enqueue(ZFCP_ERP_ACTION_REOPEN_PORT,
port->adapter, port, NULL, id, ref);
out:
return retval;
}
/**
* zfcp_erp_port_reopen - initiate reopen of a remote port
* @port: port to be reopened
* @clear_mask: specifies flags in port status to be cleared
* Return: 0 on success, < 0 on error
*
* This is a wrappper function for zfcp_erp_port_reopen_internal. It ensures
* correct locking. An error recovery task is initiated to do the reopen.
* To wait for the completion of the reopen zfcp_erp_wait should be used.
*/
int zfcp_erp_port_reopen(struct zfcp_port *port, int clear_mask, u8 id,
void *ref)
{
int retval;
unsigned long flags;
struct zfcp_adapter *adapter = port->adapter;
read_lock_irqsave(&zfcp_data.config_lock, flags);
write_lock(&adapter->erp_lock);
retval = zfcp_erp_port_reopen_internal(port, clear_mask, id, ref);
write_unlock(&adapter->erp_lock);
read_unlock_irqrestore(&zfcp_data.config_lock, flags);
return retval;
}
/*
* function:
*
* purpose: called if a unit is to be opened
* initiates Reopen recovery which is done
* asynchronously
*
* returns: 0 - initiated action successfully
* <0 - failed to initiate action
*/
static int zfcp_erp_unit_reopen_internal(struct zfcp_unit *unit, int clear_mask,
u8 id, void *ref)
{
int retval;
struct zfcp_adapter *adapter = unit->port->adapter;
ZFCP_LOG_DEBUG("reopen of unit 0x%016Lx on port 0x%016Lx "
"on adapter %s\n", unit->fcp_lun,
unit->port->wwpn, zfcp_get_busid_by_unit(unit));
zfcp_erp_unit_block(unit, clear_mask);
if (atomic_test_mask(ZFCP_STATUS_COMMON_ERP_FAILED, &unit->status)) {
ZFCP_LOG_DEBUG("skipped reopen of failed unit 0x%016Lx "
"on port 0x%016Lx on adapter %s\n",
unit->fcp_lun, unit->port->wwpn,
zfcp_get_busid_by_unit(unit));
retval = -EIO;
goto out;
}
retval = zfcp_erp_action_enqueue(ZFCP_ERP_ACTION_REOPEN_UNIT,
adapter, unit->port, unit, id, ref);
out:
return retval;
}
/**
* zfcp_erp_unit_reopen - initiate reopen of a unit
* @unit: unit to be reopened
* @clear_mask: specifies flags in unit status to be cleared
* Return: 0 on success, < 0 on error
*
* This is a wrappper for zfcp_erp_unit_reopen_internal. It ensures correct
* locking. An error recovery task is initiated to do the reopen.
* To wait for the completion of the reopen zfcp_erp_wait should be used.
*/
int zfcp_erp_unit_reopen(struct zfcp_unit *unit, int clear_mask, u8 id,
void *ref)
{
int retval;
unsigned long flags;
struct zfcp_adapter *adapter;
struct zfcp_port *port;
port = unit->port;
adapter = port->adapter;
read_lock_irqsave(&zfcp_data.config_lock, flags);
write_lock(&adapter->erp_lock);
retval = zfcp_erp_unit_reopen_internal(unit, clear_mask, id, ref);
write_unlock(&adapter->erp_lock);
read_unlock_irqrestore(&zfcp_data.config_lock, flags);
return retval;
}
/**
* zfcp_erp_adapter_block - mark adapter as blocked, block scsi requests
*/
static void zfcp_erp_adapter_block(struct zfcp_adapter *adapter, int clear_mask)
{
zfcp_erp_modify_adapter_status(adapter, 15, NULL,
ZFCP_STATUS_COMMON_UNBLOCKED |
clear_mask, ZFCP_CLEAR);
}
/* FIXME: isn't really atomic */
/*
* returns the mask which has not been set so far, i.e.
* 0 if no bit has been changed, !0 if some bit has been changed
*/
static int atomic_test_and_set_mask(unsigned long mask, atomic_t *v)
{
int changed_bits = (atomic_read(v) /*XOR*/^ mask) & mask;
atomic_set_mask(mask, v);
return changed_bits;
}
/* FIXME: isn't really atomic */
/*
* returns the mask which has not been cleared so far, i.e.
* 0 if no bit has been changed, !0 if some bit has been changed
*/
static int atomic_test_and_clear_mask(unsigned long mask, atomic_t *v)
{
int changed_bits = atomic_read(v) & mask;
atomic_clear_mask(mask, v);
return changed_bits;
}
/**
* zfcp_erp_adapter_unblock - mark adapter as unblocked, allow scsi requests
*/
static void zfcp_erp_adapter_unblock(struct zfcp_adapter *adapter)
{
if (atomic_test_and_set_mask(ZFCP_STATUS_COMMON_UNBLOCKED,
&adapter->status))
zfcp_rec_dbf_event_adapter(16, NULL, adapter);
}
/*
* function:
*
* purpose: disable I/O,
* return any open requests and clean them up,
* aim: no pending and incoming I/O
*
* returns:
*/
static void
zfcp_erp_port_block(struct zfcp_port *port, int clear_mask)
{
zfcp_erp_modify_port_status(port, 17, NULL,
ZFCP_STATUS_COMMON_UNBLOCKED | clear_mask,
ZFCP_CLEAR);
}
/*
* function:
*
* purpose: enable I/O
*
* returns:
*/
static void
zfcp_erp_port_unblock(struct zfcp_port *port)
{
if (atomic_test_and_set_mask(ZFCP_STATUS_COMMON_UNBLOCKED,
&port->status))
zfcp_rec_dbf_event_port(18, NULL, port);
}
/*
* function:
*
* purpose: disable I/O,
* return any open requests and clean them up,
* aim: no pending and incoming I/O
*
* returns:
*/
static void
zfcp_erp_unit_block(struct zfcp_unit *unit, int clear_mask)
{
zfcp_erp_modify_unit_status(unit, 19, NULL,
ZFCP_STATUS_COMMON_UNBLOCKED | clear_mask,
ZFCP_CLEAR);
}
/*
* function:
*
* purpose: enable I/O
*
* returns:
*/
static void
zfcp_erp_unit_unblock(struct zfcp_unit *unit)
{
if (atomic_test_and_set_mask(ZFCP_STATUS_COMMON_UNBLOCKED,
&unit->status))
zfcp_rec_dbf_event_unit(20, NULL, unit);
}
static void
zfcp_erp_action_ready(struct zfcp_erp_action *erp_action)
{
struct zfcp_adapter *adapter = erp_action->adapter;
zfcp_erp_action_to_ready(erp_action);
up(&adapter->erp_ready_sem);
zfcp_rec_dbf_event_thread(2, adapter);
}
/*
* function:
*
* purpose:
*
* returns: <0 erp_action not found in any list
* ZFCP_ERP_ACTION_READY erp_action is in ready list
* ZFCP_ERP_ACTION_RUNNING erp_action is in running list
*
* locks: erp_lock must be held
*/
static int
zfcp_erp_action_exists(struct zfcp_erp_action *erp_action)
{
int retval = -EINVAL;
struct list_head *entry;
struct zfcp_erp_action *entry_erp_action;
struct zfcp_adapter *adapter = erp_action->adapter;
/* search in running list */
list_for_each(entry, &adapter->erp_running_head) {
entry_erp_action =
list_entry(entry, struct zfcp_erp_action, list);
if (entry_erp_action == erp_action) {
retval = ZFCP_ERP_ACTION_RUNNING;
goto out;
}
}
/* search in ready list */
list_for_each(entry, &adapter->erp_ready_head) {
entry_erp_action =
list_entry(entry, struct zfcp_erp_action, list);
if (entry_erp_action == erp_action) {
retval = ZFCP_ERP_ACTION_READY;
goto out;
}
}
out:
return retval;
}
/*
* purpose: checks current status of action (timed out, dismissed, ...)
* and does appropriate preparations (dismiss fsf request, ...)
*
* locks: called under erp_lock (disabled interrupts)
*/
static void
zfcp_erp_strategy_check_fsfreq(struct zfcp_erp_action *erp_action)
{
struct zfcp_adapter *adapter = erp_action->adapter;
if (erp_action->fsf_req) {
/* take lock to ensure that request is not deleted meanwhile */
spin_lock(&adapter->req_list_lock);
if (zfcp_reqlist_find_safe(adapter, erp_action->fsf_req) &&
erp_action->fsf_req->erp_action == erp_action) {
/* fsf_req still exists */
/* dismiss fsf_req of timed out/dismissed erp_action */
if (erp_action->status & (ZFCP_STATUS_ERP_DISMISSED |
ZFCP_STATUS_ERP_TIMEDOUT)) {
erp_action->fsf_req->status |=
ZFCP_STATUS_FSFREQ_DISMISSED;
zfcp_rec_dbf_event_action(142, erp_action);
}
if (erp_action->status & ZFCP_STATUS_ERP_TIMEDOUT) {
zfcp_rec_dbf_event_action(143, erp_action);
ZFCP_LOG_NORMAL("error: erp step timed out "
"(action=%d, fsf_req=%p)\n ",
erp_action->action,
erp_action->fsf_req);
}
/*
* If fsf_req is neither dismissed nor completed
* then keep it running asynchronously and don't mess
* with the association of erp_action and fsf_req.
*/
if (erp_action->fsf_req->status &
(ZFCP_STATUS_FSFREQ_COMPLETED |
ZFCP_STATUS_FSFREQ_DISMISSED)) {
/* forget about association between fsf_req
and erp_action */
erp_action->fsf_req = NULL;
}
} else {
/*
* even if this fsf_req has gone, forget about
* association between erp_action and fsf_req
*/
erp_action->fsf_req = NULL;
}
spin_unlock(&adapter->req_list_lock);
}
}
/**
* zfcp_erp_async_handler_nolock - complete erp_action
*
* Used for normal completion, time-out, dismissal and failure after
* low memory condition.
*/
static void zfcp_erp_async_handler_nolock(struct zfcp_erp_action *erp_action,
unsigned long set_mask)
{
if (zfcp_erp_action_exists(erp_action) == ZFCP_ERP_ACTION_RUNNING) {
erp_action->status |= set_mask;
zfcp_erp_action_ready(erp_action);
} else {
/* action is ready or gone - nothing to do */
}
}
/**
* zfcp_erp_async_handler - wrapper for erp_async_handler_nolock w/ locking
*/
void zfcp_erp_async_handler(struct zfcp_erp_action *erp_action,
unsigned long set_mask)
{
struct zfcp_adapter *adapter = erp_action->adapter;
unsigned long flags;
write_lock_irqsave(&adapter->erp_lock, flags);
zfcp_erp_async_handler_nolock(erp_action, set_mask);
write_unlock_irqrestore(&adapter->erp_lock, flags);
}
/*
* purpose: is called for erp_action which was slept waiting for
* memory becoming avaliable,
* will trigger that this action will be continued
*/
static void
zfcp_erp_memwait_handler(unsigned long data)
{
struct zfcp_erp_action *erp_action = (struct zfcp_erp_action *) data;
zfcp_erp_async_handler(erp_action, 0);
}
/*
* purpose: is called if an asynchronous erp step timed out,
* action gets an appropriate flag and will be processed
* accordingly
*/
static void zfcp_erp_timeout_handler(unsigned long data)
{
struct zfcp_erp_action *erp_action = (struct zfcp_erp_action *) data;
zfcp_erp_async_handler(erp_action, ZFCP_STATUS_ERP_TIMEDOUT);
}
/**
* zfcp_erp_action_dismiss - dismiss an erp_action
*
* adapter->erp_lock must be held
*
* Dismissal of an erp_action is usually required if an erp_action of
* higher priority is generated.
*/
static void zfcp_erp_action_dismiss(struct zfcp_erp_action *erp_action)
{
erp_action->status |= ZFCP_STATUS_ERP_DISMISSED;
if (zfcp_erp_action_exists(erp_action) == ZFCP_ERP_ACTION_RUNNING)
zfcp_erp_action_ready(erp_action);
}
int
zfcp_erp_thread_setup(struct zfcp_adapter *adapter)
{
int retval = 0;
atomic_clear_mask(ZFCP_STATUS_ADAPTER_ERP_THREAD_UP, &adapter->status);
retval = kernel_thread(zfcp_erp_thread, adapter, SIGCHLD);
if (retval < 0) {
ZFCP_LOG_NORMAL("error: creation of erp thread failed for "
"adapter %s\n",
zfcp_get_busid_by_adapter(adapter));
} else {
wait_event(adapter->erp_thread_wqh,
atomic_test_mask(ZFCP_STATUS_ADAPTER_ERP_THREAD_UP,
&adapter->status));
}
return (retval < 0);
}
/*
* function:
*
* purpose:
*
* returns:
*
* context: process (i.e. proc-fs or rmmod/insmod)
*
* note: The caller of this routine ensures that the specified
* adapter has been shut down and that this operation
* has been completed. Thus, there are no pending erp_actions
* which would need to be handled here.
*/
int
zfcp_erp_thread_kill(struct zfcp_adapter *adapter)
{
int retval = 0;
atomic_set_mask(ZFCP_STATUS_ADAPTER_ERP_THREAD_KILL, &adapter->status);
up(&adapter->erp_ready_sem);
zfcp_rec_dbf_event_thread_lock(2, adapter);
wait_event(adapter->erp_thread_wqh,
!atomic_test_mask(ZFCP_STATUS_ADAPTER_ERP_THREAD_UP,
&adapter->status));
atomic_clear_mask(ZFCP_STATUS_ADAPTER_ERP_THREAD_KILL,
&adapter->status);
return retval;
}
/*
* purpose: is run as a kernel thread,
* goes through list of error recovery actions of associated adapter
* and delegates single action to execution
*
* returns: 0
*/
static int
zfcp_erp_thread(void *data)
{
struct zfcp_adapter *adapter = (struct zfcp_adapter *) data;
struct list_head *next;
struct zfcp_erp_action *erp_action;
unsigned long flags;
daemonize("zfcperp%s", zfcp_get_busid_by_adapter(adapter));
/* Block all signals */
siginitsetinv(&current->blocked, 0);
atomic_set_mask(ZFCP_STATUS_ADAPTER_ERP_THREAD_UP, &adapter->status);
wake_up(&adapter->erp_thread_wqh);
while (!atomic_test_mask(ZFCP_STATUS_ADAPTER_ERP_THREAD_KILL,
&adapter->status)) {
write_lock_irqsave(&adapter->erp_lock, flags);
[SCSI] zfcp: fix cleanup of dismissed error recovery actions Calling zfcp_erp_strategy_check_action() after zfcp_erp_action_to_running() in zfcp_erp_strategy() might cause an unbalanced up() for erp_ready_sem, which makes the zfcp recovery fail somewhere along the way: erp thread processing erp_action: | | someone waking up erp thread for erp_action | | | | someone else dismissing erp_action: | | | V V V write_lock_irqsave(&adapter->erp_lock, flags); ... if (zfcp_erp_action_exists(erp_action) == ZFCP_ERP_ACTION_RUNNING) { zfcp_erp_action_to_ready(erp_action); up(&adapter->erp_ready_sem); /* first up() for erp_action */ } write_unlock_irqrestore(&adapter->erp_lock, flags); write_lock_irqsave(&adapter->erp_lock, flags); ... zfcp_erp_action_to_running(erp_action); write_unlock_restore(&adapter->erp_lock, flags); /* processing erp_action */ write_lock_irqsave(&adapter->erp_lock, flags); ... erp_action->status |= ZFCP_STATUS_ERP_DISMISSED; if (zfcp_erp_action_exists(erp_action) == ZFCP_ERP_ACTION_RUNNING) { zfcp_erp_action_to_ready(erp_action); up(&adapter->erp_ready_sem); /* second, unbalanced up() for erp_action */ } ... write_unlock_restore(&adapter->erp_lock, flags); write_lock_irqsave(&adapter->erp_lock, flags); if (erp_action->status & ZFCP_STATUS_ERP_DISMISSED) { zfcp_erp_action_dequeue(erp_action); retval = ZFCP_ERP_DISMISSED; } ... write_unlock_restore(&adapter->erp_lock, flags); down(&adapter->erp_ready_sem); /* this down() is meant to balance the first up() */ The erp thread must not dismiss an erp_action after moving that action to erp_running_head. Instead it should just go through the down() operation, which balances the first up(), and run through zfcp_erp_strategy one more time for the second up(), which eventually cleans up erp_action. Which is similar to the normal processing of an event for erp_action doing something asynchronously (e.g. waiting for the completion of an fsf_req). This only works if we make sure that a dismissed erp_action is passed to zfcp_erp_strategy() prior to the other action, which caused actions to be dismissed. Therefore the patch implements this rule: running actions go to the head of the ready list; new actions go to the tail of the ready list; the erp thread picks actions to be processed from the ready list's head. Signed-off-by: Martin Peschke <mp3@de.ibm.com> Acked-by: Swen Schillig <swen@vnet.ibm.com> Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
2007-11-15 12:57:17 +00:00
next = adapter->erp_ready_head.next;
write_unlock_irqrestore(&adapter->erp_lock, flags);
if (next != &adapter->erp_ready_head) {
erp_action =
list_entry(next, struct zfcp_erp_action, list);
/*
* process action (incl. [re]moving it
* from 'ready' queue)
*/
zfcp_erp_strategy(erp_action);
}
/*
* sleep as long as there is nothing to do, i.e.
* no action in 'ready' queue to be processed and
* thread is not to be killed
*/
zfcp_rec_dbf_event_thread_lock(4, adapter);
down_interruptible(&adapter->erp_ready_sem);
zfcp_rec_dbf_event_thread_lock(5, adapter);
}
atomic_clear_mask(ZFCP_STATUS_ADAPTER_ERP_THREAD_UP, &adapter->status);
wake_up(&adapter->erp_thread_wqh);
return 0;
}
/*
* function:
*
* purpose: drives single error recovery action and schedules higher and
* subordinate actions, if necessary
*
* returns: ZFCP_ERP_CONTINUES - action continues (asynchronously)
* ZFCP_ERP_SUCCEEDED - action finished successfully (deqd)
* ZFCP_ERP_FAILED - action finished unsuccessfully (deqd)
* ZFCP_ERP_EXIT - action finished (dequeued), offline
* ZFCP_ERP_DISMISSED - action canceled (dequeued)
*/
static int
zfcp_erp_strategy(struct zfcp_erp_action *erp_action)
{
int retval = 0;
struct zfcp_adapter *adapter = erp_action->adapter;
struct zfcp_port *port = erp_action->port;
struct zfcp_unit *unit = erp_action->unit;
int action = erp_action->action;
u32 status = erp_action->status;
unsigned long flags;
/* serialise dismissing, timing out, moving, enqueueing */
read_lock_irqsave(&zfcp_data.config_lock, flags);
write_lock(&adapter->erp_lock);
/* dequeue dismissed action and leave, if required */
retval = zfcp_erp_strategy_check_action(erp_action, retval);
if (retval == ZFCP_ERP_DISMISSED) {
goto unlock;
}
/*
* move action to 'running' queue before processing it
* (to avoid a race condition regarding moving the
* action to the 'running' queue and back)
*/
zfcp_erp_action_to_running(erp_action);
/*
* try to process action as far as possible,
* no lock to allow for blocking operations (kmalloc, qdio, ...),
* afterwards the lock is required again for the following reasons:
* - dequeueing of finished action and enqueueing of
* follow-up actions must be atomic so that any other
* reopen-routine does not believe there is nothing to do
* and that it is safe to enqueue something else,
* - we want to force any control thread which is dismissing
* actions to finish this before we decide about
* necessary steps to be taken here further
*/
write_unlock(&adapter->erp_lock);
read_unlock_irqrestore(&zfcp_data.config_lock, flags);
retval = zfcp_erp_strategy_do_action(erp_action);
read_lock_irqsave(&zfcp_data.config_lock, flags);
write_lock(&adapter->erp_lock);
/*
* check for dismissed status again to avoid follow-up actions,
[SCSI] zfcp: fix cleanup of dismissed error recovery actions Calling zfcp_erp_strategy_check_action() after zfcp_erp_action_to_running() in zfcp_erp_strategy() might cause an unbalanced up() for erp_ready_sem, which makes the zfcp recovery fail somewhere along the way: erp thread processing erp_action: | | someone waking up erp thread for erp_action | | | | someone else dismissing erp_action: | | | V V V write_lock_irqsave(&adapter->erp_lock, flags); ... if (zfcp_erp_action_exists(erp_action) == ZFCP_ERP_ACTION_RUNNING) { zfcp_erp_action_to_ready(erp_action); up(&adapter->erp_ready_sem); /* first up() for erp_action */ } write_unlock_irqrestore(&adapter->erp_lock, flags); write_lock_irqsave(&adapter->erp_lock, flags); ... zfcp_erp_action_to_running(erp_action); write_unlock_restore(&adapter->erp_lock, flags); /* processing erp_action */ write_lock_irqsave(&adapter->erp_lock, flags); ... erp_action->status |= ZFCP_STATUS_ERP_DISMISSED; if (zfcp_erp_action_exists(erp_action) == ZFCP_ERP_ACTION_RUNNING) { zfcp_erp_action_to_ready(erp_action); up(&adapter->erp_ready_sem); /* second, unbalanced up() for erp_action */ } ... write_unlock_restore(&adapter->erp_lock, flags); write_lock_irqsave(&adapter->erp_lock, flags); if (erp_action->status & ZFCP_STATUS_ERP_DISMISSED) { zfcp_erp_action_dequeue(erp_action); retval = ZFCP_ERP_DISMISSED; } ... write_unlock_restore(&adapter->erp_lock, flags); down(&adapter->erp_ready_sem); /* this down() is meant to balance the first up() */ The erp thread must not dismiss an erp_action after moving that action to erp_running_head. Instead it should just go through the down() operation, which balances the first up(), and run through zfcp_erp_strategy one more time for the second up(), which eventually cleans up erp_action. Which is similar to the normal processing of an event for erp_action doing something asynchronously (e.g. waiting for the completion of an fsf_req). This only works if we make sure that a dismissed erp_action is passed to zfcp_erp_strategy() prior to the other action, which caused actions to be dismissed. Therefore the patch implements this rule: running actions go to the head of the ready list; new actions go to the tail of the ready list; the erp thread picks actions to be processed from the ready list's head. Signed-off-by: Martin Peschke <mp3@de.ibm.com> Acked-by: Swen Schillig <swen@vnet.ibm.com> Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
2007-11-15 12:57:17 +00:00
* failing of targets and so on for dismissed actions,
* we go through down() here because there has been an up()
*/
[SCSI] zfcp: fix cleanup of dismissed error recovery actions Calling zfcp_erp_strategy_check_action() after zfcp_erp_action_to_running() in zfcp_erp_strategy() might cause an unbalanced up() for erp_ready_sem, which makes the zfcp recovery fail somewhere along the way: erp thread processing erp_action: | | someone waking up erp thread for erp_action | | | | someone else dismissing erp_action: | | | V V V write_lock_irqsave(&adapter->erp_lock, flags); ... if (zfcp_erp_action_exists(erp_action) == ZFCP_ERP_ACTION_RUNNING) { zfcp_erp_action_to_ready(erp_action); up(&adapter->erp_ready_sem); /* first up() for erp_action */ } write_unlock_irqrestore(&adapter->erp_lock, flags); write_lock_irqsave(&adapter->erp_lock, flags); ... zfcp_erp_action_to_running(erp_action); write_unlock_restore(&adapter->erp_lock, flags); /* processing erp_action */ write_lock_irqsave(&adapter->erp_lock, flags); ... erp_action->status |= ZFCP_STATUS_ERP_DISMISSED; if (zfcp_erp_action_exists(erp_action) == ZFCP_ERP_ACTION_RUNNING) { zfcp_erp_action_to_ready(erp_action); up(&adapter->erp_ready_sem); /* second, unbalanced up() for erp_action */ } ... write_unlock_restore(&adapter->erp_lock, flags); write_lock_irqsave(&adapter->erp_lock, flags); if (erp_action->status & ZFCP_STATUS_ERP_DISMISSED) { zfcp_erp_action_dequeue(erp_action); retval = ZFCP_ERP_DISMISSED; } ... write_unlock_restore(&adapter->erp_lock, flags); down(&adapter->erp_ready_sem); /* this down() is meant to balance the first up() */ The erp thread must not dismiss an erp_action after moving that action to erp_running_head. Instead it should just go through the down() operation, which balances the first up(), and run through zfcp_erp_strategy one more time for the second up(), which eventually cleans up erp_action. Which is similar to the normal processing of an event for erp_action doing something asynchronously (e.g. waiting for the completion of an fsf_req). This only works if we make sure that a dismissed erp_action is passed to zfcp_erp_strategy() prior to the other action, which caused actions to be dismissed. Therefore the patch implements this rule: running actions go to the head of the ready list; new actions go to the tail of the ready list; the erp thread picks actions to be processed from the ready list's head. Signed-off-by: Martin Peschke <mp3@de.ibm.com> Acked-by: Swen Schillig <swen@vnet.ibm.com> Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
2007-11-15 12:57:17 +00:00
if (erp_action->status & ZFCP_STATUS_ERP_DISMISSED)
retval = ZFCP_ERP_CONTINUES;
switch (retval) {
case ZFCP_ERP_NOMEM:
/* no memory to continue immediately, let it sleep */
if (!(erp_action->status & ZFCP_STATUS_ERP_LOWMEM)) {
++adapter->erp_low_mem_count;
erp_action->status |= ZFCP_STATUS_ERP_LOWMEM;
}
/* This condition is true if there is no memory available
for any erp_action on this adapter. This implies that there
are no elements in the memory pool(s) left for erp_actions.
This might happen if an erp_action that used a memory pool
element was timed out.
*/
if (adapter->erp_total_count == adapter->erp_low_mem_count) {
ZFCP_LOG_NORMAL("error: no mempool elements available, "
"restarting I/O on adapter %s "
"to free mempool\n",
zfcp_get_busid_by_adapter(adapter));
zfcp_erp_adapter_reopen_internal(adapter, 0, 66, NULL);
} else {
retval = zfcp_erp_strategy_memwait(erp_action);
}
goto unlock;
case ZFCP_ERP_CONTINUES:
/* leave since this action runs asynchronously */
if (erp_action->status & ZFCP_STATUS_ERP_LOWMEM) {
--adapter->erp_low_mem_count;
erp_action->status &= ~ZFCP_STATUS_ERP_LOWMEM;
}
goto unlock;
}
/* ok, finished action (whatever its result is) */
/* check for unrecoverable targets */
retval = zfcp_erp_strategy_check_target(erp_action, retval);
/* action must be dequeued (here to allow for further ones) */
zfcp_erp_action_dequeue(erp_action);
/*
* put this target through the erp mill again if someone has
* requested to change the status of a target being online
* to offline or the other way around
* (old retval is preserved if nothing has to be done here)
*/
retval = zfcp_erp_strategy_statechange(action, status, adapter,
port, unit, retval);
/*
* leave if target is in permanent error state or if
* action is repeated in order to process state change
*/
if (retval == ZFCP_ERP_EXIT) {
goto unlock;
}
/* trigger follow up actions */
zfcp_erp_strategy_followup_actions(action, adapter, port, unit, retval);
unlock:
write_unlock(&adapter->erp_lock);
read_unlock_irqrestore(&zfcp_data.config_lock, flags);
if (retval != ZFCP_ERP_CONTINUES)
zfcp_erp_action_cleanup(action, adapter, port, unit, retval);
/*
* a few tasks remain when the erp queues are empty
* (don't do that if the last action evaluated was dismissed
* since this clearly indicates that there is more to come) :
* - close the name server port if it is open yet
* (enqueues another [probably] final action)
* - otherwise, wake up whoever wants to be woken when we are
* done with erp
*/
if (retval != ZFCP_ERP_DISMISSED)
zfcp_erp_strategy_check_queues(adapter);
return retval;
}
/*
* function:
*
* purpose:
*
* returns: ZFCP_ERP_DISMISSED - if action has been dismissed
* retval - otherwise
*/
static int
zfcp_erp_strategy_check_action(struct zfcp_erp_action *erp_action, int retval)
{
zfcp_erp_strategy_check_fsfreq(erp_action);
if (erp_action->status & ZFCP_STATUS_ERP_DISMISSED) {
zfcp_erp_action_dequeue(erp_action);
retval = ZFCP_ERP_DISMISSED;
}
return retval;
}
static int
zfcp_erp_strategy_do_action(struct zfcp_erp_action *erp_action)
{
int retval = ZFCP_ERP_FAILED;
/*
* try to execute/continue action as far as possible,
* note: no lock in subsequent strategy routines
* (this allows these routine to call schedule, e.g.
* kmalloc with such flags or qdio_initialize & friends)
* Note: in case of timeout, the separate strategies will fail
* anyhow. No need for a special action. Even worse, a nameserver
* failure would not wake up waiting ports without the call.
*/
switch (erp_action->action) {
case ZFCP_ERP_ACTION_REOPEN_ADAPTER:
retval = zfcp_erp_adapter_strategy(erp_action);
break;
case ZFCP_ERP_ACTION_REOPEN_PORT_FORCED:
retval = zfcp_erp_port_forced_strategy(erp_action);
break;
case ZFCP_ERP_ACTION_REOPEN_PORT:
retval = zfcp_erp_port_strategy(erp_action);
break;
case ZFCP_ERP_ACTION_REOPEN_UNIT:
retval = zfcp_erp_unit_strategy(erp_action);
break;
default:
ZFCP_LOG_NORMAL("bug: unknown erp action requested on "
"adapter %s (action=%d)\n",
zfcp_get_busid_by_adapter(erp_action->adapter),
erp_action->action);
}
return retval;
}
/*
* function:
*
* purpose: triggers retry of this action after a certain amount of time
* by means of timer provided by erp_action
*
* returns: ZFCP_ERP_CONTINUES - erp_action sleeps in erp running queue
*/
static int
zfcp_erp_strategy_memwait(struct zfcp_erp_action *erp_action)
{
int retval = ZFCP_ERP_CONTINUES;
init_timer(&erp_action->timer);
erp_action->timer.function = zfcp_erp_memwait_handler;
erp_action->timer.data = (unsigned long) erp_action;
erp_action->timer.expires = jiffies + ZFCP_ERP_MEMWAIT_TIMEOUT;
add_timer(&erp_action->timer);
return retval;
}
/*
* function: zfcp_erp_adapter_failed
*
* purpose: sets the adapter and all underlying devices to ERP_FAILED
*
*/
void
zfcp_erp_adapter_failed(struct zfcp_adapter *adapter, u8 id, void *ref)
{
zfcp_erp_modify_adapter_status(adapter, id, ref,
ZFCP_STATUS_COMMON_ERP_FAILED, ZFCP_SET);
ZFCP_LOG_NORMAL("adapter erp failed on adapter %s\n",
zfcp_get_busid_by_adapter(adapter));
}
/*
* function: zfcp_erp_port_failed
*
* purpose: sets the port and all underlying devices to ERP_FAILED
*
*/
void
zfcp_erp_port_failed(struct zfcp_port *port, u8 id, void *ref)
{
zfcp_erp_modify_port_status(port, id, ref,
ZFCP_STATUS_COMMON_ERP_FAILED, ZFCP_SET);
if (atomic_test_mask(ZFCP_STATUS_PORT_WKA, &port->status))
ZFCP_LOG_NORMAL("port erp failed (adapter %s, "
"port d_id=0x%06x)\n",
zfcp_get_busid_by_port(port), port->d_id);
else
ZFCP_LOG_NORMAL("port erp failed (adapter %s, wwpn=0x%016Lx)\n",
zfcp_get_busid_by_port(port), port->wwpn);
}
/*
* function: zfcp_erp_unit_failed
*
* purpose: sets the unit to ERP_FAILED
*
*/
void
zfcp_erp_unit_failed(struct zfcp_unit *unit, u8 id, void *ref)
{
zfcp_erp_modify_unit_status(unit, id, ref,
ZFCP_STATUS_COMMON_ERP_FAILED, ZFCP_SET);
ZFCP_LOG_NORMAL("unit erp failed on unit 0x%016Lx on port 0x%016Lx "
" on adapter %s\n", unit->fcp_lun,
unit->port->wwpn, zfcp_get_busid_by_unit(unit));
}
/*
* function: zfcp_erp_strategy_check_target
*
* purpose: increments the erp action count on the device currently in
* recovery if the action failed or resets the count in case of
* success. If a maximum count is exceeded the device is marked
* as ERP_FAILED.
* The 'blocked' state of a target which has been recovered
* successfully is reset.
*
* returns: ZFCP_ERP_CONTINUES - action continues (not considered)
* ZFCP_ERP_SUCCEEDED - action finished successfully
* ZFCP_ERP_EXIT - action failed and will not continue
*/
static int
zfcp_erp_strategy_check_target(struct zfcp_erp_action *erp_action, int result)
{
struct zfcp_adapter *adapter = erp_action->adapter;
struct zfcp_port *port = erp_action->port;
struct zfcp_unit *unit = erp_action->unit;
switch (erp_action->action) {
case ZFCP_ERP_ACTION_REOPEN_UNIT:
result = zfcp_erp_strategy_check_unit(unit, result);
break;
case ZFCP_ERP_ACTION_REOPEN_PORT_FORCED:
case ZFCP_ERP_ACTION_REOPEN_PORT:
result = zfcp_erp_strategy_check_port(port, result);
break;
case ZFCP_ERP_ACTION_REOPEN_ADAPTER:
result = zfcp_erp_strategy_check_adapter(adapter, result);
break;
}
return result;
}
static int
zfcp_erp_strategy_statechange(int action,
u32 status,
struct zfcp_adapter *adapter,
struct zfcp_port *port,
struct zfcp_unit *unit, int retval)
{
switch (action) {
case ZFCP_ERP_ACTION_REOPEN_ADAPTER:
if (zfcp_erp_strategy_statechange_detected(&adapter->status,
status)) {
zfcp_erp_adapter_reopen_internal(adapter,
ZFCP_STATUS_COMMON_ERP_FAILED,
67, NULL);
retval = ZFCP_ERP_EXIT;
}
break;
case ZFCP_ERP_ACTION_REOPEN_PORT_FORCED:
case ZFCP_ERP_ACTION_REOPEN_PORT:
if (zfcp_erp_strategy_statechange_detected(&port->status,
status)) {
zfcp_erp_port_reopen_internal(port,
ZFCP_STATUS_COMMON_ERP_FAILED,
68, NULL);
retval = ZFCP_ERP_EXIT;
}
break;
case ZFCP_ERP_ACTION_REOPEN_UNIT:
if (zfcp_erp_strategy_statechange_detected(&unit->status,
status)) {
zfcp_erp_unit_reopen_internal(unit,
ZFCP_STATUS_COMMON_ERP_FAILED,
69, NULL);
retval = ZFCP_ERP_EXIT;
}
break;
}
return retval;
}
static int
zfcp_erp_strategy_statechange_detected(atomic_t * target_status, u32 erp_status)
{
return
/* take it online */
(atomic_test_mask(ZFCP_STATUS_COMMON_RUNNING, target_status) &&
(ZFCP_STATUS_ERP_CLOSE_ONLY & erp_status)) ||
/* take it offline */
(!atomic_test_mask(ZFCP_STATUS_COMMON_RUNNING, target_status) &&
!(ZFCP_STATUS_ERP_CLOSE_ONLY & erp_status));
}
static int
zfcp_erp_strategy_check_unit(struct zfcp_unit *unit, int result)
{
switch (result) {
case ZFCP_ERP_SUCCEEDED :
atomic_set(&unit->erp_counter, 0);
zfcp_erp_unit_unblock(unit);
break;
case ZFCP_ERP_FAILED :
atomic_inc(&unit->erp_counter);
if (atomic_read(&unit->erp_counter) > ZFCP_MAX_ERPS)
zfcp_erp_unit_failed(unit, 21, NULL);
break;
case ZFCP_ERP_EXIT :
/* nothing */
break;
}
if (atomic_test_mask(ZFCP_STATUS_COMMON_ERP_FAILED, &unit->status)) {
zfcp_erp_unit_block(unit, 0); /* for ZFCP_ERP_SUCCEEDED */
result = ZFCP_ERP_EXIT;
}
return result;
}
static int
zfcp_erp_strategy_check_port(struct zfcp_port *port, int result)
{
switch (result) {
case ZFCP_ERP_SUCCEEDED :
atomic_set(&port->erp_counter, 0);
zfcp_erp_port_unblock(port);
break;
case ZFCP_ERP_FAILED :
atomic_inc(&port->erp_counter);
if (atomic_read(&port->erp_counter) > ZFCP_MAX_ERPS)
zfcp_erp_port_failed(port, 22, NULL);
break;
case ZFCP_ERP_EXIT :
/* nothing */
break;
}
if (atomic_test_mask(ZFCP_STATUS_COMMON_ERP_FAILED, &port->status)) {
zfcp_erp_port_block(port, 0); /* for ZFCP_ERP_SUCCEEDED */
result = ZFCP_ERP_EXIT;
}
return result;
}
static int
zfcp_erp_strategy_check_adapter(struct zfcp_adapter *adapter, int result)
{
switch (result) {
case ZFCP_ERP_SUCCEEDED :
atomic_set(&adapter->erp_counter, 0);
zfcp_erp_adapter_unblock(adapter);
break;
case ZFCP_ERP_FAILED :
atomic_inc(&adapter->erp_counter);
if (atomic_read(&adapter->erp_counter) > ZFCP_MAX_ERPS)
zfcp_erp_adapter_failed(adapter, 23, NULL);
break;
case ZFCP_ERP_EXIT :
/* nothing */
break;
}
if (atomic_test_mask(ZFCP_STATUS_COMMON_ERP_FAILED, &adapter->status)) {
zfcp_erp_adapter_block(adapter, 0); /* for ZFCP_ERP_SUCCEEDED */
result = ZFCP_ERP_EXIT;
}
return result;
}
struct zfcp_erp_add_work {
struct zfcp_unit *unit;
struct work_struct work;
};
/**
* zfcp_erp_scsi_scan
* @data: pointer to a struct zfcp_erp_add_work
*
* Registers a logical unit with the SCSI stack.
*/
static void zfcp_erp_scsi_scan(struct work_struct *work)
{
struct zfcp_erp_add_work *p =
container_of(work, struct zfcp_erp_add_work, work);
struct zfcp_unit *unit = p->unit;
struct fc_rport *rport = unit->port->rport;
scsi_scan_target(&rport->dev, 0, rport->scsi_target_id,
unit->scsi_lun, 0);
atomic_clear_mask(ZFCP_STATUS_UNIT_SCSI_WORK_PENDING, &unit->status);
zfcp_unit_put(unit);
kfree(p);
}
/**
* zfcp_erp_schedule_work
* @unit: pointer to unit which should be registered with SCSI stack
*
* Schedules work which registers a unit with the SCSI stack
*/
static void
zfcp_erp_schedule_work(struct zfcp_unit *unit)
{
struct zfcp_erp_add_work *p;
p = kzalloc(sizeof(*p), GFP_KERNEL);
if (!p) {
ZFCP_LOG_NORMAL("error: Out of resources. Could not register "
"the FCP-LUN 0x%Lx connected to "
"the port with WWPN 0x%Lx connected to "
"the adapter %s with the SCSI stack.\n",
unit->fcp_lun,
unit->port->wwpn,
zfcp_get_busid_by_unit(unit));
return;
}
zfcp_unit_get(unit);
atomic_set_mask(ZFCP_STATUS_UNIT_SCSI_WORK_PENDING, &unit->status);
INIT_WORK(&p->work, zfcp_erp_scsi_scan);
p->unit = unit;
schedule_work(&p->work);
}
/*
* function:
*
* purpose: remaining things in good cases,
* escalation in bad cases
*
* returns:
*/
static int
zfcp_erp_strategy_followup_actions(int action,
struct zfcp_adapter *adapter,
struct zfcp_port *port,
struct zfcp_unit *unit, int status)
{
/* initiate follow-up actions depending on success of finished action */
switch (action) {
case ZFCP_ERP_ACTION_REOPEN_ADAPTER:
if (status == ZFCP_ERP_SUCCEEDED)
zfcp_erp_port_reopen_all_internal(adapter, 0, 70, NULL);
else
zfcp_erp_adapter_reopen_internal(adapter, 0, 71, NULL);
break;
case ZFCP_ERP_ACTION_REOPEN_PORT_FORCED:
if (status == ZFCP_ERP_SUCCEEDED)
zfcp_erp_port_reopen_internal(port, 0, 72, NULL);
else
zfcp_erp_adapter_reopen_internal(adapter, 0, 73, NULL);
break;
case ZFCP_ERP_ACTION_REOPEN_PORT:
if (status == ZFCP_ERP_SUCCEEDED)
zfcp_erp_unit_reopen_all_internal(port, 0, 74, NULL);
else
zfcp_erp_port_forced_reopen_internal(port, 0, 75, NULL);
break;
case ZFCP_ERP_ACTION_REOPEN_UNIT:
/* Nothing to do if status == ZFCP_ERP_SUCCEEDED */
if (status != ZFCP_ERP_SUCCEEDED)
zfcp_erp_port_reopen_internal(unit->port, 0, 76, NULL);
break;
}
return 0;
}
static int
zfcp_erp_strategy_check_queues(struct zfcp_adapter *adapter)
{
unsigned long flags;
read_lock_irqsave(&zfcp_data.config_lock, flags);
read_lock(&adapter->erp_lock);
if (list_empty(&adapter->erp_ready_head) &&
list_empty(&adapter->erp_running_head)) {
atomic_clear_mask(ZFCP_STATUS_ADAPTER_ERP_PENDING,
&adapter->status);
wake_up(&adapter->erp_done_wqh);
}
read_unlock(&adapter->erp_lock);
read_unlock_irqrestore(&zfcp_data.config_lock, flags);
return 0;
}
/**
* zfcp_erp_wait - wait for completion of error recovery on an adapter
* @adapter: adapter for which to wait for completion of its error recovery
* Return: 0
*/
int
zfcp_erp_wait(struct zfcp_adapter *adapter)
{
int retval = 0;
wait_event(adapter->erp_done_wqh,
!atomic_test_mask(ZFCP_STATUS_ADAPTER_ERP_PENDING,
&adapter->status));
return retval;
}
void zfcp_erp_modify_adapter_status(struct zfcp_adapter *adapter, u8 id,
void *ref, u32 mask, int set_or_clear)
{
struct zfcp_port *port;
u32 changed, common_mask = mask & ZFCP_COMMON_FLAGS;
if (set_or_clear == ZFCP_SET) {
changed = atomic_test_and_set_mask(mask, &adapter->status);
} else {
changed = atomic_test_and_clear_mask(mask, &adapter->status);
if (mask & ZFCP_STATUS_COMMON_ERP_FAILED)
atomic_set(&adapter->erp_counter, 0);
}
if (changed)
zfcp_rec_dbf_event_adapter(id, ref, adapter);
/* Deal with all underlying devices, only pass common_mask */
if (common_mask)
list_for_each_entry(port, &adapter->port_list_head, list)
zfcp_erp_modify_port_status(port, id, ref, common_mask,
set_or_clear);
}
/*
* function: zfcp_erp_modify_port_status
*
* purpose: sets the port and all underlying devices to ERP_FAILED
*
*/
void zfcp_erp_modify_port_status(struct zfcp_port *port, u8 id, void *ref,
u32 mask, int set_or_clear)
{
struct zfcp_unit *unit;
u32 changed, common_mask = mask & ZFCP_COMMON_FLAGS;
if (set_or_clear == ZFCP_SET) {
changed = atomic_test_and_set_mask(mask, &port->status);
} else {
changed = atomic_test_and_clear_mask(mask, &port->status);
if (mask & ZFCP_STATUS_COMMON_ERP_FAILED)
atomic_set(&port->erp_counter, 0);
}
if (changed)
zfcp_rec_dbf_event_port(id, ref, port);
/* Modify status of all underlying devices, only pass common mask */
if (common_mask)
list_for_each_entry(unit, &port->unit_list_head, list)
zfcp_erp_modify_unit_status(unit, id, ref, common_mask,
set_or_clear);
}
/*
* function: zfcp_erp_modify_unit_status
*
* purpose: sets the unit to ERP_FAILED
*
*/
void zfcp_erp_modify_unit_status(struct zfcp_unit *unit, u8 id, void *ref,
u32 mask, int set_or_clear)
{
u32 changed;
if (set_or_clear == ZFCP_SET) {
changed = atomic_test_and_set_mask(mask, &unit->status);
} else {
changed = atomic_test_and_clear_mask(mask, &unit->status);
if (mask & ZFCP_STATUS_COMMON_ERP_FAILED) {
atomic_set(&unit->erp_counter, 0);
}
}
if (changed)
zfcp_rec_dbf_event_unit(id, ref, unit);
}
/*
* function:
*
* purpose: Wrappper for zfcp_erp_port_reopen_all_internal
* used to ensure the correct locking
*
* returns: 0 - initiated action successfully
* <0 - failed to initiate action
*/
int zfcp_erp_port_reopen_all(struct zfcp_adapter *adapter, int clear_mask,
u8 id, void *ref)
{
int retval;
unsigned long flags;
read_lock_irqsave(&zfcp_data.config_lock, flags);
write_lock(&adapter->erp_lock);
retval = zfcp_erp_port_reopen_all_internal(adapter, clear_mask, id,
ref);
write_unlock(&adapter->erp_lock);
read_unlock_irqrestore(&zfcp_data.config_lock, flags);
return retval;
}
static int zfcp_erp_port_reopen_all_internal(struct zfcp_adapter *adapter,
int clear_mask, u8 id, void *ref)
{
int retval = 0;
struct zfcp_port *port;
list_for_each_entry(port, &adapter->port_list_head, list)
if (!atomic_test_mask(ZFCP_STATUS_PORT_WKA, &port->status))
zfcp_erp_port_reopen_internal(port, clear_mask, id,
ref);
return retval;
}
/*
* function:
*
* purpose:
*
* returns: FIXME
*/
static int zfcp_erp_unit_reopen_all_internal(struct zfcp_port *port,
int clear_mask, u8 id, void *ref)
{
int retval = 0;
struct zfcp_unit *unit;
list_for_each_entry(unit, &port->unit_list_head, list)
zfcp_erp_unit_reopen_internal(unit, clear_mask, id, ref);
return retval;
}
/*
* function:
*
* purpose: this routine executes the 'Reopen Adapter' action
* (the entire action is processed synchronously, since
* there are no actions which might be run concurrently
* per definition)
*
* returns: ZFCP_ERP_SUCCEEDED - action finished successfully
* ZFCP_ERP_FAILED - action finished unsuccessfully
*/
static int
zfcp_erp_adapter_strategy(struct zfcp_erp_action *erp_action)
{
int retval;
struct zfcp_adapter *adapter = erp_action->adapter;
retval = zfcp_erp_adapter_strategy_close(erp_action);
if (erp_action->status & ZFCP_STATUS_ERP_CLOSE_ONLY)
retval = ZFCP_ERP_EXIT;
else
retval = zfcp_erp_adapter_strategy_open(erp_action);
if (retval == ZFCP_ERP_FAILED) {
ZFCP_LOG_INFO("Waiting to allow the adapter %s "
"to recover itself\n",
zfcp_get_busid_by_adapter(adapter));
ssleep(ZFCP_TYPE2_RECOVERY_TIME);
}
return retval;
}
/*
* function:
*
* purpose:
*
* returns: ZFCP_ERP_SUCCEEDED - action finished successfully
* ZFCP_ERP_FAILED - action finished unsuccessfully
*/
static int
zfcp_erp_adapter_strategy_close(struct zfcp_erp_action *erp_action)
{
int retval;
atomic_set_mask(ZFCP_STATUS_COMMON_CLOSING,
&erp_action->adapter->status);
retval = zfcp_erp_adapter_strategy_generic(erp_action, 1);
atomic_clear_mask(ZFCP_STATUS_COMMON_CLOSING,
&erp_action->adapter->status);
return retval;
}
/*
* function:
*
* purpose:
*
* returns: ZFCP_ERP_SUCCEEDED - action finished successfully
* ZFCP_ERP_FAILED - action finished unsuccessfully
*/
static int
zfcp_erp_adapter_strategy_open(struct zfcp_erp_action *erp_action)
{
int retval;
atomic_set_mask(ZFCP_STATUS_COMMON_OPENING,
&erp_action->adapter->status);
retval = zfcp_erp_adapter_strategy_generic(erp_action, 0);
atomic_clear_mask(ZFCP_STATUS_COMMON_OPENING,
&erp_action->adapter->status);
return retval;
}
/*
* function: zfcp_register_adapter
*
* purpose: allocate the irq associated with this devno and register
* the FSF adapter with the SCSI stack
*
* returns:
*/
static int
zfcp_erp_adapter_strategy_generic(struct zfcp_erp_action *erp_action, int close)
{
int retval = ZFCP_ERP_SUCCEEDED;
if (close)
goto close_only;
retval = zfcp_erp_adapter_strategy_open_qdio(erp_action);
if (retval != ZFCP_ERP_SUCCEEDED)
goto failed_qdio;
retval = zfcp_erp_adapter_strategy_open_fsf(erp_action);
if (retval != ZFCP_ERP_SUCCEEDED)
goto failed_openfcp;
atomic_set_mask(ZFCP_STATUS_COMMON_OPEN, &erp_action->adapter->status);
goto out;
close_only:
atomic_clear_mask(ZFCP_STATUS_COMMON_OPEN,
&erp_action->adapter->status);
failed_openfcp:
zfcp_close_fsf(erp_action->adapter);
failed_qdio:
atomic_clear_mask(ZFCP_STATUS_ADAPTER_XCONFIG_OK |
ZFCP_STATUS_ADAPTER_LINK_UNPLUGGED |
ZFCP_STATUS_ADAPTER_XPORT_OK,
&erp_action->adapter->status);
out:
return retval;
}
/*
* function: zfcp_qdio_init
*
* purpose: setup QDIO operation for specified adapter
*
* returns: 0 - successful setup
* !0 - failed setup
*/
static int
zfcp_erp_adapter_strategy_open_qdio(struct zfcp_erp_action *erp_action)
{
struct zfcp_adapter *adapter = erp_action->adapter;
if (zfcp_qdio_open(adapter))
return ZFCP_ERP_FAILED;
/* initialize waitqueue used to wait for free SBALs in requests queue */
init_waitqueue_head(&adapter->request_wq);
/* ok, we did it - skip all cleanups for different failures */
atomic_set_mask(ZFCP_STATUS_ADAPTER_QDIOUP, &adapter->status);
return ZFCP_ERP_SUCCEEDED;
}
static int
zfcp_erp_adapter_strategy_open_fsf(struct zfcp_erp_action *erp_action)
{
int retval;
retval = zfcp_erp_adapter_strategy_open_fsf_xconfig(erp_action);
if (retval == ZFCP_ERP_FAILED)
return ZFCP_ERP_FAILED;
retval = zfcp_erp_adapter_strategy_open_fsf_xport(erp_action);
if (retval == ZFCP_ERP_FAILED)
return ZFCP_ERP_FAILED;
return zfcp_erp_adapter_strategy_open_fsf_statusread(erp_action);
}
static int
zfcp_erp_adapter_strategy_open_fsf_xconfig(struct zfcp_erp_action *erp_action)
{
int retval = ZFCP_ERP_SUCCEEDED;
int retries;
int sleep = ZFCP_EXCHANGE_CONFIG_DATA_FIRST_SLEEP;
struct zfcp_adapter *adapter = erp_action->adapter;
atomic_clear_mask(ZFCP_STATUS_ADAPTER_XCONFIG_OK, &adapter->status);
for (retries = ZFCP_EXCHANGE_CONFIG_DATA_RETRIES; retries; retries--) {
atomic_clear_mask(ZFCP_STATUS_ADAPTER_HOST_CON_INIT,
&adapter->status);
ZFCP_LOG_DEBUG("Doing exchange config data\n");
[PATCH] zfcp: fix incorrect usage of erp_lock ================================= [ INFO: inconsistent lock state ] --------------------------------- inconsistent {hardirq-on-W} -> {in-hardirq-W} usage. swapper/0 [HC1[1]:SC0[0]:HE0:SE1] takes: (&adapter->erp_lock){+-..}, at: [<000000000026c7f8>] zfcp_erp_async_handler+0x3c/0x70 {hardirq-on-W} state was registered at: [<000000000005f33e>] __lock_acquire+0x30a/0xed0 [<00000000000604ae>] lock_acquire+0x9a/0xc8 [<000000000035a7ae>] _write_lock+0x4e/0x68 [<000000000026d822>] zfcp_erp_adapter_strategy_generic+0x286/0xd94 [<000000000026fd72>] zfcp_erp_strategy_do_action+0x91e/0x1a94 [<0000000000271a3a>] zfcp_erp_thread+0x21a/0x1568 [<0000000000019096>] kernel_thread_starter+0x6/0xc [<0000000000019090>] kernel_thread_starter+0x0/0xc irq event stamp: 12078 hardirqs last enabled at (12077): [<0000000000019416>] cpu_idle+0x206/0x250 hardirqs last disabled at (12078): [<0000000000020458>] io_no_vtime+0xc/0x1c softirqs last enabled at (12072): [<0000000000040b62>] __do_softirq+0x13a/0x180 softirqs last disabled at (12059): [<000000000001fd58>] do_softirq+0xec/0xf0 other info that might help us debug this: no locks held by swapper/0. stack backtrace: 00000000012bb648 0000000000000002 0000000000000000 00000000012bb758 00000000012bb6c0 0000000000399122 0000000000399122 0000000000016b0a 0000000000000000 0000000000000001 0000000000000000 00000000004660e8 0000000000000000 000000000000000d 00000000012bb6b8 00000000012bb730 0000000000368b90 0000000000016b0a 00000000012bb6b8 00000000012bb708 Call Trace: ([<0000000000016a26>] show_trace+0x76/0xdc) [<0000000000016b2c>] show_stack+0xa0/0xd0 [<0000000000016b8a>] dump_stack+0x2e/0x3c [<000000000005e3da>] print_usage_bug+0x27e/0x290 [<000000000005e934>] mark_lock+0x548/0x6c0 [<000000000005fb0c>] __lock_acquire+0xad8/0xed0 [<00000000000604ae>] lock_acquire+0x9a/0xc8 [<000000000035a662>] _write_lock_irqsave+0x62/0x80 [<000000000026c7f8>] zfcp_erp_async_handler+0x3c/0x70 [<0000000000279178>] zfcp_fsf_req_dispatch+0xd8/0x1fa8 [<000000000027e538>] zfcp_fsf_req_complete+0x104/0xe4c [<0000000000274534>] zfcp_qdio_reqid_check+0xf4/0x178 [<000000000027469e>] zfcp_qdio_response_handler+0xe6/0x430 [<0000000000219dd4>] tiqdio_thinint_handler+0xd20/0x213c [<000000000020229a>] do_adapter_IO+0xb2/0xc0 [<0000000000206f32>] do_IRQ+0x136/0x16c [<0000000000020462>] io_no_vtime+0x16/0x1c [<0000000000019432>] cpu_idle+0x222/0x250 ([<0000000000019416>] cpu_idle+0x206/0x250) [<000000000001405a>] rest_init+0x5a/0x68 [<0000000000536998>] start_kernel+0x39c/0x3dc [<0000000000013046>] _stext+0x46/0x1000 Fix incorrect usage of erp_lock. Using the write_lock() variant is wrong, since this might lead to deadlocks. Acked-by: Andreas Herrmann <aherrman@de.ibm.com> Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-07-04 00:32:20 +00:00
write_lock_irq(&adapter->erp_lock);
zfcp_erp_action_to_running(erp_action);
[PATCH] zfcp: fix incorrect usage of erp_lock ================================= [ INFO: inconsistent lock state ] --------------------------------- inconsistent {hardirq-on-W} -> {in-hardirq-W} usage. swapper/0 [HC1[1]:SC0[0]:HE0:SE1] takes: (&adapter->erp_lock){+-..}, at: [<000000000026c7f8>] zfcp_erp_async_handler+0x3c/0x70 {hardirq-on-W} state was registered at: [<000000000005f33e>] __lock_acquire+0x30a/0xed0 [<00000000000604ae>] lock_acquire+0x9a/0xc8 [<000000000035a7ae>] _write_lock+0x4e/0x68 [<000000000026d822>] zfcp_erp_adapter_strategy_generic+0x286/0xd94 [<000000000026fd72>] zfcp_erp_strategy_do_action+0x91e/0x1a94 [<0000000000271a3a>] zfcp_erp_thread+0x21a/0x1568 [<0000000000019096>] kernel_thread_starter+0x6/0xc [<0000000000019090>] kernel_thread_starter+0x0/0xc irq event stamp: 12078 hardirqs last enabled at (12077): [<0000000000019416>] cpu_idle+0x206/0x250 hardirqs last disabled at (12078): [<0000000000020458>] io_no_vtime+0xc/0x1c softirqs last enabled at (12072): [<0000000000040b62>] __do_softirq+0x13a/0x180 softirqs last disabled at (12059): [<000000000001fd58>] do_softirq+0xec/0xf0 other info that might help us debug this: no locks held by swapper/0. stack backtrace: 00000000012bb648 0000000000000002 0000000000000000 00000000012bb758 00000000012bb6c0 0000000000399122 0000000000399122 0000000000016b0a 0000000000000000 0000000000000001 0000000000000000 00000000004660e8 0000000000000000 000000000000000d 00000000012bb6b8 00000000012bb730 0000000000368b90 0000000000016b0a 00000000012bb6b8 00000000012bb708 Call Trace: ([<0000000000016a26>] show_trace+0x76/0xdc) [<0000000000016b2c>] show_stack+0xa0/0xd0 [<0000000000016b8a>] dump_stack+0x2e/0x3c [<000000000005e3da>] print_usage_bug+0x27e/0x290 [<000000000005e934>] mark_lock+0x548/0x6c0 [<000000000005fb0c>] __lock_acquire+0xad8/0xed0 [<00000000000604ae>] lock_acquire+0x9a/0xc8 [<000000000035a662>] _write_lock_irqsave+0x62/0x80 [<000000000026c7f8>] zfcp_erp_async_handler+0x3c/0x70 [<0000000000279178>] zfcp_fsf_req_dispatch+0xd8/0x1fa8 [<000000000027e538>] zfcp_fsf_req_complete+0x104/0xe4c [<0000000000274534>] zfcp_qdio_reqid_check+0xf4/0x178 [<000000000027469e>] zfcp_qdio_response_handler+0xe6/0x430 [<0000000000219dd4>] tiqdio_thinint_handler+0xd20/0x213c [<000000000020229a>] do_adapter_IO+0xb2/0xc0 [<0000000000206f32>] do_IRQ+0x136/0x16c [<0000000000020462>] io_no_vtime+0x16/0x1c [<0000000000019432>] cpu_idle+0x222/0x250 ([<0000000000019416>] cpu_idle+0x206/0x250) [<000000000001405a>] rest_init+0x5a/0x68 [<0000000000536998>] start_kernel+0x39c/0x3dc [<0000000000013046>] _stext+0x46/0x1000 Fix incorrect usage of erp_lock. Using the write_lock() variant is wrong, since this might lead to deadlocks. Acked-by: Andreas Herrmann <aherrman@de.ibm.com> Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-07-04 00:32:20 +00:00
write_unlock_irq(&adapter->erp_lock);
if (zfcp_fsf_exchange_config_data(erp_action)) {
retval = ZFCP_ERP_FAILED;
ZFCP_LOG_INFO("error: initiation of exchange of "
"configuration data failed for "
"adapter %s\n",
zfcp_get_busid_by_adapter(adapter));
break;
}
ZFCP_LOG_DEBUG("Xchange underway\n");
/*
* Why this works:
* Both the normal completion handler as well as the timeout
* handler will do an 'up' when the 'exchange config data'
* request completes or times out. Thus, the signal to go on
* won't be lost utilizing this semaphore.
* Furthermore, this 'adapter_reopen' action is
* guaranteed to be the only action being there (highest action
* which prevents other actions from being created).
* Resulting from that, the wake signal recognized here
* _must_ be the one belonging to the 'exchange config
* data' request.
*/
zfcp_rec_dbf_event_thread_lock(6, adapter);
down(&adapter->erp_ready_sem);
zfcp_rec_dbf_event_thread_lock(7, adapter);
if (erp_action->status & ZFCP_STATUS_ERP_TIMEDOUT) {
ZFCP_LOG_INFO("error: exchange of configuration data "
"for adapter %s timed out\n",
zfcp_get_busid_by_adapter(adapter));
break;
}
if (!atomic_test_mask(ZFCP_STATUS_ADAPTER_HOST_CON_INIT,
&adapter->status))
break;
ZFCP_LOG_DEBUG("host connection still initialising... "
"waiting and retrying...\n");
/* sleep a little bit before retry */
ssleep(sleep);
sleep *= 2;
}
atomic_clear_mask(ZFCP_STATUS_ADAPTER_HOST_CON_INIT,
&adapter->status);
if (!atomic_test_mask(ZFCP_STATUS_ADAPTER_XCONFIG_OK,
&adapter->status)) {
ZFCP_LOG_INFO("error: exchange of configuration data for "
"adapter %s failed\n",
zfcp_get_busid_by_adapter(adapter));
retval = ZFCP_ERP_FAILED;
}
return retval;
}
static int
zfcp_erp_adapter_strategy_open_fsf_xport(struct zfcp_erp_action *erp_action)
{
int ret;
struct zfcp_adapter *adapter;
adapter = erp_action->adapter;
atomic_clear_mask(ZFCP_STATUS_ADAPTER_XPORT_OK, &adapter->status);
[PATCH] zfcp: fix incorrect usage of erp_lock ================================= [ INFO: inconsistent lock state ] --------------------------------- inconsistent {hardirq-on-W} -> {in-hardirq-W} usage. swapper/0 [HC1[1]:SC0[0]:HE0:SE1] takes: (&adapter->erp_lock){+-..}, at: [<000000000026c7f8>] zfcp_erp_async_handler+0x3c/0x70 {hardirq-on-W} state was registered at: [<000000000005f33e>] __lock_acquire+0x30a/0xed0 [<00000000000604ae>] lock_acquire+0x9a/0xc8 [<000000000035a7ae>] _write_lock+0x4e/0x68 [<000000000026d822>] zfcp_erp_adapter_strategy_generic+0x286/0xd94 [<000000000026fd72>] zfcp_erp_strategy_do_action+0x91e/0x1a94 [<0000000000271a3a>] zfcp_erp_thread+0x21a/0x1568 [<0000000000019096>] kernel_thread_starter+0x6/0xc [<0000000000019090>] kernel_thread_starter+0x0/0xc irq event stamp: 12078 hardirqs last enabled at (12077): [<0000000000019416>] cpu_idle+0x206/0x250 hardirqs last disabled at (12078): [<0000000000020458>] io_no_vtime+0xc/0x1c softirqs last enabled at (12072): [<0000000000040b62>] __do_softirq+0x13a/0x180 softirqs last disabled at (12059): [<000000000001fd58>] do_softirq+0xec/0xf0 other info that might help us debug this: no locks held by swapper/0. stack backtrace: 00000000012bb648 0000000000000002 0000000000000000 00000000012bb758 00000000012bb6c0 0000000000399122 0000000000399122 0000000000016b0a 0000000000000000 0000000000000001 0000000000000000 00000000004660e8 0000000000000000 000000000000000d 00000000012bb6b8 00000000012bb730 0000000000368b90 0000000000016b0a 00000000012bb6b8 00000000012bb708 Call Trace: ([<0000000000016a26>] show_trace+0x76/0xdc) [<0000000000016b2c>] show_stack+0xa0/0xd0 [<0000000000016b8a>] dump_stack+0x2e/0x3c [<000000000005e3da>] print_usage_bug+0x27e/0x290 [<000000000005e934>] mark_lock+0x548/0x6c0 [<000000000005fb0c>] __lock_acquire+0xad8/0xed0 [<00000000000604ae>] lock_acquire+0x9a/0xc8 [<000000000035a662>] _write_lock_irqsave+0x62/0x80 [<000000000026c7f8>] zfcp_erp_async_handler+0x3c/0x70 [<0000000000279178>] zfcp_fsf_req_dispatch+0xd8/0x1fa8 [<000000000027e538>] zfcp_fsf_req_complete+0x104/0xe4c [<0000000000274534>] zfcp_qdio_reqid_check+0xf4/0x178 [<000000000027469e>] zfcp_qdio_response_handler+0xe6/0x430 [<0000000000219dd4>] tiqdio_thinint_handler+0xd20/0x213c [<000000000020229a>] do_adapter_IO+0xb2/0xc0 [<0000000000206f32>] do_IRQ+0x136/0x16c [<0000000000020462>] io_no_vtime+0x16/0x1c [<0000000000019432>] cpu_idle+0x222/0x250 ([<0000000000019416>] cpu_idle+0x206/0x250) [<000000000001405a>] rest_init+0x5a/0x68 [<0000000000536998>] start_kernel+0x39c/0x3dc [<0000000000013046>] _stext+0x46/0x1000 Fix incorrect usage of erp_lock. Using the write_lock() variant is wrong, since this might lead to deadlocks. Acked-by: Andreas Herrmann <aherrman@de.ibm.com> Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-07-04 00:32:20 +00:00
write_lock_irq(&adapter->erp_lock);
zfcp_erp_action_to_running(erp_action);
[PATCH] zfcp: fix incorrect usage of erp_lock ================================= [ INFO: inconsistent lock state ] --------------------------------- inconsistent {hardirq-on-W} -> {in-hardirq-W} usage. swapper/0 [HC1[1]:SC0[0]:HE0:SE1] takes: (&adapter->erp_lock){+-..}, at: [<000000000026c7f8>] zfcp_erp_async_handler+0x3c/0x70 {hardirq-on-W} state was registered at: [<000000000005f33e>] __lock_acquire+0x30a/0xed0 [<00000000000604ae>] lock_acquire+0x9a/0xc8 [<000000000035a7ae>] _write_lock+0x4e/0x68 [<000000000026d822>] zfcp_erp_adapter_strategy_generic+0x286/0xd94 [<000000000026fd72>] zfcp_erp_strategy_do_action+0x91e/0x1a94 [<0000000000271a3a>] zfcp_erp_thread+0x21a/0x1568 [<0000000000019096>] kernel_thread_starter+0x6/0xc [<0000000000019090>] kernel_thread_starter+0x0/0xc irq event stamp: 12078 hardirqs last enabled at (12077): [<0000000000019416>] cpu_idle+0x206/0x250 hardirqs last disabled at (12078): [<0000000000020458>] io_no_vtime+0xc/0x1c softirqs last enabled at (12072): [<0000000000040b62>] __do_softirq+0x13a/0x180 softirqs last disabled at (12059): [<000000000001fd58>] do_softirq+0xec/0xf0 other info that might help us debug this: no locks held by swapper/0. stack backtrace: 00000000012bb648 0000000000000002 0000000000000000 00000000012bb758 00000000012bb6c0 0000000000399122 0000000000399122 0000000000016b0a 0000000000000000 0000000000000001 0000000000000000 00000000004660e8 0000000000000000 000000000000000d 00000000012bb6b8 00000000012bb730 0000000000368b90 0000000000016b0a 00000000012bb6b8 00000000012bb708 Call Trace: ([<0000000000016a26>] show_trace+0x76/0xdc) [<0000000000016b2c>] show_stack+0xa0/0xd0 [<0000000000016b8a>] dump_stack+0x2e/0x3c [<000000000005e3da>] print_usage_bug+0x27e/0x290 [<000000000005e934>] mark_lock+0x548/0x6c0 [<000000000005fb0c>] __lock_acquire+0xad8/0xed0 [<00000000000604ae>] lock_acquire+0x9a/0xc8 [<000000000035a662>] _write_lock_irqsave+0x62/0x80 [<000000000026c7f8>] zfcp_erp_async_handler+0x3c/0x70 [<0000000000279178>] zfcp_fsf_req_dispatch+0xd8/0x1fa8 [<000000000027e538>] zfcp_fsf_req_complete+0x104/0xe4c [<0000000000274534>] zfcp_qdio_reqid_check+0xf4/0x178 [<000000000027469e>] zfcp_qdio_response_handler+0xe6/0x430 [<0000000000219dd4>] tiqdio_thinint_handler+0xd20/0x213c [<000000000020229a>] do_adapter_IO+0xb2/0xc0 [<0000000000206f32>] do_IRQ+0x136/0x16c [<0000000000020462>] io_no_vtime+0x16/0x1c [<0000000000019432>] cpu_idle+0x222/0x250 ([<0000000000019416>] cpu_idle+0x206/0x250) [<000000000001405a>] rest_init+0x5a/0x68 [<0000000000536998>] start_kernel+0x39c/0x3dc [<0000000000013046>] _stext+0x46/0x1000 Fix incorrect usage of erp_lock. Using the write_lock() variant is wrong, since this might lead to deadlocks. Acked-by: Andreas Herrmann <aherrman@de.ibm.com> Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-07-04 00:32:20 +00:00
write_unlock_irq(&adapter->erp_lock);
ret = zfcp_fsf_exchange_port_data(erp_action);
if (ret == -EOPNOTSUPP) {
return ZFCP_ERP_SUCCEEDED;
} else if (ret) {
return ZFCP_ERP_FAILED;
}
ret = ZFCP_ERP_SUCCEEDED;
zfcp_rec_dbf_event_thread_lock(8, adapter);
down(&adapter->erp_ready_sem);
zfcp_rec_dbf_event_thread_lock(9, adapter);
if (erp_action->status & ZFCP_STATUS_ERP_TIMEDOUT) {
ZFCP_LOG_INFO("error: exchange port data timed out (adapter "
"%s)\n", zfcp_get_busid_by_adapter(adapter));
ret = ZFCP_ERP_FAILED;
}
/* don't treat as error for the sake of compatibility */
if (!atomic_test_mask(ZFCP_STATUS_ADAPTER_XPORT_OK, &adapter->status))
ZFCP_LOG_INFO("warning: exchange port data failed (adapter "
"%s\n", zfcp_get_busid_by_adapter(adapter));
return ret;
}
static int
zfcp_erp_adapter_strategy_open_fsf_statusread(struct zfcp_erp_action
*erp_action)
{
struct zfcp_adapter *adapter = erp_action->adapter;
atomic_set(&adapter->stat_miss, 16);
return zfcp_status_read_refill(adapter);
}
/*
* function:
*
* purpose: this routine executes the 'Reopen Physical Port' action
*
* returns: ZFCP_ERP_CONTINUES - action continues (asynchronously)
* ZFCP_ERP_SUCCEEDED - action finished successfully
* ZFCP_ERP_FAILED - action finished unsuccessfully
*/
static int
zfcp_erp_port_forced_strategy(struct zfcp_erp_action *erp_action)
{
int retval = ZFCP_ERP_FAILED;
struct zfcp_port *port = erp_action->port;
switch (erp_action->step) {
/*
* FIXME:
* the ULP spec. begs for waiting for oustanding commands
*/
case ZFCP_ERP_STEP_UNINITIALIZED:
zfcp_erp_port_strategy_clearstati(port);
/*
* it would be sufficient to test only the normal open flag
* since the phys. open flag cannot be set if the normal
* open flag is unset - however, this is for readabilty ...
*/
if (atomic_test_mask((ZFCP_STATUS_PORT_PHYS_OPEN |
ZFCP_STATUS_COMMON_OPEN),
&port->status)) {
ZFCP_LOG_DEBUG("port 0x%016Lx is open -> trying "
"close physical\n", port->wwpn);
retval =
zfcp_erp_port_forced_strategy_close(erp_action);
} else
retval = ZFCP_ERP_FAILED;
break;
case ZFCP_ERP_STEP_PHYS_PORT_CLOSING:
if (atomic_test_mask(ZFCP_STATUS_PORT_PHYS_OPEN,
&port->status)) {
ZFCP_LOG_DEBUG("close physical failed for port "
"0x%016Lx\n", port->wwpn);
retval = ZFCP_ERP_FAILED;
} else
retval = ZFCP_ERP_SUCCEEDED;
break;
}
return retval;
}
/*
* function:
*
* purpose: this routine executes the 'Reopen Port' action
*
* returns: ZFCP_ERP_CONTINUES - action continues (asynchronously)
* ZFCP_ERP_SUCCEEDED - action finished successfully
* ZFCP_ERP_FAILED - action finished unsuccessfully
*/
static int
zfcp_erp_port_strategy(struct zfcp_erp_action *erp_action)
{
int retval = ZFCP_ERP_FAILED;
struct zfcp_port *port = erp_action->port;
switch (erp_action->step) {
/*
* FIXME:
* the ULP spec. begs for waiting for oustanding commands
*/
case ZFCP_ERP_STEP_UNINITIALIZED:
zfcp_erp_port_strategy_clearstati(port);
if (atomic_test_mask(ZFCP_STATUS_COMMON_OPEN, &port->status)) {
ZFCP_LOG_DEBUG("port 0x%016Lx is open -> trying "
"close\n", port->wwpn);
retval = zfcp_erp_port_strategy_close(erp_action);
goto out;
} /* else it's already closed, open it */
break;
case ZFCP_ERP_STEP_PORT_CLOSING:
if (atomic_test_mask(ZFCP_STATUS_COMMON_OPEN, &port->status)) {
ZFCP_LOG_DEBUG("close failed for port 0x%016Lx\n",
port->wwpn);
retval = ZFCP_ERP_FAILED;
goto out;
} /* else it's closed now, open it */
break;
}
if (erp_action->status & ZFCP_STATUS_ERP_CLOSE_ONLY)
retval = ZFCP_ERP_EXIT;
else
retval = zfcp_erp_port_strategy_open(erp_action);
out:
return retval;
}
static int
zfcp_erp_port_strategy_open(struct zfcp_erp_action *erp_action)
{
int retval;
if (atomic_test_mask(ZFCP_STATUS_PORT_WKA,
&erp_action->port->status))
retval = zfcp_erp_port_strategy_open_nameserver(erp_action);
else
retval = zfcp_erp_port_strategy_open_common(erp_action);
return retval;
}
static int
zfcp_erp_port_strategy_open_common(struct zfcp_erp_action *erp_action)
{
int retval = 0;
struct zfcp_adapter *adapter = erp_action->adapter;
struct zfcp_port *port = erp_action->port;
switch (erp_action->step) {
case ZFCP_ERP_STEP_UNINITIALIZED:
case ZFCP_ERP_STEP_PHYS_PORT_CLOSING:
case ZFCP_ERP_STEP_PORT_CLOSING:
if (fc_host_port_type(adapter->scsi_host) == FC_PORTTYPE_PTP) {
if (port->wwpn != adapter->peer_wwpn) {
ZFCP_LOG_NORMAL("Failed to open port 0x%016Lx "
"on adapter %s.\nPeer WWPN "
"0x%016Lx does not match\n",
port->wwpn,
zfcp_get_busid_by_adapter(adapter),
adapter->peer_wwpn);
zfcp_erp_port_failed(port, 25, NULL);
retval = ZFCP_ERP_FAILED;
break;
}
port->d_id = adapter->peer_d_id;
atomic_set_mask(ZFCP_STATUS_PORT_DID_DID, &port->status);
retval = zfcp_erp_port_strategy_open_port(erp_action);
break;
}
if (!(adapter->nameserver_port)) {
retval = zfcp_nameserver_enqueue(adapter);
if (retval != 0) {
ZFCP_LOG_NORMAL("error: nameserver port "
"unavailable for adapter %s\n",
zfcp_get_busid_by_adapter(adapter));
retval = ZFCP_ERP_FAILED;
break;
}
}
if (!atomic_test_mask(ZFCP_STATUS_COMMON_UNBLOCKED,
&adapter->nameserver_port->status)) {
ZFCP_LOG_DEBUG("nameserver port is not open -> open "
"nameserver port\n");
/* nameserver port may live again */
atomic_set_mask(ZFCP_STATUS_COMMON_RUNNING,
&adapter->nameserver_port->status);
if (zfcp_erp_port_reopen(adapter->nameserver_port, 0,
77, erp_action) >= 0) {
erp_action->step =
ZFCP_ERP_STEP_NAMESERVER_OPEN;
retval = ZFCP_ERP_CONTINUES;
} else
retval = ZFCP_ERP_FAILED;
break;
}
/* else nameserver port is already open, fall through */
case ZFCP_ERP_STEP_NAMESERVER_OPEN:
if (!atomic_test_mask(ZFCP_STATUS_COMMON_OPEN,
&adapter->nameserver_port->status)) {
ZFCP_LOG_DEBUG("open failed for nameserver port\n");
retval = ZFCP_ERP_FAILED;
} else {
ZFCP_LOG_DEBUG("nameserver port is open -> "
"nameserver look-up for port 0x%016Lx\n",
port->wwpn);
retval = zfcp_erp_port_strategy_open_common_lookup
(erp_action);
}
break;
case ZFCP_ERP_STEP_NAMESERVER_LOOKUP:
if (!atomic_test_mask(ZFCP_STATUS_PORT_DID_DID, &port->status)) {
if (atomic_test_mask
(ZFCP_STATUS_PORT_INVALID_WWPN, &port->status)) {
ZFCP_LOG_DEBUG("nameserver look-up failed "
"for port 0x%016Lx "
"(misconfigured WWPN?)\n",
port->wwpn);
zfcp_erp_port_failed(port, 26, NULL);
retval = ZFCP_ERP_EXIT;
} else {
ZFCP_LOG_DEBUG("nameserver look-up failed for "
"port 0x%016Lx\n", port->wwpn);
retval = ZFCP_ERP_FAILED;
}
} else {
ZFCP_LOG_DEBUG("port 0x%016Lx has d_id=0x%06x -> "
"trying open\n", port->wwpn, port->d_id);
retval = zfcp_erp_port_strategy_open_port(erp_action);
}
break;
case ZFCP_ERP_STEP_PORT_OPENING:
/* D_ID might have changed during open */
if (atomic_test_mask((ZFCP_STATUS_COMMON_OPEN |
ZFCP_STATUS_PORT_DID_DID),
&port->status)) {
ZFCP_LOG_DEBUG("port 0x%016Lx is open\n", port->wwpn);
retval = ZFCP_ERP_SUCCEEDED;
} else {
ZFCP_LOG_DEBUG("open failed for port 0x%016Lx\n",
port->wwpn);
retval = ZFCP_ERP_FAILED;
}
break;
default:
ZFCP_LOG_NORMAL("bug: unknown erp step 0x%08x\n",
erp_action->step);
retval = ZFCP_ERP_FAILED;
}
return retval;
}
static int
zfcp_erp_port_strategy_open_nameserver(struct zfcp_erp_action *erp_action)
{
int retval;
struct zfcp_port *port = erp_action->port;
switch (erp_action->step) {
case ZFCP_ERP_STEP_UNINITIALIZED:
case ZFCP_ERP_STEP_PHYS_PORT_CLOSING:
case ZFCP_ERP_STEP_PORT_CLOSING:
ZFCP_LOG_DEBUG("port 0x%016Lx has d_id=0x%06x -> trying open\n",
port->wwpn, port->d_id);
retval = zfcp_erp_port_strategy_open_port(erp_action);
break;
case ZFCP_ERP_STEP_PORT_OPENING:
if (atomic_test_mask(ZFCP_STATUS_COMMON_OPEN, &port->status)) {
ZFCP_LOG_DEBUG("WKA port is open\n");
retval = ZFCP_ERP_SUCCEEDED;
} else {
ZFCP_LOG_DEBUG("open failed for WKA port\n");
retval = ZFCP_ERP_FAILED;
}
/* this is needed anyway (dont care for retval of wakeup) */
ZFCP_LOG_DEBUG("continue other open port operations\n");
zfcp_erp_port_strategy_open_nameserver_wakeup(erp_action);
break;
default:
ZFCP_LOG_NORMAL("bug: unknown erp step 0x%08x\n",
erp_action->step);
retval = ZFCP_ERP_FAILED;
}
return retval;
}
/*
* function:
*
* purpose: makes the erp thread continue with reopen (physical) port
* actions which have been paused until the name server port
* is opened (or failed)
*
* returns: 0 (a kind of void retval, its not used)
*/
static int
zfcp_erp_port_strategy_open_nameserver_wakeup(struct zfcp_erp_action
*ns_erp_action)
{
int retval = 0;
unsigned long flags;
struct zfcp_adapter *adapter = ns_erp_action->adapter;
struct zfcp_erp_action *erp_action, *tmp;
read_lock_irqsave(&adapter->erp_lock, flags);
list_for_each_entry_safe(erp_action, tmp, &adapter->erp_running_head,
list) {
if (erp_action->step == ZFCP_ERP_STEP_NAMESERVER_OPEN) {
if (atomic_test_mask(
ZFCP_STATUS_COMMON_ERP_FAILED,
&adapter->nameserver_port->status))
zfcp_erp_port_failed(erp_action->port, 27,
NULL);
zfcp_erp_action_ready(erp_action);
}
}
read_unlock_irqrestore(&adapter->erp_lock, flags);
return retval;
}
/*
* function:
*
* purpose:
*
* returns: ZFCP_ERP_CONTINUES - action continues (asynchronously)
* ZFCP_ERP_FAILED - action finished unsuccessfully
*/
static int
zfcp_erp_port_forced_strategy_close(struct zfcp_erp_action *erp_action)
{
int retval;
retval = zfcp_fsf_close_physical_port(erp_action);
if (retval == -ENOMEM) {
retval = ZFCP_ERP_NOMEM;
goto out;
}
erp_action->step = ZFCP_ERP_STEP_PHYS_PORT_CLOSING;
if (retval != 0) {
/* could not send 'open', fail */
retval = ZFCP_ERP_FAILED;
goto out;
}
retval = ZFCP_ERP_CONTINUES;
out:
return retval;
}
static int
zfcp_erp_port_strategy_clearstati(struct zfcp_port *port)
{
int retval = 0;
atomic_clear_mask(ZFCP_STATUS_COMMON_OPENING |
ZFCP_STATUS_COMMON_CLOSING |
ZFCP_STATUS_COMMON_ACCESS_DENIED |
ZFCP_STATUS_PORT_DID_DID |
ZFCP_STATUS_PORT_PHYS_CLOSING |
ZFCP_STATUS_PORT_INVALID_WWPN,
&port->status);
return retval;
}
/*
* function:
*
* purpose:
*
* returns: ZFCP_ERP_CONTINUES - action continues (asynchronously)
* ZFCP_ERP_FAILED - action finished unsuccessfully
*/
static int
zfcp_erp_port_strategy_close(struct zfcp_erp_action *erp_action)
{
int retval;
retval = zfcp_fsf_close_port(erp_action);
if (retval == -ENOMEM) {
retval = ZFCP_ERP_NOMEM;
goto out;
}
erp_action->step = ZFCP_ERP_STEP_PORT_CLOSING;
if (retval != 0) {
/* could not send 'close', fail */
retval = ZFCP_ERP_FAILED;
goto out;
}
retval = ZFCP_ERP_CONTINUES;
out:
return retval;
}
/*
* function:
*
* purpose:
*
* returns: ZFCP_ERP_CONTINUES - action continues (asynchronously)
* ZFCP_ERP_FAILED - action finished unsuccessfully
*/
static int
zfcp_erp_port_strategy_open_port(struct zfcp_erp_action *erp_action)
{
int retval;
retval = zfcp_fsf_open_port(erp_action);
if (retval == -ENOMEM) {
retval = ZFCP_ERP_NOMEM;
goto out;
}
erp_action->step = ZFCP_ERP_STEP_PORT_OPENING;
if (retval != 0) {
/* could not send 'open', fail */
retval = ZFCP_ERP_FAILED;
goto out;
}
retval = ZFCP_ERP_CONTINUES;
out:
return retval;
}
/*
* function:
*
* purpose:
*
* returns: ZFCP_ERP_CONTINUES - action continues (asynchronously)
* ZFCP_ERP_FAILED - action finished unsuccessfully
*/
static int
zfcp_erp_port_strategy_open_common_lookup(struct zfcp_erp_action *erp_action)
{
int retval;
retval = zfcp_fc_ns_gid_pn_request(erp_action);
if (retval == -ENOMEM) {
retval = ZFCP_ERP_NOMEM;
goto out;
}
erp_action->step = ZFCP_ERP_STEP_NAMESERVER_LOOKUP;
if (retval != 0) {
/* could not send nameserver request, fail */
retval = ZFCP_ERP_FAILED;
goto out;
}
retval = ZFCP_ERP_CONTINUES;
out:
return retval;
}
/*
* function:
*
* purpose: this routine executes the 'Reopen Unit' action
* currently no retries
*
* returns: ZFCP_ERP_CONTINUES - action continues (asynchronously)
* ZFCP_ERP_SUCCEEDED - action finished successfully
* ZFCP_ERP_FAILED - action finished unsuccessfully
*/
static int
zfcp_erp_unit_strategy(struct zfcp_erp_action *erp_action)
{
int retval = ZFCP_ERP_FAILED;
struct zfcp_unit *unit = erp_action->unit;
switch (erp_action->step) {
/*
* FIXME:
* the ULP spec. begs for waiting for oustanding commands
*/
case ZFCP_ERP_STEP_UNINITIALIZED:
zfcp_erp_unit_strategy_clearstati(unit);
if (atomic_test_mask(ZFCP_STATUS_COMMON_OPEN, &unit->status)) {
ZFCP_LOG_DEBUG("unit 0x%016Lx is open -> "
"trying close\n", unit->fcp_lun);
retval = zfcp_erp_unit_strategy_close(erp_action);
break;
}
/* else it's already closed, fall through */
case ZFCP_ERP_STEP_UNIT_CLOSING:
if (atomic_test_mask(ZFCP_STATUS_COMMON_OPEN, &unit->status)) {
ZFCP_LOG_DEBUG("close failed for unit 0x%016Lx\n",
unit->fcp_lun);
retval = ZFCP_ERP_FAILED;
} else {
if (erp_action->status & ZFCP_STATUS_ERP_CLOSE_ONLY)
retval = ZFCP_ERP_EXIT;
else {
ZFCP_LOG_DEBUG("unit 0x%016Lx is not open -> "
"trying open\n", unit->fcp_lun);
retval =
zfcp_erp_unit_strategy_open(erp_action);
}
}
break;
case ZFCP_ERP_STEP_UNIT_OPENING:
if (atomic_test_mask(ZFCP_STATUS_COMMON_OPEN, &unit->status)) {
ZFCP_LOG_DEBUG("unit 0x%016Lx is open\n",
unit->fcp_lun);
retval = ZFCP_ERP_SUCCEEDED;
} else {
ZFCP_LOG_DEBUG("open failed for unit 0x%016Lx\n",
unit->fcp_lun);
retval = ZFCP_ERP_FAILED;
}
break;
}
return retval;
}
static int
zfcp_erp_unit_strategy_clearstati(struct zfcp_unit *unit)
{
int retval = 0;
atomic_clear_mask(ZFCP_STATUS_COMMON_OPENING |
ZFCP_STATUS_COMMON_CLOSING |
ZFCP_STATUS_COMMON_ACCESS_DENIED |
ZFCP_STATUS_UNIT_SHARED |
ZFCP_STATUS_UNIT_READONLY,
&unit->status);
return retval;
}
/*
* function:
*
* purpose:
*
* returns: ZFCP_ERP_CONTINUES - action continues (asynchronously)
* ZFCP_ERP_FAILED - action finished unsuccessfully
*/
static int
zfcp_erp_unit_strategy_close(struct zfcp_erp_action *erp_action)
{
int retval;
retval = zfcp_fsf_close_unit(erp_action);
if (retval == -ENOMEM) {
retval = ZFCP_ERP_NOMEM;
goto out;
}
erp_action->step = ZFCP_ERP_STEP_UNIT_CLOSING;
if (retval != 0) {
/* could not send 'close', fail */
retval = ZFCP_ERP_FAILED;
goto out;
}
retval = ZFCP_ERP_CONTINUES;
out:
return retval;
}
/*
* function:
*
* purpose:
*
* returns: ZFCP_ERP_CONTINUES - action continues (asynchronously)
* ZFCP_ERP_FAILED - action finished unsuccessfully
*/
static int
zfcp_erp_unit_strategy_open(struct zfcp_erp_action *erp_action)
{
int retval;
retval = zfcp_fsf_open_unit(erp_action);
if (retval == -ENOMEM) {
retval = ZFCP_ERP_NOMEM;
goto out;
}
erp_action->step = ZFCP_ERP_STEP_UNIT_OPENING;
if (retval != 0) {
/* could not send 'open', fail */
retval = ZFCP_ERP_FAILED;
goto out;
}
retval = ZFCP_ERP_CONTINUES;
out:
return retval;
}
void zfcp_erp_start_timer(struct zfcp_fsf_req *fsf_req)
{
BUG_ON(!fsf_req->erp_action);
fsf_req->timer.function = zfcp_erp_timeout_handler;
fsf_req->timer.data = (unsigned long) fsf_req->erp_action;
fsf_req->timer.expires = jiffies + ZFCP_ERP_FSFREQ_TIMEOUT;
add_timer(&fsf_req->timer);
}
/*
* function:
*
* purpose: enqueue the specified error recovery action, if needed
*
* returns:
*/
static int zfcp_erp_action_enqueue(int want, struct zfcp_adapter *adapter,
struct zfcp_port *port,
struct zfcp_unit *unit, u8 id, void *ref)
{
int retval = 1, need = want;
struct zfcp_erp_action *erp_action = NULL;
u32 status = 0;
/*
* We need some rules here which check whether we really need
* this action or whether we should just drop it.
* E.g. if there is a unfinished 'Reopen Port' request then we drop a
* 'Reopen Unit' request for an associated unit since we can't
* satisfy this request now. A 'Reopen Port' action will trigger
* 'Reopen Unit' actions when it completes.
* Thus, there are only actions in the queue which can immediately be
* executed. This makes the processing of the action queue more
* efficient.
*/
if (!atomic_test_mask(ZFCP_STATUS_ADAPTER_ERP_THREAD_UP,
&adapter->status))
return -EIO;
/* check whether we really need this */
switch (want) {
case ZFCP_ERP_ACTION_REOPEN_UNIT:
if (atomic_test_mask
(ZFCP_STATUS_COMMON_ERP_INUSE, &unit->status)) {
goto out;
}
if (!atomic_test_mask
(ZFCP_STATUS_COMMON_RUNNING, &port->status) ||
atomic_test_mask
(ZFCP_STATUS_COMMON_ERP_FAILED, &port->status)) {
goto out;
}
if (!atomic_test_mask
(ZFCP_STATUS_COMMON_UNBLOCKED, &port->status))
need = ZFCP_ERP_ACTION_REOPEN_PORT;
/* fall through !!! */
case ZFCP_ERP_ACTION_REOPEN_PORT:
if (atomic_test_mask
(ZFCP_STATUS_COMMON_ERP_INUSE, &port->status)) {
goto out;
}
/* fall through !!! */
case ZFCP_ERP_ACTION_REOPEN_PORT_FORCED:
if (atomic_test_mask(ZFCP_STATUS_COMMON_ERP_INUSE,
&port->status)) {
if (port->erp_action.action !=
ZFCP_ERP_ACTION_REOPEN_PORT_FORCED) {
ZFCP_LOG_INFO("dropped erp action %i (port "
"0x%016Lx, action in use: %i)\n",
want, port->wwpn,
port->erp_action.action);
}
goto out;
}
if (!atomic_test_mask
(ZFCP_STATUS_COMMON_RUNNING, &adapter->status) ||
atomic_test_mask
(ZFCP_STATUS_COMMON_ERP_FAILED, &adapter->status)) {
goto out;
}
if (!atomic_test_mask
(ZFCP_STATUS_COMMON_UNBLOCKED, &adapter->status))
need = ZFCP_ERP_ACTION_REOPEN_ADAPTER;
/* fall through !!! */
case ZFCP_ERP_ACTION_REOPEN_ADAPTER:
if (atomic_test_mask
(ZFCP_STATUS_COMMON_ERP_INUSE, &adapter->status)) {
goto out;
}
break;
default:
ZFCP_LOG_NORMAL("bug: unknown erp action requested "
"on adapter %s (action=%d)\n",
zfcp_get_busid_by_adapter(adapter), want);
goto out;
}
/* check whether we need something stronger first */
if (need) {
ZFCP_LOG_DEBUG("stronger erp action %d needed before "
"erp action %d on adapter %s\n",
need, want, zfcp_get_busid_by_adapter(adapter));
}
/* mark adapter to have some error recovery pending */
atomic_set_mask(ZFCP_STATUS_ADAPTER_ERP_PENDING, &adapter->status);
/* setup error recovery action */
switch (need) {
case ZFCP_ERP_ACTION_REOPEN_UNIT:
zfcp_unit_get(unit);
atomic_set_mask(ZFCP_STATUS_COMMON_ERP_INUSE, &unit->status);
erp_action = &unit->erp_action;
if (!atomic_test_mask
(ZFCP_STATUS_COMMON_RUNNING, &unit->status))
status = ZFCP_STATUS_ERP_CLOSE_ONLY;
break;
case ZFCP_ERP_ACTION_REOPEN_PORT:
case ZFCP_ERP_ACTION_REOPEN_PORT_FORCED:
zfcp_port_get(port);
zfcp_erp_action_dismiss_port(port);
atomic_set_mask(ZFCP_STATUS_COMMON_ERP_INUSE, &port->status);
erp_action = &port->erp_action;
if (!atomic_test_mask
(ZFCP_STATUS_COMMON_RUNNING, &port->status))
status = ZFCP_STATUS_ERP_CLOSE_ONLY;
break;
case ZFCP_ERP_ACTION_REOPEN_ADAPTER:
zfcp_adapter_get(adapter);
zfcp_erp_action_dismiss_adapter(adapter);
atomic_set_mask(ZFCP_STATUS_COMMON_ERP_INUSE, &adapter->status);
erp_action = &adapter->erp_action;
if (!atomic_test_mask
(ZFCP_STATUS_COMMON_RUNNING, &adapter->status))
status = ZFCP_STATUS_ERP_CLOSE_ONLY;
break;
}
memset(erp_action, 0, sizeof (struct zfcp_erp_action));
erp_action->adapter = adapter;
erp_action->port = port;
erp_action->unit = unit;
erp_action->action = need;
erp_action->status = status;
++adapter->erp_total_count;
/* finally put it into 'ready' queue and kick erp thread */
[SCSI] zfcp: fix cleanup of dismissed error recovery actions Calling zfcp_erp_strategy_check_action() after zfcp_erp_action_to_running() in zfcp_erp_strategy() might cause an unbalanced up() for erp_ready_sem, which makes the zfcp recovery fail somewhere along the way: erp thread processing erp_action: | | someone waking up erp thread for erp_action | | | | someone else dismissing erp_action: | | | V V V write_lock_irqsave(&adapter->erp_lock, flags); ... if (zfcp_erp_action_exists(erp_action) == ZFCP_ERP_ACTION_RUNNING) { zfcp_erp_action_to_ready(erp_action); up(&adapter->erp_ready_sem); /* first up() for erp_action */ } write_unlock_irqrestore(&adapter->erp_lock, flags); write_lock_irqsave(&adapter->erp_lock, flags); ... zfcp_erp_action_to_running(erp_action); write_unlock_restore(&adapter->erp_lock, flags); /* processing erp_action */ write_lock_irqsave(&adapter->erp_lock, flags); ... erp_action->status |= ZFCP_STATUS_ERP_DISMISSED; if (zfcp_erp_action_exists(erp_action) == ZFCP_ERP_ACTION_RUNNING) { zfcp_erp_action_to_ready(erp_action); up(&adapter->erp_ready_sem); /* second, unbalanced up() for erp_action */ } ... write_unlock_restore(&adapter->erp_lock, flags); write_lock_irqsave(&adapter->erp_lock, flags); if (erp_action->status & ZFCP_STATUS_ERP_DISMISSED) { zfcp_erp_action_dequeue(erp_action); retval = ZFCP_ERP_DISMISSED; } ... write_unlock_restore(&adapter->erp_lock, flags); down(&adapter->erp_ready_sem); /* this down() is meant to balance the first up() */ The erp thread must not dismiss an erp_action after moving that action to erp_running_head. Instead it should just go through the down() operation, which balances the first up(), and run through zfcp_erp_strategy one more time for the second up(), which eventually cleans up erp_action. Which is similar to the normal processing of an event for erp_action doing something asynchronously (e.g. waiting for the completion of an fsf_req). This only works if we make sure that a dismissed erp_action is passed to zfcp_erp_strategy() prior to the other action, which caused actions to be dismissed. Therefore the patch implements this rule: running actions go to the head of the ready list; new actions go to the tail of the ready list; the erp thread picks actions to be processed from the ready list's head. Signed-off-by: Martin Peschke <mp3@de.ibm.com> Acked-by: Swen Schillig <swen@vnet.ibm.com> Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
2007-11-15 12:57:17 +00:00
list_add_tail(&erp_action->list, &adapter->erp_ready_head);
up(&adapter->erp_ready_sem);
zfcp_rec_dbf_event_thread(1, adapter);
retval = 0;
out:
zfcp_rec_dbf_event_trigger(id, ref, want, need, erp_action,
adapter, port, unit);
return retval;
}
static int
zfcp_erp_action_dequeue(struct zfcp_erp_action *erp_action)
{
int retval = 0;
struct zfcp_adapter *adapter = erp_action->adapter;
--adapter->erp_total_count;
if (erp_action->status & ZFCP_STATUS_ERP_LOWMEM) {
--adapter->erp_low_mem_count;
erp_action->status &= ~ZFCP_STATUS_ERP_LOWMEM;
}
list_del(&erp_action->list);
zfcp_rec_dbf_event_action(144, erp_action);
switch (erp_action->action) {
case ZFCP_ERP_ACTION_REOPEN_UNIT:
atomic_clear_mask(ZFCP_STATUS_COMMON_ERP_INUSE,
&erp_action->unit->status);
break;
case ZFCP_ERP_ACTION_REOPEN_PORT_FORCED:
case ZFCP_ERP_ACTION_REOPEN_PORT:
atomic_clear_mask(ZFCP_STATUS_COMMON_ERP_INUSE,
&erp_action->port->status);
break;
case ZFCP_ERP_ACTION_REOPEN_ADAPTER:
atomic_clear_mask(ZFCP_STATUS_COMMON_ERP_INUSE,
&erp_action->adapter->status);
break;
default:
/* bug */
break;
}
return retval;
}
/**
* zfcp_erp_action_cleanup
*
* Register unit with scsi stack if appropriate and fix reference counts.
* Note: Temporary units are not registered with scsi stack.
*/
static void
zfcp_erp_action_cleanup(int action, struct zfcp_adapter *adapter,
struct zfcp_port *port, struct zfcp_unit *unit,
int result)
{
switch (action) {
case ZFCP_ERP_ACTION_REOPEN_UNIT:
if ((result == ZFCP_ERP_SUCCEEDED)
&& (!atomic_test_mask(ZFCP_STATUS_UNIT_TEMPORARY,
&unit->status))
&& !unit->device
&& port->rport) {
atomic_set_mask(ZFCP_STATUS_UNIT_REGISTERED,
&unit->status);
if (atomic_test_mask(ZFCP_STATUS_UNIT_SCSI_WORK_PENDING,
&unit->status) == 0)
zfcp_erp_schedule_work(unit);
}
zfcp_unit_put(unit);
break;
case ZFCP_ERP_ACTION_REOPEN_PORT_FORCED:
case ZFCP_ERP_ACTION_REOPEN_PORT:
if (atomic_test_mask(ZFCP_STATUS_PORT_NO_WWPN,
&port->status)) {
zfcp_port_put(port);
break;
}
if ((result == ZFCP_ERP_SUCCEEDED)
&& !port->rport) {
struct fc_rport_identifiers ids;
ids.node_name = port->wwnn;
ids.port_name = port->wwpn;
ids.port_id = port->d_id;
ids.roles = FC_RPORT_ROLE_FCP_TARGET;
port->rport =
fc_remote_port_add(adapter->scsi_host, 0, &ids);
if (!port->rport)
ZFCP_LOG_NORMAL("failed registration of rport"
"(adapter %s, wwpn=0x%016Lx)\n",
zfcp_get_busid_by_port(port),
port->wwpn);
else {
scsi_target_unblock(&port->rport->dev);
port->rport->maxframe_size = port->maxframe_size;
port->rport->supported_classes =
port->supported_classes;
}
}
if ((result != ZFCP_ERP_SUCCEEDED) && port->rport) {
fc_remote_port_delete(port->rport);
port->rport = NULL;
}
zfcp_port_put(port);
break;
case ZFCP_ERP_ACTION_REOPEN_ADAPTER:
if (result != ZFCP_ERP_SUCCEEDED) {
list_for_each_entry(port, &adapter->port_list_head, list)
if (port->rport &&
!atomic_test_mask(ZFCP_STATUS_PORT_WKA,
&port->status)) {
fc_remote_port_delete(port->rport);
port->rport = NULL;
}
}
zfcp_adapter_put(adapter);
break;
default:
break;
}
}
static void zfcp_erp_action_dismiss_adapter(struct zfcp_adapter *adapter)
{
struct zfcp_port *port;
if (atomic_test_mask(ZFCP_STATUS_COMMON_ERP_INUSE, &adapter->status))
zfcp_erp_action_dismiss(&adapter->erp_action);
else
list_for_each_entry(port, &adapter->port_list_head, list)
zfcp_erp_action_dismiss_port(port);
}
static void zfcp_erp_action_dismiss_port(struct zfcp_port *port)
{
struct zfcp_unit *unit;
if (atomic_test_mask(ZFCP_STATUS_COMMON_ERP_INUSE, &port->status))
zfcp_erp_action_dismiss(&port->erp_action);
else
list_for_each_entry(unit, &port->unit_list_head, list)
zfcp_erp_action_dismiss_unit(unit);
}
static void zfcp_erp_action_dismiss_unit(struct zfcp_unit *unit)
{
if (atomic_test_mask(ZFCP_STATUS_COMMON_ERP_INUSE, &unit->status))
zfcp_erp_action_dismiss(&unit->erp_action);
}
static void zfcp_erp_action_to_running(struct zfcp_erp_action *erp_action)
{
list_move(&erp_action->list, &erp_action->adapter->erp_running_head);
zfcp_rec_dbf_event_action(145, erp_action);
}
static void zfcp_erp_action_to_ready(struct zfcp_erp_action *erp_action)
{
list_move(&erp_action->list, &erp_action->adapter->erp_ready_head);
zfcp_rec_dbf_event_action(146, erp_action);
}
void zfcp_erp_port_boxed(struct zfcp_port *port, u8 id, void *ref)
{
unsigned long flags;
read_lock_irqsave(&zfcp_data.config_lock, flags);
zfcp_erp_modify_port_status(port, id, ref,
ZFCP_STATUS_COMMON_ACCESS_BOXED, ZFCP_SET);
read_unlock_irqrestore(&zfcp_data.config_lock, flags);
zfcp_erp_port_reopen(port, ZFCP_STATUS_COMMON_ERP_FAILED, id, ref);
}
void zfcp_erp_unit_boxed(struct zfcp_unit *unit, u8 id, void *ref)
{
zfcp_erp_modify_unit_status(unit, id, ref,
ZFCP_STATUS_COMMON_ACCESS_BOXED, ZFCP_SET);
zfcp_erp_unit_reopen(unit, ZFCP_STATUS_COMMON_ERP_FAILED, id, ref);
}
void zfcp_erp_port_access_denied(struct zfcp_port *port, u8 id, void *ref)
{
unsigned long flags;
read_lock_irqsave(&zfcp_data.config_lock, flags);
zfcp_erp_modify_port_status(port, id, ref,
ZFCP_STATUS_COMMON_ERP_FAILED |
ZFCP_STATUS_COMMON_ACCESS_DENIED, ZFCP_SET);
read_unlock_irqrestore(&zfcp_data.config_lock, flags);
}
void zfcp_erp_unit_access_denied(struct zfcp_unit *unit, u8 id, void *ref)
{
zfcp_erp_modify_unit_status(unit, id, ref,
ZFCP_STATUS_COMMON_ERP_FAILED |
ZFCP_STATUS_COMMON_ACCESS_DENIED, ZFCP_SET);
}
void zfcp_erp_adapter_access_changed(struct zfcp_adapter *adapter, u8 id,
void *ref)
{
struct zfcp_port *port;
unsigned long flags;
if (adapter->connection_features & FSF_FEATURE_NPIV_MODE)
return;
read_lock_irqsave(&zfcp_data.config_lock, flags);
if (adapter->nameserver_port)
zfcp_erp_port_access_changed(adapter->nameserver_port, id, ref);
list_for_each_entry(port, &adapter->port_list_head, list)
if (port != adapter->nameserver_port)
zfcp_erp_port_access_changed(port, id, ref);
read_unlock_irqrestore(&zfcp_data.config_lock, flags);
}
void zfcp_erp_port_access_changed(struct zfcp_port *port, u8 id, void *ref)
{
struct zfcp_adapter *adapter = port->adapter;
struct zfcp_unit *unit;
if (!atomic_test_mask(ZFCP_STATUS_COMMON_ACCESS_DENIED,
&port->status) &&
!atomic_test_mask(ZFCP_STATUS_COMMON_ACCESS_BOXED,
&port->status)) {
if (!atomic_test_mask(ZFCP_STATUS_PORT_WKA, &port->status))
list_for_each_entry(unit, &port->unit_list_head, list)
zfcp_erp_unit_access_changed(unit, id, ref);
return;
}
ZFCP_LOG_NORMAL("reopen of port 0x%016Lx on adapter %s "
"(due to ACT update)\n",
port->wwpn, zfcp_get_busid_by_adapter(adapter));
if (zfcp_erp_port_reopen(port, ZFCP_STATUS_COMMON_ERP_FAILED, id, ref))
ZFCP_LOG_NORMAL("failed reopen of port"
"(adapter %s, wwpn=0x%016Lx)\n",
zfcp_get_busid_by_adapter(adapter), port->wwpn);
}
void zfcp_erp_unit_access_changed(struct zfcp_unit *unit, u8 id, void *ref)
{
struct zfcp_adapter *adapter = unit->port->adapter;
if (!atomic_test_mask(ZFCP_STATUS_COMMON_ACCESS_DENIED,
&unit->status) &&
!atomic_test_mask(ZFCP_STATUS_COMMON_ACCESS_BOXED,
&unit->status))
return;
ZFCP_LOG_NORMAL("reopen of unit 0x%016Lx on port 0x%016Lx "
" on adapter %s (due to ACT update)\n",
unit->fcp_lun, unit->port->wwpn,
zfcp_get_busid_by_adapter(adapter));
if (zfcp_erp_unit_reopen(unit, ZFCP_STATUS_COMMON_ERP_FAILED, id, ref))
ZFCP_LOG_NORMAL("failed reopen of unit (adapter %s, "
"wwpn=0x%016Lx, fcp_lun=0x%016Lx)\n",
zfcp_get_busid_by_adapter(adapter),
unit->port->wwpn, unit->fcp_lun);
}
#undef ZFCP_LOG_AREA