s390/cio: Implement proper Link Incident Record handling

A hardware problem on a FICON link is reported by the Channel Subsystem
to the operating system via a Link Incident Record (LIR). In response,
the operating system should issue a message that enables hardware
service personnel to identify and repair the failing component.

Current Linux LIR handling is broken because LIR data is incorrectly
interpreted and no log message is generated.

This patch fixes Linux LIR handling by implementing a new log message
for LIRs indicating a degraded or non-operational link. Also LIRs are
no longer used to deactivate channel paths because the available data
does not reliably allow to determine the affected channel path.

Signed-off-by: Peter Oberparleiter <oberpar@linux.vnet.ibm.com>
Reviewed-by: Sebastian Ott <sebott@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
This commit is contained in:
Peter Oberparleiter 2015-07-15 14:04:18 +02:00 committed by Martin Schwidefsky
parent b1865401cf
commit 1d2334cb7d

View File

@ -21,6 +21,7 @@
#include <asm/chsc.h>
#include <asm/crw.h>
#include <asm/isc.h>
#include <asm/ebcdic.h>
#include "css.h"
#include "cio.h"
@ -272,36 +273,6 @@ static void s390_process_res_acc(struct chp_link *link)
css_schedule_reprobe();
}
static int
__get_chpid_from_lir(void *data)
{
struct lir {
u8 iq;
u8 ic;
u16 sci;
/* incident-node descriptor */
u32 indesc[28];
/* attached-node descriptor */
u32 andesc[28];
/* incident-specific information */
u32 isinfo[28];
} __attribute__ ((packed)) *lir;
lir = data;
if (!(lir->iq&0x80))
/* NULL link incident record */
return -EINVAL;
if (!(lir->indesc[0]&0xc0000000))
/* node descriptor not valid */
return -EINVAL;
if (!(lir->indesc[0]&0x10000000))
/* don't handle device-type nodes - FIXME */
return -EINVAL;
/* Byte 3 contains the chpid. Could also be CTCA, but we don't care */
return (u16) (lir->indesc[0]&0x000000ff);
}
struct chsc_sei_nt0_area {
u8 flags;
u8 vf; /* validity flags */
@ -341,22 +312,132 @@ struct chsc_sei {
} u;
} __packed;
/*
* Node Descriptor as defined in SA22-7204, "Common I/O-Device Commands"
*/
#define ND_VALIDITY_VALID 0
#define ND_VALIDITY_OUTDATED 1
#define ND_VALIDITY_INVALID 2
struct node_descriptor {
/* Flags. */
union {
struct {
u32 validity:3;
u32 reserved:5;
} __packed;
u8 byte0;
} __packed;
/* Node parameters. */
u32 params:24;
/* Node ID. */
char type[6];
char model[3];
char manufacturer[3];
char plant[2];
char seq[12];
u16 tag;
} __packed;
/*
* Link Incident Record as defined in SA22-7202, "ESCON I/O Interface"
*/
#define LIR_IQ_CLASS_INFO 0
#define LIR_IQ_CLASS_DEGRADED 1
#define LIR_IQ_CLASS_NOT_OPERATIONAL 2
struct lir {
struct {
u32 null:1;
u32 reserved:3;
u32 class:2;
u32 reserved2:2;
} __packed iq;
u32 ic:8;
u32 reserved:16;
struct node_descriptor incident_node;
struct node_descriptor attached_node;
u8 reserved2[32];
} __packed;
#define PARAMS_LEN 10 /* PARAMS=xx,xxxxxx */
#define NODEID_LEN 35 /* NODEID=tttttt/mdl,mmm.ppssssssssssss,xxxx */
/* Copy EBCIDC text, convert to ASCII and optionally add delimiter. */
static char *store_ebcdic(char *dest, const char *src, unsigned long len,
char delim)
{
memcpy(dest, src, len);
EBCASC(dest, len);
if (delim)
dest[len++] = delim;
return dest + len;
}
/* Format node ID and parameters for output in LIR log message. */
static void format_node_data(char *params, char *id, struct node_descriptor *nd)
{
memset(params, 0, PARAMS_LEN);
memset(id, 0, NODEID_LEN);
if (nd->validity != ND_VALIDITY_VALID) {
strncpy(params, "n/a", PARAMS_LEN - 1);
strncpy(id, "n/a", NODEID_LEN - 1);
return;
}
/* PARAMS=xx,xxxxxx */
snprintf(params, PARAMS_LEN, "%02x,%06x", nd->byte0, nd->params);
/* NODEID=tttttt/mdl,mmm.ppssssssssssss,xxxx */
id = store_ebcdic(id, nd->type, sizeof(nd->type), '/');
id = store_ebcdic(id, nd->model, sizeof(nd->model), ',');
id = store_ebcdic(id, nd->manufacturer, sizeof(nd->manufacturer), '.');
id = store_ebcdic(id, nd->plant, sizeof(nd->plant), 0);
id = store_ebcdic(id, nd->seq, sizeof(nd->seq), ',');
sprintf(id, "%04X", nd->tag);
}
static void chsc_process_sei_link_incident(struct chsc_sei_nt0_area *sei_area)
{
struct chp_id chpid;
int id;
struct lir *lir = (struct lir *) &sei_area->ccdf;
char iuparams[PARAMS_LEN], iunodeid[NODEID_LEN], auparams[PARAMS_LEN],
aunodeid[NODEID_LEN];
CIO_CRW_EVENT(4, "chsc: link incident (rs=%02x, rs_id=%04x)\n",
sei_area->rs, sei_area->rsid);
if (sei_area->rs != 4)
CIO_CRW_EVENT(4, "chsc: link incident (rs=%02x, rs_id=%04x, iq=%02x)\n",
sei_area->rs, sei_area->rsid, sei_area->ccdf[0]);
/* Ignore NULL Link Incident Records. */
if (lir->iq.null)
return;
id = __get_chpid_from_lir(sei_area->ccdf);
if (id < 0)
CIO_CRW_EVENT(4, "chsc: link incident - invalid LIR\n");
else {
chp_id_init(&chpid);
chpid.id = id;
chsc_chp_offline(chpid);
/* Inform user that a link requires maintenance actions because it has
* become degraded or not operational. Note that this log message is
* the primary intention behind a Link Incident Record. */
format_node_data(iuparams, iunodeid, &lir->incident_node);
format_node_data(auparams, aunodeid, &lir->attached_node);
switch (lir->iq.class) {
case LIR_IQ_CLASS_DEGRADED:
pr_warn("Link degraded: RS=%02x RSID=%04x IC=%02x "
"IUPARAMS=%s IUNODEID=%s AUPARAMS=%s AUNODEID=%s\n",
sei_area->rs, sei_area->rsid, lir->ic, iuparams,
iunodeid, auparams, aunodeid);
break;
case LIR_IQ_CLASS_NOT_OPERATIONAL:
pr_err("Link stopped: RS=%02x RSID=%04x IC=%02x "
"IUPARAMS=%s IUNODEID=%s AUPARAMS=%s AUNODEID=%s\n",
sei_area->rs, sei_area->rsid, lir->ic, iuparams,
iunodeid, auparams, aunodeid);
break;
default:
break;
}
}