[PATCH] s390: dasd extended error reporting module
The DASD extended error reporting is a facility that makes it possible to get detailed information about certain problems in the DASD I/O. This information can be used to implement fail-over applications that can recover from these problems. Signed-off-by: Stefan Weinhuber <wein@de.ibm.com> Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com> Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
This commit is contained in:
parent
d237bf4926
commit
12c3a54848
@ -55,13 +55,21 @@ config DASD_DIAG
|
||||
Disks under VM. If you are not running under VM or unsure what it is,
|
||||
say "N".
|
||||
|
||||
config DASD_EER
|
||||
tristate "Extended error reporting (EER)"
|
||||
depends on DASD
|
||||
help
|
||||
This driver provides a character device interface to the
|
||||
DASD extended error reporting. This is only needed if you want to
|
||||
use applications written for the EER facility.
|
||||
|
||||
config DASD_CMB
|
||||
tristate "Compatibility interface for DASD channel measurement blocks"
|
||||
depends on DASD
|
||||
help
|
||||
This driver provides an additional interface to the channel measurement
|
||||
facility, which is normally accessed through sysfs, with a set of
|
||||
ioctl functions specific to the dasd driver.
|
||||
This driver provides an additional interface to the channel
|
||||
measurement facility, which is normally accessed through sysfs, with
|
||||
a set of ioctl functions specific to the dasd driver.
|
||||
This is only needed if you want to use applications written for
|
||||
linux-2.4 dasd channel measurement facility interface.
|
||||
|
||||
|
@ -5,6 +5,7 @@
|
||||
dasd_eckd_mod-objs := dasd_eckd.o dasd_3990_erp.o dasd_9343_erp.o
|
||||
dasd_fba_mod-objs := dasd_fba.o dasd_3370_erp.o dasd_9336_erp.o
|
||||
dasd_diag_mod-objs := dasd_diag.o
|
||||
dasd_eer_mod-objs := dasd_eer.o
|
||||
dasd_mod-objs := dasd.o dasd_ioctl.o dasd_proc.o dasd_devmap.o \
|
||||
dasd_genhd.o dasd_erp.o
|
||||
|
||||
@ -13,5 +14,6 @@ obj-$(CONFIG_DASD_DIAG) += dasd_diag_mod.o
|
||||
obj-$(CONFIG_DASD_ECKD) += dasd_eckd_mod.o
|
||||
obj-$(CONFIG_DASD_FBA) += dasd_fba_mod.o
|
||||
obj-$(CONFIG_DASD_CMB) += dasd_cmb.o
|
||||
obj-$(CONFIG_DASD_EER) += dasd_eer.o
|
||||
obj-$(CONFIG_BLK_DEV_XPRAM) += xpram.o
|
||||
obj-$(CONFIG_DCSSBLK) += dcssblk.o
|
||||
|
@ -18,6 +18,7 @@
|
||||
#include <linux/slab.h>
|
||||
#include <linux/buffer_head.h>
|
||||
#include <linux/hdreg.h>
|
||||
#include <linux/notifier.h>
|
||||
|
||||
#include <asm/ccwdev.h>
|
||||
#include <asm/ebcdic.h>
|
||||
@ -57,6 +58,7 @@ static void dasd_int_handler(struct ccw_device *, unsigned long, struct irb *);
|
||||
static void dasd_flush_ccw_queue(struct dasd_device *, int);
|
||||
static void dasd_tasklet(struct dasd_device *);
|
||||
static void do_kick_device(void *data);
|
||||
static void dasd_disable_eer(struct dasd_device *device);
|
||||
|
||||
/*
|
||||
* SECTION: Operations on the device structure.
|
||||
@ -151,6 +153,8 @@ dasd_state_new_to_known(struct dasd_device *device)
|
||||
static inline void
|
||||
dasd_state_known_to_new(struct dasd_device * device)
|
||||
{
|
||||
/* disable extended error reporting for this device */
|
||||
dasd_disable_eer(device);
|
||||
/* Forget the discipline information. */
|
||||
device->discipline = NULL;
|
||||
device->state = DASD_STATE_NEW;
|
||||
@ -867,6 +871,9 @@ dasd_handle_state_change_pending(struct dasd_device *device)
|
||||
struct dasd_ccw_req *cqr;
|
||||
struct list_head *l, *n;
|
||||
|
||||
/* first of all call extended error reporting */
|
||||
dasd_write_eer_trigger(DASD_EER_STATECHANGE, device, NULL);
|
||||
|
||||
device->stopped &= ~DASD_STOPPED_PENDING;
|
||||
|
||||
/* restart all 'running' IO on queue */
|
||||
@ -1086,6 +1093,19 @@ restart:
|
||||
}
|
||||
goto restart;
|
||||
}
|
||||
|
||||
/* first of all call extended error reporting */
|
||||
if (device->eer && cqr->status == DASD_CQR_FAILED) {
|
||||
dasd_write_eer_trigger(DASD_EER_FATALERROR,
|
||||
device, cqr);
|
||||
|
||||
/* restart request */
|
||||
cqr->status = DASD_CQR_QUEUED;
|
||||
cqr->retries = 255;
|
||||
device->stopped |= DASD_STOPPED_QUIESCE;
|
||||
goto restart;
|
||||
}
|
||||
|
||||
/* Process finished ERP request. */
|
||||
if (cqr->refers) {
|
||||
__dasd_process_erp(device, cqr);
|
||||
@ -1223,7 +1243,8 @@ __dasd_start_head(struct dasd_device * device)
|
||||
cqr = list_entry(device->ccw_queue.next, struct dasd_ccw_req, list);
|
||||
/* check FAILFAST */
|
||||
if (device->stopped & ~DASD_STOPPED_PENDING &&
|
||||
test_bit(DASD_CQR_FLAGS_FAILFAST, &cqr->flags)) {
|
||||
test_bit(DASD_CQR_FLAGS_FAILFAST, &cqr->flags) &&
|
||||
(!device->eer)) {
|
||||
cqr->status = DASD_CQR_FAILED;
|
||||
dasd_schedule_bh(device);
|
||||
}
|
||||
@ -1965,6 +1986,9 @@ dasd_generic_notify(struct ccw_device *cdev, int event)
|
||||
switch (event) {
|
||||
case CIO_GONE:
|
||||
case CIO_NO_PATH:
|
||||
/* first of all call extended error reporting */
|
||||
dasd_write_eer_trigger(DASD_EER_NOPATH, device, NULL);
|
||||
|
||||
if (device->state < DASD_STATE_BASIC)
|
||||
break;
|
||||
/* Device is active. We want to keep it. */
|
||||
@ -2022,6 +2046,51 @@ dasd_generic_auto_online (struct ccw_driver *dasd_discipline_driver)
|
||||
put_driver(drv);
|
||||
}
|
||||
|
||||
/*
|
||||
* notifications for extended error reports
|
||||
*/
|
||||
static struct notifier_block *dasd_eer_chain;
|
||||
|
||||
int
|
||||
dasd_register_eer_notifier(struct notifier_block *nb)
|
||||
{
|
||||
return notifier_chain_register(&dasd_eer_chain, nb);
|
||||
}
|
||||
|
||||
int
|
||||
dasd_unregister_eer_notifier(struct notifier_block *nb)
|
||||
{
|
||||
return notifier_chain_unregister(&dasd_eer_chain, nb);
|
||||
}
|
||||
|
||||
/*
|
||||
* Notify the registered error reporting module of a problem
|
||||
*/
|
||||
void
|
||||
dasd_write_eer_trigger(unsigned int id, struct dasd_device *device,
|
||||
struct dasd_ccw_req *cqr)
|
||||
{
|
||||
if (device->eer) {
|
||||
struct dasd_eer_trigger temp;
|
||||
temp.id = id;
|
||||
temp.device = device;
|
||||
temp.cqr = cqr;
|
||||
notifier_call_chain(&dasd_eer_chain, DASD_EER_TRIGGER,
|
||||
(void *)&temp);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Tell the registered error reporting module to disable error reporting for
|
||||
* a given device and to cleanup any private data structures on that device.
|
||||
*/
|
||||
static void
|
||||
dasd_disable_eer(struct dasd_device *device)
|
||||
{
|
||||
notifier_call_chain(&dasd_eer_chain, DASD_EER_DISABLE, (void *)device);
|
||||
}
|
||||
|
||||
|
||||
static int __init
|
||||
dasd_init(void)
|
||||
{
|
||||
@ -2103,6 +2172,11 @@ EXPORT_SYMBOL_GPL(dasd_generic_set_online);
|
||||
EXPORT_SYMBOL_GPL(dasd_generic_set_offline);
|
||||
EXPORT_SYMBOL_GPL(dasd_generic_auto_online);
|
||||
|
||||
EXPORT_SYMBOL(dasd_register_eer_notifier);
|
||||
EXPORT_SYMBOL(dasd_unregister_eer_notifier);
|
||||
EXPORT_SYMBOL(dasd_write_eer_trigger);
|
||||
|
||||
|
||||
/*
|
||||
* Overrides for Emacs so that we follow Linus's tabbing style.
|
||||
* Emacs will notice this stuff at the end of the file and automatically
|
||||
|
@ -1108,6 +1108,9 @@ dasd_3990_handle_env_data(struct dasd_ccw_req * erp, char *sense)
|
||||
case 0x0B:
|
||||
DEV_MESSAGE(KERN_WARNING, device, "%s",
|
||||
"FORMAT F - Volume is suspended duplex");
|
||||
/* call extended error reporting (EER) */
|
||||
dasd_write_eer_trigger(DASD_EER_PPRCSUSPEND, device,
|
||||
erp->refers);
|
||||
break;
|
||||
case 0x0C:
|
||||
DEV_MESSAGE(KERN_WARNING, device, "%s",
|
||||
|
@ -29,6 +29,7 @@
|
||||
#define DASD_ECKD_CCW_PSF 0x27
|
||||
#define DASD_ECKD_CCW_RSSD 0x3e
|
||||
#define DASD_ECKD_CCW_LOCATE_RECORD 0x47
|
||||
#define DASD_ECKD_CCW_SNSS 0x54
|
||||
#define DASD_ECKD_CCW_DEFINE_EXTENT 0x63
|
||||
#define DASD_ECKD_CCW_WRITE_MT 0x85
|
||||
#define DASD_ECKD_CCW_READ_MT 0x86
|
||||
|
1090
drivers/s390/block/dasd_eer.c
Normal file
1090
drivers/s390/block/dasd_eer.c
Normal file
File diff suppressed because it is too large
Load Diff
@ -275,6 +275,34 @@ struct dasd_discipline {
|
||||
|
||||
extern struct dasd_discipline *dasd_diag_discipline_pointer;
|
||||
|
||||
|
||||
/*
|
||||
* Notification numbers for extended error reporting notifications:
|
||||
* The DASD_EER_DISABLE notification is sent before a dasd_device (and its
|
||||
* eer pointer) is freed. The error reporting module needs to do all necessary
|
||||
* cleanup steps.
|
||||
* The DASD_EER_TRIGGER notification sends the actual error reports (triggers).
|
||||
*/
|
||||
#define DASD_EER_DISABLE 0
|
||||
#define DASD_EER_TRIGGER 1
|
||||
|
||||
/* Trigger IDs for extended error reporting DASD_EER_TRIGGER notification */
|
||||
#define DASD_EER_FATALERROR 1
|
||||
#define DASD_EER_NOPATH 2
|
||||
#define DASD_EER_STATECHANGE 3
|
||||
#define DASD_EER_PPRCSUSPEND 4
|
||||
|
||||
/*
 * Payload of a DASD_EER_TRIGGER notification: everything the error
 * reporting module is handed along with the notification.
 */
struct dasd_eer_trigger {
	unsigned int id;		/* trigger ID (DASD_EER_FATALERROR, ...) */
	struct dasd_device *device;	/* device the report is about */
	struct dasd_ccw_req *cqr;	/* related request, may be NULL */
};
|
||||
|
||||
|
||||
struct dasd_device {
|
||||
/* Block device stuff. */
|
||||
struct gendisk *gdp;
|
||||
@ -288,6 +316,9 @@ struct dasd_device {
|
||||
unsigned long flags; /* per device flags */
|
||||
unsigned short features; /* copy of devmap-features (read-only!) */
|
||||
|
||||
/* extended error reporting stuff (eer) */
|
||||
void *eer;
|
||||
|
||||
/* Device discipline stuff. */
|
||||
struct dasd_discipline *discipline;
|
||||
char *private;
|
||||
@ -488,6 +519,12 @@ int dasd_generic_set_online(struct ccw_device *, struct dasd_discipline *);
|
||||
int dasd_generic_set_offline (struct ccw_device *cdev);
|
||||
int dasd_generic_notify(struct ccw_device *, int);
|
||||
void dasd_generic_auto_online (struct ccw_driver *);
|
||||
int dasd_register_eer_notifier(struct notifier_block *);
|
||||
int dasd_unregister_eer_notifier(struct notifier_block *);
|
||||
void dasd_write_eer_trigger(unsigned int , struct dasd_device *,
|
||||
struct dasd_ccw_req *);
|
||||
|
||||
|
||||
|
||||
/* externals in dasd_devmap.c */
|
||||
extern int dasd_max_devindex;
|
||||
|
@ -204,7 +204,8 @@ typedef struct attrib_data_t {
|
||||
*
|
||||
* Here is how the ioctl-nr should be used:
|
||||
* 0 - 31 DASD driver itself
|
||||
* 32 - 239 still open
|
||||
* 32 - 229 still open
|
||||
* 230 - 239 DASD extended error reporting
|
||||
* 240 - 255 reserved for EMC
|
||||
*******************************************************************************/
|
||||
|
||||
@ -236,12 +237,22 @@ typedef struct attrib_data_t {
|
||||
#define BIODASDPSRD _IOR(DASD_IOCTL_LETTER,4,dasd_rssd_perf_stats_t)
|
||||
/* Get Attributes (cache operations) */
|
||||
#define BIODASDGATTR _IOR(DASD_IOCTL_LETTER,5,attrib_data_t)
|
||||
/* retrieve extended error-reporting value */
|
||||
#define BIODASDEERGET _IOR(DASD_IOCTL_LETTER,6,int)
|
||||
|
||||
|
||||
/* #define BIODASDFORMAT _IOW(IOCTL_LETTER,0,format_data_t) , deprecated */
|
||||
#define BIODASDFMT _IOW(DASD_IOCTL_LETTER,1,format_data_t)
|
||||
/* Set Attributes (cache operations) */
|
||||
#define BIODASDSATTR _IOW(DASD_IOCTL_LETTER,2,attrib_data_t)
|
||||
/* set extended error-reporting value */
|
||||
#define BIODASDEERSET _IOW(DASD_IOCTL_LETTER,3,int)
|
||||
|
||||
|
||||
/* remove all records from the eer buffer */
|
||||
#define DASD_EER_PURGE _IO(DASD_IOCTL_LETTER,230)
|
||||
/* set the number of pages that are used for the internal eer buffer */
|
||||
#define DASD_EER_SETBUFSIZE _IOW(DASD_IOCTL_LETTER,230,int)
|
||||
|
||||
|
||||
#endif /* DASD_H */
|
||||
|
Loading…
Reference in New Issue
Block a user