From 26916264c17b5af8a3eaaf83b128f85cf1107cff Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Mon, 4 Dec 2006 15:39:47 +0100 Subject: [PATCH 01/34] [S390] remove salipl memory detection. The SALIPL entry point has an needless memory detection routine as we later check the memory size again. The SALIPL code also uses diagnose 0x060 if we are running under VM, but this diagnose is not compatible with the 64 bit addressing mode. The solution is to get rid of this code and rely on the memory detection in the startup code. Signed-off-by: Christian Borntraeger Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/head.S | 21 --------------------- 1 file changed, 21 deletions(-) diff --git a/arch/s390/kernel/head.S b/arch/s390/kernel/head.S index 0cf59bb7a857..8f8c802f1bcf 100644 --- a/arch/s390/kernel/head.S +++ b/arch/s390/kernel/head.S @@ -418,24 +418,6 @@ start: .gotr: l %r10,.tbl # EBCDIC to ASCII table tr 0(240,%r8),0(%r10) - stidp __LC_CPUID # Are we running on VM maybe - cli __LC_CPUID,0xff - bnz .test - .long 0x83300060 # diag 3,0,x'0060' - storage size - b .done -.test: - mvc 0x68(8),.pgmnw # set up pgm check handler - l %r2,.fourmeg - lr %r3,%r2 - bctr %r3,%r0 # 4M-1 -.loop: iske %r0,%r3 - ar %r3,%r2 -.pgmx: - sr %r3,%r2 - la %r3,1(%r3) -.done: - l %r1,.memsize - st %r3,ARCH_OFFSET(%r1) slr %r0,%r0 st %r0,INITRD_SIZE+ARCH_OFFSET-PARMAREA(%r11) st %r0,INITRD_START+ARCH_OFFSET-PARMAREA(%r11) @@ -443,9 +425,6 @@ start: .tbl: .long _ebcasc # translate table .cmd: .long COMMAND_LINE # address of command line buffer .parm: .long PARMAREA -.memsize: .long memory_size -.fourmeg: .long 0x00400000 # 4M -.pgmnw: .long 0x00080000,.pgmx .lowcase: .byte 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07 .byte 0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f From 01376f4495840f3daf6d73679242b5964fc9603b Mon Sep 17 00:00:00 2001 From: Horst Hummel Date: Mon, 4 Dec 2006 15:39:50 +0100 Subject: [PATCH 02/34] [S390] handle incorrect values when writing to dasd sysfs attributes. When writing to dasd attributes (e.g. readonly), all values besides '1' are handled like '0'. Other sysfs-attributes like 'online' are checking for '1' and for '0' and do not accept other values. Therefore enhanced checking and error handling in dasd_devmap attribute store functions. Signed-off-by: Horst Hummel Signed-off-by: Martin Schwidefsky --- drivers/s390/block/dasd_devmap.c | 43 +++++++++++++++++++++----------- 1 file changed, 29 insertions(+), 14 deletions(-) diff --git a/drivers/s390/block/dasd_devmap.c b/drivers/s390/block/dasd_devmap.c index 91cf971f0652..b5e70c523921 100644 --- a/drivers/s390/block/dasd_devmap.c +++ b/drivers/s390/block/dasd_devmap.c @@ -684,21 +684,26 @@ dasd_ro_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct dasd_devmap *devmap; - int ro_flag; + int val; + char *endp; devmap = dasd_devmap_from_cdev(to_ccwdev(dev)); if (IS_ERR(devmap)) return PTR_ERR(devmap); - ro_flag = buf[0] == '1'; + + val = simple_strtoul(buf, &endp, 0); + if (((endp + 1) < (buf + count)) || (val > 1)) + return -EINVAL; + spin_lock(&dasd_devmap_lock); - if (ro_flag) + if (val) devmap->features |= DASD_FEATURE_READONLY; else devmap->features &= ~DASD_FEATURE_READONLY; if (devmap->device) devmap->device->features = devmap->features; if (devmap->device && devmap->device->gdp) - set_disk_ro(devmap->device->gdp, ro_flag); + set_disk_ro(devmap->device->gdp, val); spin_unlock(&dasd_devmap_lock); return count; } @@ -729,17 +734,22 @@ dasd_use_diag_store(struct device *dev, struct device_attribute *attr, { struct dasd_devmap *devmap; ssize_t rc; - int use_diag; + int val; + char *endp; devmap = dasd_devmap_from_cdev(to_ccwdev(dev)); if (IS_ERR(devmap)) return PTR_ERR(devmap); - use_diag = buf[0] == '1'; + + val = simple_strtoul(buf, &endp, 0); + if (((endp + 1) < (buf + count)) || (val > 1)) + return -EINVAL; + spin_lock(&dasd_devmap_lock); /* Changing diag discipline flag is only allowed in offline state. */ rc = count; if (!devmap->device) { - if (use_diag) + if (val) devmap->features |= DASD_FEATURE_USEDIAG; else devmap->features &= ~DASD_FEATURE_USEDIAG; @@ -854,20 +864,25 @@ dasd_eer_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct dasd_devmap *devmap; - int rc; + int val, rc; + char *endp; devmap = dasd_devmap_from_cdev(to_ccwdev(dev)); if (IS_ERR(devmap)) return PTR_ERR(devmap); if (!devmap->device) - return count; - if (buf[0] == '1') { + return -ENODEV; + + val = simple_strtoul(buf, &endp, 0); + if (((endp + 1) < (buf + count)) || (val > 1)) + return -EINVAL; + + rc = count; + if (val) rc = dasd_eer_enable(devmap->device); - if (rc) - return rc; - } else + else dasd_eer_disable(devmap->device); - return count; + return rc; } static DEVICE_ATTR(eer_enabled, 0644, dasd_eer_show, dasd_eer_store); From feb5babead7f8058f0108ec59c3e7c2df666bd67 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 4 Dec 2006 15:39:52 +0100 Subject: [PATCH 03/34] [S390] Remove unused GENERIC_BUST_SPINLOCK from Kconfig. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/Kconfig | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 245b81bc7157..5b04c4095c8d 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -33,9 +33,6 @@ config GENERIC_CALIBRATE_DELAY config GENERIC_TIME def_bool y -config GENERIC_BUST_SPINLOCK - bool - mainmenu "Linux Kernel Configuration" config S390 From f7675ad791df4bec2b9d21bcc0f846320f0a921b Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 4 Dec 2006 15:39:55 +0100 Subject: [PATCH 04/34] [S390] Add __must_check to uaccess functions. Follow other architectures and add __must_check to uaccess functions. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- include/asm-s390/uaccess.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/include/asm-s390/uaccess.h b/include/asm-s390/uaccess.h index 72ae4efddb49..73ac4e82217b 100644 --- a/include/asm-s390/uaccess.h +++ b/include/asm-s390/uaccess.h @@ -201,7 +201,7 @@ extern int __get_user_bad(void) __attribute__((noreturn)); * Returns number of bytes that could not be copied. * On success, this will be zero. */ -static inline unsigned long +static inline unsigned long __must_check __copy_to_user(void __user *to, const void *from, unsigned long n) { if (__builtin_constant_p(n) && (n <= 256)) @@ -226,7 +226,7 @@ __copy_to_user(void __user *to, const void *from, unsigned long n) * Returns number of bytes that could not be copied. * On success, this will be zero. */ -static inline unsigned long +static inline unsigned long __must_check copy_to_user(void __user *to, const void *from, unsigned long n) { might_sleep(); @@ -252,7 +252,7 @@ copy_to_user(void __user *to, const void *from, unsigned long n) * If some data could not be copied, this function will pad the copied * data to the requested size using zero bytes. */ -static inline unsigned long +static inline unsigned long __must_check __copy_from_user(void *to, const void __user *from, unsigned long n) { if (__builtin_constant_p(n) && (n <= 256)) @@ -277,7 +277,7 @@ __copy_from_user(void *to, const void __user *from, unsigned long n) * If some data could not be copied, this function will pad the copied * data to the requested size using zero bytes. */ -static inline unsigned long +static inline unsigned long __must_check copy_from_user(void *to, const void __user *from, unsigned long n) { might_sleep(); @@ -288,13 +288,13 @@ copy_from_user(void *to, const void __user *from, unsigned long n) return n; } -static inline unsigned long +static inline unsigned long __must_check __copy_in_user(void __user *to, const void __user *from, unsigned long n) { return uaccess.copy_in_user(n, to, from); } -static inline unsigned long +static inline unsigned long __must_check copy_in_user(void __user *to, const void __user *from, unsigned long n) { might_sleep(); @@ -306,7 +306,7 @@ copy_in_user(void __user *to, const void __user *from, unsigned long n) /* * Copy a null terminated string from userspace. */ -static inline long +static inline long __must_check strncpy_from_user(char *dst, const char __user *src, long count) { long res = -EFAULT; @@ -343,13 +343,13 @@ strnlen_user(const char __user * src, unsigned long n) * Zero Userspace */ -static inline unsigned long +static inline unsigned long __must_check __clear_user(void __user *to, unsigned long n) { return uaccess.clear_user(n, to); } -static inline unsigned long +static inline unsigned long __must_check clear_user(void __user *to, unsigned long n) { might_sleep(); From 03a4d2087644f5477d9a9742e75a329f23b279e6 Mon Sep 17 00:00:00 2001 From: Michael Holzheu Date: Mon, 4 Dec 2006 15:39:58 +0100 Subject: [PATCH 05/34] [S390] Add ipl/reipl loadparm attribute. If multiple kernel images are installed on one DASD, the loadparm can be used to select the boot configuration. This patch introduces the following two new sysfs attributes: /sys/firmware/ipl/loadparm: shows loadparm of current system (ro) /sys/firmware/reipl/ccw/loadparm: loadparm used for next reboot (rw) Signed-off-by: Michael Holzheu Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/ipl.c | 98 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 94 insertions(+), 4 deletions(-) diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index 1f5e782b3d05..36f0d3004f94 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -13,12 +13,20 @@ #include #include #include +#include #include #include #include #include +#include #define IPL_PARM_BLOCK_VERSION 0 +#define LOADPARM_LEN 8 + +extern char s390_readinfo_sccb[]; +#define SCCB_VALID (*((__u16*)&s390_readinfo_sccb[6]) == 0x0010) +#define SCCB_LOADPARM (&s390_readinfo_sccb[24]) +#define SCCB_FLAG (s390_readinfo_sccb[91]) enum ipl_type { IPL_TYPE_NONE = 1, @@ -289,9 +297,25 @@ static struct attribute_group ipl_fcp_attr_group = { /* CCW ipl device attributes */ +static ssize_t ipl_ccw_loadparm_show(struct subsystem *subsys, char *page) +{ + char loadparm[LOADPARM_LEN + 1] = {}; + + if (!SCCB_VALID) + return sprintf(page, "#unknown#\n"); + memcpy(loadparm, SCCB_LOADPARM, LOADPARM_LEN); + EBCASC(loadparm, LOADPARM_LEN); + strstrip(loadparm); + return sprintf(page, "%s\n", loadparm); +} + +static struct subsys_attribute sys_ipl_ccw_loadparm_attr = + __ATTR(loadparm, 0444, ipl_ccw_loadparm_show, NULL); + static struct attribute *ipl_ccw_attrs[] = { &sys_ipl_type_attr.attr, &sys_ipl_device_attr.attr, + &sys_ipl_ccw_loadparm_attr.attr, NULL, }; @@ -348,8 +372,57 @@ static struct attribute_group reipl_fcp_attr_group = { DEFINE_IPL_ATTR_RW(reipl_ccw, device, "0.0.%04llx\n", "0.0.%llx\n", reipl_block_ccw->ipl_info.ccw.devno); +static void reipl_get_ascii_loadparm(char *loadparm) +{ + memcpy(loadparm, &reipl_block_ccw->ipl_info.ccw.load_param, + LOADPARM_LEN); + EBCASC(loadparm, LOADPARM_LEN); + loadparm[LOADPARM_LEN] = 0; + strstrip(loadparm); +} + +static ssize_t reipl_ccw_loadparm_show(struct subsystem *subsys, char *page) +{ + char buf[LOADPARM_LEN + 1]; + + reipl_get_ascii_loadparm(buf); + return sprintf(page, "%s\n", buf); +} + +static ssize_t reipl_ccw_loadparm_store(struct subsystem *subsys, + const char *buf, size_t len) +{ + int i, lp_len; + + /* ignore trailing newline */ + lp_len = len; + if ((len > 0) && (buf[len - 1] == '\n')) + lp_len--; + /* loadparm can have max 8 characters and must not start with a blank */ + if ((lp_len > LOADPARM_LEN) || ((lp_len > 0) && (buf[0] == ' '))) + return -EINVAL; + /* loadparm can only contain "a-z,A-Z,0-9,SP,." */ + for (i = 0; i < lp_len; i++) { + if (isalpha(buf[i]) || isdigit(buf[i]) || (buf[i] == ' ') || + (buf[i] == '.')) + continue; + return -EINVAL; + } + /* initialize loadparm with blanks */ + memset(&reipl_block_ccw->ipl_info.ccw.load_param, ' ', LOADPARM_LEN); + /* copy and convert to ebcdic */ + memcpy(&reipl_block_ccw->ipl_info.ccw.load_param, buf, lp_len); + ASCEBC(reipl_block_ccw->ipl_info.ccw.load_param, LOADPARM_LEN); + return len; +} + +static struct subsys_attribute sys_reipl_ccw_loadparm_attr = + __ATTR(loadparm, 0644, reipl_ccw_loadparm_show, + reipl_ccw_loadparm_store); + static struct attribute *reipl_ccw_attrs[] = { &sys_reipl_ccw_device_attr.attr, + &sys_reipl_ccw_loadparm_attr.attr, NULL, }; @@ -571,11 +644,14 @@ void do_reipl(void) { struct ccw_dev_id devid; static char buf[100]; + char loadparm[LOADPARM_LEN + 1]; switch (reipl_type) { case IPL_TYPE_CCW: + reipl_get_ascii_loadparm(loadparm); printk(KERN_EMERG "reboot on ccw device: 0.0.%04x\n", reipl_block_ccw->ipl_info.ccw.devno); + printk(KERN_EMERG "loadparm = '%s'\n", loadparm); break; case IPL_TYPE_FCP: printk(KERN_EMERG "reboot on fcp device:\n"); @@ -592,7 +668,12 @@ void do_reipl(void) reipl_ccw_dev(&devid); break; case IPL_METHOD_CCW_VM: - sprintf(buf, "IPL %X", reipl_block_ccw->ipl_info.ccw.devno); + if (strlen(loadparm) == 0) + sprintf(buf, "IPL %X", + reipl_block_ccw->ipl_info.ccw.devno); + else + sprintf(buf, "IPL %X LOADPARM '%s'", + reipl_block_ccw->ipl_info.ccw.devno, loadparm); cpcmd(buf, NULL, 0, NULL); break; case IPL_METHOD_CCW_DIAG: @@ -746,6 +827,17 @@ static int __init reipl_ccw_init(void) reipl_block_ccw->hdr.version = IPL_PARM_BLOCK_VERSION; reipl_block_ccw->hdr.blk0_len = sizeof(reipl_block_ccw->ipl_info.ccw); reipl_block_ccw->hdr.pbt = DIAG308_IPL_TYPE_CCW; + /* check if read scp info worked and set loadparm */ + if (SCCB_VALID) + memcpy(reipl_block_ccw->ipl_info.ccw.load_param, + SCCB_LOADPARM, LOADPARM_LEN); + else + /* read scp info failed: set empty loadparm (EBCDIC blanks) */ + memset(reipl_block_ccw->ipl_info.ccw.load_param, 0x40, + LOADPARM_LEN); + /* FIXME: check for diag308_set_works when enabling diag ccw reipl */ + if (!MACHINE_IS_VM) + sys_reipl_ccw_loadparm_attr.attr.mode = S_IRUGO; if (ipl_get_type() == IPL_TYPE_CCW) reipl_block_ccw->ipl_info.ccw.devno = ipl_devno; reipl_capabilities |= IPL_TYPE_CCW; @@ -827,13 +919,11 @@ static int __init dump_ccw_init(void) return 0; } -extern char s390_readinfo_sccb[]; - static int __init dump_fcp_init(void) { int rc; - if(!(s390_readinfo_sccb[91] & 0x2)) + if(!(SCCB_FLAG & 0x2) || !SCCB_VALID) return 0; /* LDIPL DUMP is not installed */ if (!diag308_set_works) return 0; From bba125a6116e51faff98df1906bf77d06b644aea Mon Sep 17 00:00:00 2001 From: Ralph Wuerthner Date: Mon, 4 Dec 2006 15:40:00 +0100 Subject: [PATCH 06/34] [S390] update interface notes in zcrypt.h Signed-off-by: Ralph Wuerthner Signed-off-by: Martin Schwidefsky --- include/asm-s390/zcrypt.h | 91 ++++++++++++++++++--------------------- 1 file changed, 41 insertions(+), 50 deletions(-) diff --git a/include/asm-s390/zcrypt.h b/include/asm-s390/zcrypt.h index 7244c68464f2..b90e55888a55 100644 --- a/include/asm-s390/zcrypt.h +++ b/include/asm-s390/zcrypt.h @@ -180,40 +180,8 @@ struct ica_xcRB { * for the implementation details for the contents of the * block * - * Z90STAT_TOTALCOUNT - * Return an integer count of all device types together. - * - * Z90STAT_PCICACOUNT - * Return an integer count of all PCICAs. - * - * Z90STAT_PCICCCOUNT - * Return an integer count of all PCICCs. - * - * Z90STAT_PCIXCCMCL2COUNT - * Return an integer count of all MCL2 PCIXCCs. - * - * Z90STAT_PCIXCCMCL3COUNT - * Return an integer count of all MCL3 PCIXCCs. - * - * Z90STAT_CEX2CCOUNT - * Return an integer count of all CEX2Cs. - * - * Z90STAT_CEX2ACOUNT - * Return an integer count of all CEX2As. - * - * Z90STAT_REQUESTQ_COUNT - * Return an integer count of the number of entries waiting to be - * sent to a device. - * - * Z90STAT_PENDINGQ_COUNT - * Return an integer count of the number of entries sent to a - * device awaiting the reply. - * - * Z90STAT_TOTALOPEN_COUNT - * Return an integer count of the number of open file handles. - * - * Z90STAT_DOMAIN_INDEX - * Return the integer value of the Cryptographic Domain. + * ZSECSENDCPRB + * Send an arbitrary CPRB to a crypto card. * * Z90STAT_STATUS_MASK * Return an 64 element array of unsigned chars for the status of @@ -235,28 +203,51 @@ struct ica_xcRB { * of successfully completed requests per device since the device * was detected and made available. * - * ICAZ90STATUS (deprecated) + * Z90STAT_REQUESTQ_COUNT + * Return an integer count of the number of entries waiting to be + * sent to a device. + * + * Z90STAT_PENDINGQ_COUNT + * Return an integer count of the number of entries sent to all + * devices awaiting the reply. + * + * Z90STAT_TOTALOPEN_COUNT + * Return an integer count of the number of open file handles. + * + * Z90STAT_DOMAIN_INDEX + * Return the integer value of the Cryptographic Domain. + * + * The following ioctls are deprecated and should be no longer used: + * + * Z90STAT_TOTALCOUNT + * Return an integer count of all device types together. + * + * Z90STAT_PCICACOUNT + * Return an integer count of all PCICAs. + * + * Z90STAT_PCICCCOUNT + * Return an integer count of all PCICCs. + * + * Z90STAT_PCIXCCMCL2COUNT + * Return an integer count of all MCL2 PCIXCCs. + * + * Z90STAT_PCIXCCMCL3COUNT + * Return an integer count of all MCL3 PCIXCCs. + * + * Z90STAT_CEX2CCOUNT + * Return an integer count of all CEX2Cs. + * + * Z90STAT_CEX2ACOUNT + * Return an integer count of all CEX2As. + * + * ICAZ90STATUS * Return some device driver status in a ica_z90_status struct * This takes an ica_z90_status struct as its arg. * - * NOTE: this ioctl() is deprecated, and has been replaced with - * single ioctl()s for each type of status being requested - * - * Z90STAT_PCIXCCCOUNT (deprecated) + * Z90STAT_PCIXCCCOUNT * Return an integer count of all PCIXCCs (MCL2 + MCL3). * This is DEPRECATED now that MCL3 PCIXCCs are treated differently from * MCL2 PCIXCCs. - * - * Z90QUIESCE (not recommended) - * Quiesce the driver. This is intended to stop all new - * requests from being processed. Its use is NOT recommended, - * except in circumstances where there is no other way to stop - * callers from accessing the driver. Its original use was to - * allow the driver to be "drained" of work in preparation for - * a system shutdown. - * - * NOTE: once issued, this ban on new work cannot be undone - * except by unloading and reloading the driver. */ /** From 654452a48aa2bbfa276016a1e35d8988ff991ebb Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 4 Dec 2006 15:40:02 +0100 Subject: [PATCH 07/34] [S390] termio <-> termios conversion error handling. Get rid of our own user_termio_to_kernel_termios() and kernel_termios_to_user_termio() macros which didn't check for errors on user space accesses. Instead use the generic functions which handle this properly. In addition the generic version of user_termio_to_kernel_termios() also copies the c_line member which was missing in our variant. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- include/asm-s390/termios.h | 34 +--------------------------------- 1 file changed, 1 insertion(+), 33 deletions(-) diff --git a/include/asm-s390/termios.h b/include/asm-s390/termios.h index d1e29cca54c9..62b23caf370e 100644 --- a/include/asm-s390/termios.h +++ b/include/asm-s390/termios.h @@ -75,39 +75,7 @@ struct termio { */ #define INIT_C_CC "\003\034\177\025\004\0\1\0\021\023\032\0\022\017\027\026\0" -/* - * Translate a "termio" structure into a "termios". Ugh. - */ -#define SET_LOW_TERMIOS_BITS(termios, termio, x) { \ - unsigned short __tmp; \ - get_user(__tmp,&(termio)->x); \ - (termios)->x = (0xffff0000 & ((termios)->x)) | __tmp; \ -} - -#define user_termio_to_kernel_termios(termios, termio) \ -({ \ - SET_LOW_TERMIOS_BITS(termios, termio, c_iflag); \ - SET_LOW_TERMIOS_BITS(termios, termio, c_oflag); \ - SET_LOW_TERMIOS_BITS(termios, termio, c_cflag); \ - SET_LOW_TERMIOS_BITS(termios, termio, c_lflag); \ - copy_from_user((termios)->c_cc, (termio)->c_cc, NCC); \ -}) - -/* - * Translate a "termios" structure into a "termio". Ugh. - */ -#define kernel_termios_to_user_termio(termio, termios) \ -({ \ - put_user((termios)->c_iflag, &(termio)->c_iflag); \ - put_user((termios)->c_oflag, &(termio)->c_oflag); \ - put_user((termios)->c_cflag, &(termio)->c_cflag); \ - put_user((termios)->c_lflag, &(termio)->c_lflag); \ - put_user((termios)->c_line, &(termio)->c_line); \ - copy_to_user((termio)->c_cc, (termios)->c_cc, NCC); \ -}) - -#define user_termios_to_kernel_termios(k, u) copy_from_user(k, u, sizeof(struct termios)) -#define kernel_termios_to_user_termios(u, k) copy_to_user(u, k, sizeof(struct termios)) +#include #endif /* __KERNEL__ */ From 3902e47628dcaf79b2c7a6a59f6978d968eff288 Mon Sep 17 00:00:00 2001 From: Michael Holzheu Date: Mon, 4 Dec 2006 15:40:05 +0100 Subject: [PATCH 08/34] [S390] No panic for failed reboot If reboot fails (e.g. because wrong devno has been specified by the user), we should just stop all cpus, but should not trigger a kernel panic. Signed-off-by: Michael Holzheu Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/ipl.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index 36f0d3004f94..60ba1454bfe1 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -697,7 +697,8 @@ void do_reipl(void) diag308(DIAG308_IPL, NULL); break; } - panic("reipl failed!\n"); + printk(KERN_EMERG "reboot failed!\n"); + signal_processor(smp_processor_id(), sigp_stop_and_store_status); } static void do_dump(void) From 520a4e3728c214db6e91ade7b70443c2b9382de0 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Mon, 4 Dec 2006 15:40:07 +0100 Subject: [PATCH 09/34] [S390] 3215 device locking. Remove lock pointer from 3215 device structure. Use get_ccwdev_lock for each use of the lock in the ccw-device structure. Signed-off-by: Martin Schwidefsky --- drivers/s390/char/con3215.c | 50 ++++++++++++++++++------------------- 1 file changed, 24 insertions(+), 26 deletions(-) diff --git a/drivers/s390/char/con3215.c b/drivers/s390/char/con3215.c index d7de175d53f0..c9321b920e90 100644 --- a/drivers/s390/char/con3215.c +++ b/drivers/s390/char/con3215.c @@ -299,14 +299,14 @@ raw3215_timeout(unsigned long __data) struct raw3215_info *raw = (struct raw3215_info *) __data; unsigned long flags; - spin_lock_irqsave(raw->lock, flags); + spin_lock_irqsave(get_ccwdev_lock(raw->cdev), flags); if (raw->flags & RAW3215_TIMER_RUNS) { del_timer(&raw->timer); raw->flags &= ~RAW3215_TIMER_RUNS; raw3215_mk_write_req(raw); raw3215_start_io(raw); } - spin_unlock_irqrestore(raw->lock, flags); + spin_unlock_irqrestore(get_ccwdev_lock(raw->cdev), flags); } /* @@ -355,10 +355,10 @@ raw3215_tasklet(void *data) unsigned long flags; raw = (struct raw3215_info *) data; - spin_lock_irqsave(raw->lock, flags); + spin_lock_irqsave(get_ccwdev_lock(raw->cdev), flags); raw3215_mk_write_req(raw); raw3215_try_io(raw); - spin_unlock_irqrestore(raw->lock, flags); + spin_unlock_irqrestore(get_ccwdev_lock(raw->cdev), flags); /* Check for pending message from raw3215_irq */ if (raw->message != NULL) { printk(raw->message, raw->msg_dstat, raw->msg_cstat); @@ -512,9 +512,9 @@ raw3215_make_room(struct raw3215_info *raw, unsigned int length) if (RAW3215_BUFFER_SIZE - raw->count >= length) break; /* there might be another cpu waiting for the lock */ - spin_unlock(raw->lock); + spin_unlock(get_ccwdev_lock(raw->cdev)); udelay(100); - spin_lock(raw->lock); + spin_lock(get_ccwdev_lock(raw->cdev)); } } @@ -528,7 +528,7 @@ raw3215_write(struct raw3215_info *raw, const char *str, unsigned int length) int c, count; while (length > 0) { - spin_lock_irqsave(raw->lock, flags); + spin_lock_irqsave(get_ccwdev_lock(raw->cdev), flags); count = (length > RAW3215_BUFFER_SIZE) ? RAW3215_BUFFER_SIZE : length; length -= count; @@ -555,7 +555,7 @@ raw3215_write(struct raw3215_info *raw, const char *str, unsigned int length) /* start or queue request */ raw3215_try_io(raw); } - spin_unlock_irqrestore(raw->lock, flags); + spin_unlock_irqrestore(get_ccwdev_lock(raw->cdev), flags); } } @@ -568,7 +568,7 @@ raw3215_putchar(struct raw3215_info *raw, unsigned char ch) unsigned long flags; unsigned int length, i; - spin_lock_irqsave(raw->lock, flags); + spin_lock_irqsave(get_ccwdev_lock(raw->cdev), flags); if (ch == '\t') { length = TAB_STOP_SIZE - (raw->line_pos%TAB_STOP_SIZE); raw->line_pos += length; @@ -592,7 +592,7 @@ raw3215_putchar(struct raw3215_info *raw, unsigned char ch) /* start or queue request */ raw3215_try_io(raw); } - spin_unlock_irqrestore(raw->lock, flags); + spin_unlock_irqrestore(get_ccwdev_lock(raw->cdev), flags); } /* @@ -604,13 +604,13 @@ raw3215_flush_buffer(struct raw3215_info *raw) { unsigned long flags; - spin_lock_irqsave(raw->lock, flags); + spin_lock_irqsave(get_ccwdev_lock(raw->cdev), flags); if (raw->count > 0) { raw->flags |= RAW3215_FLUSHING; raw3215_try_io(raw); raw->flags &= ~RAW3215_FLUSHING; } - spin_unlock_irqrestore(raw->lock, flags); + spin_unlock_irqrestore(get_ccwdev_lock(raw->cdev), flags); } /* @@ -625,9 +625,9 @@ raw3215_startup(struct raw3215_info *raw) return 0; raw->line_pos = 0; raw->flags |= RAW3215_ACTIVE; - spin_lock_irqsave(raw->lock, flags); + spin_lock_irqsave(get_ccwdev_lock(raw->cdev), flags); raw3215_try_io(raw); - spin_unlock_irqrestore(raw->lock, flags); + spin_unlock_irqrestore(get_ccwdev_lock(raw->cdev), flags); return 0; } @@ -644,21 +644,21 @@ raw3215_shutdown(struct raw3215_info *raw) if (!(raw->flags & RAW3215_ACTIVE) || (raw->flags & RAW3215_FIXED)) return; /* Wait for outstanding requests, then free irq */ - spin_lock_irqsave(raw->lock, flags); + spin_lock_irqsave(get_ccwdev_lock(raw->cdev), flags); if ((raw->flags & RAW3215_WORKING) || raw->queued_write != NULL || raw->queued_read != NULL) { raw->flags |= RAW3215_CLOSING; add_wait_queue(&raw->empty_wait, &wait); set_current_state(TASK_INTERRUPTIBLE); - spin_unlock_irqrestore(raw->lock, flags); + spin_unlock_irqrestore(get_ccwdev_lock(raw->cdev), flags); schedule(); - spin_lock_irqsave(raw->lock, flags); + spin_lock_irqsave(get_ccwdev_lock(raw->cdev), flags); remove_wait_queue(&raw->empty_wait, &wait); set_current_state(TASK_RUNNING); raw->flags &= ~(RAW3215_ACTIVE | RAW3215_CLOSING); } - spin_unlock_irqrestore(raw->lock, flags); + spin_unlock_irqrestore(get_ccwdev_lock(raw->cdev), flags); } static int @@ -686,7 +686,6 @@ raw3215_probe (struct ccw_device *cdev) } raw->cdev = cdev; - raw->lock = get_ccwdev_lock(cdev); raw->inbuf = (char *) raw + sizeof(struct raw3215_info); memset(raw, 0, sizeof(struct raw3215_info)); raw->buffer = (char *) kmalloc(RAW3215_BUFFER_SIZE, @@ -809,9 +808,9 @@ con3215_unblank(void) unsigned long flags; raw = raw3215[0]; /* console 3215 is the first one */ - spin_lock_irqsave(raw->lock, flags); + spin_lock_irqsave(get_ccwdev_lock(raw->cdev), flags); raw3215_make_room(raw, RAW3215_BUFFER_SIZE); - spin_unlock_irqrestore(raw->lock, flags); + spin_unlock_irqrestore(get_ccwdev_lock(raw->cdev), flags); } static int __init @@ -873,7 +872,6 @@ con3215_init(void) raw->buffer = (char *) alloc_bootmem_low(RAW3215_BUFFER_SIZE); raw->inbuf = (char *) alloc_bootmem_low(RAW3215_INBUF_SIZE); raw->cdev = cdev; - raw->lock = get_ccwdev_lock(cdev); cdev->dev.driver_data = raw; cdev->handler = raw3215_irq; @@ -1066,10 +1064,10 @@ tty3215_unthrottle(struct tty_struct * tty) raw = (struct raw3215_info *) tty->driver_data; if (raw->flags & RAW3215_THROTTLED) { - spin_lock_irqsave(raw->lock, flags); + spin_lock_irqsave(get_ccwdev_lock(raw->cdev), flags); raw->flags &= ~RAW3215_THROTTLED; raw3215_try_io(raw); - spin_unlock_irqrestore(raw->lock, flags); + spin_unlock_irqrestore(get_ccwdev_lock(raw->cdev), flags); } } @@ -1096,10 +1094,10 @@ tty3215_start(struct tty_struct *tty) raw = (struct raw3215_info *) tty->driver_data; if (raw->flags & RAW3215_STOPPED) { - spin_lock_irqsave(raw->lock, flags); + spin_lock_irqsave(get_ccwdev_lock(raw->cdev), flags); raw->flags &= ~RAW3215_STOPPED; raw3215_try_io(raw); - spin_unlock_irqrestore(raw->lock, flags); + spin_unlock_irqrestore(get_ccwdev_lock(raw->cdev), flags); } } From 66a4263b991097397823b46377a43ae35541ec26 Mon Sep 17 00:00:00 2001 From: Cornelia Huck Date: Mon, 4 Dec 2006 15:40:10 +0100 Subject: [PATCH 10/34] [S390] Add MODALIAS= to the uevent for the ap bus. Signed-off-by: Cornelia Huck Signed-off-by: Martin Schwidefsky --- drivers/s390/crypto/ap_bus.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c index 79d89c368919..6a54334ffe09 100644 --- a/drivers/s390/crypto/ap_bus.c +++ b/drivers/s390/crypto/ap_bus.c @@ -431,7 +431,15 @@ static int ap_uevent (struct device *dev, char **envp, int num_envp, ap_dev->device_type); if (buffer_size - length <= 0) return -ENOMEM; - envp[1] = 0; + buffer += length; + buffer_size -= length; + /* Add MODALIAS= */ + envp[1] = buffer; + length = scnprintf(buffer, buffer_size, "MODALIAS=ap:t%02X", + ap_dev->device_type); + if (buffer_size - length <= 0) + return -ENOMEM; + envp[2] = NULL; return 0; } From 5986b0e845bc52a53ff3cafd74d341e81c95658d Mon Sep 17 00:00:00 2001 From: Michael Holzheu Date: Mon, 4 Dec 2006 15:40:13 +0100 Subject: [PATCH 11/34] [S390] Use diag instead of ccw reipl. Since the diag 308 reipl method is superior to the ccw method, we should use it whenever it is possible. We can do that, if the user has not specified a new reipl ccw device and the system has been ipled from a ccw device. Signed-off-by: Michael Holzheu Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/ipl.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index 60ba1454bfe1..ec127826f221 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -664,6 +664,8 @@ void do_reipl(void) switch (reipl_method) { case IPL_METHOD_CCW_CIO: devid.devno = reipl_block_ccw->ipl_info.ccw.devno; + if (ipl_get_type() == IPL_TYPE_CCW && devid.devno == ipl_devno) + diag308(DIAG308_IPL, NULL); devid.ssid = 0; reipl_ccw_dev(&devid); break; From 29145a6c8cda3238049937612365e80b53c3f266 Mon Sep 17 00:00:00 2001 From: Horst Hummel Date: Mon, 4 Dec 2006 15:40:15 +0100 Subject: [PATCH 12/34] [S390] Enhanced handling of failed termination requests. In case a request timed out and termination did not work, the console was flooded with retry messages (every 1/10s). Now we use a 5s delay per retry and generate a more precise message. Signed-off-by: Horst Hummel Signed-off-by: Martin Schwidefsky --- drivers/s390/block/dasd.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index 79ffef6bfaf8..a2cef57d7bcb 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -1264,15 +1264,21 @@ __dasd_check_expire(struct dasd_device * device) if (list_empty(&device->ccw_queue)) return; cqr = list_entry(device->ccw_queue.next, struct dasd_ccw_req, list); - if (cqr->status == DASD_CQR_IN_IO && cqr->expires != 0) { - if (time_after_eq(jiffies, cqr->expires + cqr->starttime)) { + if ((cqr->status == DASD_CQR_IN_IO && cqr->expires != 0) && + (time_after_eq(jiffies, cqr->expires + cqr->starttime))) { + if (device->discipline->term_IO(cqr) != 0) { + /* Hmpf, try again in 5 sec */ + dasd_set_timer(device, 5*HZ); + DEV_MESSAGE(KERN_ERR, device, + "internal error - timeout (%is) expired " + "for cqr %p, termination failed, " + "retrying in 5s", + (cqr->expires/HZ), cqr); + } else { DEV_MESSAGE(KERN_ERR, device, "internal error - timeout (%is) expired " "for cqr %p (%i retries left)", (cqr->expires/HZ), cqr, cqr->retries); - if (device->discipline->term_IO(cqr) != 0) - /* Hmpf, try again in 1/10 sec */ - dasd_set_timer(device, 10); } } } From 645c98c8b6c09eae58ac2f97e0ade6ced4d6443f Mon Sep 17 00:00:00 2001 From: Horst Hummel Date: Mon, 4 Dec 2006 15:40:18 +0100 Subject: [PATCH 13/34] [S390] return 'count' for successful execution of dasd_eer_enable. Currently the return value of 'dasd_eer_enable' is returned - even if the function returned '0'. Now return 'count' for successful execution. Signed-off-by: Horst Hummel Signed-off-by: Martin Schwidefsky --- drivers/s390/block/dasd_devmap.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/s390/block/dasd_devmap.c b/drivers/s390/block/dasd_devmap.c index b5e70c523921..17fdd8c9f740 100644 --- a/drivers/s390/block/dasd_devmap.c +++ b/drivers/s390/block/dasd_devmap.c @@ -877,12 +877,13 @@ dasd_eer_store(struct device *dev, struct device_attribute *attr, if (((endp + 1) < (buf + count)) || (val > 1)) return -EINVAL; - rc = count; - if (val) + if (val) { rc = dasd_eer_enable(devmap->device); - else + if (rc) + return rc; + } else dasd_eer_disable(devmap->device); - return rc; + return count; } static DEVICE_ATTR(eer_enabled, 0644, dasd_eer_show, dasd_eer_store); From 6b4044bdd158aa9ad07b3f68d1c7598036d3ee58 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 4 Dec 2006 15:40:20 +0100 Subject: [PATCH 14/34] [S390] extmem unbalanced spin_lock. segment save will exit with a lock held if the passed segment doesn't exist. Any subsequent call to segment_save will lead to a deadlock. Fix this and give up the lock before returning. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/mm/extmem.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/s390/mm/extmem.c b/arch/s390/mm/extmem.c index 226275d5c4f6..077af5404948 100644 --- a/arch/s390/mm/extmem.c +++ b/arch/s390/mm/extmem.c @@ -563,8 +563,9 @@ segment_save(char *name) seg = segment_by_name (name); if (seg == NULL) { - PRINT_ERR ("could not find segment %s in segment_save, please report to linux390@de.ibm.com\n",name); - return; + PRINT_ERR("could not find segment %s in segment_save, please " + "report to linux390@de.ibm.com\n", name); + goto out; } startpfn = seg->start_addr >> PAGE_SHIFT; From 2254f5a7779452395e37ea2f7d6e1a550d34e678 Mon Sep 17 00:00:00 2001 From: Nicolas Kaiser Date: Mon, 4 Dec 2006 15:40:23 +0100 Subject: [PATCH 15/34] [S390] Some documentation typos. Signed-off-by: Nicolas Kaiser Signed-off-by: Martin Schwidefsky --- Documentation/s390/CommonIO | 4 +-- Documentation/s390/Debugging390.txt | 38 ++++++++++++------------ Documentation/s390/cds.txt | 12 ++++---- Documentation/s390/crypto/crypto-API.txt | 6 ++-- Documentation/s390/s390dbf.txt | 8 ++--- 5 files changed, 34 insertions(+), 34 deletions(-) diff --git a/Documentation/s390/CommonIO b/Documentation/s390/CommonIO index d684a6ac69a8..22f82f21bc60 100644 --- a/Documentation/s390/CommonIO +++ b/Documentation/s390/CommonIO @@ -74,7 +74,7 @@ Command line parameters Note: While already known devices can be added to the list of devices to be ignored, there will be no effect on then. However, if such a device - disappears and then reappeares, it will then be ignored. + disappears and then reappears, it will then be ignored. For example, "echo add 0.0.a000-0.0.accc, 0.0.af00-0.0.afff > /proc/cio_ignore" @@ -82,7 +82,7 @@ Command line parameters devices. The devices can be specified either by bus id (0.0.abcd) or, for 2.4 backward - compatibilty, by the device number in hexadecimal (0xabcd or abcd). + compatibility, by the device number in hexadecimal (0xabcd or abcd). * /proc/s390dbf/cio_*/ (S/390 debug feature) diff --git a/Documentation/s390/Debugging390.txt b/Documentation/s390/Debugging390.txt index 4dd25ee549e9..3f9ddbc23b27 100644 --- a/Documentation/s390/Debugging390.txt +++ b/Documentation/s390/Debugging390.txt @@ -7,7 +7,7 @@ Overview of Document: ===================== -This document is intended to give an good overview of how to debug +This document is intended to give a good overview of how to debug Linux for s/390 & z/Architecture. It isn't intended as a complete reference & not a tutorial on the fundamentals of C & assembly. It doesn't go into 390 IO in any detail. It is intended to complement the documents in the @@ -300,7 +300,7 @@ On z/Architecture our page indexes are now 2k in size but only mess with 2 segment indices each time we mess with a PMD. -3) As z/Architecture supports upto a massive 5-level page table lookup we +3) As z/Architecture supports up to a massive 5-level page table lookup we can only use 3 currently on Linux ( as this is all the generic kernel currently supports ) however this may change in future this allows us to access ( according to my sums ) @@ -502,7 +502,7 @@ Notes: ------ 1) The only requirement is that registers which are used by the callee are saved, e.g. the compiler is perfectly -capible of using r11 for purposes other than a frame a +capable of using r11 for purposes other than a frame a frame pointer if a frame pointer is not needed. 2) In functions with variable arguments e.g. printf the calling procedure is identical to one without variable arguments & the same number of @@ -846,7 +846,7 @@ of time searching for debugging info. The following self explanatory line should instead if the code isn't compiled -g, as it is much faster: objdump --disassemble-all --syms vmlinux > vmlinux.lst -As hard drive space is valuble most of us use the following approach. +As hard drive space is valuable most of us use the following approach. 1) Look at the emitted psw on the console to find the crash address in the kernel. 2) Look at the file System.map ( in the linux directory ) produced when building the kernel to find the closest address less than the current PSW to find the @@ -902,7 +902,7 @@ A. It is a tool for intercepting calls to the kernel & logging them to a file & on the screen. Q. What use is it ? -A. You can used it to find out what files a particular program opens. +A. You can use it to find out what files a particular program opens. @@ -911,7 +911,7 @@ Example 1 If you wanted to know does ping work but didn't have the source strace ping -c 1 127.0.0.1 & then look at the man pages for each of the syscalls below, -( In fact this is sometimes easier than looking at some spagetti +( In fact this is sometimes easier than looking at some spaghetti source which conditionally compiles for several architectures ). Not everything that it throws out needs to make sense immediately. @@ -1037,7 +1037,7 @@ e.g. man strace, man alarm, man socket. Performance Debugging ===================== -gcc is capible of compiling in profiling code just add the -p option +gcc is capable of compiling in profiling code just add the -p option to the CFLAGS, this obviously affects program size & performance. This can be used by the gprof gnu profiling tool or the gcov the gnu code coverage tool ( code coverage is a means of testing @@ -1419,7 +1419,7 @@ On a SMP guest issue a command to all CPUs try prefixing the command with cpu al To issue a command to a particular cpu try cpu e.g. CPU 01 TR I R 2000.3000 If you are running on a guest with several cpus & you have a IO related problem -& cannot follow the flow of code but you know it isnt smp related. +& cannot follow the flow of code but you know it isn't smp related. from the bash prompt issue shutdown -h now or halt. do a Q CPUS to find out how many cpus you have @@ -1602,7 +1602,7 @@ V000FFFD0 00010400 80010802 8001085A 000FFFA0 our 3rd return address is 8001085A as the 04B52002 looks suspiciously like rubbish it is fair to assume that the kernel entry routines -for the sake of optimisation dont set up a backchain. +for the sake of optimisation don't set up a backchain. now look at System.map to see if the addresses make any sense. @@ -1638,11 +1638,11 @@ more useful information. Unlike other bus architectures modern 390 systems do their IO using mostly fibre optics & devices such as tapes & disks can be shared between several mainframes, -also S390 can support upto 65536 devices while a high end PC based system might be choking +also S390 can support up to 65536 devices while a high end PC based system might be choking with around 64. Here is some of the common IO terminology Subchannel: -This is the logical number most IO commands use to talk to an IO device there can be upto +This is the logical number most IO commands use to talk to an IO device there can be up to 0x10000 (65536) of these in a configuration typically there is a few hundred. Under VM for simplicity they are allocated contiguously, however on the native hardware they are not they typically stay consistent between boots provided no new hardware is inserted or removed. @@ -1651,7 +1651,7 @@ HALT SUBCHANNEL,MODIFY SUBCHANNEL,RESUME SUBCHANNEL,START SUBCHANNEL,STORE SUBCH TEST SUBCHANNEL ) we use this as the ID of the device we wish to talk to, the most important of these instructions are START SUBCHANNEL ( to start IO ), TEST SUBCHANNEL ( to check whether the IO completed successfully ), & HALT SUBCHANNEL ( to kill IO ), a subchannel -can have up to 8 channel paths to a device this offers redunancy if one is not available. +can have up to 8 channel paths to a device this offers redundancy if one is not available. Device Number: @@ -1659,7 +1659,7 @@ This number remains static & Is closely tied to the hardware, there are 65536 of also they are made up of a CHPID ( Channel Path ID, the most significant 8 bits ) & another lsb 8 bits. These remain static even if more devices are inserted or removed from the hardware, there is a 1 to 1 mapping between Subchannels & Device Numbers provided -devices arent inserted or removed. +devices aren't inserted or removed. Channel Control Words: CCWS are linked lists of instructions initially pointed to by an operation request block (ORB), @@ -1674,7 +1674,7 @@ concurrently, you check how the IO went on by issuing a TEST SUBCHANNEL at each from which you receive an Interruption response block (IRB). If you get channel & device end status in the IRB without channel checks etc. your IO probably went okay. If you didn't you probably need a doctor to examine the IRB & extended status word etc. -If an error occurs, more sophistocated control units have a facitity known as +If an error occurs, more sophisticated control units have a facility known as concurrent sense this means that if an error occurs Extended sense information will be presented in the Extended status word in the IRB if not you have to issue a subsequent SENSE CCW command after the test subchannel. @@ -1749,7 +1749,7 @@ Interface (OEMI). This byte wide Parallel channel path/bus has parity & data on the "Bus" cable & control lines on the "Tag" cable. These can operate in byte multiplex mode for sharing between several slow devices or burst mode & monopolize the channel for the -whole burst. Upto 256 devices can be addressed on one of these cables. These cables are +whole burst. Up to 256 devices can be addressed on one of these cables. These cables are about one inch in diameter. The maximum unextended length supported by these cables is 125 Meters but this can be extended up to 2km with a fibre optic channel extended such as a 3044. The maximum burst speed supported is 4.5 megabytes per second however @@ -1759,7 +1759,7 @@ One of these paths can be daisy chained to up to 8 control units. ESCON if fibre optic it is also called FICON Was introduced by IBM in 1990. Has 2 fibre optic cables & uses either leds or lasers -for communication at a signaling rate of upto 200 megabits/sec. As 10bits are transferred +for communication at a signaling rate of up to 200 megabits/sec. As 10bits are transferred for every 8 bits info this drops to 160 megabits/sec & to 18.6 Megabytes/sec once control info & CRC are added. ESCON only operates in burst mode. @@ -1767,7 +1767,7 @@ ESCONs typical max cable length is 3km for the led version & 20km for the laser known as XDF ( extended distance facility ). This can be further extended by using an ESCON director which triples the above mentioned ranges. Unlike Bus & Tag as ESCON is serial it uses a packet switching architecture the standard Bus & Tag control protocol -is however present within the packets. Upto 256 devices can be attached to each control +is however present within the packets. Up to 256 devices can be attached to each control unit that uses one of these interfaces. Common 390 Devices include: @@ -2050,7 +2050,7 @@ list test.c:1,10 directory: Adds directories to be searched for source if gdb cannot find the source. -(note it is a bit sensititive about slashes) +(note it is a bit sensitive about slashes) e.g. To add the root of the filesystem to the searchpath do directory // @@ -2152,7 +2152,7 @@ program as if it just crashed on your system, it is usually called core & create current working directory. This is very useful in that a customer can mail a core dump to a technical support department & the technical support department can reconstruct what happened. -Provided the have an identical copy of this program with debugging symbols compiled in & +Provided they have an identical copy of this program with debugging symbols compiled in & the source base of this build is available. In short it is far more useful than something like a crash log could ever hope to be. diff --git a/Documentation/s390/cds.txt b/Documentation/s390/cds.txt index 32a96cc39215..05a2b4f7e38f 100644 --- a/Documentation/s390/cds.txt +++ b/Documentation/s390/cds.txt @@ -98,7 +98,7 @@ The following chapters describe the I/O related interface routines the Linux/390 common device support (CDS) provides to allow for device specific driver implementations on the IBM ESA/390 hardware platform. Those interfaces intend to provide the functionality required by every device driver -implementaion to allow to drive a specific hardware device on the ESA/390 +implementation to allow to drive a specific hardware device on the ESA/390 platform. Some of the interface routines are specific to Linux/390 and some of them can be found on other Linux platforms implementations too. Miscellaneous function prototypes, data declarations, and macro definitions @@ -114,7 +114,7 @@ the ESA/390 architecture has implemented a so called channel subsystem, that provides a unified view of the devices physically attached to the systems. Though the ESA/390 hardware platform knows about a huge variety of different peripheral attachments like disk devices (aka. DASDs), tapes, communication -controllers, etc. they can all by accessed by a well defined access method and +controllers, etc. they can all be accessed by a well defined access method and they are presenting I/O completion a unified way : I/O interruptions. Every single device is uniquely identified to the system by a so called subchannel, where the ESA/390 architecture allows for 64k devices be attached. @@ -338,7 +338,7 @@ DOIO_REPORT_ALL - report all interrupt conditions The ccw_device_start() function returns : 0 - successful completion or request successfully initiated --EBUSY - The device is currently processing a previous I/O request, or ther is +-EBUSY - The device is currently processing a previous I/O request, or there is a status pending at the device. -ENODEV - cdev is invalid, the device is not operational or the ccw_device is not online. @@ -361,7 +361,7 @@ first: -EIO: the common I/O layer terminated the request due to an error state If the concurrent sense flag in the extended status word in the irb is set, the -field irb->scsw.count describes the numer of device specific sense bytes +field irb->scsw.count describes the number of device specific sense bytes available in the extended control word irb->scsw.ecw[0]. No device sensing by the device driver itself is required. @@ -410,7 +410,7 @@ ccw_device_start() must be called disabled and with the ccw device lock held. The device driver is allowed to issue the next ccw_device_start() call from within its interrupt handler already. It is not required to schedule a -bottom-half, unless an non deterministically long running error recovery procedure +bottom-half, unless a non deterministically long running error recovery procedure or similar needs to be scheduled. During I/O processing the Linux/390 generic I/O device driver support has already obtained the IRQ lock, i.e. the handler must not try to obtain it again when calling ccw_device_start() or we end in a @@ -431,7 +431,7 @@ information prior to device-end the device driver urgently relies on. In this case all I/O interruptions are presented to the device driver until final status is recognized. -If a device is able to recover from asynchronosly presented I/O errors, it can +If a device is able to recover from asynchronously presented I/O errors, it can perform overlapping I/O using the DOIO_EARLY_NOTIFICATION flag. While some devices always report channel-end and device-end together, with a single interrupt, others present primary status (channel-end) when the channel is diff --git a/Documentation/s390/crypto/crypto-API.txt b/Documentation/s390/crypto/crypto-API.txt index 41a8b07da05a..71ae6ca9f2c2 100644 --- a/Documentation/s390/crypto/crypto-API.txt +++ b/Documentation/s390/crypto/crypto-API.txt @@ -17,8 +17,8 @@ arch/s390/crypto directory. 2. Probing for availability of MSA ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ It should be possible to use Kernels with the z990 crypto implementations both -on machines with MSA available an on those without MSA (pre z990 or z990 -without MSA). Therefore a simple probing mechanisms has been implemented: +on machines with MSA available and on those without MSA (pre z990 or z990 +without MSA). Therefore a simple probing mechanism has been implemented: In the init function of each crypto module the availability of MSA and of the respective crypto algorithm in particular will be tested. If the algorithm is available the module will load and register its algorithm with the crypto API. @@ -26,7 +26,7 @@ available the module will load and register its algorithm with the crypto API. If the respective crypto algorithm is not available, the init function will return -ENOSYS. In that case a fallback to the standard software implementation of the crypto algorithm must be taken ( -> the standard crypto modules are -also build when compiling the kernel). +also built when compiling the kernel). 3. Ensuring z990 crypto module preference diff --git a/Documentation/s390/s390dbf.txt b/Documentation/s390/s390dbf.txt index 000230cd26db..0eb7c58916de 100644 --- a/Documentation/s390/s390dbf.txt +++ b/Documentation/s390/s390dbf.txt @@ -36,7 +36,7 @@ switches to the next debug area. This is done in order to be sure that the records which describe the origin of the exception are not overwritten when a wrap around for the current area occurs. -The debug areas itselve are also ordered in form of a ring buffer. +The debug areas themselves are also ordered in form of a ring buffer. When an exception is thrown in the last debug area, the following debug entries are then written again in the very first area. @@ -55,7 +55,7 @@ The debug logs can be inspected in a live system through entries in the debugfs-filesystem. Under the toplevel directory "s390dbf" there is a directory for each registered component, which is named like the corresponding component. The debugfs normally should be mounted to -/sys/kernel/debug therefore the debug feature can be accessed unter +/sys/kernel/debug therefore the debug feature can be accessed under /sys/kernel/debug/s390dbf. The content of the directories are files which represent different views @@ -87,11 +87,11 @@ There are currently 2 possible triggers, which stop the debug feature globally. The first possibility is to use the "debug_active" sysctl. If set to 1 the debug feature is running. If "debug_active" is set to 0 the debug feature is turned off. -The second trigger which stops the debug feature is an kernel oops. +The second trigger which stops the debug feature is a kernel oops. That prevents the debug feature from overwriting debug information that happened before the oops. After an oops you can reactivate the debug feature by piping 1 to /proc/sys/s390dbf/debug_active. Nevertheless, its not -suggested to use an oopsed kernel in an production environment. +suggested to use an oopsed kernel in a production environment. If you want to disallow the deactivation of the debug feature, you can use the "debug_stoppable" sysctl. If you set "debug_stoppable" to 0 the debug feature cannot be stopped. If the debug feature is already stopped, it From 15e9b586e0bd3692e2a21c5be178810d9d32214e Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 4 Dec 2006 15:40:26 +0100 Subject: [PATCH 16/34] [S390] Reset infrastructure for re-IPL. In case of re-IPL and diag308 doesn't work we have to reset all devices manually and wait synchronously that each reset finished. This patch adds the necessary infrastucture and the first exploiter of it. Subsystems that need to add a function that needs to be called at re-IPL may register/unregister this function via struct reset_call { struct reset_call *next; void (*fn)(void); }; void register_reset_call(struct reset_call *reset); void unregister_reset_call(struct reset_call *reset); When the registered function get called the context is: - all cpus beside the current one are stopped - all machine checks and interrupts are disabled - prefixing is disabled - a default machine check handler is available for use The registered functions may not take any locks are sleep. For the common I/O layer part of this patch: Introduce a reset_call css_reset that does the following: - clear all subchannels - perform a rchp on all channel paths and wait for the resulting machine checks This replaces the calls to clear_all_subchannels() and cio_reset_channel_paths() for kexec and ccw reipl. reipl_ccw_dev() now uses reipl_find_schid() to determine the subchannel id for a given device id. Also remove cio_reset_channel_paths() and friends since they are not needed anymore. Signed-off-by: Heiko Carstens Signed-off-by: Cornelia Huck Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/Makefile | 2 +- arch/s390/kernel/ipl.c | 54 +++++++++++ arch/s390/kernel/machine_kexec.c | 9 +- arch/s390/kernel/reipl.S | 17 +--- arch/s390/kernel/reipl64.S | 16 ++-- arch/s390/kernel/relocate_kernel.S | 5 +- arch/s390/kernel/relocate_kernel64.S | 5 +- arch/s390/kernel/reset.S | 48 ++++++++++ drivers/s390/cio/chsc.c | 35 -------- drivers/s390/cio/cio.c | 128 +++++++++++++++++++++------ include/asm-s390/cio.h | 4 - include/asm-s390/lowcore.h | 8 ++ include/asm-s390/reset.h | 23 +++++ 13 files changed, 246 insertions(+), 108 deletions(-) create mode 100644 arch/s390/kernel/reset.S create mode 100644 include/asm-s390/reset.h diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index aa978978d3d1..a81881c9b297 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -4,7 +4,7 @@ EXTRA_AFLAGS := -traditional -obj-y := bitmap.o traps.o time.o process.o \ +obj-y := bitmap.o traps.o time.o process.o reset.o \ setup.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o \ semaphore.o s390_ext.o debug.o profile.o irq.o ipl.o diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index ec127826f221..77f4aff630fe 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -19,6 +19,7 @@ #include #include #include +#include #define IPL_PARM_BLOCK_VERSION 0 #define LOADPARM_LEN 8 @@ -1024,3 +1025,56 @@ static int __init s390_ipl_init(void) } __initcall(s390_ipl_init); + +static LIST_HEAD(rcall); +static DEFINE_MUTEX(rcall_mutex); + +void register_reset_call(struct reset_call *reset) +{ + mutex_lock(&rcall_mutex); + list_add(&reset->list, &rcall); + mutex_unlock(&rcall_mutex); +} +EXPORT_SYMBOL_GPL(register_reset_call); + +void unregister_reset_call(struct reset_call *reset) +{ + mutex_lock(&rcall_mutex); + list_del(&reset->list); + mutex_unlock(&rcall_mutex); +} +EXPORT_SYMBOL_GPL(unregister_reset_call); + +static void do_reset_calls(void) +{ + struct reset_call *reset; + + list_for_each_entry(reset, &rcall, list) + reset->fn(); +} + +extern void reset_mcck_handler(void); + +void s390_reset_system(void) +{ + struct _lowcore *lc; + + /* Disable all interrupts/machine checks */ + __load_psw_mask(PSW_KERNEL_BITS & ~PSW_MASK_MCHECK); + + /* Stack for interrupt/machine check handler */ + lc = (struct _lowcore *)(unsigned long) store_prefix(); + lc->panic_stack = S390_lowcore.panic_stack; + + /* Disable prefixing */ + set_prefix(0); + + /* Disable lowcore protection */ + __ctl_clear_bit(0,28); + + /* Set new machine check handler */ + S390_lowcore.mcck_new_psw.mask = PSW_KERNEL_BITS & ~PSW_MASK_MCHECK; + S390_lowcore.mcck_new_psw.addr = + PSW_ADDR_AMODE | (unsigned long) &reset_mcck_handler; + do_reset_calls(); +} diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c index 60b1ea9f946b..202bf1fdfe39 100644 --- a/arch/s390/kernel/machine_kexec.c +++ b/arch/s390/kernel/machine_kexec.c @@ -22,6 +22,7 @@ #include #include #include +#include static void kexec_halt_all_cpus(void *); @@ -62,12 +63,6 @@ machine_shutdown(void) NORET_TYPE void machine_kexec(struct kimage *image) { - clear_all_subchannels(); - cio_reset_channel_paths(); - - /* Disable lowcore protection */ - ctl_clear_bit(0,28); - on_each_cpu(kexec_halt_all_cpus, image, 0, 0); for (;;); } @@ -98,6 +93,8 @@ kexec_halt_all_cpus(void *kernel_image) cpu_relax(); } + s390_reset_system(); + image = (struct kimage *) kernel_image; data_mover = (relocate_kernel_t) (page_to_pfn(image->control_code_page) << PAGE_SHIFT); diff --git a/arch/s390/kernel/reipl.S b/arch/s390/kernel/reipl.S index 0340477f3b08..f9434d42ce9f 100644 --- a/arch/s390/kernel/reipl.S +++ b/arch/s390/kernel/reipl.S @@ -11,19 +11,10 @@ .globl do_reipl_asm do_reipl_asm: basr %r13,0 .Lpg0: lpsw .Lnewpsw-.Lpg0(%r13) - - # switch off lowcore protection - -.Lpg1: stctl %c0,%c0,.Lctlsave1-.Lpg0(%r13) - stctl %c0,%c0,.Lctlsave2-.Lpg0(%r13) - ni .Lctlsave1-.Lpg0(%r13),0xef - lctl %c0,%c0,.Lctlsave1-.Lpg0(%r13) - - # do store status of all registers +.Lpg1: # do store status of all registers stm %r0,%r15,__LC_GPREGS_SAVE_AREA stctl %c0,%c15,__LC_CREGS_SAVE_AREA - mvc __LC_CREGS_SAVE_AREA(4),.Lctlsave2-.Lpg0(%r13) stam %a0,%a15,__LC_AREGS_SAVE_AREA stpx __LC_PREFIX_SAVE_AREA stckc .Lclkcmp-.Lpg0(%r13) @@ -56,8 +47,7 @@ do_reipl_asm: basr %r13,0 .L002: tm .Liplirb+8-.Lpg0(%r13),0xf3 jz .L003 bas %r14,.Ldisab-.Lpg0(%r13) -.L003: spx .Lnull-.Lpg0(%r13) - st %r1,__LC_SUBCHANNEL_ID +.L003: st %r1,__LC_SUBCHANNEL_ID lpsw 0 sigp 0,0,0(6) .Ldisab: st %r14,.Ldispsw+4-.Lpg0(%r13) @@ -65,9 +55,6 @@ do_reipl_asm: basr %r13,0 .align 8 .Lclkcmp: .quad 0x0000000000000000 .Lall: .long 0xff000000 -.Lnull: .long 0x00000000 -.Lctlsave1: .long 0x00000000 -.Lctlsave2: .long 0x00000000 .align 8 .Lnewpsw: .long 0x00080000,0x80000000+.Lpg1 .Lpcnew: .long 0x00080000,0x80000000+.Lecs diff --git a/arch/s390/kernel/reipl64.S b/arch/s390/kernel/reipl64.S index de7435054f7c..f18ef260ca23 100644 --- a/arch/s390/kernel/reipl64.S +++ b/arch/s390/kernel/reipl64.S @@ -10,10 +10,10 @@ #include .globl do_reipl_asm do_reipl_asm: basr %r13,0 +.Lpg0: lpswe .Lnewpsw-.Lpg0(%r13) +.Lpg1: # do store status of all registers - # do store status of all registers - -.Lpg0: stg %r1,.Lregsave-.Lpg0(%r13) + stg %r1,.Lregsave-.Lpg0(%r13) lghi %r1,0x1000 stmg %r0,%r15,__LC_GPREGS_SAVE_AREA-0x1000(%r1) lg %r0,.Lregsave-.Lpg0(%r13) @@ -27,11 +27,7 @@ do_reipl_asm: basr %r13,0 stpt __LC_CPU_TIMER_SAVE_AREA-0x1000(%r1) stg %r13, __LC_PSW_SAVE_AREA-0x1000+8(%r1) - lpswe .Lnewpsw-.Lpg0(%r13) -.Lpg1: lctlg %c6,%c6,.Lall-.Lpg0(%r13) - stctg %c0,%c0,.Lregsave-.Lpg0(%r13) - ni .Lregsave+4-.Lpg0(%r13),0xef - lctlg %c0,%c0,.Lregsave-.Lpg0(%r13) + lctlg %c6,%c6,.Lall-.Lpg0(%r13) lgr %r1,%r2 mvc __LC_PGM_NEW_PSW(16),.Lpcnew-.Lpg0(%r13) stsch .Lschib-.Lpg0(%r13) @@ -56,8 +52,7 @@ do_reipl_asm: basr %r13,0 .L002: tm .Liplirb+8-.Lpg0(%r13),0xf3 jz .L003 bas %r14,.Ldisab-.Lpg0(%r13) -.L003: spx .Lnull-.Lpg0(%r13) - st %r1,__LC_SUBCHANNEL_ID +.L003: st %r1,__LC_SUBCHANNEL_ID lhi %r1,0 # mode 0 = esa slr %r0,%r0 # set cpuid to zero sigp %r1,%r0,0x12 # switch to esa mode @@ -70,7 +65,6 @@ do_reipl_asm: basr %r13,0 .Lclkcmp: .quad 0x0000000000000000 .Lall: .quad 0x00000000ff000000 .Lregsave: .quad 0x0000000000000000 -.Lnull: .long 0x0000000000000000 .align 16 /* * These addresses have to be 31 bit otherwise diff --git a/arch/s390/kernel/relocate_kernel.S b/arch/s390/kernel/relocate_kernel.S index f9899ff2e5b0..3b456b80bcee 100644 --- a/arch/s390/kernel/relocate_kernel.S +++ b/arch/s390/kernel/relocate_kernel.S @@ -26,8 +26,7 @@ relocate_kernel: basr %r13,0 # base address .base: - stnsm sys_msk-.base(%r13),0xf8 # disable DAT and IRQ (external) - spx zero64-.base(%r13) # absolute addressing mode + stnsm sys_msk-.base(%r13),0xfb # disable DAT stctl %c0,%c15,ctlregs-.base(%r13) stm %r0,%r15,gprregs-.base(%r13) la %r1,load_psw-.base(%r13) @@ -97,8 +96,6 @@ lpsw 0 # hopefully start new kernel... .align 8 - zero64: - .quad 0 load_psw: .long 0x00080000,0x80000000 sys_msk: diff --git a/arch/s390/kernel/relocate_kernel64.S b/arch/s390/kernel/relocate_kernel64.S index 4fb443042d9c..1f9ea2067b59 100644 --- a/arch/s390/kernel/relocate_kernel64.S +++ b/arch/s390/kernel/relocate_kernel64.S @@ -27,8 +27,7 @@ relocate_kernel: basr %r13,0 # base address .base: - stnsm sys_msk-.base(%r13),0xf8 # disable DAT and IRQs - spx zero64-.base(%r13) # absolute addressing mode + stnsm sys_msk-.base(%r13),0xfb # disable DAT stctg %c0,%c15,ctlregs-.base(%r13) stmg %r0,%r15,gprregs-.base(%r13) lghi %r0,3 @@ -100,8 +99,6 @@ lpsw 0 # hopefully start new kernel... .align 8 - zero64: - .quad 0 load_psw: .long 0x00080000,0x80000000 sys_msk: diff --git a/arch/s390/kernel/reset.S b/arch/s390/kernel/reset.S new file mode 100644 index 000000000000..be8688c0665c --- /dev/null +++ b/arch/s390/kernel/reset.S @@ -0,0 +1,48 @@ +/* + * arch/s390/kernel/reset.S + * + * Copyright (C) IBM Corp. 2006 + * Author(s): Heiko Carstens + */ + +#include +#include + +#ifdef CONFIG_64BIT + + .globl reset_mcck_handler +reset_mcck_handler: + basr %r13,0 +0: lg %r15,__LC_PANIC_STACK # load panic stack + aghi %r15,-STACK_FRAME_OVERHEAD + lg %r1,s390_reset_mcck_handler-0b(%r13) + ltgr %r1,%r1 + jz 1f + basr %r14,%r1 +1: la %r1,4095 + lmg %r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r1) + lpswe __LC_MCK_OLD_PSW + + .globl s390_reset_mcck_handler +s390_reset_mcck_handler: + .quad 0 + +#else /* CONFIG_64BIT */ + + .globl reset_mcck_handler +reset_mcck_handler: + basr %r13,0 +0: l %r15,__LC_PANIC_STACK # load panic stack + ahi %r15,-STACK_FRAME_OVERHEAD + l %r1,s390_reset_mcck_handler-0b(%r13) + ltr %r1,%r1 + jz 1f + basr %r14,%r1 +1: lm %r0,%r15,__LC_GPREGS_SAVE_AREA + lpsw __LC_MCK_OLD_PSW + + .globl s390_reset_mcck_handler +s390_reset_mcck_handler: + .long 0 + +#endif /* CONFIG_64BIT */ diff --git a/drivers/s390/cio/chsc.c b/drivers/s390/cio/chsc.c index 2d78f0f4a40f..9d92540bd99e 100644 --- a/drivers/s390/cio/chsc.c +++ b/drivers/s390/cio/chsc.c @@ -1465,41 +1465,6 @@ chsc_get_chp_desc(struct subchannel *sch, int chp_no) return desc; } -static int reset_channel_path(struct channel_path *chp) -{ - int cc; - - cc = rchp(chp->id); - switch (cc) { - case 0: - return 0; - case 2: - return -EBUSY; - default: - return -ENODEV; - } -} - -static void reset_channel_paths_css(struct channel_subsystem *css) -{ - int i; - - for (i = 0; i <= __MAX_CHPID; i++) { - if (css->chps[i]) - reset_channel_path(css->chps[i]); - } -} - -void cio_reset_channel_paths(void) -{ - int i; - - for (i = 0; i <= __MAX_CSSID; i++) { - if (css[i] && css[i]->valid) - reset_channel_paths_css(css[i]); - } -} - static int __init chsc_alloc_sei_area(void) { diff --git a/drivers/s390/cio/cio.c b/drivers/s390/cio/cio.c index 8936e460a807..20aee2783847 100644 --- a/drivers/s390/cio/cio.c +++ b/drivers/s390/cio/cio.c @@ -21,6 +21,7 @@ #include #include #include +#include #include "airq.h" #include "cio.h" #include "css.h" @@ -28,6 +29,7 @@ #include "ioasm.h" #include "blacklist.h" #include "cio_debug.h" +#include "../s390mach.h" debug_info_t *cio_debug_msg_id; debug_info_t *cio_debug_trace_id; @@ -841,26 +843,12 @@ __clear_subchannel_easy(struct subchannel_id schid) return -EBUSY; } -struct sch_match_id { - struct subchannel_id schid; - struct ccw_dev_id devid; - int rc; -}; - -static int __shutdown_subchannel_easy_and_match(struct subchannel_id schid, - void *data) +static int __shutdown_subchannel_easy(struct subchannel_id schid, void *data) { struct schib schib; - struct sch_match_id *match_id = data; if (stsch_err(schid, &schib)) return -ENXIO; - if (match_id && schib.pmcw.dnv && - (schib.pmcw.dev == match_id->devid.devno) && - (schid.ssid == match_id->devid.ssid)) { - match_id->schid = schid; - match_id->rc = 0; - } if (!schib.pmcw.ena) return 0; switch(__disable_subchannel_easy(schid, &schib)) { @@ -876,27 +864,111 @@ static int __shutdown_subchannel_easy_and_match(struct subchannel_id schid, return 0; } -static int clear_all_subchannels_and_match(struct ccw_dev_id *devid, - struct subchannel_id *schid) +static atomic_t chpid_reset_count; + +static void s390_reset_chpids_mcck_handler(void) +{ + struct crw crw; + struct mci *mci; + + /* Check for pending channel report word. */ + mci = (struct mci *)&S390_lowcore.mcck_interruption_code; + if (!mci->cp) + return; + /* Process channel report words. */ + while (stcrw(&crw) == 0) { + /* Check for responses to RCHP. */ + if (crw.slct && crw.rsc == CRW_RSC_CPATH) + atomic_dec(&chpid_reset_count); + } +} + +#define RCHP_TIMEOUT (30 * USEC_PER_SEC) +static void css_reset(void) +{ + int i, ret; + unsigned long long timeout; + + /* Reset subchannels. */ + for_each_subchannel(__shutdown_subchannel_easy, NULL); + /* Reset channel paths. */ + s390_reset_mcck_handler = s390_reset_chpids_mcck_handler; + /* Enable channel report machine checks. */ + __ctl_set_bit(14, 28); + /* Temporarily reenable machine checks. */ + local_mcck_enable(); + for (i = 0; i <= __MAX_CHPID; i++) { + ret = rchp(i); + if ((ret == 0) || (ret == 2)) + /* + * rchp either succeeded, or another rchp is already + * in progress. In either case, we'll get a crw. + */ + atomic_inc(&chpid_reset_count); + } + /* Wait for machine check for all channel paths. */ + timeout = get_clock() + (RCHP_TIMEOUT << 12); + while (atomic_read(&chpid_reset_count) != 0) { + if (get_clock() > timeout) + break; + cpu_relax(); + } + /* Disable machine checks again. */ + local_mcck_disable(); + /* Disable channel report machine checks. */ + __ctl_clear_bit(14, 28); + s390_reset_mcck_handler = NULL; +} + +static struct reset_call css_reset_call = { + .fn = css_reset, +}; + +static int __init init_css_reset_call(void) +{ + atomic_set(&chpid_reset_count, 0); + register_reset_call(&css_reset_call); + return 0; +} + +arch_initcall(init_css_reset_call); + +struct sch_match_id { + struct subchannel_id schid; + struct ccw_dev_id devid; + int rc; +}; + +static int __reipl_subchannel_match(struct subchannel_id schid, void *data) +{ + struct schib schib; + struct sch_match_id *match_id = data; + + if (stsch_err(schid, &schib)) + return -ENXIO; + if (schib.pmcw.dnv && + (schib.pmcw.dev == match_id->devid.devno) && + (schid.ssid == match_id->devid.ssid)) { + match_id->schid = schid; + match_id->rc = 0; + return 1; + } + return 0; +} + +static int reipl_find_schid(struct ccw_dev_id *devid, + struct subchannel_id *schid) { struct sch_match_id match_id; match_id.devid = *devid; match_id.rc = -ENODEV; - local_irq_disable(); - for_each_subchannel(__shutdown_subchannel_easy_and_match, &match_id); + for_each_subchannel(__reipl_subchannel_match, &match_id); if (match_id.rc == 0) *schid = match_id.schid; return match_id.rc; } - -void clear_all_subchannels(void) -{ - local_irq_disable(); - for_each_subchannel(__shutdown_subchannel_easy_and_match, NULL); -} - extern void do_reipl_asm(__u32 schid); /* Make sure all subchannels are quiet before we re-ipl an lpar. */ @@ -904,9 +976,9 @@ void reipl_ccw_dev(struct ccw_dev_id *devid) { struct subchannel_id schid; - if (clear_all_subchannels_and_match(devid, &schid)) + s390_reset_system(); + if (reipl_find_schid(devid, &schid) != 0) panic("IPL Device not found\n"); - cio_reset_channel_paths(); do_reipl_asm(*((__u32*)&schid)); } diff --git a/include/asm-s390/cio.h b/include/asm-s390/cio.h index 81287d86329d..cabd5bb74b5a 100644 --- a/include/asm-s390/cio.h +++ b/include/asm-s390/cio.h @@ -285,10 +285,6 @@ extern int diag210(struct diag210 *addr); extern void wait_cons_dev(void); -extern void clear_all_subchannels(void); - -extern void cio_reset_channel_paths(void); - extern void css_schedule_reprobe(void); extern void reipl_ccw_dev(struct ccw_dev_id *id); diff --git a/include/asm-s390/lowcore.h b/include/asm-s390/lowcore.h index 06583ed0bde7..74f7389bd3ee 100644 --- a/include/asm-s390/lowcore.h +++ b/include/asm-s390/lowcore.h @@ -362,6 +362,14 @@ static inline void set_prefix(__u32 address) asm volatile("spx %0" : : "m" (address) : "memory"); } +static inline __u32 store_prefix(void) +{ + __u32 address; + + asm volatile("stpx %0" : "=m" (address)); + return address; +} + #define __PANIC_MAGIC 0xDEADC0DE #endif diff --git a/include/asm-s390/reset.h b/include/asm-s390/reset.h new file mode 100644 index 000000000000..9b439cf67800 --- /dev/null +++ b/include/asm-s390/reset.h @@ -0,0 +1,23 @@ +/* + * include/asm-s390/reset.h + * + * Copyright IBM Corp. 2006 + * Author(s): Heiko Carstens + */ + +#ifndef _ASM_S390_RESET_H +#define _ASM_S390_RESET_H + +#include + +struct reset_call { + struct list_head list; + void (*fn)(void); +}; + +extern void register_reset_call(struct reset_call *reset); +extern void unregister_reset_call(struct reset_call *reset); +extern void s390_reset_system(void); +extern void (*s390_reset_mcck_handler)(void); + +#endif /* _ASM_S390_RESET_H */ From a1a392f0b4f27604811bf8aa8d7636b3b4bc3803 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 4 Dec 2006 15:40:28 +0100 Subject: [PATCH 17/34] [S390] Bad kexec control page allocation. KEXEC_CONTROL_MEMORY_LIMIT is an unsigned long value and therefore should be defined as one. Otherwise the kexec control page can be allocated above 2GB which will cause a specification exception on the sam31 instruction in the s390 kexec relocation code. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- include/asm-s390/kexec.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/asm-s390/kexec.h b/include/asm-s390/kexec.h index ce28ddda0f50..9c35c8ad1afd 100644 --- a/include/asm-s390/kexec.h +++ b/include/asm-s390/kexec.h @@ -26,7 +26,7 @@ /* Maximum address we can use for the control pages */ /* Not more than 2GB */ -#define KEXEC_CONTROL_MEMORY_LIMIT (1<<31) +#define KEXEC_CONTROL_MEMORY_LIMIT (1UL<<31) /* Allocate one page for the pdp and the second for the code */ #define KEXEC_CONTROL_CODE_SIZE 4096 From 740b5706b9c4b3767f597b3ea76654c6f2a800b2 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 4 Dec 2006 15:40:30 +0100 Subject: [PATCH 18/34] [S390] cpcmd <-> __cpcmd calling issues In case of reipl cpcmd gets called when all other cpus are not running anymore. To prevent deadlocks change __cpcmd so that it doesn't take any locks and call cpcmd or __cpcmd, whatever is correct in the current context. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/cpcmd.c | 18 +++++++++++------- arch/s390/kernel/ipl.c | 10 +++++----- arch/s390/kernel/setup.c | 10 +++++----- include/asm-s390/cpcmd.h | 10 +++------- 4 files changed, 24 insertions(+), 24 deletions(-) diff --git a/arch/s390/kernel/cpcmd.c b/arch/s390/kernel/cpcmd.c index 1eae74e72f95..a5972f1541fe 100644 --- a/arch/s390/kernel/cpcmd.c +++ b/arch/s390/kernel/cpcmd.c @@ -21,14 +21,15 @@ static DEFINE_SPINLOCK(cpcmd_lock); static char cpcmd_buf[241]; /* - * the caller of __cpcmd has to ensure that the response buffer is below 2 GB + * __cpcmd has some restrictions over cpcmd + * - the response buffer must reside below 2GB (if any) + * - __cpcmd is unlocked and therefore not SMP-safe */ int __cpcmd(const char *cmd, char *response, int rlen, int *response_code) { - unsigned long flags, cmdlen; + unsigned cmdlen; int return_code, return_len; - spin_lock_irqsave(&cpcmd_lock, flags); cmdlen = strlen(cmd); BUG_ON(cmdlen > 240); memcpy(cpcmd_buf, cmd, cmdlen); @@ -74,7 +75,6 @@ int __cpcmd(const char *cmd, char *response, int rlen, int *response_code) : "+d" (reg3) : "d" (reg2) : "cc"); return_code = (int) reg3; } - spin_unlock_irqrestore(&cpcmd_lock, flags); if (response_code != NULL) *response_code = return_code; return return_len; @@ -82,15 +82,18 @@ int __cpcmd(const char *cmd, char *response, int rlen, int *response_code) EXPORT_SYMBOL(__cpcmd); -#ifdef CONFIG_64BIT int cpcmd(const char *cmd, char *response, int rlen, int *response_code) { char *lowbuf; int len; + unsigned long flags; if ((rlen == 0) || (response == NULL) - || !((unsigned long)response >> 31)) + || !((unsigned long)response >> 31)) { + spin_lock_irqsave(&cpcmd_lock, flags); len = __cpcmd(cmd, response, rlen, response_code); + spin_unlock_irqrestore(&cpcmd_lock, flags); + } else { lowbuf = kmalloc(rlen, GFP_KERNEL | GFP_DMA); if (!lowbuf) { @@ -98,7 +101,9 @@ int cpcmd(const char *cmd, char *response, int rlen, int *response_code) "cpcmd: could not allocate response buffer\n"); return -ENOMEM; } + spin_lock_irqsave(&cpcmd_lock, flags); len = __cpcmd(cmd, lowbuf, rlen, response_code); + spin_unlock_irqrestore(&cpcmd_lock, flags); memcpy(response, lowbuf, rlen); kfree(lowbuf); } @@ -106,4 +111,3 @@ int cpcmd(const char *cmd, char *response, int rlen, int *response_code) } EXPORT_SYMBOL(cpcmd); -#endif /* CONFIG_64BIT */ diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index 77f4aff630fe..101b003cfabf 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -677,7 +677,7 @@ void do_reipl(void) else sprintf(buf, "IPL %X LOADPARM '%s'", reipl_block_ccw->ipl_info.ccw.devno, loadparm); - cpcmd(buf, NULL, 0, NULL); + __cpcmd(buf, NULL, 0, NULL); break; case IPL_METHOD_CCW_DIAG: diag308(DIAG308_SET, reipl_block_ccw); @@ -691,12 +691,12 @@ void do_reipl(void) diag308(DIAG308_IPL, NULL); break; case IPL_METHOD_FCP_RO_VM: - cpcmd("IPL", NULL, 0, NULL); + __cpcmd("IPL", NULL, 0, NULL); break; case IPL_METHOD_NONE: default: if (MACHINE_IS_VM) - cpcmd("IPL", NULL, 0, NULL); + __cpcmd("IPL", NULL, 0, NULL); diag308(DIAG308_IPL, NULL); break; } @@ -732,9 +732,9 @@ static void do_dump(void) case IPL_METHOD_CCW_VM: dump_smp_stop_all(); sprintf(buf, "STORE STATUS"); - cpcmd(buf, NULL, 0, NULL); + __cpcmd(buf, NULL, 0, NULL); sprintf(buf, "IPL %X", dump_block_ccw->ipl_info.ccw.devno); - cpcmd(buf, NULL, 0, NULL); + __cpcmd(buf, NULL, 0, NULL); break; case IPL_METHOD_CCW_DIAG: diag308(DIAG308_SET, dump_block_ccw); diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 2aa13e8e000a..9bbef0c65584 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -229,11 +229,11 @@ static void __init conmode_default(void) char *ptr; if (MACHINE_IS_VM) { - __cpcmd("QUERY CONSOLE", query_buffer, 1024, NULL); + cpcmd("QUERY CONSOLE", query_buffer, 1024, NULL); console_devno = simple_strtoul(query_buffer + 5, NULL, 16); ptr = strstr(query_buffer, "SUBCHANNEL ="); console_irq = simple_strtoul(ptr + 13, NULL, 16); - __cpcmd("QUERY TERM", query_buffer, 1024, NULL); + cpcmd("QUERY TERM", query_buffer, 1024, NULL); ptr = strstr(query_buffer, "CONMODE"); /* * Set the conmode to 3215 so that the device recognition @@ -242,7 +242,7 @@ static void __init conmode_default(void) * 3215 and the 3270 driver will try to access the console * device (3215 as console and 3270 as normal tty). */ - __cpcmd("TERM CONMODE 3215", NULL, 0, NULL); + cpcmd("TERM CONMODE 3215", NULL, 0, NULL); if (ptr == NULL) { #if defined(CONFIG_SCLP_CONSOLE) SET_CONSOLE_SCLP; @@ -299,14 +299,14 @@ static void do_machine_restart_nonsmp(char * __unused) static void do_machine_halt_nonsmp(void) { if (MACHINE_IS_VM && strlen(vmhalt_cmd) > 0) - cpcmd(vmhalt_cmd, NULL, 0, NULL); + __cpcmd(vmhalt_cmd, NULL, 0, NULL); signal_processor(smp_processor_id(), sigp_stop_and_store_status); } static void do_machine_power_off_nonsmp(void) { if (MACHINE_IS_VM && strlen(vmpoff_cmd) > 0) - cpcmd(vmpoff_cmd, NULL, 0, NULL); + __cpcmd(vmpoff_cmd, NULL, 0, NULL); signal_processor(smp_processor_id(), sigp_stop_and_store_status); } diff --git a/include/asm-s390/cpcmd.h b/include/asm-s390/cpcmd.h index 1fcf65be7a23..48a9eab16429 100644 --- a/include/asm-s390/cpcmd.h +++ b/include/asm-s390/cpcmd.h @@ -7,8 +7,8 @@ * Christian Borntraeger (cborntra@de.ibm.com), */ -#ifndef __CPCMD__ -#define __CPCMD__ +#ifndef _ASM_S390_CPCMD_H +#define _ASM_S390_CPCMD_H /* * the lowlevel function for cpcmd @@ -16,9 +16,6 @@ */ extern int __cpcmd(const char *cmd, char *response, int rlen, int *response_code); -#ifndef __s390x__ -#define cpcmd __cpcmd -#else /* * cpcmd is the in-kernel interface for issuing CP commands * @@ -33,6 +30,5 @@ extern int __cpcmd(const char *cmd, char *response, int rlen, int *response_code * NOTE: If the response buffer is not below 2 GB, cpcmd can sleep */ extern int cpcmd(const char *cmd, char *response, int rlen, int *response_code); -#endif /*__s390x__*/ -#endif +#endif /* _ASM_S390_CPCMD_H */ From c6b5b847a7cf11f131c43fe0041443ec11697fc7 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 4 Dec 2006 15:40:33 +0100 Subject: [PATCH 19/34] [S390] cpu shutdown rework Let one master cpu kill all other cpus instead of sending an external interrupt to all other cpus so they can kill themselves. Simplifies reipl/shutdown functions a lot. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/ipl.c | 24 +------ arch/s390/kernel/machine_kexec.c | 65 +++++-------------- arch/s390/kernel/smp.c | 104 +++++++++---------------------- drivers/s390/char/sclp_quiesce.c | 37 +---------- include/asm-s390/smp.h | 8 +++ 5 files changed, 58 insertions(+), 180 deletions(-) diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index 101b003cfabf..a36bea1188d9 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -576,23 +576,6 @@ static struct subsys_attribute dump_type_attr = static decl_subsys(dump, NULL, NULL); -#ifdef CONFIG_SMP -static void dump_smp_stop_all(void) -{ - int cpu; - preempt_disable(); - for_each_online_cpu(cpu) { - if (cpu == smp_processor_id()) - continue; - while (signal_processor(cpu, sigp_stop) == sigp_busy) - udelay(10); - } - preempt_enable(); -} -#else -#define dump_smp_stop_all() do { } while (0) -#endif - /* * Shutdown actions section */ @@ -724,13 +707,13 @@ static void do_dump(void) switch (dump_method) { case IPL_METHOD_CCW_CIO: - dump_smp_stop_all(); + smp_send_stop(); devid.devno = dump_block_ccw->ipl_info.ccw.devno; devid.ssid = 0; reipl_ccw_dev(&devid); break; case IPL_METHOD_CCW_VM: - dump_smp_stop_all(); + smp_send_stop(); sprintf(buf, "STORE STATUS"); __cpcmd(buf, NULL, 0, NULL); sprintf(buf, "IPL %X", dump_block_ccw->ipl_info.ccw.devno); @@ -1059,9 +1042,6 @@ void s390_reset_system(void) { struct _lowcore *lc; - /* Disable all interrupts/machine checks */ - __load_psw_mask(PSW_KERNEL_BITS & ~PSW_MASK_MCHECK); - /* Stack for interrupt/machine check handler */ lc = (struct _lowcore *)(unsigned long) store_prefix(); lc->panic_stack = S390_lowcore.panic_stack; diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c index 202bf1fdfe39..def5caf8f72f 100644 --- a/arch/s390/kernel/machine_kexec.c +++ b/arch/s390/kernel/machine_kexec.c @@ -1,15 +1,10 @@ /* * arch/s390/kernel/machine_kexec.c * - * (C) Copyright IBM Corp. 2005 + * Copyright IBM Corp. 2005,2006 * - * Author(s): Rolf Adelsberger - * - */ - -/* - * s390_machine_kexec.c - handle the transition of Linux booting another kernel - * on the S390 architecture. + * Author(s): Rolf Adelsberger, + * Heiko Carstens */ #include @@ -24,81 +19,53 @@ #include #include -static void kexec_halt_all_cpus(void *); - -typedef void (*relocate_kernel_t) (kimage_entry_t *, unsigned long); +typedef void (*relocate_kernel_t)(kimage_entry_t *, unsigned long); extern const unsigned char relocate_kernel[]; extern const unsigned long long relocate_kernel_len; -int -machine_kexec_prepare(struct kimage *image) +int machine_kexec_prepare(struct kimage *image) { - unsigned long reboot_code_buffer; + void *reboot_code_buffer; /* We don't support anything but the default image type for now. */ if (image->type != KEXEC_TYPE_DEFAULT) return -EINVAL; /* Get the destination where the assembler code should be copied to.*/ - reboot_code_buffer = page_to_pfn(image->control_code_page)<control_code_page); /* Then copy it */ - memcpy((void *) reboot_code_buffer, relocate_kernel, - relocate_kernel_len); + memcpy(reboot_code_buffer, relocate_kernel, relocate_kernel_len); return 0; } -void -machine_kexec_cleanup(struct kimage *image) +void machine_kexec_cleanup(struct kimage *image) { } -void -machine_shutdown(void) +void machine_shutdown(void) { printk(KERN_INFO "kexec: machine_shutdown called\n"); } -NORET_TYPE void -machine_kexec(struct kimage *image) -{ - on_each_cpu(kexec_halt_all_cpus, image, 0, 0); - for (;;); -} - extern void pfault_fini(void); -static void -kexec_halt_all_cpus(void *kernel_image) +void machine_kexec(struct kimage *image) { - static atomic_t cpuid = ATOMIC_INIT(-1); - int cpu; - struct kimage *image; relocate_kernel_t data_mover; + preempt_disable(); #ifdef CONFIG_PFAULT if (MACHINE_IS_VM) pfault_fini(); #endif - - if (atomic_cmpxchg(&cpuid, -1, smp_processor_id()) != -1) - signal_processor(smp_processor_id(), sigp_stop); - - /* Wait for all other cpus to enter stopped state */ - for_each_online_cpu(cpu) { - if (cpu == smp_processor_id()) - continue; - while (!smp_cpu_not_running(cpu)) - cpu_relax(); - } - + smp_send_stop(); s390_reset_system(); - image = (struct kimage *) kernel_image; - data_mover = (relocate_kernel_t) - (page_to_pfn(image->control_code_page) << PAGE_SHIFT); + data_mover = (relocate_kernel_t) page_to_phys(image->control_code_page); /* Call the moving routine */ - (*data_mover) (&image->head, image->start); + (*data_mover)(&image->head, image->start); + for (;;); } diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 62822245f9be..b549a43ed08f 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -230,18 +230,37 @@ static inline void do_store_status(void) } } +static inline void do_wait_for_stop(void) +{ + int cpu; + + /* Wait for all other cpus to enter stopped state */ + for_each_online_cpu(cpu) { + if (cpu == smp_processor_id()) + continue; + while(!smp_cpu_not_running(cpu)) + cpu_relax(); + } +} + /* * this function sends a 'stop' sigp to all other CPUs in the system. * it goes straight through. */ void smp_send_stop(void) { + /* Disable all interrupts/machine checks */ + __load_psw_mask(PSW_KERNEL_BITS & ~PSW_MASK_MCHECK); + /* write magic number to zero page (absolute 0) */ lowcore_ptr[smp_processor_id()]->panic_magic = __PANIC_MAGIC; /* stop other processors. */ do_send_stop(); + /* wait until other processors are stopped */ + do_wait_for_stop(); + /* store status of other processors. */ do_store_status(); } @@ -250,88 +269,28 @@ void smp_send_stop(void) * Reboot, halt and power_off routines for SMP. */ -static void do_machine_restart(void * __unused) -{ - int cpu; - static atomic_t cpuid = ATOMIC_INIT(-1); - - if (atomic_cmpxchg(&cpuid, -1, smp_processor_id()) != -1) - signal_processor(smp_processor_id(), sigp_stop); - - /* Wait for all other cpus to enter stopped state */ - for_each_online_cpu(cpu) { - if (cpu == smp_processor_id()) - continue; - while(!smp_cpu_not_running(cpu)) - cpu_relax(); - } - - /* Store status of other cpus. */ - do_store_status(); - - /* - * Finally call reipl. Because we waited for all other - * cpus to enter this function we know that they do - * not hold any s390irq-locks (the cpus have been - * interrupted by an external interrupt and s390irq - * locks are always held disabled). - */ - do_reipl(); -} - void machine_restart_smp(char * __unused) { - on_each_cpu(do_machine_restart, NULL, 0, 0); -} - -static void do_wait_for_stop(void) -{ - unsigned long cr[16]; - - __ctl_store(cr, 0, 15); - cr[0] &= ~0xffff; - cr[6] = 0; - __ctl_load(cr, 0, 15); - for (;;) - enabled_wait(); -} - -static void do_machine_halt(void * __unused) -{ - static atomic_t cpuid = ATOMIC_INIT(-1); - - if (atomic_cmpxchg(&cpuid, -1, smp_processor_id()) == -1) { - smp_send_stop(); - if (MACHINE_IS_VM && strlen(vmhalt_cmd) > 0) - cpcmd(vmhalt_cmd, NULL, 0, NULL); - signal_processor(smp_processor_id(), - sigp_stop_and_store_status); - } - do_wait_for_stop(); + smp_send_stop(); + do_reipl(); } void machine_halt_smp(void) { - on_each_cpu(do_machine_halt, NULL, 0, 0); -} - -static void do_machine_power_off(void * __unused) -{ - static atomic_t cpuid = ATOMIC_INIT(-1); - - if (atomic_cmpxchg(&cpuid, -1, smp_processor_id()) == -1) { - smp_send_stop(); - if (MACHINE_IS_VM && strlen(vmpoff_cmd) > 0) - cpcmd(vmpoff_cmd, NULL, 0, NULL); - signal_processor(smp_processor_id(), - sigp_stop_and_store_status); - } - do_wait_for_stop(); + smp_send_stop(); + if (MACHINE_IS_VM && strlen(vmhalt_cmd) > 0) + __cpcmd(vmhalt_cmd, NULL, 0, NULL); + signal_processor(smp_processor_id(), sigp_stop_and_store_status); + for (;;); } void machine_power_off_smp(void) { - on_each_cpu(do_machine_power_off, NULL, 0, 0); + smp_send_stop(); + if (MACHINE_IS_VM && strlen(vmpoff_cmd) > 0) + __cpcmd(vmpoff_cmd, NULL, 0, NULL); + signal_processor(smp_processor_id(), sigp_stop_and_store_status); + for (;;); } /* @@ -860,4 +819,3 @@ EXPORT_SYMBOL(smp_ctl_clear_bit); EXPORT_SYMBOL(smp_call_function); EXPORT_SYMBOL(smp_get_cpu); EXPORT_SYMBOL(smp_put_cpu); - diff --git a/drivers/s390/char/sclp_quiesce.c b/drivers/s390/char/sclp_quiesce.c index 32004aae95c1..ffa9282ce97a 100644 --- a/drivers/s390/char/sclp_quiesce.c +++ b/drivers/s390/char/sclp_quiesce.c @@ -19,52 +19,17 @@ #include "sclp.h" - -#ifdef CONFIG_SMP -/* Signal completion of shutdown process. All CPUs except the first to enter - * this function: go to stopped state. First CPU: wait until all other - * CPUs are in stopped or check stop state. Afterwards, load special PSW - * to indicate completion. */ -static void -do_load_quiesce_psw(void * __unused) -{ - static atomic_t cpuid = ATOMIC_INIT(-1); - psw_t quiesce_psw; - int cpu; - - if (atomic_cmpxchg(&cpuid, -1, smp_processor_id()) != -1) - signal_processor(smp_processor_id(), sigp_stop); - /* Wait for all other cpus to enter stopped state */ - for_each_online_cpu(cpu) { - if (cpu == smp_processor_id()) - continue; - while(!smp_cpu_not_running(cpu)) - cpu_relax(); - } - /* Quiesce the last cpu with the special psw */ - quiesce_psw.mask = PSW_BASE_BITS | PSW_MASK_WAIT; - quiesce_psw.addr = 0xfff; - __load_psw(quiesce_psw); -} - -/* Shutdown handler. Perform shutdown function on all CPUs. */ -static void -do_machine_quiesce(void) -{ - on_each_cpu(do_load_quiesce_psw, NULL, 0, 0); -} -#else /* Shutdown handler. Signal completion of shutdown by loading special PSW. */ static void do_machine_quiesce(void) { psw_t quiesce_psw; + smp_send_stop(); quiesce_psw.mask = PSW_BASE_BITS | PSW_MASK_WAIT; quiesce_psw.addr = 0xfff; __load_psw(quiesce_psw); } -#endif /* Handler for quiesce event. Start shutdown procedure. */ static void diff --git a/include/asm-s390/smp.h b/include/asm-s390/smp.h index c3cf030ada4d..7097c96ed026 100644 --- a/include/asm-s390/smp.h +++ b/include/asm-s390/smp.h @@ -18,6 +18,7 @@ #include #include +#include /* s390 specific smp.c headers @@ -101,6 +102,13 @@ smp_call_function_on(void (*func) (void *info), void *info, func(info); return 0; } + +static inline void smp_send_stop(void) +{ + /* Disable all interrupts/machine checks */ + __load_psw_mask(PSW_KERNEL_BITS & ~PSW_MASK_MCHECK); +} + #define smp_cpu_not_running(cpu) 1 #define smp_get_cpu(cpu) ({ 0; }) #define smp_put_cpu(cpu) ({ 0; }) From baf2aeb3d9e286add823bcaea5442ad4ee34f6e4 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 4 Dec 2006 15:40:36 +0100 Subject: [PATCH 20/34] [S390] Misaligned wait PSW at memory detection. If the memory detection code would ever reach the point where it would load the wait psw, it would generate a specification exception and the system would crash at ipl time. This is because of a misaligned wait psw. It needs to be on a double word boundary instead of a word boundary. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/head31.S | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/s390/kernel/head31.S b/arch/s390/kernel/head31.S index 0a2c929486ab..adb082aeada6 100644 --- a/arch/s390/kernel/head31.S +++ b/arch/s390/kernel/head31.S @@ -131,10 +131,11 @@ startup_continue: .long init_thread_union .Lpmask: .byte 0 -.align 8 + .align 8 .Lpcext:.long 0x00080000,0x80000000 .Lcr: .long 0x00 # place holder for cr0 + .align 8 .Lwaitsclp: .long 0x010a0000,0x80000000 + .Lsclph .Lrcp: From 36a2bd425d9b3ba2a40b0653e08d17702c78558e Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 4 Dec 2006 15:40:38 +0100 Subject: [PATCH 21/34] [S390] Cleanup memory_chunk array usage. Need this at yet another file and don't want to add yet another extern... Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/head31.S | 2 +- arch/s390/kernel/setup.c | 7 ++----- arch/s390/mm/extmem.c | 11 ++++------- include/asm-s390/setup.h | 15 ++++++++++++--- 4 files changed, 19 insertions(+), 16 deletions(-) diff --git a/arch/s390/kernel/head31.S b/arch/s390/kernel/head31.S index adb082aeada6..9817c327aab1 100644 --- a/arch/s390/kernel/head31.S +++ b/arch/s390/kernel/head31.S @@ -157,7 +157,7 @@ startup_continue: slr %r4,%r4 # set start of chunk to zero slr %r5,%r5 # set end of chunk to zero slr %r6,%r6 # set access code to zero - la %r10, MEMORY_CHUNKS # number of chunks + la %r10,MEMORY_CHUNKS # number of chunks .Lloop: tprot 0(%r5),0 # test protection of first byte ipm %r7 diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 9bbef0c65584..b1a8ad967f9c 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -64,11 +64,8 @@ unsigned int console_devno = -1; unsigned int console_irq = -1; unsigned long memory_size = 0; unsigned long machine_flags = 0; -struct { - unsigned long addr, size, type; -} memory_chunk[MEMORY_CHUNKS] = { { 0 } }; -#define CHUNK_READ_WRITE 0 -#define CHUNK_READ_ONLY 1 + +struct mem_chunk memory_chunk[MEMORY_CHUNKS]; volatile int __cpu_logical_map[NR_CPUS]; /* logical cpu to cpu address */ unsigned long __initdata zholes_size[MAX_NR_ZONES]; static unsigned long __initdata memory_end; diff --git a/arch/s390/mm/extmem.c b/arch/s390/mm/extmem.c index 077af5404948..375c2c4f6b77 100644 --- a/arch/s390/mm/extmem.c +++ b/arch/s390/mm/extmem.c @@ -14,12 +14,13 @@ #include #include #include +#include #include #include #include #include #include -#include +#include #define DCSS_DEBUG /* Debug messages on/off */ @@ -82,10 +83,6 @@ static struct list_head dcss_list = LIST_HEAD_INIT(dcss_list); static char *segtype_string[] = { "SW", "EW", "SR", "ER", "SN", "EN", "SC", "EW/EN-MIXED" }; -extern struct { - unsigned long addr, size, type; -} memory_chunk[MEMORY_CHUNKS]; - /* * Create the 8 bytes, ebcdic VM segment name from * an ascii name. @@ -249,8 +246,8 @@ segment_overlaps_storage(struct dcss_segment *seg) { int i; - for (i=0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++) { - if (memory_chunk[i].type != 0) + for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++) { + if (memory_chunk[i].type != CHUNK_READ_WRITE) continue; if ((memory_chunk[i].addr >> 20) > (seg->end >> 20)) continue; diff --git a/include/asm-s390/setup.h b/include/asm-s390/setup.h index 5d72eda8a11b..7664bacdd832 100644 --- a/include/asm-s390/setup.h +++ b/include/asm-s390/setup.h @@ -2,7 +2,7 @@ * include/asm-s390/setup.h * * S390 version - * Copyright (C) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Copyright IBM Corp. 1999,2006 */ #ifndef _ASM_S390_SETUP_H @@ -30,6 +30,17 @@ #endif /* __s390x__ */ #define COMMAND_LINE ((char *) (0x10480)) +#define CHUNK_READ_WRITE 0 +#define CHUNK_READ_ONLY 1 + +struct mem_chunk { + unsigned long addr; + unsigned long size; + unsigned long type; +}; + +extern struct mem_chunk memory_chunk[]; + /* * Machine features detected in head.S */ @@ -53,7 +64,6 @@ extern unsigned long machine_flags; #define MACHINE_HAS_MVCOS (machine_flags & 512) #endif /* __s390x__ */ - #define MACHINE_HAS_SCLP (!MACHINE_IS_P390) /* @@ -71,7 +81,6 @@ extern unsigned int console_irq; #define SET_CONSOLE_3215 do { console_mode = 2; } while (0) #define SET_CONSOLE_3270 do { console_mode = 3; } while (0) - struct ipl_list_hdr { u32 len; u8 reserved1[3]; From 29b08d2bae854f66d3cfd5f57aaf2e7c2c7fce32 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 4 Dec 2006 15:40:40 +0100 Subject: [PATCH 22/34] [S390] pfault code cleanup. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/machine_kexec.c | 8 +------- arch/s390/kernel/smp.c | 13 +++---------- arch/s390/kernel/traps.c | 25 +------------------------ arch/s390/mm/fault.c | 28 +++++++++++++++++++++++++--- include/asm-s390/system.h | 10 ++++++++++ 5 files changed, 40 insertions(+), 44 deletions(-) diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c index def5caf8f72f..f6d9bcc0f75b 100644 --- a/arch/s390/kernel/machine_kexec.c +++ b/arch/s390/kernel/machine_kexec.c @@ -49,18 +49,12 @@ void machine_shutdown(void) printk(KERN_INFO "kexec: machine_shutdown called\n"); } -extern void pfault_fini(void); - void machine_kexec(struct kimage *image) { relocate_kernel_t data_mover; - preempt_disable(); -#ifdef CONFIG_PFAULT - if (MACHINE_IS_VM) - pfault_fini(); -#endif smp_send_stop(); + pfault_fini(); s390_reset_system(); data_mover = (relocate_kernel_t) page_to_phys(image->control_code_page); diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index b549a43ed08f..19090f7d4f51 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -460,8 +460,6 @@ __init smp_count_cpus(void) */ extern void init_cpu_timer(void); extern void init_cpu_vtimer(void); -extern int pfault_init(void); -extern void pfault_fini(void); int __devinit start_secondary(void *cpuvoid) { @@ -473,11 +471,9 @@ int __devinit start_secondary(void *cpuvoid) #ifdef CONFIG_VIRT_TIMER init_cpu_vtimer(); #endif -#ifdef CONFIG_PFAULT /* Enable pfault pseudo page faults on this cpu. */ - if (MACHINE_IS_VM) - pfault_init(); -#endif + pfault_init(); + /* Mark this cpu as online */ cpu_set(smp_processor_id(), cpu_online_map); /* Switch on interrupts */ @@ -667,11 +663,8 @@ __cpu_disable(void) } cpu_clear(cpu, cpu_online_map); -#ifdef CONFIG_PFAULT /* Disable pfault pseudo page faults on this cpu. */ - if (MACHINE_IS_VM) - pfault_fini(); -#endif + pfault_fini(); memset(&cr_parms.orvals, 0, sizeof(cr_parms.orvals)); memset(&cr_parms.andvals, 0xff, sizeof(cr_parms.andvals)); diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index 92ecffbc8d82..475dbb884430 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -58,12 +58,6 @@ int sysctl_userprocess_debug = 0; extern pgm_check_handler_t do_protection_exception; extern pgm_check_handler_t do_dat_exception; -#ifdef CONFIG_PFAULT -extern int pfault_init(void); -extern void pfault_fini(void); -extern void pfault_interrupt(__u16 error_code); -static ext_int_info_t ext_int_pfault; -#endif extern pgm_check_handler_t do_monitor_call; #define stack_pointer ({ void **sp; asm("la %0,0(15)" : "=&d" (sp)); sp; }) @@ -739,22 +733,5 @@ void __init trap_init(void) pgm_check_table[0x1C] = &space_switch_exception; pgm_check_table[0x1D] = &hfp_sqrt_exception; pgm_check_table[0x40] = &do_monitor_call; - - if (MACHINE_IS_VM) { -#ifdef CONFIG_PFAULT - /* - * Try to get pfault pseudo page faults going. - */ - if (register_early_external_interrupt(0x2603, pfault_interrupt, - &ext_int_pfault) != 0) - panic("Couldn't request external interrupt 0x2603"); - - if (pfault_init() == 0) - return; - - /* Tough luck, no pfault. */ - unregister_early_external_interrupt(0x2603, pfault_interrupt, - &ext_int_pfault); -#endif - } + pfault_irq_init(); } diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 1c323bbfda91..cd85e34d8703 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -31,6 +31,7 @@ #include #include #include +#include #ifndef CONFIG_64BIT #define __FAIL_ADDR_MASK 0x7ffff000 @@ -394,6 +395,7 @@ void do_dat_exception(struct pt_regs *regs, unsigned long error_code) /* * 'pfault' pseudo page faults routines. */ +static ext_int_info_t ext_int_pfault; static int pfault_disable = 0; static int __init nopfault(char *str) @@ -422,7 +424,7 @@ int pfault_init(void) __PF_RES_FIELD }; int rc; - if (pfault_disable) + if (!MACHINE_IS_VM || pfault_disable) return -1; asm volatile( " diag %1,%0,0x258\n" @@ -440,7 +442,7 @@ void pfault_fini(void) pfault_refbk_t refbk = { 0x258, 1, 5, 2, 0ULL, 0ULL, 0ULL, 0ULL }; - if (pfault_disable) + if (!MACHINE_IS_VM || pfault_disable) return; __ctl_clear_bit(0,9); asm volatile( @@ -500,5 +502,25 @@ pfault_interrupt(__u16 error_code) set_tsk_need_resched(tsk); } } -#endif +void __init pfault_irq_init(void) +{ + if (!MACHINE_IS_VM) + return; + + /* + * Try to get pfault pseudo page faults going. + */ + if (register_early_external_interrupt(0x2603, pfault_interrupt, + &ext_int_pfault) != 0) + panic("Couldn't request external interrupt 0x2603"); + + if (pfault_init() == 0) + return; + + /* Tough luck, no pfault. */ + pfault_disable = 1; + unregister_early_external_interrupt(0x2603, pfault_interrupt, + &ext_int_pfault); +} +#endif diff --git a/include/asm-s390/system.h b/include/asm-s390/system.h index ccbafe4bf2cb..bd0b05ae87d2 100644 --- a/include/asm-s390/system.h +++ b/include/asm-s390/system.h @@ -115,6 +115,16 @@ extern void account_system_vtime(struct task_struct *); #define account_vtime(x) do { /* empty */ } while (0) #endif +#ifdef CONFIG_PFAULT +extern void pfault_irq_init(void); +extern int pfault_init(void); +extern void pfault_fini(void); +#else /* CONFIG_PFAULT */ +#define pfault_irq_init() do { } while (0) +#define pfault_init() ({-1;}) +#define pfault_fini() do { } while (0) +#endif /* CONFIG_PFAULT */ + #define finish_arch_switch(prev) do { \ set_fs(current->thread.mm_segment); \ account_vtime(prev); \ From d57de5a36791cb1b7285649c62f183b0d3505f7d Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 4 Dec 2006 15:40:42 +0100 Subject: [PATCH 23/34] [S390] Use diag260 for memory size detection. Avoid the tprot loop if diag260 works and reports that there are no holes in memory. The tprot instruction can lead to a significant delay in the ipl process if the virtual guest has a lot of memory and the host is under memory pressure. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/head64.S | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S index 42f54d482441..cc6dbc57eb90 100644 --- a/arch/s390/kernel/head64.S +++ b/arch/s390/kernel/head64.S @@ -70,7 +70,22 @@ startup_continue: sgr %r5,%r5 # set src,length and pad to zero mvcle %r2,%r4,0 # clear mem jo .-4 # branch back, if not finish + # set program check new psw mask + mvc __LC_PGM_NEW_PSW(8),.Lpcmsk-.LPG1(%r13) + larl %r1,.Lslowmemdetect # set program check address + stg %r1,__LC_PGM_NEW_PSW+8 + lghi %r1,0xc + diag %r0,%r1,0x260 # get memory size of virtual machine + cgr %r0,%r1 # different? -> old detection routine + jne .Lslowmemdetect + aghi %r1,1 # size is one more than end + larl %r2,memory_chunk + stg %r1,8(%r2) # store size of chunk + larl %r2,memory_size + stg %r1,0(%r2) # set memory size + j .Ldonemem +.Lslowmemdetect: l %r2,.Lrcp-.LPG1(%r13) # Read SCP forced command word .Lservicecall: stosm .Lpmask-.LPG1(%r13),0x01 # authorize ext interrupts @@ -139,8 +154,6 @@ startup_continue: .int 0x100000 .Lfchunk: - # set program check new psw mask - mvc __LC_PGM_NEW_PSW(8),.Lpcmsk-.LPG1(%r13) # # find memory chunks. From 59f35d53fde3987d071ea1c9bf1c9ba29fcb69fe Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Mon, 4 Dec 2006 15:40:45 +0100 Subject: [PATCH 24/34] [S390] Add dynamic size check for usercopy functions. Use a wrapper for copy_to/from_user to chose the best usercopy method. The mvcos instruction is better for sizes greater than 256 bytes, if mvcos is not available a page table walk is better for sizes greater than 1024 bytes. Also removed the redundant copy_to/from_user_std_small functions. Signed-off-by: Gerald Schaefer Signed-off-by: Martin Schwidefsky --- arch/s390/lib/Makefile | 2 +- arch/s390/lib/uaccess_mvcos.c | 27 ++++-- arch/s390/lib/uaccess_pt.c | 153 ++++++++++++++++++++++++++++++++++ arch/s390/lib/uaccess_std.c | 67 ++++----------- 4 files changed, 190 insertions(+), 59 deletions(-) create mode 100644 arch/s390/lib/uaccess_pt.c diff --git a/arch/s390/lib/Makefile b/arch/s390/lib/Makefile index b0cfa6c4883d..b5f94cf3bde8 100644 --- a/arch/s390/lib/Makefile +++ b/arch/s390/lib/Makefile @@ -4,7 +4,7 @@ EXTRA_AFLAGS := -traditional -lib-y += delay.o string.o uaccess_std.o +lib-y += delay.o string.o uaccess_std.o uaccess_pt.o lib-$(CONFIG_32BIT) += div64.o lib-$(CONFIG_64BIT) += uaccess_mvcos.o lib-$(CONFIG_SMP) += spinlock.o diff --git a/arch/s390/lib/uaccess_mvcos.c b/arch/s390/lib/uaccess_mvcos.c index 121b2935a422..f9a23d57eb79 100644 --- a/arch/s390/lib/uaccess_mvcos.c +++ b/arch/s390/lib/uaccess_mvcos.c @@ -27,6 +27,9 @@ #define SLR "slgr" #endif +extern size_t copy_from_user_std(size_t, const void __user *, void *); +extern size_t copy_to_user_std(size_t, void __user *, const void *); + size_t copy_from_user_mvcos(size_t size, const void __user *ptr, void *x) { register unsigned long reg0 asm("0") = 0x81UL; @@ -66,6 +69,13 @@ size_t copy_from_user_mvcos(size_t size, const void __user *ptr, void *x) return size; } +size_t copy_from_user_mvcos_check(size_t size, const void __user *ptr, void *x) +{ + if (size <= 256) + return copy_from_user_std(size, ptr, x); + return copy_from_user_mvcos(size, ptr, x); +} + size_t copy_to_user_mvcos(size_t size, void __user *ptr, const void *x) { register unsigned long reg0 asm("0") = 0x810000UL; @@ -95,6 +105,13 @@ size_t copy_to_user_mvcos(size_t size, void __user *ptr, const void *x) return size; } +size_t copy_to_user_mvcos_check(size_t size, void __user *ptr, const void *x) +{ + if (size <= 256) + return copy_to_user_std(size, ptr, x); + return copy_to_user_mvcos(size, ptr, x); +} + size_t copy_in_user_mvcos(size_t size, void __user *to, const void __user *from) { register unsigned long reg0 asm("0") = 0x810081UL; @@ -145,18 +162,16 @@ size_t clear_user_mvcos(size_t size, void __user *to) return size; } -extern size_t copy_from_user_std_small(size_t, const void __user *, void *); -extern size_t copy_to_user_std_small(size_t, void __user *, const void *); extern size_t strnlen_user_std(size_t, const char __user *); extern size_t strncpy_from_user_std(size_t, const char __user *, char *); extern int futex_atomic_op(int, int __user *, int, int *); extern int futex_atomic_cmpxchg(int __user *, int, int); struct uaccess_ops uaccess_mvcos = { - .copy_from_user = copy_from_user_mvcos, - .copy_from_user_small = copy_from_user_std_small, - .copy_to_user = copy_to_user_mvcos, - .copy_to_user_small = copy_to_user_std_small, + .copy_from_user = copy_from_user_mvcos_check, + .copy_from_user_small = copy_from_user_std, + .copy_to_user = copy_to_user_mvcos_check, + .copy_to_user_small = copy_to_user_std, .copy_in_user = copy_in_user_mvcos, .clear_user = clear_user_mvcos, .strnlen_user = strnlen_user_std, diff --git a/arch/s390/lib/uaccess_pt.c b/arch/s390/lib/uaccess_pt.c new file mode 100644 index 000000000000..8741bdc09299 --- /dev/null +++ b/arch/s390/lib/uaccess_pt.c @@ -0,0 +1,153 @@ +/* + * arch/s390/lib/uaccess_pt.c + * + * User access functions based on page table walks. + * + * Copyright IBM Corp. 2006 + * Author(s): Gerald Schaefer (gerald.schaefer@de.ibm.com) + */ + +#include +#include +#include +#include + +static inline int __handle_fault(struct mm_struct *mm, unsigned long address, + int write_access) +{ + struct vm_area_struct *vma; + int ret = -EFAULT; + + down_read(&mm->mmap_sem); + vma = find_vma(mm, address); + if (unlikely(!vma)) + goto out; + if (unlikely(vma->vm_start > address)) { + if (!(vma->vm_flags & VM_GROWSDOWN)) + goto out; + if (expand_stack(vma, address)) + goto out; + } + + if (!write_access) { + /* page not present, check vm flags */ + if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))) + goto out; + } else { + if (!(vma->vm_flags & VM_WRITE)) + goto out; + } + +survive: + switch (handle_mm_fault(mm, vma, address, write_access)) { + case VM_FAULT_MINOR: + current->min_flt++; + break; + case VM_FAULT_MAJOR: + current->maj_flt++; + break; + case VM_FAULT_SIGBUS: + goto out_sigbus; + case VM_FAULT_OOM: + goto out_of_memory; + default: + BUG(); + } + ret = 0; +out: + up_read(&mm->mmap_sem); + return ret; + +out_of_memory: + up_read(&mm->mmap_sem); + if (current->pid == 1) { + yield(); + goto survive; + } + printk("VM: killing process %s\n", current->comm); + return ret; + +out_sigbus: + up_read(&mm->mmap_sem); + current->thread.prot_addr = address; + current->thread.trap_no = 0x11; + force_sig(SIGBUS, current); + return ret; +} + +static inline size_t __user_copy_pt(unsigned long uaddr, void *kptr, + size_t n, int write_user) +{ + struct mm_struct *mm = current->mm; + unsigned long offset, pfn, done, size; + pgd_t *pgd; + pmd_t *pmd; + pte_t *pte; + void *from, *to; + + done = 0; +retry: + spin_lock(&mm->page_table_lock); + do { + pgd = pgd_offset(mm, uaddr); + if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd))) + goto fault; + + pmd = pmd_offset(pgd, uaddr); + if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd))) + goto fault; + + pte = pte_offset_map(pmd, uaddr); + if (!pte || !pte_present(*pte) || + (write_user && !pte_write(*pte))) + goto fault; + + pfn = pte_pfn(*pte); + if (!pfn_valid(pfn)) + goto out; + + offset = uaddr & (PAGE_SIZE - 1); + size = min(n - done, PAGE_SIZE - offset); + if (write_user) { + to = (void *)((pfn << PAGE_SHIFT) + offset); + from = kptr + done; + } else { + from = (void *)((pfn << PAGE_SHIFT) + offset); + to = kptr + done; + } + memcpy(to, from, size); + done += size; + uaddr += size; + } while (done < n); +out: + spin_unlock(&mm->page_table_lock); + return n - done; +fault: + spin_unlock(&mm->page_table_lock); + if (__handle_fault(mm, uaddr, write_user)) + return n - done; + goto retry; +} + +size_t copy_from_user_pt(size_t n, const void __user *from, void *to) +{ + size_t rc; + + if (segment_eq(get_fs(), KERNEL_DS)) { + memcpy(to, (void __kernel __force *) from, n); + return 0; + } + rc = __user_copy_pt((unsigned long) from, to, n, 0); + if (unlikely(rc)) + memset(to + n - rc, 0, rc); + return rc; +} + +size_t copy_to_user_pt(size_t n, void __user *to, const void *from) +{ + if (segment_eq(get_fs(), KERNEL_DS)) { + memcpy((void __kernel __force *) to, from, n); + return 0; + } + return __user_copy_pt((unsigned long) to, (void *) from, n, 1); +} diff --git a/arch/s390/lib/uaccess_std.c b/arch/s390/lib/uaccess_std.c index f44f0078b354..2d549ed2e113 100644 --- a/arch/s390/lib/uaccess_std.c +++ b/arch/s390/lib/uaccess_std.c @@ -28,6 +28,9 @@ #define SLR "slgr" #endif +extern size_t copy_from_user_pt(size_t n, const void __user *from, void *to); +extern size_t copy_to_user_pt(size_t n, void __user *to, const void *from); + size_t copy_from_user_std(size_t size, const void __user *ptr, void *x) { unsigned long tmp1, tmp2; @@ -69,34 +72,11 @@ size_t copy_from_user_std(size_t size, const void __user *ptr, void *x) return size; } -size_t copy_from_user_std_small(size_t size, const void __user *ptr, void *x) +size_t copy_from_user_std_check(size_t size, const void __user *ptr, void *x) { - unsigned long tmp1, tmp2; - - tmp1 = 0UL; - asm volatile( - "0: mvcp 0(%0,%2),0(%1),%3\n" - " "SLR" %0,%0\n" - " j 5f\n" - "1: la %4,255(%1)\n" /* %4 = ptr + 255 */ - " "LHI" %3,-4096\n" - " nr %4,%3\n" /* %4 = (ptr + 255) & -4096 */ - " "SLR" %4,%1\n" - " "CLR" %0,%4\n" /* copy crosses next page boundary? */ - " jnh 5f\n" - "2: mvcp 0(%4,%2),0(%1),%3\n" - " "SLR" %0,%4\n" - " "ALR" %2,%4\n" - "3:"LHI" %4,-1\n" - " "ALR" %4,%0\n" /* copy remaining size, subtract 1 */ - " bras %3,4f\n" - " xc 0(1,%2),0(%2)\n" - "4: ex %4,0(%3)\n" - "5:\n" - EX_TABLE(0b,1b) EX_TABLE(2b,3b) - : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2) - : : "cc", "memory"); - return size; + if (size <= 1024) + return copy_from_user_std(size, ptr, x); + return copy_from_user_pt(size, ptr, x); } size_t copy_to_user_std(size_t size, void __user *ptr, const void *x) @@ -130,28 +110,11 @@ size_t copy_to_user_std(size_t size, void __user *ptr, const void *x) return size; } -size_t copy_to_user_std_small(size_t size, void __user *ptr, const void *x) +size_t copy_to_user_std_check(size_t size, void __user *ptr, const void *x) { - unsigned long tmp1, tmp2; - - tmp1 = 0UL; - asm volatile( - "0: mvcs 0(%0,%1),0(%2),%3\n" - " "SLR" %0,%0\n" - " j 3f\n" - "1: la %4,255(%1)\n" /* ptr + 255 */ - " "LHI" %3,-4096\n" - " nr %4,%3\n" /* (ptr + 255) & -4096UL */ - " "SLR" %4,%1\n" - " "CLR" %0,%4\n" /* copy crosses next page boundary? */ - " jnh 3f\n" - "2: mvcs 0(%4,%1),0(%2),%3\n" - " "SLR" %0,%4\n" - "3:\n" - EX_TABLE(0b,1b) EX_TABLE(2b,3b) - : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2) - : : "cc", "memory"); - return size; + if (size <= 1024) + return copy_to_user_std(size, ptr, x); + return copy_to_user_pt(size, ptr, x); } size_t copy_in_user_std(size_t size, void __user *to, const void __user *from) @@ -343,10 +306,10 @@ int futex_atomic_cmpxchg(int __user *uaddr, int oldval, int newval) } struct uaccess_ops uaccess_std = { - .copy_from_user = copy_from_user_std, - .copy_from_user_small = copy_from_user_std_small, - .copy_to_user = copy_to_user_std, - .copy_to_user_small = copy_to_user_std_small, + .copy_from_user = copy_from_user_std_check, + .copy_from_user_small = copy_from_user_std, + .copy_to_user = copy_to_user_std_check, + .copy_to_user_small = copy_to_user_std, .copy_in_user = copy_in_user_std, .clear_user = clear_user_std, .strnlen_user = strnlen_user_std, From 236257eee99a35d67d3feab0769bded83efdc3ec Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 4 Dec 2006 15:40:47 +0100 Subject: [PATCH 25/34] [S390] lockdep: show held locks when showing a stackdump Follow i386/x86_64: lockdep can be used to print held locks when printing a backtrace. This can be useful when debugging things like 'scheduling while atomic' asserts. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/traps.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index 475dbb884430..3cbb0dcf1f1d 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -129,7 +129,7 @@ __show_trace(unsigned long sp, unsigned long low, unsigned long high) } } -void show_trace(struct task_struct *task, unsigned long * stack) +void show_trace(struct task_struct *task, unsigned long *stack) { register unsigned long __r15 asm ("15"); unsigned long sp; @@ -151,6 +151,9 @@ void show_trace(struct task_struct *task, unsigned long * stack) __show_trace(sp, S390_lowcore.thread_info, S390_lowcore.thread_info + THREAD_SIZE); printk("\n"); + if (!task) + task = current; + debug_show_held_locks(task); } void show_stack(struct task_struct *task, unsigned long *sp) From 1681cedaee9f8dcb59d499a9277bc3a190a2378d Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Mon, 4 Dec 2006 15:40:49 +0100 Subject: [PATCH 26/34] [S390] set KBUILD_IMAGE. Set KBUILD_IMAGE to a sane value. This enables "make rpm" Signed-off-by: Christian Borntraeger Signed-off-by: Martin Schwidefsky --- arch/s390/Makefile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/s390/Makefile b/arch/s390/Makefile index 5deb9f7544a1..6598e5268573 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -35,6 +35,9 @@ cflags-$(CONFIG_MARCH_Z900) += $(call cc-option,-march=z900) cflags-$(CONFIG_MARCH_Z990) += $(call cc-option,-march=z990) cflags-$(CONFIG_MARCH_Z9_109) += $(call cc-option,-march=z9-109) +#KBUILD_IMAGE is necessary for make rpm +KBUILD_IMAGE :=arch/s390/boot/image + # # Prevent tail-call optimizations, to get clearer backtraces: # From 09252e77ed13b1b412329825bad5797fbdd5a5c8 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 4 Dec 2006 15:40:51 +0100 Subject: [PATCH 27/34] [S390] Convert extmem spin_lock into a mutex. There's no need to have a spin_lock here, but need sleepable context for vmem_map. Therefore convert the spin_lock into a mutex. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/mm/extmem.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/arch/s390/mm/extmem.c b/arch/s390/mm/extmem.c index 375c2c4f6b77..9e9bc48463a5 100644 --- a/arch/s390/mm/extmem.c +++ b/arch/s390/mm/extmem.c @@ -78,7 +78,7 @@ struct dcss_segment { int segcnt; }; -static DEFINE_SPINLOCK(dcss_lock); +static DEFINE_MUTEX(dcss_lock); static struct list_head dcss_list = LIST_HEAD_INIT(dcss_list); static char *segtype_string[] = { "SW", "EW", "SR", "ER", "SN", "EN", "SC", "EW/EN-MIXED" }; @@ -114,7 +114,7 @@ segment_by_name (char *name) struct list_head *l; struct dcss_segment *tmp, *retval = NULL; - assert_spin_locked(&dcss_lock); + BUG_ON(!mutex_is_locked(&dcss_lock)); dcss_mkname (name, dcss_name); list_for_each (l, &dcss_list) { tmp = list_entry (l, struct dcss_segment, list); @@ -269,7 +269,7 @@ segment_overlaps_others (struct dcss_segment *seg) struct list_head *l; struct dcss_segment *tmp; - assert_spin_locked(&dcss_lock); + BUG_ON(!mutex_is_locked(&dcss_lock)); list_for_each(l, &dcss_list) { tmp = list_entry(l, struct dcss_segment, list); if ((tmp->start_addr >> 20) > (seg->end >> 20)) @@ -426,7 +426,7 @@ segment_load (char *name, int do_nonshared, unsigned long *addr, if (!MACHINE_IS_VM) return -ENOSYS; - spin_lock (&dcss_lock); + mutex_lock(&dcss_lock); seg = segment_by_name (name); if (seg == NULL) rc = __segment_load (name, do_nonshared, addr, end); @@ -441,7 +441,7 @@ segment_load (char *name, int do_nonshared, unsigned long *addr, rc = -EPERM; } } - spin_unlock (&dcss_lock); + mutex_unlock(&dcss_lock); return rc; } @@ -464,7 +464,7 @@ segment_modify_shared (char *name, int do_nonshared) unsigned long dummy; int dcss_command, rc, diag_cc; - spin_lock (&dcss_lock); + mutex_lock(&dcss_lock); seg = segment_by_name (name); if (seg == NULL) { rc = -EINVAL; @@ -505,7 +505,7 @@ segment_modify_shared (char *name, int do_nonshared) &dummy, &dummy); kfree(seg); out_unlock: - spin_unlock(&dcss_lock); + mutex_unlock(&dcss_lock); return rc; } @@ -523,7 +523,7 @@ segment_unload(char *name) if (!MACHINE_IS_VM) return; - spin_lock(&dcss_lock); + mutex_lock(&dcss_lock); seg = segment_by_name (name); if (seg == NULL) { PRINT_ERR ("could not find segment %s in segment_unload, " @@ -537,7 +537,7 @@ segment_unload(char *name) kfree(seg); } out_unlock: - spin_unlock(&dcss_lock); + mutex_unlock(&dcss_lock); } /* @@ -556,7 +556,7 @@ segment_save(char *name) if (!MACHINE_IS_VM) return; - spin_lock(&dcss_lock); + mutex_lock(&dcss_lock); seg = segment_by_name (name); if (seg == NULL) { @@ -589,7 +589,7 @@ segment_save(char *name) goto out; } out: - spin_unlock(&dcss_lock); + mutex_unlock(&dcss_lock); } EXPORT_SYMBOL(segment_load); From ce26a8532fd062ccd3f3c589a4be269a2dc20f00 Mon Sep 17 00:00:00 2001 From: Cornelia Huck Date: Mon, 4 Dec 2006 15:40:54 +0100 Subject: [PATCH 28/34] [S390] cio: Make ccw_dev_id_is_equal() more robust. Using memcmp to compare ccw_dev_id implies that the whole structure (incl. padding) has always been completely initialized to sane values. Comparing the structures field by field doesn't make such assumptions. Signed-off-by: Cornelia Huck Signed-off-by: Martin Schwidefsky --- include/asm-s390/cio.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/include/asm-s390/cio.h b/include/asm-s390/cio.h index cabd5bb74b5a..d92785030980 100644 --- a/include/asm-s390/cio.h +++ b/include/asm-s390/cio.h @@ -278,7 +278,10 @@ struct ccw_dev_id { static inline int ccw_dev_id_is_equal(struct ccw_dev_id *dev_id1, struct ccw_dev_id *dev_id2) { - return !memcmp(dev_id1, dev_id2, sizeof(struct ccw_dev_id)); + if ((dev_id1->ssid == dev_id2->ssid) && + (dev_id1->devno == dev_id2->devno)) + return 1; + return 0; } extern int diag210(struct diag210 *addr); From 8b62bc9642300471737bc3b77b2a4a2ead46dedb Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 4 Dec 2006 15:40:56 +0100 Subject: [PATCH 29/34] [S390] Memory detection fixes. VMALLOC_END on 31bit should be 0x8000000UL instead of 0x7fffffffL. The page mask which is used to make sure memory_end is on 4MB/2MB boundary is wrong and not needed. Therefore remove it. Make sure a vmalloc area does also exist and work on (future) machines with 4TB and more memory. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/head31.S | 3 --- arch/s390/kernel/head64.S | 4 ---- arch/s390/kernel/setup.c | 49 +++++++++++++++++++++++++------------- include/asm-s390/pgtable.h | 19 +++++++++++---- 4 files changed, 46 insertions(+), 29 deletions(-) diff --git a/arch/s390/kernel/head31.S b/arch/s390/kernel/head31.S index 9817c327aab1..4388b3309e0c 100644 --- a/arch/s390/kernel/head31.S +++ b/arch/s390/kernel/head31.S @@ -177,8 +177,6 @@ startup_continue: st %r0,4(%r3) # store size of chunk st %r6,8(%r3) # store type of chunk la %r3,12(%r3) - l %r4,.Lmemsize-.LPG1(%r13) # address of variable memory_size - st %r5,0(%r4) # store last end to memory size ahi %r10,-1 # update chunk number .Lchkloop: lr %r6,%r7 # set access code to last cc @@ -293,7 +291,6 @@ startup_continue: .Lpcmvpg:.long 0x00080000,0x80000000 + .Lchkmvpg .Lpcidte:.long 0x00080000,0x80000000 + .Lchkidte .Lpcdiag9c:.long 0x00080000,0x80000000 + .Lchkdiag9c -.Lmemsize:.long memory_size .Lmchunk:.long memory_chunk .Lmflags:.long machine_flags .Lbss_bgn: .long __bss_start diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S index cc6dbc57eb90..c526279e1123 100644 --- a/arch/s390/kernel/head64.S +++ b/arch/s390/kernel/head64.S @@ -81,8 +81,6 @@ startup_continue: aghi %r1,1 # size is one more than end larl %r2,memory_chunk stg %r1,8(%r2) # store size of chunk - larl %r2,memory_size - stg %r1,0(%r2) # set memory size j .Ldonemem .Lslowmemdetect: @@ -188,8 +186,6 @@ startup_continue: stg %r0,8(%r3) # store size of chunk st %r6,20(%r3) # store type of chunk la %r3,24(%r3) - larl %r8,memory_size - stg %r5,0(%r8) # store memory size ahi %r10,-1 # update chunk number .Lchkloop: lr %r6,%r7 # set access code to last cc diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index b1a8ad967f9c..b928fecdc743 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -62,7 +62,6 @@ EXPORT_SYMBOL_GPL(uaccess); unsigned int console_mode = 0; unsigned int console_devno = -1; unsigned int console_irq = -1; -unsigned long memory_size = 0; unsigned long machine_flags = 0; struct mem_chunk memory_chunk[MEMORY_CHUNKS]; @@ -486,6 +485,37 @@ setup_resources(void) } } +static void __init setup_memory_end(void) +{ + unsigned long real_size, memory_size; + unsigned long max_mem, max_phys; + int i; + + memory_size = real_size = 0; + max_phys = VMALLOC_END - VMALLOC_MIN_SIZE; + memory_end &= PAGE_MASK; + + max_mem = memory_end ? min(max_phys, memory_end) : max_phys; + + for (i = 0; i < MEMORY_CHUNKS; i++) { + struct mem_chunk *chunk = &memory_chunk[i]; + + real_size = max(real_size, chunk->addr + chunk->size); + if (chunk->addr >= max_mem) { + memset(chunk, 0, sizeof(*chunk)); + continue; + } + if (chunk->addr + chunk->size > max_mem) + chunk->size = max_mem - chunk->addr; + memory_size = max(memory_size, chunk->addr + chunk->size); + } + if (!memory_end) + memory_end = memory_size; + if (real_size > memory_end) + printk("More memory detected than supported. Unused: %luk\n", + (real_size - memory_end) >> 10); +} + static void __init setup_memory(void) { @@ -642,8 +672,6 @@ setup_arch(char **cmdline_p) init_mm.end_data = (unsigned long) &_edata; init_mm.brk = (unsigned long) &_end; - memory_end = memory_size; - if (MACHINE_HAS_MVCOS) memcpy(&uaccess, &uaccess_mvcos, sizeof(uaccess)); else @@ -651,20 +679,7 @@ setup_arch(char **cmdline_p) parse_early_param(); -#ifndef CONFIG_64BIT - memory_end &= ~0x400000UL; - - /* - * We need some free virtual space to be able to do vmalloc. - * On a machine with 2GB memory we make sure that we have at - * least 128 MB free space for vmalloc. - */ - if (memory_end > 1920*1024*1024) - memory_end = 1920*1024*1024; -#else /* CONFIG_64BIT */ - memory_end &= ~0x200000UL; -#endif /* CONFIG_64BIT */ - + setup_memory_end(); setup_memory(); setup_resources(); setup_lowcore(); diff --git a/include/asm-s390/pgtable.h b/include/asm-s390/pgtable.h index 36bb6dacf008..2d968a69ed1f 100644 --- a/include/asm-s390/pgtable.h +++ b/include/asm-s390/pgtable.h @@ -110,12 +110,21 @@ extern char empty_zero_page[PAGE_SIZE]; #define VMALLOC_OFFSET (8*1024*1024) #define VMALLOC_START (((unsigned long) high_memory + VMALLOC_OFFSET) \ & ~(VMALLOC_OFFSET-1)) -#ifndef __s390x__ -# define VMALLOC_END (0x7fffffffL) -#else /* __s390x__ */ -# define VMALLOC_END (0x40000000000L) -#endif /* __s390x__ */ +/* + * We need some free virtual space to be able to do vmalloc. + * VMALLOC_MIN_SIZE defines the minimum size of the vmalloc + * area. On a machine with 2GB memory we make sure that we + * have at least 128MB free space for vmalloc. On a machine + * with 4TB we make sure we have at least 1GB. + */ +#ifndef __s390x__ +#define VMALLOC_MIN_SIZE 0x8000000UL +#define VMALLOC_END 0x80000000UL +#else /* __s390x__ */ +#define VMALLOC_MIN_SIZE 0x40000000UL +#define VMALLOC_END 0x40000000000UL +#endif /* __s390x__ */ /* * A 31 bit pagetable entry of S390 has following format: From 9163bb2e556f6c7879961df94540f0879db4717b Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Mon, 4 Dec 2006 15:40:59 +0100 Subject: [PATCH 30/34] [S390] non-unique constant/macro identifiers. Add some prefixes to constands defined in drivers/s390/net/qdio.h and drivers/s390/lcs.h to make it possible to include the three header files drivers/s390/net/qeth.h, drivers/s390/net/qdio.h and drivers/net/s390/lcs.h in one C file. This is required for the patch that generates the kerntypes.o file for use by lcrash. Signed-off-by: Ursula Braun Signed-off-by: Martin Schwidefsky --- drivers/s390/cio/qdio.c | 8 ++-- drivers/s390/cio/qdio.h | 4 +- drivers/s390/net/lcs.c | 78 ++++++++++++++++++------------------ drivers/s390/net/lcs.h | 25 +++++------- drivers/s390/net/qeth.h | 2 - drivers/s390/net/qeth_main.c | 6 +-- 6 files changed, 58 insertions(+), 65 deletions(-) diff --git a/drivers/s390/cio/qdio.c b/drivers/s390/cio/qdio.c index 476aa1da5cbc..8d5fa1b4d11f 100644 --- a/drivers/s390/cio/qdio.c +++ b/drivers/s390/cio/qdio.c @@ -481,7 +481,7 @@ qdio_stop_polling(struct qdio_q *q) unsigned char state = 0; struct qdio_irq *irq = (struct qdio_irq *) q->irq_ptr; - if (!atomic_swap(&q->polling,0)) + if (!atomic_xchg(&q->polling,0)) return 1; QDIO_DBF_TEXT4(0,trace,"stoppoll"); @@ -1964,8 +1964,8 @@ qdio_irq_check_sense(struct subchannel_id schid, struct irb *irb) QDIO_DBF_HEX0(0,sense,irb,QDIO_DBF_SENSE_LEN); QDIO_PRINT_WARN("sense data available on qdio channel.\n"); - HEXDUMP16(WARN,"irb: ",irb); - HEXDUMP16(WARN,"sense data: ",irb->ecw); + QDIO_HEXDUMP16(WARN,"irb: ",irb); + QDIO_HEXDUMP16(WARN,"sense data: ",irb->ecw); } } @@ -3425,7 +3425,7 @@ do_qdio_handle_inbound(struct qdio_q *q, unsigned int callflags, if ((used_elements+count==QDIO_MAX_BUFFERS_PER_Q)&& (callflags&QDIO_FLAG_UNDER_INTERRUPT)) - atomic_swap(&q->polling,0); + atomic_xchg(&q->polling,0); if (used_elements) return; diff --git a/drivers/s390/cio/qdio.h b/drivers/s390/cio/qdio.h index 49bb9e371c32..42927c1b7451 100644 --- a/drivers/s390/cio/qdio.h +++ b/drivers/s390/cio/qdio.h @@ -236,7 +236,7 @@ enum qdio_irq_states { #define QDIO_PRINT_EMERG(x...) do { } while (0) #endif -#define HEXDUMP16(importance,header,ptr) \ +#define QDIO_HEXDUMP16(importance,header,ptr) \ QDIO_PRINT_##importance(header "%02x %02x %02x %02x " \ "%02x %02x %02x %02x %02x %02x %02x %02x " \ "%02x %02x %02x %02x\n",*(((char*)ptr)), \ @@ -429,8 +429,6 @@ struct qdio_perf_stats { }; #endif /* QDIO_PERFORMANCE_STATS */ -#define atomic_swap(a,b) xchg((int*)a.counter,b) - /* unlikely as the later the better */ #define SYNC_MEMORY if (unlikely(q->siga_sync)) qdio_siga_sync_q(q) #define SYNC_MEMORY_ALL if (unlikely(q->siga_sync)) \ diff --git a/drivers/s390/net/lcs.c b/drivers/s390/net/lcs.c index 66a8aec6efa6..08d4e47070bd 100644 --- a/drivers/s390/net/lcs.c +++ b/drivers/s390/net/lcs.c @@ -54,6 +54,8 @@ #error Cannot compile lcs.c without some net devices switched on. #endif +#define PRINTK_HEADER " lcs: " + /** * initialization string for output */ @@ -120,7 +122,7 @@ lcs_alloc_channel(struct lcs_channel *channel) kzalloc(LCS_IOBUFFERSIZE, GFP_DMA | GFP_KERNEL); if (channel->iob[cnt].data == NULL) break; - channel->iob[cnt].state = BUF_STATE_EMPTY; + channel->iob[cnt].state = LCS_BUF_STATE_EMPTY; } if (cnt < LCS_NUM_BUFFS) { /* Not all io buffers could be allocated. */ @@ -236,7 +238,7 @@ lcs_setup_read_ccws(struct lcs_card *card) ((struct lcs_header *) card->read.iob[cnt].data)->offset = LCS_ILLEGAL_OFFSET; card->read.iob[cnt].callback = lcs_get_frames_cb; - card->read.iob[cnt].state = BUF_STATE_READY; + card->read.iob[cnt].state = LCS_BUF_STATE_READY; card->read.iob[cnt].count = LCS_IOBUFFERSIZE; } card->read.ccws[0].flags &= ~CCW_FLAG_PCI; @@ -247,7 +249,7 @@ lcs_setup_read_ccws(struct lcs_card *card) card->read.ccws[LCS_NUM_BUFFS].cda = (__u32) __pa(card->read.ccws); /* Setg initial state of the read channel. */ - card->read.state = CH_STATE_INIT; + card->read.state = LCS_CH_STATE_INIT; card->read.io_idx = 0; card->read.buf_idx = 0; @@ -294,7 +296,7 @@ lcs_setup_write_ccws(struct lcs_card *card) card->write.ccws[LCS_NUM_BUFFS].cda = (__u32) __pa(card->write.ccws); /* Set initial state of the write channel. */ - card->read.state = CH_STATE_INIT; + card->read.state = LCS_CH_STATE_INIT; card->write.io_idx = 0; card->write.buf_idx = 0; @@ -496,7 +498,7 @@ lcs_start_channel(struct lcs_channel *channel) channel->ccws + channel->io_idx, 0, 0, DOIO_DENY_PREFETCH | DOIO_ALLOW_SUSPEND); if (rc == 0) - channel->state = CH_STATE_RUNNING; + channel->state = LCS_CH_STATE_RUNNING; spin_unlock_irqrestore(get_ccwdev_lock(channel->ccwdev), flags); if (rc) { LCS_DBF_TEXT_(4,trace,"essh%s", channel->ccwdev->dev.bus_id); @@ -520,8 +522,8 @@ lcs_clear_channel(struct lcs_channel *channel) LCS_DBF_TEXT_(4,trace,"ecsc%s", channel->ccwdev->dev.bus_id); return rc; } - wait_event(channel->wait_q, (channel->state == CH_STATE_CLEARED)); - channel->state = CH_STATE_STOPPED; + wait_event(channel->wait_q, (channel->state == LCS_CH_STATE_CLEARED)); + channel->state = LCS_CH_STATE_STOPPED; return rc; } @@ -535,11 +537,11 @@ lcs_stop_channel(struct lcs_channel *channel) unsigned long flags; int rc; - if (channel->state == CH_STATE_STOPPED) + if (channel->state == LCS_CH_STATE_STOPPED) return 0; LCS_DBF_TEXT(4,trace,"haltsch"); LCS_DBF_TEXT_(4,trace,"%s", channel->ccwdev->dev.bus_id); - channel->state = CH_STATE_INIT; + channel->state = LCS_CH_STATE_INIT; spin_lock_irqsave(get_ccwdev_lock(channel->ccwdev), flags); rc = ccw_device_halt(channel->ccwdev, (addr_t) channel); spin_unlock_irqrestore(get_ccwdev_lock(channel->ccwdev), flags); @@ -548,7 +550,7 @@ lcs_stop_channel(struct lcs_channel *channel) return rc; } /* Asynchronous halt initialted. Wait for its completion. */ - wait_event(channel->wait_q, (channel->state == CH_STATE_HALTED)); + wait_event(channel->wait_q, (channel->state == LCS_CH_STATE_HALTED)); lcs_clear_channel(channel); return 0; } @@ -596,8 +598,8 @@ __lcs_get_buffer(struct lcs_channel *channel) LCS_DBF_TEXT(5, trace, "_getbuff"); index = channel->io_idx; do { - if (channel->iob[index].state == BUF_STATE_EMPTY) { - channel->iob[index].state = BUF_STATE_LOCKED; + if (channel->iob[index].state == LCS_BUF_STATE_EMPTY) { + channel->iob[index].state = LCS_BUF_STATE_LOCKED; return channel->iob + index; } index = (index + 1) & (LCS_NUM_BUFFS - 1); @@ -626,7 +628,7 @@ __lcs_resume_channel(struct lcs_channel *channel) { int rc; - if (channel->state != CH_STATE_SUSPENDED) + if (channel->state != LCS_CH_STATE_SUSPENDED) return 0; if (channel->ccws[channel->io_idx].flags & CCW_FLAG_SUSPEND) return 0; @@ -636,7 +638,7 @@ __lcs_resume_channel(struct lcs_channel *channel) LCS_DBF_TEXT_(4, trace, "ersc%s", channel->ccwdev->dev.bus_id); PRINT_ERR("Error in lcs_resume_channel: rc=%d\n",rc); } else - channel->state = CH_STATE_RUNNING; + channel->state = LCS_CH_STATE_RUNNING; return rc; } @@ -670,10 +672,10 @@ lcs_ready_buffer(struct lcs_channel *channel, struct lcs_buffer *buffer) int index, rc; LCS_DBF_TEXT(5, trace, "rdybuff"); - BUG_ON(buffer->state != BUF_STATE_LOCKED && - buffer->state != BUF_STATE_PROCESSED); + BUG_ON(buffer->state != LCS_BUF_STATE_LOCKED && + buffer->state != LCS_BUF_STATE_PROCESSED); spin_lock_irqsave(get_ccwdev_lock(channel->ccwdev), flags); - buffer->state = BUF_STATE_READY; + buffer->state = LCS_BUF_STATE_READY; index = buffer - channel->iob; /* Set length. */ channel->ccws[index].count = buffer->count; @@ -695,8 +697,8 @@ __lcs_processed_buffer(struct lcs_channel *channel, struct lcs_buffer *buffer) int index, prev, next; LCS_DBF_TEXT(5, trace, "prcsbuff"); - BUG_ON(buffer->state != BUF_STATE_READY); - buffer->state = BUF_STATE_PROCESSED; + BUG_ON(buffer->state != LCS_BUF_STATE_READY); + buffer->state = LCS_BUF_STATE_PROCESSED; index = buffer - channel->iob; prev = (index - 1) & (LCS_NUM_BUFFS - 1); next = (index + 1) & (LCS_NUM_BUFFS - 1); @@ -704,7 +706,7 @@ __lcs_processed_buffer(struct lcs_channel *channel, struct lcs_buffer *buffer) channel->ccws[index].flags |= CCW_FLAG_SUSPEND; channel->ccws[index].flags &= ~CCW_FLAG_PCI; /* Check the suspend bit of the previous buffer. */ - if (channel->iob[prev].state == BUF_STATE_READY) { + if (channel->iob[prev].state == LCS_BUF_STATE_READY) { /* * Previous buffer is in state ready. It might have * happened in lcs_ready_buffer that the suspend bit @@ -727,10 +729,10 @@ lcs_release_buffer(struct lcs_channel *channel, struct lcs_buffer *buffer) unsigned long flags; LCS_DBF_TEXT(5, trace, "relbuff"); - BUG_ON(buffer->state != BUF_STATE_LOCKED && - buffer->state != BUF_STATE_PROCESSED); + BUG_ON(buffer->state != LCS_BUF_STATE_LOCKED && + buffer->state != LCS_BUF_STATE_PROCESSED); spin_lock_irqsave(get_ccwdev_lock(channel->ccwdev), flags); - buffer->state = BUF_STATE_EMPTY; + buffer->state = LCS_BUF_STATE_EMPTY; spin_unlock_irqrestore(get_ccwdev_lock(channel->ccwdev), flags); } @@ -1264,7 +1266,7 @@ lcs_register_mc_addresses(void *data) netif_carrier_off(card->dev); netif_tx_disable(card->dev); wait_event(card->write.wait_q, - (card->write.state != CH_STATE_RUNNING)); + (card->write.state != LCS_CH_STATE_RUNNING)); lcs_fix_multicast_list(card); if (card->state == DEV_STATE_UP) { netif_carrier_on(card->dev); @@ -1404,7 +1406,7 @@ lcs_irq(struct ccw_device *cdev, unsigned long intparm, struct irb *irb) } } /* How far in the ccw chain have we processed? */ - if ((channel->state != CH_STATE_INIT) && + if ((channel->state != LCS_CH_STATE_INIT) && (irb->scsw.fctl & SCSW_FCTL_START_FUNC)) { index = (struct ccw1 *) __va((addr_t) irb->scsw.cpa) - channel->ccws; @@ -1424,20 +1426,20 @@ lcs_irq(struct ccw_device *cdev, unsigned long intparm, struct irb *irb) (irb->scsw.dstat & DEV_STAT_CHN_END) || (irb->scsw.dstat & DEV_STAT_UNIT_CHECK)) /* Mark channel as stopped. */ - channel->state = CH_STATE_STOPPED; + channel->state = LCS_CH_STATE_STOPPED; else if (irb->scsw.actl & SCSW_ACTL_SUSPENDED) /* CCW execution stopped on a suspend bit. */ - channel->state = CH_STATE_SUSPENDED; + channel->state = LCS_CH_STATE_SUSPENDED; if (irb->scsw.fctl & SCSW_FCTL_HALT_FUNC) { if (irb->scsw.cc != 0) { ccw_device_halt(channel->ccwdev, (addr_t) channel); return; } /* The channel has been stopped by halt_IO. */ - channel->state = CH_STATE_HALTED; + channel->state = LCS_CH_STATE_HALTED; } if (irb->scsw.fctl & SCSW_FCTL_CLEAR_FUNC) { - channel->state = CH_STATE_CLEARED; + channel->state = LCS_CH_STATE_CLEARED; } /* Do the rest in the tasklet. */ tasklet_schedule(&channel->irq_tasklet); @@ -1461,7 +1463,7 @@ lcs_tasklet(unsigned long data) /* Check for processed buffers. */ iob = channel->iob; buf_idx = channel->buf_idx; - while (iob[buf_idx].state == BUF_STATE_PROCESSED) { + while (iob[buf_idx].state == LCS_BUF_STATE_PROCESSED) { /* Do the callback thing. */ if (iob[buf_idx].callback != NULL) iob[buf_idx].callback(channel, iob + buf_idx); @@ -1469,12 +1471,12 @@ lcs_tasklet(unsigned long data) } channel->buf_idx = buf_idx; - if (channel->state == CH_STATE_STOPPED) + if (channel->state == LCS_CH_STATE_STOPPED) // FIXME: what if rc != 0 ?? rc = lcs_start_channel(channel); spin_lock_irqsave(get_ccwdev_lock(channel->ccwdev), flags); - if (channel->state == CH_STATE_SUSPENDED && - channel->iob[channel->io_idx].state == BUF_STATE_READY) { + if (channel->state == LCS_CH_STATE_SUSPENDED && + channel->iob[channel->io_idx].state == LCS_BUF_STATE_READY) { // FIXME: what if rc != 0 ?? rc = __lcs_resume_channel(channel); } @@ -1689,8 +1691,8 @@ lcs_detect(struct lcs_card *card) card->state = DEV_STATE_UP; } else { card->state = DEV_STATE_DOWN; - card->write.state = CH_STATE_INIT; - card->read.state = CH_STATE_INIT; + card->write.state = LCS_CH_STATE_INIT; + card->read.state = LCS_CH_STATE_INIT; } return rc; } @@ -1705,8 +1707,8 @@ lcs_stopcard(struct lcs_card *card) LCS_DBF_TEXT(3, setup, "stopcard"); - if (card->read.state != CH_STATE_STOPPED && - card->write.state != CH_STATE_STOPPED && + if (card->read.state != LCS_CH_STATE_STOPPED && + card->write.state != LCS_CH_STATE_STOPPED && card->state == DEV_STATE_UP) { lcs_clear_multicast_list(card); rc = lcs_send_stoplan(card,LCS_INITIATOR_TCPIP); @@ -1871,7 +1873,7 @@ lcs_stop_device(struct net_device *dev) netif_tx_disable(dev); dev->flags &= ~IFF_UP; wait_event(card->write.wait_q, - (card->write.state != CH_STATE_RUNNING)); + (card->write.state != LCS_CH_STATE_RUNNING)); rc = lcs_stopcard(card); if (rc) PRINT_ERR("Try it again!\n "); diff --git a/drivers/s390/net/lcs.h b/drivers/s390/net/lcs.h index b5247dc08b57..0e1e4a0a88f0 100644 --- a/drivers/s390/net/lcs.h +++ b/drivers/s390/net/lcs.h @@ -22,11 +22,6 @@ do { \ debug_text_event(lcs_dbf_##name,level, debug_buffer);\ } while (0) -/** - * some more definitions for debug or output stuff - */ -#define PRINTK_HEADER " lcs: " - /** * sysfs related stuff */ @@ -127,22 +122,22 @@ do { \ * LCS Buffer states */ enum lcs_buffer_states { - BUF_STATE_EMPTY, /* buffer is empty */ - BUF_STATE_LOCKED, /* buffer is locked, don't touch */ - BUF_STATE_READY, /* buffer is ready for read/write */ - BUF_STATE_PROCESSED, + LCS_BUF_STATE_EMPTY, /* buffer is empty */ + LCS_BUF_STATE_LOCKED, /* buffer is locked, don't touch */ + LCS_BUF_STATE_READY, /* buffer is ready for read/write */ + LCS_BUF_STATE_PROCESSED, }; /** * LCS Channel State Machine declarations */ enum lcs_channel_states { - CH_STATE_INIT, - CH_STATE_HALTED, - CH_STATE_STOPPED, - CH_STATE_RUNNING, - CH_STATE_SUSPENDED, - CH_STATE_CLEARED, + LCS_CH_STATE_INIT, + LCS_CH_STATE_HALTED, + LCS_CH_STATE_STOPPED, + LCS_CH_STATE_RUNNING, + LCS_CH_STATE_SUSPENDED, + LCS_CH_STATE_CLEARED, }; /** diff --git a/drivers/s390/net/qeth.h b/drivers/s390/net/qeth.h index 821383d8cbe7..53c358c7d368 100644 --- a/drivers/s390/net/qeth.h +++ b/drivers/s390/net/qeth.h @@ -151,8 +151,6 @@ qeth_hex_dump(unsigned char *buf, size_t len) #define SENSE_RESETTING_EVENT_BYTE 1 #define SENSE_RESETTING_EVENT_FLAG 0x80 -#define atomic_swap(a,b) xchg((int *)a.counter, b) - /* * Common IO related definitions */ diff --git a/drivers/s390/net/qeth_main.c b/drivers/s390/net/qeth_main.c index 8364d5475ac7..7fdc5272c446 100644 --- a/drivers/s390/net/qeth_main.c +++ b/drivers/s390/net/qeth_main.c @@ -2982,7 +2982,7 @@ qeth_check_outbound_queue(struct qeth_qdio_out_q *queue) */ if ((atomic_read(&queue->used_buffers) <= QETH_LOW_WATERMARK_PACK) || !atomic_read(&queue->set_pci_flags_count)){ - if (atomic_swap(&queue->state, QETH_OUT_Q_LOCKED_FLUSH) == + if (atomic_xchg(&queue->state, QETH_OUT_Q_LOCKED_FLUSH) == QETH_OUT_Q_UNLOCKED) { /* * If we get in here, there was no action in @@ -3245,7 +3245,7 @@ qeth_free_qdio_buffers(struct qeth_card *card) int i, j; QETH_DBF_TEXT(trace, 2, "freeqdbf"); - if (atomic_swap(&card->qdio.state, QETH_QDIO_UNINITIALIZED) == + if (atomic_xchg(&card->qdio.state, QETH_QDIO_UNINITIALIZED) == QETH_QDIO_UNINITIALIZED) return; kfree(card->qdio.in_q); @@ -4366,7 +4366,7 @@ out: if (flush_count) qeth_flush_buffers(queue, 0, start_index, flush_count); else if (!atomic_read(&queue->set_pci_flags_count)) - atomic_swap(&queue->state, QETH_OUT_Q_LOCKED_FLUSH); + atomic_xchg(&queue->state, QETH_OUT_Q_LOCKED_FLUSH); /* * queue->state will go from LOCKED -> UNLOCKED or from * LOCKED_FLUSH -> LOCKED if output_handler wanted to 'notify' us From 24cb5b4846ebae5543869b5c596c2650f380df53 Mon Sep 17 00:00:00 2001 From: Cornelia Huck Date: Mon, 4 Dec 2006 15:41:01 +0100 Subject: [PATCH 31/34] [S390] cio: Use path verification for last path gone after vary off. If the last path to a device is gone after a chpid has been varied off, putting it on the slow queue doesn't prevent a device driver from still attempting to use it (it may stay on the slow queue for a long time). Instead, trigger a verify event which will prevent I/O attempts from the device driver immediately. Signed-off-by: Cornelia Huck Signed-off-by: Martin Schwidefsky --- drivers/s390/cio/chsc.c | 22 ++++++++++++---------- drivers/s390/cio/css.h | 1 + drivers/s390/cio/device_fsm.c | 11 +++++++++++ 3 files changed, 24 insertions(+), 10 deletions(-) diff --git a/drivers/s390/cio/chsc.c b/drivers/s390/cio/chsc.c index 9d92540bd99e..11900de94cb3 100644 --- a/drivers/s390/cio/chsc.c +++ b/drivers/s390/cio/chsc.c @@ -744,20 +744,22 @@ __s390_subchannel_vary_chpid(struct subchannel *sch, __u8 chpid, int on) device_trigger_reprobe(sch); else if (sch->driver && sch->driver->verify) sch->driver->verify(&sch->dev); - } else { - sch->opm &= ~(0x80 >> chp); - sch->lpm &= ~(0x80 >> chp); - if (check_for_io_on_path(sch, chp)) - /* Path verification is done after killing. */ - device_kill_io(sch); - else if (!sch->lpm) { + break; + } + sch->opm &= ~(0x80 >> chp); + sch->lpm &= ~(0x80 >> chp); + if (check_for_io_on_path(sch, chp)) + /* Path verification is done after killing. */ + device_kill_io(sch); + else if (!sch->lpm) { + if (device_trigger_verify(sch) != 0) { if (css_enqueue_subchannel_slow(sch->schid)) { css_clear_subchannel_slow_list(); need_rescan = 1; } - } else if (sch->driver && sch->driver->verify) - sch->driver->verify(&sch->dev); - } + } + } else if (sch->driver && sch->driver->verify) + sch->driver->verify(&sch->dev); break; } spin_unlock_irqrestore(&sch->lock, flags); diff --git a/drivers/s390/cio/css.h b/drivers/s390/cio/css.h index 4c2ff8336288..da73453fa54b 100644 --- a/drivers/s390/cio/css.h +++ b/drivers/s390/cio/css.h @@ -171,6 +171,7 @@ void device_trigger_reprobe(struct subchannel *); /* Helper functions for vary on/off. */ int device_is_online(struct subchannel *); void device_kill_io(struct subchannel *); +int device_trigger_verify(struct subchannel *sch); /* Machine check helper function. */ void device_kill_pending_timer(struct subchannel *); diff --git a/drivers/s390/cio/device_fsm.c b/drivers/s390/cio/device_fsm.c index de3d0857db9f..7665000e8dfe 100644 --- a/drivers/s390/cio/device_fsm.c +++ b/drivers/s390/cio/device_fsm.c @@ -59,6 +59,17 @@ device_set_disconnected(struct subchannel *sch) cdev->private->state = DEV_STATE_DISCONNECTED; } +int device_trigger_verify(struct subchannel *sch) +{ + struct ccw_device *cdev; + + cdev = sch->dev.driver_data; + if (!cdev || !cdev->online) + return -EINVAL; + dev_fsm_event(cdev, DEV_EVENT_VERIFY); + return 0; +} + /* * Timeout function. It just triggers a DEV_EVENT_TIMEOUT. */ From d23861ff1a4da1f4a5255eb4582f620191c6e1c0 Mon Sep 17 00:00:00 2001 From: Cornelia Huck Date: Mon, 4 Dec 2006 15:41:04 +0100 Subject: [PATCH 32/34] [S390] cio: Retry internal operations after vary off. If I/O was running on a just varied off chpid, it will be terminated. If this was a common I/O layer internal I/O, it needs to be retried. Signed-off-by: Cornelia Huck Signed-off-by: Martin Schwidefsky --- drivers/s390/cio/chsc.c | 37 ++++++++++++++++++++++++++------ drivers/s390/cio/css.h | 2 ++ drivers/s390/cio/device.c | 3 +++ drivers/s390/cio/device_fsm.c | 16 ++++++++++++++ drivers/s390/cio/device_id.c | 10 ++++++++- drivers/s390/cio/device_pgid.c | 30 +++++++++++++++++++++++--- drivers/s390/cio/device_status.c | 3 +++ 7 files changed, 90 insertions(+), 11 deletions(-) diff --git a/drivers/s390/cio/chsc.c b/drivers/s390/cio/chsc.c index 11900de94cb3..dbfb77b03928 100644 --- a/drivers/s390/cio/chsc.c +++ b/drivers/s390/cio/chsc.c @@ -251,6 +251,8 @@ s390_subchannel_remove_chpid(struct device *dev, void *data) cc = cio_clear(sch); if (cc == -ENODEV) goto out_unreg; + /* Request retry of internal operation. */ + device_set_intretry(sch); /* Call handler. */ if (sch->driver && sch->driver->termination) sch->driver->termination(&sch->dev); @@ -711,9 +713,6 @@ static inline int check_for_io_on_path(struct subchannel *sch, int index) { int cc; - if (!device_is_online(sch)) - /* cio could be doing I/O. */ - return 0; cc = stsch(sch->schid, &sch->schib); if (cc) return 0; @@ -722,6 +721,26 @@ static inline int check_for_io_on_path(struct subchannel *sch, int index) return 0; } +static void terminate_internal_io(struct subchannel *sch) +{ + if (cio_clear(sch)) { + /* Recheck device in case clear failed. */ + sch->lpm = 0; + if (device_trigger_verify(sch) != 0) { + if(css_enqueue_subchannel_slow(sch->schid)) { + css_clear_subchannel_slow_list(); + need_rescan = 1; + } + } + return; + } + /* Request retry of internal operation. */ + device_set_intretry(sch); + /* Call handler. */ + if (sch->driver && sch->driver->termination) + sch->driver->termination(&sch->dev); +} + static inline void __s390_subchannel_vary_chpid(struct subchannel *sch, __u8 chpid, int on) { @@ -748,10 +767,14 @@ __s390_subchannel_vary_chpid(struct subchannel *sch, __u8 chpid, int on) } sch->opm &= ~(0x80 >> chp); sch->lpm &= ~(0x80 >> chp); - if (check_for_io_on_path(sch, chp)) - /* Path verification is done after killing. */ - device_kill_io(sch); - else if (!sch->lpm) { + if (check_for_io_on_path(sch, chp)) { + if (device_is_online(sch)) + /* Path verification is done after killing. */ + device_kill_io(sch); + else + /* Kill and retry internal I/O. */ + terminate_internal_io(sch); + } else if (!sch->lpm) { if (device_trigger_verify(sch) != 0) { if (css_enqueue_subchannel_slow(sch->schid)) { css_clear_subchannel_slow_list(); diff --git a/drivers/s390/cio/css.h b/drivers/s390/cio/css.h index da73453fa54b..9ff064e71767 100644 --- a/drivers/s390/cio/css.h +++ b/drivers/s390/cio/css.h @@ -94,6 +94,7 @@ struct ccw_device_private { unsigned int donotify:1; /* call notify function */ unsigned int recog_done:1; /* dev. recog. complete */ unsigned int fake_irb:1; /* deliver faked irb */ + unsigned int intretry:1; /* retry internal operation */ } __attribute__((packed)) flags; unsigned long intparm; /* user interruption parameter */ struct qdio_irq *qdio_data; @@ -171,6 +172,7 @@ void device_trigger_reprobe(struct subchannel *); /* Helper functions for vary on/off. */ int device_is_online(struct subchannel *); void device_kill_io(struct subchannel *); +void device_set_intretry(struct subchannel *sch); int device_trigger_verify(struct subchannel *sch); /* Machine check helper function. */ diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c index 39c98f940507..359b46c25314 100644 --- a/drivers/s390/cio/device.c +++ b/drivers/s390/cio/device.c @@ -948,6 +948,9 @@ io_subchannel_ioterm(struct device *dev) cdev = dev->driver_data; if (!cdev) return; + /* Internal I/O will be retried by the interrupt handler. */ + if (cdev->private->flags.intretry) + return; cdev->private->state = DEV_STATE_CLEAR_VERIFY; if (cdev->handler) cdev->handler(cdev, cdev->private->intparm, diff --git a/drivers/s390/cio/device_fsm.c b/drivers/s390/cio/device_fsm.c index 7665000e8dfe..09c7672eb3f3 100644 --- a/drivers/s390/cio/device_fsm.c +++ b/drivers/s390/cio/device_fsm.c @@ -59,6 +59,16 @@ device_set_disconnected(struct subchannel *sch) cdev->private->state = DEV_STATE_DISCONNECTED; } +void device_set_intretry(struct subchannel *sch) +{ + struct ccw_device *cdev; + + cdev = sch->dev.driver_data; + if (!cdev) + return; + cdev->private->flags.intretry = 1; +} + int device_trigger_verify(struct subchannel *sch) { struct ccw_device *cdev; @@ -904,6 +914,12 @@ ccw_device_w4sense(struct ccw_device *cdev, enum dev_event dev_event) * had killed the original request. */ if (irb->scsw.fctl & (SCSW_FCTL_CLEAR_FUNC | SCSW_FCTL_HALT_FUNC)) { + /* Retry Basic Sense if requested. */ + if (cdev->private->flags.intretry) { + cdev->private->flags.intretry = 0; + ccw_device_do_sense(cdev, irb); + return; + } cdev->private->flags.dosense = 0; memset(&cdev->private->irb, 0, sizeof(struct irb)); ccw_device_accumulate_irb(cdev, irb); diff --git a/drivers/s390/cio/device_id.c b/drivers/s390/cio/device_id.c index a74785b9e4eb..f17275917fe5 100644 --- a/drivers/s390/cio/device_id.c +++ b/drivers/s390/cio/device_id.c @@ -191,6 +191,8 @@ __ccw_device_sense_id_start(struct ccw_device *cdev) if ((sch->opm & cdev->private->imask) != 0 && cdev->private->iretry > 0) { cdev->private->iretry--; + /* Reset internal retry indication. */ + cdev->private->flags.intretry = 0; ret = cio_start (sch, cdev->private->iccws, cdev->private->imask); /* ret is 0, -EBUSY, -EACCES or -ENODEV */ @@ -237,8 +239,14 @@ ccw_device_check_sense_id(struct ccw_device *cdev) return 0; /* Success */ } /* Check the error cases. */ - if (irb->scsw.fctl & (SCSW_FCTL_HALT_FUNC | SCSW_FCTL_CLEAR_FUNC)) + if (irb->scsw.fctl & (SCSW_FCTL_HALT_FUNC | SCSW_FCTL_CLEAR_FUNC)) { + /* Retry Sense ID if requested. */ + if (cdev->private->flags.intretry) { + cdev->private->flags.intretry = 0; + return -EAGAIN; + } return -ETIME; + } if (irb->esw.esw0.erw.cons && (irb->ecw[0] & SNS0_CMD_REJECT)) { /* * if the device doesn't support the SenseID diff --git a/drivers/s390/cio/device_pgid.c b/drivers/s390/cio/device_pgid.c index 2975ce888c19..cb1879a96818 100644 --- a/drivers/s390/cio/device_pgid.c +++ b/drivers/s390/cio/device_pgid.c @@ -71,6 +71,8 @@ __ccw_device_sense_pgid_start(struct ccw_device *cdev) ccw->cda = (__u32) __pa (&cdev->private->pgid[i]); if (cdev->private->iretry > 0) { cdev->private->iretry--; + /* Reset internal retry indication. */ + cdev->private->flags.intretry = 0; ret = cio_start (sch, cdev->private->iccws, cdev->private->imask); /* ret is 0, -EBUSY, -EACCES or -ENODEV */ @@ -122,8 +124,14 @@ __ccw_device_check_sense_pgid(struct ccw_device *cdev) sch = to_subchannel(cdev->dev.parent); irb = &cdev->private->irb; - if (irb->scsw.fctl & (SCSW_FCTL_HALT_FUNC | SCSW_FCTL_CLEAR_FUNC)) + if (irb->scsw.fctl & (SCSW_FCTL_HALT_FUNC | SCSW_FCTL_CLEAR_FUNC)) { + /* Retry Sense PGID if requested. */ + if (cdev->private->flags.intretry) { + cdev->private->flags.intretry = 0; + return -EAGAIN; + } return -ETIME; + } if (irb->esw.esw0.erw.cons && (irb->ecw[0]&(SNS0_CMD_REJECT|SNS0_INTERVENTION_REQ))) { /* @@ -253,6 +261,8 @@ __ccw_device_do_pgid(struct ccw_device *cdev, __u8 func) ret = -EACCES; if (cdev->private->iretry > 0) { cdev->private->iretry--; + /* Reset internal retry indication. */ + cdev->private->flags.intretry = 0; ret = cio_start (sch, cdev->private->iccws, cdev->private->imask); /* We expect an interrupt in case of success or busy @@ -293,6 +303,8 @@ static int __ccw_device_do_nop(struct ccw_device *cdev) ret = -EACCES; if (cdev->private->iretry > 0) { cdev->private->iretry--; + /* Reset internal retry indication. */ + cdev->private->flags.intretry = 0; ret = cio_start (sch, cdev->private->iccws, cdev->private->imask); /* We expect an interrupt in case of success or busy @@ -321,8 +333,14 @@ __ccw_device_check_pgid(struct ccw_device *cdev) sch = to_subchannel(cdev->dev.parent); irb = &cdev->private->irb; - if (irb->scsw.fctl & (SCSW_FCTL_HALT_FUNC | SCSW_FCTL_CLEAR_FUNC)) + if (irb->scsw.fctl & (SCSW_FCTL_HALT_FUNC | SCSW_FCTL_CLEAR_FUNC)) { + /* Retry Set PGID if requested. */ + if (cdev->private->flags.intretry) { + cdev->private->flags.intretry = 0; + return -EAGAIN; + } return -ETIME; + } if (irb->esw.esw0.erw.cons) { if (irb->ecw[0] & SNS0_CMD_REJECT) return -EOPNOTSUPP; @@ -360,8 +378,14 @@ static int __ccw_device_check_nop(struct ccw_device *cdev) sch = to_subchannel(cdev->dev.parent); irb = &cdev->private->irb; - if (irb->scsw.fctl & (SCSW_FCTL_HALT_FUNC | SCSW_FCTL_CLEAR_FUNC)) + if (irb->scsw.fctl & (SCSW_FCTL_HALT_FUNC | SCSW_FCTL_CLEAR_FUNC)) { + /* Retry NOP if requested. */ + if (cdev->private->flags.intretry) { + cdev->private->flags.intretry = 0; + return -EAGAIN; + } return -ETIME; + } if (irb->scsw.cc == 3) { CIO_MSG_EVENT(2, "NOP - Device %04x on Subchannel 0.%x.%04x," " lpm %02X, became 'not operational'\n", diff --git a/drivers/s390/cio/device_status.c b/drivers/s390/cio/device_status.c index 3f7cbce4cd87..bdcf930f7beb 100644 --- a/drivers/s390/cio/device_status.c +++ b/drivers/s390/cio/device_status.c @@ -319,6 +319,9 @@ ccw_device_do_sense(struct ccw_device *cdev, struct irb *irb) sch->sense_ccw.count = SENSE_MAX_COUNT; sch->sense_ccw.flags = CCW_FLAG_SLI; + /* Reset internal retry indication. */ + cdev->private->flags.intretry = 0; + return cio_start (sch, &sch->sense_ccw, 0xff); } From 47af5518526d435d0d56a329a578f240e86eb678 Mon Sep 17 00:00:00 2001 From: Cornelia Huck Date: Mon, 4 Dec 2006 15:41:07 +0100 Subject: [PATCH 33/34] [S390] cio: Use device_reprobe() instead of bus_rescan_devices(). In io_subchannel_register(), it is better to just reprobe the current device if it hasn't a driver yet than to rescan the whole bus. Signed-off-by: Cornelia Huck Signed-off-by: Martin Schwidefsky --- drivers/s390/cio/device.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c index 359b46c25314..d3d3716ff84b 100644 --- a/drivers/s390/cio/device.c +++ b/drivers/s390/cio/device.c @@ -687,8 +687,20 @@ io_subchannel_register(void *data) cdev = data; sch = to_subchannel(cdev->dev.parent); + /* + * io_subchannel_register() will also be called after device + * recognition has been done for a boxed device (which will already + * be registered). We need to reprobe since we may now have sense id + * information. + */ if (klist_node_attached(&cdev->dev.knode_parent)) { - bus_rescan_devices(&ccw_bus_type); + if (!cdev->drv) { + ret = device_reprobe(&cdev->dev); + if (ret) + /* We can't do much here. */ + dev_info(&cdev->dev, "device_reprobe() returned" + " %d\n", ret); + } goto out; } /* make it known to the system */ From 74f8f557fd0c6f32e17e78c9ef508ca66ef37d3a Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 4 Dec 2006 15:41:10 +0100 Subject: [PATCH 34/34] [S390] Don't use small stacks when lockdep is used. The lock dependency validator adds a bunch of extra stack frames to the stack, which can cause stack overflows. Especially seen on 31 bit where the small stack is only 4k. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 5b04c4095c8d..583d9ff0a571 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -178,7 +178,7 @@ config PACK_STACK config SMALL_STACK bool "Use 4kb/8kb for kernel stack instead of 8kb/16kb" - depends on PACK_STACK + depends on PACK_STACK && !LOCKDEP help If you say Y here and the compiler supports the -mkernel-backchain option the kernel will use a smaller kernel stack size. For 31 bit