From 05a05872c8d4b4357c9d913e6d73ae64882bddf5 Mon Sep 17 00:00:00 2001 From: Mauricio Faria de Oliveira Date: Tue, 7 Jun 2016 20:13:08 -0300 Subject: [PATCH 1/8] lpfc: fix oops in lpfc_sli4_scmd_to_wqidx_distr() from lpfc_send_taskmgmt() The lpfc_sli4_scmd_to_wqidx_distr() function expects the scsi_cmnd 'lpfc_cmd->pCmd' not to be null, and point to the midlayer command. That's not true in the .eh_(device|target|bus)_reset_handler path, because lpfc_send_taskmgmt() sends commands not from the midlayer, so does not set 'lpfc_cmd->pCmd'. That is true in the .queuecommand path because lpfc_queuecommand() stores the scsi_cmnd from midlayer in lpfc_cmd->pCmd; and lpfc_cmd is stored by lpfc_scsi_prep_cmnd() in piocbq->context1 -- which is passed to lpfc_sli4_scmd_to_wqidx_distr() as lpfc_cmd parameter. This problem can be hit on SCSI EH, and immediately with sg_reset. These 2 test-cases demonstrate the problem/fix with next-20160601. Test-case 1) sg_reset # strace sg_reset --device /dev/sdm <...> open("/dev/sdm", O_RDWR|O_NONBLOCK) = 3 ioctl(3, SG_SCSI_RESET, 0x3fffde6d0994 +++ killed by SIGSEGV +++ Segmentation fault # dmesg Unable to handle kernel paging request for data at address 0x00000000 Faulting instruction address: 0xd00000001c88442c Oops: Kernel access of bad area, sig: 11 [#1] <...> CPU: 104 PID: 16333 Comm: sg_reset Tainted: G W 4.7.0-rc1-next-20160601-00004-g95b89dc #6 <...> NIP [d00000001c88442c] lpfc_sli4_scmd_to_wqidx_distr+0xc/0xd0 [lpfc] LR [d00000001c826fe8] lpfc_sli_calc_ring.part.27+0x98/0xd0 [lpfc] Call Trace: [c000003c9ec876f0] [c000003c9ec87770] 0xc000003c9ec87770 (unreliable) [c000003c9ec87720] [d00000001c82e004] lpfc_sli_issue_iocb+0xd4/0x260 [lpfc] [c000003c9ec87780] [d00000001c831a3c] lpfc_sli_issue_iocb_wait+0x15c/0x5b0 [lpfc] [c000003c9ec87880] [d00000001c87f27c] lpfc_send_taskmgmt+0x24c/0x650 [lpfc] [c000003c9ec87950] [d00000001c87fd7c] lpfc_device_reset_handler+0x10c/0x200 [lpfc] [c000003c9ec87a10] [c000000000610694] scsi_try_bus_device_reset+0x44/0xc0 [c000003c9ec87a40] [c0000000006113e8] scsi_ioctl_reset+0x198/0x2c0 [c000003c9ec87bf0] [c00000000060fe5c] scsi_ioctl+0x13c/0x4b0 [c000003c9ec87c80] [c0000000006629b0] sd_ioctl+0xf0/0x120 [c000003c9ec87cd0] [c00000000046e4f8] blkdev_ioctl+0x248/0xb70 [c000003c9ec87d30] [c0000000002a1f60] block_ioctl+0x70/0x90 [c000003c9ec87d50] [c00000000026d334] do_vfs_ioctl+0xc4/0x890 [c000003c9ec87de0] [c00000000026db60] SyS_ioctl+0x60/0xc0 [c000003c9ec87e30] [c000000000009120] system_call+0x38/0x108 Instruction dump: <...> With fix: # strace sg_reset --device /dev/sdm <...> open("/dev/sdm", O_RDWR|O_NONBLOCK) = 3 ioctl(3, SG_SCSI_RESET, 0x3fffe103c554) = 0 close(3) = 0 exit_group(0) = ? +++ exited with 0 +++ # dmesg [ 424.658649] lpfc 0006:01:00.4: 4:(0):0713 SCSI layer issued Device Reset (1, 0) return x2002 Test-case 2) SCSI EH Using this debug patch to wire an SCSI EH trigger, for lpfc_scsi_cmd_iocb_cmpl(): - cmd->scsi_done(cmd); + if ((phba->pport ? phba->pport->cfg_log_verbose : phba->cfg_log_verbose) == 0x32100000) + printk(KERN_ALERT "lpfc: skip scsi_done()\n"); + else + cmd->scsi_done(cmd); # echo 0x32100000 > /sys/class/scsi_host/host11/lpfc_log_verbose # dd if=/dev/sdm of=/dev/null iflag=direct & <...> After a while: # dmesg lpfc 0006:01:00.4: 4:(0):3053 lpfc_log_verbose changed from 0 (x0) to 839909376 (x32100000) lpfc: skip scsi_done() <...> Unable to handle kernel paging request for data at address 0x00000000 Faulting instruction address: 0xd0000000199e448c Oops: Kernel access of bad area, sig: 11 [#1] <...> CPU: 96 PID: 28556 Comm: scsi_eh_11 Tainted: G W 4.7.0-rc1-next-20160601-00004-g95b89dc #6 <...> NIP [d0000000199e448c] lpfc_sli4_scmd_to_wqidx_distr+0xc/0xd0 [lpfc] LR [d000000019986fe8] lpfc_sli_calc_ring.part.27+0x98/0xd0 [lpfc] Call Trace: [c000000ff0d0b890] [c000000ff0d0b900] 0xc000000ff0d0b900 (unreliable) [c000000ff0d0b8c0] [d00000001998e004] lpfc_sli_issue_iocb+0xd4/0x260 [lpfc] [c000000ff0d0b920] [d000000019991a3c] lpfc_sli_issue_iocb_wait+0x15c/0x5b0 [lpfc] [c000000ff0d0ba20] [d0000000199df27c] lpfc_send_taskmgmt+0x24c/0x650 [lpfc] [c000000ff0d0baf0] [d0000000199dfd7c] lpfc_device_reset_handler+0x10c/0x200 [lpfc] [c000000ff0d0bbb0] [c000000000610694] scsi_try_bus_device_reset+0x44/0xc0 [c000000ff0d0bbe0] [c0000000006126cc] scsi_eh_ready_devs+0x49c/0x9c0 [c000000ff0d0bcb0] [c000000000614160] scsi_error_handler+0x580/0x680 [c000000ff0d0bd80] [c0000000000ae848] kthread+0x108/0x130 [c000000ff0d0be30] [c0000000000094a8] ret_from_kernel_thread+0x5c/0xb4 Instruction dump: <...> With fix: # dmesg lpfc 0006:01:00.4: 4:(0):3053 lpfc_log_verbose changed from 0 (x0) to 839909376 (x32100000) lpfc: skip scsi_done() <...> lpfc 0006:01:00.4: 4:(0):0713 SCSI layer issued Device Reset (0, 0) return x2002 <...> lpfc 0006:01:00.4: 4:(0):0723 SCSI layer issued Target Reset (1, 0) return x2002 <...> lpfc 0006:01:00.4: 4:(0):0714 SCSI layer issued Bus Reset Data: x2002 <...> lpfc 0006:01:00.4: 4:(0):3172 SCSI layer issued Host Reset Data: <...> Fixes: 8b0dff14164d ("lpfc: Add support for using block multi-queue") Cc: # v4.2+ Signed-off-by: Mauricio Faria de Oliveira Reviewed-by: Johannes Thumshirn Acked-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_scsi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c index a5655d571906..d197aa176dee 100644 --- a/drivers/scsi/lpfc/lpfc_scsi.c +++ b/drivers/scsi/lpfc/lpfc_scsi.c @@ -3877,7 +3877,7 @@ int lpfc_sli4_scmd_to_wqidx_distr(struct lpfc_hba *phba, uint32_t tag; uint16_t hwq; - if (shost_use_blk_mq(cmnd->device->host)) { + if (cmnd && shost_use_blk_mq(cmnd->device->host)) { tag = blk_mq_unique_tag(cmnd->request); hwq = blk_mq_unique_tag_to_hwq(tag); From 1bd2b2823e5aa219b2442fd4568a075efe81de5f Mon Sep 17 00:00:00 2001 From: Uma Krishnan Date: Thu, 21 Jul 2016 15:44:04 -0500 Subject: [PATCH 2/8] cxlflash: Verify problem state area is mapped before notifying shutdown If an EEH or some other hard error occurs while the adapter instance was being initialized, on the subsequent shutdown of the device, the system could crash with: [c000000f1da03b60] c0000000005eccfc pci_device_shutdown+0x6c/0x100 [c000000f1da03ba0] c0000000006d67d4 device_shutdown+0x1b4/0x2c0 [c000000f1da03c40] c0000000000ea30c kernel_restart_prepare+0x5c/0x80 [c000000f1da03c70] c0000000000ea48c kernel_restart+0x2c/0xc0 [c000000f1da03ce0] c0000000000ea970 SyS_reboot+0x1c0/0x2d0 [c000000f1da03e30] c000000000009204 system_call+0x38/0xb4 This crash is due to the AFU not being mapped when the shutdown notification routine is called and is a regression that was inserted recently with Commit 704c4b0ddc03 ("cxlflash: Shutdown notify support for CXL Flash cards"). As a fix, shutdown notification should only occur when the AFU is mapped. Fixes: 704c4b0ddc03 ("cxlflash: Shutdown notify support for CXL Flash cards") Signed-off-by: Uma Krishnan Reviewed-by: Andrew Donnellan Acked-by: Matthew R. Ochs Signed-off-by: Martin K. Petersen --- drivers/scsi/cxlflash/main.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/cxlflash/main.c b/drivers/scsi/cxlflash/main.c index 860008d42466..661bb94e2548 100644 --- a/drivers/scsi/cxlflash/main.c +++ b/drivers/scsi/cxlflash/main.c @@ -778,7 +778,7 @@ static void notify_shutdown(struct cxlflash_cfg *cfg, bool wait) { struct afu *afu = cfg->afu; struct device *dev = &cfg->dev->dev; - struct sisl_global_map __iomem *global = &afu->afu_map->global; + struct sisl_global_map __iomem *global; struct dev_dependent_vals *ddv; u64 reg, status; int i, retry_cnt = 0; @@ -787,6 +787,14 @@ static void notify_shutdown(struct cxlflash_cfg *cfg, bool wait) if (!(ddv->flags & CXLFLASH_NOTIFY_SHUTDOWN)) return; + if (!afu || !afu->afu_map) { + dev_dbg(dev, "%s: The problem state area is not mapped\n", + __func__); + return; + } + + global = &afu->afu_map->global; + /* Notify AFU */ for (i = 0; i < NUM_FC_PORTS; i++) { reg = readq_be(&global->fc_regs[i][FC_CONFIG2 / 8]); From b2c0627c26afcef0ac115e9505ea9d1db5d88c9b Mon Sep 17 00:00:00 2001 From: Uma Krishnan Date: Thu, 21 Jul 2016 15:44:37 -0500 Subject: [PATCH 3/8] MAINTAINERS: Update cxlflash maintainers Adding myself as a cxlflash maintainer. Signed-off-by: Uma Krishnan Acked-by: Matthew R. Ochs Signed-off-by: Martin K. Petersen --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 5a367e5368a3..adb68eb92d95 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3434,6 +3434,7 @@ F: Documentation/ABI/testing/sysfs-class-cxl CXLFLASH (IBM Coherent Accelerator Processor Interface CAPI Flash) SCSI DRIVER M: Manoj N. Kumar M: Matthew R. Ochs +M: Uma Krishnan L: linux-scsi@vger.kernel.org S: Supported F: drivers/scsi/cxlflash/ From b195d5e2bffd3de3f07e8683e6abddf099ea0822 Mon Sep 17 00:00:00 2001 From: Brian King Date: Fri, 15 Jul 2016 14:48:03 -0500 Subject: [PATCH 4/8] ipr: Wait to do async scan until scsi host is initialized When performing an async scan, make sure the kthread doing scanning doesn't start before the scsi host is fully initialized. Signed-off-by: Brian King Reviewed-by: Gabriel Krisman Bertazi Signed-off-by: Martin K. Petersen --- drivers/scsi/ipr.c | 9 +++++++++ drivers/scsi/ipr.h | 1 + 2 files changed, 10 insertions(+) diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c index 1f539c288ae8..95e4834c58ee 100644 --- a/drivers/scsi/ipr.c +++ b/drivers/scsi/ipr.c @@ -3288,6 +3288,11 @@ static void ipr_worker_thread(struct work_struct *work) return; } + if (!ioa_cfg->scan_enabled) { + spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags); + return; + } + restart: do { did_work = 0; @@ -10362,6 +10367,7 @@ static void ipr_remove(struct pci_dev *pdev) static int ipr_probe(struct pci_dev *pdev, const struct pci_device_id *dev_id) { struct ipr_ioa_cfg *ioa_cfg; + unsigned long flags; int rc, i; rc = ipr_probe_ioa(pdev, dev_id); @@ -10414,7 +10420,10 @@ static int ipr_probe(struct pci_dev *pdev, const struct pci_device_id *dev_id) } } + spin_lock_irqsave(ioa_cfg->host->host_lock, flags); + ioa_cfg->scan_enabled = 1; schedule_work(&ioa_cfg->work_q); + spin_unlock_irqrestore(ioa_cfg->host->host_lock, flags); return 0; } diff --git a/drivers/scsi/ipr.h b/drivers/scsi/ipr.h index 1d42c7464dfc..cdb51960b53c 100644 --- a/drivers/scsi/ipr.h +++ b/drivers/scsi/ipr.h @@ -1478,6 +1478,7 @@ struct ipr_ioa_cfg { u8 in_ioa_bringdown:1; u8 ioa_unit_checked:1; u8 dump_taken:1; + u8 scan_enabled:1; u8 scan_done:1; u8 needs_hard_reset:1; u8 dual_raid:1; From d242e6680e81cea0343bd93ba862efa70a91a56c Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Mon, 4 Jul 2016 10:29:23 +0200 Subject: [PATCH 5/8] fcoe: Use default VLAN for FIP VLAN discovery FC-BB-6 states: FIP protocols shall be performed on a per-VLAN basis. It is recommended to use the FIP VLAN discovery protocol on the default VLAN. Signed-off-by: Hannes Reinecke Acked-by: Johannes Thumshirn Signed-off-by: Martin K. Petersen --- drivers/scsi/fcoe/fcoe.c | 49 +++++++++++++++++++++++++++++++++++++++- drivers/scsi/fcoe/fcoe.h | 1 + 2 files changed, 49 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c index c8a4305c7662..197dc62ea67a 100644 --- a/drivers/scsi/fcoe/fcoe.c +++ b/drivers/scsi/fcoe/fcoe.c @@ -92,6 +92,8 @@ static struct fcoe_interface static int fcoe_fip_recv(struct sk_buff *, struct net_device *, struct packet_type *, struct net_device *); +static int fcoe_fip_vlan_recv(struct sk_buff *, struct net_device *, + struct packet_type *, struct net_device *); static void fcoe_fip_send(struct fcoe_ctlr *, struct sk_buff *); static void fcoe_update_src_mac(struct fc_lport *, u8 *); @@ -363,6 +365,12 @@ static int fcoe_interface_setup(struct fcoe_interface *fcoe, fcoe->fip_packet_type.dev = netdev; dev_add_pack(&fcoe->fip_packet_type); + if (netdev != real_dev) { + fcoe->fip_vlan_packet_type.func = fcoe_fip_vlan_recv; + fcoe->fip_vlan_packet_type.type = htons(ETH_P_FIP); + fcoe->fip_vlan_packet_type.dev = real_dev; + dev_add_pack(&fcoe->fip_vlan_packet_type); + } return 0; } @@ -450,6 +458,8 @@ static void fcoe_interface_remove(struct fcoe_interface *fcoe) */ __dev_remove_pack(&fcoe->fcoe_packet_type); __dev_remove_pack(&fcoe->fip_packet_type); + if (netdev != fcoe->realdev) + __dev_remove_pack(&fcoe->fip_vlan_packet_type); synchronize_net(); /* Delete secondary MAC addresses */ @@ -519,6 +529,29 @@ static int fcoe_fip_recv(struct sk_buff *skb, struct net_device *netdev, return 0; } +/** + * fcoe_fip_vlan_recv() - Handler for received FIP VLAN discovery frames + * @skb: The receive skb + * @netdev: The associated net device + * @ptype: The packet_type structure which was used to register this handler + * @orig_dev: The original net_device the the skb was received on. + * (in case dev is a bond) + * + * Returns: 0 for success + */ +static int fcoe_fip_vlan_recv(struct sk_buff *skb, struct net_device *netdev, + struct packet_type *ptype, + struct net_device *orig_dev) +{ + struct fcoe_interface *fcoe; + struct fcoe_ctlr *ctlr; + + fcoe = container_of(ptype, struct fcoe_interface, fip_vlan_packet_type); + ctlr = fcoe_to_ctlr(fcoe); + fcoe_ctlr_recv(ctlr, skb); + return 0; +} + /** * fcoe_port_send() - Send an Ethernet-encapsulated FIP/FCoE frame * @port: The FCoE port @@ -539,7 +572,21 @@ static void fcoe_port_send(struct fcoe_port *port, struct sk_buff *skb) */ static void fcoe_fip_send(struct fcoe_ctlr *fip, struct sk_buff *skb) { - skb->dev = fcoe_from_ctlr(fip)->netdev; + struct fcoe_interface *fcoe = fcoe_from_ctlr(fip); + struct fip_frame { + struct ethhdr eth; + struct fip_header fip; + } __packed *frame; + + /* + * Use default VLAN for FIP VLAN discovery protocol + */ + frame = (struct fip_frame *)skb->data; + if (frame->fip.fip_op == ntohs(FIP_OP_VLAN) && + fcoe->realdev != fcoe->netdev) + skb->dev = fcoe->realdev; + else + skb->dev = fcoe->netdev; fcoe_port_send(lport_priv(fip->lp), skb); } diff --git a/drivers/scsi/fcoe/fcoe.h b/drivers/scsi/fcoe/fcoe.h index 2b53672bf932..6aa4820f6cc0 100644 --- a/drivers/scsi/fcoe/fcoe.h +++ b/drivers/scsi/fcoe/fcoe.h @@ -80,6 +80,7 @@ struct fcoe_interface { struct net_device *realdev; struct packet_type fcoe_packet_type; struct packet_type fip_packet_type; + struct packet_type fip_vlan_packet_type; struct fc_exch_mgr *oem; u8 removed; u8 priority; From 22466da5b4b7a82d3e5a9c21c752cae91a21dc91 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Fri, 29 Jul 2016 15:30:56 +0200 Subject: [PATCH 6/8] lpfc: Fix possible NULL pointer dereference Check for the existence of piocb->vport before accessing it. Signed-off-by: Johannes Thumshirn Acked-by: James Smart Reviewed-by: Tyrel Datwyler Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_sli.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 351d08ace24a..7080ce2920fd 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -1323,21 +1323,18 @@ lpfc_sli_ringtxcmpl_put(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, { lockdep_assert_held(&phba->hbalock); + BUG_ON(!piocb || !piocb->vport); + list_add_tail(&piocb->list, &pring->txcmplq); piocb->iocb_flag |= LPFC_IO_ON_TXCMPLQ; if ((unlikely(pring->ringno == LPFC_ELS_RING)) && (piocb->iocb.ulpCommand != CMD_ABORT_XRI_CN) && (piocb->iocb.ulpCommand != CMD_CLOSE_XRI_CN) && - (!(piocb->vport->load_flag & FC_UNLOADING))) { - if (!piocb->vport) - BUG(); - else - mod_timer(&piocb->vport->els_tmofunc, - jiffies + - msecs_to_jiffies(1000 * (phba->fc_ratov << 1))); - } - + (!(piocb->vport->load_flag & FC_UNLOADING))) + mod_timer(&piocb->vport->els_tmofunc, + jiffies + + msecs_to_jiffies(1000 * (phba->fc_ratov << 1))); return 0; } From a561a8ea837182e2ac2c699e64a33b76f29d1668 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Fri, 29 Jul 2016 15:54:48 +0000 Subject: [PATCH 7/8] fcoe: add missing destroy_workqueue() on error in fcoe_init() Add the missing destroy_workqueue() before return from fcoe_init() in the fcoe transport register failed error handling case. Signed-off-by: Wei Yongjun Acked-by: Johannes Thumshirn Signed-off-by: Martin K. Petersen --- drivers/scsi/fcoe/fcoe.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c index 197dc62ea67a..9bd41a35a78a 100644 --- a/drivers/scsi/fcoe/fcoe.c +++ b/drivers/scsi/fcoe/fcoe.c @@ -2495,7 +2495,7 @@ static int __init fcoe_init(void) if (rc) { printk(KERN_ERR "failed to register an fcoe transport, check " "if libfcoe is loaded\n"); - return rc; + goto out_destroy; } mutex_lock(&fcoe_config_mutex); @@ -2518,6 +2518,7 @@ static int __init fcoe_init(void) out_free: mutex_unlock(&fcoe_config_mutex); +out_destroy: destroy_workqueue(fcoe_wq); return rc; } From c8e18acccb9be6dd1aa4fb8dbc1b98b3b7d6d8e1 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Fri, 29 Jul 2016 16:00:45 +0000 Subject: [PATCH 8/8] ipr: Fix error return code in ipr_probe_ioa() Fix to return error code -ENOMEM from the workqueue alloc error handling case instead of 0, as done elsewhere in this function. Signed-off-by: Wei Yongjun Acked-by: Brian King Signed-off-by: Martin K. Petersen --- drivers/scsi/ipr.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c index 95e4834c58ee..bf85974be862 100644 --- a/drivers/scsi/ipr.c +++ b/drivers/scsi/ipr.c @@ -10219,6 +10219,7 @@ static int ipr_probe_ioa(struct pci_dev *pdev, if (!ioa_cfg->reset_work_q) { dev_err(&pdev->dev, "Couldn't register reset workqueue\n"); + rc = -ENOMEM; goto out_free_irq; } } else