linux/drivers/pci/hotplug/pciehp_core.c
Mika Westerberg 87d0f2a553 PCI: pciehp: Prevent deadlock on disconnect
This addresses deadlocks in these common cases in hierarchies containing
two switches:

  - All involved ports are runtime suspended and they are unplugged. This
    can happen easily if the drivers involved automatically enable runtime
    PM (xHCI for example does that).

  - System is suspended (e.g., closing the lid on a laptop) with a dock +
    something else connected, and the dock is unplugged while suspended.

These cases lead to the following deadlock:

  INFO: task irq/126-pciehp:198 blocked for more than 120 seconds.
  irq/126-pciehp  D    0   198      2 0x80000000
  Call Trace:
   schedule+0x2c/0x80
   schedule_timeout+0x246/0x350
   wait_for_completion+0xb7/0x140
   kthread_stop+0x49/0x110
   free_irq+0x32/0x70
   pcie_shutdown_notification+0x2f/0x50
   pciehp_remove+0x27/0x50
   pcie_port_remove_service+0x36/0x50
   device_release_driver+0x12/0x20
   bus_remove_device+0xec/0x160
   device_del+0x13b/0x350
   device_unregister+0x1a/0x60
   remove_iter+0x1e/0x30
   device_for_each_child+0x56/0x90
   pcie_port_device_remove+0x22/0x40
   pcie_portdrv_remove+0x20/0x60
   pci_device_remove+0x3e/0xc0
   device_release_driver_internal+0x18c/0x250
   device_release_driver+0x12/0x20
   pci_stop_bus_device+0x6f/0x90
   pci_stop_bus_device+0x31/0x90
   pci_stop_and_remove_bus_device+0x12/0x20
   pciehp_unconfigure_device+0x88/0x140
   pciehp_disable_slot+0x6a/0x110
   pciehp_handle_presence_or_link_change+0x263/0x400
   pciehp_ist+0x1c9/0x1d0
   irq_thread_fn+0x24/0x60
   irq_thread+0xeb/0x190
   kthread+0x120/0x140

  INFO: task irq/190-pciehp:2288 blocked for more than 120 seconds.
  irq/190-pciehp  D    0  2288      2 0x80000000
  Call Trace:
   __schedule+0x2a2/0x880
   schedule+0x2c/0x80
   schedule_preempt_disabled+0xe/0x10
   mutex_lock+0x2c/0x30
   pci_lock_rescan_remove+0x15/0x20
   pciehp_unconfigure_device+0x4d/0x140
   pciehp_disable_slot+0x6a/0x110
   pciehp_handle_presence_or_link_change+0x263/0x400
   pciehp_ist+0x1c9/0x1d0
   irq_thread_fn+0x24/0x60
   irq_thread+0xeb/0x190
   kthread+0x120/0x140

What happens here is that the whole hierarchy is runtime resumed and the
parent PCIe downstream port, which got the hot-remove event, starts
removing devices below it, taking pci_lock_rescan_remove() lock. When the
child PCIe port is runtime resumed it calls pciehp_check_presence() which
ends up calling pciehp_card_present() and pciehp_check_link_active().  Both
of these use pcie_capability_read_word(), which notices that the underlying
device is already gone and returns PCIBIOS_DEVICE_NOT_FOUND with the
capability value set to 0. When pciehp gets this value it thinks that its
child device is also hot-removed and schedules its IRQ thread to handle the
event.

The deadlock happens when the child's IRQ thread runs and tries to acquire
pci_lock_rescan_remove() which is already taken by the parent and the
parent waits for the child's IRQ thread to finish.

Prevent this from happening by checking the return value of
pcie_capability_read_word() and if it is PCIBIOS_DEVICE_NOT_FOUND stop
performing any hot-removal activities.

[bhelgaas: add common scenarios to commit log]
Link: https://lore.kernel.org/r/20191029170022.57528-2-mika.westerberg@linux.intel.com
Tested-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
2019-11-12 17:17:42 -06:00

366 lines
9.3 KiB
C

// SPDX-License-Identifier: GPL-2.0+
/*
* PCI Express Hot Plug Controller Driver
*
* Copyright (C) 1995,2001 Compaq Computer Corporation
* Copyright (C) 2001 Greg Kroah-Hartman (greg@kroah.com)
* Copyright (C) 2001 IBM Corp.
* Copyright (C) 2003-2004 Intel Corporation
*
* All rights reserved.
*
* Send feedback to <greg@kroah.com>, <kristen.c.accardi@intel.com>
*
* Authors:
* Dan Zink <dan.zink@compaq.com>
* Greg Kroah-Hartman <greg@kroah.com>
* Dely Sy <dely.l.sy@intel.com>"
*/
#define pr_fmt(fmt) "pciehp: " fmt
#define dev_fmt pr_fmt
#include <linux/moduleparam.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/pci.h>
#include "pciehp.h"
#include "../pci.h"
/* Global variables */
bool pciehp_poll_mode;
int pciehp_poll_time;
/*
* not really modular, but the easiest way to keep compat with existing
* bootargs behaviour is to continue using module_param here.
*/
module_param(pciehp_poll_mode, bool, 0644);
module_param(pciehp_poll_time, int, 0644);
MODULE_PARM_DESC(pciehp_poll_mode, "Using polling mechanism for hot-plug events or not");
MODULE_PARM_DESC(pciehp_poll_time, "Polling mechanism frequency, in seconds");
static int set_attention_status(struct hotplug_slot *slot, u8 value);
static int get_power_status(struct hotplug_slot *slot, u8 *value);
static int get_latch_status(struct hotplug_slot *slot, u8 *value);
static int get_adapter_status(struct hotplug_slot *slot, u8 *value);
static int init_slot(struct controller *ctrl)
{
struct hotplug_slot_ops *ops;
char name[SLOT_NAME_SIZE];
int retval;
/* Setup hotplug slot ops */
ops = kzalloc(sizeof(*ops), GFP_KERNEL);
if (!ops)
return -ENOMEM;
ops->enable_slot = pciehp_sysfs_enable_slot;
ops->disable_slot = pciehp_sysfs_disable_slot;
ops->get_power_status = get_power_status;
ops->get_adapter_status = get_adapter_status;
ops->reset_slot = pciehp_reset_slot;
if (MRL_SENS(ctrl))
ops->get_latch_status = get_latch_status;
if (ATTN_LED(ctrl)) {
ops->get_attention_status = pciehp_get_attention_status;
ops->set_attention_status = set_attention_status;
} else if (ctrl->pcie->port->hotplug_user_indicators) {
ops->get_attention_status = pciehp_get_raw_indicator_status;
ops->set_attention_status = pciehp_set_raw_indicator_status;
}
/* register this slot with the hotplug pci core */
ctrl->hotplug_slot.ops = ops;
snprintf(name, SLOT_NAME_SIZE, "%u", PSN(ctrl));
retval = pci_hp_initialize(&ctrl->hotplug_slot,
ctrl->pcie->port->subordinate, 0, name);
if (retval) {
ctrl_err(ctrl, "pci_hp_initialize failed: error %d\n", retval);
kfree(ops);
}
return retval;
}
static void cleanup_slot(struct controller *ctrl)
{
struct hotplug_slot *hotplug_slot = &ctrl->hotplug_slot;
pci_hp_destroy(hotplug_slot);
kfree(hotplug_slot->ops);
}
/*
* set_attention_status - Turns the Attention Indicator on, off or blinking
*/
static int set_attention_status(struct hotplug_slot *hotplug_slot, u8 status)
{
struct controller *ctrl = to_ctrl(hotplug_slot);
struct pci_dev *pdev = ctrl->pcie->port;
if (status)
status <<= PCI_EXP_SLTCTL_ATTN_IND_SHIFT;
else
status = PCI_EXP_SLTCTL_ATTN_IND_OFF;
pci_config_pm_runtime_get(pdev);
pciehp_set_indicators(ctrl, INDICATOR_NOOP, status);
pci_config_pm_runtime_put(pdev);
return 0;
}
static int get_power_status(struct hotplug_slot *hotplug_slot, u8 *value)
{
struct controller *ctrl = to_ctrl(hotplug_slot);
struct pci_dev *pdev = ctrl->pcie->port;
pci_config_pm_runtime_get(pdev);
pciehp_get_power_status(ctrl, value);
pci_config_pm_runtime_put(pdev);
return 0;
}
static int get_latch_status(struct hotplug_slot *hotplug_slot, u8 *value)
{
struct controller *ctrl = to_ctrl(hotplug_slot);
struct pci_dev *pdev = ctrl->pcie->port;
pci_config_pm_runtime_get(pdev);
pciehp_get_latch_status(ctrl, value);
pci_config_pm_runtime_put(pdev);
return 0;
}
static int get_adapter_status(struct hotplug_slot *hotplug_slot, u8 *value)
{
struct controller *ctrl = to_ctrl(hotplug_slot);
struct pci_dev *pdev = ctrl->pcie->port;
int ret;
pci_config_pm_runtime_get(pdev);
ret = pciehp_card_present_or_link_active(ctrl);
pci_config_pm_runtime_put(pdev);
if (ret < 0)
return ret;
*value = ret;
return 0;
}
/**
* pciehp_check_presence() - synthesize event if presence has changed
*
* On probe and resume, an explicit presence check is necessary to bring up an
* occupied slot or bring down an unoccupied slot. This can't be triggered by
* events in the Slot Status register, they may be stale and are therefore
* cleared. Secondly, sending an interrupt for "events that occur while
* interrupt generation is disabled [when] interrupt generation is subsequently
* enabled" is optional per PCIe r4.0, sec 6.7.3.4.
*/
static void pciehp_check_presence(struct controller *ctrl)
{
int occupied;
down_read(&ctrl->reset_lock);
mutex_lock(&ctrl->state_lock);
occupied = pciehp_card_present_or_link_active(ctrl);
if ((occupied > 0 && (ctrl->state == OFF_STATE ||
ctrl->state == BLINKINGON_STATE)) ||
(!occupied && (ctrl->state == ON_STATE ||
ctrl->state == BLINKINGOFF_STATE)))
pciehp_request(ctrl, PCI_EXP_SLTSTA_PDC);
mutex_unlock(&ctrl->state_lock);
up_read(&ctrl->reset_lock);
}
static int pciehp_probe(struct pcie_device *dev)
{
int rc;
struct controller *ctrl;
/* If this is not a "hotplug" service, we have no business here. */
if (dev->service != PCIE_PORT_SERVICE_HP)
return -ENODEV;
if (!dev->port->subordinate) {
/* Can happen if we run out of bus numbers during probe */
pci_err(dev->port,
"Hotplug bridge without secondary bus, ignoring\n");
return -ENODEV;
}
ctrl = pcie_init(dev);
if (!ctrl) {
pci_err(dev->port, "Controller initialization failed\n");
return -ENODEV;
}
set_service_data(dev, ctrl);
/* Setup the slot information structures */
rc = init_slot(ctrl);
if (rc) {
if (rc == -EBUSY)
ctrl_warn(ctrl, "Slot already registered by another hotplug driver\n");
else
ctrl_err(ctrl, "Slot initialization failed (%d)\n", rc);
goto err_out_release_ctlr;
}
/* Enable events after we have setup the data structures */
rc = pcie_init_notification(ctrl);
if (rc) {
ctrl_err(ctrl, "Notification initialization failed (%d)\n", rc);
goto err_out_free_ctrl_slot;
}
/* Publish to user space */
rc = pci_hp_add(&ctrl->hotplug_slot);
if (rc) {
ctrl_err(ctrl, "Publication to user space failed (%d)\n", rc);
goto err_out_shutdown_notification;
}
pciehp_check_presence(ctrl);
return 0;
err_out_shutdown_notification:
pcie_shutdown_notification(ctrl);
err_out_free_ctrl_slot:
cleanup_slot(ctrl);
err_out_release_ctlr:
pciehp_release_ctrl(ctrl);
return -ENODEV;
}
static void pciehp_remove(struct pcie_device *dev)
{
struct controller *ctrl = get_service_data(dev);
pci_hp_del(&ctrl->hotplug_slot);
pcie_shutdown_notification(ctrl);
cleanup_slot(ctrl);
pciehp_release_ctrl(ctrl);
}
#ifdef CONFIG_PM
static bool pme_is_native(struct pcie_device *dev)
{
const struct pci_host_bridge *host;
host = pci_find_host_bridge(dev->port->bus);
return pcie_ports_native || host->native_pme;
}
static void pciehp_disable_interrupt(struct pcie_device *dev)
{
/*
* Disable hotplug interrupt so that it does not trigger
* immediately when the downstream link goes down.
*/
if (pme_is_native(dev))
pcie_disable_interrupt(get_service_data(dev));
}
#ifdef CONFIG_PM_SLEEP
static int pciehp_suspend(struct pcie_device *dev)
{
/*
* If the port is already runtime suspended we can keep it that
* way.
*/
if (dev_pm_smart_suspend_and_suspended(&dev->port->dev))
return 0;
pciehp_disable_interrupt(dev);
return 0;
}
static int pciehp_resume_noirq(struct pcie_device *dev)
{
struct controller *ctrl = get_service_data(dev);
/* pci_restore_state() just wrote to the Slot Control register */
ctrl->cmd_started = jiffies;
ctrl->cmd_busy = true;
/* clear spurious events from rediscovery of inserted card */
if (ctrl->state == ON_STATE || ctrl->state == BLINKINGOFF_STATE)
pcie_clear_hotplug_events(ctrl);
return 0;
}
#endif
static int pciehp_resume(struct pcie_device *dev)
{
struct controller *ctrl = get_service_data(dev);
if (pme_is_native(dev))
pcie_enable_interrupt(ctrl);
pciehp_check_presence(ctrl);
return 0;
}
static int pciehp_runtime_suspend(struct pcie_device *dev)
{
pciehp_disable_interrupt(dev);
return 0;
}
static int pciehp_runtime_resume(struct pcie_device *dev)
{
struct controller *ctrl = get_service_data(dev);
/* pci_restore_state() just wrote to the Slot Control register */
ctrl->cmd_started = jiffies;
ctrl->cmd_busy = true;
/* clear spurious events from rediscovery of inserted card */
if ((ctrl->state == ON_STATE || ctrl->state == BLINKINGOFF_STATE) &&
pme_is_native(dev))
pcie_clear_hotplug_events(ctrl);
return pciehp_resume(dev);
}
#endif /* PM */
static struct pcie_port_service_driver hpdriver_portdrv = {
.name = "pciehp",
.port_type = PCIE_ANY_PORT,
.service = PCIE_PORT_SERVICE_HP,
.probe = pciehp_probe,
.remove = pciehp_remove,
#ifdef CONFIG_PM
#ifdef CONFIG_PM_SLEEP
.suspend = pciehp_suspend,
.resume_noirq = pciehp_resume_noirq,
.resume = pciehp_resume,
#endif
.runtime_suspend = pciehp_runtime_suspend,
.runtime_resume = pciehp_runtime_resume,
#endif /* PM */
};
int __init pcie_hp_init(void)
{
int retval = 0;
retval = pcie_port_service_register(&hpdriver_portdrv);
pr_debug("pcie_port_service_register = %d\n", retval);
if (retval)
pr_debug("Failure to register service\n");
return retval;
}