From 70675e0b6a1ae20f8e93f9fc083d28034ad38948 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Wed, 29 Jul 2015 16:52:58 +0800 Subject: [PATCH 01/26] PCI: Don't try to restore VF BARs VF BARs are read-only zero, so updating VF BARs will not have any effect. See the SR-IOV spec r1.1, sec 3.4.1.11. Don't update VF BARs in pci_restore_bars(). This avoids spurious "BAR %d: error updating" messages that we see when doing vfio pass-through after 6eb7018705de ("vfio-pci: Move idle devices to D3hot power state"). [bhelgaas: changelog, fix whitespace] Signed-off-by: Wei Yang Signed-off-by: Bjorn Helgaas --- drivers/pci/pci.c | 6 +++++- drivers/pci/setup-res.c | 5 +++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 6a9a1116f1eb..1cfb47a9a9ff 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -484,7 +484,7 @@ int pci_wait_for_pending(struct pci_dev *dev, int pos, u16 mask) } /** - * pci_restore_bars - restore a devices BAR values (e.g. after wake-up) + * pci_restore_bars - restore a device's BAR values (e.g. after wake-up) * @dev: PCI device to have its BARs restored * * Restore the BAR values for a given device, so as to make it @@ -494,6 +494,10 @@ static void pci_restore_bars(struct pci_dev *dev) { int i; + /* Per SR-IOV spec 3.4.1.11, VF BARs are RO zero */ + if (dev->is_virtfn) + return; + for (i = 0; i < PCI_BRIDGE_RESOURCES; i++) pci_update_resource(dev, i); } diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c index 232f9254c11a..152de5cd1cfc 100644 --- a/drivers/pci/setup-res.c +++ b/drivers/pci/setup-res.c @@ -36,6 +36,11 @@ void pci_update_resource(struct pci_dev *dev, int resno) enum pci_bar_type type; struct resource *res = dev->resource + resno; + if (dev->is_virtfn) { + dev_warn(&dev->dev, "can't update VF BAR%d\n", resno); + return; + } + /* * Ignore resources for unimplemented BARs and unused resource slots * for 64 bit BARs. From b07461a8e45b7a62ef7fb46e4f6ada66f63406a8 Mon Sep 17 00:00:00 2001 From: Taku Izumi Date: Thu, 17 Sep 2015 10:09:37 -0500 Subject: [PATCH 02/26] PCI/AER: Clear error status registers during enumeration and restore AER errors might be recorded when powering-on devices. These errors can be ignored, so firmware usually clears them before the OS enumerates devices. However, firmware is not involved when devices are added via hotplug, so the OS may discover power-up errors that should be ignored. The same may happen when powering up devices when resuming after suspend. Clear the AER error status registers during enumeration and resume. [bhelgaas: changelog, remove repetitive comments] Signed-off-by: Taku Izumi Signed-off-by: Bjorn Helgaas --- drivers/pci/pci.c | 3 +++ drivers/pci/pcie/aer/aerdrv_core.c | 28 ++++++++++++++++++++++++++++ drivers/pci/probe.c | 3 +++ include/linux/aer.h | 5 +++++ 4 files changed, 39 insertions(+) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 6a9a1116f1eb..62ecf45a4230 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -27,6 +27,7 @@ #include #include #include +#include #include "pci.h" const char *pci_power_names[] = { @@ -1099,6 +1100,8 @@ void pci_restore_state(struct pci_dev *dev) pci_restore_ats_state(dev); pci_restore_vc_state(dev); + pci_cleanup_aer_error_status_regs(dev); + pci_restore_config_space(dev); pci_restore_pcix_state(dev); diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c index 9803e3d039fe..fba785e9df75 100644 --- a/drivers/pci/pcie/aer/aerdrv_core.c +++ b/drivers/pci/pcie/aer/aerdrv_core.c @@ -74,6 +74,34 @@ int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev) } EXPORT_SYMBOL_GPL(pci_cleanup_aer_uncorrect_error_status); +int pci_cleanup_aer_error_status_regs(struct pci_dev *dev) +{ + int pos; + u32 status; + int port_type; + + if (!pci_is_pcie(dev)) + return -ENODEV; + + pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR); + if (!pos) + return -EIO; + + port_type = pci_pcie_type(dev); + if (port_type == PCI_EXP_TYPE_ROOT_PORT) { + pci_read_config_dword(dev, pos + PCI_ERR_ROOT_STATUS, &status); + pci_write_config_dword(dev, pos + PCI_ERR_ROOT_STATUS, status); + } + + pci_read_config_dword(dev, pos + PCI_ERR_COR_STATUS, &status); + pci_write_config_dword(dev, pos + PCI_ERR_COR_STATUS, status); + + pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status); + pci_write_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, status); + + return 0; +} + /** * add_error_device - list device to be handled * @e_info: pointer to error info diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 0b2be174d981..8cd9710498e4 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include "pci.h" @@ -1621,6 +1622,8 @@ static void pci_init_capabilities(struct pci_dev *dev) /* Enable ACS P2P upstream forwarding */ pci_enable_acs(dev); + + pci_cleanup_aer_error_status_regs(dev); } static void pci_set_msi_domain(struct pci_dev *dev) diff --git a/include/linux/aer.h b/include/linux/aer.h index 4fef65e57023..744b997d6a94 100644 --- a/include/linux/aer.h +++ b/include/linux/aer.h @@ -42,6 +42,7 @@ struct aer_capability_regs { int pci_enable_pcie_error_reporting(struct pci_dev *dev); int pci_disable_pcie_error_reporting(struct pci_dev *dev); int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev); +int pci_cleanup_aer_error_status_regs(struct pci_dev *dev); #else static inline int pci_enable_pcie_error_reporting(struct pci_dev *dev) { @@ -55,6 +56,10 @@ static inline int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev) { return -EINVAL; } +static inline int pci_cleanup_aer_error_status_regs(struct pci_dev *dev) +{ + return -EINVAL; +} #endif void cper_print_aer(struct pci_dev *dev, int cper_severity, From 9222097f0d2f88db602c0340d19d90a1c72b5fec Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Thu, 10 Sep 2015 18:40:31 +0800 Subject: [PATCH 03/26] PCI: Remove unnecessary "if" statement In store_remove_id(), set the default return value to -ENODEV, and overwrite it with the input buffer size if we find a matching list entry. Then we don't need to test whether to return an error or the count. No functional change. [bhelgaas: changelog] Signed-off-by: Zhen Lei Signed-off-by: Bjorn Helgaas --- drivers/pci/pci-driver.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index dd652f2ae03d..2865ba37e295 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -172,7 +172,7 @@ static ssize_t store_remove_id(struct device_driver *driver, const char *buf, __u32 vendor, device, subvendor = PCI_ANY_ID, subdevice = PCI_ANY_ID, class = 0, class_mask = 0; int fields = 0; - int retval = -ENODEV; + size_t retval = -ENODEV; fields = sscanf(buf, "%x %x %x %x %x %x", &vendor, &device, &subvendor, &subdevice, @@ -190,15 +190,13 @@ static ssize_t store_remove_id(struct device_driver *driver, const char *buf, !((id->class ^ class) & class_mask)) { list_del(&dynid->node); kfree(dynid); - retval = 0; + retval = count; break; } } spin_unlock(&pdrv->dynids.lock); - if (retval) - return retval; - return count; + return retval; } static DRIVER_ATTR(remove_id, S_IWUSR, NULL, store_remove_id); From 0b26cd69480d4d99f39042c6621a05af7496b3da Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Mon, 21 Sep 2015 18:26:45 -0500 Subject: [PATCH 04/26] PCI: Clear IORESOURCE_UNSET when reverting to firmware-assigned address If pci_assign_resource() fails to assign space for a BAR, we may restore the BAR to whatever firmware left there at boot-time (this depends on whether the arch implements pcibios_retrieve_fw_addr()). The messages we print are not as useful as they could be: pci 0000:00:01.0: BAR 15: assigned [mem 0xc0000000-0xc01fffff 64bit pref] pci 0000:01:00.0: BAR 0: no space for [mem size 0x10000000 pref] pci 0000:01:00.0: BAR 0: trying firmware assignment [mem size 0x10000000 pref] pci 0000:01:00.0: BAR 0: [mem size 0x10000000 pref] conflicts with PCI Bus 0000:00 [mem 0xc0000000-0xffffffff window] The last two lines should contain the actual BAR address, not the size. Clear IORESOURCE_UNSET so we print the address. If requesting the firmware-assigned resource fails, mark it IORESOURCE_UNSET again. This is a cosmetic change to clarify the message: previously, if pci_revert_fw_address() succeeded, pci_assign_resource() cleared IORESOURCE_UNSET anyway, so this isn't really a functional change. Link: https://bugzilla.kernel.org/show_bug.cgi?id=85491#c50 Signed-off-by: Bjorn Helgaas --- drivers/pci/setup-res.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c index 232f9254c11a..54c4f4f8105d 100644 --- a/drivers/pci/setup-res.c +++ b/drivers/pci/setup-res.c @@ -177,6 +177,7 @@ static int pci_revert_fw_address(struct resource *res, struct pci_dev *dev, end = res->end; res->start = fw_addr; res->end = res->start + size - 1; + res->flags &= ~IORESOURCE_UNSET; root = pci_find_parent_resource(dev, res); if (!root) { @@ -194,6 +195,7 @@ static int pci_revert_fw_address(struct resource *res, struct pci_dev *dev, resno, res, conflict->name, conflict); res->start = start; res->end = end; + res->flags |= IORESOURCE_UNSET; return -EBUSY; } return 0; From 10b4ad1a53e40425122a1a8f21f2d7428fa31e08 Mon Sep 17 00:00:00 2001 From: Ondrej Zary Date: Thu, 24 Sep 2015 17:02:07 -0500 Subject: [PATCH 05/26] PCI: Disable MSI on SiS 761 MSI is broken on SiS 761 chipset at least on PC Chips A31G board. No interrupts are delivered once MSI is enabled for a device. This causes hang on X11 start with a nVidia card installed (with nouveau driver). Disable MSI completely for this chipset. Signed-off-by: Ondrej Zary Signed-off-by: Bjorn Helgaas --- drivers/pci/quirks.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 6a30252cd79f..83be0e555548 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -2230,6 +2230,7 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_VT3336, quirk_disab DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_VT3351, quirk_disable_all_msi); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_VT3364, quirk_disable_all_msi); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8380_0, quirk_disable_all_msi); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_SI, 0x0761, quirk_disable_all_msi); /* Disable MSI on chipsets that are known to not support it */ static void quirk_disable_msi(struct pci_dev *dev) From b4eb6cdbbd13698704863f680c643c569909e1c2 Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Tue, 25 Aug 2015 20:25:37 -0400 Subject: [PATCH 06/26] PCI: Add builtin_pci_driver() to avoid registration boilerplate In f309d4443130 ("platform_device: better support builtin boilerplate avoidance"), we introduced the builtin_driver() macro. Here we use that support and extend it to PCI driver registration, so where a driver is clearly non-modular and builtin-only, we can register it in a similar fashion. Existing code that is clearly non-modular can be updated with the simple mapping of module_pci_driver(...) ---> builtin_pci_driver(...) We've essentially cloned the former to make the latter, and taken out the remove/module_exit parts since those never get used in a non-modular build of the code. Signed-off-by: Paul Gortmaker Signed-off-by: Bjorn Helgaas --- include/linux/pci.h | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/include/linux/pci.h b/include/linux/pci.h index e90eb22de628..b54fbf1571b1 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1192,6 +1192,17 @@ void pci_unregister_driver(struct pci_driver *dev); module_driver(__pci_driver, pci_register_driver, \ pci_unregister_driver) +/** + * builtin_pci_driver() - Helper macro for registering a PCI driver + * @__pci_driver: pci_driver struct + * + * Helper macro for PCI drivers which do not do anything special in their + * init code. This eliminates a lot of boilerplate. Each driver may only + * use this macro once, and calling it replaces device_initcall(...) + */ +#define builtin_pci_driver(__pci_driver) \ + builtin_driver(__pci_driver, pci_register_driver) + struct pci_driver *pci_dev_driver(const struct pci_dev *dev); int pci_add_dynid(struct pci_driver *drv, unsigned int vendor, unsigned int device, From 68d0d9794864a6bfbc6f88bb0ec980400bc17922 Mon Sep 17 00:00:00 2001 From: Alexander Kuleshov Date: Fri, 9 Oct 2015 00:51:46 +0600 Subject: [PATCH 07/26] x86/PCI: Make pci_subsys_init() static The pci_subsys_init() is a subsys_initcall that can be declared static. Signed-off-by: Alexander Kuleshov Signed-off-by: Bjorn Helgaas --- arch/x86/pci/legacy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/pci/legacy.c b/arch/x86/pci/legacy.c index 5b662c0faf8c..ea6f3802c17b 100644 --- a/arch/x86/pci/legacy.c +++ b/arch/x86/pci/legacy.c @@ -54,7 +54,7 @@ void pcibios_scan_specific_bus(int busn) } EXPORT_SYMBOL_GPL(pcibios_scan_specific_bus); -int __init pci_subsys_init(void) +static int __init pci_subsys_init(void) { /* * The init function returns an non zero value when From a86760664f4cf44c0981ac0c91777eed3a2970e4 Mon Sep 17 00:00:00 2001 From: Romain Bezut Date: Thu, 24 Sep 2015 01:31:16 +0200 Subject: [PATCH 08/26] PCI/MSI: Export all remapped MSIs to sysfs attributes irqbalance uses sysfs attributes to populate its internal database, which is then used to bind the IRQ to the appropriate NUMA node. On a device accepting multiple MSIs and with interrupt remapping enabled, only the first IRQ entry is exported in the "msi_irqs" directory. This results in irqbalance having no clue of the NUMA affinity for the extra IRQs, so it can't bind them to the correct node. Export all MSI interrupts as sysfs attributes when relevant. [bhelgaas: changelog] Signed-off-by: Romain Bezut Signed-off-by: Bjorn Helgaas Acked-by: Thomas Gleixner --- drivers/pci/msi.c | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index d4497141d083..324a1643fce2 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -475,10 +475,11 @@ static int populate_msi_sysfs(struct pci_dev *pdev) int ret = -ENOMEM; int num_msi = 0; int count = 0; + int i; /* Determine how many msi entries we have */ for_each_pci_msi_entry(entry, pdev) - ++num_msi; + num_msi += entry->nvec_used; if (!num_msi) return 0; @@ -487,19 +488,21 @@ static int populate_msi_sysfs(struct pci_dev *pdev) if (!msi_attrs) return -ENOMEM; for_each_pci_msi_entry(entry, pdev) { - msi_dev_attr = kzalloc(sizeof(*msi_dev_attr), GFP_KERNEL); - if (!msi_dev_attr) - goto error_attrs; - msi_attrs[count] = &msi_dev_attr->attr; + for (i = 0; i < entry->nvec_used; i++) { + msi_dev_attr = kzalloc(sizeof(*msi_dev_attr), GFP_KERNEL); + if (!msi_dev_attr) + goto error_attrs; + msi_attrs[count] = &msi_dev_attr->attr; - sysfs_attr_init(&msi_dev_attr->attr); - msi_dev_attr->attr.name = kasprintf(GFP_KERNEL, "%d", - entry->irq); - if (!msi_dev_attr->attr.name) - goto error_attrs; - msi_dev_attr->attr.mode = S_IRUGO; - msi_dev_attr->show = msi_mode_show; - ++count; + sysfs_attr_init(&msi_dev_attr->attr); + msi_dev_attr->attr.name = kasprintf(GFP_KERNEL, "%d", + entry->irq + i); + if (!msi_dev_attr->attr.name) + goto error_attrs; + msi_dev_attr->attr.mode = S_IRUGO; + msi_dev_attr->show = msi_mode_show; + ++count; + } } msi_irq_group = kzalloc(sizeof(*msi_irq_group), GFP_KERNEL); From 8affb487d4a4e223d961d7034cb41cd31982b618 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 9 Oct 2015 12:23:34 +0200 Subject: [PATCH 09/26] x86/PCI: Don't alloc pcibios-irq when MSI is enabled The pcibios-irq and MSI both use dev->irq to store the IRQ number. While the MSI code checks for that and frees the pcibios-irq before overwriting dev->irq, the pcibios_alloc_irq() function does not. Usually this is not a problem, as the pcibios-irq is allocated before probe time of the device and the MSI IRQ is allocted from the driver's probe path. But there are PCI devices handled by the core kernel and not by a standard PCI driver, like the AMD IOMMU for example. For the AMD IOMMU a normal PCI device driver does not make sense, because a driver can be forcibly unbound from its device, which is not a good idea for an IOMMU. Nevertheless the PCI core code tries to match the PCI device implementing the AMD IOMMU against drivers, and allocates/frees a pcibios IRQ every time it tries out a new driver. This overwrites the dev->irq field set by pci_enable_msi() and sets it to 0 in the end (because the probe fails and the pcibios-irq is freed again). On suspend/resume this breaks the kernel, because the IRQ descriptor for IRQ 0 is NULL. Fix this by not allocating a pcibios-irq when MSI is already active. This also has the benefit, that a device claimed by the core kernel can not be probed by a PCI driver later. Reported-by: Borislav Petkov Signed-off-by: Joerg Roedel Signed-off-by: Bjorn Helgaas Reviewed-by: Thomas Gleixner Cc: Jiang Liu --- arch/x86/pci/common.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index 09d3afc0a181..b106c67f88c5 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c @@ -674,6 +674,14 @@ int pcibios_add_device(struct pci_dev *dev) int pcibios_alloc_irq(struct pci_dev *dev) { + /* + * If the PCI device was already claimed by core code and has + * MSI enabled, probing of the pcibios IRQ will overwrite + * dev->irq. So bail out if MSI is already enabled. + */ + if (pci_dev_msi_enabled(dev)) + return -EBUSY; + return pcibios_enable_irq(dev); } From bee67756eb4ae51ededeb8ce56e7f4fb91d30b43 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Mon, 12 Oct 2015 12:10:12 -0700 Subject: [PATCH 10/26] PCI: pciehp: Queue power work requests in dedicated function Up to now, work items to be queued to be handled by pciehp_power_thread() are allocated using kmalloc() in three different locations. If not needed, kfree() is called to free the allocated data. Introduce a separate function to allocate the work item and queue it, and call it only if needed. This reduces code duplication and avoids having to free memory if the work item does not need to get executed. [bhelgaas: tweak "no memory" message, make pciehp_queue_power_work() static] Signed-off-by: Guenter Roeck Signed-off-by: Bjorn Helgaas --- drivers/pci/hotplug/pciehp_ctrl.c | 75 ++++++++++--------------------- 1 file changed, 23 insertions(+), 52 deletions(-) diff --git a/drivers/pci/hotplug/pciehp_ctrl.c b/drivers/pci/hotplug/pciehp_ctrl.c index f3796124ad7c..4c8f4cde6854 100644 --- a/drivers/pci/hotplug/pciehp_ctrl.c +++ b/drivers/pci/hotplug/pciehp_ctrl.c @@ -204,36 +204,39 @@ static void pciehp_power_thread(struct work_struct *work) kfree(info); } -void pciehp_queue_pushbutton_work(struct work_struct *work) +static void pciehp_queue_power_work(struct slot *p_slot, int req) { - struct slot *p_slot = container_of(work, struct slot, work.work); struct power_work_info *info; + p_slot->state = (req == ENABLE_REQ) ? POWERON_STATE : POWEROFF_STATE; + info = kmalloc(sizeof(*info), GFP_KERNEL); if (!info) { - ctrl_err(p_slot->ctrl, "%s: Cannot allocate memory\n", - __func__); + ctrl_err(p_slot->ctrl, "no memory to queue %s request\n", + (req == ENABLE_REQ) ? "poweron" : "poweroff"); return; } info->p_slot = p_slot; INIT_WORK(&info->work, pciehp_power_thread); + info->req = req; + queue_work(p_slot->wq, &info->work); +} + +void pciehp_queue_pushbutton_work(struct work_struct *work) +{ + struct slot *p_slot = container_of(work, struct slot, work.work); mutex_lock(&p_slot->lock); switch (p_slot->state) { case BLINKINGOFF_STATE: - p_slot->state = POWEROFF_STATE; - info->req = DISABLE_REQ; + pciehp_queue_power_work(p_slot, DISABLE_REQ); break; case BLINKINGON_STATE: - p_slot->state = POWERON_STATE; - info->req = ENABLE_REQ; + pciehp_queue_power_work(p_slot, ENABLE_REQ); break; default: - kfree(info); - goto out; + break; } - queue_work(p_slot->wq, &info->work); - out: mutex_unlock(&p_slot->lock); } @@ -301,27 +304,12 @@ static void handle_button_press_event(struct slot *p_slot) static void handle_surprise_event(struct slot *p_slot) { u8 getstatus; - struct power_work_info *info; - - info = kmalloc(sizeof(*info), GFP_KERNEL); - if (!info) { - ctrl_err(p_slot->ctrl, "%s: Cannot allocate memory\n", - __func__); - return; - } - info->p_slot = p_slot; - INIT_WORK(&info->work, pciehp_power_thread); pciehp_get_adapter_status(p_slot, &getstatus); - if (!getstatus) { - p_slot->state = POWEROFF_STATE; - info->req = DISABLE_REQ; - } else { - p_slot->state = POWERON_STATE; - info->req = ENABLE_REQ; - } - - queue_work(p_slot->wq, &info->work); + if (!getstatus) + pciehp_queue_power_work(p_slot, DISABLE_REQ); + else + pciehp_queue_power_work(p_slot, ENABLE_REQ); } /* @@ -330,17 +318,6 @@ static void handle_surprise_event(struct slot *p_slot) static void handle_link_event(struct slot *p_slot, u32 event) { struct controller *ctrl = p_slot->ctrl; - struct power_work_info *info; - - info = kmalloc(sizeof(*info), GFP_KERNEL); - if (!info) { - ctrl_err(p_slot->ctrl, "%s: Cannot allocate memory\n", - __func__); - return; - } - info->p_slot = p_slot; - info->req = event == INT_LINK_UP ? ENABLE_REQ : DISABLE_REQ; - INIT_WORK(&info->work, pciehp_power_thread); switch (p_slot->state) { case BLINKINGON_STATE: @@ -348,22 +325,19 @@ static void handle_link_event(struct slot *p_slot, u32 event) cancel_delayed_work(&p_slot->work); /* Fall through */ case STATIC_STATE: - p_slot->state = event == INT_LINK_UP ? - POWERON_STATE : POWEROFF_STATE; - queue_work(p_slot->wq, &info->work); + pciehp_queue_power_work(p_slot, event == INT_LINK_UP ? + ENABLE_REQ : DISABLE_REQ); break; case POWERON_STATE: if (event == INT_LINK_UP) { ctrl_info(ctrl, "Link Up event ignored on slot(%s): already powering on\n", slot_name(p_slot)); - kfree(info); } else { ctrl_info(ctrl, "Link Down event queued on slot(%s): currently getting powered on\n", slot_name(p_slot)); - p_slot->state = POWEROFF_STATE; - queue_work(p_slot->wq, &info->work); + pciehp_queue_power_work(p_slot, DISABLE_REQ); } break; case POWEROFF_STATE: @@ -371,19 +345,16 @@ static void handle_link_event(struct slot *p_slot, u32 event) ctrl_info(ctrl, "Link Up event queued on slot(%s): currently getting powered off\n", slot_name(p_slot)); - p_slot->state = POWERON_STATE; - queue_work(p_slot->wq, &info->work); + pciehp_queue_power_work(p_slot, ENABLE_REQ); } else { ctrl_info(ctrl, "Link Down event ignored on slot(%s): already powering off\n", slot_name(p_slot)); - kfree(info); } break; default: ctrl_err(ctrl, "ignoring invalid state %#x on slot(%s)\n", p_slot->state, slot_name(p_slot)); - kfree(info); break; } } From c56d4450eb6886225a5a0bb231ad2cea9f03284a Mon Sep 17 00:00:00 2001 From: Hariprasad Shenai Date: Sun, 18 Oct 2015 19:55:04 +0530 Subject: [PATCH 11/26] PCI: Turn off Request Attributes to avoid Chelsio T5 Completion erratum The Chelsio T5 has a PCIe compliance erratum that causes Malformed TLP or Unexpected Completion errors in some systems, which may cause device access timeouts. Per PCIe r3.0, sec 2.2.9, "Completion headers must supply the same values for the Attribute as were supplied in the header of the corresponding Request, except as explicitly allowed when IDO is used." Instead of copying the Attributes from the Request to the Completion, the T5 always generates Completions with zero Attributes. The receiver of a Completion whose Attributes don't match the Request may accept it (which itself seems non-compliant based on sec 2.3.2), or it may handle it as a Malformed TLP or an Unexpected Completion, which will probably lead to a device access timeout. Work around this by disabling "Relaxed Ordering" and "No Snoop" in the Root Port so it always generate Requests with zero Attributes. This does affect all other devices which are downstream of that Root Port, but these are performance optimizations that should not make a functional difference. Note that Configuration Space accesses are never supposed to have TLP Attributes, so we're safe waiting till after any Configuration Space accesses to do the Root Port "fixup". Based on original work by Casey Leedom [bhelgaas: changelog, comments, rename to pci_find_pcie_root_port(), rework to use pci_upstream_bridge() and check for Root Port device type, edit diagnostics to clarify intent and devices affected] Signed-off-by: Hariprasad Shenai Signed-off-by: Bjorn Helgaas --- drivers/pci/pci.c | 24 +++++++++++++++++++ drivers/pci/quirks.c | 57 ++++++++++++++++++++++++++++++++++++++++++++ include/linux/pci.h | 1 + 3 files changed, 82 insertions(+) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 6a9a1116f1eb..09b4a35e21a8 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -457,6 +457,30 @@ struct resource *pci_find_parent_resource(const struct pci_dev *dev, } EXPORT_SYMBOL(pci_find_parent_resource); +/** + * pci_find_pcie_root_port - return PCIe Root Port + * @dev: PCI device to query + * + * Traverse up the parent chain and return the PCIe Root Port PCI Device + * for a given PCI Device. + */ +struct pci_dev *pci_find_pcie_root_port(struct pci_dev *dev) +{ + struct pci_dev *bridge, *highest_pcie_bridge = NULL; + + bridge = pci_upstream_bridge(dev); + while (bridge && pci_is_pcie(bridge)) { + highest_pcie_bridge = bridge; + bridge = pci_upstream_bridge(bridge); + } + + if (pci_pcie_type(highest_pcie_bridge) != PCI_EXP_TYPE_ROOT_PORT) + return NULL; + + return highest_pcie_bridge; +} +EXPORT_SYMBOL(pci_find_pcie_root_port); + /** * pci_wait_for_pending - wait for @mask bit(s) to clear in status word @pos * @dev: the PCI device to operate on diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 6a30252cd79f..eb3c98e0fb2b 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -3691,6 +3691,63 @@ DECLARE_PCI_FIXUP_CLASS_EARLY(0x1797, 0x6868, PCI_CLASS_NOT_DEFINED, 8, DECLARE_PCI_FIXUP_CLASS_EARLY(0x1797, 0x6869, PCI_CLASS_NOT_DEFINED, 8, quirk_tw686x_class); +/* + * Per PCIe r3.0, sec 2.2.9, "Completion headers must supply the same + * values for the Attribute as were supplied in the header of the + * corresponding Request, except as explicitly allowed when IDO is used." + * + * If a non-compliant device generates a completion with a different + * attribute than the request, the receiver may accept it (which itself + * seems non-compliant based on sec 2.3.2), or it may handle it as a + * Malformed TLP or an Unexpected Completion, which will probably lead to a + * device access timeout. + * + * If the non-compliant device generates completions with zero attributes + * (instead of copying the attributes from the request), we can work around + * this by disabling the "Relaxed Ordering" and "No Snoop" attributes in + * upstream devices so they always generate requests with zero attributes. + * + * This affects other devices under the same Root Port, but since these + * attributes are performance hints, there should be no functional problem. + * + * Note that Configuration Space accesses are never supposed to have TLP + * Attributes, so we're safe waiting till after any Configuration Space + * accesses to do the Root Port fixup. + */ +static void quirk_disable_root_port_attributes(struct pci_dev *pdev) +{ + struct pci_dev *root_port = pci_find_pcie_root_port(pdev); + + if (!root_port) { + dev_warn(&pdev->dev, "PCIe Completion erratum may cause device errors\n"); + return; + } + + dev_info(&root_port->dev, "Disabling No Snoop/Relaxed Ordering Attributes to avoid PCIe Completion erratum in %s\n", + dev_name(&pdev->dev)); + pcie_capability_clear_and_set_word(root_port, PCI_EXP_DEVCTL, + PCI_EXP_DEVCTL_RELAX_EN | + PCI_EXP_DEVCTL_NOSNOOP_EN, 0); +} + +/* + * The Chelsio T5 chip fails to copy TLP Attributes from a Request to the + * Completion it generates. + */ +static void quirk_chelsio_T5_disable_root_port_attributes(struct pci_dev *pdev) +{ + /* + * This mask/compare operation selects for Physical Function 4 on a + * T5. We only need to fix up the Root Port once for any of the + * PFs. PF[0..3] have PCI Device IDs of 0x50xx, but PF4 is uniquely + * 0x54xx so we use that one, + */ + if ((pdev->device & 0xff00) == 0x5400) + quirk_disable_root_port_attributes(pdev); +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_CHELSIO, PCI_ANY_ID, + quirk_chelsio_T5_disable_root_port_attributes); + /* * AMD has indicated that the devices below do not support peer-to-peer * in any system where they are found in the southbridge with an AMD diff --git a/include/linux/pci.h b/include/linux/pci.h index b54fbf1571b1..e828e7b4afec 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -820,6 +820,7 @@ void pci_bus_add_device(struct pci_dev *dev); void pci_read_bridge_bases(struct pci_bus *child); struct resource *pci_find_parent_resource(const struct pci_dev *dev, struct resource *res); +struct pci_dev *pci_find_pcie_root_port(struct pci_dev *dev); u8 pci_swizzle_interrupt_pin(const struct pci_dev *dev, u8 pin); int pci_get_interrupt_pin(struct pci_dev *dev, struct pci_dev **bridge); u8 pci_common_swizzle(struct pci_dev *dev, u8 *pinp); From ff45f9ddbcf5418ff6402432d14ad91a40d9bea9 Mon Sep 17 00:00:00 2001 From: Ben Shelton Date: Thu, 29 Oct 2015 16:20:31 -0500 Subject: [PATCH 12/26] PCI: Enable SR-IOV ARI Capable Hierarchy before reading TotalVFs For some SR-IOV devices, the number of available virtual functions, i.e., TotalVFs, increases after setting the ARI Capable Hierarchy bit in the SR-IOV Control register. This violates the SR-IOV spec, r1.1, sec 3.3.6, which says TotalVFs is HwInit, but we don't need TotalVFs before setting the ARI Capable bit anyway. Set the ARI Capable Hierarchy bit (if ARI is enabled in the upstream bridge) before reading TotalVFs. [bhelgaas: changelog] Signed-off-by: Ben Shelton Signed-off-by: Bjorn Helgaas --- drivers/pci/iov.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c index ee0ebff103a4..0cdb2d1e1a20 100644 --- a/drivers/pci/iov.c +++ b/drivers/pci/iov.c @@ -399,10 +399,6 @@ static int sriov_init(struct pci_dev *dev, int pos) ssleep(1); } - pci_read_config_word(dev, pos + PCI_SRIOV_TOTAL_VF, &total); - if (!total) - return 0; - ctrl = 0; list_for_each_entry(pdev, &dev->bus->devices, bus_list) if (pdev->is_physfn) @@ -420,6 +416,10 @@ found: if (!offset || (total > 1 && !stride)) return -EIO; + pci_read_config_word(dev, pos + PCI_SRIOV_TOTAL_VF, &total); + if (!total) + return 0; + pci_read_config_dword(dev, pos + PCI_SRIOV_SUP_PGSIZE, &pgsz); i = PAGE_SHIFT > 12 ? PAGE_SHIFT - 12 : 0; pgsz &= ~((1 << i) - 1); From ea9a8854161d9580cfabe011c0ae296ecc0e1d4f Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Thu, 29 Oct 2015 16:20:50 -0500 Subject: [PATCH 13/26] PCI: Set SR-IOV NumVFs to zero after enumeration The enumeration path should leave NumVFs set to zero. But after 4449f079722c ("PCI: Calculate maximum number of buses required for VFs"), we call virtfn_max_buses() in the enumeration path, which changes NumVFs. This NumVFs change is visible via lspci and sysfs until a driver enables SR-IOV. Iterate from TotalVFs down to zero so NumVFs is zero when we're finished computing the maximum number of buses. Validate offset and stride in the loop, so we can test it at every possible NumVFs setting. Rename virtfn_max_buses() to compute_max_vf_buses() to hint that it does have a side effect of updating iov->max_VF_buses. [bhelgaas: changelog, rename, allow numVF==1 && stride==0, rework loop, reverse sense of error path] Fixes: 4449f079722c ("PCI: Calculate maximum number of buses required for VFs") Based-on-patch-by: Ethan Zhao Signed-off-by: Alexander Duyck Signed-off-by: Bjorn Helgaas --- drivers/pci/iov.c | 41 ++++++++++++++++++++++------------------- 1 file changed, 22 insertions(+), 19 deletions(-) diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c index 0cdb2d1e1a20..1b1acc27141c 100644 --- a/drivers/pci/iov.c +++ b/drivers/pci/iov.c @@ -54,24 +54,29 @@ static inline void pci_iov_set_numvfs(struct pci_dev *dev, int nr_virtfn) * The PF consumes one bus number. NumVFs, First VF Offset, and VF Stride * determine how many additional bus numbers will be consumed by VFs. * - * Iterate over all valid NumVFs and calculate the maximum number of bus - * numbers that could ever be required. + * Iterate over all valid NumVFs, validate offset and stride, and calculate + * the maximum number of bus numbers that could ever be required. */ -static inline u8 virtfn_max_buses(struct pci_dev *dev) +static int compute_max_vf_buses(struct pci_dev *dev) { struct pci_sriov *iov = dev->sriov; - int nr_virtfn; - u8 max = 0; - int busnr; + int nr_virtfn, busnr, rc = 0; - for (nr_virtfn = 1; nr_virtfn <= iov->total_VFs; nr_virtfn++) { + for (nr_virtfn = iov->total_VFs; nr_virtfn; nr_virtfn--) { pci_iov_set_numvfs(dev, nr_virtfn); + if (!iov->offset || (nr_virtfn > 1 && !iov->stride)) { + rc = -EIO; + goto out; + } + busnr = pci_iov_virtfn_bus(dev, nr_virtfn - 1); - if (busnr > max) - max = busnr; + if (busnr > iov->max_VF_buses) + iov->max_VF_buses = busnr; } - return max; +out: + pci_iov_set_numvfs(dev, 0); + return rc; } static struct pci_bus *virtfn_add_bus(struct pci_bus *bus, int busnr) @@ -384,7 +389,7 @@ static int sriov_init(struct pci_dev *dev, int pos) int rc; int nres; u32 pgsz; - u16 ctrl, total, offset, stride; + u16 ctrl, total; struct pci_sriov *iov; struct resource *res; struct pci_dev *pdev; @@ -410,11 +415,6 @@ static int sriov_init(struct pci_dev *dev, int pos) found: pci_write_config_word(dev, pos + PCI_SRIOV_CTRL, ctrl); - pci_write_config_word(dev, pos + PCI_SRIOV_NUM_VF, 0); - pci_read_config_word(dev, pos + PCI_SRIOV_VF_OFFSET, &offset); - pci_read_config_word(dev, pos + PCI_SRIOV_VF_STRIDE, &stride); - if (!offset || (total > 1 && !stride)) - return -EIO; pci_read_config_word(dev, pos + PCI_SRIOV_TOTAL_VF, &total); if (!total) @@ -456,8 +456,6 @@ found: iov->nres = nres; iov->ctrl = ctrl; iov->total_VFs = total; - iov->offset = offset; - iov->stride = stride; iov->pgsz = pgsz; iov->self = dev; pci_read_config_dword(dev, pos + PCI_SRIOV_CAP, &iov->cap); @@ -474,10 +472,15 @@ found: dev->sriov = iov; dev->is_physfn = 1; - iov->max_VF_buses = virtfn_max_buses(dev); + rc = compute_max_vf_buses(dev); + if (rc) + goto fail_max_buses; return 0; +fail_max_buses: + dev->sriov = NULL; + dev->is_physfn = 0; failed: for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { res = &dev->resource[i + PCI_IOV_RESOURCES]; From ce288ec380380f3ab0e3c631b9c2f598835a3f38 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Thu, 29 Oct 2015 16:20:57 -0500 Subject: [PATCH 14/26] PCI: Remove redundant validation of SR-IOV offset/stride registers Previously, we read, validated, and cached PCI_SRIOV_VF_OFFSET and PCI_SRIOV_VF_STRIDE in sriov_enable(). But sriov_init() now does that via compute_max_vf_buses(), so we don't need to do it again. Remove the PCI_SRIOV_VF_OFFSET and PCI_SRIOV_VF_STRIDE config reads from sriov_enable(). The pci_sriov structure already contains the offset and stride corresponding to the current NumVFs. [bhelgaas: split to separate patch for reviewability] Signed-off-by: Alexander Duyck Signed-off-by: Bjorn Helgaas Reviewed-by: Wei Yang --- drivers/pci/iov.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c index 1b1acc27141c..ca400a9140de 100644 --- a/drivers/pci/iov.c +++ b/drivers/pci/iov.c @@ -235,7 +235,7 @@ static int sriov_enable(struct pci_dev *dev, int nr_virtfn) int rc; int i, j; int nres; - u16 offset, stride, initial; + u16 initial; struct resource *res; struct pci_dev *pdev; struct pci_sriov *iov = dev->sriov; @@ -258,11 +258,6 @@ static int sriov_enable(struct pci_dev *dev, int nr_virtfn) (!(iov->cap & PCI_SRIOV_CAP_VFM) && (nr_virtfn > initial))) return -EINVAL; - pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_OFFSET, &offset); - pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_STRIDE, &stride); - if (!offset || (nr_virtfn > 1 && !stride)) - return -EIO; - nres = 0; for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { bars |= (1 << (i + PCI_IOV_RESOURCES)); @@ -275,9 +270,6 @@ static int sriov_enable(struct pci_dev *dev, int nr_virtfn) return -ENOMEM; } - iov->offset = offset; - iov->stride = stride; - bus = pci_iov_virtfn_bus(dev, nr_virtfn - 1); if (bus > dev->bus->busn_res.end) { dev_err(&dev->dev, "can't enable %d VFs (bus %02x out of range of %pR)\n", From a2220d804bf09aae4da45dfd6cc6692e0e422926 Mon Sep 17 00:00:00 2001 From: David Daney Date: Thu, 29 Oct 2015 17:35:39 -0500 Subject: [PATCH 15/26] PCI: Handle IORESOURCE_PCI_FIXED when sizing resources The new Enhanced Allocation (EA) capability support (patches to follow) creates resources with the IORESOURCE_PCI_FIXED set. Since these resources cannot be relocated or resized, their alignment is not really defined, and it is therefore not specified. This causes a problem in pbus_size_mem() where resources with unspecified alignment are disabled. So, in pbus_size_mem() skip IORESOURCE_PCI_FIXED resources, instead of disabling them. [bhelgaas: folded in "flags & IORESOURCE_PCI_FIXED" fix from David] Signed-off-by: David Daney Signed-off-by: Bjorn Helgaas Acked-by: Sean O. Stalley --- drivers/pci/setup-bus.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c index 508cc56130e3..7bea2317df14 100644 --- a/drivers/pci/setup-bus.c +++ b/drivers/pci/setup-bus.c @@ -1037,9 +1037,10 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask, struct resource *r = &dev->resource[i]; resource_size_t r_size; - if (r->parent || ((r->flags & mask) != type && - (r->flags & mask) != type2 && - (r->flags & mask) != type3)) + if (r->parent || (r->flags & IORESOURCE_PCI_FIXED) || + ((r->flags & mask) != type && + (r->flags & mask) != type2 && + (r->flags & mask) != type3)) continue; r_size = resource_size(r); #ifdef CONFIG_PCI_IOV From d04d0111c7701cb7c696216b0af707f4e327ad11 Mon Sep 17 00:00:00 2001 From: David Daney Date: Thu, 29 Oct 2015 17:35:39 -0500 Subject: [PATCH 16/26] PCI: Handle IORESOURCE_PCI_FIXED when assigning resources The new Enhanced Allocation (EA) capability support (patches to follow) creates resources with the IORESOURCE_PCI_FIXED set. During resource assignment in pci_bus_assign_resources(), IORESOURCE_PCI_FIXED resources are not given a parent. This, in turn, causes pci_enable_resources() to fail with a "not claimed" error. So, in __pci_bus_assign_resources(), for IORESOURCE_PCI_FIXED resources, try to request the resource from a parent bus. Signed-off-by: David Daney Signed-off-by: Bjorn Helgaas Acked-by: Sean O. Stalley --- drivers/pci/setup-bus.c | 43 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c index 7bea2317df14..1723ac1b30e1 100644 --- a/drivers/pci/setup-bus.c +++ b/drivers/pci/setup-bus.c @@ -1341,6 +1341,47 @@ void pci_bus_size_bridges(struct pci_bus *bus) } EXPORT_SYMBOL(pci_bus_size_bridges); +static void assign_fixed_resource_on_bus(struct pci_bus *b, struct resource *r) +{ + int i; + struct resource *parent_r; + unsigned long mask = IORESOURCE_IO | IORESOURCE_MEM | + IORESOURCE_PREFETCH; + + pci_bus_for_each_resource(b, parent_r, i) { + if (!parent_r) + continue; + + if ((r->flags & mask) == (parent_r->flags & mask) && + resource_contains(parent_r, r)) + request_resource(parent_r, r); + } +} + +/* + * Try to assign any resources marked as IORESOURCE_PCI_FIXED, as they + * are skipped by pbus_assign_resources_sorted(). + */ +static void pdev_assign_fixed_resources(struct pci_dev *dev) +{ + int i; + + for (i = 0; i < PCI_NUM_RESOURCES; i++) { + struct pci_bus *b; + struct resource *r = &dev->resource[i]; + + if (r->parent || !(r->flags & IORESOURCE_PCI_FIXED) || + !(r->flags & (IORESOURCE_IO | IORESOURCE_MEM))) + continue; + + b = dev->bus; + while (b && !r->parent) { + assign_fixed_resource_on_bus(b, r); + b = b->parent; + } + } +} + void __pci_bus_assign_resources(const struct pci_bus *bus, struct list_head *realloc_head, struct list_head *fail_head) @@ -1351,6 +1392,8 @@ void __pci_bus_assign_resources(const struct pci_bus *bus, pbus_assign_resources_sorted(bus, realloc_head, fail_head); list_for_each_entry(dev, &bus->devices, bus_list) { + pdev_assign_fixed_resources(dev); + b = dev->subordinate; if (!b) continue; From f80b0ba959641f13524507d60eae4d01ea65c991 Mon Sep 17 00:00:00 2001 From: "Sean O. Stalley" Date: Thu, 29 Oct 2015 17:35:39 -0500 Subject: [PATCH 17/26] PCI: Add Enhanced Allocation register entries Add registers defined in PCI-SIG's Enhanced allocation ECN. [bhelgaas: s/WRITEABLE/WRITABLE] Signed-off-by: Sean O. Stalley [david.daney@cavium.com: Added more definitions for PCI_EA_BEI_*] Signed-off-by: Signed-off-by: David Daney Signed-off-by: Bjorn Helgaas --- include/uapi/linux/pci_regs.h | 43 ++++++++++++++++++++++++++++++++++- 1 file changed, 42 insertions(+), 1 deletion(-) diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index 413417f3707b..f890876aa070 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -216,7 +216,8 @@ #define PCI_CAP_ID_MSIX 0x11 /* MSI-X */ #define PCI_CAP_ID_SATA 0x12 /* SATA Data/Index Conf. */ #define PCI_CAP_ID_AF 0x13 /* PCI Advanced Features */ -#define PCI_CAP_ID_MAX PCI_CAP_ID_AF +#define PCI_CAP_ID_EA 0x14 /* PCI Enhanced Allocation */ +#define PCI_CAP_ID_MAX PCI_CAP_ID_EA #define PCI_CAP_LIST_NEXT 1 /* Next capability in the list */ #define PCI_CAP_FLAGS 2 /* Capability defined flags (16 bits) */ #define PCI_CAP_SIZEOF 4 @@ -353,6 +354,46 @@ #define PCI_AF_STATUS_TP 0x01 #define PCI_CAP_AF_SIZEOF 6 /* size of AF registers */ +/* PCI Enhanced Allocation registers */ + +#define PCI_EA_NUM_ENT 2 /* Number of Capability Entries */ +#define PCI_EA_NUM_ENT_MASK 0x3f /* Num Entries Mask */ +#define PCI_EA_FIRST_ENT 4 /* First EA Entry in List */ +#define PCI_EA_FIRST_ENT_BRIDGE 8 /* First EA Entry for Bridges */ +#define PCI_EA_ES 0x00000007 /* Entry Size */ +#define PCI_EA_BEI(x) (((x) >> 4) & 0xf) /* BAR Equivalent Indicator */ +/* 0-5 map to BARs 0-5 respectively */ +#define PCI_EA_BEI_BAR0 0 +#define PCI_EA_BEI_BAR5 5 +#define PCI_EA_BEI_BRIDGE 6 /* Resource behind bridge */ +#define PCI_EA_BEI_ENI 7 /* Equivalent Not Indicated */ +#define PCI_EA_BEI_ROM 8 /* Expansion ROM */ +/* 9-14 map to VF BARs 0-5 respectively */ +#define PCI_EA_BEI_VF_BAR0 9 +#define PCI_EA_BEI_VF_BAR5 14 +#define PCI_EA_BEI_RESERVED 15 /* Reserved - Treat like ENI */ +#define PCI_EA_PP(x) (((x) >> 8) & 0xff) /* Primary Properties */ +#define PCI_EA_SP(x) (((x) >> 16) & 0xff) /* Secondary Properties */ +#define PCI_EA_P_MEM 0x00 /* Non-Prefetch Memory */ +#define PCI_EA_P_MEM_PREFETCH 0x01 /* Prefetchable Memory */ +#define PCI_EA_P_IO 0x02 /* I/O Space */ +#define PCI_EA_P_VF_MEM_PREFETCH 0x03 /* VF Prefetchable Memory */ +#define PCI_EA_P_VF_MEM 0x04 /* VF Non-Prefetch Memory */ +#define PCI_EA_P_BRIDGE_MEM 0x05 /* Bridge Non-Prefetch Memory */ +#define PCI_EA_P_BRIDGE_MEM_PREFETCH 0x06 /* Bridge Prefetchable Memory */ +#define PCI_EA_P_BRIDGE_IO 0x07 /* Bridge I/O Space */ +/* 0x08-0xfc reserved */ +#define PCI_EA_P_MEM_RESERVED 0xfd /* Reserved Memory */ +#define PCI_EA_P_IO_RESERVED 0xfe /* Reserved I/O Space */ +#define PCI_EA_P_UNAVAILABLE 0xff /* Entry Unavailable */ +#define PCI_EA_WRITABLE 0x40000000 /* Writable: 1 = RW, 0 = HwInit */ +#define PCI_EA_ENABLE 0x80000000 /* Enable for this entry */ +#define PCI_EA_BASE 4 /* Base Address Offset */ +#define PCI_EA_MAX_OFFSET 8 /* MaxOffset (resource length) */ +/* bit 0 is reserved */ +#define PCI_EA_IS_64 0x00000002 /* 64-bit field flag */ +#define PCI_EA_FIELD_MASK 0xfffffffc /* For Base & Max Offset */ + /* PCI-X registers (Type 0 (non-bridge) devices) */ #define PCI_X_CMD 2 /* Modes & Features */ From 938174e59f83354251a75a2b90171e0ffdf6f1d7 Mon Sep 17 00:00:00 2001 From: "Sean O. Stalley" Date: Thu, 29 Oct 2015 17:35:39 -0500 Subject: [PATCH 18/26] PCI: Add support for Enhanced Allocation devices Add support for devices using Enhanced Allocation entries instead of BARs. This allows the kernel to parse the EA Extended Capability structure in PCI config space and claim the BAR-equivalent resources. See https://pcisig.com/sites/default/files/specification_documents/ECN_Enhanced_Allocation_23_Oct_2014_Final.pdf [bhelgaas: add spec URL, s/pci_ea_set_flags/pci_ea_flags/, consolidate declarations, print unknown property in hex to match spec] Signed-off-by: Sean O. Stalley [david.daney@cavium.com: Add more support/checking for Entry Properties, allow EA behind bridges, rewrite some error messages.] Signed-off-by: David Daney Signed-off-by: Bjorn Helgaas --- drivers/pci/pci.c | 173 ++++++++++++++++++++++++++++++++++++++++++++ drivers/pci/pci.h | 1 + drivers/pci/probe.c | 3 + 3 files changed, 177 insertions(+) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 6a9a1116f1eb..8d796c02ee90 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -2148,6 +2148,179 @@ void pci_pm_init(struct pci_dev *dev) } } +static unsigned long pci_ea_flags(struct pci_dev *dev, u8 prop) +{ + unsigned long flags = IORESOURCE_PCI_FIXED; + + switch (prop) { + case PCI_EA_P_MEM: + case PCI_EA_P_VF_MEM: + flags |= IORESOURCE_MEM; + break; + case PCI_EA_P_MEM_PREFETCH: + case PCI_EA_P_VF_MEM_PREFETCH: + flags |= IORESOURCE_MEM | IORESOURCE_PREFETCH; + break; + case PCI_EA_P_IO: + flags |= IORESOURCE_IO; + break; + default: + return 0; + } + + return flags; +} + +static struct resource *pci_ea_get_resource(struct pci_dev *dev, u8 bei, + u8 prop) +{ + if (bei <= PCI_EA_BEI_BAR5 && prop <= PCI_EA_P_IO) + return &dev->resource[bei]; + else if (bei == PCI_EA_BEI_ROM) + return &dev->resource[PCI_ROM_RESOURCE]; + else + return NULL; +} + +/* Read an Enhanced Allocation (EA) entry */ +static int pci_ea_read(struct pci_dev *dev, int offset) +{ + struct resource *res; + int ent_size, ent_offset = offset; + resource_size_t start, end; + unsigned long flags; + u32 dw0, base, max_offset; + u8 prop; + bool support_64 = (sizeof(resource_size_t) >= 8); + + pci_read_config_dword(dev, ent_offset, &dw0); + ent_offset += 4; + + /* Entry size field indicates DWORDs after 1st */ + ent_size = ((dw0 & PCI_EA_ES) + 1) << 2; + + if (!(dw0 & PCI_EA_ENABLE)) /* Entry not enabled */ + goto out; + + prop = PCI_EA_PP(dw0); + /* + * If the Property is in the reserved range, try the Secondary + * Property instead. + */ + if (prop > PCI_EA_P_BRIDGE_IO && prop < PCI_EA_P_MEM_RESERVED) + prop = PCI_EA_SP(dw0); + if (prop > PCI_EA_P_BRIDGE_IO) + goto out; + + res = pci_ea_get_resource(dev, PCI_EA_BEI(dw0), prop); + if (!res) { + dev_err(&dev->dev, "Unsupported EA entry BEI: %u\n", + PCI_EA_BEI(dw0)); + goto out; + } + + flags = pci_ea_flags(dev, prop); + if (!flags) { + dev_err(&dev->dev, "Unsupported EA properties: %#x\n", prop); + goto out; + } + + /* Read Base */ + pci_read_config_dword(dev, ent_offset, &base); + start = (base & PCI_EA_FIELD_MASK); + ent_offset += 4; + + /* Read MaxOffset */ + pci_read_config_dword(dev, ent_offset, &max_offset); + ent_offset += 4; + + /* Read Base MSBs (if 64-bit entry) */ + if (base & PCI_EA_IS_64) { + u32 base_upper; + + pci_read_config_dword(dev, ent_offset, &base_upper); + ent_offset += 4; + + flags |= IORESOURCE_MEM_64; + + /* entry starts above 32-bit boundary, can't use */ + if (!support_64 && base_upper) + goto out; + + if (support_64) + start |= ((u64)base_upper << 32); + } + + end = start + (max_offset | 0x03); + + /* Read MaxOffset MSBs (if 64-bit entry) */ + if (max_offset & PCI_EA_IS_64) { + u32 max_offset_upper; + + pci_read_config_dword(dev, ent_offset, &max_offset_upper); + ent_offset += 4; + + flags |= IORESOURCE_MEM_64; + + /* entry too big, can't use */ + if (!support_64 && max_offset_upper) + goto out; + + if (support_64) + end += ((u64)max_offset_upper << 32); + } + + if (end < start) { + dev_err(&dev->dev, "EA Entry crosses address boundary\n"); + goto out; + } + + if (ent_size != ent_offset - offset) { + dev_err(&dev->dev, + "EA Entry Size (%d) does not match length read (%d)\n", + ent_size, ent_offset - offset); + goto out; + } + + res->name = pci_name(dev); + res->start = start; + res->end = end; + res->flags = flags; + dev_printk(KERN_DEBUG, &dev->dev, "EA - BEI %2u, Prop 0x%02x: %pR\n", + PCI_EA_BEI(dw0), prop, res); +out: + return offset + ent_size; +} + +/* Enhanced Allocation Initalization */ +void pci_ea_init(struct pci_dev *dev) +{ + int ea; + u8 num_ent; + int offset; + int i; + + /* find PCI EA capability in list */ + ea = pci_find_capability(dev, PCI_CAP_ID_EA); + if (!ea) + return; + + /* determine the number of entries */ + pci_bus_read_config_byte(dev->bus, dev->devfn, ea + PCI_EA_NUM_ENT, + &num_ent); + num_ent &= PCI_EA_NUM_ENT_MASK; + + offset = ea + PCI_EA_FIRST_ENT; + + /* Skip DWORD 2 for type 1 functions */ + if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) + offset += 4; + + /* parse each EA entry */ + for (i = 0; i < num_ent; ++i) + offset = pci_ea_read(dev, offset); +} + static void pci_add_saved_cap(struct pci_dev *pci_dev, struct pci_cap_saved_state *new_cap) { diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 24ba9dc8910a..a1607331693e 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -78,6 +78,7 @@ bool pci_dev_keep_suspended(struct pci_dev *dev); void pci_config_pm_runtime_get(struct pci_dev *dev); void pci_config_pm_runtime_put(struct pci_dev *dev); void pci_pm_init(struct pci_dev *dev); +void pci_ea_init(struct pci_dev *dev); void pci_allocate_cap_save_buffers(struct pci_dev *dev); void pci_free_cap_save_buffers(struct pci_dev *dev); diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 0b2be174d981..4293eec540a2 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -1598,6 +1598,9 @@ static struct pci_dev *pci_scan_device(struct pci_bus *bus, int devfn) static void pci_init_capabilities(struct pci_dev *dev) { + /* Enhanced Allocation */ + pci_ea_init(dev); + /* MSI/MSI-X list */ pci_msi_init_pci_dev(dev); From 111839917fccf4f74d50ad937321acc6a07eb9c3 Mon Sep 17 00:00:00 2001 From: David Daney Date: Thu, 29 Oct 2015 17:35:40 -0500 Subject: [PATCH 19/26] PCI: Handle Enhanced Allocation capability for SR-IOV devices SR-IOV BARs can be specified via EA entries. Extend the EA parser to extract the SRIOV BAR resources, and modify sriov_init() to use resources previously obtained via EA. Signed-off-by: David Daney Signed-off-by: Bjorn Helgaas Acked-by: Sean O. Stalley --- drivers/pci/iov.c | 11 +++++++++-- drivers/pci/pci.c | 6 ++++++ 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c index ee0ebff103a4..c789e68df358 100644 --- a/drivers/pci/iov.c +++ b/drivers/pci/iov.c @@ -436,8 +436,15 @@ found: nres = 0; for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { res = &dev->resource[i + PCI_IOV_RESOURCES]; - bar64 = __pci_read_base(dev, pci_bar_unknown, res, - pos + PCI_SRIOV_BAR + i * 4); + /* + * If it is already FIXED, don't change it, something + * (perhaps EA or header fixups) wants it this way. + */ + if (res->flags & IORESOURCE_PCI_FIXED) + bar64 = (res->flags & IORESOURCE_MEM_64) ? 1 : 0; + else + bar64 = __pci_read_base(dev, pci_bar_unknown, res, + pos + PCI_SRIOV_BAR + i * 4); if (!res->flags) continue; if (resource_size(res) & (PAGE_SIZE - 1)) { diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 8d796c02ee90..364f98b30130 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -2176,6 +2176,12 @@ static struct resource *pci_ea_get_resource(struct pci_dev *dev, u8 bei, { if (bei <= PCI_EA_BEI_BAR5 && prop <= PCI_EA_P_IO) return &dev->resource[bei]; +#ifdef CONFIG_PCI_IOV + else if (bei >= PCI_EA_BEI_VF_BAR0 && bei <= PCI_EA_BEI_VF_BAR5 && + (prop == PCI_EA_P_VF_MEM || prop == PCI_EA_P_VF_MEM_PREFETCH)) + return &dev->resource[PCI_IOV_RESOURCES + + bei - PCI_EA_BEI_VF_BAR0]; +#endif else if (bei == PCI_EA_BEI_ROM) return &dev->resource[PCI_ROM_RESOURCE]; else From 26635112d4d057c9ea38f6284423a30a41a09240 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Thu, 29 Oct 2015 17:35:40 -0500 Subject: [PATCH 20/26] PCI: Make Enhanced Allocation bitmasks more obvious Expand bitmask #defines completely. This puts the shift in the code instead of in the #define, but it makes it more obvious in the header file how fields in the register are laid out. No functional change. Signed-off-by: Bjorn Helgaas --- drivers/pci/pci.c | 15 ++++++++------- include/uapi/linux/pci_regs.h | 6 +++--- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 364f98b30130..e8330b68cecd 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -2195,7 +2195,7 @@ static int pci_ea_read(struct pci_dev *dev, int offset) int ent_size, ent_offset = offset; resource_size_t start, end; unsigned long flags; - u32 dw0, base, max_offset; + u32 dw0, bei, base, max_offset; u8 prop; bool support_64 = (sizeof(resource_size_t) >= 8); @@ -2208,20 +2208,21 @@ static int pci_ea_read(struct pci_dev *dev, int offset) if (!(dw0 & PCI_EA_ENABLE)) /* Entry not enabled */ goto out; - prop = PCI_EA_PP(dw0); + bei = (dw0 & PCI_EA_BEI) >> 4; + prop = (dw0 & PCI_EA_PP) >> 8; + /* * If the Property is in the reserved range, try the Secondary * Property instead. */ if (prop > PCI_EA_P_BRIDGE_IO && prop < PCI_EA_P_MEM_RESERVED) - prop = PCI_EA_SP(dw0); + prop = (dw0 & PCI_EA_SP) >> 16; if (prop > PCI_EA_P_BRIDGE_IO) goto out; - res = pci_ea_get_resource(dev, PCI_EA_BEI(dw0), prop); + res = pci_ea_get_resource(dev, bei, prop); if (!res) { - dev_err(&dev->dev, "Unsupported EA entry BEI: %u\n", - PCI_EA_BEI(dw0)); + dev_err(&dev->dev, "Unsupported EA entry BEI: %u\n", bei); goto out; } @@ -2293,7 +2294,7 @@ static int pci_ea_read(struct pci_dev *dev, int offset) res->end = end; res->flags = flags; dev_printk(KERN_DEBUG, &dev->dev, "EA - BEI %2u, Prop 0x%02x: %pR\n", - PCI_EA_BEI(dw0), prop, res); + bei, prop, res); out: return offset + ent_size; } diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index f890876aa070..1becea86c73c 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -361,7 +361,7 @@ #define PCI_EA_FIRST_ENT 4 /* First EA Entry in List */ #define PCI_EA_FIRST_ENT_BRIDGE 8 /* First EA Entry for Bridges */ #define PCI_EA_ES 0x00000007 /* Entry Size */ -#define PCI_EA_BEI(x) (((x) >> 4) & 0xf) /* BAR Equivalent Indicator */ +#define PCI_EA_BEI 0x000000f0 /* BAR Equivalent Indicator */ /* 0-5 map to BARs 0-5 respectively */ #define PCI_EA_BEI_BAR0 0 #define PCI_EA_BEI_BAR5 5 @@ -372,8 +372,8 @@ #define PCI_EA_BEI_VF_BAR0 9 #define PCI_EA_BEI_VF_BAR5 14 #define PCI_EA_BEI_RESERVED 15 /* Reserved - Treat like ENI */ -#define PCI_EA_PP(x) (((x) >> 8) & 0xff) /* Primary Properties */ -#define PCI_EA_SP(x) (((x) >> 16) & 0xff) /* Secondary Properties */ +#define PCI_EA_PP 0x0000ff00 /* Primary Properties */ +#define PCI_EA_SP 0x00ff0000 /* Secondary Properties */ #define PCI_EA_P_MEM 0x00 /* Non-Prefetch Memory */ #define PCI_EA_P_MEM_PREFETCH 0x01 /* Prefetchable Memory */ #define PCI_EA_P_IO 0x02 /* I/O Space */ From 597becb4adb7d57b8e9d857a68f764ce96a141b8 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Thu, 29 Oct 2015 17:35:40 -0500 Subject: [PATCH 21/26] PCI: Expand Enhanced Allocation BAR output An Enhanced Allocation Capability entry with BEI 0 fills in dev->resource[0] just like a real BAR 0 would, but non-EA experts might not connect "EA - BEI 0" with BAR 0. Decode the EA jargon a little bit, e.g., change this: pci 0002:01:00.0: EA - BEI 0, Prop 0x00: [mem 0x84300000-0x84303fff] to this: pci 0002:01:00.0: BAR 0: [mem 0x84300000-0x84303fff] (from Enhanced Allocation, properties 0x00) Signed-off-by: Bjorn Helgaas --- drivers/pci/pci.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index e8330b68cecd..3a1454e6dd4a 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -2293,8 +2293,20 @@ static int pci_ea_read(struct pci_dev *dev, int offset) res->start = start; res->end = end; res->flags = flags; - dev_printk(KERN_DEBUG, &dev->dev, "EA - BEI %2u, Prop 0x%02x: %pR\n", - bei, prop, res); + + if (bei <= PCI_EA_BEI_BAR5) + dev_printk(KERN_DEBUG, &dev->dev, "BAR %d: %pR (from Enhanced Allocation, properties %#02x)\n", + bei, res, prop); + else if (bei == PCI_EA_BEI_ROM) + dev_printk(KERN_DEBUG, &dev->dev, "ROM: %pR (from Enhanced Allocation, properties %#02x)\n", + res, prop); + else if (bei >= PCI_EA_BEI_VF_BAR0 && bei <= PCI_EA_BEI_VF_BAR5) + dev_printk(KERN_DEBUG, &dev->dev, "VF BAR %d: %pR (from Enhanced Allocation, properties %#02x)\n", + bei - PCI_EA_BEI_VF_BAR0, res, prop); + else + dev_printk(KERN_DEBUG, &dev->dev, "BEI %d res: %pR (from Enhanced Allocation, properties %#02x)\n", + bei, res, prop); + out: return offset + ent_size; } From af86fa4001ca5644ae20cc2c52cdf67bb7db9752 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 20 Oct 2015 11:36:53 -0500 Subject: [PATCH 22/26] sparc/PCI: Add mem64 resource parsing for root bus David reported that a T5-8 sparc system failed to boot with: pci_sun4v f02dbcfc: PCI host bridge to bus 0000:00 pci_bus 0000:00: root bus resource [io 0x804000000000-0x80400fffffff] (bus address [0x0000-0xfffffff]) pci_bus 0000:00: root bus resource [mem 0x800000000000-0x80007effffff] (bus address [0x00000000-0x7effffff]) pci 0000:00:01.0: can't claim BAR 15 [mem 0x100000000-0x4afffffff pref]: no compatible bridge window Note that we don't know about a host bridge aperture that contains BAR 15. OF does report a MEM64 aperture, but before this patch, pci_determine_mem_io_space() ignored it. Add support for host bridge apertures with 64-bit PCI addresses. Also set IORESOURCE_MEM_64 for PCI device and bridge resources in PCI 64-bit memory space. Sparc doesn't actually print the device and bridge resources, but after this patch, we should have the equivalent of this: pci_sun4v f02dbcfc: PCI host bridge to bus 0000:00 pci_bus 0000:00: root bus resource [io 0x804000000000-0x80400fffffff] (bus address [0x0000-0xfffffff]) pci_bus 0000:00: root bus resource [mem 0x800000000000-0x80007effffff] (bus address [0x00000000-0x7effffff]) pci_bus 0000:00: root bus resource [mem 0x800100000000-0x8007ffffffff] (bus address [0x100000000-0x7ffffffff]) pci 0000:00:01.0: bridge window [mem 0x800100000000-0x8004afffffff 64bit pref] [bhelgaas: changelog, URL to David's report] Fixes: d63e2e1f3df9 ("sparc/PCI: Clip bridge windows to fit in upstream windows") Link: http://lkml.kernel.org/r/5514391F.2030300@oracle.com Reported-by: David Ahern Tested-by: David Ahern Tested-by: Khalid Aziz Signed-off-by: Yinghai Lu Signed-off-by: Bjorn Helgaas --- arch/sparc/kernel/pci.c | 7 ++++++- arch/sparc/kernel/pci_common.c | 17 +++++++++++++++-- arch/sparc/kernel/pci_impl.h | 1 + 3 files changed, 22 insertions(+), 3 deletions(-) diff --git a/arch/sparc/kernel/pci.c b/arch/sparc/kernel/pci.c index b91d7f146175..badf0951d73c 100644 --- a/arch/sparc/kernel/pci.c +++ b/arch/sparc/kernel/pci.c @@ -185,8 +185,10 @@ static unsigned long pci_parse_of_flags(u32 addr0) if (addr0 & 0x02000000) { flags = IORESOURCE_MEM | PCI_BASE_ADDRESS_SPACE_MEMORY; - flags |= (addr0 >> 22) & PCI_BASE_ADDRESS_MEM_TYPE_64; flags |= (addr0 >> 28) & PCI_BASE_ADDRESS_MEM_TYPE_1M; + if (addr0 & 0x01000000) + flags |= IORESOURCE_MEM_64 + | PCI_BASE_ADDRESS_MEM_TYPE_64; if (addr0 & 0x40000000) flags |= IORESOURCE_PREFETCH | PCI_BASE_ADDRESS_MEM_PREFETCH; @@ -655,6 +657,9 @@ struct pci_bus *pci_scan_one_pbm(struct pci_pbm_info *pbm, pbm->io_space.start); pci_add_resource_offset(&resources, &pbm->mem_space, pbm->mem_space.start); + if (pbm->mem64_space.flags) + pci_add_resource_offset(&resources, &pbm->mem64_space, + pbm->mem_space.start); pbm->busn.start = pbm->pci_first_busno; pbm->busn.end = pbm->pci_last_busno; pbm->busn.flags = IORESOURCE_BUS; diff --git a/arch/sparc/kernel/pci_common.c b/arch/sparc/kernel/pci_common.c index 944a06536ecc..33524c1d5328 100644 --- a/arch/sparc/kernel/pci_common.c +++ b/arch/sparc/kernel/pci_common.c @@ -406,6 +406,7 @@ void pci_determine_mem_io_space(struct pci_pbm_info *pbm) } num_pbm_ranges = i / sizeof(*pbm_ranges); + memset(&pbm->mem64_space, 0, sizeof(struct resource)); for (i = 0; i < num_pbm_ranges; i++) { const struct linux_prom_pci_ranges *pr = &pbm_ranges[i]; @@ -451,7 +452,12 @@ void pci_determine_mem_io_space(struct pci_pbm_info *pbm) break; case 3: - /* XXX 64-bit MEM handling XXX */ + /* 64-bit MEM handling */ + pbm->mem64_space.start = a; + pbm->mem64_space.end = a + size - 1UL; + pbm->mem64_space.flags = IORESOURCE_MEM; + saw_mem = 1; + break; default: break; @@ -465,15 +471,22 @@ void pci_determine_mem_io_space(struct pci_pbm_info *pbm) prom_halt(); } - printk("%s: PCI IO[%llx] MEM[%llx]\n", + printk("%s: PCI IO[%llx] MEM[%llx]", pbm->name, pbm->io_space.start, pbm->mem_space.start); + if (pbm->mem64_space.flags) + printk(" MEM64[%llx]", + pbm->mem64_space.start); + printk("\n"); pbm->io_space.name = pbm->mem_space.name = pbm->name; + pbm->mem64_space.name = pbm->name; request_resource(&ioport_resource, &pbm->io_space); request_resource(&iomem_resource, &pbm->mem_space); + if (pbm->mem64_space.flags) + request_resource(&iomem_resource, &pbm->mem64_space); pci_register_legacy_regions(&pbm->io_space, &pbm->mem_space); diff --git a/arch/sparc/kernel/pci_impl.h b/arch/sparc/kernel/pci_impl.h index 75803c780af3..37222ca849df 100644 --- a/arch/sparc/kernel/pci_impl.h +++ b/arch/sparc/kernel/pci_impl.h @@ -97,6 +97,7 @@ struct pci_pbm_info { /* PBM I/O and Memory space resources. */ struct resource io_space; struct resource mem_space; + struct resource mem64_space; struct resource busn; /* Base of PCI Config space, can be per-PBM or shared. */ From 3443c38205330bc5783a516ef1d0b6f182ee9a85 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Thu, 29 Oct 2015 16:21:05 -0500 Subject: [PATCH 23/26] PCI: Remove VFs in reverse order if virtfn_add() fails If virtfn_add() fails, we call virtfn_remove() for any previously added devices. Remove the devices in reverse order (first-added is last-removed), which is more natural and doesn't require an additional variable. [bhelgaas: changelog, split to separate patch for reviewability] Signed-off-by: Alexander Duyck Signed-off-by: Bjorn Helgaas Reviewed-by: Wei Yang --- drivers/pci/iov.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c index ca400a9140de..c86d94c0c7a4 100644 --- a/drivers/pci/iov.c +++ b/drivers/pci/iov.c @@ -233,7 +233,7 @@ int __weak pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs) static int sriov_enable(struct pci_dev *dev, int nr_virtfn) { int rc; - int i, j; + int i; int nres; u16 initial; struct resource *res; @@ -328,8 +328,8 @@ static int sriov_enable(struct pci_dev *dev, int nr_virtfn) return 0; failed: - for (j = 0; j < i; j++) - virtfn_remove(dev, j, 0); + while (i--) + virtfn_remove(dev, i, 0); iov->ctrl &= ~(PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE); pci_cfg_access_lock(dev); From a39e3fcd72356e2bcb46fd99ec4e98064bd806b2 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Thu, 29 Oct 2015 16:21:11 -0500 Subject: [PATCH 24/26] PCI: Reorder pcibios_sriov_disable() Move pcibios_sriov_disable() up so it's defined before a future use. [bhelgaas: split to separate patch for reviewability] Signed-off-by: Alexander Duyck Signed-off-by: Bjorn Helgaas Reviewed-by: Wei Yang --- drivers/pci/iov.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c index c86d94c0c7a4..fada98da731b 100644 --- a/drivers/pci/iov.c +++ b/drivers/pci/iov.c @@ -227,7 +227,12 @@ static void virtfn_remove(struct pci_dev *dev, int id, int reset) int __weak pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs) { - return 0; + return 0; +} + +int __weak pcibios_sriov_disable(struct pci_dev *pdev) +{ + return 0; } static int sriov_enable(struct pci_dev *dev, int nr_virtfn) @@ -344,11 +349,6 @@ failed: return rc; } -int __weak pcibios_sriov_disable(struct pci_dev *pdev) -{ - return 0; -} - static void sriov_disable(struct pci_dev *dev) { int i; From b390864482924a21f8d7e25636d0140de93fba7e Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Thu, 29 Oct 2015 16:21:16 -0500 Subject: [PATCH 25/26] PCI: Wait 1 second between disabling VFs and clearing NumVFs Per sec 3.3.3.1 of the SR-IOV spec, r1.1, we must allow 1.0s after clearing VF Enable before reading any field in the SR-IOV Extended Capability. Wait 1 second before calling pci_iov_set_numvfs(), which reads PCI_SRIOV_VF_OFFSET and PCI_SRIOV_VF_STRIDE after it sets PCI_SRIOV_NUM_VF. [bhelgaas: split to separate patch for reviewability, add spec reference] Signed-off-by: Alexander Duyck Signed-off-by: Bjorn Helgaas --- drivers/pci/iov.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c index fada98da731b..24428d5f60cd 100644 --- a/drivers/pci/iov.c +++ b/drivers/pci/iov.c @@ -339,13 +339,13 @@ failed: iov->ctrl &= ~(PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE); pci_cfg_access_lock(dev); pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl); - pci_iov_set_numvfs(dev, 0); ssleep(1); pci_cfg_access_unlock(dev); if (iov->link != dev->devfn) sysfs_remove_link(&dev->dev.kobj, "dep_link"); + pci_iov_set_numvfs(dev, 0); return rc; } From c23b613507b8fa10e1839fb5a42e1b0318bf1d27 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Thu, 29 Oct 2015 16:21:20 -0500 Subject: [PATCH 26/26] PCI: Fix sriov_enable() error path for pcibios_enable_sriov() failures Disable VFs if pcibios_enable_sriov() fails, just like we do for other errors in sriov_enable(). Call pcibios_sriov_disable() if virtfn_add() fails. [bhelgaas: changelog, split to separate patch for reviewability] Fixes: 995df527f399 ("PCI: Add pcibios_sriov_enable() and pcibios_sriov_disable()") Signed-off-by: Alexander Duyck Signed-off-by: Bjorn Helgaas Reviewed-by: Wei Yang --- drivers/pci/iov.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c index 24428d5f60cd..bd1c4faedea2 100644 --- a/drivers/pci/iov.c +++ b/drivers/pci/iov.c @@ -246,7 +246,6 @@ static int sriov_enable(struct pci_dev *dev, int nr_virtfn) struct pci_sriov *iov = dev->sriov; int bars = 0; int bus; - int retval; if (!nr_virtfn) return 0; @@ -315,10 +314,10 @@ static int sriov_enable(struct pci_dev *dev, int nr_virtfn) if (nr_virtfn < initial) initial = nr_virtfn; - if ((retval = pcibios_sriov_enable(dev, initial))) { - dev_err(&dev->dev, "failure %d from pcibios_sriov_enable()\n", - retval); - return retval; + rc = pcibios_sriov_enable(dev, initial); + if (rc) { + dev_err(&dev->dev, "failure %d from pcibios_sriov_enable()\n", rc); + goto err_pcibios; } for (i = 0; i < initial; i++) { @@ -336,6 +335,8 @@ failed: while (i--) virtfn_remove(dev, i, 0); + pcibios_sriov_disable(dev); +err_pcibios: iov->ctrl &= ~(PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE); pci_cfg_access_lock(dev); pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);