From dc5f219e88294b93009eef946251251ffffb6d60 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 4 Feb 2011 13:19:20 +0100 Subject: [PATCH 01/17] genirq: Add IRQF_FORCE_RESUME Xen needs to reenable interrupts which are marked IRQF_NO_SUSPEND in the resume path. Add a flag to force the reenabling in the resume code. Tested-and-acked-by: Ian Campbell Signed-off-by: Thomas Gleixner --- include/linux/interrupt.h | 3 ++- kernel/irq/manage.c | 11 ++++++++++- kernel/irq/pm.c | 3 --- 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 55e0d4253e49..d746da19c6a2 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -55,7 +55,7 @@ * Used by threaded interrupts which need to keep the * irq line disabled until the threaded handler has been run. * IRQF_NO_SUSPEND - Do not disable this IRQ during suspend - * + * IRQF_FORCE_RESUME - Force enable it on resume even if IRQF_NO_SUSPEND is set */ #define IRQF_DISABLED 0x00000020 #define IRQF_SAMPLE_RANDOM 0x00000040 @@ -67,6 +67,7 @@ #define IRQF_IRQPOLL 0x00001000 #define IRQF_ONESHOT 0x00002000 #define IRQF_NO_SUSPEND 0x00004000 +#define IRQF_FORCE_RESUME 0x00008000 #define IRQF_TIMER (__IRQF_TIMER | IRQF_NO_SUSPEND) diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 0caa59f747dd..b4198ee8cfdf 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -282,8 +282,17 @@ EXPORT_SYMBOL(disable_irq); void __enable_irq(struct irq_desc *desc, unsigned int irq, bool resume) { - if (resume) + if (resume) { + if (!(desc->status & IRQ_SUSPENDED)) { + if (!desc->action) + return; + if (!(desc->action->flags & IRQF_FORCE_RESUME)) + return; + /* Pretend that it got disabled ! 
*/ + desc->depth++; + } desc->status &= ~IRQ_SUSPENDED; + } switch (desc->depth) { case 0: diff --git a/kernel/irq/pm.c b/kernel/irq/pm.c index 0d4005d85b03..d6bfb89cce91 100644 --- a/kernel/irq/pm.c +++ b/kernel/irq/pm.c @@ -53,9 +53,6 @@ void resume_device_irqs(void) for_each_irq_desc(irq, desc) { unsigned long flags; - if (!(desc->status & IRQ_SUSPENDED)) - continue; - raw_spin_lock_irqsave(&desc->lock, flags); __enable_irq(desc, irq, true); raw_spin_unlock_irqrestore(&desc->lock, flags); From db2e2e6ee9ee9ce93b04c6975fdfef304771d6ad Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 24 Jan 2011 15:43:03 +0100 Subject: [PATCH 02/17] xen-pcifront: don't use flush_scheduled_work() flush_scheduled_work() is scheduled for deprecation. Cancel ->op_work directly instead. Signed-off-by: Tejun Heo Cc: Ryan Wilson Cc: Jan Beulich Cc: Jesse Barnes Signed-off-by: Konrad Rzeszutek Wilk --- drivers/pci/xen-pcifront.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/pci/xen-pcifront.c b/drivers/pci/xen-pcifront.c index 3a5a6fcc0ead..030ce3743439 100644 --- a/drivers/pci/xen-pcifront.c +++ b/drivers/pci/xen-pcifront.c @@ -733,8 +733,7 @@ static void free_pdev(struct pcifront_device *pdev) pcifront_free_roots(pdev); - /*For PCIE_AER error handling job*/ - flush_scheduled_work(); + cancel_work_sync(&pdev->op_work); if (pdev->irq >= 0) unbind_from_irqhandler(pdev->irq, pdev); From 1d4610527bc71d3f9eea520fc51a02d54f79dcd0 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 16 Feb 2011 13:43:22 -0500 Subject: [PATCH 03/17] xen-pcifront: Sanity check the MSI/MSI-X values Check the returned vector values for any values that are odd or plain incorrect (say vector value zero), and if so print a warning. Also fixup the return values. This patch was precipitated by the Xen PCIBack returning the incorrect values due to how it was retrieving PIRQ values. 
This has been fixed in the xen-pciback by "xen/pciback: Utilize 'xen_pirq_from_irq' to get PIRQ value" patch. Reviewed-by: Ian Campbell Signed-off-by: Konrad Rzeszutek Wilk --- drivers/pci/xen-pcifront.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/drivers/pci/xen-pcifront.c b/drivers/pci/xen-pcifront.c index 030ce3743439..5c7b6ad68056 100644 --- a/drivers/pci/xen-pcifront.c +++ b/drivers/pci/xen-pcifront.c @@ -277,18 +277,24 @@ static int pci_frontend_enable_msix(struct pci_dev *dev, if (likely(!err)) { if (likely(!op.value)) { /* we get the result */ - for (i = 0; i < nvec; i++) + for (i = 0; i < nvec; i++) { + if (op.msix_entries[i].vector <= 0) { + dev_warn(&dev->dev, "MSI-X entry %d is invalid: %d!\n", + i, op.msix_entries[i].vector); + err = -EINVAL; + *(*vector+i) = -1; + continue; + } *(*vector+i) = op.msix_entries[i].vector; - return 0; + } } else { printk(KERN_DEBUG "enable msix get value %x\n", op.value); - return op.value; } } else { dev_err(&dev->dev, "enable msix get err %x\n", err); - return err; } + return err; } static void pci_frontend_disable_msix(struct pci_dev *dev) @@ -325,6 +331,12 @@ static int pci_frontend_enable_msi(struct pci_dev *dev, int **vector) err = do_pci_op(pdev, &op); if (likely(!err)) { *(*vector) = op.value; + if (op.value <= 0) { + dev_warn(&dev->dev, "MSI entry is invalid: %d!\n", + op.value); + err = -EINVAL; + *(*vector) = -1; + } } else { dev_err(&dev->dev, "pci frontend enable msi failed for dev " "%x:%x\n", op.bus, op.devfn); From 55cb8cd45e0600df1473489518d7f12ce1bbe973 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 16 Feb 2011 13:43:04 -0500 Subject: [PATCH 04/17] pci/xen: Use xen_allocate_pirq_msi instead of xen_allocate_pirq xen_allocate_pirq -> xen_map_pirq_gsi -> PHYSDEVOP_alloc_irq_vector IFF xen_initial_domain() in addition to the kernel side book-keeping side of things (set chip and handler, update irq_info etc) whereas xen_allocate_pirq_msi just does 
the kernel book keeping. Also xen_allocate_pirq allocates an IRQ in the 1-1 GSI space whereas xen_allocate_pirq_msi allocates a dynamic one in the >GSI IRQ space. All of this is unnecessary as this code path is only executed when we run as a domU PV guest with an MSI/MSI-X PCI card passed in. Hence we can jump straight to allocating a dynamic IRQ (and binding it to the proper PIRQ) and skip the rest. In short: this change is a cosmetic one. Reviewed-by: Ian Campbell Reviewed-by: Stefano Stabellini Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/pci/xen.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c index 25cd4a07d09f..6432f751ee4f 100644 --- a/arch/x86/pci/xen.c +++ b/arch/x86/pci/xen.c @@ -157,14 +157,14 @@ static int xen_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) goto error; i = 0; list_for_each_entry(msidesc, &dev->msi_list, list) { - irq = xen_allocate_pirq(v[i], 0, /* not sharable */ + xen_allocate_pirq_msi( (type == PCI_CAP_ID_MSIX) ? - "pcifront-msi-x" : "pcifront-msi"); + "pcifront-msi-x" : "pcifront-msi", + &irq, &v[i], XEN_ALLOC_IRQ); if (irq < 0) { ret = -1; goto free; } - ret = set_irq_msi(irq, msidesc); if (ret) goto error_while; From cc0f89c4a426fcd6400a89e9e34e4a8851abef76 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 17 Feb 2011 12:02:23 -0500 Subject: [PATCH 05/17] pci/xen: Cleanup: convert int** to int[] Cleanup code. Cosmetic change to make the code look easier to read. 
Reviewed-by: Ian Campbell Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/include/asm/xen/pci.h | 8 ++++---- arch/x86/pci/xen.c | 4 ++-- drivers/pci/xen-pcifront.c | 12 ++++++------ 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/arch/x86/include/asm/xen/pci.h b/arch/x86/include/asm/xen/pci.h index 2329b3eaf8d3..aa8620989162 100644 --- a/arch/x86/include/asm/xen/pci.h +++ b/arch/x86/include/asm/xen/pci.h @@ -27,16 +27,16 @@ static inline void __init xen_setup_pirqs(void) * its own functions. */ struct xen_pci_frontend_ops { - int (*enable_msi)(struct pci_dev *dev, int **vectors); + int (*enable_msi)(struct pci_dev *dev, int vectors[]); void (*disable_msi)(struct pci_dev *dev); - int (*enable_msix)(struct pci_dev *dev, int **vectors, int nvec); + int (*enable_msix)(struct pci_dev *dev, int vectors[], int nvec); void (*disable_msix)(struct pci_dev *dev); }; extern struct xen_pci_frontend_ops *xen_pci_frontend; static inline int xen_pci_frontend_enable_msi(struct pci_dev *dev, - int **vectors) + int vectors[]) { if (xen_pci_frontend && xen_pci_frontend->enable_msi) return xen_pci_frontend->enable_msi(dev, vectors); @@ -48,7 +48,7 @@ static inline void xen_pci_frontend_disable_msi(struct pci_dev *dev) xen_pci_frontend->disable_msi(dev); } static inline int xen_pci_frontend_enable_msix(struct pci_dev *dev, - int **vectors, int nvec) + int vectors[], int nvec) { if (xen_pci_frontend && xen_pci_frontend->enable_msix) return xen_pci_frontend->enable_msix(dev, vectors, nvec); diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c index 6432f751ee4f..30fdd09dea05 100644 --- a/arch/x86/pci/xen.c +++ b/arch/x86/pci/xen.c @@ -150,9 +150,9 @@ static int xen_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) return -ENOMEM; if (type == PCI_CAP_ID_MSIX) - ret = xen_pci_frontend_enable_msix(dev, &v, nvec); + ret = xen_pci_frontend_enable_msix(dev, v, nvec); else - ret = xen_pci_frontend_enable_msi(dev, &v); + ret = xen_pci_frontend_enable_msi(dev, v); if (ret) 
goto error; i = 0; diff --git a/drivers/pci/xen-pcifront.c b/drivers/pci/xen-pcifront.c index 5c7b6ad68056..492b7d807fe8 100644 --- a/drivers/pci/xen-pcifront.c +++ b/drivers/pci/xen-pcifront.c @@ -243,7 +243,7 @@ struct pci_ops pcifront_bus_ops = { #ifdef CONFIG_PCI_MSI static int pci_frontend_enable_msix(struct pci_dev *dev, - int **vector, int nvec) + int vector[], int nvec) { int err; int i; @@ -282,10 +282,10 @@ static int pci_frontend_enable_msix(struct pci_dev *dev, dev_warn(&dev->dev, "MSI-X entry %d is invalid: %d!\n", i, op.msix_entries[i].vector); err = -EINVAL; - *(*vector+i) = -1; + vector[i] = -1; continue; } - *(*vector+i) = op.msix_entries[i].vector; + vector[i] = op.msix_entries[i].vector; } } else { printk(KERN_DEBUG "enable msix get value %x\n", @@ -316,7 +316,7 @@ static void pci_frontend_disable_msix(struct pci_dev *dev) dev_err(&dev->dev, "pci_disable_msix get err %x\n", err); } -static int pci_frontend_enable_msi(struct pci_dev *dev, int **vector) +static int pci_frontend_enable_msi(struct pci_dev *dev, int vector[]) { int err; struct xen_pci_op op = { @@ -330,12 +330,12 @@ static int pci_frontend_enable_msi(struct pci_dev *dev, int **vector) err = do_pci_op(pdev, &op); if (likely(!err)) { - *(*vector) = op.value; + vector[0] = op.value; if (op.value <= 0) { dev_warn(&dev->dev, "MSI entry is invalid: %d!\n", op.value); err = -EINVAL; - *(*vector) = -1; + vector[0] = -1; } } else { dev_err(&dev->dev, "pci frontend enable msi failed for dev " From 3d74a539ae07a8f3c061332e426fc07b2310cf05 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 17 Feb 2011 16:12:51 -0500 Subject: [PATCH 06/17] pci/xen: When free-ing MSI-X/MSI irq->desc also use generic code. This code path is only run when an MSI/MSI-X PCI device is passed in to PV DomU. In 2.6.37 time-frame we over-wrote the default cleanup handler for MSI/MSI-X irq->desc to be "xen_teardown_msi_irqs". 
That function calls the xen-pcifront driver which can tell the backend to cleanup/take back the MSI/MSI-X device. However, we forgot to continue the process of free-ing the MSI/MSI-X device resources (irq->desc) in the PV domU side, which is what the default cleanup handler, default_teardown_msi_irqs, did. Hence we would leak IRQ descriptors. Without this patch, doing "rmmod igbvf;modprobe igbvf" multiple times ends with abandoned IRQ descriptors: 28: 5 xen-pirq-pcifront-msi-x 29: 8 xen-pirq-pcifront-msi-x ... 130: 10 xen-pirq-pcifront-msi-x with the end result of running out of IRQ descriptors. Reviewed-by: Ian Campbell Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/pci/xen.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c index 30fdd09dea05..57afd1da491d 100644 --- a/arch/x86/pci/xen.c +++ b/arch/x86/pci/xen.c @@ -193,6 +193,9 @@ static void xen_teardown_msi_irqs(struct pci_dev *dev) xen_pci_frontend_disable_msix(dev); else xen_pci_frontend_disable_msi(dev); + + /* Free the IRQ's and the msidesc using the generic code. */ + default_teardown_msi_irqs(dev); } static void xen_teardown_msi_irq(unsigned int irq) From 3f2a230caf21a1f7ac75f9e4892d0e5af9ccee88 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Tue, 11 Jan 2011 17:20:13 +0000 Subject: [PATCH 07/17] xen: handled remapped IRQs when enabling a pcifront PCI device. This happens to not be an issue currently because we take pains to try to ensure that the GSI-IRQ mapping is 1-1 in a PV guest and that regular event channels do not clash. However a subsequent patch is going to break this 1-1 mapping. 
Signed-off-by: Ian Campbell Signed-off-by: Konrad Rzeszutek Wilk Cc: Stefano Stabellini Cc: Jeremy Fitzhardinge --- arch/x86/pci/xen.c | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c index 25cd4a07d09f..2a12f3dbdd02 100644 --- a/arch/x86/pci/xen.c +++ b/arch/x86/pci/xen.c @@ -226,21 +226,27 @@ static int xen_pcifront_enable_irq(struct pci_dev *dev) { int rc; int share = 1; + u8 gsi; - dev_info(&dev->dev, "Xen PCI enabling IRQ: %d\n", dev->irq); - - if (dev->irq < 0) - return -EINVAL; - - if (dev->irq < NR_IRQS_LEGACY) - share = 0; - - rc = xen_allocate_pirq(dev->irq, share, "pcifront"); + rc = pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &gsi); if (rc < 0) { - dev_warn(&dev->dev, "Xen PCI IRQ: %d, failed to register:%d\n", - dev->irq, rc); + dev_warn(&dev->dev, "Xen PCI: failed to read interrupt line: %d\n", + rc); return rc; } + + if (gsi < NR_IRQS_LEGACY) + share = 0; + + rc = xen_allocate_pirq(gsi, share, "pcifront"); + if (rc < 0) { + dev_warn(&dev->dev, "Xen PCI: failed to register GSI%d: %d\n", + gsi, rc); + return rc; + } + + dev->irq = rc; + dev_info(&dev->dev, "Xen PCI mapped GSI%d to IRQ%d\n", gsi, dev->irq); return 0; } From cbf6aa89fc52c5253ee141d53eeb73147eb37ac0 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Tue, 11 Jan 2011 17:20:14 +0000 Subject: [PATCH 08/17] xen:events: move find_unbound_irq inside CONFIG_PCI_MSI The only caller is xen_allocate_pirq_msi which is also under this ifdef so this fixes: drivers/xen/events.c:377: warning: 'find_unbound_pirq' defined but not used when CONFIG_PCI_MSI=n Signed-off-by: Ian Campbell Signed-off-by: Konrad Rzeszutek Wilk Cc: Stefano Stabellini Cc: Jeremy Fitzhardinge --- drivers/xen/events.c | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/drivers/xen/events.c b/drivers/xen/events.c index 74681478100a..1ae775742325 100644 --- a/drivers/xen/events.c +++ 
b/drivers/xen/events.c @@ -387,23 +387,6 @@ static int get_nr_hw_irqs(void) return ret; } -static int find_unbound_pirq(int type) -{ - int rc, i; - struct physdev_get_free_pirq op_get_free_pirq; - op_get_free_pirq.type = type; - - rc = HYPERVISOR_physdev_op(PHYSDEVOP_get_free_pirq, &op_get_free_pirq); - if (!rc) - return op_get_free_pirq.pirq; - - for (i = 0; i < nr_irqs; i++) { - if (pirq_to_irq[i] < 0) - return i; - } - return -1; -} - static int find_unbound_irq(void) { struct irq_data *data; @@ -677,6 +660,23 @@ out: #include #include "../pci/msi.h" +static int find_unbound_pirq(int type) +{ + int rc, i; + struct physdev_get_free_pirq op_get_free_pirq; + op_get_free_pirq.type = type; + + rc = HYPERVISOR_physdev_op(PHYSDEVOP_get_free_pirq, &op_get_free_pirq); + if (!rc) + return op_get_free_pirq.pirq; + + for (i = 0; i < nr_irqs; i++) { + if (pirq_to_irq[i] < 0) + return i; + } + return -1; +} + void xen_allocate_pirq_msi(char *name, int *irq, int *pirq, int alloc) { spin_lock(&irq_mapping_update_lock); From c9df1ce585e3bb5a2f101c1d87381b285a9f962f Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Tue, 11 Jan 2011 17:20:15 +0000 Subject: [PATCH 09/17] xen: events: add xen_allocate_irq_{dynamic, gsi} and xen_free_irq This is neater than open-coded calls to irq_alloc_desc_at and irq_free_desc. No intended behavioural change. 
Note that we previously were not checking the return value of irq_alloc_desc_at which would be failing for GSI < NR_IRQS_LEGACY, since the legacy IRQ descriptors are already allocated by the arch; hence the additional check against NR_IRQS_LEGACY in xen_allocate_irq_gsi. Signed-off-by: Konrad Rzeszutek Wilk Cc: Stefano Stabellini Cc: Jeremy Fitzhardinge --- drivers/xen/events.c | 53 +++++++++++++++++++++++++++++--------------- 1 file changed, 35 insertions(+), 18 deletions(-) diff --git a/drivers/xen/events.c b/drivers/xen/events.c index 1ae775742325..81a53eb6cd1d 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -387,7 +387,7 @@ static int get_nr_hw_irqs(void) return ret; } -static int find_unbound_irq(void) +static int xen_allocate_irq_dynamic(void) { struct irq_data *data; int irq, res; @@ -436,6 +436,30 @@ static bool identity_mapped_irq(unsigned irq) return irq < get_nr_hw_irqs(); } +static int xen_allocate_irq_gsi(unsigned gsi) +{ + int irq; + + if (!identity_mapped_irq(gsi) && + (xen_initial_domain() || !xen_pv_domain())) + return xen_allocate_irq_dynamic(); + + /* Legacy IRQ descriptors are already allocated by the arch. */ + if (gsi < NR_IRQS_LEGACY) + return gsi; + + irq = irq_alloc_desc_at(gsi, -1); + if (irq < 0) + panic("Unable to allocate to IRQ%d (%d)\n", gsi, irq); + + return irq; +} + +static void xen_free_irq(unsigned irq) +{ + irq_free_desc(irq); +} + static void pirq_unmask_notify(int irq) { struct physdev_eoi eoi = { .irq = pirq_from_irq(irq) }; @@ -621,14 +645,7 @@ int xen_map_pirq_gsi(unsigned pirq, unsigned gsi, int shareable, char *name) goto out; /* XXX need refcount? */ } - /* If we are a PV guest, we don't have GSIs (no ACPI passed). 
Therefore - * we are using the !xen_initial_domain() to drop in the function.*/ - if (identity_mapped_irq(gsi) || (!xen_initial_domain() && - xen_pv_domain())) { - irq = gsi; - irq_alloc_desc_at(irq, -1); - } else - irq = find_unbound_irq(); + irq = xen_allocate_irq_gsi(gsi); set_irq_chip_and_handler_name(irq, &xen_pirq_chip, handle_level_irq, name); @@ -641,7 +658,7 @@ int xen_map_pirq_gsi(unsigned pirq, unsigned gsi, int shareable, char *name) * this in the priv domain. */ if (xen_initial_domain() && HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) { - irq_free_desc(irq); + xen_free_irq(irq); irq = -ENOSPC; goto out; } @@ -682,7 +699,7 @@ void xen_allocate_pirq_msi(char *name, int *irq, int *pirq, int alloc) spin_lock(&irq_mapping_update_lock); if (alloc & XEN_ALLOC_IRQ) { - *irq = find_unbound_irq(); + *irq = xen_allocate_irq_dynamic(); if (*irq == -1) goto out; } @@ -732,7 +749,7 @@ int xen_create_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int type) spin_lock(&irq_mapping_update_lock); - irq = find_unbound_irq(); + irq = xen_allocate_irq_dynamic(); if (irq == -1) goto out; @@ -741,7 +758,7 @@ int xen_create_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int type) if (rc) { printk(KERN_WARNING "xen map irq failed %d\n", rc); - irq_free_desc(irq); + xen_free_irq(irq); irq = -1; goto out; @@ -783,7 +800,7 @@ int xen_destroy_irq(int irq) } irq_info[irq] = mk_unbound_info(); - irq_free_desc(irq); + xen_free_irq(irq); out: spin_unlock(&irq_mapping_update_lock); @@ -814,7 +831,7 @@ int bind_evtchn_to_irq(unsigned int evtchn) irq = evtchn_to_irq[evtchn]; if (irq == -1) { - irq = find_unbound_irq(); + irq = xen_allocate_irq_dynamic(); set_irq_chip_and_handler_name(irq, &xen_dynamic_chip, handle_fasteoi_irq, "event"); @@ -839,7 +856,7 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu) irq = per_cpu(ipi_to_irq, cpu)[ipi]; if (irq == -1) { - irq = find_unbound_irq(); + irq = xen_allocate_irq_dynamic(); if (irq < 0) goto out; @@ 
-875,7 +892,7 @@ int bind_virq_to_irq(unsigned int virq, unsigned int cpu) irq = per_cpu(virq_to_irq, cpu)[virq]; if (irq == -1) { - irq = find_unbound_irq(); + irq = xen_allocate_irq_dynamic(); set_irq_chip_and_handler_name(irq, &xen_percpu_chip, handle_percpu_irq, "virq"); @@ -934,7 +951,7 @@ static void unbind_from_irq(unsigned int irq) if (irq_info[irq].type != IRQT_UNBOUND) { irq_info[irq] = mk_unbound_info(); - irq_free_desc(irq); + xen_free_irq(irq); } spin_unlock(&irq_mapping_update_lock); From 89911501f3aae44a43984793341a3bf1f4c583c2 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Thu, 3 Mar 2011 11:57:44 -0500 Subject: [PATCH 10/17] xen: events: allocate GSIs and dynamic IRQs from separate IRQ ranges. There are three cases which we need to care about, PV guest, PV domain 0 and HVM guest. The PV guest case is simple since it has no access to ACPI or real APICs and therefore has no GSIs therefore we simply dynamically allocate all IRQs. The potentially interesting case here is PIRQ type event channels associated with passed through PCI devices. However even in this case the guest has no direct interaction with the physical GSI since that happens in the PCI backend. The PV domain 0 and HVM guest cases are actually the same. In domain 0 case the kernel sees the host ACPI and GSIs (although it only sees the APIC indirectly via the hypervisor) and in the HVM guest case it sees the virtualised ACPI and emulated APICs. In these cases we start allocating dynamic IRQs at nr_irqs_gsi so that they cannot clash with any GSI. Currently xen_allocate_irq_dynamic starts at nr_irqs and works backwards looking for a free IRQ in order to (try and) avoid clashing with GSIs used in domain 0 and in HVM guests. This change avoids that although we retain the behaviour of allowing dynamic IRQs to encroach on the GSI range if no suitable IRQs are available since a future IRQ clash is deemed preferable to failure right now. 
Signed-off-by: Ian Campbell Signed-off-by: Konrad Rzeszutek Wilk Cc: Stefano Stabellini Cc: Jeremy Fitzhardinge --- drivers/xen/events.c | 81 ++++++++++++++++---------------------------- 1 file changed, 29 insertions(+), 52 deletions(-) diff --git a/drivers/xen/events.c b/drivers/xen/events.c index 81a53eb6cd1d..06f2e61de691 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -376,72 +376,49 @@ static void unmask_evtchn(int port) put_cpu(); } -static int get_nr_hw_irqs(void) -{ - int ret = 1; - -#ifdef CONFIG_X86_IO_APIC - ret = get_nr_irqs_gsi(); -#endif - - return ret; -} - static int xen_allocate_irq_dynamic(void) { - struct irq_data *data; - int irq, res; - int bottom = get_nr_hw_irqs(); - int top = nr_irqs-1; + int first = 0; + int irq; - if (bottom == nr_irqs) - goto no_irqs; - - /* This loop starts from the top of IRQ space and goes down. - * We need this b/c if we have a PCI device in a Xen PV guest - * we do not have an IO-APIC (though the backend might have them) - * mapped in. To not have a collision of physical IRQs with the Xen - * event channels start at the top of the IRQ space for virtual IRQs. +#ifdef CONFIG_X86_IO_APIC + /* + * For an HVM guest or domain 0 which see "real" (emulated or + * actual repectively) GSIs we allocate dynamic IRQs + * e.g. those corresponding to event channels or MSIs + * etc. from the range above those "real" GSIs to avoid + * collisions. */ - for (irq = top; irq > bottom; irq--) { - data = irq_get_irq_data(irq); - /* only 15->0 have init'd desc; handle irq > 16 */ - if (!data) - break; - if (data->chip == &no_irq_chip) - break; - if (data->chip != &xen_dynamic_chip) - continue; - if (irq_info[irq].type == IRQT_UNBOUND) - return irq; + if (xen_initial_domain() || xen_hvm_domain()) + first = get_nr_irqs_gsi(); +#endif + +retry: + irq = irq_alloc_desc_from(first, -1); + + if (irq == -ENOMEM && first > NR_IRQS_LEGACY) { + printk(KERN_ERR "Out of dynamic IRQ space and eating into GSI space. 
You should increase nr_irqs\n"); + first = max(NR_IRQS_LEGACY, first - NR_IRQS_LEGACY); + goto retry; } - if (irq == bottom) - goto no_irqs; - - res = irq_alloc_desc_at(irq, -1); - - if (WARN_ON(res != irq)) - return -1; + if (irq < 0) + panic("No available IRQ to bind to: increase nr_irqs!\n"); return irq; - -no_irqs: - panic("No available IRQ to bind to: increase nr_irqs!\n"); -} - -static bool identity_mapped_irq(unsigned irq) -{ - /* identity map all the hardware irqs */ - return irq < get_nr_hw_irqs(); } static int xen_allocate_irq_gsi(unsigned gsi) { int irq; - if (!identity_mapped_irq(gsi) && - (xen_initial_domain() || !xen_pv_domain())) + /* + * A PV guest has no concept of a GSI (since it has no ACPI + * nor access to/knowledge of the physical APICs). Therefore + * all IRQs are dynamically allocated from the entire IRQ + * space. + */ + if (xen_pv_domain() && !xen_initial_domain()) return xen_allocate_irq_dynamic(); /* Legacy IRQ descriptors are already allocated by the arch. */ From 7214610475b2847a81478d96e4d3ba0bbe49598c Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Thu, 3 Feb 2011 09:49:35 +0000 Subject: [PATCH 11/17] xen: events: do not free legacy IRQs c514d00c8057 "xen: events: add xen_allocate_irq_{dynamic, gsi} and xen_free_irq" correctly avoids reallocating legacy IRQs (which are managed by the arch core) but erroneously did not prevent them being freed. Signed-off-by: Ian Campbell Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/events.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/xen/events.c b/drivers/xen/events.c index 06f2e61de691..accb37ad0944 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -434,6 +434,10 @@ static int xen_allocate_irq_gsi(unsigned gsi) static void xen_free_irq(unsigned irq) { + /* Legacy IRQ descriptors are managed by the arch. 
*/ + if (irq < NR_IRQS_LEGACY) + return; + irq_free_desc(irq); } From 149f256f8ca690c28dd8aa9fb8bcdaf2e93b1e1c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 5 Feb 2011 20:08:52 +0000 Subject: [PATCH 12/17] xen: Remove stale irq_chip.end irq_chip.end got obsolete with the removal of __do_IRQ() Signed-off-by: Thomas Gleixner Acked-by: Ian Campbell Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/events.c | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/drivers/xen/events.c b/drivers/xen/events.c index accb37ad0944..c8826b5142c4 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -555,23 +555,6 @@ static void ack_pirq(unsigned int irq) } } -static void end_pirq(unsigned int irq) -{ - int evtchn = evtchn_from_irq(irq); - struct irq_desc *desc = irq_to_desc(irq); - - if (WARN_ON(!desc)) - return; - - if ((desc->status & (IRQ_DISABLED|IRQ_PENDING)) == - (IRQ_DISABLED|IRQ_PENDING)) { - shutdown_pirq(irq); - } else if (VALID_EVTCHN(evtchn)) { - unmask_evtchn(evtchn); - pirq_unmask_notify(irq); - } -} - static int find_irq_by_gsi(unsigned gsi) { int irq; @@ -1508,7 +1491,6 @@ static struct irq_chip xen_pirq_chip __read_mostly = { .mask = disable_pirq, .ack = ack_pirq, - .end = end_pirq, .set_affinity = set_affinity_irq, From c9e265e030537167c94cbed190826f02e3887f4d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 5 Feb 2011 20:08:54 +0000 Subject: [PATCH 13/17] xen: Switch to new irq_chip functions Convert Xen to the new irq_chip functions. 
Brings us closer to enable CONFIG_GENERIC_HARDIRQS_NO_DEPRECATED Signed-off-by: Thomas Gleixner Acked-by: Ian Campbell Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/events.c | 95 ++++++++++++++++++++++++-------------------- 1 file changed, 51 insertions(+), 44 deletions(-) diff --git a/drivers/xen/events.c b/drivers/xen/events.c index c8826b5142c4..cf1712fb1c46 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -277,7 +277,7 @@ static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu) BUG_ON(irq == -1); #ifdef CONFIG_SMP - cpumask_copy(irq_to_desc(irq)->affinity, cpumask_of(cpu)); + cpumask_copy(irq_to_desc(irq)->irq_data.affinity, cpumask_of(cpu)); #endif clear_bit(chn, cpu_evtchn_mask(cpu_from_irq(irq))); @@ -294,7 +294,7 @@ static void init_evtchn_cpu_bindings(void) /* By default all event channels notify CPU#0. */ for_each_irq_desc(i, desc) { - cpumask_copy(desc->affinity, cpumask_of(0)); + cpumask_copy(desc->irq_data.affinity, cpumask_of(0)); } #endif @@ -474,7 +474,7 @@ static bool probing_irq(int irq) return desc && desc->action == NULL; } -static unsigned int startup_pirq(unsigned int irq) +static unsigned int __startup_pirq(unsigned int irq) { struct evtchn_bind_pirq bind_pirq; struct irq_info *info = info_for_irq(irq); @@ -512,9 +512,15 @@ out: return 0; } -static void shutdown_pirq(unsigned int irq) +static unsigned int startup_pirq(struct irq_data *data) +{ + return __startup_pirq(data->irq); +} + +static void shutdown_pirq(struct irq_data *data) { struct evtchn_close close; + unsigned int irq = data->irq; struct irq_info *info = info_for_irq(irq); int evtchn = evtchn_from_irq(irq); @@ -534,20 +540,20 @@ static void shutdown_pirq(unsigned int irq) info->evtchn = 0; } -static void enable_pirq(unsigned int irq) +static void enable_pirq(struct irq_data *data) { - startup_pirq(irq); + startup_pirq(data); } -static void disable_pirq(unsigned int irq) +static void disable_pirq(struct irq_data *data) { } -static void 
ack_pirq(unsigned int irq) +static void ack_pirq(struct irq_data *data) { - int evtchn = evtchn_from_irq(irq); + int evtchn = evtchn_from_irq(data->irq); - move_native_irq(irq); + irq_move_irq(data); if (VALID_EVTCHN(evtchn)) { mask_evtchn(evtchn); @@ -1215,11 +1221,12 @@ static int rebind_irq_to_cpu(unsigned irq, unsigned tcpu) return 0; } -static int set_affinity_irq(unsigned irq, const struct cpumask *dest) +static int set_affinity_irq(struct irq_data *data, const struct cpumask *dest, + bool force) { unsigned tcpu = cpumask_first(dest); - return rebind_irq_to_cpu(irq, tcpu); + return rebind_irq_to_cpu(data->irq, tcpu); } int resend_irq_on_evtchn(unsigned int irq) @@ -1238,35 +1245,35 @@ int resend_irq_on_evtchn(unsigned int irq) return 1; } -static void enable_dynirq(unsigned int irq) +static void enable_dynirq(struct irq_data *data) { - int evtchn = evtchn_from_irq(irq); + int evtchn = evtchn_from_irq(data->irq); if (VALID_EVTCHN(evtchn)) unmask_evtchn(evtchn); } -static void disable_dynirq(unsigned int irq) +static void disable_dynirq(struct irq_data *data) { - int evtchn = evtchn_from_irq(irq); + int evtchn = evtchn_from_irq(data->irq); if (VALID_EVTCHN(evtchn)) mask_evtchn(evtchn); } -static void ack_dynirq(unsigned int irq) +static void ack_dynirq(struct irq_data *data) { - int evtchn = evtchn_from_irq(irq); + int evtchn = evtchn_from_irq(data->irq); - move_masked_irq(irq); + move_masked_irq(data->irq); if (VALID_EVTCHN(evtchn)) unmask_evtchn(evtchn); } -static int retrigger_dynirq(unsigned int irq) +static int retrigger_dynirq(struct irq_data *data) { - int evtchn = evtchn_from_irq(irq); + int evtchn = evtchn_from_irq(data->irq); struct shared_info *sh = HYPERVISOR_shared_info; int ret = 0; @@ -1315,7 +1322,7 @@ static void restore_cpu_pirqs(void) printk(KERN_DEBUG "xen: --> irq=%d, pirq=%d\n", irq, map_irq.pirq); - startup_pirq(irq); + __startup_pirq(irq); } } @@ -1467,44 +1474,44 @@ void xen_irq_resume(void) } static struct irq_chip xen_dynamic_chip 
__read_mostly = { - .name = "xen-dyn", + .name = "xen-dyn", - .disable = disable_dynirq, - .mask = disable_dynirq, - .unmask = enable_dynirq, + .irq_disable = disable_dynirq, + .irq_mask = disable_dynirq, + .irq_unmask = enable_dynirq, - .eoi = ack_dynirq, - .set_affinity = set_affinity_irq, - .retrigger = retrigger_dynirq, + .irq_eoi = ack_dynirq, + .irq_set_affinity = set_affinity_irq, + .irq_retrigger = retrigger_dynirq, }; static struct irq_chip xen_pirq_chip __read_mostly = { - .name = "xen-pirq", + .name = "xen-pirq", - .startup = startup_pirq, - .shutdown = shutdown_pirq, + .irq_startup = startup_pirq, + .irq_shutdown = shutdown_pirq, - .enable = enable_pirq, - .unmask = enable_pirq, + .irq_enable = enable_pirq, + .irq_unmask = enable_pirq, - .disable = disable_pirq, - .mask = disable_pirq, + .irq_disable = disable_pirq, + .irq_mask = disable_pirq, - .ack = ack_pirq, + .irq_ack = ack_pirq, - .set_affinity = set_affinity_irq, + .irq_set_affinity = set_affinity_irq, - .retrigger = retrigger_dynirq, + .irq_retrigger = retrigger_dynirq, }; static struct irq_chip xen_percpu_chip __read_mostly = { - .name = "xen-percpu", + .name = "xen-percpu", - .disable = disable_dynirq, - .mask = disable_dynirq, - .unmask = enable_dynirq, + .irq_disable = disable_dynirq, + .irq_mask = disable_dynirq, + .irq_unmask = enable_dynirq, - .ack = ack_dynirq, + .irq_ack = ack_dynirq, }; int xen_set_callback_via(uint64_t via) From aa673c1cb3a66d0b37595251c4e8bb688efc8726 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Mon, 7 Feb 2011 11:08:39 +0000 Subject: [PATCH 14/17] xen: Fix compile error introduced by "switch to new irq_chip functions" drivers/xen/events.c: In function 'ack_pirq': drivers/xen/events.c:568: error: implicit declaration of function 'irq_move_irq' Signed-off-by: Ian Campbell Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/events.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/xen/events.c b/drivers/xen/events.c index 
cf1712fb1c46..5aa422a3c3cd 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -553,7 +553,7 @@ static void ack_pirq(struct irq_data *data) { int evtchn = evtchn_from_irq(data->irq); - irq_move_irq(data); + move_native_irq(data->irq); if (VALID_EVTCHN(evtchn)) { mask_evtchn(evtchn); From f611f2da99420abc973c32cdbddbf5c365d0a20c Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Tue, 8 Feb 2011 14:03:31 +0000 Subject: [PATCH 15/17] xen/timer: Missing IRQF_NO_SUSPEND in timer code broke suspend. The patches missed an indirect use of IRQF_NO_SUSPEND pulled in via IRQF_TIMER. The following patch fixes the issue. With this fixlet PV guest migration works just fine. I also booted the entire series as a dom0 kernel and it appeared fine. Signed-off-by: Ian Campbell Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/time.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index 067759e3d6a5..2e2d370a47b1 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c @@ -397,7 +397,9 @@ void xen_setup_timer(int cpu) name = ""; irq = bind_virq_to_irqhandler(VIRQ_TIMER, cpu, xen_timer_interrupt, - IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING|IRQF_TIMER, + IRQF_DISABLED|IRQF_PERCPU| + IRQF_NOBALANCING|IRQF_TIMER| + IRQF_FORCE_RESUME, name, NULL); evt = &per_cpu(xen_clock_events, cpu); From 676dc3cf5bc36a9e129a3ad8fe3bd7b2ebf20f5d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 5 Feb 2011 20:08:59 +0000 Subject: [PATCH 16/17] xen: Use IRQF_FORCE_RESUME Mark the IRQF_NO_SUSPEND interrupts IRQF_FORCE_RESUME and remove the extra walk through the interrupt descriptors. 
Signed-off-by: Thomas Gleixner Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/events.c | 20 +------------------- 1 file changed, 1 insertion(+), 19 deletions(-) diff --git a/drivers/xen/events.c b/drivers/xen/events.c index 5aa422a3c3cd..975e90fa6d5a 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -977,7 +977,7 @@ int bind_ipi_to_irqhandler(enum ipi_vector ipi, if (irq < 0) return irq; - irqflags |= IRQF_NO_SUSPEND; + irqflags |= IRQF_NO_SUSPEND | IRQF_FORCE_RESUME; retval = request_irq(irq, handler, irqflags, devname, dev_id); if (retval != 0) { unbind_from_irq(irq); @@ -1433,7 +1433,6 @@ void xen_poll_irq(int irq) void xen_irq_resume(void) { unsigned int cpu, irq, evtchn; - struct irq_desc *desc; init_evtchn_cpu_bindings(); @@ -1453,23 +1452,6 @@ void xen_irq_resume(void) restore_cpu_ipis(cpu); } - /* - * Unmask any IRQF_NO_SUSPEND IRQs which are enabled. These - * are not handled by the IRQ core. - */ - for_each_irq_desc(irq, desc) { - if (!desc->action || !(desc->action->flags & IRQF_NO_SUSPEND)) - continue; - if (desc->status & IRQ_DISABLED) - continue; - - evtchn = evtchn_from_irq(irq); - if (evtchn == -1) - continue; - - unmask_evtchn(evtchn); - } - restore_cpu_pirqs(); } From 1aa0b51a033d4a1ec6d29d06487e053398afa21b Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 17 Feb 2011 11:23:58 -0500 Subject: [PATCH 17/17] xen/irq: Cleanup up the pirq_to_irq for DomU PV PCI passthrough guests as well. We only did this for PV guests that are xen_initial_domain() but there is no reason not to do this for other cases. The other case is only exercised when you pass in a PCI device to a PV guest _and_ the device in question.
Reviewed-by: Ian Campbell Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/events.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/xen/events.c b/drivers/xen/events.c index 975e90fa6d5a..89987a7bf26f 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -766,8 +766,9 @@ int xen_destroy_irq(int irq) printk(KERN_WARNING "unmap irq failed %d\n", rc); goto out; } - pirq_to_irq[info->u.pirq.pirq] = -1; } + pirq_to_irq[info->u.pirq.pirq] = -1; + irq_info[irq] = mk_unbound_info(); xen_free_irq(irq);