Files
linux/drivers/vfio/pci/vfio_pci_intrs.c

724 lines
17 KiB
C
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0-only
/*
* VFIO PCI interrupt handling
*
* Copyright (C) 2012 Red Hat, Inc. All rights reserved.
* Author: Alex Williamson <alex.williamson@redhat.com>
*
* Derived from original vfio:
* Copyright 2010 Cisco Systems, Inc. All rights reserved.
* Author: Tom Lyon, pugs@cisco.com
*/
#include <linux/device.h>
#include <linux/interrupt.h>
#include <linux/eventfd.h>
#include <linux/msi.h>
#include <linux/pci.h>
#include <linux/file.h>
#include <linux/vfio.h>
#include <linux/wait.h>
#include <linux/slab.h>
#include "vfio_pci_priv.h"
struct vfio_pci_irq_ctx {
struct eventfd_ctx *trigger;
struct virqfd *unmask;
struct virqfd *mask;
char *name;
bool masked;
struct irq_bypass_producer producer;
};
static bool irq_is(struct vfio_pci_core_device *vdev, int type)
{
return vdev->irq_type == type;
}
static bool is_intx(struct vfio_pci_core_device *vdev)
{
return vdev->irq_type == VFIO_PCI_INTX_IRQ_INDEX;
}
static bool is_irq_none(struct vfio_pci_core_device *vdev)
{
return !(vdev->irq_type == VFIO_PCI_INTX_IRQ_INDEX ||
vdev->irq_type == VFIO_PCI_MSI_IRQ_INDEX ||
vdev->irq_type == VFIO_PCI_MSIX_IRQ_INDEX);
}
/*
* INTx
*/
static void vfio_send_intx_eventfd(void *opaque, void *unused)
{
struct vfio_pci_core_device *vdev = opaque;
if (likely(is_intx(vdev) && !vdev->virq_disabled))
eventfd_signal(vdev->ctx[0].trigger, 1);
}
vfio/pci: Mask INTx during runtime suspend This patch adds INTx handling during runtime suspend/resume. All the suspend/resume related code for the user to put the device into the low power state will be added in subsequent patches. The INTx lines may be shared among devices. Whenever any INTx interrupt comes for the VFIO devices, then vfio_intx_handler() will be called for each device sharing the interrupt. Inside vfio_intx_handler(), it calls pci_check_and_mask_intx() and checks if the interrupt has been generated for the current device. Now, if the device is already in the D3cold state, then the config space can not be read. Attempt to read config space in D3cold state can cause system unresponsiveness in a few systems. To prevent this, mask INTx in runtime suspend callback, and unmask the same in runtime resume callback. If INTx has been already masked, then no handling is needed in runtime suspend/resume callbacks. 'pm_intx_masked' tracks this, and vfio_pci_intx_mask() has been updated to return true if the INTx vfio_pci_irq_ctx.masked value is changed inside this function. For the runtime suspend which is triggered for the no user of VFIO device, the 'irq_type' will be VFIO_PCI_NUM_IRQS and these callbacks won't do anything. The MSI/MSI-X are not shared so similar handling should not be needed for MSI/MSI-X. vfio_msihandler() triggers eventfd_signal() without doing any device-specific config access. When the user performs any config access or IOCTL after receiving the eventfd notification, then the device will be moved to the D0 state first before servicing any request. Another option was to check this flag 'pm_intx_masked' inside vfio_intx_handler() instead of masking the interrupts. This flag is being set inside the runtime_suspend callback but the device can be in non-D3cold state (for example, if the user has disabled D3cold explicitly by sysfs, the D3cold is not supported in the platform, etc.). Also, in D3cold supported case, the device will be in D0 till the PCI core moves the device into D3cold. In this case, there is a possibility that the device can generate an interrupt. Adding check in the IRQ handler will not clear the IRQ status and the interrupt line will still be asserted. This can cause interrupt flooding. Signed-off-by: Abhishek Sahu <abhsahu@nvidia.com> Link: https://lore.kernel.org/r/20220829114850.4341-4-abhsahu@nvidia.com Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
2022-08-29 17:18:48 +05:30
/* Returns true if the INTx vfio_pci_irq_ctx.masked value is changed. */
bool vfio_pci_intx_mask(struct vfio_pci_core_device *vdev)
{
struct pci_dev *pdev = vdev->pdev;
unsigned long flags;
vfio/pci: Mask INTx during runtime suspend This patch adds INTx handling during runtime suspend/resume. All the suspend/resume related code for the user to put the device into the low power state will be added in subsequent patches. The INTx lines may be shared among devices. Whenever any INTx interrupt comes for the VFIO devices, then vfio_intx_handler() will be called for each device sharing the interrupt. Inside vfio_intx_handler(), it calls pci_check_and_mask_intx() and checks if the interrupt has been generated for the current device. Now, if the device is already in the D3cold state, then the config space can not be read. Attempt to read config space in D3cold state can cause system unresponsiveness in a few systems. To prevent this, mask INTx in runtime suspend callback, and unmask the same in runtime resume callback. If INTx has been already masked, then no handling is needed in runtime suspend/resume callbacks. 'pm_intx_masked' tracks this, and vfio_pci_intx_mask() has been updated to return true if the INTx vfio_pci_irq_ctx.masked value is changed inside this function. For the runtime suspend which is triggered for the no user of VFIO device, the 'irq_type' will be VFIO_PCI_NUM_IRQS and these callbacks won't do anything. The MSI/MSI-X are not shared so similar handling should not be needed for MSI/MSI-X. vfio_msihandler() triggers eventfd_signal() without doing any device-specific config access. When the user performs any config access or IOCTL after receiving the eventfd notification, then the device will be moved to the D0 state first before servicing any request. Another option was to check this flag 'pm_intx_masked' inside vfio_intx_handler() instead of masking the interrupts. This flag is being set inside the runtime_suspend callback but the device can be in non-D3cold state (for example, if the user has disabled D3cold explicitly by sysfs, the D3cold is not supported in the platform, etc.). Also, in D3cold supported case, the device will be in D0 till the PCI core moves the device into D3cold. In this case, there is a possibility that the device can generate an interrupt. Adding check in the IRQ handler will not clear the IRQ status and the interrupt line will still be asserted. This can cause interrupt flooding. Signed-off-by: Abhishek Sahu <abhsahu@nvidia.com> Link: https://lore.kernel.org/r/20220829114850.4341-4-abhsahu@nvidia.com Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
2022-08-29 17:18:48 +05:30
bool masked_changed = false;
spin_lock_irqsave(&vdev->irqlock, flags);
/*
* Masking can come from interrupt, ioctl, or config space
* via INTx disable. The latter means this can get called
* even when not using intx delivery. In this case, just
* try to have the physical bit follow the virtual bit.
*/
if (unlikely(!is_intx(vdev))) {
if (vdev->pci_2_3)
pci_intx(pdev, 0);
} else if (!vdev->ctx[0].masked) {
/*
* Can't use check_and_mask here because we always want to
* mask, not just when something is pending.
*/
if (vdev->pci_2_3)
pci_intx(pdev, 0);
else
disable_irq_nosync(pdev->irq);
vdev->ctx[0].masked = true;
vfio/pci: Mask INTx during runtime suspend This patch adds INTx handling during runtime suspend/resume. All the suspend/resume related code for the user to put the device into the low power state will be added in subsequent patches. The INTx lines may be shared among devices. Whenever any INTx interrupt comes for the VFIO devices, then vfio_intx_handler() will be called for each device sharing the interrupt. Inside vfio_intx_handler(), it calls pci_check_and_mask_intx() and checks if the interrupt has been generated for the current device. Now, if the device is already in the D3cold state, then the config space can not be read. Attempt to read config space in D3cold state can cause system unresponsiveness in a few systems. To prevent this, mask INTx in runtime suspend callback, and unmask the same in runtime resume callback. If INTx has been already masked, then no handling is needed in runtime suspend/resume callbacks. 'pm_intx_masked' tracks this, and vfio_pci_intx_mask() has been updated to return true if the INTx vfio_pci_irq_ctx.masked value is changed inside this function. For the runtime suspend which is triggered for the no user of VFIO device, the 'irq_type' will be VFIO_PCI_NUM_IRQS and these callbacks won't do anything. The MSI/MSI-X are not shared so similar handling should not be needed for MSI/MSI-X. vfio_msihandler() triggers eventfd_signal() without doing any device-specific config access. When the user performs any config access or IOCTL after receiving the eventfd notification, then the device will be moved to the D0 state first before servicing any request. Another option was to check this flag 'pm_intx_masked' inside vfio_intx_handler() instead of masking the interrupts. This flag is being set inside the runtime_suspend callback but the device can be in non-D3cold state (for example, if the user has disabled D3cold explicitly by sysfs, the D3cold is not supported in the platform, etc.). Also, in D3cold supported case, the device will be in D0 till the PCI core moves the device into D3cold. In this case, there is a possibility that the device can generate an interrupt. Adding check in the IRQ handler will not clear the IRQ status and the interrupt line will still be asserted. This can cause interrupt flooding. Signed-off-by: Abhishek Sahu <abhsahu@nvidia.com> Link: https://lore.kernel.org/r/20220829114850.4341-4-abhsahu@nvidia.com Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
2022-08-29 17:18:48 +05:30
masked_changed = true;
}
spin_unlock_irqrestore(&vdev->irqlock, flags);
vfio/pci: Mask INTx during runtime suspend This patch adds INTx handling during runtime suspend/resume. All the suspend/resume related code for the user to put the device into the low power state will be added in subsequent patches. The INTx lines may be shared among devices. Whenever any INTx interrupt comes for the VFIO devices, then vfio_intx_handler() will be called for each device sharing the interrupt. Inside vfio_intx_handler(), it calls pci_check_and_mask_intx() and checks if the interrupt has been generated for the current device. Now, if the device is already in the D3cold state, then the config space can not be read. Attempt to read config space in D3cold state can cause system unresponsiveness in a few systems. To prevent this, mask INTx in runtime suspend callback, and unmask the same in runtime resume callback. If INTx has been already masked, then no handling is needed in runtime suspend/resume callbacks. 'pm_intx_masked' tracks this, and vfio_pci_intx_mask() has been updated to return true if the INTx vfio_pci_irq_ctx.masked value is changed inside this function. For the runtime suspend which is triggered for the no user of VFIO device, the 'irq_type' will be VFIO_PCI_NUM_IRQS and these callbacks won't do anything. The MSI/MSI-X are not shared so similar handling should not be needed for MSI/MSI-X. vfio_msihandler() triggers eventfd_signal() without doing any device-specific config access. When the user performs any config access or IOCTL after receiving the eventfd notification, then the device will be moved to the D0 state first before servicing any request. Another option was to check this flag 'pm_intx_masked' inside vfio_intx_handler() instead of masking the interrupts. This flag is being set inside the runtime_suspend callback but the device can be in non-D3cold state (for example, if the user has disabled D3cold explicitly by sysfs, the D3cold is not supported in the platform, etc.). Also, in D3cold supported case, the device will be in D0 till the PCI core moves the device into D3cold. In this case, there is a possibility that the device can generate an interrupt. Adding check in the IRQ handler will not clear the IRQ status and the interrupt line will still be asserted. This can cause interrupt flooding. Signed-off-by: Abhishek Sahu <abhsahu@nvidia.com> Link: https://lore.kernel.org/r/20220829114850.4341-4-abhsahu@nvidia.com Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
2022-08-29 17:18:48 +05:30
return masked_changed;
}
/*
* If this is triggered by an eventfd, we can't call eventfd_signal
* or else we'll deadlock on the eventfd wait queue. Return >0 when
* a signal is necessary, which can then be handled via a work queue
* or directly depending on the caller.
*/
static int vfio_pci_intx_unmask_handler(void *opaque, void *unused)
{
struct vfio_pci_core_device *vdev = opaque;
struct pci_dev *pdev = vdev->pdev;
unsigned long flags;
int ret = 0;
spin_lock_irqsave(&vdev->irqlock, flags);
/*
* Unmasking comes from ioctl or config, so again, have the
* physical bit follow the virtual even when not using INTx.
*/
if (unlikely(!is_intx(vdev))) {
if (vdev->pci_2_3)
pci_intx(pdev, 1);
} else if (vdev->ctx[0].masked && !vdev->virq_disabled) {
/*
* A pending interrupt here would immediately trigger,
* but we can avoid that overhead by just re-sending
* the interrupt to the user.
*/
if (vdev->pci_2_3) {
if (!pci_check_and_unmask_intx(pdev))
ret = 1;
} else
enable_irq(pdev->irq);
vdev->ctx[0].masked = (ret > 0);
}
spin_unlock_irqrestore(&vdev->irqlock, flags);
return ret;
}
void vfio_pci_intx_unmask(struct vfio_pci_core_device *vdev)
{
if (vfio_pci_intx_unmask_handler(vdev, NULL) > 0)
vfio_send_intx_eventfd(vdev, NULL);
}
static irqreturn_t vfio_intx_handler(int irq, void *dev_id)
{
struct vfio_pci_core_device *vdev = dev_id;
unsigned long flags;
int ret = IRQ_NONE;
spin_lock_irqsave(&vdev->irqlock, flags);
if (!vdev->pci_2_3) {
disable_irq_nosync(vdev->pdev->irq);
vdev->ctx[0].masked = true;
ret = IRQ_HANDLED;
} else if (!vdev->ctx[0].masked && /* may be shared */
pci_check_and_mask_intx(vdev->pdev)) {
vdev->ctx[0].masked = true;
ret = IRQ_HANDLED;
}
spin_unlock_irqrestore(&vdev->irqlock, flags);
if (ret == IRQ_HANDLED)
vfio_send_intx_eventfd(vdev, NULL);
return ret;
}
static int vfio_intx_enable(struct vfio_pci_core_device *vdev)
{
if (!is_irq_none(vdev))
return -EINVAL;
if (!vdev->pdev->irq)
return -ENODEV;
vdev->ctx = kzalloc(sizeof(struct vfio_pci_irq_ctx), GFP_KERNEL);
if (!vdev->ctx)
return -ENOMEM;
vdev->num_ctx = 1;
/*
* If the virtual interrupt is masked, restore it. Devices
* supporting DisINTx can be masked at the hardware level
* here, non-PCI-2.3 devices will have to wait until the
* interrupt is enabled.
*/
vdev->ctx[0].masked = vdev->virq_disabled;
if (vdev->pci_2_3)
pci_intx(vdev->pdev, !vdev->ctx[0].masked);
vdev->irq_type = VFIO_PCI_INTX_IRQ_INDEX;
return 0;
}
static int vfio_intx_set_signal(struct vfio_pci_core_device *vdev, int fd)
{
struct pci_dev *pdev = vdev->pdev;
unsigned long irqflags = IRQF_SHARED;
struct eventfd_ctx *trigger;
unsigned long flags;
int ret;
if (vdev->ctx[0].trigger) {
free_irq(pdev->irq, vdev);
kfree(vdev->ctx[0].name);
eventfd_ctx_put(vdev->ctx[0].trigger);
vdev->ctx[0].trigger = NULL;
}
if (fd < 0) /* Disable only */
return 0;
vdev->ctx[0].name = kasprintf(GFP_KERNEL, "vfio-intx(%s)",
pci_name(pdev));
if (!vdev->ctx[0].name)
return -ENOMEM;
trigger = eventfd_ctx_fdget(fd);
if (IS_ERR(trigger)) {
kfree(vdev->ctx[0].name);
return PTR_ERR(trigger);
}
vdev->ctx[0].trigger = trigger;
if (!vdev->pci_2_3)
irqflags = 0;
ret = request_irq(pdev->irq, vfio_intx_handler,
irqflags, vdev->ctx[0].name, vdev);
if (ret) {
vdev->ctx[0].trigger = NULL;
kfree(vdev->ctx[0].name);
eventfd_ctx_put(trigger);
return ret;
}
/*
* INTx disable will stick across the new irq setup,
* disable_irq won't.
*/
spin_lock_irqsave(&vdev->irqlock, flags);
if (!vdev->pci_2_3 && vdev->ctx[0].masked)
disable_irq_nosync(pdev->irq);
spin_unlock_irqrestore(&vdev->irqlock, flags);
return 0;
}
static void vfio_intx_disable(struct vfio_pci_core_device *vdev)
{
vfio_virqfd_disable(&vdev->ctx[0].unmask);
vfio_virqfd_disable(&vdev->ctx[0].mask);
vfio_intx_set_signal(vdev, -1);
vdev->irq_type = VFIO_PCI_NUM_IRQS;
vdev->num_ctx = 0;
kfree(vdev->ctx);
}
/*
* MSI/MSI-X
*/
static irqreturn_t vfio_msihandler(int irq, void *arg)
{
struct eventfd_ctx *trigger = arg;
eventfd_signal(trigger, 1);
return IRQ_HANDLED;
}
static int vfio_msi_enable(struct vfio_pci_core_device *vdev, int nvec, bool msix)
{
struct pci_dev *pdev = vdev->pdev;
unsigned int flag = msix ? PCI_IRQ_MSIX : PCI_IRQ_MSI;
int ret;
vfio-pci: Invalidate mmaps and block MMIO access on disabled memory Accessing the disabled memory space of a PCI device would typically result in a master abort response on conventional PCI, or an unsupported request on PCI express. The user would generally see these as a -1 response for the read return data and the write would be silently discarded, possibly with an uncorrected, non-fatal AER error triggered on the host. Some systems however take it upon themselves to bring down the entire system when they see something that might indicate a loss of data, such as this discarded write to a disabled memory space. To avoid this, we want to try to block the user from accessing memory spaces while they're disabled. We start with a semaphore around the memory enable bit, where writers modify the memory enable state and must be serialized, while readers make use of the memory region and can access in parallel. Writers include both direct manipulation via the command register, as well as any reset path where the internal mechanics of the reset may both explicitly and implicitly disable memory access, and manipulation of the MSI-X configuration, where the MSI-X vector table resides in MMIO space of the device. Readers include the read and write file ops to access the vfio device fd offsets as well as memory mapped access. In the latter case, we make use of our new vma list support to zap, or invalidate, those memory mappings in order to force them to be faulted back in on access. Our semaphore usage will stall user access to MMIO spaces across internal operations like reset, but the user might experience new behavior when trying to access the MMIO space while disabled via the PCI command register. Access via read or write while disabled will return -EIO and access via memory maps will result in a SIGBUS. This is expected to be compatible with known use cases and potentially provides better error handling capabilities than present in the hardware, while avoiding the more readily accessible and severe platform error responses that might otherwise occur. Fixes: CVE-2020-12888 Reviewed-by: Peter Xu <peterx@redhat.com> Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
2020-04-22 13:48:11 -06:00
u16 cmd;
if (!is_irq_none(vdev))
return -EINVAL;
vdev->ctx = kcalloc(nvec, sizeof(struct vfio_pci_irq_ctx), GFP_KERNEL);
if (!vdev->ctx)
return -ENOMEM;
/* return the number of supported vectors if we can't get all: */
vfio-pci: Invalidate mmaps and block MMIO access on disabled memory Accessing the disabled memory space of a PCI device would typically result in a master abort response on conventional PCI, or an unsupported request on PCI express. The user would generally see these as a -1 response for the read return data and the write would be silently discarded, possibly with an uncorrected, non-fatal AER error triggered on the host. Some systems however take it upon themselves to bring down the entire system when they see something that might indicate a loss of data, such as this discarded write to a disabled memory space. To avoid this, we want to try to block the user from accessing memory spaces while they're disabled. We start with a semaphore around the memory enable bit, where writers modify the memory enable state and must be serialized, while readers make use of the memory region and can access in parallel. Writers include both direct manipulation via the command register, as well as any reset path where the internal mechanics of the reset may both explicitly and implicitly disable memory access, and manipulation of the MSI-X configuration, where the MSI-X vector table resides in MMIO space of the device. Readers include the read and write file ops to access the vfio device fd offsets as well as memory mapped access. In the latter case, we make use of our new vma list support to zap, or invalidate, those memory mappings in order to force them to be faulted back in on access. Our semaphore usage will stall user access to MMIO spaces across internal operations like reset, but the user might experience new behavior when trying to access the MMIO space while disabled via the PCI command register. Access via read or write while disabled will return -EIO and access via memory maps will result in a SIGBUS. This is expected to be compatible with known use cases and potentially provides better error handling capabilities than present in the hardware, while avoiding the more readily accessible and severe platform error responses that might otherwise occur. Fixes: CVE-2020-12888 Reviewed-by: Peter Xu <peterx@redhat.com> Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
2020-04-22 13:48:11 -06:00
cmd = vfio_pci_memory_lock_and_enable(vdev);
ret = pci_alloc_irq_vectors(pdev, 1, nvec, flag);
if (ret < nvec) {
if (ret > 0)
pci_free_irq_vectors(pdev);
vfio-pci: Invalidate mmaps and block MMIO access on disabled memory Accessing the disabled memory space of a PCI device would typically result in a master abort response on conventional PCI, or an unsupported request on PCI express. The user would generally see these as a -1 response for the read return data and the write would be silently discarded, possibly with an uncorrected, non-fatal AER error triggered on the host. Some systems however take it upon themselves to bring down the entire system when they see something that might indicate a loss of data, such as this discarded write to a disabled memory space. To avoid this, we want to try to block the user from accessing memory spaces while they're disabled. We start with a semaphore around the memory enable bit, where writers modify the memory enable state and must be serialized, while readers make use of the memory region and can access in parallel. Writers include both direct manipulation via the command register, as well as any reset path where the internal mechanics of the reset may both explicitly and implicitly disable memory access, and manipulation of the MSI-X configuration, where the MSI-X vector table resides in MMIO space of the device. Readers include the read and write file ops to access the vfio device fd offsets as well as memory mapped access. In the latter case, we make use of our new vma list support to zap, or invalidate, those memory mappings in order to force them to be faulted back in on access. Our semaphore usage will stall user access to MMIO spaces across internal operations like reset, but the user might experience new behavior when trying to access the MMIO space while disabled via the PCI command register. Access via read or write while disabled will return -EIO and access via memory maps will result in a SIGBUS. This is expected to be compatible with known use cases and potentially provides better error handling capabilities than present in the hardware, while avoiding the more readily accessible and severe platform error responses that might otherwise occur. Fixes: CVE-2020-12888 Reviewed-by: Peter Xu <peterx@redhat.com> Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
2020-04-22 13:48:11 -06:00
vfio_pci_memory_unlock_and_restore(vdev, cmd);
kfree(vdev->ctx);
return ret;
}
vfio-pci: Invalidate mmaps and block MMIO access on disabled memory Accessing the disabled memory space of a PCI device would typically result in a master abort response on conventional PCI, or an unsupported request on PCI express. The user would generally see these as a -1 response for the read return data and the write would be silently discarded, possibly with an uncorrected, non-fatal AER error triggered on the host. Some systems however take it upon themselves to bring down the entire system when they see something that might indicate a loss of data, such as this discarded write to a disabled memory space. To avoid this, we want to try to block the user from accessing memory spaces while they're disabled. We start with a semaphore around the memory enable bit, where writers modify the memory enable state and must be serialized, while readers make use of the memory region and can access in parallel. Writers include both direct manipulation via the command register, as well as any reset path where the internal mechanics of the reset may both explicitly and implicitly disable memory access, and manipulation of the MSI-X configuration, where the MSI-X vector table resides in MMIO space of the device. Readers include the read and write file ops to access the vfio device fd offsets as well as memory mapped access. In the latter case, we make use of our new vma list support to zap, or invalidate, those memory mappings in order to force them to be faulted back in on access. Our semaphore usage will stall user access to MMIO spaces across internal operations like reset, but the user might experience new behavior when trying to access the MMIO space while disabled via the PCI command register. Access via read or write while disabled will return -EIO and access via memory maps will result in a SIGBUS. This is expected to be compatible with known use cases and potentially provides better error handling capabilities than present in the hardware, while avoiding the more readily accessible and severe platform error responses that might otherwise occur. Fixes: CVE-2020-12888 Reviewed-by: Peter Xu <peterx@redhat.com> Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
2020-04-22 13:48:11 -06:00
vfio_pci_memory_unlock_and_restore(vdev, cmd);
vdev->num_ctx = nvec;
vdev->irq_type = msix ? VFIO_PCI_MSIX_IRQ_INDEX :
VFIO_PCI_MSI_IRQ_INDEX;
if (!msix) {
/*
* Compute the virtual hardware field for max msi vectors -
* it is the log base 2 of the number of vectors.
*/
vdev->msi_qmax = fls(nvec * 2 - 1) - 1;
}
return 0;
}
static int vfio_msi_set_vector_signal(struct vfio_pci_core_device *vdev,
int vector, int fd, bool msix)
{
struct pci_dev *pdev = vdev->pdev;
struct eventfd_ctx *trigger;
int irq, ret;
vfio-pci: Invalidate mmaps and block MMIO access on disabled memory Accessing the disabled memory space of a PCI device would typically result in a master abort response on conventional PCI, or an unsupported request on PCI express. The user would generally see these as a -1 response for the read return data and the write would be silently discarded, possibly with an uncorrected, non-fatal AER error triggered on the host. Some systems however take it upon themselves to bring down the entire system when they see something that might indicate a loss of data, such as this discarded write to a disabled memory space. To avoid this, we want to try to block the user from accessing memory spaces while they're disabled. We start with a semaphore around the memory enable bit, where writers modify the memory enable state and must be serialized, while readers make use of the memory region and can access in parallel. Writers include both direct manipulation via the command register, as well as any reset path where the internal mechanics of the reset may both explicitly and implicitly disable memory access, and manipulation of the MSI-X configuration, where the MSI-X vector table resides in MMIO space of the device. Readers include the read and write file ops to access the vfio device fd offsets as well as memory mapped access. In the latter case, we make use of our new vma list support to zap, or invalidate, those memory mappings in order to force them to be faulted back in on access. Our semaphore usage will stall user access to MMIO spaces across internal operations like reset, but the user might experience new behavior when trying to access the MMIO space while disabled via the PCI command register. Access via read or write while disabled will return -EIO and access via memory maps will result in a SIGBUS. This is expected to be compatible with known use cases and potentially provides better error handling capabilities than present in the hardware, while avoiding the more readily accessible and severe platform error responses that might otherwise occur. Fixes: CVE-2020-12888 Reviewed-by: Peter Xu <peterx@redhat.com> Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
2020-04-22 13:48:11 -06:00
u16 cmd;
if (vector < 0 || vector >= vdev->num_ctx)
return -EINVAL;
irq = pci_irq_vector(pdev, vector);
if (vdev->ctx[vector].trigger) {
irq_bypass_unregister_producer(&vdev->ctx[vector].producer);
vfio-pci: Invalidate mmaps and block MMIO access on disabled memory Accessing the disabled memory space of a PCI device would typically result in a master abort response on conventional PCI, or an unsupported request on PCI express. The user would generally see these as a -1 response for the read return data and the write would be silently discarded, possibly with an uncorrected, non-fatal AER error triggered on the host. Some systems however take it upon themselves to bring down the entire system when they see something that might indicate a loss of data, such as this discarded write to a disabled memory space. To avoid this, we want to try to block the user from accessing memory spaces while they're disabled. We start with a semaphore around the memory enable bit, where writers modify the memory enable state and must be serialized, while readers make use of the memory region and can access in parallel. Writers include both direct manipulation via the command register, as well as any reset path where the internal mechanics of the reset may both explicitly and implicitly disable memory access, and manipulation of the MSI-X configuration, where the MSI-X vector table resides in MMIO space of the device. Readers include the read and write file ops to access the vfio device fd offsets as well as memory mapped access. In the latter case, we make use of our new vma list support to zap, or invalidate, those memory mappings in order to force them to be faulted back in on access. Our semaphore usage will stall user access to MMIO spaces across internal operations like reset, but the user might experience new behavior when trying to access the MMIO space while disabled via the PCI command register. Access via read or write while disabled will return -EIO and access via memory maps will result in a SIGBUS. This is expected to be compatible with known use cases and potentially provides better error handling capabilities than present in the hardware, while avoiding the more readily accessible and severe platform error responses that might otherwise occur. Fixes: CVE-2020-12888 Reviewed-by: Peter Xu <peterx@redhat.com> Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
2020-04-22 13:48:11 -06:00
cmd = vfio_pci_memory_lock_and_enable(vdev);
free_irq(irq, vdev->ctx[vector].trigger);
vfio-pci: Invalidate mmaps and block MMIO access on disabled memory Accessing the disabled memory space of a PCI device would typically result in a master abort response on conventional PCI, or an unsupported request on PCI express. The user would generally see these as a -1 response for the read return data and the write would be silently discarded, possibly with an uncorrected, non-fatal AER error triggered on the host. Some systems however take it upon themselves to bring down the entire system when they see something that might indicate a loss of data, such as this discarded write to a disabled memory space. To avoid this, we want to try to block the user from accessing memory spaces while they're disabled. We start with a semaphore around the memory enable bit, where writers modify the memory enable state and must be serialized, while readers make use of the memory region and can access in parallel. Writers include both direct manipulation via the command register, as well as any reset path where the internal mechanics of the reset may both explicitly and implicitly disable memory access, and manipulation of the MSI-X configuration, where the MSI-X vector table resides in MMIO space of the device. Readers include the read and write file ops to access the vfio device fd offsets as well as memory mapped access. In the latter case, we make use of our new vma list support to zap, or invalidate, those memory mappings in order to force them to be faulted back in on access. Our semaphore usage will stall user access to MMIO spaces across internal operations like reset, but the user might experience new behavior when trying to access the MMIO space while disabled via the PCI command register. Access via read or write while disabled will return -EIO and access via memory maps will result in a SIGBUS. This is expected to be compatible with known use cases and potentially provides better error handling capabilities than present in the hardware, while avoiding the more readily accessible and severe platform error responses that might otherwise occur. Fixes: CVE-2020-12888 Reviewed-by: Peter Xu <peterx@redhat.com> Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
2020-04-22 13:48:11 -06:00
vfio_pci_memory_unlock_and_restore(vdev, cmd);
kfree(vdev->ctx[vector].name);
eventfd_ctx_put(vdev->ctx[vector].trigger);
vdev->ctx[vector].trigger = NULL;
}
if (fd < 0)
return 0;
vdev->ctx[vector].name = kasprintf(GFP_KERNEL, "vfio-msi%s[%d](%s)",
msix ? "x" : "", vector,
pci_name(pdev));
if (!vdev->ctx[vector].name)
return -ENOMEM;
trigger = eventfd_ctx_fdget(fd);
if (IS_ERR(trigger)) {
kfree(vdev->ctx[vector].name);
return PTR_ERR(trigger);
}
/*
* The MSIx vector table resides in device memory which may be cleared
* via backdoor resets. We don't allow direct access to the vector
* table so even if a userspace driver attempts to save/restore around
* such a reset it would be unsuccessful. To avoid this, restore the
* cached value of the message prior to enabling.
*/
vfio-pci: Invalidate mmaps and block MMIO access on disabled memory Accessing the disabled memory space of a PCI device would typically result in a master abort response on conventional PCI, or an unsupported request on PCI express. The user would generally see these as a -1 response for the read return data and the write would be silently discarded, possibly with an uncorrected, non-fatal AER error triggered on the host. Some systems however take it upon themselves to bring down the entire system when they see something that might indicate a loss of data, such as this discarded write to a disabled memory space. To avoid this, we want to try to block the user from accessing memory spaces while they're disabled. We start with a semaphore around the memory enable bit, where writers modify the memory enable state and must be serialized, while readers make use of the memory region and can access in parallel. Writers include both direct manipulation via the command register, as well as any reset path where the internal mechanics of the reset may both explicitly and implicitly disable memory access, and manipulation of the MSI-X configuration, where the MSI-X vector table resides in MMIO space of the device. Readers include the read and write file ops to access the vfio device fd offsets as well as memory mapped access. In the latter case, we make use of our new vma list support to zap, or invalidate, those memory mappings in order to force them to be faulted back in on access. Our semaphore usage will stall user access to MMIO spaces across internal operations like reset, but the user might experience new behavior when trying to access the MMIO space while disabled via the PCI command register. Access via read or write while disabled will return -EIO and access via memory maps will result in a SIGBUS. This is expected to be compatible with known use cases and potentially provides better error handling capabilities than present in the hardware, while avoiding the more readily accessible and severe platform error responses that might otherwise occur. Fixes: CVE-2020-12888 Reviewed-by: Peter Xu <peterx@redhat.com> Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
2020-04-22 13:48:11 -06:00
cmd = vfio_pci_memory_lock_and_enable(vdev);
if (msix) {
struct msi_msg msg;
get_cached_msi_msg(irq, &msg);
pci_write_msi_msg(irq, &msg);
}
ret = request_irq(irq, vfio_msihandler, 0,
vdev->ctx[vector].name, trigger);
vfio-pci: Invalidate mmaps and block MMIO access on disabled memory Accessing the disabled memory space of a PCI device would typically result in a master abort response on conventional PCI, or an unsupported request on PCI express. The user would generally see these as a -1 response for the read return data and the write would be silently discarded, possibly with an uncorrected, non-fatal AER error triggered on the host. Some systems however take it upon themselves to bring down the entire system when they see something that might indicate a loss of data, such as this discarded write to a disabled memory space. To avoid this, we want to try to block the user from accessing memory spaces while they're disabled. We start with a semaphore around the memory enable bit, where writers modify the memory enable state and must be serialized, while readers make use of the memory region and can access in parallel. Writers include both direct manipulation via the command register, as well as any reset path where the internal mechanics of the reset may both explicitly and implicitly disable memory access, and manipulation of the MSI-X configuration, where the MSI-X vector table resides in MMIO space of the device. Readers include the read and write file ops to access the vfio device fd offsets as well as memory mapped access. In the latter case, we make use of our new vma list support to zap, or invalidate, those memory mappings in order to force them to be faulted back in on access. Our semaphore usage will stall user access to MMIO spaces across internal operations like reset, but the user might experience new behavior when trying to access the MMIO space while disabled via the PCI command register. Access via read or write while disabled will return -EIO and access via memory maps will result in a SIGBUS. This is expected to be compatible with known use cases and potentially provides better error handling capabilities than present in the hardware, while avoiding the more readily accessible and severe platform error responses that might otherwise occur. Fixes: CVE-2020-12888 Reviewed-by: Peter Xu <peterx@redhat.com> Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
2020-04-22 13:48:11 -06:00
vfio_pci_memory_unlock_and_restore(vdev, cmd);
if (ret) {
kfree(vdev->ctx[vector].name);
eventfd_ctx_put(trigger);
return ret;
}
vdev->ctx[vector].producer.token = trigger;
vdev->ctx[vector].producer.irq = irq;
ret = irq_bypass_register_producer(&vdev->ctx[vector].producer);
if (unlikely(ret)) {
dev_info(&pdev->dev,
"irq bypass producer (token %p) registration fails: %d\n",
vdev->ctx[vector].producer.token, ret);
vdev->ctx[vector].producer.token = NULL;
}
vdev->ctx[vector].trigger = trigger;
return 0;
}
static int vfio_msi_set_block(struct vfio_pci_core_device *vdev, unsigned start,
unsigned count, int32_t *fds, bool msix)
{
int i, j, ret = 0;
if (start >= vdev->num_ctx || start + count > vdev->num_ctx)
return -EINVAL;
for (i = 0, j = start; i < count && !ret; i++, j++) {
int fd = fds ? fds[i] : -1;
ret = vfio_msi_set_vector_signal(vdev, j, fd, msix);
}
if (ret) {
for (--j; j >= (int)start; j--)
vfio_msi_set_vector_signal(vdev, j, -1, msix);
}
return ret;
}
static void vfio_msi_disable(struct vfio_pci_core_device *vdev, bool msix)
{
struct pci_dev *pdev = vdev->pdev;
int i;
vfio-pci: Invalidate mmaps and block MMIO access on disabled memory Accessing the disabled memory space of a PCI device would typically result in a master abort response on conventional PCI, or an unsupported request on PCI express. The user would generally see these as a -1 response for the read return data and the write would be silently discarded, possibly with an uncorrected, non-fatal AER error triggered on the host. Some systems however take it upon themselves to bring down the entire system when they see something that might indicate a loss of data, such as this discarded write to a disabled memory space. To avoid this, we want to try to block the user from accessing memory spaces while they're disabled. We start with a semaphore around the memory enable bit, where writers modify the memory enable state and must be serialized, while readers make use of the memory region and can access in parallel. Writers include both direct manipulation via the command register, as well as any reset path where the internal mechanics of the reset may both explicitly and implicitly disable memory access, and manipulation of the MSI-X configuration, where the MSI-X vector table resides in MMIO space of the device. Readers include the read and write file ops to access the vfio device fd offsets as well as memory mapped access. In the latter case, we make use of our new vma list support to zap, or invalidate, those memory mappings in order to force them to be faulted back in on access. Our semaphore usage will stall user access to MMIO spaces across internal operations like reset, but the user might experience new behavior when trying to access the MMIO space while disabled via the PCI command register. Access via read or write while disabled will return -EIO and access via memory maps will result in a SIGBUS. This is expected to be compatible with known use cases and potentially provides better error handling capabilities than present in the hardware, while avoiding the more readily accessible and severe platform error responses that might otherwise occur. Fixes: CVE-2020-12888 Reviewed-by: Peter Xu <peterx@redhat.com> Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
2020-04-22 13:48:11 -06:00
u16 cmd;
for (i = 0; i < vdev->num_ctx; i++) {
vfio_virqfd_disable(&vdev->ctx[i].unmask);
vfio_virqfd_disable(&vdev->ctx[i].mask);
}
vfio_msi_set_block(vdev, 0, vdev->num_ctx, NULL, msix);
vfio-pci: Invalidate mmaps and block MMIO access on disabled memory Accessing the disabled memory space of a PCI device would typically result in a master abort response on conventional PCI, or an unsupported request on PCI express. The user would generally see these as a -1 response for the read return data and the write would be silently discarded, possibly with an uncorrected, non-fatal AER error triggered on the host. Some systems however take it upon themselves to bring down the entire system when they see something that might indicate a loss of data, such as this discarded write to a disabled memory space. To avoid this, we want to try to block the user from accessing memory spaces while they're disabled. We start with a semaphore around the memory enable bit, where writers modify the memory enable state and must be serialized, while readers make use of the memory region and can access in parallel. Writers include both direct manipulation via the command register, as well as any reset path where the internal mechanics of the reset may both explicitly and implicitly disable memory access, and manipulation of the MSI-X configuration, where the MSI-X vector table resides in MMIO space of the device. Readers include the read and write file ops to access the vfio device fd offsets as well as memory mapped access. In the latter case, we make use of our new vma list support to zap, or invalidate, those memory mappings in order to force them to be faulted back in on access. Our semaphore usage will stall user access to MMIO spaces across internal operations like reset, but the user might experience new behavior when trying to access the MMIO space while disabled via the PCI command register. Access via read or write while disabled will return -EIO and access via memory maps will result in a SIGBUS. This is expected to be compatible with known use cases and potentially provides better error handling capabilities than present in the hardware, while avoiding the more readily accessible and severe platform error responses that might otherwise occur. Fixes: CVE-2020-12888 Reviewed-by: Peter Xu <peterx@redhat.com> Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
2020-04-22 13:48:11 -06:00
cmd = vfio_pci_memory_lock_and_enable(vdev);
pci_free_irq_vectors(pdev);
vfio-pci: Invalidate mmaps and block MMIO access on disabled memory Accessing the disabled memory space of a PCI device would typically result in a master abort response on conventional PCI, or an unsupported request on PCI express. The user would generally see these as a -1 response for the read return data and the write would be silently discarded, possibly with an uncorrected, non-fatal AER error triggered on the host. Some systems however take it upon themselves to bring down the entire system when they see something that might indicate a loss of data, such as this discarded write to a disabled memory space. To avoid this, we want to try to block the user from accessing memory spaces while they're disabled. We start with a semaphore around the memory enable bit, where writers modify the memory enable state and must be serialized, while readers make use of the memory region and can access in parallel. Writers include both direct manipulation via the command register, as well as any reset path where the internal mechanics of the reset may both explicitly and implicitly disable memory access, and manipulation of the MSI-X configuration, where the MSI-X vector table resides in MMIO space of the device. Readers include the read and write file ops to access the vfio device fd offsets as well as memory mapped access. In the latter case, we make use of our new vma list support to zap, or invalidate, those memory mappings in order to force them to be faulted back in on access. Our semaphore usage will stall user access to MMIO spaces across internal operations like reset, but the user might experience new behavior when trying to access the MMIO space while disabled via the PCI command register. Access via read or write while disabled will return -EIO and access via memory maps will result in a SIGBUS. This is expected to be compatible with known use cases and potentially provides better error handling capabilities than present in the hardware, while avoiding the more readily accessible and severe platform error responses that might otherwise occur. Fixes: CVE-2020-12888 Reviewed-by: Peter Xu <peterx@redhat.com> Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
2020-04-22 13:48:11 -06:00
vfio_pci_memory_unlock_and_restore(vdev, cmd);
/*
* Both disable paths above use pci_intx_for_msi() to clear DisINTx
* via their shutdown paths. Restore for NoINTx devices.
*/
if (vdev->nointx)
pci_intx(pdev, 0);
vdev->irq_type = VFIO_PCI_NUM_IRQS;
vdev->num_ctx = 0;
kfree(vdev->ctx);
}
/*
* IOCTL support
*/
static int vfio_pci_set_intx_unmask(struct vfio_pci_core_device *vdev,
unsigned index, unsigned start,
unsigned count, uint32_t flags, void *data)
{
if (!is_intx(vdev) || start != 0 || count != 1)
return -EINVAL;
if (flags & VFIO_IRQ_SET_DATA_NONE) {
vfio_pci_intx_unmask(vdev);
} else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
uint8_t unmask = *(uint8_t *)data;
if (unmask)
vfio_pci_intx_unmask(vdev);
} else if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
int32_t fd = *(int32_t *)data;
if (fd >= 0)
return vfio_virqfd_enable((void *) vdev,
vfio_pci_intx_unmask_handler,
vfio_send_intx_eventfd, NULL,
&vdev->ctx[0].unmask, fd);
vfio_virqfd_disable(&vdev->ctx[0].unmask);
}
return 0;
}
static int vfio_pci_set_intx_mask(struct vfio_pci_core_device *vdev,
unsigned index, unsigned start,
unsigned count, uint32_t flags, void *data)
{
if (!is_intx(vdev) || start != 0 || count != 1)
return -EINVAL;
if (flags & VFIO_IRQ_SET_DATA_NONE) {
vfio_pci_intx_mask(vdev);
} else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
uint8_t mask = *(uint8_t *)data;
if (mask)
vfio_pci_intx_mask(vdev);
} else if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
return -ENOTTY; /* XXX implement me */
}
return 0;
}
static int vfio_pci_set_intx_trigger(struct vfio_pci_core_device *vdev,
unsigned index, unsigned start,
unsigned count, uint32_t flags, void *data)
{
if (is_intx(vdev) && !count && (flags & VFIO_IRQ_SET_DATA_NONE)) {
vfio_intx_disable(vdev);
return 0;
}
if (!(is_intx(vdev) || is_irq_none(vdev)) || start != 0 || count != 1)
return -EINVAL;
if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
int32_t fd = *(int32_t *)data;
int ret;
if (is_intx(vdev))
return vfio_intx_set_signal(vdev, fd);
ret = vfio_intx_enable(vdev);
if (ret)
return ret;
ret = vfio_intx_set_signal(vdev, fd);
if (ret)
vfio_intx_disable(vdev);
return ret;
}
if (!is_intx(vdev))
return -EINVAL;
if (flags & VFIO_IRQ_SET_DATA_NONE) {
vfio_send_intx_eventfd(vdev, NULL);
} else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
uint8_t trigger = *(uint8_t *)data;
if (trigger)
vfio_send_intx_eventfd(vdev, NULL);
}
return 0;
}
static int vfio_pci_set_msi_trigger(struct vfio_pci_core_device *vdev,
unsigned index, unsigned start,
unsigned count, uint32_t flags, void *data)
{
int i;
bool msix = (index == VFIO_PCI_MSIX_IRQ_INDEX) ? true : false;
if (irq_is(vdev, index) && !count && (flags & VFIO_IRQ_SET_DATA_NONE)) {
vfio_msi_disable(vdev, msix);
return 0;
}
if (!(irq_is(vdev, index) || is_irq_none(vdev)))
return -EINVAL;
if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
int32_t *fds = data;
int ret;
if (vdev->irq_type == index)
return vfio_msi_set_block(vdev, start, count,
fds, msix);
ret = vfio_msi_enable(vdev, start + count, msix);
if (ret)
return ret;
ret = vfio_msi_set_block(vdev, start, count, fds, msix);
if (ret)
vfio_msi_disable(vdev, msix);
return ret;
}
if (!irq_is(vdev, index) || start + count > vdev->num_ctx)
return -EINVAL;
for (i = start; i < start + count; i++) {
if (!vdev->ctx[i].trigger)
continue;
if (flags & VFIO_IRQ_SET_DATA_NONE) {
eventfd_signal(vdev->ctx[i].trigger, 1);
} else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
uint8_t *bools = data;
if (bools[i - start])
eventfd_signal(vdev->ctx[i].trigger, 1);
}
}
return 0;
}
static int vfio_pci_set_ctx_trigger_single(struct eventfd_ctx **ctx,
unsigned int count, uint32_t flags,
void *data)
{
/* DATA_NONE/DATA_BOOL enables loopback testing */
if (flags & VFIO_IRQ_SET_DATA_NONE) {
if (*ctx) {
if (count) {
eventfd_signal(*ctx, 1);
} else {
eventfd_ctx_put(*ctx);
*ctx = NULL;
}
return 0;
}
} else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
uint8_t trigger;
if (!count)
return -EINVAL;
trigger = *(uint8_t *)data;
if (trigger && *ctx)
eventfd_signal(*ctx, 1);
return 0;
} else if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
int32_t fd;
if (!count)
return -EINVAL;
fd = *(int32_t *)data;
if (fd == -1) {
if (*ctx)
eventfd_ctx_put(*ctx);
*ctx = NULL;
} else if (fd >= 0) {
struct eventfd_ctx *efdctx;
efdctx = eventfd_ctx_fdget(fd);
if (IS_ERR(efdctx))
return PTR_ERR(efdctx);
if (*ctx)
eventfd_ctx_put(*ctx);
*ctx = efdctx;
}
return 0;
}
return -EINVAL;
}
static int vfio_pci_set_err_trigger(struct vfio_pci_core_device *vdev,
unsigned index, unsigned start,
unsigned count, uint32_t flags, void *data)
{
if (index != VFIO_PCI_ERR_IRQ_INDEX || start != 0 || count > 1)
return -EINVAL;
return vfio_pci_set_ctx_trigger_single(&vdev->err_trigger,
count, flags, data);
}
static int vfio_pci_set_req_trigger(struct vfio_pci_core_device *vdev,
unsigned index, unsigned start,
unsigned count, uint32_t flags, void *data)
{
if (index != VFIO_PCI_REQ_IRQ_INDEX || start != 0 || count > 1)
return -EINVAL;
return vfio_pci_set_ctx_trigger_single(&vdev->req_trigger,
count, flags, data);
}
int vfio_pci_set_irqs_ioctl(struct vfio_pci_core_device *vdev, uint32_t flags,
unsigned index, unsigned start, unsigned count,
void *data)
{
int (*func)(struct vfio_pci_core_device *vdev, unsigned index,
unsigned start, unsigned count, uint32_t flags,
void *data) = NULL;
switch (index) {
case VFIO_PCI_INTX_IRQ_INDEX:
switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
case VFIO_IRQ_SET_ACTION_MASK:
func = vfio_pci_set_intx_mask;
break;
case VFIO_IRQ_SET_ACTION_UNMASK:
func = vfio_pci_set_intx_unmask;
break;
case VFIO_IRQ_SET_ACTION_TRIGGER:
func = vfio_pci_set_intx_trigger;
break;
}
break;
case VFIO_PCI_MSI_IRQ_INDEX:
case VFIO_PCI_MSIX_IRQ_INDEX:
switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
case VFIO_IRQ_SET_ACTION_MASK:
case VFIO_IRQ_SET_ACTION_UNMASK:
/* XXX Need masking support exported */
break;
case VFIO_IRQ_SET_ACTION_TRIGGER:
func = vfio_pci_set_msi_trigger;
break;
}
break;
case VFIO_PCI_ERR_IRQ_INDEX:
switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
case VFIO_IRQ_SET_ACTION_TRIGGER:
if (pci_is_pcie(vdev->pdev))
func = vfio_pci_set_err_trigger;
break;
}
break;
case VFIO_PCI_REQ_IRQ_INDEX:
switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
case VFIO_IRQ_SET_ACTION_TRIGGER:
func = vfio_pci_set_req_trigger;
break;
}
break;
}
if (!func)
return -ENOTTY;
return func(vdev, index, start, count, flags, data);
}