linux/drivers/virtio/virtio_pci_modern.c
Jason Wang 9e35276a53 virtio_pci: harden MSI-X interrupts
We used to synchronize pending MSI-X irq handlers via
synchronize_irq(), this may not work for the untrusted device which
may keep sending interrupts after reset which may lead unexpected
results. Similarly, we should not enable MSI-X interrupt until the
device is ready. So this patch fixes those two issues by:

1) switching to use disable_irq() to prevent the virtio interrupt
   handlers to be called after the device is reset.
2) using IRQF_NO_AUTOEN and enable the MSI-X irq during .ready()

This can make sure the virtio interrupt handler won't be called before
virtio_device_ready() and after reset.

Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Paul E. McKenney <paulmck@kernel.org>
Signed-off-by: Jason Wang <jasowang@redhat.com>
Link: https://lore.kernel.org/r/20211019070152.8236-5-jasowang@redhat.com
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2021-11-01 05:26:48 -04:00

451 lines
12 KiB
C

// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Virtio PCI driver - modern (virtio 1.0) device support
*
* This module allows virtio devices to be used over a virtual PCI device.
* This can be used with QEMU based VMMs like KVM or Xen.
*
* Copyright IBM Corp. 2007
* Copyright Red Hat, Inc. 2014
*
* Authors:
* Anthony Liguori <aliguori@us.ibm.com>
* Rusty Russell <rusty@rustcorp.com.au>
* Michael S. Tsirkin <mst@redhat.com>
*/
#include <linux/delay.h>
#define VIRTIO_PCI_NO_LEGACY
#define VIRTIO_RING_NO_LEGACY
#include "virtio_pci_common.h"
static u64 vp_get_features(struct virtio_device *vdev)
{
struct virtio_pci_device *vp_dev = to_vp_device(vdev);
return vp_modern_get_features(&vp_dev->mdev);
}
static void vp_transport_features(struct virtio_device *vdev, u64 features)
{
struct virtio_pci_device *vp_dev = to_vp_device(vdev);
struct pci_dev *pci_dev = vp_dev->pci_dev;
if ((features & BIT_ULL(VIRTIO_F_SR_IOV)) &&
pci_find_ext_capability(pci_dev, PCI_EXT_CAP_ID_SRIOV))
__virtio_set_bit(vdev, VIRTIO_F_SR_IOV);
}
/* virtio config->finalize_features() implementation */
static int vp_finalize_features(struct virtio_device *vdev)
{
struct virtio_pci_device *vp_dev = to_vp_device(vdev);
u64 features = vdev->features;
/* Give virtio_ring a chance to accept features. */
vring_transport_features(vdev);
/* Give virtio_pci a chance to accept features. */
vp_transport_features(vdev, features);
if (!__virtio_test_bit(vdev, VIRTIO_F_VERSION_1)) {
dev_err(&vdev->dev, "virtio: device uses modern interface "
"but does not have VIRTIO_F_VERSION_1\n");
return -EINVAL;
}
vp_modern_set_features(&vp_dev->mdev, vdev->features);
return 0;
}
/* virtio config->get() implementation */
static void vp_get(struct virtio_device *vdev, unsigned offset,
void *buf, unsigned len)
{
struct virtio_pci_device *vp_dev = to_vp_device(vdev);
struct virtio_pci_modern_device *mdev = &vp_dev->mdev;
void __iomem *device = mdev->device;
u8 b;
__le16 w;
__le32 l;
BUG_ON(offset + len > mdev->device_len);
switch (len) {
case 1:
b = ioread8(device + offset);
memcpy(buf, &b, sizeof b);
break;
case 2:
w = cpu_to_le16(ioread16(device + offset));
memcpy(buf, &w, sizeof w);
break;
case 4:
l = cpu_to_le32(ioread32(device + offset));
memcpy(buf, &l, sizeof l);
break;
case 8:
l = cpu_to_le32(ioread32(device + offset));
memcpy(buf, &l, sizeof l);
l = cpu_to_le32(ioread32(device + offset + sizeof l));
memcpy(buf + sizeof l, &l, sizeof l);
break;
default:
BUG();
}
}
/* the config->set() implementation. it's symmetric to the config->get()
* implementation */
static void vp_set(struct virtio_device *vdev, unsigned offset,
const void *buf, unsigned len)
{
struct virtio_pci_device *vp_dev = to_vp_device(vdev);
struct virtio_pci_modern_device *mdev = &vp_dev->mdev;
void __iomem *device = mdev->device;
u8 b;
__le16 w;
__le32 l;
BUG_ON(offset + len > mdev->device_len);
switch (len) {
case 1:
memcpy(&b, buf, sizeof b);
iowrite8(b, device + offset);
break;
case 2:
memcpy(&w, buf, sizeof w);
iowrite16(le16_to_cpu(w), device + offset);
break;
case 4:
memcpy(&l, buf, sizeof l);
iowrite32(le32_to_cpu(l), device + offset);
break;
case 8:
memcpy(&l, buf, sizeof l);
iowrite32(le32_to_cpu(l), device + offset);
memcpy(&l, buf + sizeof l, sizeof l);
iowrite32(le32_to_cpu(l), device + offset + sizeof l);
break;
default:
BUG();
}
}
static u32 vp_generation(struct virtio_device *vdev)
{
struct virtio_pci_device *vp_dev = to_vp_device(vdev);
return vp_modern_generation(&vp_dev->mdev);
}
/* config->{get,set}_status() implementations */
static u8 vp_get_status(struct virtio_device *vdev)
{
struct virtio_pci_device *vp_dev = to_vp_device(vdev);
return vp_modern_get_status(&vp_dev->mdev);
}
static void vp_set_status(struct virtio_device *vdev, u8 status)
{
struct virtio_pci_device *vp_dev = to_vp_device(vdev);
/* We should never be setting status to 0. */
BUG_ON(status == 0);
vp_modern_set_status(&vp_dev->mdev, status);
}
static void vp_reset(struct virtio_device *vdev)
{
struct virtio_pci_device *vp_dev = to_vp_device(vdev);
struct virtio_pci_modern_device *mdev = &vp_dev->mdev;
/* 0 status means a reset. */
vp_modern_set_status(mdev, 0);
/* After writing 0 to device_status, the driver MUST wait for a read of
* device_status to return 0 before reinitializing the device.
* This will flush out the status write, and flush in device writes,
* including MSI-X interrupts, if any.
*/
while (vp_modern_get_status(mdev))
msleep(1);
/* Disable VQ/configuration callbacks. */
vp_disable_cbs(vdev);
}
static u16 vp_config_vector(struct virtio_pci_device *vp_dev, u16 vector)
{
return vp_modern_config_vector(&vp_dev->mdev, vector);
}
static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
struct virtio_pci_vq_info *info,
unsigned index,
void (*callback)(struct virtqueue *vq),
const char *name,
bool ctx,
u16 msix_vec)
{
struct virtio_pci_modern_device *mdev = &vp_dev->mdev;
struct virtqueue *vq;
u16 num;
int err;
if (index >= vp_modern_get_num_queues(mdev))
return ERR_PTR(-ENOENT);
/* Check if queue is either not available or already active. */
num = vp_modern_get_queue_size(mdev, index);
if (!num || vp_modern_get_queue_enable(mdev, index))
return ERR_PTR(-ENOENT);
if (num & (num - 1)) {
dev_warn(&vp_dev->pci_dev->dev, "bad queue size %u", num);
return ERR_PTR(-EINVAL);
}
info->msix_vector = msix_vec;
/* create the vring */
vq = vring_create_virtqueue(index, num,
SMP_CACHE_BYTES, &vp_dev->vdev,
true, true, ctx,
vp_notify, callback, name);
if (!vq)
return ERR_PTR(-ENOMEM);
/* activate the queue */
vp_modern_set_queue_size(mdev, index, virtqueue_get_vring_size(vq));
vp_modern_queue_address(mdev, index, virtqueue_get_desc_addr(vq),
virtqueue_get_avail_addr(vq),
virtqueue_get_used_addr(vq));
vq->priv = (void __force *)vp_modern_map_vq_notify(mdev, index, NULL);
if (!vq->priv) {
err = -ENOMEM;
goto err_map_notify;
}
if (msix_vec != VIRTIO_MSI_NO_VECTOR) {
msix_vec = vp_modern_queue_vector(mdev, index, msix_vec);
if (msix_vec == VIRTIO_MSI_NO_VECTOR) {
err = -EBUSY;
goto err_assign_vector;
}
}
return vq;
err_assign_vector:
if (!mdev->notify_base)
pci_iounmap(mdev->pci_dev, (void __iomem __force *)vq->priv);
err_map_notify:
vring_del_virtqueue(vq);
return ERR_PTR(err);
}
static int vp_modern_find_vqs(struct virtio_device *vdev, unsigned nvqs,
struct virtqueue *vqs[],
vq_callback_t *callbacks[],
const char * const names[], const bool *ctx,
struct irq_affinity *desc)
{
struct virtio_pci_device *vp_dev = to_vp_device(vdev);
struct virtqueue *vq;
int rc = vp_find_vqs(vdev, nvqs, vqs, callbacks, names, ctx, desc);
if (rc)
return rc;
/* Select and activate all queues. Has to be done last: once we do
* this, there's no way to go back except reset.
*/
list_for_each_entry(vq, &vdev->vqs, list)
vp_modern_set_queue_enable(&vp_dev->mdev, vq->index, true);
return 0;
}
static void del_vq(struct virtio_pci_vq_info *info)
{
struct virtqueue *vq = info->vq;
struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev);
struct virtio_pci_modern_device *mdev = &vp_dev->mdev;
if (vp_dev->msix_enabled)
vp_modern_queue_vector(mdev, vq->index,
VIRTIO_MSI_NO_VECTOR);
if (!mdev->notify_base)
pci_iounmap(mdev->pci_dev, (void __force __iomem *)vq->priv);
vring_del_virtqueue(vq);
}
static int virtio_pci_find_shm_cap(struct pci_dev *dev, u8 required_id,
u8 *bar, u64 *offset, u64 *len)
{
int pos;
for (pos = pci_find_capability(dev, PCI_CAP_ID_VNDR); pos > 0;
pos = pci_find_next_capability(dev, pos, PCI_CAP_ID_VNDR)) {
u8 type, cap_len, id;
u32 tmp32;
u64 res_offset, res_length;
pci_read_config_byte(dev, pos + offsetof(struct virtio_pci_cap,
cfg_type), &type);
if (type != VIRTIO_PCI_CAP_SHARED_MEMORY_CFG)
continue;
pci_read_config_byte(dev, pos + offsetof(struct virtio_pci_cap,
cap_len), &cap_len);
if (cap_len != sizeof(struct virtio_pci_cap64)) {
dev_err(&dev->dev, "%s: shm cap with bad size offset:"
" %d size: %d\n", __func__, pos, cap_len);
continue;
}
pci_read_config_byte(dev, pos + offsetof(struct virtio_pci_cap,
id), &id);
if (id != required_id)
continue;
/* Type, and ID match, looks good */
pci_read_config_byte(dev, pos + offsetof(struct virtio_pci_cap,
bar), bar);
/* Read the lower 32bit of length and offset */
pci_read_config_dword(dev, pos + offsetof(struct virtio_pci_cap,
offset), &tmp32);
res_offset = tmp32;
pci_read_config_dword(dev, pos + offsetof(struct virtio_pci_cap,
length), &tmp32);
res_length = tmp32;
/* and now the top half */
pci_read_config_dword(dev,
pos + offsetof(struct virtio_pci_cap64,
offset_hi), &tmp32);
res_offset |= ((u64)tmp32) << 32;
pci_read_config_dword(dev,
pos + offsetof(struct virtio_pci_cap64,
length_hi), &tmp32);
res_length |= ((u64)tmp32) << 32;
*offset = res_offset;
*len = res_length;
return pos;
}
return 0;
}
static bool vp_get_shm_region(struct virtio_device *vdev,
struct virtio_shm_region *region, u8 id)
{
struct virtio_pci_device *vp_dev = to_vp_device(vdev);
struct pci_dev *pci_dev = vp_dev->pci_dev;
u8 bar;
u64 offset, len;
phys_addr_t phys_addr;
size_t bar_len;
if (!virtio_pci_find_shm_cap(pci_dev, id, &bar, &offset, &len))
return false;
phys_addr = pci_resource_start(pci_dev, bar);
bar_len = pci_resource_len(pci_dev, bar);
if ((offset + len) < offset) {
dev_err(&pci_dev->dev, "%s: cap offset+len overflow detected\n",
__func__);
return false;
}
if (offset + len > bar_len) {
dev_err(&pci_dev->dev, "%s: bar shorter than cap offset+len\n",
__func__);
return false;
}
region->len = len;
region->addr = (u64) phys_addr + offset;
return true;
}
static const struct virtio_config_ops virtio_pci_config_nodev_ops = {
.enable_cbs = vp_enable_cbs,
.get = NULL,
.set = NULL,
.generation = vp_generation,
.get_status = vp_get_status,
.set_status = vp_set_status,
.reset = vp_reset,
.find_vqs = vp_modern_find_vqs,
.del_vqs = vp_del_vqs,
.get_features = vp_get_features,
.finalize_features = vp_finalize_features,
.bus_name = vp_bus_name,
.set_vq_affinity = vp_set_vq_affinity,
.get_vq_affinity = vp_get_vq_affinity,
.get_shm_region = vp_get_shm_region,
};
static const struct virtio_config_ops virtio_pci_config_ops = {
.enable_cbs = vp_enable_cbs,
.get = vp_get,
.set = vp_set,
.generation = vp_generation,
.get_status = vp_get_status,
.set_status = vp_set_status,
.reset = vp_reset,
.find_vqs = vp_modern_find_vqs,
.del_vqs = vp_del_vqs,
.get_features = vp_get_features,
.finalize_features = vp_finalize_features,
.bus_name = vp_bus_name,
.set_vq_affinity = vp_set_vq_affinity,
.get_vq_affinity = vp_get_vq_affinity,
.get_shm_region = vp_get_shm_region,
};
/* the PCI probing function */
int virtio_pci_modern_probe(struct virtio_pci_device *vp_dev)
{
struct virtio_pci_modern_device *mdev = &vp_dev->mdev;
struct pci_dev *pci_dev = vp_dev->pci_dev;
int err;
mdev->pci_dev = pci_dev;
err = vp_modern_probe(mdev);
if (err)
return err;
if (mdev->device)
vp_dev->vdev.config = &virtio_pci_config_ops;
else
vp_dev->vdev.config = &virtio_pci_config_nodev_ops;
vp_dev->config_vector = vp_config_vector;
vp_dev->setup_vq = setup_vq;
vp_dev->del_vq = del_vq;
vp_dev->isr = mdev->isr;
vp_dev->vdev.id = mdev->id;
return 0;
}
void virtio_pci_modern_remove(struct virtio_pci_device *vp_dev)
{
struct virtio_pci_modern_device *mdev = &vp_dev->mdev;
vp_modern_remove(mdev);
}