VFIO doc update and virqfd race fix

-----BEGIN PGP SIGNATURE-----
 Version: GnuPG v1.4.12 (GNU/Linux)
 
 iQIcBAABAgAGBQJQXJxnAAoJECObm247sIsisU8P/1Iy6CiBYx7mzF4crPJBcMld
 fKAYWEqgiwju/nsT6NJMCY+WZ9YpbMXjXqdgKWEuSuWgHnbaX8RW6qKR0+fVgfWN
 K2Gekav5rtzqlFvs6TfRbJmxGclLh7q7Xpm98+YBbazLbMt1gLSn7BHIXFD20mNC
 QrX3HNKAEqzftgM7muIH9gWUlxX0lGN+wtc+cPkkGfe5voy3L8dSd+7oY7Rd4hxo
 0M79lSnUjulVmRaYyORvha9Kl7QqAmhzNOu8tlTon8mfzVwnfPo6RMFYk3dlP43M
 QK896iwjcss7n+G/q92dJZhLl23G3Bd8MHh02l0c9/e7/vI68Mfd10jc/WJ3imIV
 5WD5isXyqqdZjLupbvYF9xwe+r1IS1zPixPi77kimvETIJHvxzW1+C+dAXtSIF1N
 Bpw2dif73Rbx1cbR4opbFdYjRnNfpEzuDIyeCuaeoENED3+/0dltDuNC5RlKi4/K
 2QQRRHnwYH2HnTR03nr4IZ2qwPclR+XMiIJ48AP70xc1dB0AnoH3jXdeQzqYlZlq
 le2ascWj9GXCBoaw1kO4ttC0ykT1Ew5BbpR7TKxwkwYU7HkpyBpoAU53I/h5UcQV
 Q9y+yoZ4M6bBk++kk+jpGJragLen4X2pN0+JA9V7i4u6JCJToCYBF9rrPtorsO5V
 MDPWJZvb/w2+zWImVvlc
 =biAQ
 -----END PGP SIGNATURE-----

Merge tag 'vfio-for-linus' of git://github.com/awilliam/linux-vfio

Pull vfio fixes from Alex Williamson:
 "VFIO doc update and virqfd race fix"

* tag 'vfio-for-linus' of git://github.com/awilliam/linux-vfio:
  vfio: Fix virqfd release race
  vfio: Trivial Documentation correction
This commit is contained in:
Linus Torvalds 2012-09-24 16:16:33 -07:00
commit 1abbce4e83
2 changed files with 57 additions and 21 deletions

View File

@ -133,7 +133,7 @@ character devices for this group:
$ lspci -n -s 0000:06:0d.0 $ lspci -n -s 0000:06:0d.0
06:0d.0 0401: 1102:0002 (rev 08) 06:0d.0 0401: 1102:0002 (rev 08)
# echo 0000:06:0d.0 > /sys/bus/pci/devices/0000:06:0d.0/driver/unbind # echo 0000:06:0d.0 > /sys/bus/pci/devices/0000:06:0d.0/driver/unbind
# echo 1102 0002 > /sys/bus/pci/drivers/vfio/new_id # echo 1102 0002 > /sys/bus/pci/drivers/vfio-pci/new_id
Now we need to look at what other devices are in the group to free Now we need to look at what other devices are in the group to free
it for use by VFIO: it for use by VFIO:

View File

@ -76,9 +76,24 @@ static int virqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key)
schedule_work(&virqfd->inject); schedule_work(&virqfd->inject);
} }
if (flags & POLLHUP) if (flags & POLLHUP) {
/* The eventfd is closing, detach from VFIO */ unsigned long flags;
spin_lock_irqsave(&virqfd->vdev->irqlock, flags);
/*
* The eventfd is closing, if the virqfd has not yet been
* queued for release, as determined by testing whether the
* vdev pointer to it is still valid, queue it now. As
* with kvm irqfds, we know we won't race against the virqfd
* going away because we hold wqh->lock to get here.
*/
if (*(virqfd->pvirqfd) == virqfd) {
*(virqfd->pvirqfd) = NULL;
virqfd_deactivate(virqfd); virqfd_deactivate(virqfd);
}
spin_unlock_irqrestore(&virqfd->vdev->irqlock, flags);
}
return 0; return 0;
} }
@ -93,7 +108,6 @@ static void virqfd_ptable_queue_proc(struct file *file,
static void virqfd_shutdown(struct work_struct *work) static void virqfd_shutdown(struct work_struct *work)
{ {
struct virqfd *virqfd = container_of(work, struct virqfd, shutdown); struct virqfd *virqfd = container_of(work, struct virqfd, shutdown);
struct virqfd **pvirqfd = virqfd->pvirqfd;
u64 cnt; u64 cnt;
eventfd_ctx_remove_wait_queue(virqfd->eventfd, &virqfd->wait, &cnt); eventfd_ctx_remove_wait_queue(virqfd->eventfd, &virqfd->wait, &cnt);
@ -101,7 +115,6 @@ static void virqfd_shutdown(struct work_struct *work)
eventfd_ctx_put(virqfd->eventfd); eventfd_ctx_put(virqfd->eventfd);
kfree(virqfd); kfree(virqfd);
*pvirqfd = NULL;
} }
static void virqfd_inject(struct work_struct *work) static void virqfd_inject(struct work_struct *work)
@ -122,15 +135,11 @@ static int virqfd_enable(struct vfio_pci_device *vdev,
int ret = 0; int ret = 0;
unsigned int events; unsigned int events;
if (*pvirqfd)
return -EBUSY;
virqfd = kzalloc(sizeof(*virqfd), GFP_KERNEL); virqfd = kzalloc(sizeof(*virqfd), GFP_KERNEL);
if (!virqfd) if (!virqfd)
return -ENOMEM; return -ENOMEM;
virqfd->pvirqfd = pvirqfd; virqfd->pvirqfd = pvirqfd;
*pvirqfd = virqfd;
virqfd->vdev = vdev; virqfd->vdev = vdev;
virqfd->handler = handler; virqfd->handler = handler;
virqfd->thread = thread; virqfd->thread = thread;
@ -153,6 +162,23 @@ static int virqfd_enable(struct vfio_pci_device *vdev,
virqfd->eventfd = ctx; virqfd->eventfd = ctx;
/*
* virqfds can be released by closing the eventfd or directly
* through ioctl. These are both done through a workqueue, so
* we update the pointer to the virqfd under lock to avoid
* pushing multiple jobs to release the same virqfd.
*/
spin_lock_irq(&vdev->irqlock);
if (*pvirqfd) {
spin_unlock_irq(&vdev->irqlock);
ret = -EBUSY;
goto fail;
}
*pvirqfd = virqfd;
spin_unlock_irq(&vdev->irqlock);
/* /*
* Install our own custom wake-up handling so we are notified via * Install our own custom wake-up handling so we are notified via
* a callback whenever someone signals the underlying eventfd. * a callback whenever someone signals the underlying eventfd.
@ -187,19 +213,29 @@ fail:
fput(file); fput(file);
kfree(virqfd); kfree(virqfd);
*pvirqfd = NULL;
return ret; return ret;
} }
static void virqfd_disable(struct virqfd *virqfd) static void virqfd_disable(struct vfio_pci_device *vdev,
struct virqfd **pvirqfd)
{ {
if (!virqfd) unsigned long flags;
return;
virqfd_deactivate(virqfd); spin_lock_irqsave(&vdev->irqlock, flags);
/* Block until we know all outstanding shutdown jobs have completed. */ if (*pvirqfd) {
virqfd_deactivate(*pvirqfd);
*pvirqfd = NULL;
}
spin_unlock_irqrestore(&vdev->irqlock, flags);
/*
* Block until we know all outstanding shutdown jobs have completed.
* Even if we don't queue the job, flush the wq to be sure it's
* been released.
*/
flush_workqueue(vfio_irqfd_cleanup_wq); flush_workqueue(vfio_irqfd_cleanup_wq);
} }
@ -392,8 +428,8 @@ static int vfio_intx_set_signal(struct vfio_pci_device *vdev, int fd)
static void vfio_intx_disable(struct vfio_pci_device *vdev) static void vfio_intx_disable(struct vfio_pci_device *vdev)
{ {
vfio_intx_set_signal(vdev, -1); vfio_intx_set_signal(vdev, -1);
virqfd_disable(vdev->ctx[0].unmask); virqfd_disable(vdev, &vdev->ctx[0].unmask);
virqfd_disable(vdev->ctx[0].mask); virqfd_disable(vdev, &vdev->ctx[0].mask);
vdev->irq_type = VFIO_PCI_NUM_IRQS; vdev->irq_type = VFIO_PCI_NUM_IRQS;
vdev->num_ctx = 0; vdev->num_ctx = 0;
kfree(vdev->ctx); kfree(vdev->ctx);
@ -539,8 +575,8 @@ static void vfio_msi_disable(struct vfio_pci_device *vdev, bool msix)
vfio_msi_set_block(vdev, 0, vdev->num_ctx, NULL, msix); vfio_msi_set_block(vdev, 0, vdev->num_ctx, NULL, msix);
for (i = 0; i < vdev->num_ctx; i++) { for (i = 0; i < vdev->num_ctx; i++) {
virqfd_disable(vdev->ctx[i].unmask); virqfd_disable(vdev, &vdev->ctx[i].unmask);
virqfd_disable(vdev->ctx[i].mask); virqfd_disable(vdev, &vdev->ctx[i].mask);
} }
if (msix) { if (msix) {
@ -577,7 +613,7 @@ static int vfio_pci_set_intx_unmask(struct vfio_pci_device *vdev,
vfio_send_intx_eventfd, NULL, vfio_send_intx_eventfd, NULL,
&vdev->ctx[0].unmask, fd); &vdev->ctx[0].unmask, fd);
virqfd_disable(vdev->ctx[0].unmask); virqfd_disable(vdev, &vdev->ctx[0].unmask);
} }
return 0; return 0;