vhost: Release worker mutex during flushes

The next patches allow the worker to be killed while it is in use. To
support that, we need to be able to take the worker mutex, kill queued
works for new IO and flushes, and set some new flags to prevent new
__vhost_vq_attach_worker calls from swapping in/out killed workers.

If we are holding the worker mutex during a flush and the flush's work
is still in the queue, the worker code that will handle the SIGKILL
cleanup won't be able to take the mutex and perform its cleanup. So
this patch has us drop the worker mutex while waiting for the flush
to complete.

Signed-off-by: Mike Christie <michael.christie@oracle.com>
Message-Id: <20240316004707.45557-8-michael.christie@oracle.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
This commit is contained in:
Mike Christie 2024-03-15 19:47:05 -05:00 committed by Michael S. Tsirkin
parent 34cf9ba5f0
commit ba704ff4e1

View File

@ -264,21 +264,36 @@ bool vhost_vq_work_queue(struct vhost_virtqueue *vq, struct vhost_work *work)
EXPORT_SYMBOL_GPL(vhost_vq_work_queue); EXPORT_SYMBOL_GPL(vhost_vq_work_queue);
/** /**
* vhost_worker_flush - flush a worker * __vhost_worker_flush - flush a worker
* @worker: worker to flush * @worker: worker to flush
* *
* This does not use RCU to protect the worker, so the device or worker * The worker's mutex must be held.
* mutex must be held.
*/ */
static void vhost_worker_flush(struct vhost_worker *worker) static void __vhost_worker_flush(struct vhost_worker *worker)
{ {
struct vhost_flush_struct flush; struct vhost_flush_struct flush;
if (!worker->attachment_cnt)
return;
init_completion(&flush.wait_event); init_completion(&flush.wait_event);
vhost_work_init(&flush.work, vhost_flush_work); vhost_work_init(&flush.work, vhost_flush_work);
vhost_worker_queue(worker, &flush.work); vhost_worker_queue(worker, &flush.work);
/*
* Drop mutex in case our worker is killed and it needs to take the
* mutex to force cleanup.
*/
mutex_unlock(&worker->mutex);
wait_for_completion(&flush.wait_event); wait_for_completion(&flush.wait_event);
mutex_lock(&worker->mutex);
}
static void vhost_worker_flush(struct vhost_worker *worker)
{
mutex_lock(&worker->mutex);
__vhost_worker_flush(worker);
mutex_unlock(&worker->mutex);
} }
void vhost_dev_flush(struct vhost_dev *dev) void vhost_dev_flush(struct vhost_dev *dev)
@ -286,15 +301,8 @@ void vhost_dev_flush(struct vhost_dev *dev)
struct vhost_worker *worker; struct vhost_worker *worker;
unsigned long i; unsigned long i;
xa_for_each(&dev->worker_xa, i, worker) { xa_for_each(&dev->worker_xa, i, worker)
mutex_lock(&worker->mutex);
if (!worker->attachment_cnt) {
mutex_unlock(&worker->mutex);
continue;
}
vhost_worker_flush(worker); vhost_worker_flush(worker);
mutex_unlock(&worker->mutex);
}
} }
EXPORT_SYMBOL_GPL(vhost_dev_flush); EXPORT_SYMBOL_GPL(vhost_dev_flush);
@ -673,7 +681,6 @@ static void __vhost_vq_attach_worker(struct vhost_virtqueue *vq,
* device wide flushes which doesn't use RCU for execution. * device wide flushes which doesn't use RCU for execution.
*/ */
mutex_lock(&old_worker->mutex); mutex_lock(&old_worker->mutex);
old_worker->attachment_cnt--;
/* /*
* We don't want to call synchronize_rcu for every vq during setup * We don't want to call synchronize_rcu for every vq during setup
* because it will slow down VM startup. If we haven't done * because it will slow down VM startup. If we haven't done
@ -684,6 +691,8 @@ static void __vhost_vq_attach_worker(struct vhost_virtqueue *vq,
mutex_lock(&vq->mutex); mutex_lock(&vq->mutex);
if (!vhost_vq_get_backend(vq) && !vq->kick) { if (!vhost_vq_get_backend(vq) && !vq->kick) {
mutex_unlock(&vq->mutex); mutex_unlock(&vq->mutex);
old_worker->attachment_cnt--;
mutex_unlock(&old_worker->mutex); mutex_unlock(&old_worker->mutex);
/* /*
* vsock can queue anytime after VHOST_VSOCK_SET_GUEST_CID. * vsock can queue anytime after VHOST_VSOCK_SET_GUEST_CID.
@ -699,7 +708,8 @@ static void __vhost_vq_attach_worker(struct vhost_virtqueue *vq,
/* Make sure new vq queue/flush/poll calls see the new worker */ /* Make sure new vq queue/flush/poll calls see the new worker */
synchronize_rcu(); synchronize_rcu();
/* Make sure whatever was queued gets run */ /* Make sure whatever was queued gets run */
vhost_worker_flush(old_worker); __vhost_worker_flush(old_worker);
old_worker->attachment_cnt--;
mutex_unlock(&old_worker->mutex); mutex_unlock(&old_worker->mutex);
} }
@ -752,6 +762,12 @@ static int vhost_free_worker(struct vhost_dev *dev,
mutex_unlock(&worker->mutex); mutex_unlock(&worker->mutex);
return -EBUSY; return -EBUSY;
} }
/*
* A flush might have raced and snuck in before attachment_cnt was set
* to zero. Make sure flushes are flushed from the queue before
* freeing.
*/
__vhost_worker_flush(worker);
mutex_unlock(&worker->mutex); mutex_unlock(&worker->mutex);
vhost_worker_destroy(dev, worker); vhost_worker_destroy(dev, worker);