From 91aeb563bd4332e2988f8c0f64f125c4ecb5bcb3 Mon Sep 17 00:00:00 2001 From: Brett Creeley Date: Wed, 22 Nov 2023 11:25:31 -0800 Subject: [PATCH 1/3] vfio/pds: Fix mutex lock->magic != lock warning The following BUG was found when running on a kernel with CONFIG_DEBUG_MUTEXES=y set: DEBUG_LOCKS_WARN_ON(lock->magic != lock) RIP: 0010:mutex_trylock+0x10d/0x120 Call Trace: ? __warn+0x85/0x140 ? mutex_trylock+0x10d/0x120 ? report_bug+0xfc/0x1e0 ? handle_bug+0x3f/0x70 ? exc_invalid_op+0x17/0x70 ? asm_exc_invalid_op+0x1a/0x20 ? mutex_trylock+0x10d/0x120 ? mutex_trylock+0x10d/0x120 pds_vfio_reset+0x3a/0x60 [pds_vfio_pci] pci_reset_function+0x4b/0x70 reset_store+0x5b/0xa0 kernfs_fop_write_iter+0x137/0x1d0 vfs_write+0x2de/0x410 ksys_write+0x5d/0xd0 do_syscall_64+0x3b/0x90 entry_SYSCALL_64_after_hwframe+0x6e/0xd8 As shown, lock->magic != lock. This is because mutex_init(&pds_vfio->state_mutex) is called in the VFIO open path. So, if a reset is initiated before the VFIO device is opened the mutex will have never been initialized. Fix this by calling mutex_init(&pds_vfio->state_mutex) in the VFIO init path. Also, don't destroy the mutex on close because the device may be re-opened, which would cause mutex to be uninitialized. Fix this by implementing a driver specific vfio_device_ops.release callback that destroys the mutex before calling vfio_pci_core_release_dev(). Fixes: bb500dbe2ac6 ("vfio/pds: Add VFIO live migration support") Signed-off-by: Brett Creeley Reviewed-by: Shannon Nelson Link: https://lore.kernel.org/r/20231122192532.25791-2-brett.creeley@amd.com Signed-off-by: Alex Williamson --- drivers/vfio/pci/pds/vfio_dev.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/vfio/pci/pds/vfio_dev.c b/drivers/vfio/pci/pds/vfio_dev.c index 649b18ee394b..8c9fb87b13e1 100644 --- a/drivers/vfio/pci/pds/vfio_dev.c +++ b/drivers/vfio/pci/pds/vfio_dev.c @@ -155,6 +155,8 @@ static int pds_vfio_init_device(struct vfio_device *vdev) pds_vfio->vf_id = vf_id; + mutex_init(&pds_vfio->state_mutex); + vdev->migration_flags = VFIO_MIGRATION_STOP_COPY | VFIO_MIGRATION_P2P; vdev->mig_ops = &pds_vfio_lm_ops; vdev->log_ops = &pds_vfio_log_ops; @@ -168,6 +170,16 @@ static int pds_vfio_init_device(struct vfio_device *vdev) return 0; } +static void pds_vfio_release_device(struct vfio_device *vdev) +{ + struct pds_vfio_pci_device *pds_vfio = + container_of(vdev, struct pds_vfio_pci_device, + vfio_coredev.vdev); + + mutex_destroy(&pds_vfio->state_mutex); + vfio_pci_core_release_dev(vdev); +} + static int pds_vfio_open_device(struct vfio_device *vdev) { struct pds_vfio_pci_device *pds_vfio = @@ -179,7 +191,6 @@ static int pds_vfio_open_device(struct vfio_device *vdev) if (err) return err; - mutex_init(&pds_vfio->state_mutex); pds_vfio->state = VFIO_DEVICE_STATE_RUNNING; pds_vfio->deferred_reset_state = VFIO_DEVICE_STATE_RUNNING; @@ -199,14 +210,13 @@ static void pds_vfio_close_device(struct vfio_device *vdev) pds_vfio_put_save_file(pds_vfio); pds_vfio_dirty_disable(pds_vfio, true); mutex_unlock(&pds_vfio->state_mutex); - mutex_destroy(&pds_vfio->state_mutex); vfio_pci_core_close_device(vdev); } static const struct vfio_device_ops pds_vfio_ops = { .name = "pds-vfio", .init = pds_vfio_init_device, - .release = vfio_pci_core_release_dev, + .release = pds_vfio_release_device, .open_device = pds_vfio_open_device, .close_device = pds_vfio_close_device, .ioctl = vfio_pci_core_ioctl, From ae2667cd8a479bb5abd6e24c12fcc9ef5bc06d75 Mon Sep 17 00:00:00 2001 From: Brett Creeley Date: Wed, 22 Nov 2023 11:25:32 -0800 Subject: [PATCH 2/3] vfio/pds: Fix possible sleep while in atomic context The driver could possibly sleep while in atomic context resulting in the following call trace while CONFIG_DEBUG_ATOMIC_SLEEP=y is set: BUG: sleeping function called from invalid context at kernel/locking/mutex.c:283 in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 2817, name: bash preempt_count: 1, expected: 0 RCU nest depth: 0, expected: 0 Call Trace: dump_stack_lvl+0x36/0x50 __might_resched+0x123/0x170 mutex_lock+0x1e/0x50 pds_vfio_put_lm_file+0x1e/0xa0 [pds_vfio_pci] pds_vfio_put_save_file+0x19/0x30 [pds_vfio_pci] pds_vfio_state_mutex_unlock+0x2e/0x80 [pds_vfio_pci] pci_reset_function+0x4b/0x70 reset_store+0x5b/0xa0 kernfs_fop_write_iter+0x137/0x1d0 vfs_write+0x2de/0x410 ksys_write+0x5d/0xd0 do_syscall_64+0x3b/0x90 entry_SYSCALL_64_after_hwframe+0x6e/0xd8 This can happen if pds_vfio_put_restore_file() and/or pds_vfio_put_save_file() grab the mutex_lock(&lm_file->lock) while the spin_lock(&pds_vfio->reset_lock) is held, which can happen during while calling pds_vfio_state_mutex_unlock(). Fix this by changing the reset_lock to reset_mutex so there are no such conerns. Also, make sure to destroy the reset_mutex in the driver specific VFIO device release function. This also fixes a spinlock bad magic BUG that was caused by not calling spinlock_init() on the reset_lock. Since, the lock is being changed to a mutex, make sure to call mutex_init() on it. Reported-by: Dan Carpenter Closes: https://lore.kernel.org/kvm/1f9bc27b-3de9-4891-9687-ba2820c1b390@moroto.mountain/ Fixes: bb500dbe2ac6 ("vfio/pds: Add VFIO live migration support") Signed-off-by: Brett Creeley Reviewed-by: Shannon Nelson Link: https://lore.kernel.org/r/20231122192532.25791-3-brett.creeley@amd.com Signed-off-by: Alex Williamson --- drivers/vfio/pci/pds/pci_drv.c | 4 ++-- drivers/vfio/pci/pds/vfio_dev.c | 14 ++++++++------ drivers/vfio/pci/pds/vfio_dev.h | 2 +- 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/drivers/vfio/pci/pds/pci_drv.c b/drivers/vfio/pci/pds/pci_drv.c index dd8c00c895a2..a34dda516629 100644 --- a/drivers/vfio/pci/pds/pci_drv.c +++ b/drivers/vfio/pci/pds/pci_drv.c @@ -55,10 +55,10 @@ static void pds_vfio_recovery(struct pds_vfio_pci_device *pds_vfio) * VFIO_DEVICE_STATE_RUNNING. */ if (deferred_reset_needed) { - spin_lock(&pds_vfio->reset_lock); + mutex_lock(&pds_vfio->reset_mutex); pds_vfio->deferred_reset = true; pds_vfio->deferred_reset_state = VFIO_DEVICE_STATE_ERROR; - spin_unlock(&pds_vfio->reset_lock); + mutex_unlock(&pds_vfio->reset_mutex); } } diff --git a/drivers/vfio/pci/pds/vfio_dev.c b/drivers/vfio/pci/pds/vfio_dev.c index 8c9fb87b13e1..4c351c59d05a 100644 --- a/drivers/vfio/pci/pds/vfio_dev.c +++ b/drivers/vfio/pci/pds/vfio_dev.c @@ -29,7 +29,7 @@ struct pds_vfio_pci_device *pds_vfio_pci_drvdata(struct pci_dev *pdev) void pds_vfio_state_mutex_unlock(struct pds_vfio_pci_device *pds_vfio) { again: - spin_lock(&pds_vfio->reset_lock); + mutex_lock(&pds_vfio->reset_mutex); if (pds_vfio->deferred_reset) { pds_vfio->deferred_reset = false; if (pds_vfio->state == VFIO_DEVICE_STATE_ERROR) { @@ -39,23 +39,23 @@ again: } pds_vfio->state = pds_vfio->deferred_reset_state; pds_vfio->deferred_reset_state = VFIO_DEVICE_STATE_RUNNING; - spin_unlock(&pds_vfio->reset_lock); + mutex_unlock(&pds_vfio->reset_mutex); goto again; } mutex_unlock(&pds_vfio->state_mutex); - spin_unlock(&pds_vfio->reset_lock); + mutex_unlock(&pds_vfio->reset_mutex); } void pds_vfio_reset(struct pds_vfio_pci_device *pds_vfio) { - spin_lock(&pds_vfio->reset_lock); + mutex_lock(&pds_vfio->reset_mutex); pds_vfio->deferred_reset = true; pds_vfio->deferred_reset_state = VFIO_DEVICE_STATE_RUNNING; if (!mutex_trylock(&pds_vfio->state_mutex)) { - spin_unlock(&pds_vfio->reset_lock); + mutex_unlock(&pds_vfio->reset_mutex); return; } - spin_unlock(&pds_vfio->reset_lock); + mutex_unlock(&pds_vfio->reset_mutex); pds_vfio_state_mutex_unlock(pds_vfio); } @@ -156,6 +156,7 @@ static int pds_vfio_init_device(struct vfio_device *vdev) pds_vfio->vf_id = vf_id; mutex_init(&pds_vfio->state_mutex); + mutex_init(&pds_vfio->reset_mutex); vdev->migration_flags = VFIO_MIGRATION_STOP_COPY | VFIO_MIGRATION_P2P; vdev->mig_ops = &pds_vfio_lm_ops; @@ -177,6 +178,7 @@ static void pds_vfio_release_device(struct vfio_device *vdev) vfio_coredev.vdev); mutex_destroy(&pds_vfio->state_mutex); + mutex_destroy(&pds_vfio->reset_mutex); vfio_pci_core_release_dev(vdev); } diff --git a/drivers/vfio/pci/pds/vfio_dev.h b/drivers/vfio/pci/pds/vfio_dev.h index b8f2d667608f..e7b01080a1ec 100644 --- a/drivers/vfio/pci/pds/vfio_dev.h +++ b/drivers/vfio/pci/pds/vfio_dev.h @@ -18,7 +18,7 @@ struct pds_vfio_pci_device { struct pds_vfio_dirty dirty; struct mutex state_mutex; /* protect migration state */ enum vfio_device_mig_state state; - spinlock_t reset_lock; /* protect reset_done flow */ + struct mutex reset_mutex; /* protect reset_done flow */ u8 deferred_reset; enum vfio_device_mig_state deferred_reset_state; struct notifier_block nb; From 4ea95c04fa6b9043a1a301240996aeebe3cb28ec Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 29 Nov 2023 16:10:00 -0800 Subject: [PATCH 3/3] vfio: Drop vfio_file_iommu_group() stub to fudge around a KVM wart Drop the vfio_file_iommu_group() stub and instead unconditionally declare the function to fudge around a KVM wart where KVM tries to do symbol_get() on vfio_file_iommu_group() (and other VFIO symbols) even if CONFIG_VFIO=n. Ensuring the symbol is always declared fixes a PPC build error when modules are also disabled, in which case symbol_get() simply points at the address of the symbol (with some attributes shenanigans). Because KVM does symbol_get() instead of directly depending on VFIO, the lack of a fully defined symbol is not problematic (ugly, but "fine"). arch/powerpc/kvm/../../../virt/kvm/vfio.c:89:7: error: attribute declaration must precede definition [-Werror,-Wignored-attributes] fn = symbol_get(vfio_file_iommu_group); ^ include/linux/module.h:805:60: note: expanded from macro 'symbol_get' #define symbol_get(x) ({ extern typeof(x) x __attribute__((weak,visibility("hidden"))); &(x); }) ^ include/linux/vfio.h:294:35: note: previous definition is here static inline struct iommu_group *vfio_file_iommu_group(struct file *file) ^ arch/powerpc/kvm/../../../virt/kvm/vfio.c:89:7: error: attribute declaration must precede definition [-Werror,-Wignored-attributes] fn = symbol_get(vfio_file_iommu_group); ^ include/linux/module.h:805:65: note: expanded from macro 'symbol_get' #define symbol_get(x) ({ extern typeof(x) x __attribute__((weak,visibility("hidden"))); &(x); }) ^ include/linux/vfio.h:294:35: note: previous definition is here static inline struct iommu_group *vfio_file_iommu_group(struct file *file) ^ 2 errors generated. Although KVM is firmly in the wrong (there is zero reason for KVM to build virt/kvm/vfio.c when VFIO is disabled), fudge around the error in VFIO as the stub is unnecessary and doesn't serve its intended purpose (KVM is the only external user of vfio_file_iommu_group()), and there is an in-flight series to clean up the entire KVM<->VFIO interaction, i.e. fixing this in KVM would result in more churn in the long run, and the stub needs to go away regardless. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202308251949.5IiaV0sz-lkp@intel.com Closes: https://lore.kernel.org/oe-kbuild-all/202309030741.82aLACDG-lkp@intel.com Closes: https://lore.kernel.org/oe-kbuild-all/202309110914.QLH0LU6L-lkp@intel.com Link: https://lore.kernel.org/all/0-v1-08396538817d+13c5-vfio_kvm_kconfig_jgg@nvidia.com Link: https://lore.kernel.org/all/20230916003118.2540661-1-seanjc@google.com Cc: Nick Desaulniers Cc: Jason Gunthorpe Tested-by: Michael Ellerman Fixes: c1cce6d079b8 ("vfio: Compile vfio_group infrastructure optionally") Signed-off-by: Sean Christopherson Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20231130001000.543240-1-seanjc@google.com Signed-off-by: Alex Williamson --- include/linux/vfio.h | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 454e9295970c..a65b2513f8cd 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -289,16 +289,12 @@ void vfio_combine_iova_ranges(struct rb_root_cached *root, u32 cur_nodes, /* * External user API */ -#if IS_ENABLED(CONFIG_VFIO_GROUP) struct iommu_group *vfio_file_iommu_group(struct file *file); + +#if IS_ENABLED(CONFIG_VFIO_GROUP) bool vfio_file_is_group(struct file *file); bool vfio_file_has_dev(struct file *file, struct vfio_device *device); #else -static inline struct iommu_group *vfio_file_iommu_group(struct file *file) -{ - return NULL; -} - static inline bool vfio_file_is_group(struct file *file) { return false;