Skip to content

Commit

Permalink
vfio/pds: Fix possible sleep while in atomic context
Browse files Browse the repository at this point in the history
The driver could possibly sleep while in atomic context resulting
in the following call trace while CONFIG_DEBUG_ATOMIC_SLEEP=y is
set:

BUG: sleeping function called from invalid context at kernel/locking/mutex.c:283
in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 2817, name: bash
preempt_count: 1, expected: 0
RCU nest depth: 0, expected: 0
Call Trace:
 <TASK>
 dump_stack_lvl+0x36/0x50
 __might_resched+0x123/0x170
 mutex_lock+0x1e/0x50
 pds_vfio_put_lm_file+0x1e/0xa0 [pds_vfio_pci]
 pds_vfio_put_save_file+0x19/0x30 [pds_vfio_pci]
 pds_vfio_state_mutex_unlock+0x2e/0x80 [pds_vfio_pci]
 pci_reset_function+0x4b/0x70
 reset_store+0x5b/0xa0
 kernfs_fop_write_iter+0x137/0x1d0
 vfs_write+0x2de/0x410
 ksys_write+0x5d/0xd0
 do_syscall_64+0x3b/0x90
 entry_SYSCALL_64_after_hwframe+0x6e/0xd8

This can happen if pds_vfio_put_restore_file() and/or
pds_vfio_put_save_file() grab the mutex_lock(&lm_file->lock)
while the spin_lock(&pds_vfio->reset_lock) is held, which can
happen during while calling pds_vfio_state_mutex_unlock().

Fix this by changing the reset_lock to reset_mutex so there are no such
conerns. Also, make sure to destroy the reset_mutex in the driver specific
VFIO device release function.

This also fixes a spinlock bad magic BUG that was caused
by not calling spinlock_init() on the reset_lock. Since, the lock is
being changed to a mutex, make sure to call mutex_init() on it.

Reported-by: Dan Carpenter <dan.carpenter@linaro.org>
Closes: https://lore.kernel.org/kvm/1f9bc27b-3de9-4891-9687-ba2820c1b390@moroto.mountain/
Fixes: bb500db ("vfio/pds: Add VFIO live migration support")
Signed-off-by: Brett Creeley <brett.creeley@amd.com>
Reviewed-by: Shannon Nelson <shannon.nelson@amd.com>
Link: https://lore.kernel.org/r/20231122192532.25791-3-brett.creeley@amd.com
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
  • Loading branch information
Brett Creeley authored and Alex Williamson committed Nov 27, 2023
1 parent 91aeb56 commit ae2667c
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 9 deletions.
4 changes: 2 additions & 2 deletions drivers/vfio/pci/pds/pci_drv.c
Original file line number Diff line number Diff line change
Expand Up @@ -55,10 +55,10 @@ static void pds_vfio_recovery(struct pds_vfio_pci_device *pds_vfio)
* VFIO_DEVICE_STATE_RUNNING.
*/
if (deferred_reset_needed) {
spin_lock(&pds_vfio->reset_lock);
mutex_lock(&pds_vfio->reset_mutex);
pds_vfio->deferred_reset = true;
pds_vfio->deferred_reset_state = VFIO_DEVICE_STATE_ERROR;
spin_unlock(&pds_vfio->reset_lock);
mutex_unlock(&pds_vfio->reset_mutex);
}
}

Expand Down
14 changes: 8 additions & 6 deletions drivers/vfio/pci/pds/vfio_dev.c
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ struct pds_vfio_pci_device *pds_vfio_pci_drvdata(struct pci_dev *pdev)
void pds_vfio_state_mutex_unlock(struct pds_vfio_pci_device *pds_vfio)
{
again:
spin_lock(&pds_vfio->reset_lock);
mutex_lock(&pds_vfio->reset_mutex);
if (pds_vfio->deferred_reset) {
pds_vfio->deferred_reset = false;
if (pds_vfio->state == VFIO_DEVICE_STATE_ERROR) {
Expand All @@ -39,23 +39,23 @@ void pds_vfio_state_mutex_unlock(struct pds_vfio_pci_device *pds_vfio)
}
pds_vfio->state = pds_vfio->deferred_reset_state;
pds_vfio->deferred_reset_state = VFIO_DEVICE_STATE_RUNNING;
spin_unlock(&pds_vfio->reset_lock);
mutex_unlock(&pds_vfio->reset_mutex);
goto again;
}
mutex_unlock(&pds_vfio->state_mutex);
spin_unlock(&pds_vfio->reset_lock);
mutex_unlock(&pds_vfio->reset_mutex);
}

void pds_vfio_reset(struct pds_vfio_pci_device *pds_vfio)
{
spin_lock(&pds_vfio->reset_lock);
mutex_lock(&pds_vfio->reset_mutex);
pds_vfio->deferred_reset = true;
pds_vfio->deferred_reset_state = VFIO_DEVICE_STATE_RUNNING;
if (!mutex_trylock(&pds_vfio->state_mutex)) {
spin_unlock(&pds_vfio->reset_lock);
mutex_unlock(&pds_vfio->reset_mutex);
return;
}
spin_unlock(&pds_vfio->reset_lock);
mutex_unlock(&pds_vfio->reset_mutex);
pds_vfio_state_mutex_unlock(pds_vfio);
}

Expand Down Expand Up @@ -156,6 +156,7 @@ static int pds_vfio_init_device(struct vfio_device *vdev)
pds_vfio->vf_id = vf_id;

mutex_init(&pds_vfio->state_mutex);
mutex_init(&pds_vfio->reset_mutex);

vdev->migration_flags = VFIO_MIGRATION_STOP_COPY | VFIO_MIGRATION_P2P;
vdev->mig_ops = &pds_vfio_lm_ops;
Expand All @@ -177,6 +178,7 @@ static void pds_vfio_release_device(struct vfio_device *vdev)
vfio_coredev.vdev);

mutex_destroy(&pds_vfio->state_mutex);
mutex_destroy(&pds_vfio->reset_mutex);
vfio_pci_core_release_dev(vdev);
}

Expand Down
2 changes: 1 addition & 1 deletion drivers/vfio/pci/pds/vfio_dev.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ struct pds_vfio_pci_device {
struct pds_vfio_dirty dirty;
struct mutex state_mutex; /* protect migration state */
enum vfio_device_mig_state state;
spinlock_t reset_lock; /* protect reset_done flow */
struct mutex reset_mutex; /* protect reset_done flow */
u8 deferred_reset;
enum vfio_device_mig_state deferred_reset_state;
struct notifier_block nb;
Expand Down

0 comments on commit ae2667c

Please sign in to comment.