vfio/mlx5: Use its own PCI reset_done error handler
Register its own handler for pci_error_handlers.reset_done and update
state accordingly.

Link: https://lore.kernel.org/all/20220224142024.147653-16-yishaih@nvidia.com
Reviewed-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Yishai Hadas <yishaih@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Commit: 88faa5e8ea
Parent: 915076f70e
Committed by: Leon Romanovsky
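
The interesting part of the patch is the deferred-reset handshake: reset_done
must not block on state_mutex, because the VFIO layers above hold locks across
the reset that could order against state_mutex and mm_lock (the ABBA hazard
described in the diff below). Instead, the handler sets a flag under a spinlock
and tries the mutex; whichever context actually owns the mutex performs the
reset cleanup when it unlocks. The following is a minimal userspace analogue of
that locking shape, using pthreads and hypothetical names (dev_state,
reset_done_handler, do_reset_cleanup); it is a sketch of the pattern only, not
the driver's code.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct dev_state {
	pthread_mutex_t state_mutex;	/* protects migration state */
	pthread_spinlock_t reset_lock;	/* protects deferred_reset */
	bool deferred_reset;
};

static void do_reset_cleanup(struct dev_state *d)
{
	puts("reset cleanup runs with state_mutex held");
}

/* Replaces every plain unlock of state_mutex, as in the patch below. */
static void state_mutex_unlock(struct dev_state *d)
{
again:
	pthread_spin_lock(&d->reset_lock);
	if (d->deferred_reset) {
		d->deferred_reset = false;
		pthread_spin_unlock(&d->reset_lock);
		do_reset_cleanup(d);
		goto again;	/* a reset may have been deferred meanwhile */
	}
	pthread_mutex_unlock(&d->state_mutex);
	pthread_spin_unlock(&d->reset_lock);
}

/* Reset notification: never sleeps on state_mutex. */
static void reset_done_handler(struct dev_state *d)
{
	pthread_spin_lock(&d->reset_lock);
	d->deferred_reset = true;
	if (pthread_mutex_trylock(&d->state_mutex) != 0) {
		/* Another context owns the mutex; it will see the flag. */
		pthread_spin_unlock(&d->reset_lock);
		return;
	}
	pthread_spin_unlock(&d->reset_lock);
	state_mutex_unlock(d);	/* we own the mutex; drain the flag now */
}

int main(void)
{
	struct dev_state d = { .deferred_reset = false };

	pthread_mutex_init(&d.state_mutex, NULL);
	pthread_spin_init(&d.reset_lock, PTHREAD_PROCESS_PRIVATE);

	pthread_mutex_lock(&d.state_mutex);	/* a migration op in flight */
	reset_done_handler(&d);			/* trylock fails, defers */
	state_mutex_unlock(&d);			/* cleanup happens here */
	return 0;
}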
--- a/drivers/vfio/pci/mlx5/main.c
+++ b/drivers/vfio/pci/mlx5/main.c
@@ -29,9 +29,12 @@ struct mlx5vf_pci_core_device {
 	struct vfio_pci_core_device core_device;
 	u16 vhca_id;
 	u8 migrate_cap:1;
+	u8 deferred_reset:1;
 	/* protect migration state */
 	struct mutex state_mutex;
 	enum vfio_device_mig_state mig_state;
+	/* protect the reset_done flow */
+	spinlock_t reset_lock;
 	struct mlx5_vf_migration_file *resuming_migf;
 	struct mlx5_vf_migration_file *saving_migf;
 };
@@ -437,6 +440,25 @@ mlx5vf_pci_step_device_state_locked(struct mlx5vf_pci_core_device *mvdev,
 	return ERR_PTR(-EINVAL);
 }
 
+/*
+ * This function is called in all state_mutex unlock cases to
+ * handle a 'deferred_reset' if exists.
+ */
+static void mlx5vf_state_mutex_unlock(struct mlx5vf_pci_core_device *mvdev)
+{
+again:
+	spin_lock(&mvdev->reset_lock);
+	if (mvdev->deferred_reset) {
+		mvdev->deferred_reset = false;
+		spin_unlock(&mvdev->reset_lock);
+		mvdev->mig_state = VFIO_DEVICE_STATE_RUNNING;
+		mlx5vf_disable_fds(mvdev);
+		goto again;
+	}
+	mutex_unlock(&mvdev->state_mutex);
+	spin_unlock(&mvdev->reset_lock);
+}
+
 static struct file *
 mlx5vf_pci_set_device_state(struct vfio_device *vdev,
 			    enum vfio_device_mig_state new_state)
@@ -465,7 +487,7 @@ mlx5vf_pci_set_device_state(struct vfio_device *vdev,
 			break;
 		}
 	}
-	mutex_unlock(&mvdev->state_mutex);
+	mlx5vf_state_mutex_unlock(mvdev);
 	return res;
 }
 
@@ -477,10 +499,34 @@ static int mlx5vf_pci_get_device_state(struct vfio_device *vdev,
 
 	mutex_lock(&mvdev->state_mutex);
 	*curr_state = mvdev->mig_state;
-	mutex_unlock(&mvdev->state_mutex);
+	mlx5vf_state_mutex_unlock(mvdev);
 	return 0;
 }
 
+static void mlx5vf_pci_aer_reset_done(struct pci_dev *pdev)
+{
+	struct mlx5vf_pci_core_device *mvdev = dev_get_drvdata(&pdev->dev);
+
+	if (!mvdev->migrate_cap)
+		return;
+
+	/*
+	 * As the higher VFIO layers are holding locks across reset and using
+	 * those same locks with the mm_lock we need to prevent ABBA deadlock
+	 * with the state_mutex and mm_lock.
+	 * In case the state_mutex was taken already we defer the cleanup work
+	 * to the unlock flow of the other running context.
+	 */
+	spin_lock(&mvdev->reset_lock);
+	mvdev->deferred_reset = true;
+	if (!mutex_trylock(&mvdev->state_mutex)) {
+		spin_unlock(&mvdev->reset_lock);
+		return;
+	}
+	spin_unlock(&mvdev->reset_lock);
+	mlx5vf_state_mutex_unlock(mvdev);
+}
+
 static int mlx5vf_pci_open_device(struct vfio_device *core_vdev)
 {
 	struct mlx5vf_pci_core_device *mvdev = container_of(
@@ -562,6 +608,7 @@ static int mlx5vf_pci_probe(struct pci_dev *pdev,
 					VFIO_MIGRATION_STOP_COPY |
 					VFIO_MIGRATION_P2P;
 			mutex_init(&mvdev->state_mutex);
+			spin_lock_init(&mvdev->reset_lock);
 		}
 		mlx5_vf_put_core_dev(mdev);
 	}
@@ -596,11 +643,17 @@ static const struct pci_device_id mlx5vf_pci_table[] = {
 
 MODULE_DEVICE_TABLE(pci, mlx5vf_pci_table);
 
+static const struct pci_error_handlers mlx5vf_err_handlers = {
+	.reset_done = mlx5vf_pci_aer_reset_done,
+	.error_detected = vfio_pci_core_aer_err_detected,
+};
+
 static struct pci_driver mlx5vf_pci_driver = {
 	.name = KBUILD_MODNAME,
 	.id_table = mlx5vf_pci_table,
 	.probe = mlx5vf_pci_probe,
 	.remove = mlx5vf_pci_remove,
+	.err_handler = &mlx5vf_err_handlers,
 };
 
 static void __exit mlx5vf_pci_cleanup(void)
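
For readers less familiar with the PCI error-handling hooks: the PCI core
invokes .reset_done after a function reset completes, which is exactly the
point where a driver must reconcile any state the reset invalidated. Below is
a minimal, hypothetical driver showing just the registration plumbing this
patch relies on; the demo_* names and the vendor/device IDs are placeholders,
not anything from this commit.

#include <linux/module.h>
#include <linux/pci.h>

static void demo_reset_done(struct pci_dev *pdev)
{
	/* Runs after a PCI function reset completes; fix up driver state. */
	pci_info(pdev, "reset done, reconciling device state\n");
}

static const struct pci_error_handlers demo_err_handlers = {
	.reset_done = demo_reset_done,
};

static const struct pci_device_id demo_ids[] = {
	{ PCI_DEVICE(0x1234, 0x5678) },	/* placeholder vendor/device */
	{}
};
MODULE_DEVICE_TABLE(pci, demo_ids);

static int demo_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
	return pcim_enable_device(pdev);
}

static struct pci_driver demo_driver = {
	.name = "demo-reset-done",
	.id_table = demo_ids,
	.probe = demo_probe,
	.err_handler = &demo_err_handlers,
};
module_pci_driver(demo_driver);

MODULE_LICENSE("GPL");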