Skip to content

Commit

Permalink
vfio/mlx5: Fallback to STOP_COPY upon specific PRE_COPY error
Browse files Browse the repository at this point in the history
Before a SAVE command is issued, a QUERY command is issued in order to
know the device data size.
When PRE_COPY is used, the above commands are issued while the device
is running. Thus, it is possible that between the QUERY and the SAVE
commands the state of the device changes significantly, causing the
SAVE to fail.

Currently, if a SAVE command fails, the driver fails the migration.
In the above case, don't fail the migration, but don't allow new
SAVEs to be executed while the device is in a RUNNING state.
Once the device is moved to STOP_COPY, SAVE can be executed again
and the full device state will be read.

Signed-off-by: Shay Drory <shayd@nvidia.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Yishai Hadas <yishaih@nvidia.com>
Link: https://lore.kernel.org/r/20221206083438.37807-14-yishaih@nvidia.com
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
  • Loading branch information
Shay Drory authored and Alex Williamson committed Dec 6, 2022
1 parent 34e2f27 commit d6e18a4
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 3 deletions.
27 changes: 26 additions & 1 deletion drivers/vfio/pci/mlx5/cmd.c
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,19 @@ int mlx5vf_cmd_query_vhca_migration_state(struct mlx5vf_pci_core_device *mvdev,
ret = wait_for_completion_interruptible(&mvdev->saving_migf->save_comp);
if (ret)
return ret;
if (mvdev->saving_migf->state ==
MLX5_MIGF_STATE_PRE_COPY_ERROR) {
/*
* In case we had a PRE_COPY error, only query full
* image for final image
*/
if (!(query_flags & MLX5VF_QUERY_FINAL)) {
*state_size = 0;
complete(&mvdev->saving_migf->save_comp);
return 0;
}
query_flags &= ~MLX5VF_QUERY_INC;
}
}

MLX5_SET(query_vhca_migration_state_in, in, opcode,
Expand Down Expand Up @@ -442,7 +455,10 @@ void mlx5vf_mig_file_cleanup_cb(struct work_struct *_work)
mlx5vf_put_data_buffer(async_data->buf);
if (async_data->header_buf)
mlx5vf_put_data_buffer(async_data->header_buf);
migf->state = MLX5_MIGF_STATE_ERROR;
if (async_data->status == MLX5_CMD_STAT_BAD_RES_STATE_ERR)
migf->state = MLX5_MIGF_STATE_PRE_COPY_ERROR;
else
migf->state = MLX5_MIGF_STATE_ERROR;
wake_up_interruptible(&migf->poll_wait);
}
mutex_unlock(&migf->lock);
Expand Down Expand Up @@ -511,6 +527,8 @@ static void mlx5vf_save_callback(int status, struct mlx5_async_work *context)
* The error and the cleanup flows can't run from an
* interrupt context
*/
if (status == -EREMOTEIO)
status = MLX5_GET(save_vhca_state_out, async_data->out, status);
async_data->status = status;
queue_work(migf->mvdev->cb_wq, &async_data->work);
}
Expand All @@ -534,6 +552,13 @@ int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev,
if (err)
return err;

if (migf->state == MLX5_MIGF_STATE_PRE_COPY_ERROR)
/*
* In case we had a PRE_COPY error, SAVE is triggered only for
* the final image, read device full image.
*/
inc = false;

MLX5_SET(save_vhca_state_in, in, opcode,
MLX5_CMD_OP_SAVE_VHCA_STATE);
MLX5_SET(save_vhca_state_in, in, op_mod, 0);
Expand Down
2 changes: 2 additions & 0 deletions drivers/vfio/pci/mlx5/cmd.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

enum mlx5_vf_migf_state {
MLX5_MIGF_STATE_ERROR = 1,
MLX5_MIGF_STATE_PRE_COPY_ERROR,
MLX5_MIGF_STATE_PRE_COPY,
MLX5_MIGF_STATE_SAVE_LAST,
MLX5_MIGF_STATE_COMPLETE,
Expand Down Expand Up @@ -157,6 +158,7 @@ struct mlx5vf_pci_core_device {

/* Bit flags for the query_flags argument of mlx5vf_cmd_query_vhca_migration_state(). */
enum {
	/*
	 * Cleared by the query path after a PRE_COPY error so that the
	 * full image is queried; presumably selects an incremental query.
	 */
	MLX5VF_QUERY_INC = 1UL << 0,
	/*
	 * Marks a query for the final image. After a PRE_COPY error, a
	 * query without this flag reports a state size of 0.
	 */
	MLX5VF_QUERY_FINAL = 1UL << 1,
};

int mlx5vf_cmd_suspend_vhca(struct mlx5vf_pci_core_device *mvdev, u16 op_mod);
Expand Down
6 changes: 4 additions & 2 deletions drivers/vfio/pci/mlx5/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,7 @@ static ssize_t mlx5vf_save_read(struct file *filp, char __user *buf, size_t len,
if (wait_event_interruptible(migf->poll_wait,
!list_empty(&migf->buf_list) ||
migf->state == MLX5_MIGF_STATE_ERROR ||
migf->state == MLX5_MIGF_STATE_PRE_COPY_ERROR ||
migf->state == MLX5_MIGF_STATE_PRE_COPY ||
migf->state == MLX5_MIGF_STATE_COMPLETE))
return -ERESTARTSYS;
Expand All @@ -238,7 +239,8 @@ static ssize_t mlx5vf_save_read(struct file *filp, char __user *buf, size_t len,
if (first_loop_call) {
first_loop_call = false;
/* Temporary end of file as part of PRE_COPY */
if (end_of_data && migf->state == MLX5_MIGF_STATE_PRE_COPY) {
if (end_of_data && (migf->state == MLX5_MIGF_STATE_PRE_COPY ||
migf->state == MLX5_MIGF_STATE_PRE_COPY_ERROR)) {
done = -ENOMSG;
goto out_unlock;
}
Expand Down Expand Up @@ -431,7 +433,7 @@ static int mlx5vf_pci_save_device_inc_data(struct mlx5vf_pci_core_device *mvdev)
return -ENODEV;

ret = mlx5vf_cmd_query_vhca_migration_state(mvdev, &length,
MLX5VF_QUERY_INC);
MLX5VF_QUERY_INC | MLX5VF_QUERY_FINAL);
if (ret)
goto err;

Expand Down

0 comments on commit d6e18a4

Please sign in to comment.