Skip to content

Commit

Permalink
vfio/pds: Add VFIO live migration support
Browse files Browse the repository at this point in the history
Add live migration support via the VFIO subsystem. The migration
implementation aligns with the definition from uapi/vfio.h and uses
the pds_core PF's adminq for device configuration.

The ability to suspend, resume, and transfer VF device state data is
included along with the required admin queue command structures and
implementations.

PDS_LM_CMD_SUSPEND and PDS_LM_CMD_SUSPEND_STATUS are added to support
the VF device suspend operation.

PDS_LM_CMD_RESUME is added to support the VF device resume operation.

PDS_LM_CMD_STATE_SIZE is added to determine the exact size of the VF
device state data.

PDS_LM_CMD_SAVE is added to get the VF device state data.

PDS_LM_CMD_RESTORE is added to restore the VF device with the
previously saved data from PDS_LM_CMD_SAVE.

PDS_LM_CMD_HOST_VF_STATUS is added to notify the DSC/firmware when
a migration is in/not-in progress from the host's perspective. The
DSC/firmware can use this to clear/setup any necessary state related
to a migration.

Signed-off-by: Brett Creeley <brett.creeley@amd.com>
Signed-off-by: Shannon Nelson <shannon.nelson@amd.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Link: https://lore.kernel.org/r/20230807205755.29579-6-brett.creeley@amd.com
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
  • Loading branch information
Brett Creeley authored and Alex Williamson committed Aug 16, 2023
1 parent 63f77a7 commit bb500db
Show file tree
Hide file tree
Showing 9 changed files with 1,153 additions and 2 deletions.
1 change: 1 addition & 0 deletions drivers/vfio/pci/pds/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,6 @@ obj-$(CONFIG_PDS_VFIO_PCI) += pds-vfio-pci.o

pds-vfio-pci-y := \
cmds.o \
lm.o \
pci_drv.o \
vfio_dev.o
330 changes: 330 additions & 0 deletions drivers/vfio/pci/pds/cmds.c
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

#include <linux/io.h>
#include <linux/types.h>
#include <linux/delay.h>

#include <linux/pds/pds_common.h>
#include <linux/pds/pds_core_if.h>
Expand All @@ -11,6 +12,37 @@
#include "vfio_dev.h"
#include "cmds.h"

#define SUSPEND_TIMEOUT_S 5
#define SUSPEND_CHECK_INTERVAL_MS 1

static int pds_vfio_client_adminq_cmd(struct pds_vfio_pci_device *pds_vfio,
union pds_core_adminq_cmd *req,
union pds_core_adminq_comp *resp,
bool fast_poll)
{
struct pci_dev *pdev = pds_vfio_to_pci_dev(pds_vfio);
union pds_core_adminq_cmd cmd = {};
struct pdsc *pdsc;
int err;

/* Wrap the client request */
cmd.client_request.opcode = PDS_AQ_CMD_CLIENT_CMD;
cmd.client_request.client_id = cpu_to_le16(pds_vfio->client_id);
memcpy(cmd.client_request.client_cmd, req,
sizeof(cmd.client_request.client_cmd));

pdsc = pdsc_get_pf_struct(pdev);
if (IS_ERR(pdsc))
return PTR_ERR(pdsc);

err = pdsc_adminq_post(pdsc, &cmd, resp, fast_poll);
if (err && err != -EAGAIN)
dev_err(pds_vfio_to_dev(pds_vfio),
"client admin cmd failed: %pe\n", ERR_PTR(err));

return err;
}

int pds_vfio_register_client_cmd(struct pds_vfio_pci_device *pds_vfio)
{
struct pci_dev *pdev = pds_vfio_to_pci_dev(pds_vfio);
Expand Down Expand Up @@ -52,3 +84,301 @@ void pds_vfio_unregister_client_cmd(struct pds_vfio_pci_device *pds_vfio)

pds_vfio->client_id = 0;
}

static int
pds_vfio_suspend_wait_device_cmd(struct pds_vfio_pci_device *pds_vfio)
{
union pds_core_adminq_cmd cmd = {
.lm_suspend_status = {
.opcode = PDS_LM_CMD_SUSPEND_STATUS,
.vf_id = cpu_to_le16(pds_vfio->vf_id),
},
};
struct device *dev = pds_vfio_to_dev(pds_vfio);
union pds_core_adminq_comp comp = {};
unsigned long time_limit;
unsigned long time_start;
unsigned long time_done;
int err;

time_start = jiffies;
time_limit = time_start + HZ * SUSPEND_TIMEOUT_S;
do {
err = pds_vfio_client_adminq_cmd(pds_vfio, &cmd, &comp, true);
if (err != -EAGAIN)
break;

msleep(SUSPEND_CHECK_INTERVAL_MS);
} while (time_before(jiffies, time_limit));

time_done = jiffies;
dev_dbg(dev, "%s: vf%u: Suspend comp received in %d msecs\n", __func__,
pds_vfio->vf_id, jiffies_to_msecs(time_done - time_start));

/* Check the results */
if (time_after_eq(time_done, time_limit)) {
dev_err(dev, "%s: vf%u: Suspend comp timeout\n", __func__,
pds_vfio->vf_id);
err = -ETIMEDOUT;
}

return err;
}

int pds_vfio_suspend_device_cmd(struct pds_vfio_pci_device *pds_vfio, u8 type)
{
union pds_core_adminq_cmd cmd = {
.lm_suspend = {
.opcode = PDS_LM_CMD_SUSPEND,
.vf_id = cpu_to_le16(pds_vfio->vf_id),
.type = type,
},
};
struct device *dev = pds_vfio_to_dev(pds_vfio);
union pds_core_adminq_comp comp = {};
int err;

dev_dbg(dev, "vf%u: Suspend device\n", pds_vfio->vf_id);

/*
* The initial suspend request to the firmware starts the device suspend
* operation and the firmware returns success if it's started
* successfully.
*/
err = pds_vfio_client_adminq_cmd(pds_vfio, &cmd, &comp, true);
if (err) {
dev_err(dev, "vf%u: Suspend failed: %pe\n", pds_vfio->vf_id,
ERR_PTR(err));
return err;
}

/*
* The subsequent suspend status request(s) check if the firmware has
* completed the device suspend process.
*/
return pds_vfio_suspend_wait_device_cmd(pds_vfio);
}

int pds_vfio_resume_device_cmd(struct pds_vfio_pci_device *pds_vfio, u8 type)
{
union pds_core_adminq_cmd cmd = {
.lm_resume = {
.opcode = PDS_LM_CMD_RESUME,
.vf_id = cpu_to_le16(pds_vfio->vf_id),
.type = type,
},
};
struct device *dev = pds_vfio_to_dev(pds_vfio);
union pds_core_adminq_comp comp = {};

dev_dbg(dev, "vf%u: Resume device\n", pds_vfio->vf_id);

return pds_vfio_client_adminq_cmd(pds_vfio, &cmd, &comp, true);
}

int pds_vfio_get_lm_state_size_cmd(struct pds_vfio_pci_device *pds_vfio, u64 *size)
{
union pds_core_adminq_cmd cmd = {
.lm_state_size = {
.opcode = PDS_LM_CMD_STATE_SIZE,
.vf_id = cpu_to_le16(pds_vfio->vf_id),
},
};
struct device *dev = pds_vfio_to_dev(pds_vfio);
union pds_core_adminq_comp comp = {};
int err;

dev_dbg(dev, "vf%u: Get migration status\n", pds_vfio->vf_id);

err = pds_vfio_client_adminq_cmd(pds_vfio, &cmd, &comp, false);
if (err)
return err;

*size = le64_to_cpu(comp.lm_state_size.size);
return 0;
}

static int pds_vfio_dma_map_lm_file(struct device *dev,
enum dma_data_direction dir,
struct pds_vfio_lm_file *lm_file)
{
struct pds_lm_sg_elem *sgl, *sge;
struct scatterlist *sg;
dma_addr_t sgl_addr;
size_t sgl_size;
int err;
int i;

if (!lm_file)
return -EINVAL;

/* dma map file pages */
err = dma_map_sgtable(dev, &lm_file->sg_table, dir, 0);
if (err)
return err;

lm_file->num_sge = lm_file->sg_table.nents;

/* alloc sgl */
sgl_size = lm_file->num_sge * sizeof(struct pds_lm_sg_elem);
sgl = kzalloc(sgl_size, GFP_KERNEL);
if (!sgl) {
err = -ENOMEM;
goto out_unmap_sgtable;
}

/* fill sgl */
sge = sgl;
for_each_sgtable_dma_sg(&lm_file->sg_table, sg, i) {
sge->addr = cpu_to_le64(sg_dma_address(sg));
sge->len = cpu_to_le32(sg_dma_len(sg));
dev_dbg(dev, "addr = %llx, len = %u\n", sge->addr, sge->len);
sge++;
}

sgl_addr = dma_map_single(dev, sgl, sgl_size, DMA_TO_DEVICE);
if (dma_mapping_error(dev, sgl_addr)) {
err = -EIO;
goto out_free_sgl;
}

lm_file->sgl = sgl;
lm_file->sgl_addr = sgl_addr;

return 0;

out_free_sgl:
kfree(sgl);
out_unmap_sgtable:
lm_file->num_sge = 0;
dma_unmap_sgtable(dev, &lm_file->sg_table, dir, 0);
return err;
}

static void pds_vfio_dma_unmap_lm_file(struct device *dev,
enum dma_data_direction dir,
struct pds_vfio_lm_file *lm_file)
{
if (!lm_file)
return;

/* free sgl */
if (lm_file->sgl) {
dma_unmap_single(dev, lm_file->sgl_addr,
lm_file->num_sge * sizeof(*lm_file->sgl),
DMA_TO_DEVICE);
kfree(lm_file->sgl);
lm_file->sgl = NULL;
lm_file->sgl_addr = DMA_MAPPING_ERROR;
lm_file->num_sge = 0;
}

/* dma unmap file pages */
dma_unmap_sgtable(dev, &lm_file->sg_table, dir, 0);
}

int pds_vfio_get_lm_state_cmd(struct pds_vfio_pci_device *pds_vfio)
{
union pds_core_adminq_cmd cmd = {
.lm_save = {
.opcode = PDS_LM_CMD_SAVE,
.vf_id = cpu_to_le16(pds_vfio->vf_id),
},
};
struct pci_dev *pdev = pds_vfio_to_pci_dev(pds_vfio);
struct device *pdsc_dev = &pci_physfn(pdev)->dev;
union pds_core_adminq_comp comp = {};
struct pds_vfio_lm_file *lm_file;
int err;

dev_dbg(&pdev->dev, "vf%u: Get migration state\n", pds_vfio->vf_id);

lm_file = pds_vfio->save_file;

err = pds_vfio_dma_map_lm_file(pdsc_dev, DMA_FROM_DEVICE, lm_file);
if (err) {
dev_err(&pdev->dev, "failed to map save migration file: %pe\n",
ERR_PTR(err));
return err;
}

cmd.lm_save.sgl_addr = cpu_to_le64(lm_file->sgl_addr);
cmd.lm_save.num_sge = cpu_to_le32(lm_file->num_sge);

err = pds_vfio_client_adminq_cmd(pds_vfio, &cmd, &comp, false);
if (err)
dev_err(&pdev->dev, "failed to get migration state: %pe\n",
ERR_PTR(err));

pds_vfio_dma_unmap_lm_file(pdsc_dev, DMA_FROM_DEVICE, lm_file);

return err;
}

int pds_vfio_set_lm_state_cmd(struct pds_vfio_pci_device *pds_vfio)
{
union pds_core_adminq_cmd cmd = {
.lm_restore = {
.opcode = PDS_LM_CMD_RESTORE,
.vf_id = cpu_to_le16(pds_vfio->vf_id),
},
};
struct pci_dev *pdev = pds_vfio_to_pci_dev(pds_vfio);
struct device *pdsc_dev = &pci_physfn(pdev)->dev;
union pds_core_adminq_comp comp = {};
struct pds_vfio_lm_file *lm_file;
int err;

dev_dbg(&pdev->dev, "vf%u: Set migration state\n", pds_vfio->vf_id);

lm_file = pds_vfio->restore_file;

err = pds_vfio_dma_map_lm_file(pdsc_dev, DMA_TO_DEVICE, lm_file);
if (err) {
dev_err(&pdev->dev,
"failed to map restore migration file: %pe\n",
ERR_PTR(err));
return err;
}

cmd.lm_restore.sgl_addr = cpu_to_le64(lm_file->sgl_addr);
cmd.lm_restore.num_sge = cpu_to_le32(lm_file->num_sge);

err = pds_vfio_client_adminq_cmd(pds_vfio, &cmd, &comp, false);
if (err)
dev_err(&pdev->dev, "failed to set migration state: %pe\n",
ERR_PTR(err));

pds_vfio_dma_unmap_lm_file(pdsc_dev, DMA_TO_DEVICE, lm_file);

return err;
}

void pds_vfio_send_host_vf_lm_status_cmd(struct pds_vfio_pci_device *pds_vfio,
enum pds_lm_host_vf_status vf_status)
{
union pds_core_adminq_cmd cmd = {
.lm_host_vf_status = {
.opcode = PDS_LM_CMD_HOST_VF_STATUS,
.vf_id = cpu_to_le16(pds_vfio->vf_id),
.status = vf_status,
},
};
struct device *dev = pds_vfio_to_dev(pds_vfio);
union pds_core_adminq_comp comp = {};
int err;

dev_dbg(dev, "vf%u: Set host VF LM status: %u", pds_vfio->vf_id,
vf_status);
if (vf_status != PDS_LM_STA_IN_PROGRESS &&
vf_status != PDS_LM_STA_NONE) {
dev_warn(dev, "Invalid host VF migration status, %d\n",
vf_status);
return;
}

err = pds_vfio_client_adminq_cmd(pds_vfio, &cmd, &comp, false);
if (err)
dev_warn(dev, "failed to send host VF migration status: %pe\n",
ERR_PTR(err));
}
8 changes: 7 additions & 1 deletion drivers/vfio/pci/pds/cmds.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,11 @@

int pds_vfio_register_client_cmd(struct pds_vfio_pci_device *pds_vfio);
void pds_vfio_unregister_client_cmd(struct pds_vfio_pci_device *pds_vfio);

int pds_vfio_suspend_device_cmd(struct pds_vfio_pci_device *pds_vfio, u8 type);
int pds_vfio_resume_device_cmd(struct pds_vfio_pci_device *pds_vfio, u8 type);
int pds_vfio_get_lm_state_size_cmd(struct pds_vfio_pci_device *pds_vfio, u64 *size);
int pds_vfio_get_lm_state_cmd(struct pds_vfio_pci_device *pds_vfio);
int pds_vfio_set_lm_state_cmd(struct pds_vfio_pci_device *pds_vfio);
void pds_vfio_send_host_vf_lm_status_cmd(struct pds_vfio_pci_device *pds_vfio,
enum pds_lm_host_vf_status vf_status);
#endif /* _CMDS_H_ */
Loading

0 comments on commit bb500db

Please sign in to comment.