Skip to content

Commit

Permalink
Merge tag 'vfio-v3.12-rc0' of git://github.com/awilliam/linux-vfio
Browse files Browse the repository at this point in the history
Pull VFIO update from Alex Williamson:
 "VFIO updates include safer default file flags for VFIO device fds, an
  external user interface exported to allow other modules to hold
  references to VFIO groups, a fix to test for extended config space on
  PCIe and PCI-x, and new hot reset interfaces for PCI devices which
  allows the user to do PCI bus/slot resets when all of the devices
  affected by the reset are owned by the user.

  For this last feature, the PCI bus reset interface, I depend on
  changes already merged from Bjorn's PCI pull request.  I therefore
  merged my tree up to commit cb3e433, which I think was the correct
  action, but as Stephen Rothwell noted, I failed to provide a commit
  message indicating why the merge was required.  Sorry for that.
  Thanks, Alex"

* tag 'vfio-v3.12-rc0' of git://github.com/awilliam/linux-vfio:
  vfio: fix documentation
  vfio-pci: PCI hot reset interface
  vfio-pci: Test for extended config space
  vfio-pci: Use fdget() rather than eventfd_fget()
  vfio: Add O_CLOEXEC flag to vfio device fd
  vfio: use get_unused_fd_flags(0) instead of get_unused_fd()
  vfio: add external user support
  • Loading branch information
Linus Torvalds committed Sep 9, 2013
2 parents bf97293 + dac09b5 commit d75671e
Show file tree
Hide file tree
Showing 7 changed files with 421 additions and 28 deletions.
8 changes: 4 additions & 4 deletions Documentation/vfio.txt
Original file line number Diff line number Diff line change
Expand Up @@ -167,8 +167,8 @@ group and can access them as follows:
int container, group, device, i;
struct vfio_group_status group_status =
{ .argsz = sizeof(group_status) };
struct vfio_iommu_x86_info iommu_info = { .argsz = sizeof(iommu_info) };
struct vfio_iommu_x86_dma_map dma_map = { .argsz = sizeof(dma_map) };
struct vfio_iommu_type1_info iommu_info = { .argsz = sizeof(iommu_info) };
struct vfio_iommu_type1_dma_map dma_map = { .argsz = sizeof(dma_map) };
struct vfio_device_info device_info = { .argsz = sizeof(device_info) };

/* Create a new container */
Expand All @@ -193,7 +193,7 @@ group and can access them as follows:
ioctl(group, VFIO_GROUP_SET_CONTAINER, &container);

/* Enable the IOMMU model we want */
ioctl(container, VFIO_SET_IOMMU, VFIO_TYPE1_IOMMU)
ioctl(container, VFIO_SET_IOMMU, VFIO_TYPE1_IOMMU);

/* Get addition IOMMU info */
ioctl(container, VFIO_IOMMU_GET_INFO, &iommu_info);
Expand Down Expand Up @@ -229,7 +229,7 @@ group and can access them as follows:

irq.index = i;

ioctl(device, VFIO_DEVICE_GET_IRQ_INFO, &reg);
ioctl(device, VFIO_DEVICE_GET_IRQ_INFO, &irq);

/* Setup IRQs... eventfds, VFIO_DEVICE_SET_IRQS */
}
Expand Down
286 changes: 285 additions & 1 deletion drivers/vfio/pci/vfio_pci.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

#include <linux/device.h>
#include <linux/eventfd.h>
#include <linux/file.h>
#include <linux/interrupt.h>
#include <linux/iommu.h>
#include <linux/module.h>
Expand Down Expand Up @@ -227,6 +228,110 @@ static int vfio_pci_get_irq_count(struct vfio_pci_device *vdev, int irq_type)
return 0;
}

static int vfio_pci_count_devs(struct pci_dev *pdev, void *data)
{
(*(int *)data)++;
return 0;
}

struct vfio_pci_fill_info {
int max;
int cur;
struct vfio_pci_dependent_device *devices;
};

static int vfio_pci_fill_devs(struct pci_dev *pdev, void *data)
{
struct vfio_pci_fill_info *fill = data;
struct iommu_group *iommu_group;

if (fill->cur == fill->max)
return -EAGAIN; /* Something changed, try again */

iommu_group = iommu_group_get(&pdev->dev);
if (!iommu_group)
return -EPERM; /* Cannot reset non-isolated devices */

fill->devices[fill->cur].group_id = iommu_group_id(iommu_group);
fill->devices[fill->cur].segment = pci_domain_nr(pdev->bus);
fill->devices[fill->cur].bus = pdev->bus->number;
fill->devices[fill->cur].devfn = pdev->devfn;
fill->cur++;
iommu_group_put(iommu_group);
return 0;
}

struct vfio_pci_group_entry {
struct vfio_group *group;
int id;
};

struct vfio_pci_group_info {
int count;
struct vfio_pci_group_entry *groups;
};

static int vfio_pci_validate_devs(struct pci_dev *pdev, void *data)
{
struct vfio_pci_group_info *info = data;
struct iommu_group *group;
int id, i;

group = iommu_group_get(&pdev->dev);
if (!group)
return -EPERM;

id = iommu_group_id(group);

for (i = 0; i < info->count; i++)
if (info->groups[i].id == id)
break;

iommu_group_put(group);

return (i == info->count) ? -EINVAL : 0;
}

static bool vfio_pci_dev_below_slot(struct pci_dev *pdev, struct pci_slot *slot)
{
for (; pdev; pdev = pdev->bus->self)
if (pdev->bus == slot->bus)
return (pdev->slot == slot);
return false;
}

struct vfio_pci_walk_info {
int (*fn)(struct pci_dev *, void *data);
void *data;
struct pci_dev *pdev;
bool slot;
int ret;
};

static int vfio_pci_walk_wrapper(struct pci_dev *pdev, void *data)
{
struct vfio_pci_walk_info *walk = data;

if (!walk->slot || vfio_pci_dev_below_slot(pdev, walk->pdev->slot))
walk->ret = walk->fn(pdev, walk->data);

return walk->ret;
}

static int vfio_pci_for_each_slot_or_bus(struct pci_dev *pdev,
int (*fn)(struct pci_dev *,
void *data), void *data,
bool slot)
{
struct vfio_pci_walk_info walk = {
.fn = fn, .data = data, .pdev = pdev, .slot = slot, .ret = 0,
};

pci_walk_bus(pdev->bus, vfio_pci_walk_wrapper, &walk);

return walk.ret;
}

static long vfio_pci_ioctl(void *device_data,
unsigned int cmd, unsigned long arg)
{
Expand Down Expand Up @@ -407,10 +512,189 @@ static long vfio_pci_ioctl(void *device_data,

return ret;

} else if (cmd == VFIO_DEVICE_RESET)
} else if (cmd == VFIO_DEVICE_RESET) {
return vdev->reset_works ?
pci_reset_function(vdev->pdev) : -EINVAL;

} else if (cmd == VFIO_DEVICE_GET_PCI_HOT_RESET_INFO) {
struct vfio_pci_hot_reset_info hdr;
struct vfio_pci_fill_info fill = { 0 };
struct vfio_pci_dependent_device *devices = NULL;
bool slot = false;
int ret = 0;

minsz = offsetofend(struct vfio_pci_hot_reset_info, count);

if (copy_from_user(&hdr, (void __user *)arg, minsz))
return -EFAULT;

if (hdr.argsz < minsz)
return -EINVAL;

hdr.flags = 0;

/* Can we do a slot or bus reset or neither? */
if (!pci_probe_reset_slot(vdev->pdev->slot))
slot = true;
else if (pci_probe_reset_bus(vdev->pdev->bus))
return -ENODEV;

/* How many devices are affected? */
ret = vfio_pci_for_each_slot_or_bus(vdev->pdev,
vfio_pci_count_devs,
&fill.max, slot);
if (ret)
return ret;

WARN_ON(!fill.max); /* Should always be at least one */

/*
* If there's enough space, fill it now, otherwise return
* -ENOSPC and the number of devices affected.
*/
if (hdr.argsz < sizeof(hdr) + (fill.max * sizeof(*devices))) {
ret = -ENOSPC;
hdr.count = fill.max;
goto reset_info_exit;
}

devices = kcalloc(fill.max, sizeof(*devices), GFP_KERNEL);
if (!devices)
return -ENOMEM;

fill.devices = devices;

ret = vfio_pci_for_each_slot_or_bus(vdev->pdev,
vfio_pci_fill_devs,
&fill, slot);

/*
* If a device was removed between counting and filling,
* we may come up short of fill.max. If a device was
* added, we'll have a return of -EAGAIN above.
*/
if (!ret)
hdr.count = fill.cur;

reset_info_exit:
if (copy_to_user((void __user *)arg, &hdr, minsz))
ret = -EFAULT;

if (!ret) {
if (copy_to_user((void __user *)(arg + minsz), devices,
hdr.count * sizeof(*devices)))
ret = -EFAULT;
}

kfree(devices);
return ret;

} else if (cmd == VFIO_DEVICE_PCI_HOT_RESET) {
struct vfio_pci_hot_reset hdr;
int32_t *group_fds;
struct vfio_pci_group_entry *groups;
struct vfio_pci_group_info info;
bool slot = false;
int i, count = 0, ret = 0;

minsz = offsetofend(struct vfio_pci_hot_reset, count);

if (copy_from_user(&hdr, (void __user *)arg, minsz))
return -EFAULT;

if (hdr.argsz < minsz || hdr.flags)
return -EINVAL;

/* Can we do a slot or bus reset or neither? */
if (!pci_probe_reset_slot(vdev->pdev->slot))
slot = true;
else if (pci_probe_reset_bus(vdev->pdev->bus))
return -ENODEV;

/*
* We can't let userspace give us an arbitrarily large
* buffer to copy, so verify how many we think there
* could be. Note groups can have multiple devices so
* one group per device is the max.
*/
ret = vfio_pci_for_each_slot_or_bus(vdev->pdev,
vfio_pci_count_devs,
&count, slot);
if (ret)
return ret;

/* Somewhere between 1 and count is OK */
if (!hdr.count || hdr.count > count)
return -EINVAL;

group_fds = kcalloc(hdr.count, sizeof(*group_fds), GFP_KERNEL);
groups = kcalloc(hdr.count, sizeof(*groups), GFP_KERNEL);
if (!group_fds || !groups) {
kfree(group_fds);
kfree(groups);
return -ENOMEM;
}

if (copy_from_user(group_fds, (void __user *)(arg + minsz),
hdr.count * sizeof(*group_fds))) {
kfree(group_fds);
kfree(groups);
return -EFAULT;
}

/*
* For each group_fd, get the group through the vfio external
* user interface and store the group and iommu ID. This
* ensures the group is held across the reset.
*/
for (i = 0; i < hdr.count; i++) {
struct vfio_group *group;
struct fd f = fdget(group_fds[i]);
if (!f.file) {
ret = -EBADF;
break;
}

group = vfio_group_get_external_user(f.file);
fdput(f);
if (IS_ERR(group)) {
ret = PTR_ERR(group);
break;
}

groups[i].group = group;
groups[i].id = vfio_external_user_iommu_id(group);
}

kfree(group_fds);

/* release reference to groups on error */
if (ret)
goto hot_reset_release;

info.count = hdr.count;
info.groups = groups;

/*
* Test whether all the affected devices are contained
* by the set of groups provided by the user.
*/
ret = vfio_pci_for_each_slot_or_bus(vdev->pdev,
vfio_pci_validate_devs,
&info, slot);
if (!ret)
/* User has access, do the reset */
ret = slot ? pci_reset_slot(vdev->pdev->slot) :
pci_reset_bus(vdev->pdev->bus);

hot_reset_release:
for (i--; i >= 0; i--)
vfio_group_put_external_user(groups[i].group);

kfree(groups);
return ret;
}

return -ENOTTY;
}

Expand Down
11 changes: 8 additions & 3 deletions drivers/vfio/pci/vfio_pci_config.c
Original file line number Diff line number Diff line change
Expand Up @@ -1012,6 +1012,7 @@ static int vfio_vc_cap_len(struct vfio_pci_device *vdev, u16 pos)
static int vfio_cap_len(struct vfio_pci_device *vdev, u8 cap, u8 pos)
{
struct pci_dev *pdev = vdev->pdev;
u32 dword;
u16 word;
u8 byte;
int ret;
Expand All @@ -1025,7 +1026,9 @@ static int vfio_cap_len(struct vfio_pci_device *vdev, u8 cap, u8 pos)
return pcibios_err_to_errno(ret);

if (PCI_X_CMD_VERSION(word)) {
vdev->extended_caps = true;
/* Test for extended capabilities */
pci_read_config_dword(pdev, PCI_CFG_SPACE_SIZE, &dword);
vdev->extended_caps = (dword != 0);
return PCI_CAP_PCIX_SIZEOF_V2;
} else
return PCI_CAP_PCIX_SIZEOF_V0;
Expand All @@ -1037,9 +1040,11 @@ static int vfio_cap_len(struct vfio_pci_device *vdev, u8 cap, u8 pos)

return byte;
case PCI_CAP_ID_EXP:
/* length based on version */
vdev->extended_caps = true;
/* Test for extended capabilities */
pci_read_config_dword(pdev, PCI_CFG_SPACE_SIZE, &dword);
vdev->extended_caps = (dword != 0);

/* length based on version */
if ((pcie_caps_reg(pdev) & PCI_EXP_FLAGS_VERS) == 1)
return PCI_CAP_EXP_ENDPOINT_SIZEOF_V1;
else
Expand Down
Loading

0 comments on commit d75671e

Please sign in to comment.