iommufd: Implement sw_msi support natively
iommufd has a model where the iommu_domain can be changed while the VFIO
device is attached. In this case, the MSI should continue to work. This
corner case has not worked because the dma-iommu implementation of sw_msi
is tied to a single domain.

Implement the sw_msi mapping directly and use a global per-fd table to
associate assigned IOVA to the MSI pages. This allows the MSI pages to
be loaded into a domain before it is attached, ensuring that MSI is not
disrupted.

Link: https://patch.msgid.link/r/e13d23eeacd67c0a692fc468c85b483f4dd51c57.1740014950.git.nicolinc@nvidia.com
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Jason Gunthorpe committed Feb 27, 2025
1 parent 748706d commit 40f5175
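
For context, the corner case the commit message describes can be exercised from userspace through the VFIO cdev/iommufd UAPI: bind a device to an iommufd and switch it between page tables while its driver is using MSIs. The sketch below is illustrative only and is not part of the patch; the device path is hypothetical, error handling and DMA mappings are omitted, and the ioctl structures come from the upstream linux/vfio.h and linux/iommufd.h UAPI headers.

/*
 * Illustrative userspace flow (not from this patch): attach, then replace the
 * page table while MSIs are in use. With this commit the software MSI pages
 * are installed into the new domain from the per-fd table, so interrupts keep
 * working across the switch. "/dev/vfio/devices/vfio0" is a hypothetical path.
 */
#include <fcntl.h>
#include <sys/ioctl.h>
#include <linux/iommufd.h>
#include <linux/vfio.h>

int main(void)
{
	int iommufd = open("/dev/iommu", O_RDWR);
	int devfd = open("/dev/vfio/devices/vfio0", O_RDWR);

	struct vfio_device_bind_iommufd bind = {
		.argsz = sizeof(bind), .iommufd = iommufd,
	};
	ioctl(devfd, VFIO_DEVICE_BIND_IOMMUFD, &bind);

	struct iommu_ioas_alloc ioas_a = { .size = sizeof(ioas_a) };
	struct iommu_ioas_alloc ioas_b = { .size = sizeof(ioas_b) };
	ioctl(iommufd, IOMMU_IOAS_ALLOC, &ioas_a);
	ioctl(iommufd, IOMMU_IOAS_ALLOC, &ioas_b);

	/* First attachment; the driver sets up MSIs while attached here. */
	struct vfio_device_attach_iommufd_pt attach = {
		.argsz = sizeof(attach), .pt_id = ioas_a.out_ioas_id,
	};
	ioctl(devfd, VFIO_DEVICE_ATTACH_IOMMUFD_PT, &attach);

	/* Replace the attachment with a different IOAS; MSIs must survive. */
	attach.pt_id = ioas_b.out_ioas_id;
	ioctl(devfd, VFIO_DEVICE_ATTACH_IOMMUFD_PT, &attach);
	return 0;
}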
Showing 4 changed files with 173 additions and 23 deletions.
161 changes: 139 additions & 22 deletions drivers/iommu/iommufd/device.c
@@ -5,6 +5,7 @@
 #include <linux/iommufd.h>
 #include <linux/slab.h>
 #include <uapi/linux/iommufd.h>
+#include <linux/msi.h>
 
 #include "../iommu-priv.h"
 #include "io_pagetable.h"
@@ -293,36 +294,152 @@ u32 iommufd_device_to_id(struct iommufd_device *idev)
 }
 EXPORT_SYMBOL_NS_GPL(iommufd_device_to_id, "IOMMUFD");
 
+/*
+ * Get a iommufd_sw_msi_map for the msi physical address requested by the irq
+ * layer. The mapping to IOVA is global to the iommufd file descriptor, every
+ * domain that is attached to a device using the same MSI parameters will use
+ * the same IOVA.
+ */
+static __maybe_unused struct iommufd_sw_msi_map *
+iommufd_sw_msi_get_map(struct iommufd_ctx *ictx, phys_addr_t msi_addr,
+		       phys_addr_t sw_msi_start)
+{
+	struct iommufd_sw_msi_map *cur;
+	unsigned int max_pgoff = 0;
+
+	lockdep_assert_held(&ictx->sw_msi_lock);
+
+	list_for_each_entry(cur, &ictx->sw_msi_list, sw_msi_item) {
+		if (cur->sw_msi_start != sw_msi_start)
+			continue;
+		max_pgoff = max(max_pgoff, cur->pgoff + 1);
+		if (cur->msi_addr == msi_addr)
+			return cur;
+	}
+
+	if (ictx->sw_msi_id >=
+	    BITS_PER_BYTE * sizeof_field(struct iommufd_sw_msi_maps, bitmap))
+		return ERR_PTR(-EOVERFLOW);
+
+	cur = kzalloc(sizeof(*cur), GFP_KERNEL);
+	if (!cur)
+		return ERR_PTR(-ENOMEM);
+
+	cur->sw_msi_start = sw_msi_start;
+	cur->msi_addr = msi_addr;
+	cur->pgoff = max_pgoff;
+	cur->id = ictx->sw_msi_id++;
+	list_add_tail(&cur->sw_msi_item, &ictx->sw_msi_list);
+	return cur;
+}
+
+static int iommufd_sw_msi_install(struct iommufd_ctx *ictx,
+				  struct iommufd_hwpt_paging *hwpt_paging,
+				  struct iommufd_sw_msi_map *msi_map)
+{
+	unsigned long iova;
+
+	lockdep_assert_held(&ictx->sw_msi_lock);
+
+	iova = msi_map->sw_msi_start + msi_map->pgoff * PAGE_SIZE;
+	if (!test_bit(msi_map->id, hwpt_paging->present_sw_msi.bitmap)) {
+		int rc;
+
+		rc = iommu_map(hwpt_paging->common.domain, iova,
+			       msi_map->msi_addr, PAGE_SIZE,
+			       IOMMU_WRITE | IOMMU_READ | IOMMU_MMIO,
+			       GFP_KERNEL_ACCOUNT);
+		if (rc)
+			return rc;
+		__set_bit(msi_map->id, hwpt_paging->present_sw_msi.bitmap);
+	}
+	return 0;
+}
+
+/*
+ * Called by the irq code if the platform translates the MSI address through the
+ * IOMMU. msi_addr is the physical address of the MSI page. iommufd will
+ * allocate a fd global iova for the physical page that is the same on all
+ * domains and devices.
+ */
+#ifdef CONFIG_IRQ_MSI_IOMMU
+int iommufd_sw_msi(struct iommu_domain *domain, struct msi_desc *desc,
+		   phys_addr_t msi_addr)
+{
+	struct device *dev = msi_desc_to_dev(desc);
+	struct iommufd_hwpt_paging *hwpt_paging;
+	struct iommu_attach_handle *raw_handle;
+	struct iommufd_attach_handle *handle;
+	struct iommufd_sw_msi_map *msi_map;
+	struct iommufd_ctx *ictx;
+	unsigned long iova;
+	int rc;
+
+	/*
+	 * It is safe to call iommu_attach_handle_get() here because the iommu
+	 * core code invokes this under the group mutex which also prevents any
+	 * change of the attach handle for the duration of this function.
+	 */
+	iommu_group_mutex_assert(dev);
+
+	raw_handle =
+		iommu_attach_handle_get(dev->iommu_group, IOMMU_NO_PASID, 0);
+	if (IS_ERR(raw_handle))
+		return 0;
+	hwpt_paging = find_hwpt_paging(domain->iommufd_hwpt);
+
+	handle = to_iommufd_handle(raw_handle);
+	/* No IOMMU_RESV_SW_MSI means no change to the msi_msg */
+	if (handle->idev->igroup->sw_msi_start == PHYS_ADDR_MAX)
+		return 0;
+
+	ictx = handle->idev->ictx;
+	guard(mutex)(&ictx->sw_msi_lock);
+	/*
+	 * The input msi_addr is the exact byte offset of the MSI doorbell, we
+	 * assume the caller has checked that it is contained with a MMIO region
+	 * that is secure to map at PAGE_SIZE.
+	 */
+	msi_map = iommufd_sw_msi_get_map(handle->idev->ictx,
+					 msi_addr & PAGE_MASK,
+					 handle->idev->igroup->sw_msi_start);
+	if (IS_ERR(msi_map))
+		return PTR_ERR(msi_map);
+
+	rc = iommufd_sw_msi_install(ictx, hwpt_paging, msi_map);
+	if (rc)
+		return rc;
+	__set_bit(msi_map->id, handle->idev->igroup->required_sw_msi.bitmap);
+
+	iova = msi_map->sw_msi_start + msi_map->pgoff * PAGE_SIZE;
+	msi_desc_set_iommu_msi_iova(desc, iova, PAGE_SHIFT);
+	return 0;
+}
+#endif
+
 static int iommufd_group_setup_msi(struct iommufd_group *igroup,
 				   struct iommufd_hwpt_paging *hwpt_paging)
 {
-	phys_addr_t sw_msi_start = igroup->sw_msi_start;
-	int rc;
+	struct iommufd_ctx *ictx = igroup->ictx;
+	struct iommufd_sw_msi_map *cur;
 
+	if (igroup->sw_msi_start == PHYS_ADDR_MAX)
+		return 0;
+
 	/*
-	 * If the IOMMU driver gives a IOMMU_RESV_SW_MSI then it is asking us to
-	 * call iommu_get_msi_cookie() on its behalf. This is necessary to setup
-	 * the MSI window so iommu_dma_prepare_msi() can install pages into our
-	 * domain after request_irq(). If it is not done interrupts will not
-	 * work on this domain.
-	 *
-	 * FIXME: This is conceptually broken for iommufd since we want to allow
-	 * userspace to change the domains, eg switch from an identity IOAS to a
-	 * DMA IOAS. There is currently no way to create a MSI window that
-	 * matches what the IRQ layer actually expects in a newly created
-	 * domain.
+	 * Install all the MSI pages the device has been using into the domain
 	 */
-	if (sw_msi_start != PHYS_ADDR_MAX && !hwpt_paging->msi_cookie) {
-		rc = iommu_get_msi_cookie(hwpt_paging->common.domain,
-					  sw_msi_start);
+	guard(mutex)(&ictx->sw_msi_lock);
+	list_for_each_entry(cur, &ictx->sw_msi_list, sw_msi_item) {
+		int rc;
+
+		if (cur->sw_msi_start != igroup->sw_msi_start ||
+		    !test_bit(cur->id, igroup->required_sw_msi.bitmap))
+			continue;
+
+		rc = iommufd_sw_msi_install(ictx, hwpt_paging, cur);
 		if (rc)
 			return rc;
-
-		/*
-		 * iommu_get_msi_cookie() can only be called once per domain,
-		 * it returns -EBUSY on later calls.
-		 */
-		hwpt_paging->msi_cookie = true;
 	}
 	return 0;
 }
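As a quick illustration of the allocation policy in iommufd_sw_msi_get_map() above, the standalone sketch below (not kernel code, simplified to a single reserved window) shows how, within one iommufd file descriptor, each distinct MSI doorbell physical address is assigned the next free page slot above the group's reserved SW_MSI base, while a doorbell seen before reuses its slot, so every domain maps the same doorbell at the same IOVA. The addresses are made up; the real code tracks entries per sw_msi_start and limits them with the 64-bit iommufd_sw_msi_maps bitmap.

#include <stdio.h>

#define PAGE_SIZE 0x1000ull

/* Doorbell physical address stored per page slot; 0 means the slot is free. */
static unsigned long long slots[64];

static unsigned long long sw_msi_iova(unsigned long long sw_msi_start,
				      unsigned long long doorbell)
{
	unsigned int pgoff;

	for (pgoff = 0; pgoff < 64; pgoff++) {
		if (slots[pgoff] == doorbell)	/* seen before: reuse the slot */
			return sw_msi_start + pgoff * PAGE_SIZE;
		if (!slots[pgoff]) {		/* first free slot: claim it */
			slots[pgoff] = doorbell;
			return sw_msi_start + pgoff * PAGE_SIZE;
		}
	}
	return 0;	/* table full; the real code returns -EOVERFLOW */
}

int main(void)
{
	/* e.g. a SW_MSI window reserved at 0x8000000 and two ITS doorbells */
	printf("%llx\n", sw_msi_iova(0x8000000, 0x10050040));	/* 0x8000000 */
	printf("%llx\n", sw_msi_iova(0x8000000, 0x10090040));	/* 0x8001000 */
	printf("%llx\n", sw_msi_iova(0x8000000, 0x10050040));	/* 0x8000000 again */
	return 0;
}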
3 changes: 3 additions & 0 deletions drivers/iommu/iommufd/hw_pagetable.c
@@ -156,6 +156,7 @@ iommufd_hwpt_paging_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas,
 			goto out_abort;
 		}
 	}
+	iommu_domain_set_sw_msi(hwpt->domain, iommufd_sw_msi);
 
 	/*
 	 * Set the coherency mode before we do iopt_table_add_domain() as some
@@ -251,6 +252,7 @@ iommufd_hwpt_nested_alloc(struct iommufd_ctx *ictx,
 		goto out_abort;
 	}
 	hwpt->domain->owner = ops;
+	iommu_domain_set_sw_msi(hwpt->domain, iommufd_sw_msi);
 
 	if (WARN_ON_ONCE(hwpt->domain->type != IOMMU_DOMAIN_NESTED)) {
 		rc = -EINVAL;
@@ -307,6 +309,7 @@ iommufd_viommu_alloc_hwpt_nested(struct iommufd_viommu *viommu, u32 flags,
 		goto out_abort;
 	}
 	hwpt->domain->owner = viommu->iommu_dev->ops;
+	iommu_domain_set_sw_msi(hwpt->domain, iommufd_sw_msi);
 
 	if (WARN_ON_ONCE(hwpt->domain->type != IOMMU_DOMAIN_NESTED)) {
 		rc = -EINVAL;
23 changes: 22 additions & 1 deletion drivers/iommu/iommufd/iommufd_private.h
@@ -19,13 +19,33 @@ struct iommu_group;
 struct iommu_option;
 struct iommufd_device;
 
+struct iommufd_sw_msi_map {
+	struct list_head sw_msi_item;
+	phys_addr_t sw_msi_start;
+	phys_addr_t msi_addr;
+	unsigned int pgoff;
+	unsigned int id;
+};
+
+/* Bitmap of struct iommufd_sw_msi_map::id */
+struct iommufd_sw_msi_maps {
+	DECLARE_BITMAP(bitmap, 64);
+};
+
+int iommufd_sw_msi(struct iommu_domain *domain, struct msi_desc *desc,
+		   phys_addr_t msi_addr);
+
 struct iommufd_ctx {
 	struct file *file;
 	struct xarray objects;
 	struct xarray groups;
 	wait_queue_head_t destroy_wait;
 	struct rw_semaphore ioas_creation_lock;
 
+	struct mutex sw_msi_lock;
+	struct list_head sw_msi_list;
+	unsigned int sw_msi_id;
+
 	u8 account_mode;
 	/* Compatibility with VFIO no iommu */
 	u8 no_iommu_mode;
@@ -283,10 +303,10 @@ struct iommufd_hwpt_paging {
 	struct iommufd_ioas *ioas;
 	bool auto_domain : 1;
 	bool enforce_cache_coherency : 1;
-	bool msi_cookie : 1;
 	bool nest_parent : 1;
 	/* Head at iommufd_ioas::hwpt_list */
 	struct list_head hwpt_item;
+	struct iommufd_sw_msi_maps present_sw_msi;
 };
 
 struct iommufd_hwpt_nested {
@@ -383,6 +403,7 @@ struct iommufd_group {
 	struct iommu_group *group;
 	struct iommufd_hw_pagetable *hwpt;
 	struct list_head device_list;
+	struct iommufd_sw_msi_maps required_sw_msi;
 	phys_addr_t sw_msi_start;
 };
 
9 changes: 9 additions & 0 deletions drivers/iommu/iommufd/main.c
@@ -227,13 +227,17 @@ static int iommufd_fops_open(struct inode *inode, struct file *filp)
 	xa_init(&ictx->groups);
 	ictx->file = filp;
 	init_waitqueue_head(&ictx->destroy_wait);
+	mutex_init(&ictx->sw_msi_lock);
+	INIT_LIST_HEAD(&ictx->sw_msi_list);
 	filp->private_data = ictx;
 	return 0;
 }
 
 static int iommufd_fops_release(struct inode *inode, struct file *filp)
 {
 	struct iommufd_ctx *ictx = filp->private_data;
+	struct iommufd_sw_msi_map *next;
+	struct iommufd_sw_msi_map *cur;
 	struct iommufd_object *obj;
 
 	/*
@@ -262,6 +266,11 @@ static int iommufd_fops_release(struct inode *inode, struct file *filp)
 			break;
 	}
 	WARN_ON(!xa_empty(&ictx->groups));
+
+	mutex_destroy(&ictx->sw_msi_lock);
+	list_for_each_entry_safe(cur, next, &ictx->sw_msi_list, sw_msi_item)
+		kfree(cur);
+
 	kfree(ictx);
 	return 0;
 }
