From 28b41e2c6aebd3caf99a77a76843c0175876bc72 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 24 Nov 2020 21:06:01 +0800 Subject: [PATCH 1/5] iommu: Move def_domain type check for untrusted device into core So that the vendor iommu drivers are no more required to provide the def_domain_type callback to always isolate the untrusted devices. Signed-off-by: Lu Baolu Cc: Shameerali Kolothum Thodi Link: https://lore.kernel.org/linux-iommu/243ce89c33fe4b9da4c56ba35acebf81@huawei.com/ Link: https://lore.kernel.org/r/20201124130604.2912899-2-baolu.lu@linux.intel.com Signed-off-by: Will Deacon --- drivers/iommu/intel/iommu.c | 7 ------- drivers/iommu/iommu.c | 16 +++++++--------- 2 files changed, 7 insertions(+), 16 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 1b1ca63e6bbee..83674e32e58b2 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -2916,13 +2916,6 @@ static int device_def_domain_type(struct device *dev) if (dev_is_pci(dev)) { struct pci_dev *pdev = to_pci_dev(dev); - /* - * Prevent any device marked as untrusted from getting - * placed into the statically identity mapping domain. - */ - if (pdev->untrusted) - return IOMMU_DOMAIN_DMA; - if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev)) return IOMMU_DOMAIN_IDENTITY; diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index b53446bb8c6b4..9a5ec1c7a8ac2 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -1460,12 +1460,14 @@ EXPORT_SYMBOL_GPL(fsl_mc_device_group); static int iommu_get_def_domain_type(struct device *dev) { const struct iommu_ops *ops = dev->bus->iommu_ops; - unsigned int type = 0; + + if (dev_is_pci(dev) && to_pci_dev(dev)->untrusted) + return IOMMU_DOMAIN_DMA; if (ops->def_domain_type) - type = ops->def_domain_type(dev); + return ops->def_domain_type(dev); - return (type == 0) ? 
iommu_def_domain_type : type; + return 0; } static int iommu_group_alloc_default_domain(struct bus_type *bus, @@ -1507,7 +1509,7 @@ static int iommu_alloc_default_domain(struct iommu_group *group, if (group->default_domain) return 0; - type = iommu_get_def_domain_type(dev); + type = iommu_get_def_domain_type(dev) ? : iommu_def_domain_type; return iommu_group_alloc_default_domain(dev->bus, group, type); } @@ -1645,12 +1647,8 @@ struct __group_domain_type { static int probe_get_default_domain_type(struct device *dev, void *data) { - const struct iommu_ops *ops = dev->bus->iommu_ops; struct __group_domain_type *gtype = data; - unsigned int type = 0; - - if (ops->def_domain_type) - type = ops->def_domain_type(dev); + unsigned int type = iommu_get_def_domain_type(dev); if (type) { if (gtype->type && gtype->type != type) { From 08a27c1c3ecf5e1da193ce5f8fc97c3be16e75f0 Mon Sep 17 00:00:00 2001 From: Sai Praneeth Prakhya Date: Tue, 24 Nov 2020 21:06:02 +0800 Subject: [PATCH 2/5] iommu: Add support to change default domain of an iommu group Presently, the default domain of an iommu group is allocated during boot time and it cannot be changed later. So, the device would typically be either in identity (also known as pass_through) mode or the device would be in DMA mode as long as the machine is up and running. There is no way to change the default domain type dynamically i.e. after booting, a device cannot switch between identity mode and DMA mode. But, assume a use case wherein the user trusts the device and believes that the OS is secure enough and hence wants *only* this device to bypass IOMMU (so that it could be high performing) whereas all the other devices to go through IOMMU (so that the system is protected). Presently, this use case is not supported. It will be helpful if there is some way to change the default domain of an iommu group dynamically. Hence, add such support. 
A privileged user could request the kernel to change the default domain type of an iommu group by writing to "/sys/kernel/iommu_groups//type" file. Presently, only three values are supported 1. identity: all the DMA transactions from the device in this group are *not* translated by the iommu 2. DMA: all the DMA transactions from the device in this group are translated by the iommu 3. auto: change to the type the device was booted with Note: 1. Default domain of an iommu group with two or more devices cannot be changed. 2. The device in the iommu group shouldn't be bound to any driver. 3. The device shouldn't be assigned to user for direct access. 4. The change request will fail if any device in the group has a mandatory default domain type and the requested one conflicts with that. Please see "Documentation/ABI/testing/sysfs-kernel-iommu_groups" for more information. Signed-off-by: Sai Praneeth Prakhya Signed-off-by: Lu Baolu Cc: Christoph Hellwig Cc: Joerg Roedel Cc: Ashok Raj Cc: Will Deacon Cc: Sohil Mehta Cc: Robin Murphy Cc: Jacob Pan Link: https://lore.kernel.org/r/20201124130604.2912899-3-baolu.lu@linux.intel.com Signed-off-by: Will Deacon --- drivers/iommu/iommu.c | 230 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 229 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 9a5ec1c7a8ac2..b326389deea4a 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -93,6 +93,8 @@ static void __iommu_detach_group(struct iommu_domain *domain, static int iommu_create_device_direct_mappings(struct iommu_group *group, struct device *dev); static struct iommu_group *iommu_group_get_for_dev(struct device *dev); +static ssize_t iommu_group_store_type(struct iommu_group *group, + const char *buf, size_t count); #define IOMMU_GROUP_ATTR(_name, _mode, _show, _store) \ struct iommu_group_attribute iommu_group_attr_##_name = \ @@ -525,7 +527,8 @@ static IOMMU_GROUP_ATTR(name, S_IRUGO, iommu_group_show_name, NULL); 
static IOMMU_GROUP_ATTR(reserved_regions, 0444, iommu_group_show_resv_regions, NULL); -static IOMMU_GROUP_ATTR(type, 0444, iommu_group_show_type, NULL); +static IOMMU_GROUP_ATTR(type, 0644, iommu_group_show_type, + iommu_group_store_type); static void iommu_group_release(struct kobject *kobj) { @@ -3027,3 +3030,228 @@ u32 iommu_sva_get_pasid(struct iommu_sva *handle) return ops->sva_get_pasid(handle); } EXPORT_SYMBOL_GPL(iommu_sva_get_pasid); + +/* + * Changes the default domain of an iommu group that has *only* one device + * + * @group: The group for which the default domain should be changed + * @prev_dev: The device in the group (this is used to make sure that the device + * hasn't changed after the caller has called this function) + * @type: The type of the new default domain that gets associated with the group + * + * Returns 0 on success and error code on failure + * + * Note: + * 1. Presently, this function is called only when user requests to change the + * group's default domain type through /sys/kernel/iommu_groups//type + * Please take a closer look if intended to use for other purposes. + */ +static int iommu_change_dev_def_domain(struct iommu_group *group, + struct device *prev_dev, int type) +{ + struct iommu_domain *prev_dom; + struct group_device *grp_dev; + int ret, dev_def_dom; + struct device *dev; + + if (!group) + return -EINVAL; + + mutex_lock(&group->mutex); + + if (group->default_domain != group->domain) { + dev_err_ratelimited(prev_dev, "Group not assigned to default domain\n"); + ret = -EBUSY; + goto out; + } + + /* + * iommu group wasn't locked while acquiring device lock in + * iommu_group_store_type(). So, make sure that the device count hasn't + * changed while acquiring device lock. + * + * Changing default domain of an iommu group with two or more devices + * isn't supported because there could be a potential deadlock. Consider + * the following scenario. 
T1 is trying to acquire device locks of all + * the devices in the group and before it could acquire all of them, + * there could be another thread T2 (from different sub-system and use + * case) that has already acquired some of the device locks and might be + * waiting for T1 to release other device locks. + */ + if (iommu_group_device_count(group) != 1) { + dev_err_ratelimited(prev_dev, "Cannot change default domain: Group has more than one device\n"); + ret = -EINVAL; + goto out; + } + + /* Since group has only one device */ + grp_dev = list_first_entry(&group->devices, struct group_device, list); + dev = grp_dev->dev; + + if (prev_dev != dev) { + dev_err_ratelimited(prev_dev, "Cannot change default domain: Device has been changed\n"); + ret = -EBUSY; + goto out; + } + + prev_dom = group->default_domain; + if (!prev_dom) { + ret = -EINVAL; + goto out; + } + + dev_def_dom = iommu_get_def_domain_type(dev); + if (!type) { + /* + * If the user hasn't requested any specific type of domain and + * if the device supports both the domains, then default to the + * domain the device was booted with + */ + type = dev_def_dom ? 
: iommu_def_domain_type; + } else if (dev_def_dom && type != dev_def_dom) { + dev_err_ratelimited(prev_dev, "Device cannot be in %s domain\n", + iommu_domain_type_str(type)); + ret = -EINVAL; + goto out; + } + + /* + * Switch to a new domain only if the requested domain type is different + * from the existing default domain type + */ + if (prev_dom->type == type) { + ret = 0; + goto out; + } + + /* Sets group->default_domain to the newly allocated domain */ + ret = iommu_group_alloc_default_domain(dev->bus, group, type); + if (ret) + goto out; + + ret = iommu_create_device_direct_mappings(group, dev); + if (ret) + goto free_new_domain; + + ret = __iommu_attach_device(group->default_domain, dev); + if (ret) + goto free_new_domain; + + group->domain = group->default_domain; + + /* + * Release the mutex here because ops->probe_finalize() call-back of + * some vendor IOMMU drivers calls arm_iommu_attach_device() which + * in-turn might call back into IOMMU core code, where it tries to take + * group->mutex, resulting in a deadlock. + */ + mutex_unlock(&group->mutex); + + /* Make sure dma_ops is appropriately set */ + iommu_group_do_probe_finalize(dev, group->default_domain); + iommu_domain_free(prev_dom); + return 0; + +free_new_domain: + iommu_domain_free(group->default_domain); + group->default_domain = prev_dom; + group->domain = prev_dom; + +out: + mutex_unlock(&group->mutex); + + return ret; +} + +/* + * Changing the default domain through sysfs requires the users to unbind the + * drivers from the devices in the iommu group. Return failure if this isn't + * met. + * + * We need to consider the race between this and the device release path. + * device_lock(dev) is used here to guarantee that the device release path + * will not be entered at the same time. 
+ */ +static ssize_t iommu_group_store_type(struct iommu_group *group, + const char *buf, size_t count) +{ + struct group_device *grp_dev; + struct device *dev; + int ret, req_type; + + if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RAWIO)) + return -EACCES; + + if (WARN_ON(!group)) + return -EINVAL; + + if (sysfs_streq(buf, "identity")) + req_type = IOMMU_DOMAIN_IDENTITY; + else if (sysfs_streq(buf, "DMA")) + req_type = IOMMU_DOMAIN_DMA; + else if (sysfs_streq(buf, "auto")) + req_type = 0; + else + return -EINVAL; + + /* + * Lock/Unlock the group mutex here before device lock to + * 1. Make sure that the iommu group has only one device (this is a + * prerequisite for step 2) + * 2. Get struct *dev which is needed to lock device + */ + mutex_lock(&group->mutex); + if (iommu_group_device_count(group) != 1) { + mutex_unlock(&group->mutex); + pr_err_ratelimited("Cannot change default domain: Group has more than one device\n"); + return -EINVAL; + } + + /* Since group has only one device */ + grp_dev = list_first_entry(&group->devices, struct group_device, list); + dev = grp_dev->dev; + get_device(dev); + + /* + * Don't hold the group mutex because taking group mutex first and then + * the device lock could potentially cause a deadlock as below. Assume + * two threads T1 and T2. T1 is trying to change default domain of an + * iommu group and T2 is trying to hot unplug a device or release [1] VF + * of a PCIe device which is in the same iommu group. T1 takes group + * mutex and before it could take device lock assume T2 has taken device + * lock and is yet to take group mutex. Now, both the threads will be + * waiting for the other thread to release lock. Below, lock order was + * suggested. 
+ * device_lock(dev); + * mutex_lock(&group->mutex); + * iommu_change_dev_def_domain(); + * mutex_unlock(&group->mutex); + * device_unlock(dev); + * + * [1] Typical device release path + * device_lock() from device/driver core code + * -> bus_notifier() + * -> iommu_bus_notifier() + * -> iommu_release_device() + * -> ops->release_device() vendor driver calls back iommu core code + * -> mutex_lock() from iommu core code + */ + mutex_unlock(&group->mutex); + + /* Check if the device in the group still has a driver bound to it */ + device_lock(dev); + if (device_is_bound(dev)) { + pr_err_ratelimited("Device is still bound to driver\n"); + ret = -EBUSY; + goto out; + } + + ret = iommu_change_dev_def_domain(group, dev, req_type); + ret = ret ?: count; + +out: + device_unlock(dev); + put_device(dev); + + return ret; +} From 0b8a96a3120ffe4d3571d93902693c59f90c3d0c Mon Sep 17 00:00:00 2001 From: Sai Praneeth Prakhya Date: Tue, 24 Nov 2020 21:06:03 +0800 Subject: [PATCH 3/5] iommu: Take lock before reading iommu group default domain type "/sys/kernel/iommu_groups//type" file could be read to find out the default domain type of an iommu group. The default domain of an iommu group doesn't change after booting and hence could be read directly. But, after adding support to dynamically change iommu group default domain, the above assumption no longer stays valid. iommu group default domain type could be changed at any time by writing to "/sys/kernel/iommu_groups//type". So, take group mutex before reading iommu group default domain type so that the user wouldn't see stale values or iommu_group_show_type() doesn't try to dereference stale pointers. 
Signed-off-by: Sai Praneeth Prakhya Signed-off-by: Lu Baolu Cc: Christoph Hellwig Cc: Joerg Roedel Cc: Ashok Raj Cc: Will Deacon Cc: Sohil Mehta Cc: Robin Murphy Cc: Jacob Pan Link: https://lore.kernel.org/r/20201124130604.2912899-4-baolu.lu@linux.intel.com Signed-off-by: Will Deacon --- drivers/iommu/iommu.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index b326389deea4a..cc06dcadff8f5 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -501,6 +501,7 @@ static ssize_t iommu_group_show_type(struct iommu_group *group, { char *type = "unknown\n"; + mutex_lock(&group->mutex); if (group->default_domain) { switch (group->default_domain->type) { case IOMMU_DOMAIN_BLOCKED: @@ -517,6 +518,7 @@ static ssize_t iommu_group_show_type(struct iommu_group *group, break; } } + mutex_unlock(&group->mutex); strcpy(buf, type); return strlen(type); From 63a816749d8670e4a92da5aecfb91238821a3d97 Mon Sep 17 00:00:00 2001 From: Sai Praneeth Prakhya Date: Tue, 24 Nov 2020 21:06:04 +0800 Subject: [PATCH 4/5] iommu: Document usage of "/sys/kernel/iommu_groups//type" file The default domain type of an iommu group can be changed by writing to "/sys/kernel/iommu_groups//type" file. Hence, document its usage and more importantly spell out its limitations. 
Signed-off-by: Sai Praneeth Prakhya Signed-off-by: Lu Baolu Cc: Christoph Hellwig Cc: Joerg Roedel Cc: Ashok Raj Cc: Will Deacon Cc: Sohil Mehta Cc: Robin Murphy Cc: Jacob Pan Link: https://lore.kernel.org/r/20201124130604.2912899-5-baolu.lu@linux.intel.com Signed-off-by: Will Deacon --- .../ABI/testing/sysfs-kernel-iommu_groups | 29 +++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-kernel-iommu_groups b/Documentation/ABI/testing/sysfs-kernel-iommu_groups index 017f5bc3920ce..407b1628d7fdb 100644 --- a/Documentation/ABI/testing/sysfs-kernel-iommu_groups +++ b/Documentation/ABI/testing/sysfs-kernel-iommu_groups @@ -33,3 +33,32 @@ Description: In case an RMRR is used only by graphics or USB devices it is now exposed as "direct-relaxable" instead of "direct". In device assignment use case, for instance, those RMRR are considered to be relaxable and safe. + +What: /sys/kernel/iommu_groups//type +Date: November 2020 +KernelVersion: v5.11 +Contact: Sai Praneeth Prakhya +Description: /sys/kernel/iommu_groups//type shows the type of default + domain in use by iommu for this group. See include/linux/iommu.h + for possible values. A privileged user could request kernel to + change the group type by writing to this file. Presently, only + three types of request are supported: + 1. DMA: All the DMA transactions from the device in this group + are translated by the iommu. + 2. identity: All the DMA transactions from the device in this + group are *not* translated by the iommu. + 3. auto: Change to the type the device was booted with. + Note: + ----- + The default domain type of a group may be modified only when + 1. The group has *only* one device + 2. The device in the group is not bound to any device driver. + So, the users must unbind the appropriate driver before + changing the default domain type. 
+ Caution: + -------- + Unbinding a device driver will take away the driver's control + over the device and if done on devices that host root file + system could lead to catastrophic effects (the users might + need to reboot the machine to get it to normal state). So, it's + expected that the users understand what they're doing. From 62c9917d9c1041ba175ccf1bc4c010efc0188a2e Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Thu, 26 Nov 2020 17:06:03 +0800 Subject: [PATCH 5/5] iommu: Fix htmldocs warnings in sysfs-kernel-iommu_groups Below warnings are fixed: Documentation/ABI/testing/sysfs-kernel-iommu_groups:38: WARNING: Unexpected indentation. Documentation/ABI/testing/sysfs-kernel-iommu_groups:38: WARNING: Block quote ends without a blank line; unexpected unindent. Documentation/ABI/testing/sysfs-kernel-iommu_groups:38: WARNING: Enumerated list ends without a blank line; unexpected unindent. Documentation/ABI/testing/sysfs-kernel-iommu_groups:38: WARNING: Unexpected indentation. Documentation/ABI/testing/sysfs-kernel-iommu_groups:38: WARNING: Block quote ends without a blank line; unexpected unindent. 
Fixes: 63a816749d86 ("iommu: Document usage of "/sys/kernel/iommu_groups//type" file") Reported-by: Stephen Rothwell Link: https://lore.kernel.org/linux-next/20201126174851.200e0e58@canb.auug.org.au/ Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20201126090603.1511589-1-baolu.lu@linux.intel.com Signed-off-by: Will Deacon --- .../ABI/testing/sysfs-kernel-iommu_groups | 33 ++++++++++--------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-kernel-iommu_groups b/Documentation/ABI/testing/sysfs-kernel-iommu_groups index 407b1628d7fdb..0fedbb0f94e4f 100644 --- a/Documentation/ABI/testing/sysfs-kernel-iommu_groups +++ b/Documentation/ABI/testing/sysfs-kernel-iommu_groups @@ -40,23 +40,24 @@ KernelVersion: v5.11 Contact: Sai Praneeth Prakhya Description: /sys/kernel/iommu_groups//type shows the type of default domain in use by iommu for this group. See include/linux/iommu.h - for possible values. A privileged user could request kernel to - change the group type by writing to this file. Presently, only - three types of request are supported: - 1. DMA: All the DMA transactions from the device in this group - are translated by the iommu. - 2. identity: All the DMA transactions from the device in this - group are *not* translated by the iommu. - 3. auto: Change to the type the device was booted with. - Note: - ----- + for possible read values. A privileged user could request kernel to + change the group type by writing to this file. Valid write values: + + ======== ====================================================== + DMA All the DMA transactions from the device in this group + are translated by the iommu. + identity All the DMA transactions from the device in this group + are not translated by the iommu. + auto Change to the type the device was booted with. + ======== ====================================================== + The default domain type of a group may be modified only when - 1. 
The group has *only* one device - 2. The device in the group is not bound to any device driver. - So, the users must unbind the appropriate driver before - changing the default domain type. - Caution: - -------- + + - The group has only one device. + - The device in the group is not bound to any device driver. + So, the users must unbind the appropriate driver before + changing the default domain type. + Unbinding a device driver will take away the driver's control over the device and if done on devices that host root file system could lead to catastrophic effects (the users might