From 927d8f272e4f2ff20acbf3d4c21119b0e9d17cc2 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 1 Aug 2022 19:47:58 +0300 Subject: [PATCH 01/59] iommu: Do not dereference fwnode in struct device In order to make the underneath API easier to change in the future, prevent users from dereferencing fwnode from struct device. Instead, use the specific dev_fwnode() API for that. Signed-off-by: Andy Shevchenko Reviewed-by: Robin Murphy Link: https://lore.kernel.org/r/20220801164758.20664-1-andriy.shevchenko@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 780fb7071577..31b5f4ceb2e9 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -173,7 +173,7 @@ int iommu_device_register(struct iommu_device *iommu, iommu->ops = ops; if (hwdev) - iommu->fwnode = hwdev->fwnode; + iommu->fwnode = dev_fwnode(hwdev); spin_lock(&iommu_device_lock); list_add_tail(&iommu->list, &iommu_device_list); From 0c9ccaf24efa5b8f728445272ebc84754da2a512 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 1 Aug 2022 19:51:42 +0300 Subject: [PATCH 02/59] iommu/virtio: Do not dereference fwnode in struct device In order to make the underneath API easier to change in the future, prevent users from dereferencing fwnode from struct device. Instead, use the specific device_match_fwnode() API for that. Signed-off-by: Andy Shevchenko Reviewed-by: Jean-Philippe Brucker Link: https://lore.kernel.org/r/20220801165142.20898-1-andriy.shevchenko@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/virtio-iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c index 08eeafc9529f..9fe723f55213 100644 --- a/drivers/iommu/virtio-iommu.c +++ b/drivers/iommu/virtio-iommu.c @@ -925,7 +925,7 @@ static struct virtio_driver virtio_iommu_drv; static int viommu_match_node(struct device *dev, const void *data) { - return dev->parent->fwnode == data; + return device_match_fwnode(dev->parent, data); } static struct viommu_dev *viommu_get_by_fwnode(struct fwnode_handle *fwnode) From 184233a5202786b20220acd2d04ddf909ef18f29 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 4 Aug 2022 17:32:39 +0300 Subject: [PATCH 03/59] iommu/omap: Fix buffer overflow in debugfs There are two issues here: 1) The "len" variable needs to be checked before the very first write. Otherwise if omap2_iommu_dump_ctx() with "bytes" less than 32 it is a buffer overflow. 2) The snprintf() function returns the number of bytes that *would* have been copied if there were enough space. But we want to know the number of bytes which were *actually* copied so use scnprintf() instead. Fixes: bd4396f09a4a ("iommu/omap: Consolidate OMAP IOMMU modules") Signed-off-by: Dan Carpenter Reviewed-by: Robin Murphy Reviewed-by: Laurent Pinchart Link: https://lore.kernel.org/r/YuvYh1JbE3v+abd5@kili Signed-off-by: Joerg Roedel --- drivers/iommu/omap-iommu-debug.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/omap-iommu-debug.c b/drivers/iommu/omap-iommu-debug.c index a99afb5d9011..259f65291d90 100644 --- a/drivers/iommu/omap-iommu-debug.c +++ b/drivers/iommu/omap-iommu-debug.c @@ -32,12 +32,12 @@ static inline bool is_omap_iommu_detached(struct omap_iommu *obj) ssize_t bytes; \ const char *str = "%20s: %08x\n"; \ const int maxcol = 32; \ - bytes = snprintf(p, maxcol, str, __stringify(name), \ + if (len < maxcol) \ + goto out; \ + bytes = scnprintf(p, maxcol, str, __stringify(name), \ iommu_read_reg(obj, MMU_##name)); \ p += bytes; \ len -= bytes; \ - if (len < maxcol) \ - goto out; \ } while (0) static ssize_t From 283945017cbf685546ba7d065f254ad77eb888b1 Mon Sep 17 00:00:00 2001 From: Yuan Can Date: Mon, 15 Aug 2022 01:33:39 +0000 Subject: [PATCH 04/59] iommu: Remove comment of dev_has_feat in struct doc dev_has_feat has been removed from iommu_ops in commit 309c56e84602 ("iommu: remove the unused dev_has_feat method"), remove its description in the struct doc. Signed-off-by: Yuan Can Link: https://lore.kernel.org/r/20220815013339.2552-1-yuancan@huawei.com Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/iommu.h b/include/linux/iommu.h index ea30f00dc145..a004e87e0e1d 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -212,7 +212,7 @@ struct iommu_iotlb_gather { * @of_xlate: add OF master IDs to iommu grouping * @is_attach_deferred: Check if domain attach should be deferred from iommu * driver init to device driver init (default no) - * @dev_has/enable/disable_feat: per device entries to check/enable/disable + * @dev_enable/disable_feat: per device entries to enable/disable * iommu specific features. * @sva_bind: Bind process address space to device * @sva_unbind: Unbind process address space from device From bf75eb44e11bcdb1edda3305dc7292b605c25172 Mon Sep 17 00:00:00 2001 From: Yuan Can Date: Mon, 15 Aug 2022 03:14:23 +0000 Subject: [PATCH 05/59] iommu: Remove duplicate ida_free in iommu_group_alloc In the iommu_group_alloc, when the kobject_init_and_add failed, the group->kobj is associate with iommu_group_ktype, thus its release function iommu_group_release will be called by the following kobject_put. The iommu_group_release calls ida_free with the group->id, so we do not need to do it before kobject_put. Signed-off-by: Yuan Can Link: https://lore.kernel.org/r/20220815031423.94548-1-yuancan@huawei.com Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 31b5f4ceb2e9..12e49dcf91bd 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -654,7 +654,6 @@ struct iommu_group *iommu_group_alloc(void) ret = kobject_init_and_add(&group->kobj, &iommu_group_ktype, NULL, "%d", group->id); if (ret) { - ida_free(&iommu_group_ida, group->id); kobject_put(&group->kobj); return ERR_PTR(ret); } From 359ad15763762c713a51300134e784a72eb9cb80 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 15 Aug 2022 16:26:49 +0100 Subject: [PATCH 06/59] iommu: Retire iommu_capable() With all callers now converted to the device-specific version, retire the old bus-based interface, and give drivers the chance to indicate accurate per-instance capabilities. Signed-off-by: Robin Murphy Reviewed-by: Lu Baolu Link: https://lore.kernel.org/r/d8bd8777d06929ad8f49df7fc80e1b9af32a41b5.1660574547.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/iommu.c | 2 +- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 2 +- drivers/iommu/arm/arm-smmu/arm-smmu.c | 2 +- drivers/iommu/arm/arm-smmu/qcom_iommu.c | 2 +- drivers/iommu/fsl_pamu_domain.c | 2 +- drivers/iommu/intel/iommu.c | 2 +- drivers/iommu/iommu.c | 11 +---------- drivers/iommu/s390-iommu.c | 2 +- include/linux/iommu.h | 8 +------- 9 files changed, 9 insertions(+), 24 deletions(-) diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 65b8e4fd8217..94220eb5bff3 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -2251,7 +2251,7 @@ static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom, return ops->iova_to_phys(ops, iova); } -static bool amd_iommu_capable(enum iommu_cap cap) +static bool amd_iommu_capable(struct device *dev, enum iommu_cap cap) { switch (cap) { case IOMMU_CAP_CACHE_COHERENCY: diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index d32b02336411..ab919ec43c4d 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -1992,7 +1992,7 @@ static const struct iommu_flush_ops arm_smmu_flush_ops = { }; /* IOMMU API */ -static bool arm_smmu_capable(enum iommu_cap cap) +static bool arm_smmu_capable(struct device *dev, enum iommu_cap cap) { switch (cap) { case IOMMU_CAP_CACHE_COHERENCY: diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c index dfa82df00342..ce036a053fb8 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c @@ -1330,7 +1330,7 @@ static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain, return ops->iova_to_phys(ops, iova); } -static bool arm_smmu_capable(enum iommu_cap cap) +static bool arm_smmu_capable(struct device *dev, enum iommu_cap cap) { switch (cap) { case IOMMU_CAP_CACHE_COHERENCY: diff --git a/drivers/iommu/arm/arm-smmu/qcom_iommu.c b/drivers/iommu/arm/arm-smmu/qcom_iommu.c index 17235116d3bb..66ca47f2e57f 100644 --- a/drivers/iommu/arm/arm-smmu/qcom_iommu.c +++ b/drivers/iommu/arm/arm-smmu/qcom_iommu.c @@ -493,7 +493,7 @@ static phys_addr_t qcom_iommu_iova_to_phys(struct iommu_domain *domain, return ret; } -static bool qcom_iommu_capable(enum iommu_cap cap) +static bool qcom_iommu_capable(struct device *dev, enum iommu_cap cap) { switch (cap) { case IOMMU_CAP_CACHE_COHERENCY: diff --git a/drivers/iommu/fsl_pamu_domain.c b/drivers/iommu/fsl_pamu_domain.c index 011f9ab7f743..08fd9089e3ba 100644 --- a/drivers/iommu/fsl_pamu_domain.c +++ b/drivers/iommu/fsl_pamu_domain.c @@ -178,7 +178,7 @@ static phys_addr_t fsl_pamu_iova_to_phys(struct iommu_domain *domain, return iova; } -static bool fsl_pamu_capable(enum iommu_cap cap) +static bool fsl_pamu_capable(struct device *dev, enum iommu_cap cap) { return cap == IOMMU_CAP_CACHE_COHERENCY; } diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 7cca030a508e..da63b358ef4a 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -4429,7 +4429,7 @@ static bool intel_iommu_enforce_cache_coherency(struct iommu_domain *domain) return true; } -static bool intel_iommu_capable(enum iommu_cap cap) +static bool intel_iommu_capable(struct device *dev, enum iommu_cap cap) { if (cap == IOMMU_CAP_CACHE_COHERENCY) return true; diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 12e49dcf91bd..55d242f16824 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -1868,19 +1868,10 @@ bool device_iommu_capable(struct device *dev, enum iommu_cap cap) if (!ops->capable) return false; - return ops->capable(cap); + return ops->capable(dev, cap); } EXPORT_SYMBOL_GPL(device_iommu_capable); -bool iommu_capable(struct bus_type *bus, enum iommu_cap cap) -{ - if (!bus->iommu_ops || !bus->iommu_ops->capable) - return false; - - return bus->iommu_ops->capable(cap); -} -EXPORT_SYMBOL_GPL(iommu_capable); - /** * iommu_set_fault_handler() - set a fault handler for an iommu domain * @domain: iommu domain diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c index c898bcbbce11..cd0ee89d63e0 100644 --- a/drivers/iommu/s390-iommu.c +++ b/drivers/iommu/s390-iommu.c @@ -39,7 +39,7 @@ static struct s390_domain *to_s390_domain(struct iommu_domain *dom) return container_of(dom, struct s390_domain, domain); } -static bool s390_iommu_capable(enum iommu_cap cap) +static bool s390_iommu_capable(struct device *dev, enum iommu_cap cap) { switch (cap) { case IOMMU_CAP_CACHE_COHERENCY: diff --git a/include/linux/iommu.h b/include/linux/iommu.h index a004e87e0e1d..0013a1befe7b 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -227,7 +227,7 @@ struct iommu_iotlb_gather { * @owner: Driver module providing these ops */ struct iommu_ops { - bool (*capable)(enum iommu_cap); + bool (*capable)(struct device *dev, enum iommu_cap); /* Domain allocation and freeing by the iommu driver */ struct iommu_domain *(*domain_alloc)(unsigned iommu_domain_type); @@ -420,7 +420,6 @@ extern int bus_set_iommu(struct bus_type *bus, const struct iommu_ops *ops); extern int bus_iommu_probe(struct bus_type *bus); extern bool iommu_present(struct bus_type *bus); extern bool device_iommu_capable(struct device *dev, enum iommu_cap cap); -extern bool iommu_capable(struct bus_type *bus, enum iommu_cap cap); extern struct iommu_domain *iommu_domain_alloc(struct bus_type *bus); extern struct iommu_group *iommu_group_get_by_id(int id); extern void iommu_domain_free(struct iommu_domain *domain); @@ -697,11 +696,6 @@ static inline bool device_iommu_capable(struct device *dev, enum iommu_cap cap) return false; } -static inline bool iommu_capable(struct bus_type *bus, enum iommu_cap cap) -{ - return false; -} - static inline struct iommu_domain *iommu_domain_alloc(struct bus_type *bus) { return NULL; From df198b37e72c18c771d93ffbaf2176c0270505f3 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 15 Aug 2022 16:26:50 +0100 Subject: [PATCH 07/59] iommu/arm-smmu: Report IOMMU_CAP_CACHE_COHERENCY better Assuming that any SMMU can enforce coherency for any device is clearly nonsense. Although technically even a single SMMU instance can be wired up to only be capable of emitting coherent traffic for some of the devices it translates, it's a fairly realistic approximation that if the SMMU's pagetable walker is wired up to a coherent interconnect then all its translation units probably are too, and conversely that lack of coherent table walks implies a non-coherent system in general. Either way it's still less inaccurate than what we've been claiming so far. Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/106c9741415f0b6358c72d53ae9c78c553a2b45c.1660574547.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 5 ++++- drivers/iommu/arm/arm-smmu/arm-smmu.c | 9 ++++----- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index ab919ec43c4d..fceab59113ba 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -1994,9 +1994,12 @@ static const struct iommu_flush_ops arm_smmu_flush_ops = { /* IOMMU API */ static bool arm_smmu_capable(struct device *dev, enum iommu_cap cap) { + struct arm_smmu_master *master = dev_iommu_priv_get(dev); + switch (cap) { case IOMMU_CAP_CACHE_COHERENCY: - return true; + /* Assume that a coherent TCU implies coherent TBUs */ + return master->smmu->features & ARM_SMMU_FEAT_COHERENCY; case IOMMU_CAP_NOEXEC: return true; default: diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c index ce036a053fb8..8039c8bc8470 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c @@ -1332,13 +1332,12 @@ static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain, static bool arm_smmu_capable(struct device *dev, enum iommu_cap cap) { + struct arm_smmu_master_cfg *cfg = dev_iommu_priv_get(dev); + switch (cap) { case IOMMU_CAP_CACHE_COHERENCY: - /* - * Return true here as the SMMU can always send out coherent - * requests. - */ - return true; + /* Assume that a coherent TCU implies coherent TBUs */ + return cfg->smmu->features & ARM_SMMU_FEAT_COHERENT_WALK; case IOMMU_CAP_NOEXEC: return true; default: From ca25ec247aadbff98083e92c5e79c198a16cd2db Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 15 Aug 2022 17:15:55 +0100 Subject: [PATCH 08/59] iommu/io-pgtable-arm: Remove iommu_dev==NULL special case The special case to allow iommu_dev==NULL in __arm_lpae_alloc_pages() is confusing to static checkers (and possibly readers in general), since it's not obvious that that is only intended for the selftests. However it only serves to get around the dev_to_node() call, and we can easily fake up enough to make that work anyway, so let's simply remove this consideration from the normal flow and punt the responsibility over to the test harness itself. Reported-by: Rustam Subkhankulov Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/e2095eeda305071cb56c2cb8ac8a82dc3bd4dcab.1660580155.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel --- drivers/iommu/io-pgtable-arm.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index 94ff319ae8ac..873a92bbd80c 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -200,8 +200,7 @@ static void *__arm_lpae_alloc_pages(size_t size, gfp_t gfp, void *pages; VM_BUG_ON((gfp & __GFP_HIGHMEM)); - p = alloc_pages_node(dev ? dev_to_node(dev) : NUMA_NO_NODE, - gfp | __GFP_ZERO, order); + p = alloc_pages_node(dev_to_node(dev), gfp | __GFP_ZERO, order); if (!p) return NULL; @@ -1343,12 +1342,17 @@ static int __init arm_lpae_do_selftests(void) }; int i, j, pass = 0, fail = 0; + struct device dev; struct io_pgtable_cfg cfg = { .tlb = &dummy_tlb_ops, .oas = 48, .coherent_walk = true, + .iommu_dev = &dev, }; + /* __arm_lpae_alloc_pages() merely needs dev_to_node() to work */ + set_dev_node(&dev, NUMA_NO_NODE); + for (i = 0; i < ARRAY_SIZE(pgsize); ++i) { for (j = 0; j < ARRAY_SIZE(ias); ++j) { cfg.pgsize_bitmap = pgsize[i]; From c919739ce4721ecf7b96b99253b032df30fcf19b Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 15 Aug 2022 17:20:02 +0100 Subject: [PATCH 09/59] iommu/vt-d: Handle race between registration and device probe Currently we rely on registering all our instances before initially allowing any .probe_device calls via bus_set_iommu(). In preparation for phasing out the latter, make sure we won't inadvertently return success for a device associated with a known but not yet registered instance, otherwise we'll run straight into iommu_group_get_for_dev() trying to use NULL ops. That also highlights an issue with intel_iommu_get_resv_regions() taking dmar_global_lock from within a section where intel_iommu_init() already holds it, which already exists via probe_acpi_namespace_devices() when an ANDD device is probed, but gets more obvious with the upcoming change to iommu_device_register(). Since they are both read locks it manages not to deadlock in practice, and a more in-depth rework of this locking is underway, so no attempt is made to address it here. Reviewed-by: Kevin Tian Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/579f2692291bcbfc3ac64f7456fcff0d629af131.1660572783.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index da63b358ef4a..5d3c220a0308 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -4449,7 +4449,7 @@ static struct iommu_device *intel_iommu_probe_device(struct device *dev) u8 bus, devfn; iommu = device_to_iommu(dev, &bus, &devfn); - if (!iommu) + if (!iommu || !iommu->iommu.ops) return ERR_PTR(-ENODEV); info = kzalloc(sizeof(*info), GFP_KERNEL); From cbc040081fdf7d73ef60112b24caa785b3f293f3 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 15 Aug 2022 17:20:03 +0100 Subject: [PATCH 10/59] iommu/amd: Handle race between registration and device probe As for the Intel driver, make sure the AMD driver can cope with seeing .probe_device calls without having to wait for all known instances to register first. Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/a8d8ebe12b411d28972f1ab928c6db92e8913cf5.1660572783.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/iommu.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 94220eb5bff3..85c4122b5d61 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -1851,6 +1851,10 @@ static struct iommu_device *amd_iommu_probe_device(struct device *dev) if (!iommu) return ERR_PTR(-ENODEV); + /* Not registered yet? */ + if (!iommu->iommu.ops) + return ERR_PTR(-ENODEV); + if (dev_iommu_priv_get(dev)) return &iommu->iommu; From 927a5fdd949a03a244c1f5b3e6103aaadd547542 Mon Sep 17 00:00:00 2001 From: Matthew Rosato Date: Mon, 15 Aug 2022 17:20:04 +0100 Subject: [PATCH 11/59] iommu/s390: Fail probe for non-PCI devices s390-iommu only supports pci_bus_type today. Tested-by: Niklas Schnelle Signed-off-by: Matthew Rosato Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/8cb71ea1b24bd2622c1937bd9cfffe73b126eb56.1660572783.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel --- drivers/iommu/s390-iommu.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c index cd0ee89d63e0..ac4dab6d79e2 100644 --- a/drivers/iommu/s390-iommu.c +++ b/drivers/iommu/s390-iommu.c @@ -185,7 +185,12 @@ static void s390_iommu_detach_device(struct iommu_domain *domain, static struct iommu_device *s390_iommu_probe_device(struct device *dev) { - struct zpci_dev *zdev = to_zpci_dev(dev); + struct zpci_dev *zdev; + + if (!dev_is_pci(dev)) + return ERR_PTR(-ENODEV); + + zdev = to_zpci_dev(dev); return &zdev->iommu_dev; } From c13dbc1e24854908376c40ccaeb7a3a3c111e3af Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 15 Aug 2022 17:20:05 +0100 Subject: [PATCH 12/59] iommu: Always register bus notifiers The number of bus types that the IOMMU subsystem deals with is small and manageable, so pull that list into core code as a first step towards cleaning up all the boilerplate bus-awareness from drivers. Calling iommu_probe_device() before bus->iommu_ops is set will simply return -ENODEV and not break the notifier call chain, so there should be no harm in proactively registering all our bus notifiers at init time. Tested-by: Marek Szyprowski Tested-by: Matthew Rosato # s390 Tested-by: Niklas Schnelle # s390 Signed-off-by: Robin Murphy Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/7462347bf938bd6eedb629a3a318434f6516e712.1660572783.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 72 ++++++++++++++++++++++--------------------- 1 file changed, 37 insertions(+), 35 deletions(-) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 55d242f16824..49f0445f2654 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -6,6 +6,7 @@ #define pr_fmt(fmt) "iommu: " fmt +#include #include #include #include @@ -16,11 +17,13 @@ #include #include #include +#include #include #include #include #include #include +#include #include #include #include @@ -75,6 +78,8 @@ static const char * const iommu_group_resv_type_string[] = { #define IOMMU_CMD_LINE_DMA_API BIT(0) #define IOMMU_CMD_LINE_STRICT BIT(1) +static int iommu_bus_notifier(struct notifier_block *nb, + unsigned long action, void *data); static int iommu_alloc_default_domain(struct iommu_group *group, struct device *dev); static struct iommu_domain *__iommu_domain_alloc(struct bus_type *bus, @@ -103,6 +108,22 @@ struct iommu_group_attribute iommu_group_attr_##_name = \ static LIST_HEAD(iommu_device_list); static DEFINE_SPINLOCK(iommu_device_lock); +static struct bus_type * const iommu_buses[] = { + &platform_bus_type, +#ifdef CONFIG_PCI + &pci_bus_type, +#endif +#ifdef CONFIG_ARM_AMBA + &amba_bustype, +#endif +#ifdef CONFIG_FSL_MC_BUS + &fsl_mc_bus_type, +#endif +#ifdef CONFIG_TEGRA_HOST1X_CONTEXT_BUS + &host1x_context_device_bus_type, +#endif +}; + /* * Use a function instead of an array here because the domain-type is a * bit-field, so an array would waste memory. @@ -126,6 +147,8 @@ static const char *iommu_domain_type_str(unsigned int t) static int __init iommu_subsys_init(void) { + struct notifier_block *nb; + if (!(iommu_cmd_line & IOMMU_CMD_LINE_DMA_API)) { if (IS_ENABLED(CONFIG_IOMMU_DEFAULT_PASSTHROUGH)) iommu_set_default_passthrough(false); @@ -152,6 +175,15 @@ static int __init iommu_subsys_init(void) (iommu_cmd_line & IOMMU_CMD_LINE_STRICT) ? "(set via kernel command line)" : ""); + nb = kcalloc(ARRAY_SIZE(iommu_buses), sizeof(*nb), GFP_KERNEL); + if (!nb) + return -ENOMEM; + + for (int i = 0; i < ARRAY_SIZE(iommu_buses); i++) { + nb[i].notifier_call = iommu_bus_notifier; + bus_register_notifier(iommu_buses[i], &nb[i]); + } + return 0; } subsys_initcall(iommu_subsys_init); @@ -1774,39 +1806,6 @@ int bus_iommu_probe(struct bus_type *bus) return ret; } -static int iommu_bus_init(struct bus_type *bus, const struct iommu_ops *ops) -{ - struct notifier_block *nb; - int err; - - nb = kzalloc(sizeof(struct notifier_block), GFP_KERNEL); - if (!nb) - return -ENOMEM; - - nb->notifier_call = iommu_bus_notifier; - - err = bus_register_notifier(bus, nb); - if (err) - goto out_free; - - err = bus_iommu_probe(bus); - if (err) - goto out_err; - - - return 0; - -out_err: - /* Clean up */ - bus_for_each_dev(bus, NULL, NULL, remove_iommu_group); - bus_unregister_notifier(bus, nb); - -out_free: - kfree(nb); - - return err; -} - /** * bus_set_iommu - set iommu-callbacks for the bus * @bus: bus. @@ -1835,9 +1834,12 @@ int bus_set_iommu(struct bus_type *bus, const struct iommu_ops *ops) bus->iommu_ops = ops; /* Do IOMMU specific setup for this bus-type */ - err = iommu_bus_init(bus, ops); - if (err) + err = bus_iommu_probe(bus); + if (err) { + /* Clean up */ + bus_for_each_dev(bus, NULL, NULL, remove_iommu_group); bus->iommu_ops = NULL; + } return err; } From 57365a04c92126525a58bf7a1599ddfa832415e9 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 15 Aug 2022 17:20:06 +0100 Subject: [PATCH 13/59] iommu: Move bus setup to IOMMU device registration Move the bus setup to iommu_device_register(). This should allow bus_iommu_probe() to be correctly replayed for multiple IOMMU instances, and leaves bus_set_iommu() as a glorified no-op to be cleaned up next. At this point we can also handle cleanup better than just rolling back the most-recently-touched bus upon failure - which may release devices owned by other already-registered instances, and still leave devices on other buses with dangling pointers to the failed instance. Now it's easy to clean up the exact footprint of a given instance, no more, no less. Tested-by: Marek Szyprowski Reviewed-by: Krishna Reddy Reviewed-by: Kevin Tian Tested-by: Matthew Rosato # s390 Tested-by: Niklas Schnelle # s390 Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/d342b6f27efb5ef3e93aacaa3012d25386d74866.1660572783.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 55 +++++++++++++++++++++++-------------------- 1 file changed, 30 insertions(+), 25 deletions(-) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 49f0445f2654..422d4e9f980b 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -188,6 +188,14 @@ static int __init iommu_subsys_init(void) } subsys_initcall(iommu_subsys_init); +static int remove_iommu_group(struct device *dev, void *data) +{ + if (dev->iommu && dev->iommu->iommu_dev == data) + iommu_release_device(dev); + + return 0; +} + /** * iommu_device_register() - Register an IOMMU hardware instance * @iommu: IOMMU handle for the instance @@ -199,9 +207,18 @@ subsys_initcall(iommu_subsys_init); int iommu_device_register(struct iommu_device *iommu, const struct iommu_ops *ops, struct device *hwdev) { + int err = 0; + /* We need to be able to take module references appropriately */ if (WARN_ON(is_module_address((unsigned long)ops) && !ops->owner)) return -EINVAL; + /* + * Temporarily enforce global restriction to a single driver. This was + * already the de-facto behaviour, since any possible combination of + * existing drivers would compete for at least the PCI or platform bus. + */ + if (iommu_buses[0]->iommu_ops && iommu_buses[0]->iommu_ops != ops) + return -EBUSY; iommu->ops = ops; if (hwdev) @@ -210,12 +227,22 @@ int iommu_device_register(struct iommu_device *iommu, spin_lock(&iommu_device_lock); list_add_tail(&iommu->list, &iommu_device_list); spin_unlock(&iommu_device_lock); - return 0; + + for (int i = 0; i < ARRAY_SIZE(iommu_buses) && !err; i++) { + iommu_buses[i]->iommu_ops = ops; + err = bus_iommu_probe(iommu_buses[i]); + } + if (err) + iommu_device_unregister(iommu); + return err; } EXPORT_SYMBOL_GPL(iommu_device_register); void iommu_device_unregister(struct iommu_device *iommu) { + for (int i = 0; i < ARRAY_SIZE(iommu_buses); i++) + bus_for_each_dev(iommu_buses[i], NULL, iommu, remove_iommu_group); + spin_lock(&iommu_device_lock); list_del(&iommu->list); spin_unlock(&iommu_device_lock); @@ -1643,13 +1670,6 @@ static int probe_iommu_group(struct device *dev, void *data) return ret; } -static int remove_iommu_group(struct device *dev, void *data) -{ - iommu_release_device(dev); - - return 0; -} - static int iommu_bus_notifier(struct notifier_block *nb, unsigned long action, void *data) { @@ -1821,27 +1841,12 @@ int bus_iommu_probe(struct bus_type *bus) */ int bus_set_iommu(struct bus_type *bus, const struct iommu_ops *ops) { - int err; - - if (ops == NULL) { - bus->iommu_ops = NULL; - return 0; - } - - if (bus->iommu_ops != NULL) + if (bus->iommu_ops && ops && bus->iommu_ops != ops) return -EBUSY; bus->iommu_ops = ops; - /* Do IOMMU specific setup for this bus-type */ - err = bus_iommu_probe(bus); - if (err) { - /* Clean up */ - bus_for_each_dev(bus, NULL, NULL, remove_iommu_group); - bus->iommu_ops = NULL; - } - - return err; + return 0; } EXPORT_SYMBOL_GPL(bus_set_iommu); From 31ee890a01fd2bb90f95e44ba441cccc47660f18 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 15 Aug 2022 17:20:07 +0100 Subject: [PATCH 14/59] iommu/amd: Clean up bus_set_iommu() Stop calling bus_set_iommu() since it's now unnecessary, and garbage-collect the last remnants of amd_iommu_init_api(). Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/6bcc367e8802ae5a2b2840cbe4e9661ee024e80e.1660572783.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/amd_iommu.h | 1 - drivers/iommu/amd/init.c | 9 +-------- drivers/iommu/amd/iommu.c | 21 --------------------- 3 files changed, 1 insertion(+), 30 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index 84e5bb1bf01b..c160a332ce33 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -18,7 +18,6 @@ extern void amd_iommu_restart_event_logging(struct amd_iommu *iommu); extern int amd_iommu_init_devices(void); extern void amd_iommu_uninit_devices(void); extern void amd_iommu_init_notifier(void); -extern int amd_iommu_init_api(void); extern void amd_iommu_set_rlookup_table(struct amd_iommu *iommu, u16 devid); #ifdef CONFIG_AMD_IOMMU_DEBUGFS diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index fdc642362c14..79e0286da6ce 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -2168,20 +2168,13 @@ static int __init amd_iommu_init_pci(void) /* * Order is important here to make sure any unity map requirements are * fulfilled. The unity mappings are created and written to the device - * table during the amd_iommu_init_api() call. + * table during the iommu_init_pci() call. * * After that we call init_device_table_dma() to make sure any * uninitialized DTE will block DMA, and in the end we flush the caches * of all IOMMUs to make sure the changes to the device table are * active. */ - ret = amd_iommu_init_api(); - if (ret) { - pr_err("IOMMU: Failed to initialize IOMMU-API interface (error=%d)!\n", - ret); - goto out; - } - for_each_pci_segment(pci_seg) init_device_table_dma(pci_seg); diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 85c4122b5d61..da18f40f9abc 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -11,8 +11,6 @@ #include #include #include -#include -#include #include #include #include @@ -1941,25 +1939,6 @@ void amd_iommu_domain_update(struct protection_domain *domain) amd_iommu_domain_flush_complete(domain); } -int __init amd_iommu_init_api(void) -{ - int err; - - err = bus_set_iommu(&pci_bus_type, &amd_iommu_ops); - if (err) - return err; -#ifdef CONFIG_ARM_AMBA - err = bus_set_iommu(&amba_bustype, &amd_iommu_ops); - if (err) - return err; -#endif - err = bus_set_iommu(&platform_bus_type, &amd_iommu_ops); - if (err) - return err; - - return 0; -} - /***************************************************************************** * * The following functions belong to the exported interface of AMD IOMMU From 3c34d1c2d7960e0af3204dc5c3d37324438bbcb2 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 15 Aug 2022 17:20:08 +0100 Subject: [PATCH 15/59] iommu/arm-smmu: Clean up bus_set_iommu() Stop calling bus_set_iommu() since it's now unnecessary. With device probes now replayed for every IOMMU instance registration, the whole sorry ordering workaround for legacy DT bindings goes too, hooray! Acked-by: Will Deacon Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/f7aaad3e479a78623a6942ed46937249168b55bd.1660572783.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel --- drivers/iommu/arm/arm-smmu/arm-smmu.c | 84 +-------------------------- 1 file changed, 2 insertions(+), 82 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c index 8039c8bc8470..4049980d4914 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c @@ -37,7 +37,6 @@ #include #include -#include #include #include "arm-smmu.h" @@ -93,8 +92,6 @@ static struct platform_driver arm_smmu_driver; static struct iommu_ops arm_smmu_ops; #ifdef CONFIG_ARM_SMMU_LEGACY_DT_BINDINGS -static int arm_smmu_bus_init(struct iommu_ops *ops); - static struct device_node *dev_get_dev_node(struct device *dev) { if (dev_is_pci(dev)) { @@ -180,20 +177,6 @@ static int arm_smmu_register_legacy_master(struct device *dev, kfree(sids); return err; } - -/* - * With the legacy DT binding in play, we have no guarantees about - * probe order, but then we're also not doing default domains, so we can - * delay setting bus ops until we're sure every possible SMMU is ready, - * and that way ensure that no probe_device() calls get missed. - */ -static int arm_smmu_legacy_bus_init(void) -{ - if (using_legacy_binding) - return arm_smmu_bus_init(&arm_smmu_ops); - return 0; -} -device_initcall_sync(arm_smmu_legacy_bus_init); #else static int arm_smmu_register_legacy_master(struct device *dev, struct arm_smmu_device **smmu) @@ -2015,52 +1998,6 @@ static int arm_smmu_device_dt_probe(struct arm_smmu_device *smmu, return 0; } -static int arm_smmu_bus_init(struct iommu_ops *ops) -{ - int err; - - /* Oh, for a proper bus abstraction */ - if (!iommu_present(&platform_bus_type)) { - err = bus_set_iommu(&platform_bus_type, ops); - if (err) - return err; - } -#ifdef CONFIG_ARM_AMBA - if (!iommu_present(&amba_bustype)) { - err = bus_set_iommu(&amba_bustype, ops); - if (err) - goto err_reset_platform_ops; - } -#endif -#ifdef CONFIG_PCI - if (!iommu_present(&pci_bus_type)) { - err = bus_set_iommu(&pci_bus_type, ops); - if (err) - goto err_reset_amba_ops; - } -#endif -#ifdef CONFIG_FSL_MC_BUS - if (!iommu_present(&fsl_mc_bus_type)) { - err = bus_set_iommu(&fsl_mc_bus_type, ops); - if (err) - goto err_reset_pci_ops; - } -#endif - return 0; - -err_reset_pci_ops: __maybe_unused; -#ifdef CONFIG_PCI - bus_set_iommu(&pci_bus_type, NULL); -#endif -err_reset_amba_ops: __maybe_unused; -#ifdef CONFIG_ARM_AMBA - bus_set_iommu(&amba_bustype, NULL); -#endif -err_reset_platform_ops: __maybe_unused; - bus_set_iommu(&platform_bus_type, NULL); - return err; -} - static void arm_smmu_rmr_install_bypass_smr(struct arm_smmu_device *smmu) { struct list_head rmr_list; @@ -2225,7 +2162,8 @@ static int arm_smmu_device_probe(struct platform_device *pdev) err = iommu_device_register(&smmu->iommu, &arm_smmu_ops, dev); if (err) { dev_err(dev, "Failed to register iommu\n"); - goto err_sysfs_remove; + iommu_device_sysfs_remove(&smmu->iommu); + return err; } platform_set_drvdata(pdev, smmu); @@ -2247,24 +2185,7 @@ static int arm_smmu_device_probe(struct platform_device *pdev) pm_runtime_enable(dev); } - /* - * For ACPI and generic DT bindings, an SMMU will be probed before - * any device which might need it, so we want the bus ops in place - * ready to handle default domain setup as soon as any SMMU exists. - */ - if (!using_legacy_binding) { - err = arm_smmu_bus_init(&arm_smmu_ops); - if (err) - goto err_unregister_device; - } - return 0; - -err_unregister_device: - iommu_device_unregister(&smmu->iommu); -err_sysfs_remove: - iommu_device_sysfs_remove(&smmu->iommu); - return err; } static int arm_smmu_device_remove(struct platform_device *pdev) @@ -2277,7 +2198,6 @@ static int arm_smmu_device_remove(struct platform_device *pdev) if (!bitmap_empty(smmu->context_map, ARM_SMMU_MAX_CBS)) dev_notice(&pdev->dev, "disabling translation\n"); - arm_smmu_bus_init(NULL); iommu_device_unregister(&smmu->iommu); iommu_device_sysfs_remove(&smmu->iommu); From 2efbd29bb1105d46e3eb64c00db6d9a0aee232ff Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 15 Aug 2022 17:20:09 +0100 Subject: [PATCH 16/59] iommu/arm-smmu-v3: Clean up bus_set_iommu() Stop calling bus_set_iommu() since it's now unnecessary, and simplify the probe failure path accordingly. Acked-by: Will Deacon Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/6235f07df013776656a61bb642023ecce07f46cc.1660572783.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 53 +-------------------- 1 file changed, 2 insertions(+), 51 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index fceab59113ba..241efb7482e9 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -28,8 +28,6 @@ #include #include -#include - #include "arm-smmu-v3.h" #include "../../iommu-sva-lib.h" @@ -3676,43 +3674,6 @@ static unsigned long arm_smmu_resource_size(struct arm_smmu_device *smmu) return SZ_128K; } -static int arm_smmu_set_bus_ops(struct iommu_ops *ops) -{ - int err; - -#ifdef CONFIG_PCI - if (pci_bus_type.iommu_ops != ops) { - err = bus_set_iommu(&pci_bus_type, ops); - if (err) - return err; - } -#endif -#ifdef CONFIG_ARM_AMBA - if (amba_bustype.iommu_ops != ops) { - err = bus_set_iommu(&amba_bustype, ops); - if (err) - goto err_reset_pci_ops; - } -#endif - if (platform_bus_type.iommu_ops != ops) { - err = bus_set_iommu(&platform_bus_type, ops); - if (err) - goto err_reset_amba_ops; - } - - return 0; - -err_reset_amba_ops: -#ifdef CONFIG_ARM_AMBA - bus_set_iommu(&amba_bustype, NULL); -#endif -err_reset_pci_ops: __maybe_unused; -#ifdef CONFIG_PCI - bus_set_iommu(&pci_bus_type, NULL); -#endif - return err; -} - static void __iomem *arm_smmu_ioremap(struct device *dev, resource_size_t start, resource_size_t size) { @@ -3851,27 +3812,17 @@ static int arm_smmu_device_probe(struct platform_device *pdev) ret = iommu_device_register(&smmu->iommu, &arm_smmu_ops, dev); if (ret) { dev_err(dev, "Failed to register iommu\n"); - goto err_sysfs_remove; + iommu_device_sysfs_remove(&smmu->iommu); + return ret; } - ret = arm_smmu_set_bus_ops(&arm_smmu_ops); - if (ret) - goto err_unregister_device; - return 0; - -err_unregister_device: - iommu_device_unregister(&smmu->iommu); -err_sysfs_remove: - iommu_device_sysfs_remove(&smmu->iommu); - return ret; } static int arm_smmu_device_remove(struct platform_device *pdev) { struct arm_smmu_device *smmu = platform_get_drvdata(pdev); - arm_smmu_set_bus_ops(NULL); iommu_device_unregister(&smmu->iommu); iommu_device_sysfs_remove(&smmu->iommu); arm_smmu_device_disable(smmu); From 006abbe36acdac4155a3422439935839bbf76c38 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 15 Aug 2022 17:20:10 +0100 Subject: [PATCH 17/59] iommu/dart: Clean up bus_set_iommu() Stop calling bus_set_iommu() since it's now unnecessary, and simplify the probe failure path accordingly. Tested-by: Sven Peter Reviewed-by: Sven Peter Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/afe138964196907d58147a686c1dcd6a12f9e210.1660572783.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel --- drivers/iommu/apple-dart.c | 30 +----------------------------- 1 file changed, 1 insertion(+), 29 deletions(-) diff --git a/drivers/iommu/apple-dart.c b/drivers/iommu/apple-dart.c index 1b1725759262..437aed674fba 100644 --- a/drivers/iommu/apple-dart.c +++ b/drivers/iommu/apple-dart.c @@ -820,27 +820,6 @@ static irqreturn_t apple_dart_irq(int irq, void *dev) return IRQ_HANDLED; } -static int apple_dart_set_bus_ops(const struct iommu_ops *ops) -{ - int ret; - - if (!iommu_present(&platform_bus_type)) { - ret = bus_set_iommu(&platform_bus_type, ops); - if (ret) - return ret; - } -#ifdef CONFIG_PCI - if (!iommu_present(&pci_bus_type)) { - ret = bus_set_iommu(&pci_bus_type, ops); - if (ret) { - bus_set_iommu(&platform_bus_type, NULL); - return ret; - } - } -#endif - return 0; -} - static int apple_dart_probe(struct platform_device *pdev) { int ret; @@ -895,14 +874,10 @@ static int apple_dart_probe(struct platform_device *pdev) platform_set_drvdata(pdev, dart); - ret = apple_dart_set_bus_ops(&apple_dart_iommu_ops); - if (ret) - goto err_free_irq; - ret = iommu_device_sysfs_add(&dart->iommu, dev, NULL, "apple-dart.%s", dev_name(&pdev->dev)); if (ret) - goto err_remove_bus_ops; + goto err_free_irq; ret = iommu_device_register(&dart->iommu, &apple_dart_iommu_ops, dev); if (ret) @@ -916,8 +891,6 @@ static int apple_dart_probe(struct platform_device *pdev) err_sysfs_remove: iommu_device_sysfs_remove(&dart->iommu); -err_remove_bus_ops: - apple_dart_set_bus_ops(NULL); err_free_irq: free_irq(dart->irq, dart); err_clk_disable: @@ -932,7 +905,6 @@ static int apple_dart_remove(struct platform_device *pdev) apple_dart_hw_reset(dart); free_irq(dart->irq, dart); - apple_dart_set_bus_ops(NULL); iommu_device_unregister(&dart->iommu); iommu_device_sysfs_remove(&dart->iommu); From 2bba80c2bf521f56acabda2689dd4d89d8d70882 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 15 Aug 2022 17:20:11 +0100 Subject: [PATCH 18/59] iommu/exynos: Clean up bus_set_iommu() Stop calling bus_set_iommu() since it's now unnecessary, and simplify the init failure path accordingly. Tested-by: Marek Szyprowski Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/d7477ef546479300217ca7bccb44da8b02715a07.1660572783.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel --- drivers/iommu/exynos-iommu.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c index 8e18984a0c4f..45fd4850bacb 100644 --- a/drivers/iommu/exynos-iommu.c +++ b/drivers/iommu/exynos-iommu.c @@ -1446,16 +1446,7 @@ static int __init exynos_iommu_init(void) goto err_zero_lv2; } - ret = bus_set_iommu(&platform_bus_type, &exynos_iommu_ops); - if (ret) { - pr_err("%s: Failed to register exynos-iommu driver.\n", - __func__); - goto err_set_iommu; - } - return 0; -err_set_iommu: - kmem_cache_free(lv2table_kmem_cache, zero_lv2_table); err_zero_lv2: platform_driver_unregister(&exynos_sysmmu_driver); err_reg_driver: From b87d6d7fa405e23478f1e1dff6d66b5a533a5433 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 15 Aug 2022 17:20:12 +0100 Subject: [PATCH 19/59] iommu/ipmmu-vmsa: Clean up bus_set_iommu() Stop calling bus_set_iommu() since it's now unnecessary. This also leaves the custom initcall effectively doing nothing but register the driver, which no longer needs to happen early either, so convert it to builtin_platform_driver(). Signed-off-by: Robin Murphy Reported-by: kernel test robot Link: https://lore.kernel.org/r/14377566e449950c19367f75ec1b09724bf0889f.1660572783.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel --- drivers/iommu/ipmmu-vmsa.c | 35 +---------------------------------- 1 file changed, 1 insertion(+), 34 deletions(-) diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index 1d42084d0276..3b30c0752274 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -1090,11 +1090,6 @@ static int ipmmu_probe(struct platform_device *pdev) ret = iommu_device_register(&mmu->iommu, &ipmmu_ops, &pdev->dev); if (ret) return ret; - -#if defined(CONFIG_IOMMU_DMA) - if (!iommu_present(&platform_bus_type)) - bus_set_iommu(&platform_bus_type, &ipmmu_ops); -#endif } /* @@ -1168,32 +1163,4 @@ static struct platform_driver ipmmu_driver = { .probe = ipmmu_probe, .remove = ipmmu_remove, }; - -static int __init ipmmu_init(void) -{ - struct device_node *np; - static bool setup_done; - int ret; - - if (setup_done) - return 0; - - np = of_find_matching_node(NULL, ipmmu_of_ids); - if (!np) - return 0; - - of_node_put(np); - - ret = platform_driver_register(&ipmmu_driver); - if (ret < 0) - return ret; - -#if defined(CONFIG_ARM) && !defined(CONFIG_IOMMU_DMA) - if (!iommu_present(&platform_bus_type)) - bus_set_iommu(&platform_bus_type, &ipmmu_ops); -#endif - - setup_done = true; - return 0; -} -subsys_initcall(ipmmu_init); +builtin_platform_driver(ipmmu_driver); From 7341c365c3fb80f078e7d87f5f874b83a29e0e74 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 15 Aug 2022 17:20:13 +0100 Subject: [PATCH 20/59] iommu/mtk: Clean up bus_set_iommu() Stop calling bus_set_iommu() since it's now unnecessary, and simplify the probe failure paths accordingly. Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/9134322ecd24030eebeac73f37ca579094cc7df0.1660572783.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel --- drivers/iommu/mtk_iommu.c | 24 +----------------------- drivers/iommu/mtk_iommu_v1.c | 13 +------------ 2 files changed, 2 insertions(+), 35 deletions(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 7e363b1f24df..552e4eb8c610 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -1243,30 +1243,13 @@ static int mtk_iommu_probe(struct platform_device *pdev) data->hw_list = &data->hw_list_head; } - if (!iommu_present(&platform_bus_type)) { - ret = bus_set_iommu(&platform_bus_type, &mtk_iommu_ops); - if (ret) - goto out_list_del; - } - if (MTK_IOMMU_IS_TYPE(data->plat_data, MTK_IOMMU_TYPE_MM)) { ret = component_master_add_with_match(dev, &mtk_iommu_com_ops, match); if (ret) - goto out_bus_set_null; - } else if (MTK_IOMMU_IS_TYPE(data->plat_data, MTK_IOMMU_TYPE_INFRA) && - MTK_IOMMU_HAS_FLAG(data->plat_data, IFA_IOMMU_PCIE_SUPPORT)) { -#ifdef CONFIG_PCI - if (!iommu_present(&pci_bus_type)) { - ret = bus_set_iommu(&pci_bus_type, &mtk_iommu_ops); - if (ret) /* PCIe fail don't affect platform_bus. */ - goto out_list_del; - } -#endif + goto out_list_del; } return ret; -out_bus_set_null: - bus_set_iommu(&platform_bus_type, NULL); out_list_del: list_del(&data->list); iommu_device_unregister(&data->iommu); @@ -1294,11 +1277,6 @@ static int mtk_iommu_remove(struct platform_device *pdev) if (MTK_IOMMU_IS_TYPE(data->plat_data, MTK_IOMMU_TYPE_MM)) { device_link_remove(data->smicomm_dev, &pdev->dev); component_master_del(&pdev->dev, &mtk_iommu_com_ops); - } else if (MTK_IOMMU_IS_TYPE(data->plat_data, MTK_IOMMU_TYPE_INFRA) && - MTK_IOMMU_HAS_FLAG(data->plat_data, IFA_IOMMU_PCIE_SUPPORT)) { -#ifdef CONFIG_PCI - bus_set_iommu(&pci_bus_type, NULL); -#endif } pm_runtime_disable(&pdev->dev); for (i = 0; i < data->plat_data->banks_num; i++) { diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c index 128c7a3f1778..6e0e65831eb7 100644 --- a/drivers/iommu/mtk_iommu_v1.c +++ b/drivers/iommu/mtk_iommu_v1.c @@ -691,19 +691,11 @@ static int mtk_iommu_v1_probe(struct platform_device *pdev) if (ret) goto out_sysfs_remove; - if (!iommu_present(&platform_bus_type)) { - ret = bus_set_iommu(&platform_bus_type, &mtk_iommu_v1_ops); - if (ret) - goto out_dev_unreg; - } - ret = component_master_add_with_match(dev, &mtk_iommu_v1_com_ops, match); if (ret) - goto out_bus_set_null; + goto out_dev_unreg; return ret; -out_bus_set_null: - bus_set_iommu(&platform_bus_type, NULL); out_dev_unreg: iommu_device_unregister(&data->iommu); out_sysfs_remove: @@ -718,9 +710,6 @@ static int mtk_iommu_v1_remove(struct platform_device *pdev) iommu_device_sysfs_remove(&data->iommu); iommu_device_unregister(&data->iommu); - if (iommu_present(&platform_bus_type)) - bus_set_iommu(&platform_bus_type, NULL); - clk_disable_unprepare(data->bclk); devm_free_irq(&pdev->dev, data->irq, data); component_master_del(&pdev->dev, &mtk_iommu_v1_com_ops); From a24090860e7dfaef096c69cc791db6b281246e88 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 15 Aug 2022 17:20:14 +0100 Subject: [PATCH 21/59] iommu/omap: Clean up bus_set_iommu() Stop calling bus_set_iommu() since it's now unnecessary, and simplify the init failure path accordingly. Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/b578af8e2bf8afeccb2c2ce87c1aa38b36f01331.1660572783.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel --- drivers/iommu/omap-iommu.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c index d9cf2820c02e..07ee2600113c 100644 --- a/drivers/iommu/omap-iommu.c +++ b/drivers/iommu/omap-iommu.c @@ -1776,14 +1776,8 @@ static int __init omap_iommu_init(void) goto fail_driver; } - ret = bus_set_iommu(&platform_bus_type, &omap_iommu_ops); - if (ret) - goto fail_bus; - return 0; -fail_bus: - platform_driver_unregister(&omap_iommu_driver); fail_driver: kmem_cache_destroy(iopte_cachep); return ret; From 48a7c5080a28ab5a5a454008965cdd19790c69ae Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 15 Aug 2022 17:20:15 +0100 Subject: [PATCH 22/59] iommu/tegra-smmu: Clean up bus_set_iommu() Stop calling bus_set_iommu() since it's now unnecessary, and simplify the probe failure path accordingly. Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/13bb6baa6c4d74e95a12529e4eb1ddfb3885c3b5.1660572783.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel --- drivers/iommu/tegra-smmu.c | 29 ++++++----------------------- 1 file changed, 6 insertions(+), 23 deletions(-) diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c index 2a8de975fe63..5b1af40221ec 100644 --- a/drivers/iommu/tegra-smmu.c +++ b/drivers/iommu/tegra-smmu.c @@ -1083,8 +1083,8 @@ struct tegra_smmu *tegra_smmu_probe(struct device *dev, /* * This is a bit of a hack. Ideally we'd want to simply return this - * value. However the IOMMU registration process will attempt to add - * all devices to the IOMMU when bus_set_iommu() is called. In order + * value. However iommu_device_register() will attempt to add + * all devices to the IOMMU before we get that far. In order * not to rely on global variables to track the IOMMU instance, we * set it here so that it can be looked up from the .probe_device() * callback via the IOMMU device's .drvdata field. @@ -1138,32 +1138,15 @@ struct tegra_smmu *tegra_smmu_probe(struct device *dev, return ERR_PTR(err); err = iommu_device_register(&smmu->iommu, &tegra_smmu_ops, dev); - if (err) - goto remove_sysfs; - - err = bus_set_iommu(&platform_bus_type, &tegra_smmu_ops); - if (err < 0) - goto unregister; - -#ifdef CONFIG_PCI - err = bus_set_iommu(&pci_bus_type, &tegra_smmu_ops); - if (err < 0) - goto unset_platform_bus; -#endif + if (err) { + iommu_device_sysfs_remove(&smmu->iommu); + return ERR_PTR(err); + } if (IS_ENABLED(CONFIG_DEBUG_FS)) tegra_smmu_debugfs_init(smmu); return smmu; - -unset_platform_bus: __maybe_unused; - bus_set_iommu(&platform_bus_type, NULL); -unregister: - iommu_device_unregister(&smmu->iommu); -remove_sysfs: - iommu_device_sysfs_remove(&smmu->iommu); - - return ERR_PTR(err); } void tegra_smmu_remove(struct tegra_smmu *smmu) From 19d3607c74bc0332f0ee3a2d54ead6e792c0dce9 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 15 Aug 2022 17:20:16 +0100 Subject: [PATCH 23/59] iommu/virtio: Clean up bus_set_iommu() Stop calling bus_set_iommu() since it's now unnecessary, and simplify the probe failure path accordingly. Reviewed-by: Jean-Philippe Brucker Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/0ff6f9166081724510e6772e43d45b317cab8c58.1660572783.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel --- drivers/iommu/virtio-iommu.c | 25 ------------------------- 1 file changed, 25 deletions(-) diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c index 08eeafc9529f..31ab9d622b67 100644 --- a/drivers/iommu/virtio-iommu.c +++ b/drivers/iommu/virtio-iommu.c @@ -7,7 +7,6 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -#include #include #include #include @@ -17,7 +16,6 @@ #include #include #include -#include #include #include #include @@ -1145,26 +1143,6 @@ static int viommu_probe(struct virtio_device *vdev) iommu_device_register(&viommu->iommu, &viommu_ops, parent_dev); -#ifdef CONFIG_PCI - if (pci_bus_type.iommu_ops != &viommu_ops) { - ret = bus_set_iommu(&pci_bus_type, &viommu_ops); - if (ret) - goto err_unregister; - } -#endif -#ifdef CONFIG_ARM_AMBA - if (amba_bustype.iommu_ops != &viommu_ops) { - ret = bus_set_iommu(&amba_bustype, &viommu_ops); - if (ret) - goto err_unregister; - } -#endif - if (platform_bus_type.iommu_ops != &viommu_ops) { - ret = bus_set_iommu(&platform_bus_type, &viommu_ops); - if (ret) - goto err_unregister; - } - vdev->priv = viommu; dev_info(dev, "input address: %u bits\n", @@ -1173,9 +1151,6 @@ static int viommu_probe(struct virtio_device *vdev) return 0; -err_unregister: - iommu_device_sysfs_remove(&viommu->iommu); - iommu_device_unregister(&viommu->iommu); err_free_vqs: vdev->config->del_vqs(vdev); From 29e932295bfaba792d29e66e8be0637ff3994724 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 15 Aug 2022 17:20:17 +0100 Subject: [PATCH 24/59] iommu: Clean up bus_set_iommu() Clean up the remaining trivial bus_set_iommu() callsites along with the implementation. Now drivers only have to know and care about iommu_device instances, phew! Reviewed-by: Kevin Tian Tested-by: Matthew Rosato # s390 Tested-by: Niklas Schnelle # s390 Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/ea383d5f4d74ffe200ab61248e5de6e95846180a.1660572783.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel --- drivers/iommu/arm/arm-smmu/qcom_iommu.c | 4 ---- drivers/iommu/fsl_pamu_domain.c | 4 ---- drivers/iommu/intel/iommu.c | 2 -- drivers/iommu/iommu.c | 24 ------------------------ drivers/iommu/msm_iommu.c | 2 -- drivers/iommu/rockchip-iommu.c | 2 -- drivers/iommu/s390-iommu.c | 6 ------ drivers/iommu/sprd-iommu.c | 5 ----- drivers/iommu/sun50i-iommu.c | 2 -- include/linux/iommu.h | 1 - 10 files changed, 52 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu/qcom_iommu.c b/drivers/iommu/arm/arm-smmu/qcom_iommu.c index 66ca47f2e57f..3869c3ecda8c 100644 --- a/drivers/iommu/arm/arm-smmu/qcom_iommu.c +++ b/drivers/iommu/arm/arm-smmu/qcom_iommu.c @@ -837,8 +837,6 @@ static int qcom_iommu_device_probe(struct platform_device *pdev) goto err_pm_disable; } - bus_set_iommu(&platform_bus_type, &qcom_iommu_ops); - if (qcom_iommu->local_base) { pm_runtime_get_sync(dev); writel_relaxed(0xffffffff, qcom_iommu->local_base + SMMU_INTR_SEL_NS); @@ -856,8 +854,6 @@ static int qcom_iommu_device_remove(struct platform_device *pdev) { struct qcom_iommu_dev *qcom_iommu = platform_get_drvdata(pdev); - bus_set_iommu(&platform_bus_type, NULL); - pm_runtime_force_suspend(&pdev->dev); platform_set_drvdata(pdev, NULL); iommu_device_sysfs_remove(&qcom_iommu->iommu); diff --git a/drivers/iommu/fsl_pamu_domain.c b/drivers/iommu/fsl_pamu_domain.c index 08fd9089e3ba..fa20f4b03e12 100644 --- a/drivers/iommu/fsl_pamu_domain.c +++ b/drivers/iommu/fsl_pamu_domain.c @@ -476,11 +476,7 @@ int __init pamu_domain_init(void) if (ret) { iommu_device_sysfs_remove(&pamu_iommu); pr_err("Can't register iommu device\n"); - return ret; } - bus_set_iommu(&platform_bus_type, &fsl_pamu_ops); - bus_set_iommu(&pci_bus_type, &fsl_pamu_ops); - return ret; } diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 5d3c220a0308..1ff58d25b713 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -3890,7 +3890,6 @@ static int __init probe_acpi_namespace_devices(void) continue; } - pn->dev->bus->iommu_ops = &intel_iommu_ops; ret = iommu_probe_device(pn->dev); if (ret) break; @@ -4023,7 +4022,6 @@ int __init intel_iommu_init(void) } up_read(&dmar_global_lock); - bus_set_iommu(&pci_bus_type, &intel_iommu_ops); if (si_domain && !hw_pass_through) register_memory_notifier(&intel_iommu_memory_nb); diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 422d4e9f980b..83688db121f0 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -1826,30 +1826,6 @@ int bus_iommu_probe(struct bus_type *bus) return ret; } -/** - * bus_set_iommu - set iommu-callbacks for the bus - * @bus: bus. - * @ops: the callbacks provided by the iommu-driver - * - * This function is called by an iommu driver to set the iommu methods - * used for a particular bus. Drivers for devices on that bus can use - * the iommu-api after these ops are registered. - * This special function is needed because IOMMUs are usually devices on - * the bus itself, so the iommu drivers are not initialized when the bus - * is set up. With this function the iommu-driver can set the iommu-ops - * afterwards. - */ -int bus_set_iommu(struct bus_type *bus, const struct iommu_ops *ops) -{ - if (bus->iommu_ops && ops && bus->iommu_ops != ops) - return -EBUSY; - - bus->iommu_ops = ops; - - return 0; -} -EXPORT_SYMBOL_GPL(bus_set_iommu); - bool iommu_present(struct bus_type *bus) { return bus->iommu_ops != NULL; diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c index 6a24aa804ea3..16179a9a7283 100644 --- a/drivers/iommu/msm_iommu.c +++ b/drivers/iommu/msm_iommu.c @@ -792,8 +792,6 @@ static int msm_iommu_probe(struct platform_device *pdev) goto fail; } - bus_set_iommu(&platform_bus_type, &msm_iommu_ops); - pr_info("device mapped at %p, irq %d with %d ctx banks\n", iommu->base, iommu->irq, iommu->ncb); diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c index ab57c4b8fade..a3fc59b814ab 100644 --- a/drivers/iommu/rockchip-iommu.c +++ b/drivers/iommu/rockchip-iommu.c @@ -1300,8 +1300,6 @@ static int rk_iommu_probe(struct platform_device *pdev) if (!dma_dev) dma_dev = &pdev->dev; - bus_set_iommu(&platform_bus_type, &rk_iommu_ops); - pm_runtime_enable(dev); for (i = 0; i < iommu->num_irq; i++) { diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c index ac4dab6d79e2..3c071782f6f1 100644 --- a/drivers/iommu/s390-iommu.c +++ b/drivers/iommu/s390-iommu.c @@ -390,9 +390,3 @@ static const struct iommu_ops s390_iommu_ops = { .free = s390_domain_free, } }; - -static int __init s390_iommu_init(void) -{ - return bus_set_iommu(&pci_bus_type, &s390_iommu_ops); -} -subsys_initcall(s390_iommu_init); diff --git a/drivers/iommu/sprd-iommu.c b/drivers/iommu/sprd-iommu.c index 511959c8a14d..fadd2c907222 100644 --- a/drivers/iommu/sprd-iommu.c +++ b/drivers/iommu/sprd-iommu.c @@ -496,9 +496,6 @@ static int sprd_iommu_probe(struct platform_device *pdev) if (ret) goto remove_sysfs; - if (!iommu_present(&platform_bus_type)) - bus_set_iommu(&platform_bus_type, &sprd_iommu_ops); - ret = sprd_iommu_clk_enable(sdev); if (ret) goto unregister_iommu; @@ -534,8 +531,6 @@ static int sprd_iommu_remove(struct platform_device *pdev) iommu_group_put(sdev->group); sdev->group = NULL; - bus_set_iommu(&platform_bus_type, NULL); - platform_set_drvdata(pdev, NULL); iommu_device_sysfs_remove(&sdev->iommu); iommu_device_unregister(&sdev->iommu); diff --git a/drivers/iommu/sun50i-iommu.c b/drivers/iommu/sun50i-iommu.c index a84c63518773..cd9b74ee24de 100644 --- a/drivers/iommu/sun50i-iommu.c +++ b/drivers/iommu/sun50i-iommu.c @@ -965,8 +965,6 @@ static int sun50i_iommu_probe(struct platform_device *pdev) if (ret < 0) goto err_unregister; - bus_set_iommu(&platform_bus_type, &sun50i_iommu_ops); - return 0; err_unregister: diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 0013a1befe7b..35810f87ae74 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -416,7 +416,6 @@ static inline const struct iommu_ops *dev_iommu_ops(struct device *dev) return dev->iommu->iommu_dev->ops; } -extern int bus_set_iommu(struct bus_type *bus, const struct iommu_ops *ops); extern int bus_iommu_probe(struct bus_type *bus); extern bool iommu_present(struct bus_type *bus); extern bool device_iommu_capable(struct device *dev, enum iommu_cap cap); From de9f8a91eb32bb79149c4fc790df3b54ae710d18 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 16 Aug 2022 18:28:03 +0100 Subject: [PATCH 25/59] iommu/dma: Clean up Kconfig Although iommu-dma is a per-architecture chonce, that is currently implemented in a rather haphazard way. Selecting from the arch Kconfig was the original logical approach, but is complicated by having to manage dependencies; conversely, selecting from drivers ends up hiding the architecture dependency *too* well. Instead, let's just have it enable itself automatically when IOMMU API support is enabled for the relevant architectures. It can't get much clearer than that. Signed-off-by: Robin Murphy Acked-by: Catalin Marinas Link: https://lore.kernel.org/r/2e33c8bc2b1bb478157b7964bfed976cb7466139.1660668998.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel --- arch/arm64/Kconfig | 1 - drivers/iommu/Kconfig | 3 +-- drivers/iommu/amd/Kconfig | 1 - drivers/iommu/intel/Kconfig | 1 - 4 files changed, 1 insertion(+), 5 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 9fb9fff08c94..ff4755fd2cb3 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -209,7 +209,6 @@ config ARM64 select HAVE_KPROBES select HAVE_KRETPROBES select HAVE_GENERIC_VDSO - select IOMMU_DMA if IOMMU_SUPPORT select IRQ_DOMAIN select IRQ_FORCED_THREADING select KASAN_VMALLOC if KASAN diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index 5c5cb5bee8b6..1d99c2d984fb 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -137,7 +137,7 @@ config OF_IOMMU # IOMMU-agnostic DMA-mapping layer config IOMMU_DMA - bool + def_bool ARM64 || IA64 || X86 select DMA_OPS select IOMMU_API select IOMMU_IOVA @@ -476,7 +476,6 @@ config VIRTIO_IOMMU depends on VIRTIO depends on (ARM64 || X86) select IOMMU_API - select IOMMU_DMA select INTERVAL_TREE select ACPI_VIOT if ACPI help diff --git a/drivers/iommu/amd/Kconfig b/drivers/iommu/amd/Kconfig index a3cbafb603f5..9b5fc3356bf2 100644 --- a/drivers/iommu/amd/Kconfig +++ b/drivers/iommu/amd/Kconfig @@ -9,7 +9,6 @@ config AMD_IOMMU select PCI_PASID select IOMMU_API select IOMMU_IOVA - select IOMMU_DMA select IOMMU_IO_PGTABLE depends on X86_64 && PCI && ACPI && HAVE_CMPXCHG_DOUBLE help diff --git a/drivers/iommu/intel/Kconfig b/drivers/iommu/intel/Kconfig index 39a06d245f12..c48005147ac5 100644 --- a/drivers/iommu/intel/Kconfig +++ b/drivers/iommu/intel/Kconfig @@ -19,7 +19,6 @@ config INTEL_IOMMU select DMAR_TABLE select SWIOTLB select IOASID - select IOMMU_DMA select PCI_ATS help DMA remapping (DMAR) devices support enables independent address From fa49364cd5e65797014688cba623541083b3e5b0 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 16 Aug 2022 18:28:04 +0100 Subject: [PATCH 26/59] iommu/dma: Move public interfaces to linux/iommu.h The iommu-dma layer is now mostly encapsulated by iommu_dma_ops, with only a couple more public interfaces left pertaining to MSI integration. Since these depend on the main IOMMU API header anyway, move their declarations there, taking the opportunity to update the half-baked comments to proper kerneldoc along the way. Signed-off-by: Robin Murphy Acked-by: Catalin Marinas Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/9cd99738f52094e6bed44bfee03fa4f288d20695.1660668998.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel --- arch/arm64/mm/dma-mapping.c | 2 +- drivers/iommu/dma-iommu.c | 15 ++++++++++-- drivers/irqchip/irq-gic-v2m.c | 2 +- drivers/irqchip/irq-gic-v3-its.c | 2 +- drivers/irqchip/irq-gic-v3-mbi.c | 2 +- drivers/irqchip/irq-ls-scfg-msi.c | 2 +- drivers/vfio/vfio_iommu_type1.c | 1 - include/linux/dma-iommu.h | 40 ------------------------------- include/linux/iommu.h | 36 ++++++++++++++++++++++++++++ 9 files changed, 54 insertions(+), 48 deletions(-) diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 599cf81f5685..7d7e9a046305 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -7,7 +7,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 17dd683b2fce..6809b33ac9df 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -1633,6 +1633,13 @@ out_free_page: return NULL; } +/** + * iommu_dma_prepare_msi() - Map the MSI page in the IOMMU domain + * @desc: MSI descriptor, will store the MSI page + * @msi_addr: MSI target address to be mapped + * + * Return: 0 on success or negative error code if the mapping failed. + */ int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr) { struct device *dev = msi_desc_to_dev(desc); @@ -1661,8 +1668,12 @@ int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr) return 0; } -void iommu_dma_compose_msi_msg(struct msi_desc *desc, - struct msi_msg *msg) +/** + * iommu_dma_compose_msi_msg() - Apply translation to an MSI message + * @desc: MSI descriptor prepared by iommu_dma_prepare_msi() + * @msg: MSI message containing target physical address + */ +void iommu_dma_compose_msi_msg(struct msi_desc *desc, struct msi_msg *msg) { struct device *dev = msi_desc_to_dev(desc); const struct iommu_domain *domain = iommu_get_domain_for_dev(dev); diff --git a/drivers/irqchip/irq-gic-v2m.c b/drivers/irqchip/irq-gic-v2m.c index b249d4df899e..6e1ac330d7a6 100644 --- a/drivers/irqchip/irq-gic-v2m.c +++ b/drivers/irqchip/irq-gic-v2m.c @@ -13,7 +13,7 @@ #define pr_fmt(fmt) "GICv2m: " fmt #include -#include +#include #include #include #include diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 5ff09de6c48f..e7d8d4208ee6 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -11,9 +11,9 @@ #include #include #include -#include #include #include +#include #include #include #include diff --git a/drivers/irqchip/irq-gic-v3-mbi.c b/drivers/irqchip/irq-gic-v3-mbi.c index a2163d32f17d..e1efdec9e9ac 100644 --- a/drivers/irqchip/irq-gic-v3-mbi.c +++ b/drivers/irqchip/irq-gic-v3-mbi.c @@ -6,7 +6,7 @@ #define pr_fmt(fmt) "GICv3: " fmt -#include +#include #include #include #include diff --git a/drivers/irqchip/irq-ls-scfg-msi.c b/drivers/irqchip/irq-ls-scfg-msi.c index b4927e425f7b..527c90e0920e 100644 --- a/drivers/irqchip/irq-ls-scfg-msi.c +++ b/drivers/irqchip/irq-ls-scfg-msi.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -18,7 +19,6 @@ #include #include #include -#include #define MSI_IRQS_PER_MSIR 32 #define MSI_MSIR_OFFSET 4 diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index db516c90a977..2f41e32c640c 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -37,7 +37,6 @@ #include #include #include -#include #include #include "vfio.h" diff --git a/include/linux/dma-iommu.h b/include/linux/dma-iommu.h index 24607dc3c2ac..e83de4f1f3d6 100644 --- a/include/linux/dma-iommu.h +++ b/include/linux/dma-iommu.h @@ -15,27 +15,10 @@ /* Domain management interface for IOMMU drivers */ int iommu_get_dma_cookie(struct iommu_domain *domain); -int iommu_get_msi_cookie(struct iommu_domain *domain, dma_addr_t base); void iommu_put_dma_cookie(struct iommu_domain *domain); -/* Setup call for arch DMA mapping code */ -void iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 dma_limit); int iommu_dma_init_fq(struct iommu_domain *domain); -/* The DMA API isn't _quite_ the whole story, though... */ -/* - * iommu_dma_prepare_msi() - Map the MSI page in the IOMMU device - * - * The MSI page will be stored in @desc. - * - * Return: 0 on success otherwise an error describing the failure. - */ -int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr); - -/* Update the MSI message if required. */ -void iommu_dma_compose_msi_msg(struct msi_desc *desc, - struct msi_msg *msg); - void iommu_dma_get_resv_regions(struct device *dev, struct list_head *list); void iommu_dma_free_cpu_cached_iovas(unsigned int cpu, @@ -46,15 +29,8 @@ extern bool iommu_dma_forcedac; #else /* CONFIG_IOMMU_DMA */ struct iommu_domain; -struct msi_desc; -struct msi_msg; struct device; -static inline void iommu_setup_dma_ops(struct device *dev, u64 dma_base, - u64 dma_limit) -{ -} - static inline int iommu_dma_init_fq(struct iommu_domain *domain) { return -EINVAL; @@ -65,26 +41,10 @@ static inline int iommu_get_dma_cookie(struct iommu_domain *domain) return -ENODEV; } -static inline int iommu_get_msi_cookie(struct iommu_domain *domain, dma_addr_t base) -{ - return -ENODEV; -} - static inline void iommu_put_dma_cookie(struct iommu_domain *domain) { } -static inline int iommu_dma_prepare_msi(struct msi_desc *desc, - phys_addr_t msi_addr) -{ - return 0; -} - -static inline void iommu_dma_compose_msi_msg(struct msi_desc *desc, - struct msi_msg *msg) -{ -} - static inline void iommu_dma_get_resv_regions(struct device *dev, struct list_head *list) { } diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 35810f87ae74..a325532aeab5 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -1063,4 +1063,40 @@ void iommu_debugfs_setup(void); static inline void iommu_debugfs_setup(void) {} #endif +#ifdef CONFIG_IOMMU_DMA +#include + +/* Setup call for arch DMA mapping code */ +void iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 dma_limit); + +int iommu_get_msi_cookie(struct iommu_domain *domain, dma_addr_t base); + +int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr); +void iommu_dma_compose_msi_msg(struct msi_desc *desc, struct msi_msg *msg); + +#else /* CONFIG_IOMMU_DMA */ + +struct msi_desc; +struct msi_msg; + +static inline void iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 dma_limit) +{ +} + +static inline int iommu_get_msi_cookie(struct iommu_domain *domain, dma_addr_t base) +{ + return -ENODEV; +} + +static inline int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr) +{ + return 0; +} + +static inline void iommu_dma_compose_msi_msg(struct msi_desc *desc, struct msi_msg *msg) +{ +} + +#endif /* CONFIG_IOMMU_DMA */ + #endif /* __LINUX_IOMMU_H */ From 8cc233dec3137a2d243e6996e425c32032672c88 Mon Sep 17 00:00:00 2001 From: Vasant Hegde Date: Thu, 25 Aug 2022 06:39:31 +0000 Subject: [PATCH 27/59] iommu/amd/io-pgtable: Implement map_pages io_pgtable_ops callback Implement the io_pgtable_ops->map_pages() callback for AMD driver. Also deprecate io_pgtable->map callback. Suggested-by: Robin Murphy Signed-off-by: Vasant Hegde Link: https://lore.kernel.org/r/20220825063939.8360-2-vasant.hegde@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/io_pgtable.c | 59 ++++++++++++++++++++-------------- 1 file changed, 34 insertions(+), 25 deletions(-) diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c index 7d4b61e5db47..df7799317f66 100644 --- a/drivers/iommu/amd/io_pgtable.c +++ b/drivers/iommu/amd/io_pgtable.c @@ -360,8 +360,9 @@ static void free_clear_pte(u64 *pte, u64 pteval, struct list_head *freelist) * supporting all features of AMD IOMMU page tables like level skipping * and full 64 bit address spaces. */ -static int iommu_v1_map_page(struct io_pgtable_ops *ops, unsigned long iova, - phys_addr_t paddr, size_t size, int prot, gfp_t gfp) +static int iommu_v1_map_pages(struct io_pgtable_ops *ops, unsigned long iova, + phys_addr_t paddr, size_t pgsize, size_t pgcount, + int prot, gfp_t gfp, size_t *mapped) { struct protection_domain *dom = io_pgtable_ops_to_domain(ops); LIST_HEAD(freelist); @@ -369,39 +370,47 @@ static int iommu_v1_map_page(struct io_pgtable_ops *ops, unsigned long iova, u64 __pte, *pte; int ret, i, count; - BUG_ON(!IS_ALIGNED(iova, size)); - BUG_ON(!IS_ALIGNED(paddr, size)); + BUG_ON(!IS_ALIGNED(iova, pgsize)); + BUG_ON(!IS_ALIGNED(paddr, pgsize)); ret = -EINVAL; if (!(prot & IOMMU_PROT_MASK)) goto out; - count = PAGE_SIZE_PTE_COUNT(size); - pte = alloc_pte(dom, iova, size, NULL, gfp, &updated); + while (pgcount > 0) { + count = PAGE_SIZE_PTE_COUNT(pgsize); + pte = alloc_pte(dom, iova, pgsize, NULL, gfp, &updated); - ret = -ENOMEM; - if (!pte) - goto out; + ret = -ENOMEM; + if (!pte) + goto out; - for (i = 0; i < count; ++i) - free_clear_pte(&pte[i], pte[i], &freelist); + for (i = 0; i < count; ++i) + free_clear_pte(&pte[i], pte[i], &freelist); - if (!list_empty(&freelist)) - updated = true; + if (!list_empty(&freelist)) + updated = true; - if (count > 1) { - __pte = PAGE_SIZE_PTE(__sme_set(paddr), size); - __pte |= PM_LEVEL_ENC(7) | IOMMU_PTE_PR | IOMMU_PTE_FC; - } else - __pte = __sme_set(paddr) | IOMMU_PTE_PR | IOMMU_PTE_FC; + if (count > 1) { + __pte = PAGE_SIZE_PTE(__sme_set(paddr), pgsize); + __pte |= PM_LEVEL_ENC(7) | IOMMU_PTE_PR | IOMMU_PTE_FC; + } else + __pte = __sme_set(paddr) | IOMMU_PTE_PR | IOMMU_PTE_FC; - if (prot & IOMMU_PROT_IR) - __pte |= IOMMU_PTE_IR; - if (prot & IOMMU_PROT_IW) - __pte |= IOMMU_PTE_IW; + if (prot & IOMMU_PROT_IR) + __pte |= IOMMU_PTE_IR; + if (prot & IOMMU_PROT_IW) + __pte |= IOMMU_PTE_IW; - for (i = 0; i < count; ++i) - pte[i] = __pte; + for (i = 0; i < count; ++i) + pte[i] = __pte; + + iova += pgsize; + paddr += pgsize; + pgcount--; + if (mapped) + *mapped += pgsize; + } ret = 0; @@ -514,7 +523,7 @@ static struct io_pgtable *v1_alloc_pgtable(struct io_pgtable_cfg *cfg, void *coo cfg->oas = IOMMU_OUT_ADDR_BIT_SIZE, cfg->tlb = &v1_flush_ops; - pgtable->iop.ops.map = iommu_v1_map_page; + pgtable->iop.ops.map_pages = iommu_v1_map_pages; pgtable->iop.ops.unmap = iommu_v1_unmap_page; pgtable->iop.ops.iova_to_phys = iommu_v1_iova_to_phys; From 251c4db699ca7b966db7e59e8663a231c96ba454 Mon Sep 17 00:00:00 2001 From: Vasant Hegde Date: Thu, 25 Aug 2022 06:39:32 +0000 Subject: [PATCH 28/59] iommu/amd/io-pgtable: Implement unmap_pages io_pgtable_ops callback Implement the io_pgtable_ops->unmap_pages() callback for AMD driver and deprecate io_pgtable_ops->unmap callback. Also if fetch_pte() returns NULL then return from unmap_mapages() instead of trying to continue to unmap remaining pages. Suggested-by: Robin Murphy Signed-off-by: Vasant Hegde Link: https://lore.kernel.org/r/20220825063939.8360-3-vasant.hegde@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/io_pgtable.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c index df7799317f66..ace0e9b8b913 100644 --- a/drivers/iommu/amd/io_pgtable.c +++ b/drivers/iommu/amd/io_pgtable.c @@ -435,17 +435,18 @@ out: return ret; } -static unsigned long iommu_v1_unmap_page(struct io_pgtable_ops *ops, - unsigned long iova, - size_t size, - struct iommu_iotlb_gather *gather) +static unsigned long iommu_v1_unmap_pages(struct io_pgtable_ops *ops, + unsigned long iova, + size_t pgsize, size_t pgcount, + struct iommu_iotlb_gather *gather) { struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops); unsigned long long unmapped; unsigned long unmap_size; u64 *pte; + size_t size = pgcount << __ffs(pgsize); - BUG_ON(!is_power_of_2(size)); + BUG_ON(!is_power_of_2(pgsize)); unmapped = 0; @@ -457,14 +458,14 @@ static unsigned long iommu_v1_unmap_page(struct io_pgtable_ops *ops, count = PAGE_SIZE_PTE_COUNT(unmap_size); for (i = 0; i < count; i++) pte[i] = 0ULL; + } else { + return unmapped; } iova = (iova & ~(unmap_size - 1)) + unmap_size; unmapped += unmap_size; } - BUG_ON(unmapped && !is_power_of_2(unmapped)); - return unmapped; } @@ -524,7 +525,7 @@ static struct io_pgtable *v1_alloc_pgtable(struct io_pgtable_cfg *cfg, void *coo cfg->tlb = &v1_flush_ops; pgtable->iop.ops.map_pages = iommu_v1_map_pages; - pgtable->iop.ops.unmap = iommu_v1_unmap_page; + pgtable->iop.ops.unmap_pages = iommu_v1_unmap_pages; pgtable->iop.ops.iova_to_phys = iommu_v1_iova_to_phys; return &pgtable->iop; From 6b080c4e815ceba3c08ffa980c858595c07e786a Mon Sep 17 00:00:00 2001 From: Vasant Hegde Date: Thu, 25 Aug 2022 06:39:33 +0000 Subject: [PATCH 29/59] iommu/amd: Add map/unmap_pages() iommu_domain_ops callback support Implement the map_pages() and unmap_pages() callback for the AMD IOMMU driver to allow calls from iommu core to map and unmap multiple pages. Also deprecate map/unmap callbacks. Finally gatherer is not updated by iommu_v1_unmap_pages(). Hence pass NULL instead of gather to iommu_v1_unmap_pages. Suggested-by: Robin Murphy Signed-off-by: Vasant Hegde Link: https://lore.kernel.org/r/20220825063939.8360-4-vasant.hegde@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/iommu.c | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 65b8e4fd8217..07c47a7088be 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -2174,13 +2174,13 @@ static void amd_iommu_iotlb_sync_map(struct iommu_domain *dom, struct protection_domain *domain = to_pdomain(dom); struct io_pgtable_ops *ops = &domain->iop.iop.ops; - if (ops->map) + if (ops->map_pages) domain_flush_np_cache(domain, iova, size); } -static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova, - phys_addr_t paddr, size_t page_size, int iommu_prot, - gfp_t gfp) +static int amd_iommu_map_pages(struct iommu_domain *dom, unsigned long iova, + phys_addr_t paddr, size_t pgsize, size_t pgcount, + int iommu_prot, gfp_t gfp, size_t *mapped) { struct protection_domain *domain = to_pdomain(dom); struct io_pgtable_ops *ops = &domain->iop.iop.ops; @@ -2196,8 +2196,10 @@ static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova, if (iommu_prot & IOMMU_WRITE) prot |= IOMMU_PROT_IW; - if (ops->map) - ret = ops->map(ops, iova, paddr, page_size, prot, gfp); + if (ops->map_pages) { + ret = ops->map_pages(ops, iova, paddr, pgsize, + pgcount, prot, gfp, mapped); + } return ret; } @@ -2223,9 +2225,9 @@ static void amd_iommu_iotlb_gather_add_page(struct iommu_domain *domain, iommu_iotlb_gather_add_range(gather, iova, size); } -static size_t amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova, - size_t page_size, - struct iommu_iotlb_gather *gather) +static size_t amd_iommu_unmap_pages(struct iommu_domain *dom, unsigned long iova, + size_t pgsize, size_t pgcount, + struct iommu_iotlb_gather *gather) { struct protection_domain *domain = to_pdomain(dom); struct io_pgtable_ops *ops = &domain->iop.iop.ops; @@ -2235,9 +2237,10 @@ static size_t amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova, (domain->iop.mode == PAGE_MODE_NONE)) return 0; - r = (ops->unmap) ? ops->unmap(ops, iova, page_size, gather) : 0; + r = (ops->unmap_pages) ? ops->unmap_pages(ops, iova, pgsize, pgcount, NULL) : 0; - amd_iommu_iotlb_gather_add_page(dom, gather, iova, page_size); + if (r) + amd_iommu_iotlb_gather_add_page(dom, gather, iova, r); return r; } @@ -2399,8 +2402,8 @@ const struct iommu_ops amd_iommu_ops = { .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = amd_iommu_attach_device, .detach_dev = amd_iommu_detach_device, - .map = amd_iommu_map, - .unmap = amd_iommu_unmap, + .map_pages = amd_iommu_map_pages, + .unmap_pages = amd_iommu_unmap_pages, .iotlb_sync_map = amd_iommu_iotlb_sync_map, .iova_to_phys = amd_iommu_iova_to_phys, .flush_iotlb_all = amd_iommu_flush_iotlb_all, From 43312b710b11a085ec81eb7d1878f8858045ae2b Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Thu, 25 Aug 2022 06:39:34 +0000 Subject: [PATCH 30/59] iommu/amd: Refactor amd_iommu_domain_enable_v2 to remove locking The current function to enable IOMMU v2 also lock the domain. In order to reuse the same code in different code path, in which the domain has already been locked, refactor the function to separate the locking from the enabling logic. Co-developed-by: Vasant Hegde Signed-off-by: Vasant Hegde Signed-off-by: Suravee Suthikulpanit Link: https://lore.kernel.org/r/20220825063939.8360-5-vasant.hegde@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/iommu.c | 46 +++++++++++++++++++++++---------------- 1 file changed, 27 insertions(+), 19 deletions(-) diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 07c47a7088be..771709d1dfff 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -85,6 +85,7 @@ struct iommu_cmd { struct kmem_cache *amd_iommu_irq_cache; static void detach_device(struct device *dev); +static int domain_enable_v2(struct protection_domain *domain, int pasids); /**************************************************************************** * @@ -2450,11 +2451,10 @@ void amd_iommu_domain_direct_map(struct iommu_domain *dom) } EXPORT_SYMBOL(amd_iommu_domain_direct_map); -int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids) +/* Note: This function expects iommu_domain->lock to be held prior calling the function. */ +static int domain_enable_v2(struct protection_domain *domain, int pasids) { - struct protection_domain *domain = to_pdomain(dom); - unsigned long flags; - int levels, ret; + int levels; /* Number of GCR3 table levels required */ for (levels = 0; (pasids - 1) & ~0x1ff; pasids >>= 9) @@ -2463,7 +2463,25 @@ int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids) if (levels > amd_iommu_max_glx_val) return -EINVAL; - spin_lock_irqsave(&domain->lock, flags); + domain->gcr3_tbl = (void *)get_zeroed_page(GFP_ATOMIC); + if (domain->gcr3_tbl == NULL) + return -ENOMEM; + + domain->glx = levels; + domain->flags |= PD_IOMMUV2_MASK; + + amd_iommu_domain_update(domain); + + return 0; +} + +int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids) +{ + struct protection_domain *pdom = to_pdomain(dom); + unsigned long flags; + int ret; + + spin_lock_irqsave(&pdom->lock, flags); /* * Save us all sanity checks whether devices already in the @@ -2471,24 +2489,14 @@ int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids) * devices attached when it is switched into IOMMUv2 mode. */ ret = -EBUSY; - if (domain->dev_cnt > 0 || domain->flags & PD_IOMMUV2_MASK) + if (pdom->dev_cnt > 0 || pdom->flags & PD_IOMMUV2_MASK) goto out; - ret = -ENOMEM; - domain->gcr3_tbl = (void *)get_zeroed_page(GFP_ATOMIC); - if (domain->gcr3_tbl == NULL) - goto out; - - domain->glx = levels; - domain->flags |= PD_IOMMUV2_MASK; - - amd_iommu_domain_update(domain); - - ret = 0; + if (!pdom->gcr3_tbl) + ret = domain_enable_v2(pdom, pasids); out: - spin_unlock_irqrestore(&domain->lock, flags); - + spin_unlock_irqrestore(&pdom->lock, flags); return ret; } EXPORT_SYMBOL(amd_iommu_domain_enable_v2); From be1af02b277417be735d8513195e5ba1bc3c3a3d Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Thu, 25 Aug 2022 06:39:35 +0000 Subject: [PATCH 31/59] iommu/amd: Update sanity check when enable PRI/ATS for IOMMU v1 table Currently, PPR/ATS can be enabled only if the domain is type identity mapping. However, when allowing the IOMMU v2 page table to be used for DMA-API, the check is no longer valid. Update the sanity check to only apply for when using AMD_IOMMU_V1 page table mode. Signed-off-by: Suravee Suthikulpanit Signed-off-by: Vasant Hegde Link: https://lore.kernel.org/r/20220825063939.8360-6-vasant.hegde@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/iommu.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 771709d1dfff..c882b06e2360 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -1694,7 +1694,7 @@ static void pdev_iommuv2_disable(struct pci_dev *pdev) pci_disable_pasid(pdev); } -static int pdev_iommuv2_enable(struct pci_dev *pdev) +static int pdev_pri_ats_enable(struct pci_dev *pdev) { int ret; @@ -1757,11 +1757,19 @@ static int attach_device(struct device *dev, struct iommu_domain *def_domain = iommu_get_dma_domain(dev); ret = -EINVAL; - if (def_domain->type != IOMMU_DOMAIN_IDENTITY) + + /* + * In case of using AMD_IOMMU_V1 page table mode and the device + * is enabling for PPR/ATS support (using v2 table), + * we need to make sure that the domain type is identity map. + */ + if ((amd_iommu_pgtable == AMD_IOMMU_V1) && + def_domain->type != IOMMU_DOMAIN_IDENTITY) { goto out; + } if (dev_data->iommu_v2) { - if (pdev_iommuv2_enable(pdev) != 0) + if (pdev_pri_ats_enable(pdev) != 0) goto out; dev_data->ats.enabled = true; From aaac38f614871df252aa7459647bf68d42f7c3e7 Mon Sep 17 00:00:00 2001 From: Vasant Hegde Date: Thu, 25 Aug 2022 06:39:36 +0000 Subject: [PATCH 32/59] iommu/amd: Initial support for AMD IOMMU v2 page table Introduce IO page table framework support for AMD IOMMU v2 page table. This patch implements 4 level page table within iommu amd driver and supports 4K/2M/1G page sizes. Signed-off-by: Vasant Hegde Link: https://lore.kernel.org/r/20220825063939.8360-7-vasant.hegde@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/Makefile | 2 +- drivers/iommu/amd/amd_iommu_types.h | 5 +- drivers/iommu/amd/io_pgtable_v2.c | 415 ++++++++++++++++++++++++++++ drivers/iommu/io-pgtable.c | 1 + include/linux/io-pgtable.h | 2 + 5 files changed, 423 insertions(+), 2 deletions(-) create mode 100644 drivers/iommu/amd/io_pgtable_v2.c diff --git a/drivers/iommu/amd/Makefile b/drivers/iommu/amd/Makefile index a935f8f4b974..773d8aa00283 100644 --- a/drivers/iommu/amd/Makefile +++ b/drivers/iommu/amd/Makefile @@ -1,4 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only -obj-$(CONFIG_AMD_IOMMU) += iommu.o init.o quirks.o io_pgtable.o +obj-$(CONFIG_AMD_IOMMU) += iommu.o init.o quirks.o io_pgtable.o io_pgtable_v2.o obj-$(CONFIG_AMD_IOMMU_DEBUGFS) += debugfs.o obj-$(CONFIG_AMD_IOMMU_V2) += iommu_v2.o diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index 5b1019dab328..660c1f044912 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -276,6 +276,8 @@ * 512GB Pages are not supported due to a hardware bug */ #define AMD_IOMMU_PGSIZES ((~0xFFFUL) & ~(2ULL << 38)) +/* 4K, 2MB, 1G page sizes are supported */ +#define AMD_IOMMU_PGSIZES_V2 (PAGE_SIZE | (1ULL << 21) | (1ULL << 30)) /* Bit value definition for dte irq remapping fields*/ #define DTE_IRQ_PHYS_ADDR_MASK (((1ULL << 45)-1) << 6) @@ -526,7 +528,8 @@ struct amd_io_pgtable { struct io_pgtable iop; int mode; u64 *root; - atomic64_t pt_root; /* pgtable root and pgtable mode */ + atomic64_t pt_root; /* pgtable root and pgtable mode */ + u64 *pgd; /* v2 pgtable pgd pointer */ }; /* diff --git a/drivers/iommu/amd/io_pgtable_v2.c b/drivers/iommu/amd/io_pgtable_v2.c new file mode 100644 index 000000000000..8638ddf6fb3b --- /dev/null +++ b/drivers/iommu/amd/io_pgtable_v2.c @@ -0,0 +1,415 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * CPU-agnostic AMD IO page table v2 allocator. + * + * Copyright (C) 2022 Advanced Micro Devices, Inc. + * Author: Suravee Suthikulpanit + * Author: Vasant Hegde + */ + +#define pr_fmt(fmt) "AMD-Vi: " fmt +#define dev_fmt(fmt) pr_fmt(fmt) + +#include +#include +#include + +#include + +#include "amd_iommu_types.h" +#include "amd_iommu.h" + +#define IOMMU_PAGE_PRESENT BIT_ULL(0) /* Is present */ +#define IOMMU_PAGE_RW BIT_ULL(1) /* Writeable */ +#define IOMMU_PAGE_USER BIT_ULL(2) /* Userspace addressable */ +#define IOMMU_PAGE_PWT BIT_ULL(3) /* Page write through */ +#define IOMMU_PAGE_PCD BIT_ULL(4) /* Page cache disabled */ +#define IOMMU_PAGE_ACCESS BIT_ULL(5) /* Was accessed (updated by IOMMU) */ +#define IOMMU_PAGE_DIRTY BIT_ULL(6) /* Was written to (updated by IOMMU) */ +#define IOMMU_PAGE_PSE BIT_ULL(7) /* Page Size Extensions */ +#define IOMMU_PAGE_NX BIT_ULL(63) /* No execute */ + +#define MAX_PTRS_PER_PAGE 512 + +#define IOMMU_PAGE_SIZE_2M BIT_ULL(21) +#define IOMMU_PAGE_SIZE_1G BIT_ULL(30) + + +static inline int get_pgtable_level(void) +{ + /* 5 level page table is not supported */ + return PAGE_MODE_4_LEVEL; +} + +static inline bool is_large_pte(u64 pte) +{ + return (pte & IOMMU_PAGE_PSE); +} + +static inline void *alloc_pgtable_page(void) +{ + return (void *)get_zeroed_page(GFP_KERNEL); +} + +static inline u64 set_pgtable_attr(u64 *page) +{ + u64 prot; + + prot = IOMMU_PAGE_PRESENT | IOMMU_PAGE_RW | IOMMU_PAGE_USER; + prot |= IOMMU_PAGE_ACCESS | IOMMU_PAGE_DIRTY; + + return (iommu_virt_to_phys(page) | prot); +} + +static inline void *get_pgtable_pte(u64 pte) +{ + return iommu_phys_to_virt(pte & PM_ADDR_MASK); +} + +static u64 set_pte_attr(u64 paddr, u64 pg_size, int prot) +{ + u64 pte; + + pte = __sme_set(paddr & PM_ADDR_MASK); + pte |= IOMMU_PAGE_PRESENT | IOMMU_PAGE_USER; + pte |= IOMMU_PAGE_ACCESS | IOMMU_PAGE_DIRTY; + + if (prot & IOMMU_PROT_IW) + pte |= IOMMU_PAGE_RW; + + /* Large page */ + if (pg_size == IOMMU_PAGE_SIZE_1G || pg_size == IOMMU_PAGE_SIZE_2M) + pte |= IOMMU_PAGE_PSE; + + return pte; +} + +static inline u64 get_alloc_page_size(u64 size) +{ + if (size >= IOMMU_PAGE_SIZE_1G) + return IOMMU_PAGE_SIZE_1G; + + if (size >= IOMMU_PAGE_SIZE_2M) + return IOMMU_PAGE_SIZE_2M; + + return PAGE_SIZE; +} + +static inline int page_size_to_level(u64 pg_size) +{ + if (pg_size == IOMMU_PAGE_SIZE_1G) + return PAGE_MODE_3_LEVEL; + if (pg_size == IOMMU_PAGE_SIZE_2M) + return PAGE_MODE_2_LEVEL; + + return PAGE_MODE_1_LEVEL; +} + +static inline void free_pgtable_page(u64 *pt) +{ + free_page((unsigned long)pt); +} + +static void free_pgtable(u64 *pt, int level) +{ + u64 *p; + int i; + + for (i = 0; i < MAX_PTRS_PER_PAGE; i++) { + /* PTE present? */ + if (!IOMMU_PTE_PRESENT(pt[i])) + continue; + + if (is_large_pte(pt[i])) + continue; + + /* + * Free the next level. No need to look at l1 tables here since + * they can only contain leaf PTEs; just free them directly. + */ + p = get_pgtable_pte(pt[i]); + if (level > 2) + free_pgtable(p, level - 1); + else + free_pgtable_page(p); + } + + free_pgtable_page(pt); +} + +/* Allocate page table */ +static u64 *v2_alloc_pte(u64 *pgd, unsigned long iova, + unsigned long pg_size, bool *updated) +{ + u64 *pte, *page; + int level, end_level; + + level = get_pgtable_level() - 1; + end_level = page_size_to_level(pg_size); + pte = &pgd[PM_LEVEL_INDEX(level, iova)]; + iova = PAGE_SIZE_ALIGN(iova, PAGE_SIZE); + + while (level >= end_level) { + u64 __pte, __npte; + + __pte = *pte; + + if (IOMMU_PTE_PRESENT(__pte) && is_large_pte(__pte)) { + /* Unmap large pte */ + cmpxchg64(pte, *pte, 0ULL); + *updated = true; + continue; + } + + if (!IOMMU_PTE_PRESENT(__pte)) { + page = alloc_pgtable_page(); + if (!page) + return NULL; + + __npte = set_pgtable_attr(page); + /* pte could have been changed somewhere. */ + if (cmpxchg64(pte, __pte, __npte) != __pte) + free_pgtable_page(page); + else if (IOMMU_PTE_PRESENT(__pte)) + *updated = true; + + continue; + } + + level -= 1; + pte = get_pgtable_pte(__pte); + pte = &pte[PM_LEVEL_INDEX(level, iova)]; + } + + /* Tear down existing pte entries */ + if (IOMMU_PTE_PRESENT(*pte)) { + u64 *__pte; + + *updated = true; + __pte = get_pgtable_pte(*pte); + cmpxchg64(pte, *pte, 0ULL); + if (pg_size == IOMMU_PAGE_SIZE_1G) + free_pgtable(__pte, end_level - 1); + else if (pg_size == IOMMU_PAGE_SIZE_2M) + free_pgtable_page(__pte); + } + + return pte; +} + +/* + * This function checks if there is a PTE for a given dma address. + * If there is one, it returns the pointer to it. + */ +static u64 *fetch_pte(struct amd_io_pgtable *pgtable, + unsigned long iova, unsigned long *page_size) +{ + u64 *pte; + int level; + + level = get_pgtable_level() - 1; + pte = &pgtable->pgd[PM_LEVEL_INDEX(level, iova)]; + /* Default page size is 4K */ + *page_size = PAGE_SIZE; + + while (level) { + /* Not present */ + if (!IOMMU_PTE_PRESENT(*pte)) + return NULL; + + /* Walk to the next level */ + pte = get_pgtable_pte(*pte); + pte = &pte[PM_LEVEL_INDEX(level - 1, iova)]; + + /* Large page */ + if (is_large_pte(*pte)) { + if (level == PAGE_MODE_3_LEVEL) + *page_size = IOMMU_PAGE_SIZE_1G; + else if (level == PAGE_MODE_2_LEVEL) + *page_size = IOMMU_PAGE_SIZE_2M; + else + return NULL; /* Wrongly set PSE bit in PTE */ + + break; + } + + level -= 1; + } + + return pte; +} + +static int iommu_v2_map_pages(struct io_pgtable_ops *ops, unsigned long iova, + phys_addr_t paddr, size_t pgsize, size_t pgcount, + int prot, gfp_t gfp, size_t *mapped) +{ + struct protection_domain *pdom = io_pgtable_ops_to_domain(ops); + struct io_pgtable_cfg *cfg = &pdom->iop.iop.cfg; + u64 *pte; + unsigned long map_size; + unsigned long mapped_size = 0; + unsigned long o_iova = iova; + size_t size = pgcount << __ffs(pgsize); + int count = 0; + int ret = 0; + bool updated = false; + + if (WARN_ON(!pgsize || (pgsize & cfg->pgsize_bitmap) != pgsize) || !pgcount) + return -EINVAL; + + if (!(prot & IOMMU_PROT_MASK)) + return -EINVAL; + + while (mapped_size < size) { + map_size = get_alloc_page_size(pgsize); + pte = v2_alloc_pte(pdom->iop.pgd, iova, map_size, &updated); + if (!pte) { + ret = -EINVAL; + goto out; + } + + *pte = set_pte_attr(paddr, map_size, prot); + + count++; + iova += map_size; + paddr += map_size; + mapped_size += map_size; + } + +out: + if (updated) { + if (count > 1) + amd_iommu_flush_tlb(&pdom->domain, 0); + else + amd_iommu_flush_page(&pdom->domain, 0, o_iova); + } + + if (mapped) + *mapped += mapped_size; + + return ret; +} + +static unsigned long iommu_v2_unmap_pages(struct io_pgtable_ops *ops, + unsigned long iova, + size_t pgsize, size_t pgcount, + struct iommu_iotlb_gather *gather) +{ + struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops); + struct io_pgtable_cfg *cfg = &pgtable->iop.cfg; + unsigned long unmap_size; + unsigned long unmapped = 0; + size_t size = pgcount << __ffs(pgsize); + u64 *pte; + + if (WARN_ON(!pgsize || (pgsize & cfg->pgsize_bitmap) != pgsize || !pgcount)) + return 0; + + while (unmapped < size) { + pte = fetch_pte(pgtable, iova, &unmap_size); + if (!pte) + return unmapped; + + *pte = 0ULL; + + iova = (iova & ~(unmap_size - 1)) + unmap_size; + unmapped += unmap_size; + } + + return unmapped; +} + +static phys_addr_t iommu_v2_iova_to_phys(struct io_pgtable_ops *ops, unsigned long iova) +{ + struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops); + unsigned long offset_mask, pte_pgsize; + u64 *pte, __pte; + + pte = fetch_pte(pgtable, iova, &pte_pgsize); + if (!pte || !IOMMU_PTE_PRESENT(*pte)) + return 0; + + offset_mask = pte_pgsize - 1; + __pte = __sme_clr(*pte & PM_ADDR_MASK); + + return (__pte & ~offset_mask) | (iova & offset_mask); +} + +/* + * ---------------------------------------------------- + */ +static void v2_tlb_flush_all(void *cookie) +{ +} + +static void v2_tlb_flush_walk(unsigned long iova, size_t size, + size_t granule, void *cookie) +{ +} + +static void v2_tlb_add_page(struct iommu_iotlb_gather *gather, + unsigned long iova, size_t granule, + void *cookie) +{ +} + +static const struct iommu_flush_ops v2_flush_ops = { + .tlb_flush_all = v2_tlb_flush_all, + .tlb_flush_walk = v2_tlb_flush_walk, + .tlb_add_page = v2_tlb_add_page, +}; + +static void v2_free_pgtable(struct io_pgtable *iop) +{ + struct protection_domain *pdom; + struct amd_io_pgtable *pgtable = container_of(iop, struct amd_io_pgtable, iop); + + pdom = container_of(pgtable, struct protection_domain, iop); + if (!(pdom->flags & PD_IOMMUV2_MASK)) + return; + + /* + * Make changes visible to IOMMUs. No need to clear gcr3 entry + * as gcr3 table is already freed. + */ + amd_iommu_domain_update(pdom); + + /* Free page table */ + free_pgtable(pgtable->pgd, get_pgtable_level()); +} + +static struct io_pgtable *v2_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie) +{ + struct amd_io_pgtable *pgtable = io_pgtable_cfg_to_data(cfg); + struct protection_domain *pdom = (struct protection_domain *)cookie; + int ret; + + pgtable->pgd = alloc_pgtable_page(); + if (!pgtable->pgd) + return NULL; + + ret = amd_iommu_domain_set_gcr3(&pdom->domain, 0, iommu_virt_to_phys(pgtable->pgd)); + if (ret) + goto err_free_pgd; + + pgtable->iop.ops.map_pages = iommu_v2_map_pages; + pgtable->iop.ops.unmap_pages = iommu_v2_unmap_pages; + pgtable->iop.ops.iova_to_phys = iommu_v2_iova_to_phys; + + cfg->pgsize_bitmap = AMD_IOMMU_PGSIZES_V2, + cfg->ias = IOMMU_IN_ADDR_BIT_SIZE, + cfg->oas = IOMMU_OUT_ADDR_BIT_SIZE, + cfg->tlb = &v2_flush_ops; + + return &pgtable->iop; + +err_free_pgd: + free_pgtable_page(pgtable->pgd); + + return NULL; +} + +struct io_pgtable_init_fns io_pgtable_amd_iommu_v2_init_fns = { + .alloc = v2_alloc_pgtable, + .free = v2_free_pgtable, +}; diff --git a/drivers/iommu/io-pgtable.c b/drivers/iommu/io-pgtable.c index f4bfcef98297..ac02a45ed798 100644 --- a/drivers/iommu/io-pgtable.c +++ b/drivers/iommu/io-pgtable.c @@ -27,6 +27,7 @@ io_pgtable_init_table[IO_PGTABLE_NUM_FMTS] = { #endif #ifdef CONFIG_AMD_IOMMU [AMD_IOMMU_V1] = &io_pgtable_amd_iommu_v1_init_fns, + [AMD_IOMMU_V2] = &io_pgtable_amd_iommu_v2_init_fns, #endif }; diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h index ca98aeadcc80..ffe616db9409 100644 --- a/include/linux/io-pgtable.h +++ b/include/linux/io-pgtable.h @@ -16,6 +16,7 @@ enum io_pgtable_fmt { ARM_V7S, ARM_MALI_LPAE, AMD_IOMMU_V1, + AMD_IOMMU_V2, APPLE_DART, IO_PGTABLE_NUM_FMTS, }; @@ -260,6 +261,7 @@ extern struct io_pgtable_init_fns io_pgtable_arm_64_lpae_s2_init_fns; extern struct io_pgtable_init_fns io_pgtable_arm_v7s_init_fns; extern struct io_pgtable_init_fns io_pgtable_arm_mali_lpae_init_fns; extern struct io_pgtable_init_fns io_pgtable_amd_iommu_v1_init_fns; +extern struct io_pgtable_init_fns io_pgtable_amd_iommu_v2_init_fns; extern struct io_pgtable_init_fns io_pgtable_apple_dart_init_fns; #endif /* __IO_PGTABLE_H */ From 643feb0072d5aad9bf5f38eca3fb78c2f8d9f140 Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Thu, 25 Aug 2022 06:39:37 +0000 Subject: [PATCH 33/59] iommu/amd: Add support for Guest IO protection AMD IOMMU introduces support for Guest I/O protection where the request from the I/O device without a PASID are treated as if they have PASID 0. Co-developed-by: Vasant Hegde Signed-off-by: Vasant Hegde Signed-off-by: Suravee Suthikulpanit Link: https://lore.kernel.org/r/20220825063939.8360-8-vasant.hegde@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/amd_iommu_types.h | 3 +++ drivers/iommu/amd/init.c | 13 +++++++++++++ drivers/iommu/amd/iommu.c | 3 +++ 3 files changed, 19 insertions(+) diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index 660c1f044912..1beed57fc35d 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -94,6 +94,7 @@ #define FEATURE_HE (1ULL<<8) #define FEATURE_PC (1ULL<<9) #define FEATURE_GAM_VAPIC (1ULL<<21) +#define FEATURE_GIOSUP (1ULL<<48) #define FEATURE_EPHSUP (1ULL<<50) #define FEATURE_SNP (1ULL<<63) @@ -378,6 +379,7 @@ #define DTE_FLAG_IW (1ULL << 62) #define DTE_FLAG_IOTLB (1ULL << 32) +#define DTE_FLAG_GIOV (1ULL << 54) #define DTE_FLAG_GV (1ULL << 55) #define DTE_FLAG_MASK (0x3ffULL << 32) #define DTE_GLX_SHIFT (56) @@ -436,6 +438,7 @@ #define PD_PASSTHROUGH_MASK (1UL << 2) /* domain has no page translation */ #define PD_IOMMUV2_MASK (1UL << 3) /* domain has gcr3 table */ +#define PD_GIOV_MASK (1UL << 4) /* domain enable GIOV support */ extern bool amd_iommu_dump; #define DUMP_printk(format, arg...) \ diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index fdc642362c14..779d4f9b17f8 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -2068,6 +2068,17 @@ static int __init iommu_init_pci(struct amd_iommu *iommu) init_iommu_perf_ctr(iommu); + if (amd_iommu_pgtable == AMD_IOMMU_V2) { + if (!iommu_feature(iommu, FEATURE_GIOSUP) || + !iommu_feature(iommu, FEATURE_GT)) { + pr_warn("Cannot enable v2 page table for DMA-API. Fallback to v1.\n"); + amd_iommu_pgtable = AMD_IOMMU_V1; + } else if (iommu_default_passthrough()) { + pr_warn("V2 page table doesn't support passthrough mode. Fallback to v1.\n"); + amd_iommu_pgtable = AMD_IOMMU_V1; + } + } + if (is_rd890_iommu(iommu->dev)) { int i, j; @@ -2146,6 +2157,8 @@ static void print_iommu_info(void) if (amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE) pr_info("X2APIC enabled\n"); } + if (amd_iommu_pgtable == AMD_IOMMU_V2) + pr_info("V2 page table enabled\n"); } static int __init amd_iommu_init_pci(void) diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index c882b06e2360..6abc338dc207 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -1597,6 +1597,9 @@ static void set_dte_entry(struct amd_iommu *iommu, u16 devid, tmp = DTE_GCR3_VAL_C(gcr3) << DTE_GCR3_SHIFT_C; flags |= tmp; + + if (domain->flags & PD_GIOV_MASK) + pte_root |= DTE_FLAG_GIOV; } flags &= ~DEV_DOMID_MASK; From 4db6c41f09469ab8ca5d30f5d4731f299d25ff1c Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Thu, 25 Aug 2022 06:39:38 +0000 Subject: [PATCH 34/59] iommu/amd: Add support for using AMD IOMMU v2 page table for DMA-API Introduce init function for setting up DMA domain for DMA-API with the IOMMU v2 page table. Co-developed-by: Vasant Hegde Signed-off-by: Vasant Hegde Signed-off-by: Suravee Suthikulpanit Link: https://lore.kernel.org/r/20220825063939.8360-9-vasant.hegde@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/iommu.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 6abc338dc207..6bfcac8de1f4 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -1653,6 +1653,10 @@ static void do_attach(struct iommu_dev_data *dev_data, domain->dev_iommu[iommu->index] += 1; domain->dev_cnt += 1; + /* Override supported page sizes */ + if (domain->flags & PD_GIOV_MASK) + domain->domain.pgsize_bitmap = AMD_IOMMU_PGSIZES_V2; + /* Update device table */ set_dte_entry(iommu, dev_data->devid, domain, ats, dev_data->iommu_v2); @@ -2032,6 +2036,24 @@ static int protection_domain_init_v1(struct protection_domain *domain, int mode) return 0; } +static int protection_domain_init_v2(struct protection_domain *domain) +{ + spin_lock_init(&domain->lock); + domain->id = domain_id_alloc(); + if (!domain->id) + return -ENOMEM; + INIT_LIST_HEAD(&domain->dev_list); + + domain->flags |= PD_GIOV_MASK; + + if (domain_enable_v2(domain, 1)) { + domain_id_free(domain->id); + return -ENOMEM; + } + + return 0; +} + static struct protection_domain *protection_domain_alloc(unsigned int type) { struct io_pgtable_ops *pgtbl_ops; @@ -2059,6 +2081,9 @@ static struct protection_domain *protection_domain_alloc(unsigned int type) case AMD_IOMMU_V1: ret = protection_domain_init_v1(domain, mode); break; + case AMD_IOMMU_V2: + ret = protection_domain_init_v2(domain); + break; default: ret = -EINVAL; } From d799a183da39ac4988b62da8978950efa177ba9f Mon Sep 17 00:00:00 2001 From: Vasant Hegde Date: Thu, 25 Aug 2022 06:39:39 +0000 Subject: [PATCH 35/59] iommu/amd: Add command-line option to enable different page table Enhance amd_iommu command line option to specify v1 or v2 page table. By default system will boot in V1 page table mode. Co-developed-by: Suravee Suthikulpanit Signed-off-by: Suravee Suthikulpanit Signed-off-by: Vasant Hegde Link: https://lore.kernel.org/r/20220825063939.8360-10-vasant.hegde@amd.com Signed-off-by: Joerg Roedel --- .../admin-guide/kernel-parameters.txt | 2 ++ drivers/iommu/amd/init.c | 23 +++++++++++++++---- 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 426fa892d311..f45c8fc2056c 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -321,6 +321,8 @@ force_enable - Force enable the IOMMU on platforms known to be buggy with IOMMU enabled. Use this option with care. + pgtbl_v1 - Use v1 page table for DMA-API (Default). + pgtbl_v2 - Use v2 page table for DMA-API. amd_iommu_dump= [HW,X86-64] Enable AMD IOMMU driver option to dump the ACPI table diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 779d4f9b17f8..688cf8387b0b 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -3379,17 +3379,30 @@ static int __init parse_amd_iommu_intr(char *str) static int __init parse_amd_iommu_options(char *str) { - for (; *str; ++str) { + if (!str) + return -EINVAL; + + while (*str) { if (strncmp(str, "fullflush", 9) == 0) { pr_warn("amd_iommu=fullflush deprecated; use iommu.strict=1 instead\n"); iommu_set_dma_strict(); - } - if (strncmp(str, "force_enable", 12) == 0) + } else if (strncmp(str, "force_enable", 12) == 0) { amd_iommu_force_enable = true; - if (strncmp(str, "off", 3) == 0) + } else if (strncmp(str, "off", 3) == 0) { amd_iommu_disabled = true; - if (strncmp(str, "force_isolation", 15) == 0) + } else if (strncmp(str, "force_isolation", 15) == 0) { amd_iommu_force_isolation = true; + } else if (strncmp(str, "pgtbl_v1", 8) == 0) { + amd_iommu_pgtable = AMD_IOMMU_V1; + } else if (strncmp(str, "pgtbl_v2", 8) == 0) { + amd_iommu_pgtable = AMD_IOMMU_V2; + } else { + pr_notice("Unknown option - '%s'\n", str); + } + + str += strcspn(str, ","); + while (*str == ',') + str++; } return 1; From f2042ed21da7f8886c93efefff61f93e6d57e9bd Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 16 Aug 2022 18:28:05 +0100 Subject: [PATCH 36/59] iommu/dma: Make header private Now that dma-iommu.h only contains internal interfaces, make it private to the IOMMU subsytem. Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/b237e06c56a101f77af142a54b629b27aa179d22.1660668998.git.robin.murphy@arm.com [ joro : re-add stub for iommu_dma_get_resv_regions ] Signed-off-by: Joerg Roedel --- MAINTAINERS | 2 +- drivers/acpi/viot.c | 1 - drivers/gpu/drm/exynos/exynos_drm_dma.c | 1 - drivers/iommu/amd/iommu.c | 2 +- drivers/iommu/apple-dart.c | 3 ++- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 2 +- drivers/iommu/arm/arm-smmu/arm-smmu.c | 2 +- drivers/iommu/dma-iommu.c | 3 ++- {include/linux => drivers/iommu}/dma-iommu.h | 13 +------------ drivers/iommu/intel/iommu.c | 2 +- drivers/iommu/iommu.c | 3 ++- drivers/iommu/virtio-iommu.c | 3 ++- 12 files changed, 14 insertions(+), 23 deletions(-) rename {include/linux => drivers/iommu}/dma-iommu.h (76%) diff --git a/MAINTAINERS b/MAINTAINERS index d30f26e07cd3..09abd5444fe7 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10626,8 +10626,8 @@ L: iommu@lists.linux.dev S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu.git F: drivers/iommu/dma-iommu.c +F: drivers/iommu/dma-iommu.h F: drivers/iommu/iova.c -F: include/linux/dma-iommu.h F: include/linux/iova.h IOMMU SUBSYSTEM diff --git a/drivers/acpi/viot.c b/drivers/acpi/viot.c index 6132092dab2a..ed752cbbe636 100644 --- a/drivers/acpi/viot.c +++ b/drivers/acpi/viot.c @@ -19,7 +19,6 @@ #define pr_fmt(fmt) "ACPI: VIOT: " fmt #include -#include #include #include #include diff --git a/drivers/gpu/drm/exynos/exynos_drm_dma.c b/drivers/gpu/drm/exynos/exynos_drm_dma.c index bf33c3084cb4..a971590b8132 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_dma.c +++ b/drivers/gpu/drm/exynos/exynos_drm_dma.c @@ -4,7 +4,6 @@ // Author: Inki Dae // Author: Andrzej Hajda -#include #include #include #include diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index da18f40f9abc..b46b0e8ecbdf 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -18,7 +18,6 @@ #include #include #include -#include #include #include #include @@ -40,6 +39,7 @@ #include #include "amd_iommu.h" +#include "../dma-iommu.h" #include "../irq_remapping.h" #define CMD_SET_TYPE(cmd, t) ((cmd)->data[1] |= ((t) << 28)) diff --git a/drivers/iommu/apple-dart.c b/drivers/iommu/apple-dart.c index 437aed674fba..79643b84cb14 100644 --- a/drivers/iommu/apple-dart.c +++ b/drivers/iommu/apple-dart.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include @@ -33,6 +32,8 @@ #include #include +#include "dma-iommu.h" + #define DART_MAX_STREAMS 16 #define DART_MAX_TTBR 4 #define MAX_DARTS_PER_DEVICE 2 diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 241efb7482e9..b788a38d8fdf 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -14,7 +14,6 @@ #include #include #include -#include #include #include #include @@ -29,6 +28,7 @@ #include #include "arm-smmu-v3.h" +#include "../../dma-iommu.h" #include "../../iommu-sva-lib.h" static bool disable_bypass = true; diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c index 4049980d4914..6c1114a4d6cc 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c @@ -21,7 +21,6 @@ #include #include #include -#include #include #include #include @@ -40,6 +39,7 @@ #include #include "arm-smmu.h" +#include "../../dma-iommu.h" /* * Apparently, some Qualcomm arm64 platforms which appear to expose their SMMU diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 6809b33ac9df..9297b741f5e8 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -13,7 +13,6 @@ #include #include #include -#include #include #include #include @@ -30,6 +29,8 @@ #include #include +#include "dma-iommu.h" + struct iommu_dma_msi_page { struct list_head list; dma_addr_t iova; diff --git a/include/linux/dma-iommu.h b/drivers/iommu/dma-iommu.h similarity index 76% rename from include/linux/dma-iommu.h rename to drivers/iommu/dma-iommu.h index e83de4f1f3d6..942790009292 100644 --- a/include/linux/dma-iommu.h +++ b/drivers/iommu/dma-iommu.h @@ -5,15 +5,10 @@ #ifndef __DMA_IOMMU_H #define __DMA_IOMMU_H -#include -#include +#include #ifdef CONFIG_IOMMU_DMA -#include -#include -#include -/* Domain management interface for IOMMU drivers */ int iommu_get_dma_cookie(struct iommu_domain *domain); void iommu_put_dma_cookie(struct iommu_domain *domain); @@ -21,16 +16,10 @@ int iommu_dma_init_fq(struct iommu_domain *domain); void iommu_dma_get_resv_regions(struct device *dev, struct list_head *list); -void iommu_dma_free_cpu_cached_iovas(unsigned int cpu, - struct iommu_domain *domain); - extern bool iommu_dma_forcedac; #else /* CONFIG_IOMMU_DMA */ -struct iommu_domain; -struct device; - static inline int iommu_dma_init_fq(struct iommu_domain *domain) { return -EINVAL; diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 1ff58d25b713..3bbd865910a6 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -15,7 +15,6 @@ #include #include -#include #include #include #include @@ -26,6 +25,7 @@ #include #include "iommu.h" +#include "../dma-iommu.h" #include "../irq_remapping.h" #include "../iommu-sva-lib.h" #include "pasid.h" diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 83688db121f0..c864cbb16523 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -8,7 +8,6 @@ #include #include -#include #include #include #include @@ -30,6 +29,8 @@ #include #include +#include "dma-iommu.h" + static struct kset *iommu_group_kset; static DEFINE_IDA(iommu_group_ida); diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c index 31ab9d622b67..5ee24dc6f5ae 100644 --- a/drivers/iommu/virtio-iommu.c +++ b/drivers/iommu/virtio-iommu.c @@ -8,7 +8,6 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include -#include #include #include #include @@ -23,6 +22,8 @@ #include +#include "dma-iommu.h" + #define MSI_IOVA_BASE 0x8000000 #define MSI_IOVA_LENGTH 0x100000 From a390bde7075451bf419777a94fc7a5106e6e7feb Mon Sep 17 00:00:00 2001 From: John Garry Date: Wed, 7 Sep 2022 21:34:39 +0800 Subject: [PATCH 37/59] iova: Remove some magazine pointer NULL checks Since commit 32e92d9f6f87 ("iommu/iova: Separate out rcache init") it has not been possible to have NULL CPU rcache "loaded" or "prev" magazine pointers once the IOVA domain has been properly initialized. Previously it was only possible to have NULL pointers from failure to allocate the magazines in the IOVA domain initialization. The only other two functions to modify these pointers - __iova_rcache_{get, insert}() - would already ensure that these pointers were non-NULL if initially non-NULL. As such, the mag NULL pointer checks in iova_magazine_full(), iova_magazine_empty(), and iova_magazine_free_pfns() may be dropped. Signed-off-by: John Garry Reviewed-by: Robin Murphy Reviewed-by: Jerry Snitselaar Link: https://lore.kernel.org/r/1662557681-145906-2-git-send-email-john.garry@huawei.com Signed-off-by: Joerg Roedel --- drivers/iommu/iova.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index 47d1983dfa2a..580fdf669922 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -661,9 +661,6 @@ iova_magazine_free_pfns(struct iova_magazine *mag, struct iova_domain *iovad) unsigned long flags; int i; - if (!mag) - return; - spin_lock_irqsave(&iovad->iova_rbtree_lock, flags); for (i = 0 ; i < mag->size; ++i) { @@ -683,12 +680,12 @@ iova_magazine_free_pfns(struct iova_magazine *mag, struct iova_domain *iovad) static bool iova_magazine_full(struct iova_magazine *mag) { - return (mag && mag->size == IOVA_MAG_SIZE); + return mag->size == IOVA_MAG_SIZE; } static bool iova_magazine_empty(struct iova_magazine *mag) { - return (!mag || mag->size == 0); + return mag->size == 0; } static unsigned long iova_magazine_pop(struct iova_magazine *mag, From 8b2818c7be7bc49133c50f384247e9164a225e3f Mon Sep 17 00:00:00 2001 From: John Garry Date: Wed, 7 Sep 2022 21:34:40 +0800 Subject: [PATCH 38/59] iova: Remove magazine BUG_ON() checks Two of the magazine helpers have BUG_ON() checks, as follows: - iova_magazine_pop() - here we ensure that the mag is not empty. However we already ensure that in the only caller, __iova_rcache_get(). - iova_magazine_push() - here we ensure that the mag is not full. However we already ensure that in the only caller, __iova_rcache_insert(). As described, the two bug checks are pointless so drop them. Signed-off-by: John Garry Acked-by: Robin Murphy Reviewed-by: Jerry Snitselaar Link: https://lore.kernel.org/r/1662557681-145906-3-git-send-email-john.garry@huawei.com Signed-off-by: Joerg Roedel --- drivers/iommu/iova.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index 580fdf669922..8aece052ce72 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -694,8 +694,6 @@ static unsigned long iova_magazine_pop(struct iova_magazine *mag, int i; unsigned long pfn; - BUG_ON(iova_magazine_empty(mag)); - /* Only fall back to the rbtree if we have no suitable pfns at all */ for (i = mag->size - 1; mag->pfns[i] > limit_pfn; i--) if (i == 0) @@ -710,8 +708,6 @@ static unsigned long iova_magazine_pop(struct iova_magazine *mag, static void iova_magazine_push(struct iova_magazine *mag, unsigned long pfn) { - BUG_ON(iova_magazine_full(mag)); - mag->pfns[mag->size++] = pfn; } From 189cb8fec14acf4dee59eb2011519e86d389cd3e Mon Sep 17 00:00:00 2001 From: John Garry Date: Wed, 7 Sep 2022 21:34:41 +0800 Subject: [PATCH 39/59] iova: Remove iovad->rcaches check in iova_rcache_get() The iovad->rcaches check in iova_rcache_get() is pretty much useless without the same check in iova_rcache_insert(). Instead of adding this symmetric check to fastpath iova_rcache_insert(), drop the check in iova_rcache_get() in favour of making the IOVA domain rcache init more robust to failure in future. Signed-off-by: John Garry Reviewed-by: Robin Murphy Link: https://lore.kernel.org/r/1662557681-145906-4-git-send-email-john.garry@huawei.com Signed-off-by: Joerg Roedel --- drivers/iommu/iova.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index 8aece052ce72..a44ad92fc5eb 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -875,7 +875,7 @@ static unsigned long iova_rcache_get(struct iova_domain *iovad, { unsigned int log_size = order_base_2(size); - if (log_size >= IOVA_RANGE_CACHE_MAX_SIZE || !iovad->rcaches) + if (log_size >= IOVA_RANGE_CACHE_MAX_SIZE) return 0; return __iova_rcache_get(&iovad->rcaches[log_size], limit_pfn - size); From c7883f8d2b0ab4b8e4df8cf37861523ad9ad1407 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 7 Sep 2022 17:11:54 +0200 Subject: [PATCH 40/59] iommu/virtio: Fix compile error with viommu_capable() A recent fix introduced viommu_capable() but other changes from Robin change the function signature of the call-back it is used for. When both changes are merged a compile error will happen because the function pointer types mismatch. Fix that by updating the viommu_capable() signature after the merge. Cc: Jean-Philippe Brucker Cc: Robin Murphy Signed-off-by: Joerg Roedel Acked-by: Robin Murphy Reviewed-by: Jean-Philippe Brucker Link: https://lore.kernel.org/r/20220907151154.21911-1-joro@8bytes.org --- drivers/iommu/virtio-iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c index da463db9f12a..1b12825e2df1 100644 --- a/drivers/iommu/virtio-iommu.c +++ b/drivers/iommu/virtio-iommu.c @@ -1005,7 +1005,7 @@ static int viommu_of_xlate(struct device *dev, struct of_phandle_args *args) return iommu_fwspec_add_ids(dev, args->args, 1); } -static bool viommu_capable(enum iommu_cap cap) +static bool viommu_capable(struct device *dev, enum iommu_cap cap) { switch (cap) { case IOMMU_CAP_CACHE_COHERENCY: From 053bab4c220be7749b7d7e101d9d172f3991b21a Mon Sep 17 00:00:00 2001 From: Vasant Hegde Date: Mon, 12 Sep 2022 06:32:44 +0000 Subject: [PATCH 41/59] iommu/amd: Free domain id in error path Call domain_id_free() in error path. Signed-off-by: Vasant Hegde Link: https://lore.kernel.org/r/20220912063248.7909-2-vasant.hegde@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/iommu.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 6bfcac8de1f4..c55f4a129b1e 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -2027,8 +2027,10 @@ static int protection_domain_init_v1(struct protection_domain *domain, int mode) if (mode != PAGE_MODE_NONE) { pt_root = (void *)get_zeroed_page(GFP_KERNEL); - if (!pt_root) + if (!pt_root) { + domain_id_free(domain->id); return -ENOMEM; + } } amd_iommu_domain_set_pgtable(domain, pt_root, mode); @@ -2092,8 +2094,10 @@ static struct protection_domain *protection_domain_alloc(unsigned int type) goto out_err; pgtbl_ops = alloc_io_pgtable_ops(pgtable, &domain->iop.pgtbl_cfg, domain); - if (!pgtbl_ops) + if (!pgtbl_ops) { + domain_id_free(domain->id); goto out_err; + } return domain; out_err: From 2455d6a46c2da5703752772f49d56142ccd50463 Mon Sep 17 00:00:00 2001 From: Vasant Hegde Date: Mon, 12 Sep 2022 06:32:46 +0000 Subject: [PATCH 42/59] iommu/amd: Free domain ID after domain_flush_pages free_io_pgtable_ops() path uses domain ID to flush pages. Hence free domain ID after flushing everything. Signed-off-by: Vasant Hegde Link: https://lore.kernel.org/r/20220912063248.7909-4-vasant.hegde@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/iommu.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index c55f4a129b1e..00d0f23b0c0a 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -2004,12 +2004,12 @@ static void protection_domain_free(struct protection_domain *domain) if (!domain) return; - if (domain->id) - domain_id_free(domain->id); - if (domain->iop.pgtbl_cfg.tlb) free_io_pgtable_ops(&domain->iop.iop.ops); + if (domain->id) + domain_id_free(domain->id); + kfree(domain); } From 6b5b58626ef90ef45513c78bfc84540ff954431c Mon Sep 17 00:00:00 2001 From: Vasant Hegde Date: Mon, 12 Sep 2022 06:32:47 +0000 Subject: [PATCH 43/59] iommu/amd: Remove outdated comment Comment is not related to amd_iommu_ops variable. Signed-off-by: Vasant Hegde Link: https://lore.kernel.org/r/20220912063248.7909-5-vasant.hegde@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/iommu.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 00d0f23b0c0a..d72d8a325380 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -66,10 +66,6 @@ LIST_HEAD(ioapic_map); LIST_HEAD(hpet_map); LIST_HEAD(acpihid_map); -/* - * Domain for untranslated devices - only allocated - * if iommu=pt passed on kernel cmd line. - */ const struct iommu_ops amd_iommu_ops; static ATOMIC_NOTIFIER_HEAD(ppr_notifier); From f9e2f0e8357658c448fd5397cc959da3aae4435d Mon Sep 17 00:00:00 2001 From: Vasant Hegde Date: Mon, 12 Sep 2022 06:32:48 +0000 Subject: [PATCH 44/59] iommu/amd: Fix sparse warning CHECK drivers/iommu/amd/iommu.c drivers/iommu/amd/iommu.c:73:24: warning: symbol 'amd_iommu_ops' was not declared. Should it be static? Signed-off-by: Vasant Hegde Link: https://lore.kernel.org/r/20220912063248.7909-6-vasant.hegde@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/amd_iommu_types.h | 2 ++ drivers/iommu/amd/init.c | 2 -- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index 1beed57fc35d..1d0a70c85333 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -461,6 +461,8 @@ struct irq_remap_table { /* Interrupt remapping feature used? */ extern bool amd_iommu_irq_remap; +extern const struct iommu_ops amd_iommu_ops; + /* IVRS indicates that pre-boot remapping was enabled */ extern bool amdr_ivrs_remap_support; diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 688cf8387b0b..a515e837adf1 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -95,8 +95,6 @@ * out of it. */ -extern const struct iommu_ops amd_iommu_ops; - /* * structure describing one IOMMU in the ACPI table. Typically followed by one * or more ivhd_entrys. From 4f58330fcc8482aa90674e1f40f601e82f18ed4a Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 13 Sep 2022 12:47:20 +0100 Subject: [PATCH 45/59] iommu/iova: Fix module config properly IOMMU_IOVA is intended to be an optional library for users to select as and when they desire. Since it can be a module now, this means that built-in code which has chosen not to select it should not fail to link if it happens to have selected as a module by someone else. Replace IS_ENABLED() with IS_REACHABLE() to do the right thing. CC: Thierry Reding Reported-by: John Garry Fixes: 15bbdec3931e ("iommu: Make the iova library a module") Signed-off-by: Robin Murphy Reviewed-by: Thierry Reding Link: https://lore.kernel.org/r/548c2f683ca379aface59639a8f0cccc3a1ac050.1663069227.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel --- include/linux/iova.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/iova.h b/include/linux/iova.h index c6ba6d95d79c..83c00fac2acb 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -75,7 +75,7 @@ static inline unsigned long iova_pfn(struct iova_domain *iovad, dma_addr_t iova) return iova >> iova_shift(iovad); } -#if IS_ENABLED(CONFIG_IOMMU_IOVA) +#if IS_REACHABLE(CONFIG_IOMMU_IOVA) int iova_cache_get(void); void iova_cache_put(void); From 4e5f8465c65ed6b2c1878cca2727a741725be341 Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Tue, 13 Sep 2022 17:11:46 +0200 Subject: [PATCH 46/59] dt-bindings: mediatek: Add bindings for MT6795 M4U Add bindings for the MediaTek Helio X10 (MT6795) IOMMU/M4U. Signed-off-by: AngeloGioacchino Del Regno Acked-by: Rob Herring Acked-by: Krzysztof Kozlowski Reviewed-by: Matthias Brugger Link: https://lore.kernel.org/r/20220913151148.412312-2-angelogioacchino.delregno@collabora.com Signed-off-by: Joerg Roedel --- .../bindings/iommu/mediatek,iommu.yaml | 4 + include/dt-bindings/memory/mt6795-larb-port.h | 95 +++++++++++++++++++ 2 files changed, 99 insertions(+) create mode 100644 include/dt-bindings/memory/mt6795-larb-port.h diff --git a/Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml b/Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml index fee0241b5098..839e3be0bf3c 100644 --- a/Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml +++ b/Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml @@ -73,6 +73,7 @@ properties: - mediatek,mt2701-m4u # generation one - mediatek,mt2712-m4u # generation two - mediatek,mt6779-m4u # generation two + - mediatek,mt6795-m4u # generation two - mediatek,mt8167-m4u # generation two - mediatek,mt8173-m4u # generation two - mediatek,mt8183-m4u # generation two @@ -124,6 +125,7 @@ properties: dt-binding/memory/mt2701-larb-port.h for mt2701 and mt7623, dt-binding/memory/mt2712-larb-port.h for mt2712, dt-binding/memory/mt6779-larb-port.h for mt6779, + dt-binding/memory/mt6795-larb-port.h for mt6795, dt-binding/memory/mt8167-larb-port.h for mt8167, dt-binding/memory/mt8173-larb-port.h for mt8173, dt-binding/memory/mt8183-larb-port.h for mt8183, @@ -148,6 +150,7 @@ allOf: enum: - mediatek,mt2701-m4u - mediatek,mt2712-m4u + - mediatek,mt6795-m4u - mediatek,mt8173-m4u - mediatek,mt8186-iommu-mm - mediatek,mt8192-m4u @@ -177,6 +180,7 @@ allOf: contains: enum: - mediatek,mt2712-m4u + - mediatek,mt6795-m4u - mediatek,mt8173-m4u then: diff --git a/include/dt-bindings/memory/mt6795-larb-port.h b/include/dt-bindings/memory/mt6795-larb-port.h new file mode 100644 index 000000000000..58cf6a6b6372 --- /dev/null +++ b/include/dt-bindings/memory/mt6795-larb-port.h @@ -0,0 +1,95 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (c) 2022 Collabora Ltd. + * Author: AngeloGioacchino Del Regno + */ + +#ifndef _DT_BINDINGS_MEMORY_MT6795_LARB_PORT_H_ +#define _DT_BINDINGS_MEMORY_MT6795_LARB_PORT_H_ + +#include + +#define M4U_LARB0_ID 0 +#define M4U_LARB1_ID 1 +#define M4U_LARB2_ID 2 +#define M4U_LARB3_ID 3 +#define M4U_LARB4_ID 4 + +/* larb0 */ +#define M4U_PORT_DISP_OVL0 MTK_M4U_ID(M4U_LARB0_ID, 0) +#define M4U_PORT_DISP_RDMA0 MTK_M4U_ID(M4U_LARB0_ID, 1) +#define M4U_PORT_DISP_RDMA1 MTK_M4U_ID(M4U_LARB0_ID, 2) +#define M4U_PORT_DISP_WDMA0 MTK_M4U_ID(M4U_LARB0_ID, 3) +#define M4U_PORT_DISP_OVL1 MTK_M4U_ID(M4U_LARB0_ID, 4) +#define M4U_PORT_DISP_RDMA2 MTK_M4U_ID(M4U_LARB0_ID, 5) +#define M4U_PORT_DISP_WDMA1 MTK_M4U_ID(M4U_LARB0_ID, 6) +#define M4U_PORT_DISP_OD_R MTK_M4U_ID(M4U_LARB0_ID, 7) +#define M4U_PORT_DISP_OD_W MTK_M4U_ID(M4U_LARB0_ID, 8) +#define M4U_PORT_MDP_RDMA0 MTK_M4U_ID(M4U_LARB0_ID, 9) +#define M4U_PORT_MDP_RDMA1 MTK_M4U_ID(M4U_LARB0_ID, 10) +#define M4U_PORT_MDP_WDMA MTK_M4U_ID(M4U_LARB0_ID, 11) +#define M4U_PORT_MDP_WROT0 MTK_M4U_ID(M4U_LARB0_ID, 12) +#define M4U_PORT_MDP_WROT1 MTK_M4U_ID(M4U_LARB0_ID, 13) + +/* larb1 */ +#define M4U_PORT_VDEC_MC MTK_M4U_ID(M4U_LARB1_ID, 0) +#define M4U_PORT_VDEC_PP MTK_M4U_ID(M4U_LARB1_ID, 1) +#define M4U_PORT_VDEC_UFO MTK_M4U_ID(M4U_LARB1_ID, 2) +#define M4U_PORT_VDEC_VLD MTK_M4U_ID(M4U_LARB1_ID, 3) +#define M4U_PORT_VDEC_VLD2 MTK_M4U_ID(M4U_LARB1_ID, 4) +#define M4U_PORT_VDEC_AVC_MV MTK_M4U_ID(M4U_LARB1_ID, 5) +#define M4U_PORT_VDEC_PRED_RD MTK_M4U_ID(M4U_LARB1_ID, 6) +#define M4U_PORT_VDEC_PRED_WR MTK_M4U_ID(M4U_LARB1_ID, 7) +#define M4U_PORT_VDEC_PPWRAP MTK_M4U_ID(M4U_LARB1_ID, 8) + +/* larb2 */ +#define M4U_PORT_CAM_IMGO MTK_M4U_ID(M4U_LARB2_ID, 0) +#define M4U_PORT_CAM_RRZO MTK_M4U_ID(M4U_LARB2_ID, 1) +#define M4U_PORT_CAM_AAO MTK_M4U_ID(M4U_LARB2_ID, 2) +#define M4U_PORT_CAM_LCSO MTK_M4U_ID(M4U_LARB2_ID, 3) +#define M4U_PORT_CAM_ESFKO MTK_M4U_ID(M4U_LARB2_ID, 4) +#define M4U_PORT_CAM_IMGO_S MTK_M4U_ID(M4U_LARB2_ID, 5) +#define M4U_PORT_CAM_LSCI MTK_M4U_ID(M4U_LARB2_ID, 6) +#define M4U_PORT_CAM_LSCI_D MTK_M4U_ID(M4U_LARB2_ID, 7) +#define M4U_PORT_CAM_BPCI MTK_M4U_ID(M4U_LARB2_ID, 8) +#define M4U_PORT_CAM_BPCI_D MTK_M4U_ID(M4U_LARB2_ID, 9) +#define M4U_PORT_CAM_UFDI MTK_M4U_ID(M4U_LARB2_ID, 10) +#define M4U_PORT_CAM_IMGI MTK_M4U_ID(M4U_LARB2_ID, 11) +#define M4U_PORT_CAM_IMG2O MTK_M4U_ID(M4U_LARB2_ID, 12) +#define M4U_PORT_CAM_IMG3O MTK_M4U_ID(M4U_LARB2_ID, 13) +#define M4U_PORT_CAM_VIPI MTK_M4U_ID(M4U_LARB2_ID, 14) +#define M4U_PORT_CAM_VIP2I MTK_M4U_ID(M4U_LARB2_ID, 15) +#define M4U_PORT_CAM_VIP3I MTK_M4U_ID(M4U_LARB2_ID, 16) +#define M4U_PORT_CAM_LCEI MTK_M4U_ID(M4U_LARB2_ID, 17) +#define M4U_PORT_CAM_RB MTK_M4U_ID(M4U_LARB2_ID, 18) +#define M4U_PORT_CAM_RP MTK_M4U_ID(M4U_LARB2_ID, 19) +#define M4U_PORT_CAM_WR MTK_M4U_ID(M4U_LARB2_ID, 20) + +/* larb3 */ +#define M4U_PORT_VENC_RCPU MTK_M4U_ID(M4U_LARB3_ID, 0) +#define M4U_PORT_VENC_REC MTK_M4U_ID(M4U_LARB3_ID, 1) +#define M4U_PORT_VENC_BSDMA MTK_M4U_ID(M4U_LARB3_ID, 2) +#define M4U_PORT_VENC_SV_COMV MTK_M4U_ID(M4U_LARB3_ID, 3) +#define M4U_PORT_VENC_RD_COMV MTK_M4U_ID(M4U_LARB3_ID, 4) +#define M4U_PORT_JPGENC_BSDMA MTK_M4U_ID(M4U_LARB3_ID, 5) +#define M4U_PORT_REMDC_SDMA MTK_M4U_ID(M4U_LARB3_ID, 6) +#define M4U_PORT_REMDC_BSDMA MTK_M4U_ID(M4U_LARB3_ID, 7) +#define M4U_PORT_JPGENC_RDMA MTK_M4U_ID(M4U_LARB3_ID, 8) +#define M4U_PORT_JPGENC_SDMA MTK_M4U_ID(M4U_LARB3_ID, 9) +#define M4U_PORT_JPGDEC_WDMA MTK_M4U_ID(M4U_LARB3_ID, 10) +#define M4U_PORT_JPGDEC_BSDMA MTK_M4U_ID(M4U_LARB3_ID, 11) +#define M4U_PORT_VENC_CUR_LUMA MTK_M4U_ID(M4U_LARB3_ID, 12) +#define M4U_PORT_VENC_CUR_CHROMA MTK_M4U_ID(M4U_LARB3_ID, 13) +#define M4U_PORT_VENC_REF_LUMA MTK_M4U_ID(M4U_LARB3_ID, 14) +#define M4U_PORT_VENC_REF_CHROMA MTK_M4U_ID(M4U_LARB3_ID, 15) +#define M4U_PORT_REMDC_WDMA MTK_M4U_ID(M4U_LARB3_ID, 16) +#define M4U_PORT_VENC_NBM_RDMA MTK_M4U_ID(M4U_LARB3_ID, 17) +#define M4U_PORT_VENC_NBM_WDMA MTK_M4U_ID(M4U_LARB3_ID, 18) + +/* larb4 */ +#define M4U_PORT_MJC_MV_RD MTK_M4U_ID(M4U_LARB4_ID, 0) +#define M4U_PORT_MJC_MV_WR MTK_M4U_ID(M4U_LARB4_ID, 1) +#define M4U_PORT_MJC_DMA_RD MTK_M4U_ID(M4U_LARB4_ID, 2) +#define M4U_PORT_MJC_DMA_WR MTK_M4U_ID(M4U_LARB4_ID, 3) + +#endif From 86580ec969bb1df9dbc316643853e7664ace4cae Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Tue, 13 Sep 2022 17:11:47 +0200 Subject: [PATCH 47/59] iommu/mediatek: Introduce new flag TF_PORT_TO_ADDR_MT8173 In preparation for adding support for MT6795, add a new flag named TF_PORT_TO_ADDR_MT8173 and use that instead of checking for m4u_plat type in mtk_iommu_hw_init() to avoid seeing a long list of m4u_plat checks there in the future. Signed-off-by: AngeloGioacchino Del Regno Reviewed-by: Yong Wu Reviewed-by: Matthias Brugger Link: https://lore.kernel.org/r/20220913151148.412312-3-angelogioacchino.delregno@collabora.com Signed-off-by: Joerg Roedel --- drivers/iommu/mtk_iommu.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 7e363b1f24df..b511359376f4 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -138,6 +138,7 @@ #define PM_CLK_AO BIT(15) #define IFA_IOMMU_PCIE_SUPPORT BIT(16) #define PGTABLE_PA_35_EN BIT(17) +#define TF_PORT_TO_ADDR_MT8173 BIT(18) #define MTK_IOMMU_HAS_FLAG_MASK(pdata, _x, mask) \ ((((pdata)->flags) & (mask)) == (_x)) @@ -955,7 +956,7 @@ static int mtk_iommu_hw_init(const struct mtk_iommu_data *data, unsigned int ban * Global control settings are in bank0. May re-init these global registers * since no sure if there is bank0 consumers. */ - if (data->plat_data->m4u_plat == M4U_MT8173) { + if (MTK_IOMMU_HAS_FLAG(data->plat_data, TF_PORT_TO_ADDR_MT8173)) { regval = F_MMU_PREFETCH_RT_REPLACE_MOD | F_MMU_TF_PROT_TO_PROGRAM_ADDR_MT8173; } else { @@ -1427,7 +1428,8 @@ static const struct mtk_iommu_plat_data mt8167_data = { static const struct mtk_iommu_plat_data mt8173_data = { .m4u_plat = M4U_MT8173, .flags = HAS_4GB_MODE | HAS_BCLK | RESET_AXI | - HAS_LEGACY_IVRP_PADDR | MTK_IOMMU_TYPE_MM, + HAS_LEGACY_IVRP_PADDR | MTK_IOMMU_TYPE_MM | + TF_PORT_TO_ADDR_MT8173, .inv_sel_reg = REG_MMU_INV_SEL_GEN1, .banks_num = 1, .banks_enable = {true}, From 717ec15e5ce98ed267c9e31ff0469f57782c85e9 Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Tue, 13 Sep 2022 17:11:48 +0200 Subject: [PATCH 48/59] iommu/mediatek: Add support for MT6795 Helio X10 M4Us Add support for the M4Us found in the MT6795 Helio X10 SoC. Signed-off-by: AngeloGioacchino Del Regno Reviewed-by: Yong Wu Reviewed-by: Matthias Brugger Link: https://lore.kernel.org/r/20220913151148.412312-4-angelogioacchino.delregno@collabora.com Signed-off-by: Joerg Roedel --- drivers/iommu/mtk_iommu.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index b511359376f4..e3f03a1d32b8 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -158,6 +158,7 @@ enum mtk_iommu_plat { M4U_MT2712, M4U_MT6779, + M4U_MT6795, M4U_MT8167, M4U_MT8173, M4U_MT8183, @@ -1414,6 +1415,19 @@ static const struct mtk_iommu_plat_data mt6779_data = { .larbid_remap = {{0}, {1}, {2}, {3}, {5}, {7, 8}, {10}, {9}}, }; +static const struct mtk_iommu_plat_data mt6795_data = { + .m4u_plat = M4U_MT6795, + .flags = HAS_4GB_MODE | HAS_BCLK | RESET_AXI | + HAS_LEGACY_IVRP_PADDR | MTK_IOMMU_TYPE_MM | + TF_PORT_TO_ADDR_MT8173, + .inv_sel_reg = REG_MMU_INV_SEL_GEN1, + .banks_num = 1, + .banks_enable = {true}, + .iova_region = single_domain, + .iova_region_nr = ARRAY_SIZE(single_domain), + .larbid_remap = {{0}, {1}, {2}, {3}, {4}}, /* Linear mapping. */ +}; + static const struct mtk_iommu_plat_data mt8167_data = { .m4u_plat = M4U_MT8167, .flags = RESET_AXI | HAS_LEGACY_IVRP_PADDR | MTK_IOMMU_TYPE_MM, @@ -1526,6 +1540,7 @@ static const struct mtk_iommu_plat_data mt8195_data_vpp = { static const struct of_device_id mtk_iommu_of_ids[] = { { .compatible = "mediatek,mt2712-m4u", .data = &mt2712_data}, { .compatible = "mediatek,mt6779-m4u", .data = &mt6779_data}, + { .compatible = "mediatek,mt6795-m4u", .data = &mt6795_data}, { .compatible = "mediatek,mt8167-m4u", .data = &mt8167_data}, { .compatible = "mediatek,mt8173-m4u", .data = &mt8173_data}, { .compatible = "mediatek,mt8183-m4u", .data = &mt8183_data}, From 745ef1092bcfcf3bca8d82c260947ca498022dde Mon Sep 17 00:00:00 2001 From: Janne Grunau Date: Fri, 16 Sep 2022 11:41:49 +0200 Subject: [PATCH 49/59] iommu/io-pgtable: Move Apple DART support to its own file The pte format used by the DARTs found in the Apple M1 (t8103) is not fully compatible with io-pgtable-arm. The 24 MSB are used for subpage protection (mapping only parts of page) and conflict with the address mask. In addition bit 1 is not available for tagging entries but disables subpage protection. Subpage protection could be useful to support a CPU granule of 4k with the fixed IOMMU page size of 16k. The DARTs found on Apple M1 Pro/Max/Ultra use another different pte format which is even less compatible. To support an output address size of 42 bit the address is shifted down by 4. Subpage protection is mandatory and bit 1 signifies uncached mappings used by the display controller. It would be advantageous to share code for all known Apple DART variants to support common features. The page table allocator for DARTs is less complex since it uses a two levels of translation table without support for huge pages. Signed-off-by: Janne Grunau Acked-by: Robin Murphy Acked-by: Sven Peter Acked-by: Hector Martin Link: https://lore.kernel.org/r/20220916094152.87137-3-j@jannau.net [ joro: Fix compile warning in __dart_alloc_pages()] Signed-off-by: Joerg Roedel --- MAINTAINERS | 1 + drivers/iommu/Kconfig | 13 +- drivers/iommu/Makefile | 1 + drivers/iommu/io-pgtable-arm.c | 63 ----- drivers/iommu/io-pgtable-dart.c | 426 ++++++++++++++++++++++++++++++++ drivers/iommu/io-pgtable.c | 2 + 6 files changed, 442 insertions(+), 64 deletions(-) create mode 100644 drivers/iommu/io-pgtable-dart.c diff --git a/MAINTAINERS b/MAINTAINERS index d30f26e07cd3..1a205485b002 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1887,6 +1887,7 @@ F: drivers/dma/apple-admac.c F: drivers/i2c/busses/i2c-pasemi-core.c F: drivers/i2c/busses/i2c-pasemi-platform.c F: drivers/iommu/apple-dart.c +F: drivers/iommu/io-pgtable-dart.c F: drivers/irqchip/irq-apple-aic.c F: drivers/mailbox/apple-mailbox.c F: drivers/nvme/host/apple.c diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index 5c5cb5bee8b6..82cebeb5529c 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -67,6 +67,17 @@ config IOMMU_IO_PGTABLE_ARMV7S_SELFTEST If unsure, say N here. +config IOMMU_IO_PGTABLE_DART + bool "Apple DART Formats" + select IOMMU_IO_PGTABLE + depends on ARM64 || (COMPILE_TEST && !GENERIC_ATOMIC64) + help + Enable support for the Apple DART pagetable formats. These include + the t8020 and t6000/t8110 DART formats used in Apple M1/M2 family + SoCs. + + If unsure, say N here. + endmenu config IOMMU_DEBUGFS @@ -294,7 +305,7 @@ config APPLE_DART tristate "Apple DART IOMMU Support" depends on ARCH_APPLE || (COMPILE_TEST && !GENERIC_ATOMIC64) select IOMMU_API - select IOMMU_IO_PGTABLE_LPAE + select IOMMU_IO_PGTABLE_DART default ARCH_APPLE help Support for Apple DART (Device Address Resolution Table) IOMMUs diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile index 44475a9b3eea..cc9f381013c3 100644 --- a/drivers/iommu/Makefile +++ b/drivers/iommu/Makefile @@ -8,6 +8,7 @@ obj-$(CONFIG_IOMMU_DMA) += dma-iommu.o obj-$(CONFIG_IOMMU_IO_PGTABLE) += io-pgtable.o obj-$(CONFIG_IOMMU_IO_PGTABLE_ARMV7S) += io-pgtable-arm-v7s.o obj-$(CONFIG_IOMMU_IO_PGTABLE_LPAE) += io-pgtable-arm.o +obj-$(CONFIG_IOMMU_IO_PGTABLE_DART) += io-pgtable-dart.o obj-$(CONFIG_IOASID) += ioasid.o obj-$(CONFIG_IOMMU_IOVA) += iova.o obj-$(CONFIG_OF_IOMMU) += of_iommu.o diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index 94ff319ae8ac..d7f5e23da643 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -130,9 +130,6 @@ #define ARM_MALI_LPAE_MEMATTR_IMP_DEF 0x88ULL #define ARM_MALI_LPAE_MEMATTR_WRITE_ALLOC 0x8DULL -#define APPLE_DART_PTE_PROT_NO_WRITE (1<<7) -#define APPLE_DART_PTE_PROT_NO_READ (1<<8) - /* IOPTE accessors */ #define iopte_deref(pte,d) __va(iopte_to_paddr(pte, d)) @@ -406,15 +403,6 @@ static arm_lpae_iopte arm_lpae_prot_to_pte(struct arm_lpae_io_pgtable *data, { arm_lpae_iopte pte; - if (data->iop.fmt == APPLE_DART) { - pte = 0; - if (!(prot & IOMMU_WRITE)) - pte |= APPLE_DART_PTE_PROT_NO_WRITE; - if (!(prot & IOMMU_READ)) - pte |= APPLE_DART_PTE_PROT_NO_READ; - return pte; - } - if (data->iop.fmt == ARM_64_LPAE_S1 || data->iop.fmt == ARM_32_LPAE_S1) { pte = ARM_LPAE_PTE_nG; @@ -1107,52 +1095,6 @@ out_free_data: return NULL; } -static struct io_pgtable * -apple_dart_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie) -{ - struct arm_lpae_io_pgtable *data; - int i; - - if (cfg->oas > 36) - return NULL; - - data = arm_lpae_alloc_pgtable(cfg); - if (!data) - return NULL; - - /* - * The table format itself always uses two levels, but the total VA - * space is mapped by four separate tables, making the MMIO registers - * an effective "level 1". For simplicity, though, we treat this - * equivalently to LPAE stage 2 concatenation at level 2, with the - * additional TTBRs each just pointing at consecutive pages. - */ - if (data->start_level < 1) - goto out_free_data; - if (data->start_level == 1 && data->pgd_bits > 2) - goto out_free_data; - if (data->start_level > 1) - data->pgd_bits = 0; - data->start_level = 2; - cfg->apple_dart_cfg.n_ttbrs = 1 << data->pgd_bits; - data->pgd_bits += data->bits_per_level; - - data->pgd = __arm_lpae_alloc_pages(ARM_LPAE_PGD_SIZE(data), GFP_KERNEL, - cfg); - if (!data->pgd) - goto out_free_data; - - for (i = 0; i < cfg->apple_dart_cfg.n_ttbrs; ++i) - cfg->apple_dart_cfg.ttbr[i] = - virt_to_phys(data->pgd + i * ARM_LPAE_GRANULE(data)); - - return &data->iop; - -out_free_data: - kfree(data); - return NULL; -} - struct io_pgtable_init_fns io_pgtable_arm_64_lpae_s1_init_fns = { .alloc = arm_64_lpae_alloc_pgtable_s1, .free = arm_lpae_free_pgtable, @@ -1178,11 +1120,6 @@ struct io_pgtable_init_fns io_pgtable_arm_mali_lpae_init_fns = { .free = arm_lpae_free_pgtable, }; -struct io_pgtable_init_fns io_pgtable_apple_dart_init_fns = { - .alloc = apple_dart_alloc_pgtable, - .free = arm_lpae_free_pgtable, -}; - #ifdef CONFIG_IOMMU_IO_PGTABLE_LPAE_SELFTEST static struct io_pgtable_cfg *cfg_cookie __initdata; diff --git a/drivers/iommu/io-pgtable-dart.c b/drivers/iommu/io-pgtable-dart.c new file mode 100644 index 000000000000..8ca1ea313a80 --- /dev/null +++ b/drivers/iommu/io-pgtable-dart.c @@ -0,0 +1,426 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Apple DART page table allocator. + * + * Copyright (C) 2022 The Asahi Linux Contributors + * + * Based on io-pgtable-arm. + * + * Copyright (C) 2014 ARM Limited + * + * Author: Will Deacon + */ + +#define pr_fmt(fmt) "dart io-pgtable: " fmt + +#include +#include +#include +#include +#include +#include +#include + +#include + +#define DART1_MAX_ADDR_BITS 36 + +#define DART_MAX_TABLES 4 +#define DART_LEVELS 2 + +/* Struct accessors */ +#define io_pgtable_to_data(x) \ + container_of((x), struct dart_io_pgtable, iop) + +#define io_pgtable_ops_to_data(x) \ + io_pgtable_to_data(io_pgtable_ops_to_pgtable(x)) + +#define DART_GRANULE(d) \ + (sizeof(dart_iopte) << (d)->bits_per_level) +#define DART_PTES_PER_TABLE(d) \ + (DART_GRANULE(d) >> ilog2(sizeof(dart_iopte))) + +#define APPLE_DART1_PADDR_MASK GENMASK_ULL(35, 12) + +/* Apple DART1 protection bits */ +#define APPLE_DART1_PTE_PROT_NO_READ BIT(8) +#define APPLE_DART1_PTE_PROT_NO_WRITE BIT(7) +#define APPLE_DART1_PTE_PROT_SP_DIS BIT(1) + +/* marks PTE as valid */ +#define APPLE_DART_PTE_VALID BIT(0) + +/* IOPTE accessors */ +#define iopte_deref(pte, d) __va(iopte_to_paddr(pte, d)) + +struct dart_io_pgtable { + struct io_pgtable iop; + + int tbl_bits; + int bits_per_level; + + void *pgd[DART_MAX_TABLES]; +}; + +typedef u64 dart_iopte; + + +static dart_iopte paddr_to_iopte(phys_addr_t paddr, + struct dart_io_pgtable *data) +{ + return paddr & APPLE_DART1_PADDR_MASK; +} + +static phys_addr_t iopte_to_paddr(dart_iopte pte, + struct dart_io_pgtable *data) +{ + return pte & APPLE_DART1_PADDR_MASK; +} + +static void *__dart_alloc_pages(size_t size, gfp_t gfp, + struct io_pgtable_cfg *cfg) +{ + int order = get_order(size); + struct page *p; + + VM_BUG_ON((gfp & __GFP_HIGHMEM)); + p = alloc_pages(gfp | __GFP_ZERO, order); + if (!p) + return NULL; + + return page_address(p); +} + +static int dart_init_pte(struct dart_io_pgtable *data, + unsigned long iova, phys_addr_t paddr, + dart_iopte prot, int num_entries, + dart_iopte *ptep) +{ + int i; + dart_iopte pte = prot; + size_t sz = data->iop.cfg.pgsize_bitmap; + + for (i = 0; i < num_entries; i++) + if (ptep[i] & APPLE_DART_PTE_VALID) { + /* We require an unmap first */ + WARN_ON(ptep[i] & APPLE_DART_PTE_VALID); + return -EEXIST; + } + + pte |= APPLE_DART1_PTE_PROT_SP_DIS; + pte |= APPLE_DART_PTE_VALID; + + for (i = 0; i < num_entries; i++) + ptep[i] = pte | paddr_to_iopte(paddr + i * sz, data); + + return 0; +} + +static dart_iopte dart_install_table(dart_iopte *table, + dart_iopte *ptep, + dart_iopte curr, + struct dart_io_pgtable *data) +{ + dart_iopte old, new; + + new = paddr_to_iopte(__pa(table), data) | APPLE_DART_PTE_VALID; + + /* + * Ensure the table itself is visible before its PTE can be. + * Whilst we could get away with cmpxchg64_release below, this + * doesn't have any ordering semantics when !CONFIG_SMP. + */ + dma_wmb(); + + old = cmpxchg64_relaxed(ptep, curr, new); + + return old; +} + +static int dart_get_table(struct dart_io_pgtable *data, unsigned long iova) +{ + return (iova >> (3 * data->bits_per_level + ilog2(sizeof(dart_iopte)))) & + ((1 << data->tbl_bits) - 1); +} + +static int dart_get_l1_index(struct dart_io_pgtable *data, unsigned long iova) +{ + + return (iova >> (2 * data->bits_per_level + ilog2(sizeof(dart_iopte)))) & + ((1 << data->bits_per_level) - 1); +} + +static int dart_get_l2_index(struct dart_io_pgtable *data, unsigned long iova) +{ + + return (iova >> (data->bits_per_level + ilog2(sizeof(dart_iopte)))) & + ((1 << data->bits_per_level) - 1); +} + +static dart_iopte *dart_get_l2(struct dart_io_pgtable *data, unsigned long iova) +{ + dart_iopte pte, *ptep; + int tbl = dart_get_table(data, iova); + + ptep = data->pgd[tbl]; + if (!ptep) + return NULL; + + ptep += dart_get_l1_index(data, iova); + pte = READ_ONCE(*ptep); + + /* Valid entry? */ + if (!pte) + return NULL; + + /* Deref to get level 2 table */ + return iopte_deref(pte, data); +} + +static dart_iopte dart_prot_to_pte(struct dart_io_pgtable *data, + int prot) +{ + dart_iopte pte = 0; + + if (!(prot & IOMMU_WRITE)) + pte |= APPLE_DART1_PTE_PROT_NO_WRITE; + if (!(prot & IOMMU_READ)) + pte |= APPLE_DART1_PTE_PROT_NO_READ; + + return pte; +} + +static int dart_map_pages(struct io_pgtable_ops *ops, unsigned long iova, + phys_addr_t paddr, size_t pgsize, size_t pgcount, + int iommu_prot, gfp_t gfp, size_t *mapped) +{ + struct dart_io_pgtable *data = io_pgtable_ops_to_data(ops); + struct io_pgtable_cfg *cfg = &data->iop.cfg; + size_t tblsz = DART_GRANULE(data); + int ret = 0, tbl, num_entries, max_entries, map_idx_start; + dart_iopte pte, *cptep, *ptep; + dart_iopte prot; + + if (WARN_ON(pgsize != cfg->pgsize_bitmap)) + return -EINVAL; + + if (WARN_ON(paddr >> cfg->oas)) + return -ERANGE; + + /* If no access, then nothing to do */ + if (!(iommu_prot & (IOMMU_READ | IOMMU_WRITE))) + return 0; + + tbl = dart_get_table(data, iova); + + ptep = data->pgd[tbl]; + ptep += dart_get_l1_index(data, iova); + pte = READ_ONCE(*ptep); + + /* no L2 table present */ + if (!pte) { + cptep = __dart_alloc_pages(tblsz, gfp, cfg); + if (!cptep) + return -ENOMEM; + + pte = dart_install_table(cptep, ptep, 0, data); + if (pte) + free_pages((unsigned long)cptep, get_order(tblsz)); + + /* L2 table is present (now) */ + pte = READ_ONCE(*ptep); + } + + ptep = iopte_deref(pte, data); + + /* install a leaf entries into L2 table */ + prot = dart_prot_to_pte(data, iommu_prot); + map_idx_start = dart_get_l2_index(data, iova); + max_entries = DART_PTES_PER_TABLE(data) - map_idx_start; + num_entries = min_t(int, pgcount, max_entries); + ptep += map_idx_start; + ret = dart_init_pte(data, iova, paddr, prot, num_entries, ptep); + if (!ret && mapped) + *mapped += num_entries * pgsize; + + /* + * Synchronise all PTE updates for the new mapping before there's + * a chance for anything to kick off a table walk for the new iova. + */ + wmb(); + + return ret; +} + +static size_t dart_unmap_pages(struct io_pgtable_ops *ops, unsigned long iova, + size_t pgsize, size_t pgcount, + struct iommu_iotlb_gather *gather) +{ + struct dart_io_pgtable *data = io_pgtable_ops_to_data(ops); + struct io_pgtable_cfg *cfg = &data->iop.cfg; + int i = 0, num_entries, max_entries, unmap_idx_start; + dart_iopte pte, *ptep; + + if (WARN_ON(pgsize != cfg->pgsize_bitmap || !pgcount)) + return 0; + + ptep = dart_get_l2(data, iova); + + /* Valid L2 IOPTE pointer? */ + if (WARN_ON(!ptep)) + return 0; + + unmap_idx_start = dart_get_l2_index(data, iova); + ptep += unmap_idx_start; + + max_entries = DART_PTES_PER_TABLE(data) - unmap_idx_start; + num_entries = min_t(int, pgcount, max_entries); + + while (i < num_entries) { + pte = READ_ONCE(*ptep); + if (WARN_ON(!pte)) + break; + + /* clear pte */ + *ptep = 0; + + if (!iommu_iotlb_gather_queued(gather)) + io_pgtable_tlb_add_page(&data->iop, gather, + iova + i * pgsize, pgsize); + + ptep++; + i++; + } + + return i * pgsize; +} + +static phys_addr_t dart_iova_to_phys(struct io_pgtable_ops *ops, + unsigned long iova) +{ + struct dart_io_pgtable *data = io_pgtable_ops_to_data(ops); + dart_iopte pte, *ptep; + + ptep = dart_get_l2(data, iova); + + /* Valid L2 IOPTE pointer? */ + if (!ptep) + return 0; + + ptep += dart_get_l2_index(data, iova); + + pte = READ_ONCE(*ptep); + /* Found translation */ + if (pte) { + iova &= (data->iop.cfg.pgsize_bitmap - 1); + return iopte_to_paddr(pte, data) | iova; + } + + /* Ran out of page tables to walk */ + return 0; +} + +static struct dart_io_pgtable * +dart_alloc_pgtable(struct io_pgtable_cfg *cfg) +{ + struct dart_io_pgtable *data; + int tbl_bits, bits_per_level, va_bits, pg_shift; + + pg_shift = __ffs(cfg->pgsize_bitmap); + bits_per_level = pg_shift - ilog2(sizeof(dart_iopte)); + + va_bits = cfg->ias - pg_shift; + + tbl_bits = max_t(int, 0, va_bits - (bits_per_level * DART_LEVELS)); + if ((1 << tbl_bits) > DART_MAX_TABLES) + return NULL; + + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return NULL; + + data->tbl_bits = tbl_bits; + data->bits_per_level = bits_per_level; + + data->iop.ops = (struct io_pgtable_ops) { + .map_pages = dart_map_pages, + .unmap_pages = dart_unmap_pages, + .iova_to_phys = dart_iova_to_phys, + }; + + return data; +} + +static struct io_pgtable * +apple_dart_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie) +{ + struct dart_io_pgtable *data; + int i; + + if (!cfg->coherent_walk) + return NULL; + + if (cfg->oas > DART1_MAX_ADDR_BITS) + return NULL; + + if (cfg->ias > cfg->oas) + return NULL; + + if (!(cfg->pgsize_bitmap == SZ_4K || cfg->pgsize_bitmap == SZ_16K)) + return NULL; + + data = dart_alloc_pgtable(cfg); + if (!data) + return NULL; + + cfg->apple_dart_cfg.n_ttbrs = 1 << data->tbl_bits; + + for (i = 0; i < cfg->apple_dart_cfg.n_ttbrs; ++i) { + data->pgd[i] = __dart_alloc_pages(DART_GRANULE(data), GFP_KERNEL, + cfg); + if (!data->pgd[i]) + goto out_free_data; + cfg->apple_dart_cfg.ttbr[i] = virt_to_phys(data->pgd[i]); + } + + return &data->iop; + +out_free_data: + while (--i >= 0) + free_pages((unsigned long)data->pgd[i], + get_order(DART_GRANULE(data))); + kfree(data); + return NULL; +} + +static void apple_dart_free_pgtable(struct io_pgtable *iop) +{ + struct dart_io_pgtable *data = io_pgtable_to_data(iop); + dart_iopte *ptep, *end; + int i; + + for (i = 0; i < (1 << data->tbl_bits) && data->pgd[i]; ++i) { + ptep = data->pgd[i]; + end = (void *)ptep + DART_GRANULE(data); + + while (ptep != end) { + dart_iopte pte = *ptep++; + + if (pte) { + unsigned long page = + (unsigned long)iopte_deref(pte, data); + + free_pages(page, get_order(DART_GRANULE(data))); + } + } + free_pages((unsigned long)data->pgd[i], + get_order(DART_GRANULE(data))); + } + + kfree(data); +} + +struct io_pgtable_init_fns io_pgtable_apple_dart_init_fns = { + .alloc = apple_dart_alloc_pgtable, + .free = apple_dart_free_pgtable, +}; diff --git a/drivers/iommu/io-pgtable.c b/drivers/iommu/io-pgtable.c index f4bfcef98297..16205ea9272c 100644 --- a/drivers/iommu/io-pgtable.c +++ b/drivers/iommu/io-pgtable.c @@ -20,6 +20,8 @@ io_pgtable_init_table[IO_PGTABLE_NUM_FMTS] = { [ARM_64_LPAE_S1] = &io_pgtable_arm_64_lpae_s1_init_fns, [ARM_64_LPAE_S2] = &io_pgtable_arm_64_lpae_s2_init_fns, [ARM_MALI_LPAE] = &io_pgtable_arm_mali_lpae_init_fns, +#endif +#ifdef CONFIG_IOMMU_IO_PGTABLE_DART [APPLE_DART] = &io_pgtable_apple_dart_init_fns, #endif #ifdef CONFIG_IOMMU_IO_PGTABLE_ARMV7S From d8fe365a4f1c1aa5e2da3e41f50a08c9bd8d6112 Mon Sep 17 00:00:00 2001 From: Sven Peter Date: Fri, 16 Sep 2022 11:41:50 +0200 Subject: [PATCH 50/59] iommu/io-pgtable: Add DART subpage protection support DART allows to only expose a subpage to the device. While this is an optional feature on the M1 DARTs the new ones present on the Pro/Max models require this field in every PTE. Signed-off-by: Sven Peter Signed-off-by: Janne Grunau Reviewed-by: Rob Herring Acked-by: Hector Martin Link: https://lore.kernel.org/r/20220916094152.87137-4-j@jannau.net Signed-off-by: Joerg Roedel --- drivers/iommu/io-pgtable-dart.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/iommu/io-pgtable-dart.c b/drivers/iommu/io-pgtable-dart.c index 8ca1ea313a80..fc76b6168055 100644 --- a/drivers/iommu/io-pgtable-dart.c +++ b/drivers/iommu/io-pgtable-dart.c @@ -14,6 +14,7 @@ #define pr_fmt(fmt) "dart io-pgtable: " fmt #include +#include #include #include #include @@ -40,6 +41,9 @@ #define DART_PTES_PER_TABLE(d) \ (DART_GRANULE(d) >> ilog2(sizeof(dart_iopte))) +#define APPLE_DART_PTE_SUBPAGE_START GENMASK_ULL(63, 52) +#define APPLE_DART_PTE_SUBPAGE_END GENMASK_ULL(51, 40) + #define APPLE_DART1_PADDR_MASK GENMASK_ULL(35, 12) /* Apple DART1 protection bits */ @@ -107,6 +111,10 @@ static int dart_init_pte(struct dart_io_pgtable *data, return -EEXIST; } + /* subpage protection: always allow access to the entire page */ + pte |= FIELD_PREP(APPLE_DART_PTE_SUBPAGE_START, 0); + pte |= FIELD_PREP(APPLE_DART_PTE_SUBPAGE_END, 0xfff); + pte |= APPLE_DART1_PTE_PROT_SP_DIS; pte |= APPLE_DART_PTE_VALID; From dc09fe1c5edd9c27a52cb6dc5a7bb4452d45c71c Mon Sep 17 00:00:00 2001 From: Sven Peter Date: Fri, 16 Sep 2022 11:41:51 +0200 Subject: [PATCH 51/59] iommu/io-pgtable-dart: Add DART PTE support for t6000 The DARTs present in the M1 Pro/Max/Ultra SoC use a diffent PTE format. They support a 42bit physical address space by shifting the paddr and extending its mask inside the PTE. They also come with mandatory sub-page protection now which we just configure to always allow access to the entire page. This feature is already present but optional on the previous DARTs which allows to unconditionally configure it. Signed-off-by: Sven Peter Co-developed-by: Janne Grunau Signed-off-by: Janne Grunau Reviewed-by: Rob Herring Acked-by: Hector Martin Link: https://lore.kernel.org/r/20220916094152.87137-5-j@jannau.net Signed-off-by: Joerg Roedel --- drivers/iommu/io-pgtable-dart.c | 49 ++++++++++++++++++++++++++++----- drivers/iommu/io-pgtable.c | 1 + include/linux/io-pgtable.h | 1 + 3 files changed, 44 insertions(+), 7 deletions(-) diff --git a/drivers/iommu/io-pgtable-dart.c b/drivers/iommu/io-pgtable-dart.c index fc76b6168055..74b1ef2b96be 100644 --- a/drivers/iommu/io-pgtable-dart.c +++ b/drivers/iommu/io-pgtable-dart.c @@ -45,12 +45,19 @@ #define APPLE_DART_PTE_SUBPAGE_END GENMASK_ULL(51, 40) #define APPLE_DART1_PADDR_MASK GENMASK_ULL(35, 12) +#define APPLE_DART2_PADDR_MASK GENMASK_ULL(37, 10) +#define APPLE_DART2_PADDR_SHIFT (4) /* Apple DART1 protection bits */ #define APPLE_DART1_PTE_PROT_NO_READ BIT(8) #define APPLE_DART1_PTE_PROT_NO_WRITE BIT(7) #define APPLE_DART1_PTE_PROT_SP_DIS BIT(1) +/* Apple DART2 protection bits */ +#define APPLE_DART2_PTE_PROT_NO_READ BIT(3) +#define APPLE_DART2_PTE_PROT_NO_WRITE BIT(2) +#define APPLE_DART2_PTE_PROT_NO_CACHE BIT(1) + /* marks PTE as valid */ #define APPLE_DART_PTE_VALID BIT(0) @@ -72,13 +79,31 @@ typedef u64 dart_iopte; static dart_iopte paddr_to_iopte(phys_addr_t paddr, struct dart_io_pgtable *data) { - return paddr & APPLE_DART1_PADDR_MASK; + dart_iopte pte; + + if (data->iop.fmt == APPLE_DART) + return paddr & APPLE_DART1_PADDR_MASK; + + /* format is APPLE_DART2 */ + pte = paddr >> APPLE_DART2_PADDR_SHIFT; + pte &= APPLE_DART2_PADDR_MASK; + + return pte; } static phys_addr_t iopte_to_paddr(dart_iopte pte, struct dart_io_pgtable *data) { - return pte & APPLE_DART1_PADDR_MASK; + u64 paddr; + + if (data->iop.fmt == APPLE_DART) + return pte & APPLE_DART1_PADDR_MASK; + + /* format is APPLE_DART2 */ + paddr = pte & APPLE_DART2_PADDR_MASK; + paddr <<= APPLE_DART2_PADDR_SHIFT; + + return paddr; } static void *__dart_alloc_pages(size_t size, gfp_t gfp, @@ -190,10 +215,20 @@ static dart_iopte dart_prot_to_pte(struct dart_io_pgtable *data, { dart_iopte pte = 0; - if (!(prot & IOMMU_WRITE)) - pte |= APPLE_DART1_PTE_PROT_NO_WRITE; - if (!(prot & IOMMU_READ)) - pte |= APPLE_DART1_PTE_PROT_NO_READ; + if (data->iop.fmt == APPLE_DART) { + if (!(prot & IOMMU_WRITE)) + pte |= APPLE_DART1_PTE_PROT_NO_WRITE; + if (!(prot & IOMMU_READ)) + pte |= APPLE_DART1_PTE_PROT_NO_READ; + } + if (data->iop.fmt == APPLE_DART2) { + if (!(prot & IOMMU_WRITE)) + pte |= APPLE_DART2_PTE_PROT_NO_WRITE; + if (!(prot & IOMMU_READ)) + pte |= APPLE_DART2_PTE_PROT_NO_READ; + if (!(prot & IOMMU_CACHE)) + pte |= APPLE_DART2_PTE_PROT_NO_CACHE; + } return pte; } @@ -368,7 +403,7 @@ apple_dart_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie) if (!cfg->coherent_walk) return NULL; - if (cfg->oas > DART1_MAX_ADDR_BITS) + if (cfg->oas != 36 && cfg->oas != 42) return NULL; if (cfg->ias > cfg->oas) diff --git a/drivers/iommu/io-pgtable.c b/drivers/iommu/io-pgtable.c index 16205ea9272c..49f46e1eabf7 100644 --- a/drivers/iommu/io-pgtable.c +++ b/drivers/iommu/io-pgtable.c @@ -23,6 +23,7 @@ io_pgtable_init_table[IO_PGTABLE_NUM_FMTS] = { #endif #ifdef CONFIG_IOMMU_IO_PGTABLE_DART [APPLE_DART] = &io_pgtable_apple_dart_init_fns, + [APPLE_DART2] = &io_pgtable_apple_dart_init_fns, #endif #ifdef CONFIG_IOMMU_IO_PGTABLE_ARMV7S [ARM_V7S] = &io_pgtable_arm_v7s_init_fns, diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h index ca98aeadcc80..b768937382cd 100644 --- a/include/linux/io-pgtable.h +++ b/include/linux/io-pgtable.h @@ -17,6 +17,7 @@ enum io_pgtable_fmt { ARM_MALI_LPAE, AMD_IOMMU_V1, APPLE_DART, + APPLE_DART2, IO_PGTABLE_NUM_FMTS, }; From a380b8dcf22ccb5b872ae9ad7f4644cd0043aaee Mon Sep 17 00:00:00 2001 From: Sven Peter Date: Fri, 16 Sep 2022 11:41:52 +0200 Subject: [PATCH 52/59] iommu: dart: Support t6000 variant The M1 Pro/Max/Ultra SoCs come with a new variant of DART which supports a larger physical address space with a different PTE format. Pass through the correct paddr address space size and the PTE format to the io-pgtable code which will take care of the rest. Signed-off-by: Sven Peter Co-developed-by: Janne Grunau Signed-off-by: Janne Grunau Reviewed-by: Rob Herring Acked-by: Hector Martin Link: https://lore.kernel.org/r/20220916094152.87137-6-j@jannau.net Signed-off-by: Joerg Roedel --- drivers/iommu/apple-dart.c | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/apple-dart.c b/drivers/iommu/apple-dart.c index 1b1725759262..dbab37c6f8ae 100644 --- a/drivers/iommu/apple-dart.c +++ b/drivers/iommu/apple-dart.c @@ -81,10 +81,16 @@ #define DART_TTBR_VALID BIT(31) #define DART_TTBR_SHIFT 12 +struct apple_dart_hw { + u32 oas; + enum io_pgtable_fmt fmt; +}; + /* * Private structure associated with each DART device. * * @dev: device struct + * @hw: SoC-specific hardware data * @regs: mapped MMIO region * @irq: interrupt number, can be shared with other DARTs * @clks: clocks associated with this DART @@ -98,6 +104,7 @@ */ struct apple_dart { struct device *dev; + const struct apple_dart_hw *hw; void __iomem *regs; @@ -421,13 +428,13 @@ static int apple_dart_finalize_domain(struct iommu_domain *domain, pgtbl_cfg = (struct io_pgtable_cfg){ .pgsize_bitmap = dart->pgsize, .ias = 32, - .oas = 36, + .oas = dart->hw->oas, .coherent_walk = 1, .iommu_dev = dart->dev, }; dart_domain->pgtbl_ops = - alloc_io_pgtable_ops(APPLE_DART, &pgtbl_cfg, domain); + alloc_io_pgtable_ops(dart->hw->fmt, &pgtbl_cfg, domain); if (!dart_domain->pgtbl_ops) { ret = -ENOMEM; goto done; @@ -854,6 +861,7 @@ static int apple_dart_probe(struct platform_device *pdev) return -ENOMEM; dart->dev = dev; + dart->hw = of_device_get_match_data(dev); spin_lock_init(&dart->lock); dart->regs = devm_platform_get_and_ioremap_resource(pdev, 0, &res); @@ -942,8 +950,18 @@ static int apple_dart_remove(struct platform_device *pdev) return 0; } +static const struct apple_dart_hw apple_dart_hw_t8103 = { + .oas = 36, + .fmt = APPLE_DART, +}; +static const struct apple_dart_hw apple_dart_hw_t6000 = { + .oas = 42, + .fmt = APPLE_DART2, +}; + static const struct of_device_id apple_dart_of_match[] = { - { .compatible = "apple,t8103-dart", .data = NULL }, + { .compatible = "apple,t8103-dart", .data = &apple_dart_hw_t8103 }, + { .compatible = "apple,t6000-dart", .data = &apple_dart_hw_t6000 }, {}, }; MODULE_DEVICE_TABLE(of, apple_dart_of_match); From d2f2f1d10ccdb96aeea38c5ec647679fcc093b84 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Fri, 16 Sep 2022 14:31:47 +0100 Subject: [PATCH 53/59] dt-bindings: iommu: arm,smmu-v3: Relax order of interrupt names The QEMU devicetree uses a different order for SMMUv3 interrupt names, and there isn't a good reason for enforcing a specific order. Since all interrupt lines are optional, operating systems should not expect a fixed interrupt array layout; they should instead match each interrupt to its name individually. Besides, as a result of commit e4783856a2e8 ("dt-bindings: iommu: arm,smmu-v3: make PRI IRQ optional"), "cmdq-sync" and "priq" are already permutable. Relax the interrupt-names array entirely by allowing any permutation, incidentally making the schema more readable. Note that dt-validate won't allow duplicate names here so we don't need to specify maxItems or add additional checks, it's quite neat. Signed-off-by: Jean-Philippe Brucker Acked-by: Will Deacon Acked-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20220916133145.1910549-1-jean-philippe@linaro.org Signed-off-by: Joerg Roedel --- .../devicetree/bindings/iommu/arm,smmu-v3.yaml | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu-v3.yaml b/Documentation/devicetree/bindings/iommu/arm,smmu-v3.yaml index c57a53d87e4e..75fcf4cb52d9 100644 --- a/Documentation/devicetree/bindings/iommu/arm,smmu-v3.yaml +++ b/Documentation/devicetree/bindings/iommu/arm,smmu-v3.yaml @@ -39,16 +39,11 @@ properties: any others. - minItems: 1 items: - - enum: - - eventq # Event Queue not empty - - gerror # Global Error activated - - const: gerror - - enum: - - cmdq-sync # CMD_SYNC complete - - priq # PRI Queue not empty - - enum: - - cmdq-sync - - priq + enum: + - eventq # Event Queue not empty + - gerror # Global Error activated + - cmdq-sync # CMD_SYNC complete + - priq # PRI Queue not empty '#iommu-cells': const: 1 From 06f4b8d09dbabec631ed7b033f5d5413d86c7134 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 26 Sep 2022 21:15:24 +0800 Subject: [PATCH 54/59] iommu/vt-d: Remove unnecessary SVA data accesses in page fault path The existing I/O page fault handling code accesses the per-PASID SVA data structures. This is unnecessary and makes the fault handling code only suitable for SVA scenarios. This removes the SVA data accesses from the I/O page fault reporting and responding code, so that the fault handling code could be generic. Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20220914011821.400986-1-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.h | 2 +- drivers/iommu/intel/svm.c | 60 +++++-------------------------------- 2 files changed, 8 insertions(+), 54 deletions(-) diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index 74b0e19e23ee..b5fb7706e97c 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -586,6 +586,7 @@ struct intel_iommu { #ifdef CONFIG_INTEL_IOMMU_SVM struct page_req_dsc *prq; unsigned char prq_name[16]; /* Name for PRQ interrupt */ + unsigned long prq_seq_number; struct completion prq_complete; struct ioasid_allocator_ops pasid_allocator; /* Custom allocator for PASIDs */ #endif @@ -761,7 +762,6 @@ struct intel_svm_dev { struct device *dev; struct intel_iommu *iommu; struct iommu_sva sva; - unsigned long prq_seq_number; u32 pasid; int users; u16 did; diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index 8bcfb93dda56..d1cab931dcb0 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -48,23 +48,6 @@ static void *pasid_private_find(ioasid_t pasid) return xa_load(&pasid_private_array, pasid); } -static struct intel_svm_dev * -svm_lookup_device_by_sid(struct intel_svm *svm, u16 sid) -{ - struct intel_svm_dev *sdev = NULL, *t; - - rcu_read_lock(); - list_for_each_entry_rcu(t, &svm->devs, list) { - if (t->sid == sid) { - sdev = t; - break; - } - } - rcu_read_unlock(); - - return sdev; -} - static struct intel_svm_dev * svm_lookup_device_by_dev(struct intel_svm *svm, struct device *dev) { @@ -706,11 +689,10 @@ static void handle_bad_prq_event(struct intel_iommu *iommu, static irqreturn_t prq_event_thread(int irq, void *d) { - struct intel_svm_dev *sdev = NULL; struct intel_iommu *iommu = d; - struct intel_svm *svm = NULL; struct page_req_dsc *req; int head, tail, handled; + struct pci_dev *pdev; u64 address; /* @@ -730,8 +712,6 @@ static irqreturn_t prq_event_thread(int irq, void *d) pr_err("IOMMU: %s: Page request without PASID\n", iommu->name); bad_req: - svm = NULL; - sdev = NULL; handle_bad_prq_event(iommu, req, QI_RESP_INVALID); goto prq_advance; } @@ -758,34 +738,19 @@ bad_req: if (unlikely(req->lpig && !req->rd_req && !req->wr_req)) goto prq_advance; - if (!svm || svm->pasid != req->pasid) { - /* - * It can't go away, because the driver is not permitted - * to unbind the mm while any page faults are outstanding. - */ - svm = pasid_private_find(req->pasid); - if (IS_ERR_OR_NULL(svm) || (svm->flags & SVM_FLAG_SUPERVISOR_MODE)) - goto bad_req; - } - - if (!sdev || sdev->sid != req->rid) { - sdev = svm_lookup_device_by_sid(svm, req->rid); - if (!sdev) - goto bad_req; - } - - sdev->prq_seq_number++; - + pdev = pci_get_domain_bus_and_slot(iommu->segment, + PCI_BUS_NUM(req->rid), + req->rid & 0xff); /* * If prq is to be handled outside iommu driver via receiver of * the fault notifiers, we skip the page response here. */ - if (intel_svm_prq_report(iommu, sdev->dev, req)) + if (!pdev || intel_svm_prq_report(iommu, &pdev->dev, req)) handle_bad_prq_event(iommu, req, QI_RESP_INVALID); - trace_prq_report(iommu, sdev->dev, req->qw_0, req->qw_1, + trace_prq_report(iommu, &pdev->dev, req->qw_0, req->qw_1, req->priv_data[0], req->priv_data[1], - sdev->prq_seq_number); + iommu->prq_seq_number++); prq_advance: head = (head + sizeof(*req)) & PRQ_RING_MASK; } @@ -881,8 +846,6 @@ int intel_svm_page_response(struct device *dev, struct iommu_page_response *msg) { struct iommu_fault_page_request *prm; - struct intel_svm_dev *sdev = NULL; - struct intel_svm *svm = NULL; struct intel_iommu *iommu; bool private_present; bool pasid_present; @@ -901,8 +864,6 @@ int intel_svm_page_response(struct device *dev, if (!msg || !evt) return -EINVAL; - mutex_lock(&pasid_mutex); - prm = &evt->fault.prm; sid = PCI_DEVID(bus, devfn); pasid_present = prm->flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID; @@ -919,12 +880,6 @@ int intel_svm_page_response(struct device *dev, goto out; } - ret = pasid_to_svm_sdev(dev, prm->pasid, &svm, &sdev); - if (ret || !sdev) { - ret = -ENODEV; - goto out; - } - /* * Per VT-d spec. v3.0 ch7.7, system software must respond * with page group response if private data is present (PDP) @@ -954,6 +909,5 @@ int intel_svm_page_response(struct device *dev, qi_submit_sync(iommu, &desc, 1, 0); } out: - mutex_unlock(&pasid_mutex); return ret; } From 0faa19a1515f8b04e9251b38ba522529906aeda7 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 26 Sep 2022 21:15:25 +0800 Subject: [PATCH 55/59] iommu/vt-d: Decouple PASID & PRI enabling from SVA Previously the PCI PASID and PRI capabilities are enabled in the path of iommu device probe only if INTEL_IOMMU_SVM is configured and the device supports ATS. As we've already decoupled the I/O page fault handler from SVA, we could also decouple PASID and PRI enabling from it to make room for growth of new features like kernel DMA with PASID, SIOV and nested translation. At the same time, the iommu_enable_dev_iotlb() helper is also called in iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_SVA) path. It's unnecessary and duplicate. This cleanups this helper to make the code neat. Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20220915085814.2261409-1-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/Kconfig | 5 +-- drivers/iommu/intel/iommu.c | 78 ++++++++----------------------------- drivers/iommu/intel/iommu.h | 1 - 3 files changed, 18 insertions(+), 66 deletions(-) diff --git a/drivers/iommu/intel/Kconfig b/drivers/iommu/intel/Kconfig index 39a06d245f12..cd0ec7ed48b6 100644 --- a/drivers/iommu/intel/Kconfig +++ b/drivers/iommu/intel/Kconfig @@ -21,6 +21,8 @@ config INTEL_IOMMU select IOASID select IOMMU_DMA select PCI_ATS + select PCI_PRI + select PCI_PASID help DMA remapping (DMAR) devices support enables independent address translations for Direct Memory Access (DMA) from devices. @@ -48,10 +50,7 @@ config INTEL_IOMMU_DEBUGFS config INTEL_IOMMU_SVM bool "Support for Shared Virtual Memory with Intel IOMMU" depends on X86_64 - select PCI_PASID - select PCI_PRI select MMU_NOTIFIER - select IOASID select IOMMU_SVA help Shared Virtual Memory (SVM) provides a facility for devices diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 31bc50e538a3..af17177b6d76 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -199,6 +199,11 @@ static inline void context_set_domain_id(struct context_entry *context, context->hi |= (value & ((1 << 16) - 1)) << 8; } +static inline void context_set_pasid(struct context_entry *context) +{ + context->lo |= CONTEXT_PASIDE; +} + static inline int context_domain_id(struct context_entry *c) { return((c->hi >> 8) & 0xffff); @@ -1350,21 +1355,18 @@ static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did, } static struct device_domain_info * -iommu_support_dev_iotlb(struct dmar_domain *domain, struct intel_iommu *iommu, - u8 bus, u8 devfn) +domain_lookup_dev_info(struct dmar_domain *domain, + struct intel_iommu *iommu, u8 bus, u8 devfn) { struct device_domain_info *info; unsigned long flags; - if (!iommu->qi) - return NULL; - spin_lock_irqsave(&domain->lock, flags); list_for_each_entry(info, &domain->devices, link) { if (info->iommu == iommu && info->bus == bus && info->devfn == devfn) { spin_unlock_irqrestore(&domain->lock, flags); - return info->ats_supported ? info : NULL; + return info; } } spin_unlock_irqrestore(&domain->lock, flags); @@ -1389,7 +1391,7 @@ static void domain_update_iotlb(struct dmar_domain *domain) spin_unlock_irqrestore(&domain->lock, flags); } -static void iommu_enable_dev_iotlb(struct device_domain_info *info) +static void iommu_enable_pci_caps(struct device_domain_info *info) { struct pci_dev *pdev; @@ -1412,7 +1414,6 @@ static void iommu_enable_dev_iotlb(struct device_domain_info *info) info->pfsid = pci_dev_id(pf_pdev); } -#ifdef CONFIG_INTEL_IOMMU_SVM /* The PCIe spec, in its wisdom, declares that the behaviour of the device if you enable PASID support after ATS support is undefined. So always enable PASID support on devices which @@ -1425,7 +1426,7 @@ static void iommu_enable_dev_iotlb(struct device_domain_info *info) (info->pasid_enabled ? pci_prg_resp_pasid_required(pdev) : 1) && !pci_reset_pri(pdev) && !pci_enable_pri(pdev, PRQ_DEPTH)) info->pri_enabled = 1; -#endif + if (info->ats_supported && pci_ats_page_aligned(pdev) && !pci_enable_ats(pdev, VTD_PAGE_SHIFT)) { info->ats_enabled = 1; @@ -1448,16 +1449,16 @@ static void iommu_disable_dev_iotlb(struct device_domain_info *info) info->ats_enabled = 0; domain_update_iotlb(info->domain); } -#ifdef CONFIG_INTEL_IOMMU_SVM + if (info->pri_enabled) { pci_disable_pri(pdev); info->pri_enabled = 0; } + if (info->pasid_enabled) { pci_disable_pasid(pdev); info->pasid_enabled = 0; } -#endif } static void __iommu_flush_dev_iotlb(struct device_domain_info *info, @@ -1907,7 +1908,7 @@ static int domain_context_mapping_one(struct dmar_domain *domain, u8 bus, u8 devfn) { struct device_domain_info *info = - iommu_support_dev_iotlb(domain, iommu, bus, devfn); + domain_lookup_dev_info(domain, iommu, bus, devfn); u16 did = domain_id_iommu(domain, iommu); int translation = CONTEXT_TT_MULTI_LEVEL; struct context_entry *context; @@ -1980,6 +1981,8 @@ static int domain_context_mapping_one(struct dmar_domain *domain, context_set_sm_dte(context); if (info && info->pri_supported) context_set_sm_pre(context); + if (info && info->pasid_supported) + context_set_pasid(context); } else { struct dma_pte *pgd = domain->pgd; int agaw; @@ -2037,7 +2040,7 @@ static int domain_context_mapping_one(struct dmar_domain *domain, } else { iommu_flush_write_buffer(iommu); } - iommu_enable_dev_iotlb(info); + iommu_enable_pci_caps(info); ret = 0; @@ -4574,52 +4577,6 @@ static void intel_iommu_get_resv_regions(struct device *device, list_add_tail(®->list, head); } -int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct device *dev) -{ - struct device_domain_info *info = dev_iommu_priv_get(dev); - struct context_entry *context; - struct dmar_domain *domain; - u64 ctx_lo; - int ret; - - domain = info->domain; - if (!domain) - return -EINVAL; - - spin_lock(&iommu->lock); - ret = -EINVAL; - if (!info->pasid_supported) - goto out; - - context = iommu_context_addr(iommu, info->bus, info->devfn, 0); - if (WARN_ON(!context)) - goto out; - - ctx_lo = context[0].lo; - - if (!(ctx_lo & CONTEXT_PASIDE)) { - ctx_lo |= CONTEXT_PASIDE; - context[0].lo = ctx_lo; - wmb(); - iommu->flush.flush_context(iommu, - domain_id_iommu(domain, iommu), - PCI_DEVID(info->bus, info->devfn), - DMA_CCMD_MASK_NOBIT, - DMA_CCMD_DEVICE_INVL); - } - - /* Enable PASID support in the device, if it wasn't already */ - if (!info->pasid_enabled) - iommu_enable_dev_iotlb(info); - - ret = 0; - - out: - spin_unlock(&iommu->lock); - - return ret; -} - static struct iommu_group *intel_iommu_device_group(struct device *dev) { if (dev_is_pci(dev)) @@ -4643,9 +4600,6 @@ static int intel_iommu_enable_sva(struct device *dev) if (!(iommu->flags & VTD_FLAG_SVM_CAPABLE)) return -ENODEV; - if (intel_iommu_enable_pasid(iommu, dev)) - return -ENODEV; - if (!info->pasid_enabled || !info->pri_enabled || !info->ats_enabled) return -EINVAL; diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index b5fb7706e97c..8f29a183467d 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -742,7 +742,6 @@ extern int dmar_ir_support(void); void *alloc_pgtable_page(int node); void free_pgtable_page(void *vaddr); void iommu_flush_write_buffer(struct intel_iommu *iommu); -int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct device *dev); struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn); #ifdef CONFIG_INTEL_IOMMU_SVM From 4759858726e4b4a9dac740ec16f07612c90b4663 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 26 Sep 2022 21:15:26 +0800 Subject: [PATCH 56/59] iommu/vt-d: Remove pasid_set_eafe() It is not used anywhere in the tree. Remove it to avoid dead code. No functional change intended. Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20220915081645.1834555-1-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/pasid.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c index c5e7e8b020a5..ccaf32949254 100644 --- a/drivers/iommu/intel/pasid.c +++ b/drivers/iommu/intel/pasid.c @@ -392,16 +392,6 @@ pasid_set_flpm(struct pasid_entry *pe, u64 value) pasid_set_bits(&pe->val[2], GENMASK_ULL(3, 2), value << 2); } -/* - * Setup the Extended Access Flag Enable (EAFE) field (Bit 135) - * of a scalable mode PASID entry. - */ -static inline void -pasid_set_eafe(struct pasid_entry *pe) -{ - pasid_set_bits(&pe->val[2], 1 << 7, 1 << 7); -} - static void pasid_cache_invalidation_with_pasid(struct intel_iommu *iommu, u16 did, u32 pasid) From b722cb32f0a558409fa5def9aaf0b82d9b553686 Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Mon, 26 Sep 2022 21:15:27 +0800 Subject: [PATCH 57/59] iommu/vt-d: Rename cap_5lp_support to cap_fl5lp_support This renaming better describes it is for first level page table (a.k.a first stage page table since VT-d spec 3.4). Signed-off-by: Yi Liu Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20220916071326.2223901-1-yi.l.liu@intel.com Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/intel/cap_audit.c | 4 ++-- drivers/iommu/intel/iommu.c | 2 +- drivers/iommu/intel/iommu.h | 2 +- drivers/iommu/intel/pasid.c | 2 +- drivers/iommu/intel/svm.c | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/iommu/intel/cap_audit.c b/drivers/iommu/intel/cap_audit.c index 3ee68393122f..806986696841 100644 --- a/drivers/iommu/intel/cap_audit.c +++ b/drivers/iommu/intel/cap_audit.c @@ -37,7 +37,7 @@ static inline void check_dmar_capabilities(struct intel_iommu *a, MINIMAL_FEATURE_IOMMU(b, ecap, ECAP_MHMV_MASK); MINIMAL_FEATURE_IOMMU(b, ecap, ECAP_IRO_MASK); - CHECK_FEATURE_MISMATCH(a, b, cap, 5lp_support, CAP_FL5LP_MASK); + CHECK_FEATURE_MISMATCH(a, b, cap, fl5lp_support, CAP_FL5LP_MASK); CHECK_FEATURE_MISMATCH(a, b, cap, fl1gp_support, CAP_FL1GP_MASK); CHECK_FEATURE_MISMATCH(a, b, cap, read_drain, CAP_RD_MASK); CHECK_FEATURE_MISMATCH(a, b, cap, write_drain, CAP_WD_MASK); @@ -84,7 +84,7 @@ static int cap_audit_hotplug(struct intel_iommu *iommu, enum cap_audit_type type goto out; } - CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, cap, 5lp_support, CAP_FL5LP_MASK); + CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, cap, fl5lp_support, CAP_FL5LP_MASK); CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, cap, fl1gp_support, CAP_FL1GP_MASK); CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, cap, read_drain, CAP_RD_MASK); CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, cap, write_drain, CAP_WD_MASK); diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index af17177b6d76..7410d6232cbb 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -404,7 +404,7 @@ static unsigned long __iommu_calculate_sagaw(struct intel_iommu *iommu) { unsigned long fl_sagaw, sl_sagaw; - fl_sagaw = BIT(2) | (cap_5lp_support(iommu->cap) ? BIT(3) : 0); + fl_sagaw = BIT(2) | (cap_fl5lp_support(iommu->cap) ? BIT(3) : 0); sl_sagaw = cap_sagaw(iommu->cap); /* Second level only. */ diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index 8f29a183467d..99cc75ecac63 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -146,7 +146,7 @@ /* * Decoding Capability Register */ -#define cap_5lp_support(c) (((c) >> 60) & 1) +#define cap_fl5lp_support(c) (((c) >> 60) & 1) #define cap_pi_support(c) (((c) >> 59) & 1) #define cap_fl1gp_support(c) (((c) >> 56) & 1) #define cap_read_drain(c) (((c) >> 55) & 1) diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c index ccaf32949254..c30ddac40ee5 100644 --- a/drivers/iommu/intel/pasid.c +++ b/drivers/iommu/intel/pasid.c @@ -519,7 +519,7 @@ int intel_pasid_setup_first_level(struct intel_iommu *iommu, } } - if ((flags & PASID_FLAG_FL5LP) && !cap_5lp_support(iommu->cap)) { + if ((flags & PASID_FLAG_FL5LP) && !cap_fl5lp_support(iommu->cap)) { pr_err("No 5-level paging support for first-level on %s\n", iommu->name); return -EINVAL; diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index d1cab931dcb0..7d08eb034f2d 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -164,7 +164,7 @@ void intel_svm_check(struct intel_iommu *iommu) } if (cpu_feature_enabled(X86_FEATURE_LA57) && - !cap_5lp_support(iommu->cap)) { + !cap_fl5lp_support(iommu->cap)) { pr_err("%s SVM disabled, incompatible paging mode\n", iommu->name); return; From eb5b20114b9710d1dcd4118dbf01b081c104bbc0 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 26 Sep 2022 21:15:28 +0800 Subject: [PATCH 58/59] iommu/vt-d: Avoid unnecessary global IRTE cache invalidation Some VT-d hardware implementations invalidate all interrupt remapping hardware translation caches as part of SIRTP flow. The VT-d spec adds a ESIRTPS (Enhanced Set Interrupt Remap Table Pointer Support, section 11.4.2 in VT-d spec) capability bit to indicate this. The spec also states in 11.4.4 that hardware also performs global invalidation on all interrupt remapping caches as part of Interrupt Remapping Disable operation if ESIRTPS capability bit is set. This checks the ESIRTPS capability bit and skip software global cache invalidation if it's set. Signed-off-by: Jacob Pan Signed-off-by: Lu Baolu Reviewed-by: Jerry Snitselaar Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20220921065741.3572495-1-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.h | 1 + drivers/iommu/intel/irq_remapping.c | 6 ++++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index 99cc75ecac63..bddf6c69587d 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -146,6 +146,7 @@ /* * Decoding Capability Register */ +#define cap_esirtps(c) (((c) >> 62) & 1) #define cap_fl5lp_support(c) (((c) >> 60) & 1) #define cap_pi_support(c) (((c) >> 59) & 1) #define cap_fl1gp_support(c) (((c) >> 56) & 1) diff --git a/drivers/iommu/intel/irq_remapping.c b/drivers/iommu/intel/irq_remapping.c index 2e9683e970f8..5962bb5027d0 100644 --- a/drivers/iommu/intel/irq_remapping.c +++ b/drivers/iommu/intel/irq_remapping.c @@ -494,7 +494,8 @@ static void iommu_set_irq_remapping(struct intel_iommu *iommu, int mode) * Global invalidation of interrupt entry cache to make sure the * hardware uses the new irq remapping table. */ - qi_global_iec(iommu); + if (!cap_esirtps(iommu->cap)) + qi_global_iec(iommu); } static void iommu_enable_irq_remapping(struct intel_iommu *iommu) @@ -680,7 +681,8 @@ static void iommu_disable_irq_remapping(struct intel_iommu *iommu) * global invalidation of interrupt entry cache before disabling * interrupt-remapping. */ - qi_global_iec(iommu); + if (!cap_esirtps(iommu->cap)) + qi_global_iec(iommu); raw_spin_lock_irqsave(&iommu->register_lock, flags); From 6ad931a232e71620c6dbb8d573ccef51f84f2566 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 26 Sep 2022 21:15:29 +0800 Subject: [PATCH 59/59] iommu/vt-d: Avoid unnecessary global DMA cache invalidation Some VT-d hardware implementations invalidate all DMA remapping hardware translation caches as part of SRTP flow. The VT-d spec adds a ESRTPS (Enhanced Set Root Table Pointer Support, section 11.4.2 in VT-d spec) capability bit to indicate this. With this bit set, software has no need to issue the global invalidation request. Signed-off-by: Lu Baolu Reviewed-by: Jerry Snitselaar Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20220919062523.3438951-3-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 7 +++++++ drivers/iommu/intel/iommu.h | 1 + 2 files changed, 8 insertions(+) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 7410d6232cbb..2d142ee7bbfa 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -1239,6 +1239,13 @@ static void iommu_set_root_entry(struct intel_iommu *iommu) raw_spin_unlock_irqrestore(&iommu->register_lock, flag); + /* + * Hardware invalidates all DMA remapping hardware translation + * caches as part of SRTP flow. + */ + if (cap_esrtps(iommu->cap)) + return; + iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL); if (sm_supported(iommu)) qi_flush_pasid_cache(iommu, 0, QI_PC_GLOBAL, 0); diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index bddf6c69587d..92023dff9513 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -146,6 +146,7 @@ /* * Decoding Capability Register */ +#define cap_esrtps(c) (((c) >> 63) & 1) #define cap_esirtps(c) (((c) >> 62) & 1) #define cap_fl5lp_support(c) (((c) >> 60) & 1) #define cap_pi_support(c) (((c) >> 59) & 1)