From 0d3642883b092ccfc0b044c6581ee2c1f32ab165 Mon Sep 17 00:00:00 2001 From: Suman Anna Date: Tue, 5 Sep 2017 17:56:17 -0500 Subject: [PATCH 01/54] iommu/omap: Change the attach detection logic The OMAP IOMMU driver allows only a single device (e.g. a rproc device) to be attached per domain. The current attach detection logic relies on a check for an attached iommu for the respective client device. Change this logic to use the client device pointer instead, in preparation for supporting multiple iommu devices to be bound to a single iommu domain, and thereby to a client device. Signed-off-by: Suman Anna Signed-off-by: Joerg Roedel --- drivers/iommu/omap-iommu.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c index bd67e1b2c64e..81ef729994ce 100644 --- a/drivers/iommu/omap-iommu.c +++ b/drivers/iommu/omap-iommu.c @@ -805,7 +805,7 @@ static irqreturn_t iommu_fault_handler(int irq, void *data) struct iommu_domain *domain = obj->domain; struct omap_iommu_domain *omap_domain = to_omap_domain(domain); - if (!omap_domain->iommu_dev) + if (!omap_domain->dev) return IRQ_NONE; errs = iommu_report_fault(obj, &da); @@ -1118,8 +1118,8 @@ omap_iommu_attach_dev(struct iommu_domain *domain, struct device *dev) spin_lock(&omap_domain->lock); - /* only a single device is supported per domain for now */ - if (omap_domain->iommu_dev) { + /* only a single client device can be attached to a domain */ + if (omap_domain->dev) { dev_err(dev, "iommu domain is already attached\n"); ret = -EBUSY; goto out; @@ -1148,9 +1148,14 @@ static void _omap_iommu_detach_dev(struct omap_iommu_domain *omap_domain, { struct omap_iommu *oiommu = dev_to_omap_iommu(dev); + if (!omap_domain->dev) { + dev_err(dev, "domain has no attached device\n"); + return; + } + /* only a single device is supported per domain for now */ - if (omap_domain->iommu_dev != oiommu) { - dev_err(dev, "invalid iommu device\n"); + if (omap_domain->dev != dev) { + dev_err(dev, "invalid attached device\n"); return; } @@ -1219,7 +1224,7 @@ static void omap_iommu_domain_free(struct iommu_domain *domain) * An iommu device is still attached * (currently, only one device can be attached) ? */ - if (omap_domain->iommu_dev) + if (omap_domain->dev) _omap_iommu_detach_dev(omap_domain, omap_domain->dev); kfree(omap_domain->pgtable); From 9d5018deec86673ef8418546a3ac43e47dbff3b9 Mon Sep 17 00:00:00 2001 From: Suman Anna Date: Tue, 5 Sep 2017 17:56:18 -0500 Subject: [PATCH 02/54] iommu/omap: Add support to program multiple iommus A client user instantiates and attaches to an iommu_domain to program the OMAP IOMMU associated with the domain. The iommus programmed by a client user are bound with the iommu_domain through the user's device archdata. The OMAP IOMMU driver currently supports only one IOMMU per IOMMU domain per user. The OMAP IOMMU driver has been enhanced to allow multiple IOMMUs to be programmed by a single client user. This support is being added mainly to handle the DSP subsystems on the DRA7xx SoCs, which have two MMUs within the same subsystem. These MMUs provide translations for a processor core port and an internal EDMA port. This support allows both MMUs to be programmed together, but with each one retaining its own internal state objects. The internal EDMA block is managed by the software running on the DSPs, and this design provides on-par functionality with previous-generation OMAP DSPs, where the EDMA and the DSP core shared the same MMU. 
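The mirror-programming pattern at the heart of this change can be reduced to a short, self-contained C sketch. This is illustrative only; the struct and helper names (mmu_instance, mmu_program_one, and so on) are hypothetical stand-ins, not the driver's actual types:

	struct mmu_instance {
		int programmed;		/* stand-in for real per-MMU state */
	};

	static int mmu_program_one(struct mmu_instance *mmu, unsigned long da)
	{
		mmu->programmed = 1;	/* program the entry into this one MMU */
		return 0;		/* 0 on success, negative errno on failure */
	}

	static void mmu_clear_one(struct mmu_instance *mmu, unsigned long da)
	{
		mmu->programmed = 0;	/* undo the entry on this one MMU */
	}

	/* Apply the same operation to every MMU in the set, unwinding on error. */
	static int mmu_program_all(struct mmu_instance *mmus, int num,
				   unsigned long da)
	{
		int i, ret = 0;

		for (i = 0; i < num; i++) {
			ret = mmu_program_one(&mmus[i], da);
			if (ret)
				break;
		}
		if (ret)
			while (i--)	/* roll back what was already programmed */
				mmu_clear_one(&mmus[i], da);
		return ret;
	}

The reworked omap_iommu_map() in the diff below follows exactly this shape, looping over the per-domain array of omap_iommu_device entries and clearing already-programmed entries on failure.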
The multiple iommus are expected to be provided through a sentinel terminated array of omap_iommu_arch_data objects through the client user's device archdata. The OMAP driver core is enhanced to loop through the array of attached iommus and program them for all common operations. The sentinel-terminated logic is used so as to not change the omap_iommu_arch_data structure. NOTE: 1. The IOMMU group and IOMMU core registration is done only for the DSP processor core MMU even though both MMUs are represented by their own platform device and are probed individually. The IOMMU device linking uses this registered MMU device. The struct iommu_device for the second MMU is not used even though memory for it is allocated. 2. The OMAP IOMMU debugfs code still continues to operate on individual IOMMU objects. Signed-off-by: Suman Anna [t-kristo@ti.com: ported support to 4.13 based kernel] Signed-off-by: Tero Kristo Signed-off-by: Joerg Roedel --- drivers/iommu/omap-iommu.c | 364 ++++++++++++++++++++++++++++--------- drivers/iommu/omap-iommu.h | 30 ++- 2 files changed, 288 insertions(+), 106 deletions(-) diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c index 81ef729994ce..e135ab830ebf 100644 --- a/drivers/iommu/omap-iommu.c +++ b/drivers/iommu/omap-iommu.c @@ -2,6 +2,7 @@ * omap iommu: tlb and pagetable primitives * * Copyright (C) 2008-2010 Nokia Corporation + * Copyright (C) 2013-2017 Texas Instruments Incorporated - http://www.ti.com/ * * Written by Hiroshi DOYU , * Paul Mundt and Toshihiro Kobayashi @@ -71,13 +72,23 @@ static struct omap_iommu_domain *to_omap_domain(struct iommu_domain *dom) **/ void omap_iommu_save_ctx(struct device *dev) { - struct omap_iommu *obj = dev_to_omap_iommu(dev); - u32 *p = obj->ctx; + struct omap_iommu_arch_data *arch_data = dev->archdata.iommu; + struct omap_iommu *obj; + u32 *p; int i; - for (i = 0; i < (MMU_REG_SIZE / sizeof(u32)); i++) { - p[i] = iommu_read_reg(obj, i * sizeof(u32)); - dev_dbg(obj->dev, "%s\t[%02d] %08x\n", __func__, i, p[i]); + if (!arch_data) + return; + + while (arch_data->iommu_dev) { + obj = arch_data->iommu_dev; + p = obj->ctx; + for (i = 0; i < (MMU_REG_SIZE / sizeof(u32)); i++) { + p[i] = iommu_read_reg(obj, i * sizeof(u32)); + dev_dbg(obj->dev, "%s\t[%02d] %08x\n", __func__, i, + p[i]); + } + arch_data++; } } EXPORT_SYMBOL_GPL(omap_iommu_save_ctx); @@ -88,13 +99,23 @@ EXPORT_SYMBOL_GPL(omap_iommu_save_ctx); **/ void omap_iommu_restore_ctx(struct device *dev) { - struct omap_iommu *obj = dev_to_omap_iommu(dev); - u32 *p = obj->ctx; + struct omap_iommu_arch_data *arch_data = dev->archdata.iommu; + struct omap_iommu *obj; + u32 *p; int i; - for (i = 0; i < (MMU_REG_SIZE / sizeof(u32)); i++) { - iommu_write_reg(obj, p[i], i * sizeof(u32)); - dev_dbg(obj->dev, "%s\t[%02d] %08x\n", __func__, i, p[i]); + if (!arch_data) + return; + + while (arch_data->iommu_dev) { + obj = arch_data->iommu_dev; + p = obj->ctx; + for (i = 0; i < (MMU_REG_SIZE / sizeof(u32)); i++) { + iommu_write_reg(obj, p[i], i * sizeof(u32)); + dev_dbg(obj->dev, "%s\t[%02d] %08x\n", __func__, i, + p[i]); + } + arch_data++; } } EXPORT_SYMBOL_GPL(omap_iommu_restore_ctx); @@ -893,6 +914,24 @@ static void omap_iommu_detach(struct omap_iommu *obj) dev_dbg(obj->dev, "%s: %s\n", __func__, obj->name); } +static bool omap_iommu_can_register(struct platform_device *pdev) +{ + struct device_node *np = pdev->dev.of_node; + + if (!of_device_is_compatible(np, "ti,dra7-dsp-iommu")) + return true; + + /* + * restrict IOMMU core registration only for processor-port MDMA MMUs + 
* on DRA7 DSPs + */ + if ((!strcmp(dev_name(&pdev->dev), "40d01000.mmu")) || + (!strcmp(dev_name(&pdev->dev), "41501000.mmu"))) + return true; + + return false; +} + static int omap_iommu_dra7_get_dsp_system_cfg(struct platform_device *pdev, struct omap_iommu *obj) { @@ -984,19 +1023,22 @@ static int omap_iommu_probe(struct platform_device *pdev) return err; platform_set_drvdata(pdev, obj); - obj->group = iommu_group_alloc(); - if (IS_ERR(obj->group)) - return PTR_ERR(obj->group); + if (omap_iommu_can_register(pdev)) { + obj->group = iommu_group_alloc(); + if (IS_ERR(obj->group)) + return PTR_ERR(obj->group); - err = iommu_device_sysfs_add(&obj->iommu, obj->dev, NULL, obj->name); - if (err) - goto out_group; + err = iommu_device_sysfs_add(&obj->iommu, obj->dev, NULL, + obj->name); + if (err) + goto out_group; - iommu_device_set_ops(&obj->iommu, &omap_iommu_ops); + iommu_device_set_ops(&obj->iommu, &omap_iommu_ops); - err = iommu_device_register(&obj->iommu); - if (err) - goto out_sysfs; + err = iommu_device_register(&obj->iommu); + if (err) + goto out_sysfs; + } pm_runtime_irq_safe(obj->dev); pm_runtime_enable(obj->dev); @@ -1018,11 +1060,13 @@ static int omap_iommu_remove(struct platform_device *pdev) { struct omap_iommu *obj = platform_get_drvdata(pdev); - iommu_group_put(obj->group); - obj->group = NULL; + if (obj->group) { + iommu_group_put(obj->group); + obj->group = NULL; - iommu_device_sysfs_remove(&obj->iommu); - iommu_device_unregister(&obj->iommu); + iommu_device_sysfs_remove(&obj->iommu); + iommu_device_unregister(&obj->iommu); + } omap_iommu_debugfs_remove(obj); @@ -1068,11 +1112,13 @@ static int omap_iommu_map(struct iommu_domain *domain, unsigned long da, phys_addr_t pa, size_t bytes, int prot) { struct omap_iommu_domain *omap_domain = to_omap_domain(domain); - struct omap_iommu *oiommu = omap_domain->iommu_dev; - struct device *dev = oiommu->dev; + struct device *dev = omap_domain->dev; + struct omap_iommu_device *iommu; + struct omap_iommu *oiommu; struct iotlb_entry e; int omap_pgsz; - u32 ret; + u32 ret = -EINVAL; + int i; omap_pgsz = bytes_to_iopgsz(bytes); if (omap_pgsz < 0) { @@ -1084,9 +1130,24 @@ static int omap_iommu_map(struct iommu_domain *domain, unsigned long da, iotlb_init_entry(&e, da, pa, omap_pgsz); - ret = omap_iopgtable_store_entry(oiommu, &e); - if (ret) - dev_err(dev, "omap_iopgtable_store_entry failed: %d\n", ret); + iommu = omap_domain->iommus; + for (i = 0; i < omap_domain->num_iommus; i++, iommu++) { + oiommu = iommu->iommu_dev; + ret = omap_iopgtable_store_entry(oiommu, &e); + if (ret) { + dev_err(dev, "omap_iopgtable_store_entry failed: %d\n", + ret); + break; + } + } + + if (ret) { + while (i--) { + iommu--; + oiommu = iommu->iommu_dev; + iopgtable_clear_entry(oiommu, da); + } + } return ret; } @@ -1095,12 +1156,90 @@ static size_t omap_iommu_unmap(struct iommu_domain *domain, unsigned long da, size_t size) { struct omap_iommu_domain *omap_domain = to_omap_domain(domain); - struct omap_iommu *oiommu = omap_domain->iommu_dev; - struct device *dev = oiommu->dev; + struct device *dev = omap_domain->dev; + struct omap_iommu_device *iommu; + struct omap_iommu *oiommu; + bool error = false; + size_t bytes = 0; + int i; dev_dbg(dev, "unmapping da 0x%lx size %u\n", da, size); - return iopgtable_clear_entry(oiommu, da); + iommu = omap_domain->iommus; + for (i = 0; i < omap_domain->num_iommus; i++, iommu++) { + oiommu = iommu->iommu_dev; + bytes = iopgtable_clear_entry(oiommu, da); + if (!bytes) + error = true; + } + + /* + * simplify return - we are only 
checking if any of the iommus + * reported an error, but not if all of them are unmapping the + * same number of entries. This should not occur due to the + * mirror programming. + */ + return error ? 0 : bytes; +} + +static int omap_iommu_count(struct device *dev) +{ + struct omap_iommu_arch_data *arch_data = dev->archdata.iommu; + int count = 0; + + while (arch_data->iommu_dev) { + count++; + arch_data++; + } + + return count; +} + +/* caller should call cleanup if this function fails */ +static int omap_iommu_attach_init(struct device *dev, + struct omap_iommu_domain *odomain) +{ + struct omap_iommu_device *iommu; + int i; + + odomain->num_iommus = omap_iommu_count(dev); + if (!odomain->num_iommus) + return -EINVAL; + + odomain->iommus = kcalloc(odomain->num_iommus, sizeof(*iommu), + GFP_ATOMIC); + if (!odomain->iommus) + return -ENOMEM; + + iommu = odomain->iommus; + for (i = 0; i < odomain->num_iommus; i++, iommu++) { + iommu->pgtable = kzalloc(IOPGD_TABLE_SIZE, GFP_ATOMIC); + if (!iommu->pgtable) + return -ENOMEM; + + /* + * should never fail, but please keep this around to ensure + * we keep the hardware happy + */ + if (WARN_ON(!IS_ALIGNED((long)iommu->pgtable, + IOPGD_TABLE_SIZE))) + return -EINVAL; + } + + return 0; +} + +static void omap_iommu_detach_fini(struct omap_iommu_domain *odomain) +{ + int i; + struct omap_iommu_device *iommu = odomain->iommus; + + for (i = 0; iommu && i < odomain->num_iommus; i++, iommu++) + kfree(iommu->pgtable); + + kfree(odomain->iommus); + odomain->num_iommus = 0; + odomain->iommus = NULL; } static int @@ -1108,8 +1247,10 @@ omap_iommu_attach_dev(struct iommu_domain *domain, struct device *dev) { struct omap_iommu_domain *omap_domain = to_omap_domain(domain); struct omap_iommu_arch_data *arch_data = dev->archdata.iommu; + struct omap_iommu_device *iommu; struct omap_iommu *oiommu; int ret = 0; + int i; if (!arch_data || !arch_data->iommu_dev) { dev_err(dev, "device doesn't have an associated iommu\n"); @@ -1125,19 +1266,42 @@ omap_iommu_attach_dev(struct iommu_domain *domain, struct device *dev) goto out; } - oiommu = arch_data->iommu_dev; - - /* get a handle to and enable the omap iommu */ - ret = omap_iommu_attach(oiommu, omap_domain->pgtable); + ret = omap_iommu_attach_init(dev, omap_domain); if (ret) { - dev_err(dev, "can't get omap iommu: %d\n", ret); - goto out; + dev_err(dev, "failed to allocate required iommu data %d\n", + ret); + goto init_fail; } - omap_domain->iommu_dev = oiommu; - omap_domain->dev = dev; - oiommu->domain = domain; + iommu = omap_domain->iommus; + for (i = 0; i < omap_domain->num_iommus; i++, iommu++, arch_data++) { + /* configure and enable the omap iommu */ + oiommu = arch_data->iommu_dev; + ret = omap_iommu_attach(oiommu, iommu->pgtable); + if (ret) { + dev_err(dev, "can't get omap iommu: %d\n", ret); + goto attach_fail; + } + oiommu->domain = domain; + iommu->iommu_dev = oiommu; + } + + omap_domain->dev = dev; + + goto out; + +attach_fail: + while (i--) { + iommu--; + arch_data--; + oiommu = iommu->iommu_dev; + omap_iommu_detach(oiommu); + iommu->iommu_dev = NULL; + oiommu->domain = NULL; + } +init_fail: + omap_iommu_detach_fini(omap_domain); out: spin_unlock(&omap_domain->lock); return ret; @@ -1146,7 +1310,10 @@ out: static void _omap_iommu_detach_dev(struct omap_iommu_domain *omap_domain, struct device *dev) { - struct omap_iommu *oiommu = dev_to_omap_iommu(dev); + struct omap_iommu_arch_data *arch_data = dev->archdata.iommu; + struct omap_iommu_device *iommu = omap_domain->iommus; + struct omap_iommu *oiommu; + 
int i; if (!omap_domain->dev) { dev_err(dev, "domain has no attached device\n"); @@ -1159,13 +1326,24 @@ static void _omap_iommu_detach_dev(struct omap_iommu_domain *omap_domain, return; } - iopgtable_clear_entry_all(oiommu); + /* + * cleanup in the reverse order of attachment - this addresses + * any h/w dependencies between multiple instances, if any + */ + iommu += (omap_domain->num_iommus - 1); + arch_data += (omap_domain->num_iommus - 1); + for (i = 0; i < omap_domain->num_iommus; i++, iommu--, arch_data--) { + oiommu = iommu->iommu_dev; + iopgtable_clear_entry_all(oiommu); - omap_iommu_detach(oiommu); + omap_iommu_detach(oiommu); + iommu->iommu_dev = NULL; + oiommu->domain = NULL; + } + + omap_iommu_detach_fini(omap_domain); - omap_domain->iommu_dev = NULL; omap_domain->dev = NULL; - oiommu->domain = NULL; } static void omap_iommu_detach_dev(struct iommu_domain *domain, @@ -1187,18 +1365,7 @@ static struct iommu_domain *omap_iommu_domain_alloc(unsigned type) omap_domain = kzalloc(sizeof(*omap_domain), GFP_KERNEL); if (!omap_domain) - goto out; - - omap_domain->pgtable = kzalloc(IOPGD_TABLE_SIZE, GFP_KERNEL); - if (!omap_domain->pgtable) - goto fail_nomem; - - /* - * should never fail, but please keep this around to ensure - * we keep the hardware happy - */ - if (WARN_ON(!IS_ALIGNED((long)omap_domain->pgtable, IOPGD_TABLE_SIZE))) - goto fail_align; + return NULL; spin_lock_init(&omap_domain->lock); @@ -1207,13 +1374,6 @@ static struct iommu_domain *omap_iommu_domain_alloc(unsigned type) omap_domain->domain.geometry.force_aperture = true; return &omap_domain->domain; - -fail_align: - kfree(omap_domain->pgtable); -fail_nomem: - kfree(omap_domain); -out: - return NULL; } static void omap_iommu_domain_free(struct iommu_domain *domain) @@ -1227,7 +1387,6 @@ static void omap_iommu_domain_free(struct iommu_domain *domain) if (omap_domain->dev) _omap_iommu_detach_dev(omap_domain, omap_domain->dev); - kfree(omap_domain->pgtable); kfree(omap_domain); } @@ -1235,11 +1394,16 @@ static phys_addr_t omap_iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t da) { struct omap_iommu_domain *omap_domain = to_omap_domain(domain); - struct omap_iommu *oiommu = omap_domain->iommu_dev; + struct omap_iommu_device *iommu = omap_domain->iommus; + struct omap_iommu *oiommu = iommu->iommu_dev; struct device *dev = oiommu->dev; u32 *pgd, *pte; phys_addr_t ret = 0; + /* + * all the iommus within the domain will have identical programming, + * so perform the lookup using just the first iommu + */ iopgtable_lookup_entry(oiommu, da, &pgd, &pte); if (pte) { @@ -1265,11 +1429,12 @@ static phys_addr_t omap_iommu_iova_to_phys(struct iommu_domain *domain, static int omap_iommu_add_device(struct device *dev) { - struct omap_iommu_arch_data *arch_data; + struct omap_iommu_arch_data *arch_data, *tmp; struct omap_iommu *oiommu; struct iommu_group *group; struct device_node *np; struct platform_device *pdev; + int num_iommus, i; int ret; /* @@ -1281,36 +1446,57 @@ static int omap_iommu_add_device(struct device *dev) if (!dev->of_node) return 0; - np = of_parse_phandle(dev->of_node, "iommus", 0); - if (!np) + /* + * retrieve the count of IOMMU nodes using phandle size as element size + * since #iommu-cells = 0 for OMAP + */ + num_iommus = of_property_count_elems_of_size(dev->of_node, "iommus", + sizeof(phandle)); + if (num_iommus < 0) return 0; - pdev = of_find_device_by_node(np); - if (WARN_ON(!pdev)) { - of_node_put(np); - return -EINVAL; - } - - oiommu = platform_get_drvdata(pdev); - if (!oiommu) { - of_node_put(np); 
- return -EINVAL; - } - - arch_data = kzalloc(sizeof(*arch_data), GFP_KERNEL); - if (!arch_data) { - of_node_put(np); + arch_data = kzalloc((num_iommus + 1) * sizeof(*arch_data), GFP_KERNEL); + if (!arch_data) return -ENOMEM; + + for (i = 0, tmp = arch_data; i < num_iommus; i++, tmp++) { + np = of_parse_phandle(dev->of_node, "iommus", i); + if (!np) { + kfree(arch_data); + return -EINVAL; + } + + pdev = of_find_device_by_node(np); + if (WARN_ON(!pdev)) { + of_node_put(np); + kfree(arch_data); + return -EINVAL; + } + + oiommu = platform_get_drvdata(pdev); + if (!oiommu) { + of_node_put(np); + kfree(arch_data); + return -EINVAL; + } + + tmp->iommu_dev = oiommu; + + of_node_put(np); } + /* + * use the first IOMMU alone for the sysfs device linking. + * TODO: Evaluate if a single iommu_group needs to be + * maintained for both IOMMUs + */ + oiommu = arch_data->iommu_dev; ret = iommu_device_link(&oiommu->iommu, dev); if (ret) { kfree(arch_data); - of_node_put(np); return ret; } - arch_data->iommu_dev = oiommu; dev->archdata.iommu = arch_data; /* @@ -1326,8 +1512,6 @@ static int omap_iommu_add_device(struct device *dev) } iommu_group_put(group); - of_node_put(np); - return 0; } diff --git a/drivers/iommu/omap-iommu.h b/drivers/iommu/omap-iommu.h index a675af29a6ec..1703159ef5af 100644 --- a/drivers/iommu/omap-iommu.h +++ b/drivers/iommu/omap-iommu.h @@ -28,18 +28,27 @@ struct iotlb_entry { u32 endian, elsz, mixed; }; +/** + * struct omap_iommu_device - omap iommu device data + * @pgtable: page table used by an omap iommu attached to a domain + * @iommu_dev: pointer to store an omap iommu instance attached to a domain + */ +struct omap_iommu_device { + u32 *pgtable; + struct omap_iommu *iommu_dev; +}; + /** * struct omap_iommu_domain - omap iommu domain - * @pgtable: the page table - * @iommu_dev: an omap iommu device attached to this domain. only a single - * iommu device can be attached for now. + * @num_iommus: number of iommus in this domain + * @iommus: omap iommu device data for all iommus in this domain * @dev: Device using this domain. * @lock: domain lock, should be taken when attaching/detaching * @domain: generic domain handle used by iommu core code */ struct omap_iommu_domain { - u32 *pgtable; - struct omap_iommu *iommu_dev; + u32 num_iommus; + struct omap_iommu_device *iommus; struct device *dev; spinlock_t lock; struct iommu_domain domain; @@ -97,17 +106,6 @@ struct iotlb_lock { short vict; }; -/** - * dev_to_omap_iommu() - retrieves an omap iommu object from a user device - * @dev: iommu client device - */ -static inline struct omap_iommu *dev_to_omap_iommu(struct device *dev) -{ - struct omap_iommu_arch_data *arch_data = dev->archdata.iommu; - - return arch_data->iommu_dev; -} - /* * MMU Register offsets */ From 7a974b29fe5d3704eafec707ba6390c3288c80fe Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Fri, 15 Sep 2017 13:05:08 +0200 Subject: [PATCH 03/54] iommu/exynos: Rework runtime PM links management add_device is a bit more suitable for establishing runtime PM links than the xlate callback. This change also makes it possible to implement proper cleanup - in remove_device callback. 
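The link management this patch moves around boils down to the following sketch. device_link_add(), device_link_del() and DL_FLAG_PM_RUNTIME are the real driver-core API (as used in the diff below); the wrapper names here are invented for illustration:

	#include <linux/device.h>

	/* add_device path: make the master a runtime-PM consumer of its SYSMMU */
	static struct device_link *sysmmu_link_master(struct device *master,
						      struct device *sysmmu)
	{
		/*
		 * With DL_FLAG_PM_RUNTIME the SYSMMU is runtime-resumed whenever
		 * its master is, so the driver needs no pm_runtime_get/put calls
		 * of its own.
		 */
		return device_link_add(master, sysmmu, DL_FLAG_PM_RUNTIME);
	}

	/* remove_device path: tear the link down again */
	static void sysmmu_unlink_master(struct device_link *link)
	{
		if (link)
			device_link_del(link);
	}

Keeping the returned struct device_link pointer, as the patch does in sysmmu_drvdata, is what makes the remove_device cleanup possible.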
Signed-off-by: Marek Szyprowski Signed-off-by: Joerg Roedel --- drivers/iommu/exynos-iommu.c | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c index f596fcc32898..91c548d49b92 100644 --- a/drivers/iommu/exynos-iommu.c +++ b/drivers/iommu/exynos-iommu.c @@ -263,6 +263,7 @@ struct exynos_iommu_domain { struct sysmmu_drvdata { struct device *sysmmu; /* SYSMMU controller device */ struct device *master; /* master device (owner) */ + struct device_link *link; /* runtime PM link to master */ void __iomem *sfrbase; /* our registers */ struct clk *clk; /* SYSMMU's clock */ struct clk *aclk; /* SYSMMU's aclk clock */ @@ -1250,6 +1251,8 @@ static struct iommu_group *get_device_iommu_group(struct device *dev) static int exynos_iommu_add_device(struct device *dev) { + struct exynos_iommu_owner *owner = dev->archdata.iommu; + struct sysmmu_drvdata *data; struct iommu_group *group; if (!has_sysmmu(dev)) @@ -1260,6 +1263,15 @@ static int exynos_iommu_add_device(struct device *dev) if (IS_ERR(group)) return PTR_ERR(group); + list_for_each_entry(data, &owner->controllers, owner_node) { + /* + * SYSMMU will be runtime activated via device link + * (dependency) to its master device, so there are no + * direct calls to pm_runtime_get/put in this driver. + */ + data->link = device_link_add(dev, data->sysmmu, + DL_FLAG_PM_RUNTIME); + } iommu_group_put(group); return 0; @@ -1268,6 +1280,7 @@ static int exynos_iommu_add_device(struct device *dev) static void exynos_iommu_remove_device(struct device *dev) { struct exynos_iommu_owner *owner = dev->archdata.iommu; + struct sysmmu_drvdata *data; if (!has_sysmmu(dev)) return; @@ -1283,6 +1296,9 @@ static void exynos_iommu_remove_device(struct device *dev) } } iommu_group_remove_device(dev); + + list_for_each_entry(data, &owner->controllers, owner_node) + device_link_del(data->link); } static int exynos_iommu_of_xlate(struct device *dev, @@ -1316,13 +1332,6 @@ static int exynos_iommu_of_xlate(struct device *dev, list_add_tail(&data->owner_node, &owner->controllers); data->master = dev; - /* - * SYSMMU will be runtime activated via device link (dependency) to its - * master device, so there are no direct calls to pm_runtime_get/put - * in this driver. - */ - device_link_add(dev, data->sysmmu, DL_FLAG_PM_RUNTIME); - return 0; } From 2070f940a6d5148cf2df0d0087ff0a64d9f15237 Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Thu, 21 Sep 2017 16:52:42 +0100 Subject: [PATCH 04/54] iommu/iova: Optimise rbtree searching Checking the IOVA bounds separately before deciding which direction to continue the search (if necessary) results in redundantly comparing both pfns twice each. GCC can already determine that the final comparison op is redundant and optimise it down to 3 in total, but we can go one further with a little tweak of the ordering (which makes the intent of the code that much cleaner as a bonus). 
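The effect of the reordering is easiest to see in isolation. Below is a sketch of the resulting lookup, using a generic interval-node type rather than the driver's struct iova:

	struct range_node {
		unsigned long lo, hi;			/* inclusive range */
		struct range_node *left, *right;	/* stand-ins for rb_left/rb_right */
	};

	static struct range_node *range_find(struct range_node *node, unsigned long pfn)
	{
		while (node) {
			if (pfn < node->lo)
				node = node->left;
			else if (pfn > node->hi)
				node = node->right;
			else
				return node;	/* pfn falls within node's range */
		}
		return NULL;
	}

Each iteration now performs at most two comparisons, and the "found" case is simply whatever remains once both bounds checks have failed.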
Signed-off-by: Zhen Lei Tested-by: Ard Biesheuvel Tested-by: Zhen Lei Tested-by: Nate Watterson [rm: rewrote commit message to clarify] Signed-off-by: Robin Murphy Signed-off-by: Joerg Roedel --- drivers/iommu/iova.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index 33edfa794ae9..f129ff4f5c89 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -342,15 +342,12 @@ private_find_iova(struct iova_domain *iovad, unsigned long pfn) while (node) { struct iova *iova = rb_entry(node, struct iova, node); - /* If pfn falls within iova's range, return iova */ - if ((pfn >= iova->pfn_lo) && (pfn <= iova->pfn_hi)) { - return iova; - } - if (pfn < iova->pfn_lo) node = node->rb_left; - else if (pfn > iova->pfn_lo) + else if (pfn > iova->pfn_hi) node = node->rb_right; + else + return iova; /* pfn falls within iova's range */ } return NULL; From 086c83acb70fc6da044c9ca45c1c9780c64545b0 Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Thu, 21 Sep 2017 16:52:43 +0100 Subject: [PATCH 05/54] iommu/iova: Optimise the padding calculation The mask for calculating the padding size doesn't change, so there's no need to recalculate it every loop iteration. Furthermore, once we've done that, it becomes clear that we don't actually need to calculate a padding size at all - by flipping the arithmetic around, we can just combine the upper limit, size, and mask directly to check against the lower limit. For an arm64 build, this alone knocks 20% off the object code size of the entire alloc_iova() function! Signed-off-by: Zhen Lei Tested-by: Ard Biesheuvel Tested-by: Zhen Lei Tested-by: Nate Watterson [rm: simplified more of the arithmetic, rewrote commit message] Signed-off-by: Robin Murphy Signed-off-by: Joerg Roedel --- drivers/iommu/iova.c | 42 +++++++++++++++--------------------------- 1 file changed, 15 insertions(+), 27 deletions(-) diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index f129ff4f5c89..20be9a8b3188 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -182,24 +182,17 @@ iova_insert_rbtree(struct rb_root *root, struct iova *iova, rb_insert_color(&iova->node, root); } -/* - * Computes the padding size required, to make the start address - * naturally aligned on the power-of-two order of its size - */ -static unsigned int -iova_get_pad_size(unsigned int size, unsigned int limit_pfn) -{ - return (limit_pfn - size) & (__roundup_pow_of_two(size) - 1); -} - static int __alloc_and_insert_iova_range(struct iova_domain *iovad, unsigned long size, unsigned long limit_pfn, struct iova *new, bool size_aligned) { struct rb_node *prev, *curr = NULL; unsigned long flags; - unsigned long saved_pfn; - unsigned int pad_size = 0; + unsigned long saved_pfn, new_pfn; + unsigned long align_mask = ~0UL; + + if (size_aligned) + align_mask <<= fls_long(size - 1); /* Walk the tree backwards */ spin_lock_irqsave(&iovad->iova_rbtree_lock, flags); @@ -209,31 +202,26 @@ static int __alloc_and_insert_iova_range(struct iova_domain *iovad, while (curr) { struct iova *curr_iova = rb_entry(curr, struct iova, node); - if (limit_pfn <= curr_iova->pfn_lo) { + if (limit_pfn <= curr_iova->pfn_lo) goto move_left; - } else if (limit_pfn > curr_iova->pfn_hi) { - if (size_aligned) - pad_size = iova_get_pad_size(size, limit_pfn); - if ((curr_iova->pfn_hi + size + pad_size) < limit_pfn) - break; /* found a free slot */ - } + + if (((limit_pfn - size) & align_mask) > curr_iova->pfn_hi) + break; /* found a free slot */ + + limit_pfn = curr_iova->pfn_lo; 
move_left: prev = curr; curr = rb_prev(curr); } - if (!curr) { - if (size_aligned) - pad_size = iova_get_pad_size(size, limit_pfn); - if ((iovad->start_pfn + size + pad_size) > limit_pfn) { - spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); - return -ENOMEM; - } + new_pfn = (limit_pfn - size) & align_mask; + if (limit_pfn < size || new_pfn < iovad->start_pfn) { + spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); + return -ENOMEM; } /* pfn_lo will point to size aligned address if size_aligned is set */ - new->pfn_lo = limit_pfn - (size + pad_size); + new->pfn_lo = new_pfn; new->pfn_hi = new->pfn_lo + size - 1; /* If we have 'prev', it's a valid place to start the insertion. */ From e60aa7b53845a261dd419652f12ab9f89e668843 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 21 Sep 2017 16:52:44 +0100 Subject: [PATCH 06/54] iommu/iova: Extend rbtree node caching The cached node mechanism provides a significant performance benefit for allocations using a 32-bit DMA mask, but in the case of non-PCI devices or where the 32-bit space is full, the loss of this benefit can be significant - on large systems there can be many thousands of entries in the tree, such that walking all the way down to find free space every time becomes increasingly awful. Maintain a similar cached node for the whole IOVA space as a superset of the 32-bit space so that performance can remain much more consistent. Inspired by work by Zhen Lei . Tested-by: Ard Biesheuvel Tested-by: Zhen Lei Tested-by: Nate Watterson Signed-off-by: Robin Murphy Signed-off-by: Joerg Roedel --- drivers/iommu/iova.c | 60 +++++++++++++++++++++----------------------- include/linux/iova.h | 3 ++- 2 files changed, 30 insertions(+), 33 deletions(-) diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index 20be9a8b3188..c6f5a22f8d20 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -48,6 +48,7 @@ init_iova_domain(struct iova_domain *iovad, unsigned long granule, spin_lock_init(&iovad->iova_rbtree_lock); iovad->rbroot = RB_ROOT; + iovad->cached_node = NULL; iovad->cached32_node = NULL; iovad->granule = granule; iovad->start_pfn = start_pfn; @@ -110,48 +111,44 @@ EXPORT_SYMBOL_GPL(init_iova_flush_queue); static struct rb_node * __get_cached_rbnode(struct iova_domain *iovad, unsigned long *limit_pfn) { - if ((*limit_pfn > iovad->dma_32bit_pfn) || - (iovad->cached32_node == NULL)) + struct rb_node *cached_node = NULL; + struct iova *curr_iova; + + if (*limit_pfn <= iovad->dma_32bit_pfn) + cached_node = iovad->cached32_node; + if (!cached_node) + cached_node = iovad->cached_node; + if (!cached_node) return rb_last(&iovad->rbroot); - else { - struct rb_node *prev_node = rb_prev(iovad->cached32_node); - struct iova *curr_iova = - rb_entry(iovad->cached32_node, struct iova, node); - *limit_pfn = curr_iova->pfn_lo; - return prev_node; - } + + curr_iova = rb_entry(cached_node, struct iova, node); + *limit_pfn = min(*limit_pfn, curr_iova->pfn_lo); + + return rb_prev(cached_node); } static void -__cached_rbnode_insert_update(struct iova_domain *iovad, - unsigned long limit_pfn, struct iova *new) +__cached_rbnode_insert_update(struct iova_domain *iovad, struct iova *new) { - if (limit_pfn != iovad->dma_32bit_pfn) - return; - iovad->cached32_node = &new->node; + if (new->pfn_hi < iovad->dma_32bit_pfn) + iovad->cached32_node = &new->node; + else + iovad->cached_node = &new->node; } static void __cached_rbnode_delete_update(struct iova_domain *iovad, struct iova *free) { struct iova *cached_iova; - struct rb_node *curr; - if 
(!iovad->cached32_node) - return; - curr = iovad->cached32_node; - cached_iova = rb_entry(curr, struct iova, node); + cached_iova = rb_entry(iovad->cached32_node, struct iova, node); + if (free->pfn_hi < iovad->dma_32bit_pfn && + iovad->cached32_node && free->pfn_lo >= cached_iova->pfn_lo) + iovad->cached32_node = rb_next(&free->node); - if (free->pfn_lo >= cached_iova->pfn_lo) { - struct rb_node *node = rb_next(&free->node); - struct iova *iova = rb_entry(node, struct iova, node); - - /* only cache if it's below 32bit pfn */ - if (node && iova->pfn_lo < iovad->dma_32bit_pfn) - iovad->cached32_node = node; - else - iovad->cached32_node = NULL; - } + cached_iova = rb_entry(iovad->cached_node, struct iova, node); + if (iovad->cached_node && free->pfn_lo >= cached_iova->pfn_lo) + iovad->cached_node = rb_next(&free->node); } /* Insert the iova into domain rbtree by holding writer lock */ @@ -188,7 +185,7 @@ static int __alloc_and_insert_iova_range(struct iova_domain *iovad, { struct rb_node *prev, *curr = NULL; unsigned long flags; - unsigned long saved_pfn, new_pfn; + unsigned long new_pfn; unsigned long align_mask = ~0UL; if (size_aligned) @@ -196,7 +193,6 @@ static int __alloc_and_insert_iova_range(struct iova_domain *iovad, /* Walk the tree backwards */ spin_lock_irqsave(&iovad->iova_rbtree_lock, flags); - saved_pfn = limit_pfn; curr = __get_cached_rbnode(iovad, &limit_pfn); prev = curr; while (curr) { @@ -226,7 +222,7 @@ move_left: /* If we have 'prev', it's a valid place to start the insertion. */ iova_insert_rbtree(&iovad->rbroot, new, prev); - __cached_rbnode_insert_update(iovad, saved_pfn, new); + __cached_rbnode_insert_update(iovad, new); spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); diff --git a/include/linux/iova.h b/include/linux/iova.h index d179b9bf7814..69ea3e258ff2 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -70,7 +70,8 @@ struct iova_fq { struct iova_domain { spinlock_t iova_rbtree_lock; /* Lock to protect update of rbtree */ struct rb_root rbroot; /* iova domain rbtree root */ - struct rb_node *cached32_node; /* Save last alloced node */ + struct rb_node *cached_node; /* Save last alloced node */ + struct rb_node *cached32_node; /* Save last 32-bit alloced node */ unsigned long granule; /* pfn granularity for this domain */ unsigned long start_pfn; /* Lower limit for this domain */ unsigned long dma_32bit_pfn; From aa3ac9469c1850ed00741955b975c3a19029763a Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Thu, 21 Sep 2017 16:52:45 +0100 Subject: [PATCH 07/54] iommu/iova: Make dma_32bit_pfn implicit Now that the cached node optimisation can apply to all allocations, the couple of users which were playing tricks with dma_32bit_pfn in order to benefit from it can stop doing so. Conversely, there is also no need for all the other users to explicitly calculate a 'real' 32-bit PFN, when init_iova_domain() can happily do that itself from the page granularity. 
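The 32-bit boundary that init_iova_domain() now derives internally is simple shift arithmetic. A sketch, assuming a power-of-two granule as the function already requires:

	#include <linux/log2.h>

	static unsigned long dma_32bit_pfn(unsigned long granule)
	{
		/* equivalent of iova_shift(): log2 of the pfn granularity */
		unsigned long shift = ilog2(granule);

		/* first pfn above the 32-bit boundary, in granule units */
		return 1UL << (32 - shift);
	}

For a 4K granule this yields 0x100000, the pfn of the 4GB boundary.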
CC: Thierry Reding CC: Jonathan Hunter CC: David Airlie CC: Sudeep Dutt CC: Ashutosh Dixit Signed-off-by: Zhen Lei Tested-by: Ard Biesheuvel Tested-by: Zhen Lei Tested-by: Nate Watterson [rm: use iova_shift(), rewrote commit message] Signed-off-by: Robin Murphy Signed-off-by: Joerg Roedel --- drivers/gpu/drm/tegra/drm.c | 3 +-- drivers/gpu/host1x/dev.c | 3 +-- drivers/iommu/amd_iommu.c | 7 ++----- drivers/iommu/dma-iommu.c | 18 +----------------- drivers/iommu/intel-iommu.c | 11 +++-------- drivers/iommu/iova.c | 4 ++-- drivers/misc/mic/scif/scif_rma.c | 3 +-- include/linux/iova.h | 5 ++--- 8 files changed, 13 insertions(+), 41 deletions(-) diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c index 597d563d636a..b822e484b7e5 100644 --- a/drivers/gpu/drm/tegra/drm.c +++ b/drivers/gpu/drm/tegra/drm.c @@ -155,8 +155,7 @@ static int tegra_drm_load(struct drm_device *drm, unsigned long flags) order = __ffs(tegra->domain->pgsize_bitmap); init_iova_domain(&tegra->carveout.domain, 1UL << order, - carveout_start >> order, - carveout_end >> order); + carveout_start >> order); tegra->carveout.shift = iova_shift(&tegra->carveout.domain); tegra->carveout.limit = carveout_end >> tegra->carveout.shift; diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c index 7f22c5c37660..5267c62e8896 100644 --- a/drivers/gpu/host1x/dev.c +++ b/drivers/gpu/host1x/dev.c @@ -198,8 +198,7 @@ static int host1x_probe(struct platform_device *pdev) order = __ffs(host->domain->pgsize_bitmap); init_iova_domain(&host->iova, 1UL << order, - geometry->aperture_start >> order, - geometry->aperture_end >> order); + geometry->aperture_start >> order); host->iova_end = geometry->aperture_end; } diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 51f8215877f5..647ab7691aee 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -63,7 +63,6 @@ /* IO virtual address start page frame number */ #define IOVA_START_PFN (1) #define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT) -#define DMA_32BIT_PFN IOVA_PFN(DMA_BIT_MASK(32)) /* Reserved IOVA ranges */ #define MSI_RANGE_START (0xfee00000) @@ -1788,8 +1787,7 @@ static struct dma_ops_domain *dma_ops_domain_alloc(void) if (!dma_dom->domain.pt_root) goto free_dma_dom; - init_iova_domain(&dma_dom->iovad, PAGE_SIZE, - IOVA_START_PFN, DMA_32BIT_PFN); + init_iova_domain(&dma_dom->iovad, PAGE_SIZE, IOVA_START_PFN); if (init_iova_flush_queue(&dma_dom->iovad, iova_domain_flush_tlb, NULL)) goto free_dma_dom; @@ -2696,8 +2694,7 @@ static int init_reserved_iova_ranges(void) struct pci_dev *pdev = NULL; struct iova *val; - init_iova_domain(&reserved_iova_ranges, PAGE_SIZE, - IOVA_START_PFN, DMA_32BIT_PFN); + init_iova_domain(&reserved_iova_ranges, PAGE_SIZE, IOVA_START_PFN); lockdep_set_class(&reserved_iova_ranges.iova_rbtree_lock, &reserved_rbtree_key); diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 9d1cebe7f6cb..191be9c80a8a 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -292,18 +292,7 @@ int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base, /* ...then finally give it a kicking to make sure it fits */ base_pfn = max_t(unsigned long, base_pfn, domain->geometry.aperture_start >> order); - end_pfn = min_t(unsigned long, end_pfn, - domain->geometry.aperture_end >> order); } - /* - * PCI devices may have larger DMA masks, but still prefer allocating - * within a 32-bit mask to avoid DAC addressing. 
Such limitations don't - * apply to the typical platform device, so for those we may as well - * leave the cache limit at the top of their range to save an rb_last() - * traversal on every allocation. - */ - if (dev && dev_is_pci(dev)) - end_pfn &= DMA_BIT_MASK(32) >> order; /* start_pfn is always nonzero for an already-initialised domain */ if (iovad->start_pfn) { @@ -312,16 +301,11 @@ int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base, pr_warn("Incompatible range for DMA domain\n"); return -EFAULT; } - /* - * If we have devices with different DMA masks, move the free - * area cache limit down for the benefit of the smaller one. - */ - iovad->dma_32bit_pfn = min(end_pfn + 1, iovad->dma_32bit_pfn); return 0; } - init_iova_domain(iovad, 1UL << order, base_pfn, end_pfn); + init_iova_domain(iovad, 1UL << order, base_pfn); if (!dev) return 0; diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 6784a05dd6b2..ebb48353dd39 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -82,8 +82,6 @@ #define IOVA_START_PFN (1) #define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT) -#define DMA_32BIT_PFN IOVA_PFN(DMA_BIT_MASK(32)) -#define DMA_64BIT_PFN IOVA_PFN(DMA_BIT_MASK(64)) /* page table handling */ #define LEVEL_STRIDE (9) @@ -1878,8 +1876,7 @@ static int dmar_init_reserved_ranges(void) struct iova *iova; int i; - init_iova_domain(&reserved_iova_list, VTD_PAGE_SIZE, IOVA_START_PFN, - DMA_32BIT_PFN); + init_iova_domain(&reserved_iova_list, VTD_PAGE_SIZE, IOVA_START_PFN); lockdep_set_class(&reserved_iova_list.iova_rbtree_lock, &reserved_rbtree_key); @@ -1938,8 +1935,7 @@ static int domain_init(struct dmar_domain *domain, struct intel_iommu *iommu, unsigned long sagaw; int err; - init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN, - DMA_32BIT_PFN); + init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN); err = init_iova_flush_queue(&domain->iovad, iommu_flush_iova, iova_entry_free); @@ -4897,8 +4893,7 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width) { int adjust_width; - init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN, - DMA_32BIT_PFN); + init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN); domain_reserve_special_ranges(domain); /* calculate AGAW */ diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index c6f5a22f8d20..65032e60a5d1 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -37,7 +37,7 @@ static void fq_flush_timeout(unsigned long data); void init_iova_domain(struct iova_domain *iovad, unsigned long granule, - unsigned long start_pfn, unsigned long pfn_32bit) + unsigned long start_pfn) { /* * IOVA granularity will normally be equal to the smallest @@ -52,7 +52,7 @@ init_iova_domain(struct iova_domain *iovad, unsigned long granule, iovad->cached32_node = NULL; iovad->granule = granule; iovad->start_pfn = start_pfn; - iovad->dma_32bit_pfn = pfn_32bit + 1; + iovad->dma_32bit_pfn = 1UL << (32 - iova_shift(iovad)); iovad->flush_cb = NULL; iovad->fq = NULL; init_iova_rcaches(iovad); diff --git a/drivers/misc/mic/scif/scif_rma.c b/drivers/misc/mic/scif/scif_rma.c index 329727e00e97..c824329f7012 100644 --- a/drivers/misc/mic/scif/scif_rma.c +++ b/drivers/misc/mic/scif/scif_rma.c @@ -39,8 +39,7 @@ void scif_rma_ep_init(struct scif_endpt *ep) struct scif_endpt_rma_info *rma = &ep->rma_info; mutex_init(&rma->rma_lock); - init_iova_domain(&rma->iovad, PAGE_SIZE, SCIF_IOVA_START_PFN, - SCIF_DMA_64BIT_PFN); + init_iova_domain(&rma->iovad, PAGE_SIZE, 
SCIF_IOVA_START_PFN); spin_lock_init(&rma->tc_lock); mutex_init(&rma->mmn_lock); INIT_LIST_HEAD(&rma->reg_list); diff --git a/include/linux/iova.h b/include/linux/iova.h index 69ea3e258ff2..953cfd20f152 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -154,7 +154,7 @@ struct iova *reserve_iova(struct iova_domain *iovad, unsigned long pfn_lo, unsigned long pfn_hi); void copy_reserved_iova(struct iova_domain *from, struct iova_domain *to); void init_iova_domain(struct iova_domain *iovad, unsigned long granule, - unsigned long start_pfn, unsigned long pfn_32bit); + unsigned long start_pfn); int init_iova_flush_queue(struct iova_domain *iovad, iova_flush_cb flush_cb, iova_entry_dtor entry_dtor); struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn); @@ -230,8 +230,7 @@ static inline void copy_reserved_iova(struct iova_domain *from, static inline void init_iova_domain(struct iova_domain *iovad, unsigned long granule, - unsigned long start_pfn, - unsigned long pfn_32bit) + unsigned long start_pfn) { } From bb68b2fbfbd643d4407541f9c7a16a2c9b3a57c7 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 21 Sep 2017 16:52:46 +0100 Subject: [PATCH 08/54] iommu/iova: Add rbtree anchor node Add a permanent dummy IOVA reservation to the rbtree, such that we can always access the top of the address space instantly. The immediate benefit is that we remove the overhead of the rb_last() traversal when not using the cached node, but it also paves the way for further simplifications. Signed-off-by: Robin Murphy Signed-off-by: Joerg Roedel --- drivers/iommu/iova.c | 15 +++++++++++++-- include/linux/iova.h | 1 + 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index 65032e60a5d1..9e04c1f3e740 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -24,6 +24,9 @@ #include #include +/* The anchor node sits above the top of the usable address space */ +#define IOVA_ANCHOR ~0UL + static bool iova_rcache_insert(struct iova_domain *iovad, unsigned long pfn, unsigned long size); @@ -55,6 +58,9 @@ init_iova_domain(struct iova_domain *iovad, unsigned long granule, iovad->dma_32bit_pfn = 1UL << (32 - iova_shift(iovad)); iovad->flush_cb = NULL; iovad->fq = NULL; + iovad->anchor.pfn_lo = iovad->anchor.pfn_hi = IOVA_ANCHOR; + rb_link_node(&iovad->anchor.node, NULL, &iovad->rbroot.rb_node); + rb_insert_color(&iovad->anchor.node, &iovad->rbroot); init_iova_rcaches(iovad); } EXPORT_SYMBOL_GPL(init_iova_domain); @@ -119,7 +125,7 @@ __get_cached_rbnode(struct iova_domain *iovad, unsigned long *limit_pfn) if (!cached_node) cached_node = iovad->cached_node; if (!cached_node) - return rb_last(&iovad->rbroot); + return rb_prev(&iovad->anchor.node); curr_iova = rb_entry(cached_node, struct iova, node); *limit_pfn = min(*limit_pfn, curr_iova->pfn_lo); @@ -242,7 +248,8 @@ EXPORT_SYMBOL(alloc_iova_mem); void free_iova_mem(struct iova *iova) { - kmem_cache_free(iova_cache, iova); + if (iova->pfn_lo != IOVA_ANCHOR) + kmem_cache_free(iova_cache, iova); } EXPORT_SYMBOL(free_iova_mem); @@ -676,6 +683,10 @@ reserve_iova(struct iova_domain *iovad, struct iova *iova; unsigned int overlap = 0; + /* Don't allow nonsensical pfns */ + if (WARN_ON((pfn_hi | pfn_lo) > (ULLONG_MAX >> iova_shift(iovad)))) + return NULL; + spin_lock_irqsave(&iovad->iova_rbtree_lock, flags); for (node = rb_first(&iovad->rbroot); node; node = rb_next(node)) { if (__is_range_overlap(node, pfn_lo, pfn_hi)) { diff --git a/include/linux/iova.h b/include/linux/iova.h index 
953cfd20f152..c696ee81054e 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -75,6 +75,7 @@ struct iova_domain { unsigned long granule; /* pfn granularity for this domain */ unsigned long start_pfn; /* Lower limit for this domain */ unsigned long dma_32bit_pfn; + struct iova anchor; /* rbtree lookup anchor */ struct iova_rcache rcaches[IOVA_RANGE_CACHE_MAX_SIZE]; /* IOVA range caches */ iova_flush_cb flush_cb; /* Call-Back function to flush IOMMU From 973f5fbedb0721ab964386a5fe5120998e71580c Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 21 Sep 2017 16:52:47 +0100 Subject: [PATCH 09/54] iommu/iova: Simplify cached node logic The logic of __get_cached_rbnode() is a little obtuse, but then __get_prev_node_of_cached_rbnode_or_last_node_and_update_limit_pfn() wouldn't exactly roll off the tongue... Now that we have the invariant that there is always a valid node to start searching downwards from, everything gets a bit easier to follow if we simplify that function to do what it says on the tin and return the cached node (or anchor node as appropriate) directly. In turn, we can then deduplicate the rb_prev() and limit_pfn logic into the main loop itself, further reduce the amount of code under the lock, and generally make the inner workings a bit less subtle. Signed-off-by: Robin Murphy Signed-off-by: Joerg Roedel --- drivers/iommu/iova.c | 51 +++++++++++++++----------------------------- 1 file changed, 17 insertions(+), 34 deletions(-) diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index 9e04c1f3e740..7b7363518733 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -51,8 +51,8 @@ init_iova_domain(struct iova_domain *iovad, unsigned long granule, spin_lock_init(&iovad->iova_rbtree_lock); iovad->rbroot = RB_ROOT; - iovad->cached_node = NULL; - iovad->cached32_node = NULL; + iovad->cached_node = &iovad->anchor.node; + iovad->cached32_node = &iovad->anchor.node; iovad->granule = granule; iovad->start_pfn = start_pfn; iovad->dma_32bit_pfn = 1UL << (32 - iova_shift(iovad)); @@ -115,22 +115,12 @@ int init_iova_flush_queue(struct iova_domain *iovad, EXPORT_SYMBOL_GPL(init_iova_flush_queue); static struct rb_node * -__get_cached_rbnode(struct iova_domain *iovad, unsigned long *limit_pfn) +__get_cached_rbnode(struct iova_domain *iovad, unsigned long limit_pfn) { - struct rb_node *cached_node = NULL; - struct iova *curr_iova; + if (limit_pfn <= iovad->dma_32bit_pfn) + return iovad->cached32_node; - if (*limit_pfn <= iovad->dma_32bit_pfn) - cached_node = iovad->cached32_node; - if (!cached_node) - cached_node = iovad->cached_node; - if (!cached_node) - return rb_prev(&iovad->anchor.node); - - curr_iova = rb_entry(cached_node, struct iova, node); - *limit_pfn = min(*limit_pfn, curr_iova->pfn_lo); - - return rb_prev(cached_node); + return iovad->cached_node; } static void @@ -149,11 +139,11 @@ __cached_rbnode_delete_update(struct iova_domain *iovad, struct iova *free) cached_iova = rb_entry(iovad->cached32_node, struct iova, node); if (free->pfn_hi < iovad->dma_32bit_pfn && - iovad->cached32_node && free->pfn_lo >= cached_iova->pfn_lo) + free->pfn_lo >= cached_iova->pfn_lo) iovad->cached32_node = rb_next(&free->node); cached_iova = rb_entry(iovad->cached_node, struct iova, node); - if (iovad->cached_node && free->pfn_lo >= cached_iova->pfn_lo) + if (free->pfn_lo >= cached_iova->pfn_lo) iovad->cached_node = rb_next(&free->node); } @@ -189,7 +179,8 @@ static int __alloc_and_insert_iova_range(struct iova_domain *iovad, unsigned long size, unsigned long limit_pfn, 
struct iova *new, bool size_aligned) { - struct rb_node *prev, *curr = NULL; + struct rb_node *curr, *prev; + struct iova *curr_iova; unsigned long flags; unsigned long new_pfn; unsigned long align_mask = ~0UL; @@ -199,24 +190,16 @@ static int __alloc_and_insert_iova_range(struct iova_domain *iovad, /* Walk the tree backwards */ spin_lock_irqsave(&iovad->iova_rbtree_lock, flags); - curr = __get_cached_rbnode(iovad, &limit_pfn); - prev = curr; - while (curr) { - struct iova *curr_iova = rb_entry(curr, struct iova, node); - - if (limit_pfn <= curr_iova->pfn_lo) - goto move_left; - - if (((limit_pfn - size) & align_mask) > curr_iova->pfn_hi) - break; /* found a free slot */ - - limit_pfn = curr_iova->pfn_lo; -move_left: + curr = __get_cached_rbnode(iovad, limit_pfn); + curr_iova = rb_entry(curr, struct iova, node); + do { + limit_pfn = min(limit_pfn, curr_iova->pfn_lo); + new_pfn = (limit_pfn - size) & align_mask; prev = curr; curr = rb_prev(curr); - } + curr_iova = rb_entry(curr, struct iova, node); + } while (curr && new_pfn <= curr_iova->pfn_hi); - new_pfn = (limit_pfn - size) & align_mask; if (limit_pfn < size || new_pfn < iovad->start_pfn) { spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); return -ENOMEM; From 7595dc588a39c37091ddf65f6c0a3cd40f128e7a Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 19 Sep 2017 14:48:39 +0100 Subject: [PATCH 10/54] iommu/iova: Simplify domain destruction All put_iova_domain() should have to worry about is freeing memory - by that point the domain must no longer be live, so the act of cleaning up doesn't need to be concurrency-safe or maintain the rbtree in a self-consistent state. There's no need to waste time with locking or emptying the rcache magazines, and we can just use the postorder traversal helper to clear out the remaining rbtree entries in-place. Signed-off-by: Robin Murphy Signed-off-by: Joerg Roedel --- drivers/iommu/iova.c | 50 +++++++++----------------------------------- 1 file changed, 10 insertions(+), 40 deletions(-) diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index 7b7363518733..ca21196c1f2d 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -583,21 +583,12 @@ EXPORT_SYMBOL_GPL(queue_iova); */ void put_iova_domain(struct iova_domain *iovad) { - struct rb_node *node; - unsigned long flags; + struct iova *iova, *tmp; free_iova_flush_queue(iovad); free_iova_rcaches(iovad); - spin_lock_irqsave(&iovad->iova_rbtree_lock, flags); - node = rb_first(&iovad->rbroot); - while (node) { - struct iova *iova = rb_entry(node, struct iova, node); - - rb_erase(node, &iovad->rbroot); + rbtree_postorder_for_each_entry_safe(iova, tmp, &iovad->rbroot, node) free_iova_mem(iova); - node = rb_first(&iovad->rbroot); - } - spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); } EXPORT_SYMBOL_GPL(put_iova_domain); @@ -989,47 +980,26 @@ static unsigned long iova_rcache_get(struct iova_domain *iovad, return __iova_rcache_get(&iovad->rcaches[log_size], limit_pfn); } -/* - * Free a cpu's rcache. 
- */ -static void free_cpu_iova_rcache(unsigned int cpu, struct iova_domain *iovad, - struct iova_rcache *rcache) -{ - struct iova_cpu_rcache *cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu); - unsigned long flags; - - spin_lock_irqsave(&cpu_rcache->lock, flags); - - iova_magazine_free_pfns(cpu_rcache->loaded, iovad); - iova_magazine_free(cpu_rcache->loaded); - - iova_magazine_free_pfns(cpu_rcache->prev, iovad); - iova_magazine_free(cpu_rcache->prev); - - spin_unlock_irqrestore(&cpu_rcache->lock, flags); -} - /* * free rcache data structures. */ static void free_iova_rcaches(struct iova_domain *iovad) { struct iova_rcache *rcache; - unsigned long flags; + struct iova_cpu_rcache *cpu_rcache; unsigned int cpu; int i, j; for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) { rcache = &iovad->rcaches[i]; - for_each_possible_cpu(cpu) - free_cpu_iova_rcache(cpu, iovad, rcache); - spin_lock_irqsave(&rcache->lock, flags); - free_percpu(rcache->cpu_rcaches); - for (j = 0; j < rcache->depot_size; ++j) { - iova_magazine_free_pfns(rcache->depot[j], iovad); - iova_magazine_free(rcache->depot[j]); + for_each_possible_cpu(cpu) { + cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu); + iova_magazine_free(cpu_rcache->loaded); + iova_magazine_free(cpu_rcache->prev); } - spin_unlock_irqrestore(&rcache->lock, flags); + free_percpu(rcache->cpu_rcaches); + for (j = 0; j < rcache->depot_size; ++j) + iova_magazine_free(rcache->depot[j]); } } From b826ee9a4f1cbf83cadc5a307de8eea27637699a Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 19 Sep 2017 14:48:40 +0100 Subject: [PATCH 11/54] iommu/iova: Make rcache limit_pfn handling more robust When popping a pfn from an rcache, we are currently checking it directly against limit_pfn for viability. Since this represents iova->pfn_lo, it is technically possible for the corresponding iova->pfn_hi to be greater than limit_pfn. Although we generally get away with it in practice since limit_pfn is typically a power-of-two boundary and the IOVAs are size-aligned, it's pretty trivial to make the iova_rcache_get() path take the allocation size into account for complete safety. 
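The size-aware check costs only a subtraction. As a sketch of the invariant (the helper name is illustrative, not the driver function):

	/*
	 * A cached entry stores pfn_lo of a size-aligned IOVA. For it to be
	 * usable, the whole [pfn_lo, pfn_lo + size - 1] range must fit below
	 * the caller's inclusive limit.
	 */
	static int rcache_entry_fits(unsigned long pfn_lo, unsigned long size,
				     unsigned long limit_pfn)
	{
		return pfn_lo + size - 1 <= limit_pfn;
	}

The patch below implements this by passing (limit_pfn - size) down into the magazine lookup, so the existing single comparison keeps working.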
Signed-off-by: Robin Murphy Signed-off-by: Joerg Roedel --- drivers/iommu/iova.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index ca21196c1f2d..15ff3033bbd7 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -406,7 +406,7 @@ alloc_iova_fast(struct iova_domain *iovad, unsigned long size, unsigned long iova_pfn; struct iova *new_iova; - iova_pfn = iova_rcache_get(iovad, size, limit_pfn); + iova_pfn = iova_rcache_get(iovad, size, limit_pfn + 1); if (iova_pfn) return iova_pfn; @@ -823,7 +823,7 @@ static unsigned long iova_magazine_pop(struct iova_magazine *mag, { BUG_ON(iova_magazine_empty(mag)); - if (mag->pfns[mag->size - 1] >= limit_pfn) + if (mag->pfns[mag->size - 1] > limit_pfn) return 0; return mag->pfns[--mag->size]; @@ -977,7 +977,7 @@ static unsigned long iova_rcache_get(struct iova_domain *iovad, if (log_size >= IOVA_RANGE_CACHE_MAX_SIZE) return 0; - return __iova_rcache_get(&iovad->rcaches[log_size], limit_pfn); + return __iova_rcache_get(&iovad->rcaches[log_size], limit_pfn - size); } /* From e8b198402745ed413ed8229b2eb45d34016eb5d8 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 28 Sep 2017 11:31:23 +0100 Subject: [PATCH 12/54] iommu/iova: Try harder to allocate from rcache magazine When devices with different DMA masks are using the same domain, or for PCI devices where we usually try a speculative 32-bit allocation first, there is a fair possibility that the top PFN of the rcache stack at any given time may be unsuitable for the lower limit, prompting a fallback to allocating anew from the rbtree. Consequently, we may end up artificially increasing pressure on the 32-bit IOVA space as unused IOVAs accumulate lower down in the rcache stacks, while callers with 32-bit masks also impose unnecessary rbtree overhead. In such cases, let's try a bit harder to satisfy the allocation locally first - scanning the whole stack should still be relatively inexpensive. Signed-off-by: Robin Murphy Signed-off-by: Joerg Roedel --- drivers/iommu/iova.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index 15ff3033bbd7..b0ca23682008 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -821,12 +821,21 @@ static bool iova_magazine_empty(struct iova_magazine *mag) static unsigned long iova_magazine_pop(struct iova_magazine *mag, unsigned long limit_pfn) { + int i; + unsigned long pfn; + BUG_ON(iova_magazine_empty(mag)); - if (mag->pfns[mag->size - 1] > limit_pfn) - return 0; + /* Only fall back to the rbtree if we have no suitable pfns at all */ + for (i = mag->size - 1; mag->pfns[i] > limit_pfn; i--) + if (i == 0) + return 0; - return mag->pfns[--mag->size]; + /* Swap it to pop it */ + pfn = mag->pfns[i]; + mag->pfns[i] = mag->pfns[--mag->size]; + + return pfn; } static void iova_magazine_push(struct iova_magazine *mag, unsigned long pfn) From abbb8a09384f69f7bb05936879e51933c146afba Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 2 Oct 2017 11:53:31 +0100 Subject: [PATCH 13/54] iommu/iova: Don't try to copy anchor nodes Anchor nodes are not reserved IOVAs in the way that copy_reserved_iova() cares about - while the failure from reserve_iova() is benign since the target domain will already have its own anchor, we still don't want to be triggering spurious warnings. 
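In sketch form, the fix teaches the copy walk to skip the sentinel. Simplified, illustrative types; ANCHOR_PFN mirrors the driver's IOVA_ANCHOR value:

	#define ANCHOR_PFN (~0UL)	/* mirrors IOVA_ANCHOR */

	struct resv_range {
		unsigned long pfn_lo, pfn_hi;
	};

	static void copy_reservations(const struct resv_range *from, int n,
				      int (*reserve)(unsigned long lo, unsigned long hi))
	{
		int i;

		for (i = 0; i < n; i++) {
			/* the anchor is rbtree bookkeeping, not a real reservation */
			if (from[i].pfn_lo == ANCHOR_PFN)
				continue;
			reserve(from[i].pfn_lo, from[i].pfn_hi);
		}
	}

The one-line continue added to copy_reserved_iova() below is the real-world equivalent.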
Reported-by: kernel test robot Signed-off-by: Robin Murphy Fixes: bb68b2fbfbd6 ('iommu/iova: Add rbtree anchor node') Signed-off-by: Joerg Roedel --- drivers/iommu/iova.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index b0ca23682008..3aee64b99df1 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -704,6 +704,9 @@ copy_reserved_iova(struct iova_domain *from, struct iova_domain *to) struct iova *iova = rb_entry(node, struct iova, node); struct iova *new_iova; + if (iova->pfn_lo == IOVA_ANCHOR) + continue; + new_iova = reserve_iova(to, iova->pfn_lo, iova->pfn_hi); if (!new_iova) printk(KERN_ERR "Reserve iova range %lx@%lx failed\n", From 32b124492bdf974f68eaef1bde80dc8058aef002 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 28 Sep 2017 15:55:01 +0100 Subject: [PATCH 14/54] iommu/io-pgtable-arm: Convert to IOMMU API TLB sync Now that the core API issues its own post-unmap TLB sync call, push that operation out from the io-pgtable-arm internals into the users. For now, we leave the invalidation implicit in the unmap operation, since none of the current users would benefit much from any change to that. CC: Magnus Damm CC: Laurent Pinchart Signed-off-by: Robin Murphy Signed-off-by: Joerg Roedel --- drivers/iommu/arm-smmu-v3.c | 10 ++++++++++ drivers/iommu/arm-smmu.c | 20 +++++++++++++++----- drivers/iommu/io-pgtable-arm.c | 7 +------ drivers/iommu/ipmmu-vmsa.c | 10 ++++++++++ 4 files changed, 36 insertions(+), 11 deletions(-) diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index e67ba6c40faf..ee0c7b73cff7 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -1743,6 +1743,14 @@ arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size) return ops->unmap(ops, iova, size); } +static void arm_smmu_iotlb_sync(struct iommu_domain *domain) +{ + struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu; + + if (smmu) + __arm_smmu_tlb_sync(smmu); +} + static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova) { @@ -1963,6 +1971,8 @@ static struct iommu_ops arm_smmu_ops = { .map = arm_smmu_map, .unmap = arm_smmu_unmap, .map_sg = default_iommu_map_sg, + .flush_iotlb_all = arm_smmu_iotlb_sync, + .iotlb_sync = arm_smmu_iotlb_sync, .iova_to_phys = arm_smmu_iova_to_phys, .add_device = arm_smmu_add_device, .remove_device = arm_smmu_remove_device, diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 3bdb799d3b4b..e4a82d70d446 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -250,6 +250,7 @@ enum arm_smmu_domain_stage { struct arm_smmu_domain { struct arm_smmu_device *smmu; struct io_pgtable_ops *pgtbl_ops; + const struct iommu_gather_ops *tlb_ops; struct arm_smmu_cfg cfg; enum arm_smmu_domain_stage stage; struct mutex init_mutex; /* Protects smmu pointer */ @@ -735,7 +736,6 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, enum io_pgtable_fmt fmt; struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); struct arm_smmu_cfg *cfg = &smmu_domain->cfg; - const struct iommu_gather_ops *tlb_ops; mutex_lock(&smmu_domain->init_mutex); if (smmu_domain->smmu) @@ -813,7 +813,7 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, ias = min(ias, 32UL); oas = min(oas, 32UL); } - tlb_ops = &arm_smmu_s1_tlb_ops; + smmu_domain->tlb_ops = &arm_smmu_s1_tlb_ops; break; case ARM_SMMU_DOMAIN_NESTED: /* @@ -833,9 +833,9 @@ static int arm_smmu_init_domain_context(struct iommu_domain 
*domain, oas = min(oas, 40UL); } if (smmu->version == ARM_SMMU_V2) - tlb_ops = &arm_smmu_s2_tlb_ops_v2; + smmu_domain->tlb_ops = &arm_smmu_s2_tlb_ops_v2; else - tlb_ops = &arm_smmu_s2_tlb_ops_v1; + smmu_domain->tlb_ops = &arm_smmu_s2_tlb_ops_v1; break; default: ret = -EINVAL; @@ -863,7 +863,7 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, .pgsize_bitmap = smmu->pgsize_bitmap, .ias = ias, .oas = oas, - .tlb = tlb_ops, + .tlb = smmu_domain->tlb_ops, .iommu_dev = smmu->dev, }; @@ -1259,6 +1259,14 @@ static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova, return ops->unmap(ops, iova, size); } +static void arm_smmu_iotlb_sync(struct iommu_domain *domain) +{ + struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); + + if (smmu_domain->tlb_ops) + smmu_domain->tlb_ops->tlb_sync(smmu_domain); +} + static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain, dma_addr_t iova) { @@ -1562,6 +1570,8 @@ static struct iommu_ops arm_smmu_ops = { .map = arm_smmu_map, .unmap = arm_smmu_unmap, .map_sg = default_iommu_map_sg, + .flush_iotlb_all = arm_smmu_iotlb_sync, + .iotlb_sync = arm_smmu_iotlb_sync, .iova_to_phys = arm_smmu_iova_to_phys, .add_device = arm_smmu_add_device, .remove_device = arm_smmu_remove_device, diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index e8018a308868..51e5c43caed1 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -609,7 +609,6 @@ static int __arm_lpae_unmap(struct arm_lpae_io_pgtable *data, static int arm_lpae_unmap(struct io_pgtable_ops *ops, unsigned long iova, size_t size) { - size_t unmapped; struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops); arm_lpae_iopte *ptep = data->pgd; int lvl = ARM_LPAE_START_LVL(data); @@ -617,11 +616,7 @@ static int arm_lpae_unmap(struct io_pgtable_ops *ops, unsigned long iova, if (WARN_ON(iova >= (1ULL << data->iop.cfg.ias))) return 0; - unmapped = __arm_lpae_unmap(data, iova, size, lvl, ptep); - if (unmapped) - io_pgtable_tlb_sync(&data->iop); - - return unmapped; + return __arm_lpae_unmap(data, iova, size, lvl, ptep); } static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops, diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index 195d6e93ac71..af8140054273 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -619,6 +619,14 @@ static size_t ipmmu_unmap(struct iommu_domain *io_domain, unsigned long iova, return domain->iop->unmap(domain->iop, iova, size); } +static void ipmmu_iotlb_sync(struct iommu_domain *io_domain) +{ + struct ipmmu_vmsa_domain *domain = to_vmsa_domain(io_domain); + + if (domain->mmu) + ipmmu_tlb_flush_all(domain); +} + static phys_addr_t ipmmu_iova_to_phys(struct iommu_domain *io_domain, dma_addr_t iova) { @@ -876,6 +884,8 @@ static const struct iommu_ops ipmmu_ops = { .detach_dev = ipmmu_detach_device, .map = ipmmu_map, .unmap = ipmmu_unmap, + .flush_iotlb_all = ipmmu_iotlb_sync, + .iotlb_sync = ipmmu_iotlb_sync, .map_sg = default_iommu_map_sg, .iova_to_phys = ipmmu_iova_to_phys, .add_device = ipmmu_add_device_dma, From 4d689b619445894f6b6fcbc496f6d302bd9e44a5 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 28 Sep 2017 15:55:02 +0100 Subject: [PATCH 15/54] iommu/io-pgtable-arm-v7s: Convert to IOMMU API TLB sync Now that the core API issues its own post-unmap TLB sync call, push that operation out from the io-pgtable-arm-v7s internals into the users. 
For now, we leave the invalidation implicit in the unmap operation, since none of the current users would benefit much from any change to that. Note that the conversion of msm_iommu is implicit, since that apparently has no specific TLB sync operation anyway. CC: Yong Wu CC: Rob Clark Signed-off-by: Robin Murphy Signed-off-by: Joerg Roedel --- drivers/iommu/io-pgtable-arm-v7s.c | 7 +------ drivers/iommu/mtk_iommu.c | 7 +++++++ drivers/iommu/qcom_iommu.c | 15 +++++++++++++++ 3 files changed, 23 insertions(+), 6 deletions(-) diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c index d665d0dc16e8..397531da8d9c 100644 --- a/drivers/iommu/io-pgtable-arm-v7s.c +++ b/drivers/iommu/io-pgtable-arm-v7s.c @@ -660,16 +660,11 @@ static int arm_v7s_unmap(struct io_pgtable_ops *ops, unsigned long iova, size_t size) { struct arm_v7s_io_pgtable *data = io_pgtable_ops_to_data(ops); - size_t unmapped; if (WARN_ON(upper_32_bits(iova))) return 0; - unmapped = __arm_v7s_unmap(data, iova, size, 1, data->pgd); - if (unmapped) - io_pgtable_tlb_sync(&data->iop); - - return unmapped; + return __arm_v7s_unmap(data, iova, size, 1, data->pgd); } static phys_addr_t arm_v7s_iova_to_phys(struct io_pgtable_ops *ops, diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index bd515be5b380..d0c8dfbbd74d 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -391,6 +391,11 @@ static size_t mtk_iommu_unmap(struct iommu_domain *domain, return unmapsz; } +static void mtk_iommu_iotlb_sync(struct iommu_domain *domain) +{ + mtk_iommu_tlb_sync(mtk_iommu_get_m4u_data()); +} + static phys_addr_t mtk_iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova) { @@ -490,6 +495,8 @@ static struct iommu_ops mtk_iommu_ops = { .map = mtk_iommu_map, .unmap = mtk_iommu_unmap, .map_sg = default_iommu_map_sg, + .flush_iotlb_all = mtk_iommu_iotlb_sync, + .iotlb_sync = mtk_iommu_iotlb_sync, .iova_to_phys = mtk_iommu_iova_to_phys, .add_device = mtk_iommu_add_device, .remove_device = mtk_iommu_remove_device, diff --git a/drivers/iommu/qcom_iommu.c b/drivers/iommu/qcom_iommu.c index c8a587d034b0..4a2c4378b3db 100644 --- a/drivers/iommu/qcom_iommu.c +++ b/drivers/iommu/qcom_iommu.c @@ -443,6 +443,19 @@ static size_t qcom_iommu_unmap(struct iommu_domain *domain, unsigned long iova, return ret; } +static void qcom_iommu_iotlb_sync(struct iommu_domain *domain) +{ + struct qcom_iommu_domain *qcom_domain = to_qcom_iommu_domain(domain); + struct io_pgtable *pgtable = container_of(qcom_domain->pgtbl_ops, + struct io_pgtable, ops); + if (!qcom_domain->pgtbl_ops) + return; + + pm_runtime_get_sync(qcom_domain->iommu->dev); + qcom_iommu_tlb_sync(pgtable->cookie); + pm_runtime_put_sync(qcom_domain->iommu->dev); +} + static phys_addr_t qcom_iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova) { @@ -570,6 +583,8 @@ static const struct iommu_ops qcom_iommu_ops = { .map = qcom_iommu_map, .unmap = qcom_iommu_unmap, .map_sg = default_iommu_map_sg, + .flush_iotlb_all = qcom_iommu_iotlb_sync, + .iotlb_sync = qcom_iommu_iotlb_sync, .iova_to_phys = qcom_iommu_iova_to_phys, .add_device = qcom_iommu_add_device, .remove_device = qcom_iommu_remove_device, From ec154bf56b276a0bb36079a5d22a267b5f417801 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 6 Oct 2017 15:00:53 +0200 Subject: [PATCH 16/54] iommu/vt-d: Don't register bus-notifier under dmar_global_lock The notifier function will take the dmar_global_lock too, so lockdep complains about inverse locking order when the notifier is 
registered under the dmar_global_lock. Reported-by: Jan Kiszka Fixes: 59ce0515cdaf ('iommu/vt-d: Update DRHD/RMRR/ATSR device scope caches when PCI hotplug happens') Signed-off-by: Joerg Roedel --- drivers/iommu/dmar.c | 7 +++++-- drivers/iommu/intel-iommu.c | 10 ++++++++++ include/linux/dmar.h | 1 + 3 files changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index 57c920c1372d..1ea7cd537873 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -801,13 +801,16 @@ int __init dmar_dev_scope_init(void) dmar_free_pci_notify_info(info); } } - - bus_register_notifier(&pci_bus_type, &dmar_pci_bus_nb); } return dmar_dev_scope_status; } +void dmar_register_bus_notifier(void) +{ + bus_register_notifier(&pci_bus_type, &dmar_pci_bus_nb); +} + int __init dmar_table_init(void) { diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 6784a05dd6b2..934cef924461 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -4752,6 +4752,16 @@ int __init intel_iommu_init(void) goto out_free_dmar; } + up_write(&dmar_global_lock); + + /* + * The bus notifier takes the dmar_global_lock, so lockdep will + * complain later when we register it under the lock. + */ + dmar_register_bus_notifier(); + + down_write(&dmar_global_lock); + if (no_iommu || dmar_disabled) { /* * We exit the function here to ensure IOMMU's remapping and diff --git a/include/linux/dmar.h b/include/linux/dmar.h index e8ffba1052d3..e2433bc50210 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -112,6 +112,7 @@ static inline bool dmar_rcu_check(void) extern int dmar_table_init(void); extern int dmar_dev_scope_init(void); +extern void dmar_register_bus_notifier(void); extern int dmar_parse_dev_scope(void *start, void *end, int *cnt, struct dmar_dev_scope **devices, u16 segment); extern void *dmar_alloc_dev_scope(void *start, void *end, int *cnt); From b117e0380513c186065f247a9af09dc0cd3e703d Mon Sep 17 00:00:00 2001 From: Christos Gkekas Date: Sun, 8 Oct 2017 23:33:31 +0100 Subject: [PATCH 17/54] iommu/vt-d: Delete unnecessary check in domain_context_mapping_one() Variable did_old is unsigned so checking whether it is greater or equal to zero is not necessary. Signed-off-by: Christos Gkekas Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 934cef924461..1dab9f73a20b 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -2058,7 +2058,7 @@ static int domain_context_mapping_one(struct dmar_domain *domain, if (context_copied(context)) { u16 did_old = context_domain_id(context); - if (did_old >= 0 && did_old < cap_ndoms(iommu->cap)) { + if (did_old < cap_ndoms(iommu->cap)) { iommu->flush.flush_context(iommu, did_old, (((u16)bus) << 8) | devfn, DMA_CCMD_MASK_NOBIT, From 37946d95fc1a41ed79efb613b0818c2cdecbb2fa Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 6 Oct 2017 12:16:39 +0200 Subject: [PATCH 18/54] iommu/amd: Add align parameter to alloc_irq_index() For multi-MSI IRQ ranges the IRQ index needs to be aligned to the power-of-two of the requested IRQ count. Extend the alloc_irq_index() function to allow such an allocation. 
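To make the alignment requirement concrete: for a multi-MSI block of, say, 4 vectors, the base IRQ index must be a multiple of roundup_pow_of_two(4), since the device varies the low bits of the message to select a vector within the block. Below is a self-contained user-space C sketch of such an aligned scan — not the kernel code; the table size, names and bitmap are invented, and the increment handling follows the corrected logic that a later patch in this series ('iommu/amd: Fix alloc_irq_index() increment') settles on:

    #include <stdbool.h>
    #include <stdio.h>

    #define TABLE_SIZE 64

    /* Round up to the next power of two (assumes n >= 1). */
    static unsigned int roundup_pow_of_two_u(unsigned int n)
    {
        unsigned int p = 1;
        while (p < n)
            p <<= 1;
        return p;
    }

    /* ALIGN() in the kernel's sense: round x up to a multiple of a (a = power of two). */
    #define ALIGN_UP(x, a) (((x) + (a) - 1) & ~((a) - 1))

    /*
     * Find 'count' consecutive free slots whose base index is aligned to
     * roundup_pow_of_two(count) when 'align' is set. Returns the base
     * index, or -1 if no such range exists.
     */
    static int find_aligned_range(const bool *allocated, int count, bool align)
    {
        int alignment = align ? (int)roundup_pow_of_two_u(count) : 1;
        int index, c;

        for (index = ALIGN_UP(0, alignment), c = 0; index < TABLE_SIZE;) {
            if (!allocated[index]) {
                c += 1;
            } else {
                c = 0;
                /* Restart the search at the next aligned base. */
                index = ALIGN_UP(index + 1, alignment);
                continue;
            }

            if (c == count)
                return index - count + 1; /* base of the free range */

            index++;
        }
        return -1;
    }

    int main(void)
    {
        bool allocated[TABLE_SIZE] = { [0] = true, [5] = true };

        /* A 4-vector multi-MSI range must start at a multiple of 4. */
        printf("base = %d\n", find_aligned_range(allocated, 4, true));
        return 0;
    }

With slots 0 and 5 taken, the sketch returns 8: the run starting at 4 is broken by slot 5, and 8 is the next 4-aligned base with four consecutive free entries.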
Reported-by: Thomas Gleixner Fixes: 2b324506341cb ('iommu/amd: Add routines to manage irq remapping tables') Reviewed-by: Thomas Gleixner Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 51f8215877f5..2d4ee2555a0d 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -3660,11 +3660,11 @@ out_unlock: return table; } -static int alloc_irq_index(u16 devid, int count) +static int alloc_irq_index(u16 devid, int count, bool align) { struct irq_remap_table *table; + int index, c, alignment = 1; unsigned long flags; - int index, c; struct amd_iommu *iommu = amd_iommu_rlookup_table[devid]; if (!iommu) @@ -3674,16 +3674,22 @@ static int alloc_irq_index(u16 devid, int count) if (!table) return -ENODEV; + if (align) + alignment = roundup_pow_of_two(count); + spin_lock_irqsave(&table->lock, flags); /* Scan table for free entries */ - for (c = 0, index = table->min_index; + for (index = ALIGN(table->min_index, alignment), c = 0; index < MAX_IRQS_PER_TABLE; - ++index) { - if (!iommu->irte_ops->is_allocated(table, index)) + index++) { + if (!iommu->irte_ops->is_allocated(table, index)) { c += 1; - else - c = 0; + } else { + c = 0; + index = ALIGN(index, alignment); + continue; + } if (c == count) { for (; c != 0; --c) @@ -4096,7 +4102,7 @@ static int irq_remapping_alloc(struct irq_domain *domain, unsigned int virq, else ret = -ENOMEM; } else { - index = alloc_irq_index(devid, nr_irqs); + index = alloc_irq_index(devid, nr_irqs, false); } if (index < 0) { pr_warn("Failed to allocate IRTE\n"); From 53b9ec3fbb7da97d13951debbd42e3a0c4a7c9f7 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 6 Oct 2017 12:22:06 +0200 Subject: [PATCH 19/54] iommu/amd: Enforce alignment for MSI IRQs Make use of the new alignment capability of alloc_irq_index() to enforce IRQ index alignment for MSI. Reported-by: Thomas Gleixner Fixes: 2b324506341cb ('iommu/amd: Add routines to manage irq remapping tables') Reviewed-by: Thomas Gleixner Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 2d4ee2555a0d..cb7c531542da 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -4102,7 +4102,9 @@ static int irq_remapping_alloc(struct irq_domain *domain, unsigned int virq, else ret = -ENOMEM; } else { - index = alloc_irq_index(devid, nr_irqs, false); + bool align = (info->type == X86_IRQ_ALLOC_TYPE_MSI); + + index = alloc_irq_index(devid, nr_irqs, align); } if (index < 0) { pr_warn("Failed to allocate IRTE\n"); From 538d5b333216c3daa7a5821307164f10af73ec8c Mon Sep 17 00:00:00 2001 From: Tomasz Nowicki Date: Wed, 20 Sep 2017 10:52:02 +0200 Subject: [PATCH 20/54] iommu/iova: Make rcache flush optional on IOVA allocation failure Since IOVA allocation failure is not an unusual case, we need to flush the CPUs' rcache in the hope that we will succeed in the next round. However, it is useful to decide whether we need the rcache flush step, for two reasons:

- Scalability. On a large system with ~100 CPUs, iterating over and flushing the rcache for each CPU becomes a serious bottleneck, so we may want to defer it.
- free_cpu_cached_iovas() does not care about the max PFN we are interested in.
Thus we may flush our rcaches and still get no new IOVA, as in this commonly used scenario:

    if (dma_limit > DMA_BIT_MASK(32) && dev_is_pci(dev))
        iova = alloc_iova_fast(iovad, iova_len, DMA_BIT_MASK(32) >> shift);

    if (!iova)
        iova = alloc_iova_fast(iovad, iova_len, dma_limit >> shift);

1. The first alloc_iova_fast() call is limited to DMA_BIT_MASK(32) to get PCI devices a SAC address
2. alloc_iova() fails due to a full 32-bit space
3. The rcaches contain PFNs outside the 32-bit space, so free_cpu_cached_iovas() throws entries away for nothing and alloc_iova() fails again
4. The next alloc_iova_fast() call cannot take advantage of the rcache, since we have just defeated the caches. In this case we pick the slowest option to proceed.

This patch reworks the flushed_rcache local flag into an additional function argument that controls the rcache flush step. Also, it updates all users to do the flush as a last resort. Signed-off-by: Tomasz Nowicki Reviewed-by: Robin Murphy Tested-by: Nate Watterson Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 5 +++-- drivers/iommu/dma-iommu.c | 6 ++++-- drivers/iommu/intel-iommu.c | 5 +++-- drivers/iommu/iova.c | 11 ++++++----- include/linux/iova.h | 5 +++-- 5 files changed, 19 insertions(+), 13 deletions(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 647ab7691aee..3d64c844d8b1 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -1546,10 +1546,11 @@ static unsigned long dma_ops_alloc_iova(struct device *dev, if (dma_mask > DMA_BIT_MASK(32)) pfn = alloc_iova_fast(&dma_dom->iovad, pages, - IOVA_PFN(DMA_BIT_MASK(32))); + IOVA_PFN(DMA_BIT_MASK(32)), false); if (!pfn) - pfn = alloc_iova_fast(&dma_dom->iovad, pages, IOVA_PFN(dma_mask)); + pfn = alloc_iova_fast(&dma_dom->iovad, pages, + IOVA_PFN(dma_mask), true); return (pfn << PAGE_SHIFT); } diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 191be9c80a8a..25914d36c5ac 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -370,10 +370,12 @@ static dma_addr_t iommu_dma_alloc_iova(struct iommu_domain *domain, /* Try to get PCI devices a SAC address */ if (dma_limit > DMA_BIT_MASK(32) && dev_is_pci(dev)) - iova = alloc_iova_fast(iovad, iova_len, DMA_BIT_MASK(32) >> shift); + iova = alloc_iova_fast(iovad, iova_len, + DMA_BIT_MASK(32) >> shift, false); if (!iova) - iova = alloc_iova_fast(iovad, iova_len, dma_limit >> shift); + iova = alloc_iova_fast(iovad, iova_len, dma_limit >> shift, + true); return (dma_addr_t)iova << shift; } diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index ebb48353dd39..b3914fce8254 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -3469,11 +3469,12 @@ static unsigned long intel_alloc_iova(struct device *dev, * from higher range */ iova_pfn = alloc_iova_fast(&domain->iovad, nrpages, - IOVA_PFN(DMA_BIT_MASK(32))); + IOVA_PFN(DMA_BIT_MASK(32)), false); if (iova_pfn) return iova_pfn; } - iova_pfn = alloc_iova_fast(&domain->iovad, nrpages, IOVA_PFN(dma_mask)); + iova_pfn = alloc_iova_fast(&domain->iovad, nrpages, + IOVA_PFN(dma_mask), true); if (unlikely(!iova_pfn)) { pr_err("Allocating %ld-page iova for %s failed", nrpages, dev_name(dev)); diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index 3aee64b99df1..84bda3a4dafc 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -395,14 +395,15 @@ EXPORT_SYMBOL_GPL(free_iova); * @iovad: - iova domain in question * @size: - size of page frames to allocate * @limit_pfn: - max limit address + * @flush_rcache: - set to
flush rcache on regular allocation failure * This function tries to satisfy an iova allocation from the rcache, - * and falls back to regular allocation on failure. + * and falls back to regular allocation on failure. If regular allocation + * fails too and the flush_rcache flag is set then the rcache will be flushed. */ unsigned long alloc_iova_fast(struct iova_domain *iovad, unsigned long size, - unsigned long limit_pfn) + unsigned long limit_pfn, bool flush_rcache) { - bool flushed_rcache = false; unsigned long iova_pfn; struct iova *new_iova; @@ -415,11 +416,11 @@ retry: if (!new_iova) { unsigned int cpu; - if (flushed_rcache) + if (!flush_rcache) return 0; /* Try replenishing IOVAs by flushing rcache. */ - flushed_rcache = true; + flush_rcache = false; for_each_online_cpu(cpu) free_cpu_cached_iovas(cpu, iovad); goto retry; diff --git a/include/linux/iova.h b/include/linux/iova.h index c696ee81054e..928442dda565 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -150,7 +150,7 @@ void queue_iova(struct iova_domain *iovad, unsigned long pfn, unsigned long pages, unsigned long data); unsigned long alloc_iova_fast(struct iova_domain *iovad, unsigned long size, - unsigned long limit_pfn); + unsigned long limit_pfn, bool flush_rcache); struct iova *reserve_iova(struct iova_domain *iovad, unsigned long pfn_lo, unsigned long pfn_hi); void copy_reserved_iova(struct iova_domain *from, struct iova_domain *to); @@ -212,7 +212,8 @@ static inline void queue_iova(struct iova_domain *iovad, static inline unsigned long alloc_iova_fast(struct iova_domain *iovad, unsigned long size, - unsigned long limit_pfn) + unsigned long limit_pfn, + bool flush_rcache) { return 0; } From 6948d4a7e11b37c83df9b508bbf725512d82a77b Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 22 Sep 2017 15:04:00 +0100 Subject: [PATCH 21/54] iommu/arm-smmu: Remove ACPICA workarounds Now that the kernel headers have synced with the relevant upstream ACPICA updates, it's time to clean up the temporary local definitions. Signed-off-by: Robin Murphy Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu-v3.c | 11 +---------- drivers/iommu/arm-smmu.c | 8 -------- 2 files changed, 1 insertion(+), 18 deletions(-) diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index e67ba6c40faf..47f52b1ab838 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -413,15 +413,6 @@ #define MSI_IOVA_BASE 0x8000000 #define MSI_IOVA_LENGTH 0x100000 -/* Until ACPICA headers cover IORT rev. C */ -#ifndef ACPI_IORT_SMMU_HISILICON_HI161X -#define ACPI_IORT_SMMU_HISILICON_HI161X 0x1 -#endif - -#ifndef ACPI_IORT_SMMU_V3_CAVIUM_CN99XX -#define ACPI_IORT_SMMU_V3_CAVIUM_CN99XX 0x2 -#endif - static bool disable_bypass; module_param_named(disable_bypass, disable_bypass, bool, S_IRUGO); MODULE_PARM_DESC(disable_bypass, @@ -2665,7 +2656,7 @@ static void acpi_smmu_get_options(u32 model, struct arm_smmu_device *smmu) case ACPI_IORT_SMMU_V3_CAVIUM_CN99XX: smmu->options |= ARM_SMMU_OPT_PAGE0_REGS_ONLY; break; - case ACPI_IORT_SMMU_HISILICON_HI161X: + case ACPI_IORT_SMMU_V3_HISILICON_HI161X: smmu->options |= ARM_SMMU_OPT_SKIP_PREFETCH; break; } diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 3bdb799d3b4b..bce312a99f52 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -119,14 +119,6 @@ enum arm_smmu_implementation { CAVIUM_SMMUV2, }; -/* Until ACPICA headers cover IORT rev. 
C */ -#ifndef ACPI_IORT_SMMU_CORELINK_MMU401 -#define ACPI_IORT_SMMU_CORELINK_MMU401 0x4 -#endif -#ifndef ACPI_IORT_SMMU_CAVIUM_THUNDERX -#define ACPI_IORT_SMMU_CAVIUM_THUNDERX 0x5 -#endif - struct arm_smmu_s2cr { struct iommu_group *group; int count; From 704c038255d44e821a05835c9bf8c8d0393a4777 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 5 Oct 2017 16:49:37 +0100 Subject: [PATCH 22/54] iommu/arm-smmu-v3: Ensure we sync STE when only changing config field The SMMUv3 architecture permits caching of data structures deemed to be "reachable" by the SMMU, which includes STEs marked as invalid. When transitioning an STE to a bypass/fault configuration at init or detach time, we mistakenly elide the CMDQ_OP_CFGI_STE operation in some cases, therefore potentially leaving the old STE state cached in the SMMU. This patch fixes the problem by ensuring that we perform the CMDQ_OP_CFGI_STE operation irrespective of the validity of the previous STE. Reviewed-by: Robin Murphy Reported-by: Eric Auger Reviewed-by: Eric Auger Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu-v3.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index 47f52b1ab838..80532d9ecaaf 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -1085,7 +1085,11 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid, dst[1] = cpu_to_le64(STRTAB_STE_1_SHCFG_INCOMING << STRTAB_STE_1_SHCFG_SHIFT); dst[2] = 0; /* Nuke the VMID */ - if (ste_live) + /* + * The SMMU can perform negative caching, so we must sync + * the STE regardless of whether the old value was live. + */ + if (smmu) arm_smmu_sync_ste_for_sid(smmu, sid); return; } From 74f55d34414c866dbf3a69e28a2f963abe61ca58 Mon Sep 17 00:00:00 2001 From: Feng Kan Date: Wed, 11 Oct 2017 15:08:39 -0700 Subject: [PATCH 23/54] iommu/arm-smmu: Enable bypass transaction caching for ARM SMMU 500 The ARM SMMU identity mapping performance was poor compared with the DMA mode. It was found that enabling caching would restore the performance back to normal. The S2CRB_TLBEN bit in the ACR register allows caching of the stream-to-context register bypass transaction information. Reviewed-by: Robin Murphy Signed-off-by: Feng Kan Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index bce312a99f52..4683f518cef4 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -59,6 +59,7 @@ #define ARM_MMU500_ACTLR_CPRE (1 << 1) #define ARM_MMU500_ACR_CACHE_LOCK (1 << 26) +#define ARM_MMU500_ACR_S2CRB_TLBEN (1 << 10) #define ARM_MMU500_ACR_SMTNMB_TLBEN (1 << 8) #define TLB_LOOP_TIMEOUT 1000000 /* 1s! */ @@ -1598,7 +1599,7 @@ static void arm_smmu_device_reset(struct arm_smmu_device *smmu) * Allow unmatched Stream IDs to allocate bypass * TLB entries for reduced latency. */ - reg |= ARM_MMU500_ACR_SMTNMB_TLBEN; + reg |= ARM_MMU500_ACR_SMTNMB_TLBEN | ARM_MMU500_ACR_S2CRB_TLBEN; writel_relaxed(reg, gr0_base + ARM_SMMU_GR0_sACR); } From 9cff86fd2b960f9bedc67771c24a73d7dc32048d Mon Sep 17 00:00:00 2001 From: Yisheng Xie Date: Thu, 21 Sep 2017 20:36:07 +0800 Subject: [PATCH 24/54] iommu/arm-smmu-v3: Avoid ILLEGAL setting of STE.S1STALLD and CD.S According to the spec, it is ILLEGAL to set STE.S1STALLD if STALL_MODEL is not 0b00, which means we should not disable stall mode if stall or terminate mode is not configurable.
Meanwhile, it is also ILLEGAL when STALL_MODEL==0b10 && CD.S==0, which means that if stall mode is forced we should always set CD.S. As per Jean-Philippe's suggestion, this patch introduces a feature bit ARM_SMMU_FEAT_STALL_FORCE, which means the SMMU only supports forced stalling. Therefore, we can avoid the ILLEGAL setting of STE.S1STALLD by checking ARM_SMMU_FEAT_STALL_FORCE. This patch keeps ARM_SMMU_FEAT_STALLS meaning that stalling is supported (forced or configurable), to make future extensions easy, i.e. we can use ARM_SMMU_FEAT_STALLS alone to check whether we should register a fault handler or enable a master's can_stall, etc. to support platform SVM. After applying this patch, the feature bits, STE.S1STALLD and CD.S are set as follows:

STALL_MODEL   FEATURE                                              S1STALLD   CD.S
0b00          ARM_SMMU_FEAT_STALLS                                 0b1        0b0
0b01          !ARM_SMMU_FEAT_STALLS && !ARM_SMMU_FEAT_STALL_FORCE  0b0        0b0
0b10          ARM_SMMU_FEAT_STALLS && ARM_SMMU_FEAT_STALL_FORCE    0b0        0b1

Signed-off-by: Yisheng Xie Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu-v3.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index 80532d9ecaaf..159117e2c5ad 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -316,6 +316,7 @@ #define ARM64_TCR_TBI0_MASK 0x1UL #define CTXDESC_CD_0_AA64 (1UL << 41) +#define CTXDESC_CD_0_S (1UL << 44) #define CTXDESC_CD_0_R (1UL << 45) #define CTXDESC_CD_0_A (1UL << 46) #define CTXDESC_CD_0_ASET_SHIFT 47 @@ -595,6 +596,7 @@ struct arm_smmu_device { #define ARM_SMMU_FEAT_TRANS_S2 (1 << 10) #define ARM_SMMU_FEAT_STALLS (1 << 11) #define ARM_SMMU_FEAT_HYP (1 << 12) +#define ARM_SMMU_FEAT_STALL_FORCE (1 << 13) u32 features; #define ARM_SMMU_OPT_SKIP_PREFETCH (1 << 0) @@ -987,6 +989,11 @@ static void arm_smmu_write_ctx_desc(struct arm_smmu_device *smmu, CTXDESC_CD_0_R | CTXDESC_CD_0_A | CTXDESC_CD_0_ASET_PRIVATE | CTXDESC_CD_0_AA64 | (u64)cfg->cd.asid << CTXDESC_CD_0_ASID_SHIFT | CTXDESC_CD_0_V; + + /* STALL_MODEL==0b10 && CD.S==0 is ILLEGAL */ + if (smmu->features & ARM_SMMU_FEAT_STALL_FORCE) + val |= CTXDESC_CD_0_S; + cfg->cdptr[0] = cpu_to_le64(val); val = cfg->cd.ttbr & CTXDESC_CD_1_TTB0_MASK << CTXDESC_CD_1_TTB0_SHIFT; @@ -1107,7 +1114,8 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid, #endif STRTAB_STE_1_STRW_NSEL1 << STRTAB_STE_1_STRW_SHIFT); - if (smmu->features & ARM_SMMU_FEAT_STALLS) + if (smmu->features & ARM_SMMU_FEAT_STALLS && + !(smmu->features & ARM_SMMU_FEAT_STALL_FORCE)) dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD); val |= (ste->s1_cfg->cdptr_dma & STRTAB_STE_0_S1CTXPTR_MASK @@ -2531,9 +2539,10 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu) coherent ? "true" : "false"); switch (reg & IDR0_STALL_MODEL_MASK << IDR0_STALL_MODEL_SHIFT) { - case IDR0_STALL_MODEL_STALL: - /* Fallthrough */ case IDR0_STALL_MODEL_FORCE: + smmu->features |= ARM_SMMU_FEAT_STALL_FORCE; + /* Fallthrough */ + case IDR0_STALL_MODEL_STALL: smmu->features |= ARM_SMMU_FEAT_STALLS; } From 2a22baa2d17f37f9bd86febcb69f10dbe6792b58 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 25 Sep 2017 14:55:40 +0100 Subject: [PATCH 25/54] iommu/arm-smmu-v3: Correct COHACC override message Slightly confusingly, when reporting a mismatch of the ID register value, we still refer to the IORT COHACC override flag as the "dma-coherent property" if we booted with ACPI. Update the message to be firmware-agnostic in line with SMMUv2.
Acked-by: Lorenzo Pieralisi Reported-by: Will Deacon Signed-off-by: Robin Murphy Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu-v3.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index 159117e2c5ad..d81d5dbffbe2 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -2535,7 +2535,7 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu) * register, but warn on mismatch. */ if (!!(reg & IDR0_COHACC) != coherent) - dev_warn(smmu->dev, "IDR0.COHACC overridden by dma-coherent property (%s)\n", + dev_warn(smmu->dev, "IDR0.COHACC overridden by FW configuration (%s)\n", coherent ? "true" : "false"); switch (reg & IDR0_STALL_MODEL_MASK << IDR0_STALL_MODEL_SHIFT) { From 2f657add07a8f758e41076820157eeca1df22b79 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 31 Aug 2017 14:44:25 +0100 Subject: [PATCH 26/54] iommu/arm-smmu-v3: Specialise CMD_SYNC handling CMD_SYNC already has a bit of special treatment here and there, but as we're about to extend it with more functionality for completing outside the CMDQ lock, things are going to get rather messy if we keep trying to cram everything into a single generic command interface. Instead, let's break out the issuing of CMD_SYNC into its own specific helper where upcoming changes will have room to breathe. Signed-off-by: Robin Murphy Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu-v3.c | 54 ++++++++++++++++++++++++------------- 1 file changed, 35 insertions(+), 19 deletions(-) diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index d81d5dbffbe2..18a0fa7dd72d 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -929,13 +929,22 @@ static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu) queue_write(Q_ENT(q, cons), cmd, q->ent_dwords); } +static void arm_smmu_cmdq_insert_cmd(struct arm_smmu_device *smmu, u64 *cmd) +{ + struct arm_smmu_queue *q = &smmu->cmdq.q; + bool wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV); + + while (queue_insert_raw(q, cmd) == -ENOSPC) { + if (queue_poll_cons(q, false, wfe)) + dev_err_ratelimited(smmu->dev, "CMDQ timeout\n"); + } +} + static void arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu, struct arm_smmu_cmdq_ent *ent) { u64 cmd[CMDQ_ENT_DWORDS]; unsigned long flags; - bool wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV); - struct arm_smmu_queue *q = &smmu->cmdq.q; if (arm_smmu_cmdq_build_cmd(cmd, ent)) { dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n", @@ -944,16 +953,29 @@ static void arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu, } spin_lock_irqsave(&smmu->cmdq.lock, flags); - while (queue_insert_raw(q, cmd) == -ENOSPC) { - if (queue_poll_cons(q, false, wfe)) - dev_err_ratelimited(smmu->dev, "CMDQ timeout\n"); - } - - if (ent->opcode == CMDQ_OP_CMD_SYNC && queue_poll_cons(q, true, wfe)) - dev_err_ratelimited(smmu->dev, "CMD_SYNC timeout\n"); + arm_smmu_cmdq_insert_cmd(smmu, cmd); spin_unlock_irqrestore(&smmu->cmdq.lock, flags); } +static void arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu) +{ + u64 cmd[CMDQ_ENT_DWORDS]; + unsigned long flags; + bool wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV); + struct arm_smmu_cmdq_ent ent = { .opcode = CMDQ_OP_CMD_SYNC }; + int ret; + + arm_smmu_cmdq_build_cmd(cmd, &ent); + + spin_lock_irqsave(&smmu->cmdq.lock, flags); + arm_smmu_cmdq_insert_cmd(smmu, cmd); + ret = queue_poll_cons(&smmu->cmdq.q, true, wfe); + spin_unlock_irqrestore(&smmu->cmdq.lock, flags); + + if (ret) + 
dev_err_ratelimited(smmu->dev, "CMD_SYNC timeout\n"); +} + /* Context descriptor manipulation functions */ static u64 arm_smmu_cpu_tcr_to_cd(u64 tcr) { @@ -1027,8 +1049,7 @@ static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, u32 sid) }; arm_smmu_cmdq_issue_cmd(smmu, &cmd); - cmd.opcode = CMDQ_OP_CMD_SYNC; - arm_smmu_cmdq_issue_cmd(smmu, &cmd); + arm_smmu_cmdq_issue_sync(smmu); } static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid, @@ -1355,10 +1376,7 @@ static irqreturn_t arm_smmu_combined_irq_handler(int irq, void *dev) /* IO_PGTABLE API */ static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu) { - struct arm_smmu_cmdq_ent cmd; - - cmd.opcode = CMDQ_OP_CMD_SYNC; - arm_smmu_cmdq_issue_cmd(smmu, &cmd); + arm_smmu_cmdq_issue_sync(smmu); } static void arm_smmu_tlb_sync(void *cookie) @@ -2402,8 +2420,7 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass) /* Invalidate any cached configuration */ cmd.opcode = CMDQ_OP_CFGI_ALL; arm_smmu_cmdq_issue_cmd(smmu, &cmd); - cmd.opcode = CMDQ_OP_CMD_SYNC; - arm_smmu_cmdq_issue_cmd(smmu, &cmd); + arm_smmu_cmdq_issue_sync(smmu); /* Invalidate any stale TLB entries */ if (smmu->features & ARM_SMMU_FEAT_HYP) { @@ -2413,8 +2430,7 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass) cmd.opcode = CMDQ_OP_TLBI_NSNH_ALL; arm_smmu_cmdq_issue_cmd(smmu, &cmd); - cmd.opcode = CMDQ_OP_CMD_SYNC; - arm_smmu_cmdq_issue_cmd(smmu, &cmd); + arm_smmu_cmdq_issue_sync(smmu); /* Event queue */ writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE); From dce032a15ced2ee9fa58ed7b52c492795d096a40 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 31 Aug 2017 14:44:26 +0100 Subject: [PATCH 27/54] iommu/arm-smmu-v3: Forget about cmdq-sync interrupt The cmdq-sync interrupt is never going to be particularly useful, since for stage 1 DMA at least we'll often need to wait for sync completion within someone else's IRQ handler, thus have to implement polling anyway. Beyond that, the overhead of taking an interrupt, then still having to grovel around in the queue to figure out *which* sync command completed, doesn't seem much more attractive than simple polling either. Furthermore, if an implementation both has wired interrupts and supports MSIs, then we don't want to be taking the IRQ unnecessarily if we're using the MSI write to update memory. Let's just make life simpler by not even bothering to claim it in the first place. 
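The polling this relies on need not be expensive, either: the strategy the SMMUv2 driver already uses, and which a later patch in this series ('Use burst-polling for sync completion') adopts for CMD_SYNC, is to spin a bounded number of times before each sleep/backoff step. A rough, self-contained user-space illustration of that spin-then-backoff pattern (the completion flag, constants and timings are all invented for the sketch):

    #include <stdatomic.h>
    #include <stdio.h>
    #include <time.h>

    #define SPIN_COUNT 10 /* burst length before we start sleeping */

    static atomic_bool done; /* hypothetical completion flag */

    /* Poll for completion: returns 0 on success, -1 on timeout. */
    static int poll_with_backoff(long timeout_us)
    {
        struct timespec ts = { .tv_nsec = 1000 }; /* start with 1us sleeps */
        long slept_us = 0;
        int spin = 0;

        while (!atomic_load(&done)) {
            if (++spin < SPIN_COUNT)
                continue; /* cheap busy-wait catches fast completions */
            if (slept_us >= timeout_us)
                return -1;
            nanosleep(&ts, NULL);
            slept_us += ts.tv_nsec / 1000;
            if (ts.tv_nsec < 500000000) /* exponential backoff, capped */
                ts.tv_nsec *= 2;
            spin = 0;
        }
        return 0;
    }

    int main(void)
    {
        atomic_store(&done, true); /* pretend the sync completed instantly */
        printf("%d\n", poll_with_backoff(1000000));
        return 0;
    }

The point of the initial burst is that a lightly-loaded queue usually completes within a few iterations, so the waiter never pays the latency of a sleep at all.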
Signed-off-by: Robin Murphy Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu-v3.c | 24 +----------------------- 1 file changed, 1 insertion(+), 23 deletions(-) diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index 18a0fa7dd72d..aa3bd3968290 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -1299,12 +1299,6 @@ static irqreturn_t arm_smmu_priq_thread(int irq, void *dev) return IRQ_HANDLED; } -static irqreturn_t arm_smmu_cmdq_sync_handler(int irq, void *dev) -{ - /* We don't actually use CMD_SYNC interrupts for anything */ - return IRQ_HANDLED; -} - static int arm_smmu_device_disable(struct arm_smmu_device *smmu); static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev) @@ -1337,10 +1331,8 @@ static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev) if (active & GERROR_MSI_EVTQ_ABT_ERR) dev_warn(smmu->dev, "EVTQ MSI write aborted\n"); - if (active & GERROR_MSI_CMDQ_ABT_ERR) { + if (active & GERROR_MSI_CMDQ_ABT_ERR) dev_warn(smmu->dev, "CMDQ MSI write aborted\n"); - arm_smmu_cmdq_sync_handler(irq, smmu->dev); - } if (active & GERROR_PRIQ_ABT_ERR) dev_err(smmu->dev, "PRIQ write aborted -- events may have been lost\n"); @@ -1369,7 +1361,6 @@ static irqreturn_t arm_smmu_combined_irq_thread(int irq, void *dev) static irqreturn_t arm_smmu_combined_irq_handler(int irq, void *dev) { arm_smmu_gerror_handler(irq, dev); - arm_smmu_cmdq_sync_handler(irq, dev); return IRQ_WAKE_THREAD; } @@ -2286,15 +2277,6 @@ static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu) dev_warn(smmu->dev, "failed to enable evtq irq\n"); } - irq = smmu->cmdq.q.irq; - if (irq) { - ret = devm_request_irq(smmu->dev, irq, - arm_smmu_cmdq_sync_handler, 0, - "arm-smmu-v3-cmdq-sync", smmu); - if (ret < 0) - dev_warn(smmu->dev, "failed to enable cmdq-sync irq\n"); - } - irq = smmu->gerr_irq; if (irq) { ret = devm_request_irq(smmu->dev, irq, arm_smmu_gerror_handler, @@ -2803,10 +2785,6 @@ static int arm_smmu_device_probe(struct platform_device *pdev) if (irq > 0) smmu->priq.q.irq = irq; - irq = platform_get_irq_byname(pdev, "cmdq-sync"); - if (irq > 0) - smmu->cmdq.q.irq = irq; - irq = platform_get_irq_byname(pdev, "gerror"); if (irq > 0) smmu->gerr_irq = irq; From 37de98f8f1cf330918b242cd3ce13751857243a6 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Wed, 18 Oct 2017 15:04:26 +0100 Subject: [PATCH 28/54] iommu/arm-smmu-v3: Use CMD_SYNC completion MSI As an IRQ, the CMD_SYNC interrupt is not particularly useful, not least because we often need to wait for sync completion within someone else's IRQ handler anyway. However, when the SMMU is both coherent and supports MSIs, we can have a lot more fun by not using it as an interrupt at all. Following the example suggested in the architecture and using a write targeting normal memory, we can let callers wait on a status variable outside the lock instead of having to stall the entire queue or even touch MMIO registers. Since multiple sync commands are guaranteed to complete in order, a simple incrementing sequence count is all we need to unambiguously support any realistic number of overlapping waiters. 
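The wrap-safe trick behind that sequence count is worth spelling out: as long as the producer can never get more than 2^31 ahead of a waiter (the queue is at most 2^20 entries), computing the difference in unsigned 32-bit arithmetic and reinterpreting it as signed gives a correct "has my sync completed yet?" test even across wraparound. A standalone sketch (illustrative only):

    #include <assert.h>
    #include <stdint.h>

    /*
     * Wrap-safe "has the counter reached idx?" check: the gap between the
     * observed value and the awaited index is bounded well below 2^31, so
     * the signed reinterpretation of (val - idx) survives 32-bit wrap.
     */
    static int sync_reached(uint32_t val, uint32_t idx)
    {
        return (int32_t)(val - idx) >= 0;
    }

    int main(void)
    {
        assert(sync_reached(5, 5));
        assert(sync_reached(6, 5));
        assert(!sync_reached(4, 5));
        /* Across the 32-bit wrap: 0x00000002 is "after" 0xfffffffe. */
        assert(sync_reached(0x00000002u, 0xfffffffeu));
        assert(!sync_reached(0xfffffffeu, 0x00000002u));
        return 0;
    }

A plain `val >= idx` comparison would misfire as soon as the counter wrapped past zero, which is exactly the case the signed-difference form handles.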
Signed-off-by: Robin Murphy Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu-v3.c | 51 +++++++++++++++++++++++++++++++++++-- 1 file changed, 49 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index aa3bd3968290..ceb8f9ef4bad 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -378,7 +378,16 @@ #define CMDQ_SYNC_0_CS_SHIFT 12 #define CMDQ_SYNC_0_CS_NONE (0UL << CMDQ_SYNC_0_CS_SHIFT) +#define CMDQ_SYNC_0_CS_IRQ (1UL << CMDQ_SYNC_0_CS_SHIFT) #define CMDQ_SYNC_0_CS_SEV (2UL << CMDQ_SYNC_0_CS_SHIFT) +#define CMDQ_SYNC_0_MSH_SHIFT 22 +#define CMDQ_SYNC_0_MSH_ISH (3UL << CMDQ_SYNC_0_MSH_SHIFT) +#define CMDQ_SYNC_0_MSIATTR_SHIFT 24 +#define CMDQ_SYNC_0_MSIATTR_OIWB (0xfUL << CMDQ_SYNC_0_MSIATTR_SHIFT) +#define CMDQ_SYNC_0_MSIDATA_SHIFT 32 +#define CMDQ_SYNC_0_MSIDATA_MASK 0xffffffffUL +#define CMDQ_SYNC_1_MSIADDR_SHIFT 0 +#define CMDQ_SYNC_1_MSIADDR_MASK 0xffffffffffffcUL /* Event queue */ #define EVTQ_ENT_DWORDS 4 @@ -410,6 +419,7 @@ /* High-level queue structures */ #define ARM_SMMU_POLL_TIMEOUT_US 100 #define ARM_SMMU_CMDQ_DRAIN_TIMEOUT_US 1000000 /* 1s! */ +#define ARM_SMMU_SYNC_TIMEOUT_US 1000000 /* 1s! */ #define MSI_IOVA_BASE 0x8000000 #define MSI_IOVA_LENGTH 0x100000 @@ -496,6 +506,10 @@ struct arm_smmu_cmdq_ent { } pri; #define CMDQ_OP_CMD_SYNC 0x46 + struct { + u32 msidata; + u64 msiaddr; + } sync; }; }; @@ -609,6 +623,7 @@ struct arm_smmu_device { int gerr_irq; int combined_irq; + atomic_t sync_nr; unsigned long ias; /* IPA */ unsigned long oas; /* PA */ @@ -627,6 +642,8 @@ struct arm_smmu_device { struct arm_smmu_strtab_cfg strtab_cfg; + u32 sync_count; + /* IOMMU core code handle */ struct iommu_device iommu; }; @@ -871,7 +888,13 @@ static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent) } break; case CMDQ_OP_CMD_SYNC: - cmd[0] |= CMDQ_SYNC_0_CS_SEV; + if (ent->sync.msiaddr) + cmd[0] |= CMDQ_SYNC_0_CS_IRQ; + else + cmd[0] |= CMDQ_SYNC_0_CS_SEV; + cmd[0] |= CMDQ_SYNC_0_MSH_ISH | CMDQ_SYNC_0_MSIATTR_OIWB; + cmd[0] |= (u64)ent->sync.msidata << CMDQ_SYNC_0_MSIDATA_SHIFT; + cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK; break; default: return -ENOENT; @@ -957,21 +980,44 @@ static void arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu, spin_unlock_irqrestore(&smmu->cmdq.lock, flags); } +/* + * The difference between val and sync_idx is bounded by the maximum size of + * a queue at 2^20 entries, so 32 bits is plenty for wrap-safe arithmetic. + */ +static int arm_smmu_sync_poll_msi(struct arm_smmu_device *smmu, u32 sync_idx) +{ + ktime_t timeout = ktime_add_us(ktime_get(), ARM_SMMU_SYNC_TIMEOUT_US); + u32 val = smp_cond_load_acquire(&smmu->sync_count, + (int)(VAL - sync_idx) >= 0 || + !ktime_before(ktime_get(), timeout)); + + return (int)(val - sync_idx) < 0 ? 
-ETIMEDOUT : 0; +} + static void arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu) { u64 cmd[CMDQ_ENT_DWORDS]; unsigned long flags; bool wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV); + bool msi = (smmu->features & ARM_SMMU_FEAT_MSI) && + (smmu->features & ARM_SMMU_FEAT_COHERENCY); struct arm_smmu_cmdq_ent ent = { .opcode = CMDQ_OP_CMD_SYNC }; int ret; + if (msi) { + ent.sync.msidata = atomic_inc_return_relaxed(&smmu->sync_nr); + ent.sync.msiaddr = virt_to_phys(&smmu->sync_count); + } arm_smmu_cmdq_build_cmd(cmd, &ent); spin_lock_irqsave(&smmu->cmdq.lock, flags); arm_smmu_cmdq_insert_cmd(smmu, cmd); - ret = queue_poll_cons(&smmu->cmdq.q, true, wfe); + if (!msi) + ret = queue_poll_cons(&smmu->cmdq.q, true, wfe); spin_unlock_irqrestore(&smmu->cmdq.lock, flags); + if (msi) + ret = arm_smmu_sync_poll_msi(smmu, ent.sync.msidata); if (ret) dev_err_ratelimited(smmu->dev, "CMD_SYNC timeout\n"); } @@ -2159,6 +2205,7 @@ static int arm_smmu_init_structures(struct arm_smmu_device *smmu) { int ret; + atomic_set(&smmu->sync_nr, 0); ret = arm_smmu_init_queues(smmu); if (ret) return ret; From 49806599c31d77b1050022aeb3da3051cd9f85f6 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 19 Oct 2017 16:41:53 +0100 Subject: [PATCH 29/54] iommu/arm-smmu-v3: Split arm_smmu_cmdq_issue_sync in half arm_smmu_cmdq_issue_sync is a little unwieldy now that it supports both MSI and event-based polling, so split it into two functions to make things easier to follow. Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu-v3.c | 53 ++++++++++++++++++++++++++----------- 1 file changed, 38 insertions(+), 15 deletions(-) diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index ceb8f9ef4bad..3876a0328589 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -984,7 +984,7 @@ static void arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu, * The difference between val and sync_idx is bounded by the maximum size of * a queue at 2^20 entries, so 32 bits is plenty for wrap-safe arithmetic. */ -static int arm_smmu_sync_poll_msi(struct arm_smmu_device *smmu, u32 sync_idx) +static int __arm_smmu_sync_poll_msi(struct arm_smmu_device *smmu, u32 sync_idx) { ktime_t timeout = ktime_add_us(ktime_get(), ARM_SMMU_SYNC_TIMEOUT_US); u32 val = smp_cond_load_acquire(&smmu->sync_count, @@ -994,30 +994,53 @@ static int arm_smmu_sync_poll_msi(struct arm_smmu_device *smmu, u32 sync_idx) return (int)(val - sync_idx) < 0 ? 
-ETIMEDOUT : 0; } -static void arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu) +static int __arm_smmu_cmdq_issue_sync_msi(struct arm_smmu_device *smmu) { u64 cmd[CMDQ_ENT_DWORDS]; unsigned long flags; - bool wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV); - bool msi = (smmu->features & ARM_SMMU_FEAT_MSI) && - (smmu->features & ARM_SMMU_FEAT_COHERENCY); - struct arm_smmu_cmdq_ent ent = { .opcode = CMDQ_OP_CMD_SYNC }; - int ret; + struct arm_smmu_cmdq_ent ent = { + .opcode = CMDQ_OP_CMD_SYNC, + .sync = { + .msidata = atomic_inc_return_relaxed(&smmu->sync_nr), + .msiaddr = virt_to_phys(&smmu->sync_count), + }, + }; - if (msi) { - ent.sync.msidata = atomic_inc_return_relaxed(&smmu->sync_nr); - ent.sync.msiaddr = virt_to_phys(&smmu->sync_count); - } arm_smmu_cmdq_build_cmd(cmd, &ent); spin_lock_irqsave(&smmu->cmdq.lock, flags); arm_smmu_cmdq_insert_cmd(smmu, cmd); - if (!msi) - ret = queue_poll_cons(&smmu->cmdq.q, true, wfe); spin_unlock_irqrestore(&smmu->cmdq.lock, flags); - if (msi) - ret = arm_smmu_sync_poll_msi(smmu, ent.sync.msidata); + return __arm_smmu_sync_poll_msi(smmu, ent.sync.msidata); +} + +static int __arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu) +{ + u64 cmd[CMDQ_ENT_DWORDS]; + unsigned long flags; + bool wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV); + struct arm_smmu_cmdq_ent ent = { .opcode = CMDQ_OP_CMD_SYNC }; + int ret; + + arm_smmu_cmdq_build_cmd(cmd, &ent); + + spin_lock_irqsave(&smmu->cmdq.lock, flags); + arm_smmu_cmdq_insert_cmd(smmu, cmd); + ret = queue_poll_cons(&smmu->cmdq.q, true, wfe); + spin_unlock_irqrestore(&smmu->cmdq.lock, flags); + + return ret; +} + +static void arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu) +{ + int ret; + bool msi = (smmu->features & ARM_SMMU_FEAT_MSI) && + (smmu->features & ARM_SMMU_FEAT_COHERENCY); + + ret = msi ? __arm_smmu_cmdq_issue_sync_msi(smmu) + : __arm_smmu_cmdq_issue_sync(smmu); if (ret) dev_err_ratelimited(smmu->dev, "CMD_SYNC timeout\n"); } From a529ea19aadb7a3bbcce3335ed4671adbe275b22 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 19 Oct 2017 16:49:30 +0100 Subject: [PATCH 30/54] iommu/arm-smmu-v3: Consolidate identical timeouts We have separate (identical) timeout values for polling for a queue to drain and waiting for an MSI to signal CMD_SYNC completion. In reality, we only wait for the command queue to drain if we're waiting on a sync, so just merge these two timeouts into a single constant. Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu-v3.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index 3876a0328589..c148f76dd8e2 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -418,8 +418,7 @@ /* High-level queue structures */ #define ARM_SMMU_POLL_TIMEOUT_US 100 -#define ARM_SMMU_CMDQ_DRAIN_TIMEOUT_US 1000000 /* 1s! */ -#define ARM_SMMU_SYNC_TIMEOUT_US 1000000 /* 1s! */ +#define ARM_SMMU_CMDQ_SYNC_TIMEOUT_US 1000000 /* 1s! */ #define MSI_IOVA_BASE 0x8000000 #define MSI_IOVA_LENGTH 0x100000 @@ -767,17 +766,17 @@ static void queue_inc_prod(struct arm_smmu_queue *q) * Wait for the SMMU to consume items. If drain is true, wait until the queue * is empty. Otherwise, wait until there is at least one free slot.
*/ -static int queue_poll_cons(struct arm_smmu_queue *q, bool drain, bool wfe) +static int queue_poll_cons(struct arm_smmu_queue *q, bool sync, bool wfe) { ktime_t timeout; unsigned int delay = 1; - /* Wait longer if it's queue drain */ - timeout = ktime_add_us(ktime_get(), drain ? - ARM_SMMU_CMDQ_DRAIN_TIMEOUT_US : + /* Wait longer if it's a CMD_SYNC */ + timeout = ktime_add_us(ktime_get(), sync ? + ARM_SMMU_CMDQ_SYNC_TIMEOUT_US : ARM_SMMU_POLL_TIMEOUT_US); - while (queue_sync_cons(q), (drain ? !queue_empty(q) : queue_full(q))) { + while (queue_sync_cons(q), (sync ? !queue_empty(q) : queue_full(q))) { if (ktime_compare(ktime_get(), timeout) > 0) return -ETIMEDOUT; @@ -986,10 +985,13 @@ static void arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu, */ static int __arm_smmu_sync_poll_msi(struct arm_smmu_device *smmu, u32 sync_idx) { - ktime_t timeout = ktime_add_us(ktime_get(), ARM_SMMU_SYNC_TIMEOUT_US); - u32 val = smp_cond_load_acquire(&smmu->sync_count, - (int)(VAL - sync_idx) >= 0 || - !ktime_before(ktime_get(), timeout)); + ktime_t timeout; + u32 val; + + timeout = ktime_add_us(ktime_get(), ARM_SMMU_CMDQ_SYNC_TIMEOUT_US); + val = smp_cond_load_acquire(&smmu->sync_count, + (int)(VAL - sync_idx) >= 0 || + !ktime_before(ktime_get(), timeout)); return (int)(val - sync_idx) < 0 ? -ETIMEDOUT : 0; } From 8ff0f72371709889349a706e19bb38d9f71b1669 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Wed, 18 Oct 2017 15:04:28 +0100 Subject: [PATCH 31/54] iommu/arm-smmu-v3: Use burst-polling for sync completion While CMD_SYNC is unlikely to complete immediately such that we never go round the polling loop, with a lightly-loaded queue it may still do so long before the delay period is up. If we have no better completion notifier, use similar logic as we have for SMMUv2 to spin a number of times before each backoff, so that we have more chance of catching syncs which complete relatively quickly and avoid delaying unnecessarily. Signed-off-by: Robin Murphy Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu-v3.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index c148f76dd8e2..bfab719190e8 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -419,6 +419,7 @@ /* High-level queue structures */ #define ARM_SMMU_POLL_TIMEOUT_US 100 #define ARM_SMMU_CMDQ_SYNC_TIMEOUT_US 1000000 /* 1s! */ +#define ARM_SMMU_CMDQ_SYNC_SPIN_COUNT 10 #define MSI_IOVA_BASE 0x8000000 #define MSI_IOVA_LENGTH 0x100000 @@ -769,7 +770,7 @@ static void queue_inc_prod(struct arm_smmu_queue *q) static int queue_poll_cons(struct arm_smmu_queue *q, bool sync, bool wfe) { ktime_t timeout; - unsigned int delay = 1; + unsigned int delay = 1, spin_cnt = 0; /* Wait longer if it's a CMD_SYNC */ timeout = ktime_add_us(ktime_get(), sync ? @@ -782,10 +783,13 @@ static int queue_poll_cons(struct arm_smmu_queue *q, bool sync, bool wfe) if (wfe) { wfe(); - } else { + } else if (++spin_cnt < ARM_SMMU_CMDQ_SYNC_SPIN_COUNT) { cpu_relax(); + continue; + } else { udelay(delay); delay *= 2; + spin_cnt = 0; } } From 07d1c91b6c649705fdd9acf58001071845ecf068 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Fri, 3 Nov 2017 10:50:31 -0600 Subject: [PATCH 32/54] iommu/amd: Fix alloc_irq_index() increment On an is_allocated() interrupt index, we ALIGN() the current index and then increment it via the for loop, guaranteeing that it is no longer aligned for alignments >1. 
We instead need to align the next index, to guarantee forward progress, moving the increment-only to the case where the index was found to be unallocated. Fixes: 37946d95fc1a ('iommu/amd: Add align parameter to alloc_irq_index()') Signed-off-by: Alex Williamson --- drivers/iommu/amd_iommu.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 9dc7facfd2e5..3c1a29104f0e 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -3682,13 +3682,12 @@ static int alloc_irq_index(u16 devid, int count, bool align) /* Scan table for free entries */ for (index = ALIGN(table->min_index, alignment), c = 0; - index < MAX_IRQS_PER_TABLE; - index++) { + index < MAX_IRQS_PER_TABLE;) { if (!iommu->irte_ops->is_allocated(table, index)) { c += 1; } else { c = 0; - index = ALIGN(index, alignment); + index = ALIGN(index + 1, alignment); continue; } @@ -3699,6 +3698,8 @@ static int alloc_irq_index(u16 devid, int count, bool align) index -= count - 1; goto out; } + + index++; } index = -ENOSPC; From 2c40367cbff6f9ed1efda238685837fb5f0d9e3c Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 3 Nov 2017 10:50:32 -0600 Subject: [PATCH 33/54] iommu/amd: remove unused variable flush_addr Variable flush_addr is being assigned but is never read; it is redundant and can be removed. Cleans up the clang warning: drivers/iommu/amd_iommu.c:2388:2: warning: Value stored to 'flush_addr' is never read Signed-off-by: Colin Ian King Signed-off-by: Alex Williamson --- drivers/iommu/amd_iommu.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 3c1a29104f0e..797e6454afd5 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -2382,11 +2382,9 @@ static void __unmap_single(struct dma_ops_domain *dma_dom, size_t size, int dir) { - dma_addr_t flush_addr; dma_addr_t i, start; unsigned int pages; - flush_addr = dma_addr; pages = iommu_num_pages(dma_addr, size, PAGE_SIZE); dma_addr &= PAGE_MASK; start = dma_addr; From 049541e178d5b1d003584aa0ad2a96101f0694d2 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Fri, 3 Nov 2017 10:50:33 -0600 Subject: [PATCH 34/54] iommu: qcom: wire up fault handler This is quite useful for debugging. Currently, always TERMINATE the translation when the fault handler returns (since this is all we need for debugging drivers). But I expect the SVM work should eventually let us do something more clever. 
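For context, report_iommu_fault() only does something useful once a client driver has registered a handler on the domain. A hedged sketch of what such a consumer might look like, using the generic iommu_set_fault_handler() API — the my_gpu structure and function names are invented for illustration, and the return value simply propagates back through report_iommu_fault() to the IRQ handler above:

    #include <linux/device.h>
    #include <linux/iommu.h>

    /* Hypothetical per-driver context, passed back via the token argument. */
    struct my_gpu {
        struct device *dev;
    };

    static int my_gpu_iommu_fault(struct iommu_domain *domain,
                                  struct device *dev, unsigned long iova,
                                  int flags, void *token)
    {
        struct my_gpu *gpu = token;

        /* Log the faulting address for debugging; the translation is
         * terminated by the IOMMU driver regardless (see above). */
        dev_warn(gpu->dev, "iommu fault at iova %#lx (flags %#x)\n",
                 iova, flags);
        return 0;
    }

    /* Called during init, after allocating and attaching the domain. */
    static void my_gpu_setup_fault_handler(struct my_gpu *gpu,
                                           struct iommu_domain *domain)
    {
        iommu_set_fault_handler(domain, my_gpu_iommu_fault, gpu);
    }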
Signed-off-by: Rob Clark Signed-off-by: Alex Williamson --- drivers/iommu/qcom_iommu.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/drivers/iommu/qcom_iommu.c b/drivers/iommu/qcom_iommu.c index 4a2c4378b3db..e07f02d00c68 100644 --- a/drivers/iommu/qcom_iommu.c +++ b/drivers/iommu/qcom_iommu.c @@ -66,6 +66,7 @@ struct qcom_iommu_ctx { void __iomem *base; bool secure_init; u8 asid; /* asid and ctx bank # are 1:1 */ + struct iommu_domain *domain; }; struct qcom_iommu_domain { @@ -194,12 +195,15 @@ static irqreturn_t qcom_iommu_fault(int irq, void *dev) fsynr = iommu_readl(ctx, ARM_SMMU_CB_FSYNR0); iova = iommu_readq(ctx, ARM_SMMU_CB_FAR); - dev_err_ratelimited(ctx->dev, - "Unhandled context fault: fsr=0x%x, " - "iova=0x%016llx, fsynr=0x%x, cb=%d\n", - fsr, iova, fsynr, ctx->asid); + if (!report_iommu_fault(ctx->domain, ctx->dev, iova, 0)) { + dev_err_ratelimited(ctx->dev, + "Unhandled context fault: fsr=0x%x, " + "iova=0x%016llx, fsynr=0x%x, cb=%d\n", + fsr, iova, fsynr, ctx->asid); + } iommu_writel(ctx, ARM_SMMU_CB_FSR, fsr); + iommu_writel(ctx, ARM_SMMU_CB_RESUME, RESUME_TERMINATE); return IRQ_HANDLED; } @@ -274,12 +278,14 @@ static int qcom_iommu_init_domain(struct iommu_domain *domain, /* SCTLR */ reg = SCTLR_CFIE | SCTLR_CFRE | SCTLR_AFE | SCTLR_TRE | - SCTLR_M | SCTLR_S1_ASIDPNE; + SCTLR_M | SCTLR_S1_ASIDPNE | SCTLR_CFCFG; if (IS_ENABLED(CONFIG_BIG_ENDIAN)) reg |= SCTLR_E; iommu_writel(ctx, ARM_SMMU_CB_SCTLR, reg); + + ctx->domain = domain; } mutex_unlock(&qcom_domain->init_mutex); @@ -395,6 +401,8 @@ static void qcom_iommu_detach_dev(struct iommu_domain *domain, struct device *de /* Disable the context bank: */ iommu_writel(ctx, ARM_SMMU_CB_SCTLR, 0); + + ctx->domain = NULL; } pm_runtime_put_sync(qcom_iommu->dev); From b92b4fb5c14257c0e7eae291ecc1f7b1962e1699 Mon Sep 17 00:00:00 2001 From: Gary R Hook Date: Fri, 3 Nov 2017 10:50:34 -0600 Subject: [PATCH 35/54] iommu/amd: Limit the IOVA page range to the specified addresses The extent of pages specified when applying a reserved region should include up to the last page of the range, but not the page following the range. Signed-off-by: Gary R Hook Fixes: 8d54d6c8b8f3 ('iommu/amd: Implement apply_dm_region call-back') Signed-off-by: Alex Williamson --- drivers/iommu/amd_iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 797e6454afd5..a8c111e96cc3 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -3151,7 +3151,7 @@ static void amd_iommu_apply_resv_region(struct device *dev, unsigned long start, end; start = IOVA_PFN(region->start); - end = IOVA_PFN(region->start + region->length); + end = IOVA_PFN(region->start + region->length - 1); WARN_ON_ONCE(reserve_iova(&dma_dom->iovad, start, end) == NULL); } From 2e2e35d512798193b4165ed884a5d1d9b181e231 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Fri, 3 Nov 2017 10:51:32 -0600 Subject: [PATCH 36/54] iommu/vt-d: Missing checks for pasid tables if allocation fails intel_svm_alloc_pasid_tables() might return an error, but the return value is never checked by the callers. Later, when intel_svm_bind_mm() is called, there are no checks for valid pasid tables before enabling them.
Signed-off-by: Ashok Raj Signed-off-by: Lu Baolu Reviewed-by: Liu, Yi L Signed-off-by: Alex Williamson --- drivers/iommu/intel-svm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c index f6697e55c2d4..43280ca282df 100644 --- a/drivers/iommu/intel-svm.c +++ b/drivers/iommu/intel-svm.c @@ -292,7 +292,7 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ int pasid_max; int ret; - if (WARN_ON(!iommu)) + if (WARN_ON(!iommu || !iommu->pasid_table)) return -EINVAL; if (dev_is_pci(dev)) { From 973b546451fdf11e518cc96d1b137af893a38db5 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Fri, 3 Nov 2017 10:51:33 -0600 Subject: [PATCH 37/54] iommu/vt-d: Clear Page Request Overflow fault bit Currently the Page Request Overflow bit in the IOMMU Fault Status register is not cleared. Not clearing this bit would mean that any future page request is going to be automatically dropped by the IOMMU. Suggested-by: Ashok Raj Signed-off-by: Lu Baolu Signed-off-by: Alex Williamson --- drivers/iommu/dmar.c | 3 ++- include/linux/intel-iommu.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index 1ea7cd537873..9a7ffd13c7f0 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -1679,7 +1679,8 @@ irqreturn_t dmar_fault(int irq, void *dev_id) raw_spin_lock_irqsave(&iommu->register_lock, flag); } - writel(DMA_FSTS_PFO | DMA_FSTS_PPF, iommu->reg + DMAR_FSTS_REG); + writel(DMA_FSTS_PFO | DMA_FSTS_PPF | DMA_FSTS_PRO, + iommu->reg + DMAR_FSTS_REG); unlock_exit: raw_spin_unlock_irqrestore(&iommu->register_lock, flag); diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 485a5b48f038..f3274d9f46a2 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -212,6 +212,7 @@ #define DMA_FSTS_IQE (1 << 4) #define DMA_FSTS_ICE (1 << 5) #define DMA_FSTS_ITE (1 << 6) +#define DMA_FSTS_PRO (1 << 7) #define dma_fsts_fault_record_index(s) (((s) >> 8) & 0xff) /* FRCD_REG, 32 bits access */ From 4fa064b26c2eb9e1c38460ba327765dff16a4b14 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Fri, 3 Nov 2017 10:51:34 -0600 Subject: [PATCH 38/54] iommu/vt-d: Clear pasid table entry when memory unbound In intel_svm_unbind_mm(), the pasid table entry must be cleared during svm free. Otherwise, hardware may be set up with a wild pointer. Suggested-by: Ashok Raj Signed-off-by: Lu Baolu Signed-off-by: Alex Williamson --- drivers/iommu/intel-svm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c index 43280ca282df..ed1cf7c5a43b 100644 --- a/drivers/iommu/intel-svm.c +++ b/drivers/iommu/intel-svm.c @@ -458,6 +458,8 @@ int intel_svm_unbind_mm(struct device *dev, int pasid) kfree_rcu(sdev, rcu); if (list_empty(&svm->devs)) { + svm->iommu->pasid_table[svm->pasid].val = 0; + wmb(); idr_remove(&svm->iommu->pasid_idr, svm->pasid); if (svm->mm) From 105a004e2187609a74f75d55fd0f9a054b49d60a Mon Sep 17 00:00:00 2001 From: "weiyongjun (A)" Date: Tue, 17 Oct 2017 12:11:22 +0000 Subject: [PATCH 39/54] iommu/ipmmu-vmsa: Fix return value check in ipmmu_find_group_dma() In case of error, the function iommu_group_get() returns a NULL pointer, not ERR_PTR(). The IS_ERR() test in the return value check should be replaced with a NULL test.
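The distinction this fix relies on is easy to trip over: some kernel APIs report failure with a NULL pointer, others with an errno encoded via ERR_PTR(), and IS_ERR() deliberately does not treat NULL as an error. A small self-contained user-space sketch of why the original check missed the failure case (the ERR_PTR stand-ins mirror the kernel's definitions; everything else is invented):

    #include <stdint.h>
    #include <stdio.h>

    /* User-space stand-ins for the kernel's ERR_PTR machinery. */
    #define MAX_ERRNO 4095

    static void *ERR_PTR(long error) { return (void *)error; }
    static long PTR_ERR(const void *ptr) { return (long)ptr; }
    static int IS_ERR(const void *ptr)
    {
        /* Only the top 4095 addresses count as encoded errnos. */
        return (uintptr_t)ptr >= (uintptr_t)-MAX_ERRNO;
    }

    int main(void)
    {
        void *group = NULL; /* what iommu_group_get() yields on failure */

        /* IS_ERR(NULL) is false, so this check misses the failure... */
        if (IS_ERR(group))
            puts("never reached");

        /* ...whereas the NULL test catches it. */
        if (!group)
            puts("lookup failed, fall back to a default group");

        /* IS_ERR() is for APIs that encode an errno in the pointer: */
        group = ERR_PTR(-12 /* -ENOMEM */);
        if (IS_ERR(group))
            printf("alloc failed: errno %ld\n", -PTR_ERR(group));
        return 0;
    }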
Fixes: 3ae47292024f ("iommu/ipmmu-vmsa: Add new IOMMU_DOMAIN_DMA ops") Signed-off-by: Wei Yongjun Signed-off-by: Alex Williamson --- drivers/iommu/ipmmu-vmsa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index af8140054273..00e88a88ee3a 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -871,7 +871,7 @@ static struct iommu_group *ipmmu_find_group_dma(struct device *dev) sibling = ipmmu_find_sibling_device(dev); if (sibling) group = iommu_group_get(sibling); - if (!sibling || IS_ERR(group)) + if (!sibling || !group) group = generic_device_group(dev); return group; From 1c7e7c0278df968221a5edb1a293423e13b13814 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 13 Oct 2017 19:23:39 +0100 Subject: [PATCH 40/54] iommu/ipmmu-vmsa: Unify domain alloc/free We have two implementations for ipmmu_ops->alloc depending on CONFIG_IOMMU_DMA, the difference being whether they accept the IOMMU_DOMAIN_DMA type or not. However, iommu_get_dma_cookie() is guaranteed to return an error when !CONFIG_IOMMU_DMA, so if ipmmu_domain_alloc_dma() was actually checking and handling the return value correctly, it would behave the same as ipmmu_domain_alloc() anyway. Similarly for freeing; iommu_put_dma_cookie() is robust by design. Signed-off-by: Robin Murphy Signed-off-by: Alex Williamson --- drivers/iommu/ipmmu-vmsa.c | 65 ++++++++++++++------------------------ 1 file changed, 24 insertions(+), 41 deletions(-) diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index 00e88a88ee3a..37154075c00a 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -528,6 +528,27 @@ static struct iommu_domain *__ipmmu_domain_alloc(unsigned type) return &domain->io_domain; } +static struct iommu_domain *ipmmu_domain_alloc(unsigned type) +{ + struct iommu_domain *io_domain = NULL; + + switch (type) { + case IOMMU_DOMAIN_UNMANAGED: + io_domain = __ipmmu_domain_alloc(type); + break; + + case IOMMU_DOMAIN_DMA: + io_domain = __ipmmu_domain_alloc(type); + if (io_domain && iommu_get_dma_cookie(io_domain)) { + kfree(io_domain); + io_domain = NULL; + } + break; + } + + return io_domain; +} + static void ipmmu_domain_free(struct iommu_domain *io_domain) { struct ipmmu_vmsa_domain *domain = to_vmsa_domain(io_domain); @@ -536,6 +557,7 @@ static void ipmmu_domain_free(struct iommu_domain *io_domain) * Free the domain resources. We assume that all devices have already * been detached.
*/ + iommu_put_dma_cookie(io_domain); ipmmu_domain_destroy_context(domain); free_io_pgtable_ops(domain->iop); kfree(domain); @@ -671,14 +693,6 @@ static int ipmmu_of_xlate(struct device *dev, #if defined(CONFIG_ARM) && !defined(CONFIG_IOMMU_DMA) -static struct iommu_domain *ipmmu_domain_alloc(unsigned type) -{ - if (type != IOMMU_DOMAIN_UNMANAGED) - return NULL; - - return __ipmmu_domain_alloc(type); -} - static int ipmmu_add_device(struct device *dev) { struct ipmmu_vmsa_device *mmu = NULL; @@ -779,37 +793,6 @@ static const struct iommu_ops ipmmu_ops = { static DEFINE_SPINLOCK(ipmmu_slave_devices_lock); static LIST_HEAD(ipmmu_slave_devices); -static struct iommu_domain *ipmmu_domain_alloc_dma(unsigned type) -{ - struct iommu_domain *io_domain = NULL; - - switch (type) { - case IOMMU_DOMAIN_UNMANAGED: - io_domain = __ipmmu_domain_alloc(type); - break; - - case IOMMU_DOMAIN_DMA: - io_domain = __ipmmu_domain_alloc(type); - if (io_domain) - iommu_get_dma_cookie(io_domain); - break; - } - - return io_domain; -} - -static void ipmmu_domain_free_dma(struct iommu_domain *io_domain) -{ - switch (io_domain->type) { - case IOMMU_DOMAIN_DMA: - iommu_put_dma_cookie(io_domain); - /* fall-through */ - default: - ipmmu_domain_free(io_domain); - break; - } -} - static int ipmmu_add_device_dma(struct device *dev) { struct iommu_group *group; @@ -878,8 +861,8 @@ static struct iommu_group *ipmmu_find_group_dma(struct device *dev) } static const struct iommu_ops ipmmu_ops = { - .domain_alloc = ipmmu_domain_alloc_dma, - .domain_free = ipmmu_domain_free_dma, + .domain_alloc = ipmmu_domain_alloc, + .domain_free = ipmmu_domain_free, .attach_dev = ipmmu_attach_device, .detach_dev = ipmmu_detach_device, .map = ipmmu_map, From b354c73edc7eb8d6ee643866e9e4de7842213b06 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 13 Oct 2017 19:23:40 +0100 Subject: [PATCH 41/54] iommu/ipmmu-vmsa: Simplify group allocation We go through quite the merry dance in order to find masters behind the same IPMMU instance, so that we can ensure they are grouped together. None of which is really necessary, since the master's private data already points to the particular IPMMU it is associated with, and that IPMMU instance data is the perfect place to keep track of a per-instance group directly. 
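The replacement is the classic lazily-allocated, reference-counted singleton; an annotated sketch of the logic the diff below introduces:

    static struct iommu_group *ipmmu_find_group(struct device *dev)
    {
        struct ipmmu_vmsa_device *mmu = to_priv(dev)->mmu;
        struct iommu_group *group;

        if (mmu->group)                 /* group exists: take another reference */
            return iommu_group_ref_get(mmu->group);

        group = iommu_group_alloc();    /* first master behind this IPMMU */
        if (!IS_ERR(group))
            mmu->group = group;         /* cache it for later masters */
        return group;
    }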
Signed-off-by: Robin Murphy Signed-off-by: Alex Williamson --- drivers/iommu/ipmmu-vmsa.c | 53 +++++++------------------------- 1 file changed, 9 insertions(+), 44 deletions(-) diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index 37154075c00a..6e6a86f3c375 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -43,6 +43,7 @@ struct ipmmu_vmsa_device { DECLARE_BITMAP(ctx, IPMMU_CTX_MAX); struct ipmmu_vmsa_domain *domains[IPMMU_CTX_MAX]; + struct iommu_group *group; struct dma_iommu_mapping *mapping; }; @@ -59,8 +60,6 @@ struct ipmmu_vmsa_domain { struct ipmmu_vmsa_iommu_priv { struct ipmmu_vmsa_device *mmu; - struct device *dev; - struct list_head list; }; static struct ipmmu_vmsa_domain *to_vmsa_domain(struct iommu_domain *dom) @@ -674,7 +673,6 @@ static int ipmmu_init_platform_device(struct device *dev, return -ENOMEM; priv->mmu = platform_get_drvdata(ipmmu_pdev); - priv->dev = dev; dev->iommu_fwspec->iommu_priv = priv; return 0; } @@ -790,9 +788,6 @@ static const struct iommu_ops ipmmu_ops = { #ifdef CONFIG_IOMMU_DMA -static DEFINE_SPINLOCK(ipmmu_slave_devices_lock); -static LIST_HEAD(ipmmu_slave_devices); - static int ipmmu_add_device_dma(struct device *dev) { struct iommu_group *group; @@ -807,55 +802,25 @@ static int ipmmu_add_device_dma(struct device *dev) if (IS_ERR(group)) return PTR_ERR(group); - spin_lock(&ipmmu_slave_devices_lock); - list_add(&to_priv(dev)->list, &ipmmu_slave_devices); - spin_unlock(&ipmmu_slave_devices_lock); return 0; } static void ipmmu_remove_device_dma(struct device *dev) { - struct ipmmu_vmsa_iommu_priv *priv = to_priv(dev); - - spin_lock(&ipmmu_slave_devices_lock); - list_del(&priv->list); - spin_unlock(&ipmmu_slave_devices_lock); - iommu_group_remove_device(dev); } -static struct device *ipmmu_find_sibling_device(struct device *dev) +static struct iommu_group *ipmmu_find_group(struct device *dev) { struct ipmmu_vmsa_iommu_priv *priv = to_priv(dev); - struct ipmmu_vmsa_iommu_priv *sibling_priv = NULL; - bool found = false; - - spin_lock(&ipmmu_slave_devices_lock); - - list_for_each_entry(sibling_priv, &ipmmu_slave_devices, list) { - if (priv == sibling_priv) - continue; - if (sibling_priv->mmu == priv->mmu) { - found = true; - break; - } - } - - spin_unlock(&ipmmu_slave_devices_lock); - - return found ? sibling_priv->dev : NULL; -} - -static struct iommu_group *ipmmu_find_group_dma(struct device *dev) -{ struct iommu_group *group; - struct device *sibling; - sibling = ipmmu_find_sibling_device(dev); - if (sibling) - group = iommu_group_get(sibling); - if (!sibling || !group) - group = generic_device_group(dev); + if (priv->mmu->group) + return iommu_group_ref_get(priv->mmu->group); + + group = iommu_group_alloc(); + if (!IS_ERR(group)) + priv->mmu->group = group; return group; } @@ -873,7 +838,7 @@ static const struct iommu_ops ipmmu_ops = { .iova_to_phys = ipmmu_iova_to_phys, .add_device = ipmmu_add_device_dma, .remove_device = ipmmu_remove_device_dma, - .device_group = ipmmu_find_group_dma, + .device_group = ipmmu_find_group, .pgsize_bitmap = SZ_1G | SZ_2M | SZ_4K, .of_xlate = ipmmu_of_xlate, }; From e4efe4a9a2ace658a36b5a4f515c11d4d36400a8 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 13 Oct 2017 19:23:41 +0100 Subject: [PATCH 42/54] iommu/ipmmu-vmsa: Clean up struct ipmmu_vmsa_iommu_priv Now that the IPMMU instance pointer is the only thing remaining in the private data structure, we no longer need the extra level of indirection and can simply stash that directly in the fwspec.
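Both ends of the resulting handshake fit in a few lines; a condensed sketch of what the diff below leaves in place:

    /* of_xlate time: stash the IPMMU instance itself in the fwspec ... */
    dev->iommu_fwspec->iommu_priv = platform_get_drvdata(ipmmu_pdev);

    /* ... lookup time: non-NULL means xlate() accepted this master. */
    static struct ipmmu_vmsa_device *to_ipmmu(struct device *dev)
    {
        return dev->iommu_fwspec ? dev->iommu_fwspec->iommu_priv : NULL;
    }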
Signed-off-by: Robin Murphy Signed-off-by: Alex Williamson --- drivers/iommu/ipmmu-vmsa.c | 36 ++++++++++++------------------------ 1 file changed, 12 insertions(+), 24 deletions(-) diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index 6e6a86f3c375..fd05a5f5a47e 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -58,16 +58,12 @@ struct ipmmu_vmsa_domain { spinlock_t lock; /* Protects mappings */ }; -struct ipmmu_vmsa_iommu_priv { - struct ipmmu_vmsa_device *mmu; -}; - static struct ipmmu_vmsa_domain *to_vmsa_domain(struct iommu_domain *dom) { return container_of(dom, struct ipmmu_vmsa_domain, io_domain); } -static struct ipmmu_vmsa_iommu_priv *to_priv(struct device *dev) +static struct ipmmu_vmsa_device *to_ipmmu(struct device *dev) { return dev->iommu_fwspec ? dev->iommu_fwspec->iommu_priv : NULL; } @@ -565,15 +561,14 @@ static void ipmmu_domain_free(struct iommu_domain *io_domain) static int ipmmu_attach_device(struct iommu_domain *io_domain, struct device *dev) { - struct ipmmu_vmsa_iommu_priv *priv = to_priv(dev); struct iommu_fwspec *fwspec = dev->iommu_fwspec; - struct ipmmu_vmsa_device *mmu = priv->mmu; + struct ipmmu_vmsa_device *mmu = to_ipmmu(dev); struct ipmmu_vmsa_domain *domain = to_vmsa_domain(io_domain); unsigned long flags; unsigned int i; int ret = 0; - if (!priv || !priv->mmu) { + if (!mmu) { dev_err(dev, "Cannot attach to IPMMU\n"); return -ENXIO; } @@ -662,18 +657,12 @@ static int ipmmu_init_platform_device(struct device *dev, struct of_phandle_args *args) { struct platform_device *ipmmu_pdev; - struct ipmmu_vmsa_iommu_priv *priv; ipmmu_pdev = of_find_device_by_node(args->np); if (!ipmmu_pdev) return -ENODEV; - priv = kzalloc(sizeof(*priv), GFP_KERNEL); - if (!priv) - return -ENOMEM; - - priv->mmu = platform_get_drvdata(ipmmu_pdev); - dev->iommu_fwspec->iommu_priv = priv; + dev->iommu_fwspec->iommu_priv = platform_get_drvdata(ipmmu_pdev); return 0; } @@ -683,7 +672,7 @@ static int ipmmu_of_xlate(struct device *dev, iommu_fwspec_add_ids(dev, spec->args, 1); /* Initialize once - xlate() will call multiple times */ - if (to_priv(dev)) + if (to_ipmmu(dev)) return 0; return ipmmu_init_platform_device(dev, spec); @@ -693,14 +682,14 @@ static int ipmmu_of_xlate(struct device *dev, static int ipmmu_add_device(struct device *dev) { - struct ipmmu_vmsa_device *mmu = NULL; + struct ipmmu_vmsa_device *mmu = to_ipmmu(dev); struct iommu_group *group; int ret; /* * Only let through devices that have been verified in xlate() */ - if (!to_priv(dev)) + if (!mmu) return -ENODEV; /* Create a device group and add the device to it. */ @@ -729,7 +718,6 @@ static int ipmmu_add_device(struct device *dev) * - Make the mapping size configurable ? We currently use a 2GB mapping * at a 1GB offset to ensure that NULL VAs will fault. 
*/ - mmu = to_priv(dev)->mmu; if (!mmu->mapping) { struct dma_iommu_mapping *mapping; @@ -795,7 +783,7 @@ static int ipmmu_add_device_dma(struct device *dev) /* * Only let through devices that have been verified in xlate() */ - if (!to_priv(dev)) + if (!to_ipmmu(dev)) return -ENODEV; group = iommu_group_get_for_dev(dev); @@ -812,15 +800,15 @@ static void ipmmu_remove_device_dma(struct device *dev) static struct iommu_group *ipmmu_find_group(struct device *dev) { - struct ipmmu_vmsa_iommu_priv *priv = to_priv(dev); + struct ipmmu_vmsa_device *mmu = to_ipmmu(dev); struct iommu_group *group; - if (priv->mmu->group) - return iommu_group_ref_get(priv->mmu->group); + if (mmu->group) + return iommu_group_ref_get(mmu->group); group = iommu_group_alloc(); if (!IS_ERR(group)) - priv->mmu->group = group; + mmu->group = group; return group; } From 49c875f030523d676a508e53f7dc3e592e9439d7 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 13 Oct 2017 19:23:42 +0100 Subject: [PATCH 43/54] iommu/ipmmu-vmsa: Unify ipmmu_ops The remaining difference between the ARM-specific and iommu-dma ops is in the {add,remove}_device implementations, but even those have some overlap and duplication. By stubbing out the few arm_iommu_*() calls, we can get rid of the rest of the inline #ifdeffery to both simplify the code and improve build coverage. Signed-off-by: Robin Murphy Signed-off-by: Alex Williamson --- drivers/iommu/ipmmu-vmsa.c | 69 +++++++++++--------------------------- 1 file changed, 19 insertions(+), 50 deletions(-) diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index fd05a5f5a47e..f6d2e8e650be 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -27,6 +27,11 @@ #if defined(CONFIG_ARM) && !defined(CONFIG_IOMMU_DMA) #include #include +#else +#define arm_iommu_create_mapping(...) NULL +#define arm_iommu_attach_device(...) -ENODEV +#define arm_iommu_release_mapping(...) do {} while (0) +#define arm_iommu_detach_device(...) do {} while (0) #endif #include "io-pgtable.h" @@ -678,26 +683,17 @@ static int ipmmu_of_xlate(struct device *dev, return ipmmu_init_platform_device(dev, spec); } -#if defined(CONFIG_ARM) && !defined(CONFIG_IOMMU_DMA) - -static int ipmmu_add_device(struct device *dev) +static int ipmmu_init_arm_mapping(struct device *dev) { struct ipmmu_vmsa_device *mmu = to_ipmmu(dev); struct iommu_group *group; int ret; - /* - * Only let through devices that have been verified in xlate() - */ - if (!mmu) - return -ENODEV; - /* Create a device group and add the device to it. 
*/ group = iommu_group_alloc(); if (IS_ERR(group)) { dev_err(dev, "Failed to allocate IOMMU group\n"); - ret = PTR_ERR(group); - goto error; + return PTR_ERR(group); } ret = iommu_group_add_device(group, dev); @@ -705,8 +701,7 @@ static int ipmmu_add_device(struct device *dev) if (ret < 0) { dev_err(dev, "Failed to add device to IPMMU group\n"); - group = NULL; - goto error; + return ret; } /* @@ -742,41 +737,14 @@ static int ipmmu_add_device(struct device *dev) return 0; error: - if (mmu) + iommu_group_remove_device(dev); + if (mmu->mapping) arm_iommu_release_mapping(mmu->mapping); - if (!IS_ERR_OR_NULL(group)) - iommu_group_remove_device(dev); - return ret; } -static void ipmmu_remove_device(struct device *dev) -{ - arm_iommu_detach_device(dev); - iommu_group_remove_device(dev); -} - -static const struct iommu_ops ipmmu_ops = { - .domain_alloc = ipmmu_domain_alloc, - .domain_free = ipmmu_domain_free, - .attach_dev = ipmmu_attach_device, - .detach_dev = ipmmu_detach_device, - .map = ipmmu_map, - .unmap = ipmmu_unmap, - .map_sg = default_iommu_map_sg, - .iova_to_phys = ipmmu_iova_to_phys, - .add_device = ipmmu_add_device, - .remove_device = ipmmu_remove_device, - .pgsize_bitmap = SZ_1G | SZ_2M | SZ_4K, - .of_xlate = ipmmu_of_xlate, -}; - -#endif /* !CONFIG_ARM && CONFIG_IOMMU_DMA */ - -#ifdef CONFIG_IOMMU_DMA - -static int ipmmu_add_device_dma(struct device *dev) +static int ipmmu_add_device(struct device *dev) { struct iommu_group *group; @@ -786,15 +754,20 @@ static int ipmmu_add_device_dma(struct device *dev) if (!to_ipmmu(dev)) return -ENODEV; + if (IS_ENABLED(CONFIG_ARM) && !IS_ENABLED(CONFIG_IOMMU_DMA)) + return ipmmu_init_arm_mapping(dev); + group = iommu_group_get_for_dev(dev); if (IS_ERR(group)) return PTR_ERR(group); + iommu_group_put(group); return 0; } -static void ipmmu_remove_device_dma(struct device *dev) +static void ipmmu_remove_device(struct device *dev) { + arm_iommu_detach_device(dev); iommu_group_remove_device(dev); } @@ -824,15 +797,13 @@ static const struct iommu_ops ipmmu_ops = { .iotlb_sync = ipmmu_iotlb_sync, .map_sg = default_iommu_map_sg, .iova_to_phys = ipmmu_iova_to_phys, - .add_device = ipmmu_add_device_dma, - .remove_device = ipmmu_remove_device_dma, + .add_device = ipmmu_add_device, + .remove_device = ipmmu_remove_device, .device_group = ipmmu_find_group, .pgsize_bitmap = SZ_1G | SZ_2M | SZ_4K, .of_xlate = ipmmu_of_xlate, }; -#endif /* CONFIG_IOMMU_DMA */ - /* ----------------------------------------------------------------------------- * Probe/remove and init */ @@ -929,9 +900,7 @@ static int ipmmu_remove(struct platform_device *pdev) iommu_device_sysfs_remove(&mmu->iommu); iommu_device_unregister(&mmu->iommu); -#if defined(CONFIG_ARM) && !defined(CONFIG_IOMMU_DMA) arm_iommu_release_mapping(mmu->mapping); -#endif ipmmu_device_reset(mmu); From 33f3ac9b511612153bae1d328b0c84c0367cd08d Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Mon, 16 Oct 2017 21:29:25 +0900 Subject: [PATCH 44/54] iommu/ipmmu-vmsa: Introduce features, break out alias Introduce struct ipmmu_features to track various hardware and software implementation changes inside the driver for different kinds of IPMMU hardware. Add use_ns_alias_offset as a first example of a feature to control if the secure register bank offset should be used or not. 
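The mechanism is the standard OF match-data pattern; a condensed sketch of how the pieces from the diff below fit together at probe time:

    static const struct ipmmu_features ipmmu_features_default = {
        .use_ns_alias_offset = true,
    };

    static const struct of_device_id ipmmu_of_ids[] = {
        { .compatible = "renesas,ipmmu-vmsa", .data = &ipmmu_features_default },
        { /* Terminator */ },
    };

    /* in ipmmu_probe(): resolve the per-SoC flags, then act on them */
    mmu->features = of_device_get_match_data(&pdev->dev);
    if (mmu->features->use_ns_alias_offset)
        mmu->base += IM_NS_ALIAS_OFFSET;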
Signed-off-by: Magnus Damm Signed-off-by: Alex Williamson --- drivers/iommu/ipmmu-vmsa.c | 31 ++++++++++++++++++++++++------- 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index f6d2e8e650be..5ce7879cb58d 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -38,11 +39,15 @@ #define IPMMU_CTX_MAX 1 +struct ipmmu_features { + bool use_ns_alias_offset; +}; + struct ipmmu_vmsa_device { struct device *dev; void __iomem *base; struct iommu_device iommu; - + const struct ipmmu_features *features; unsigned int num_utlbs; spinlock_t lock; /* Protects ctx and domains[] */ DECLARE_BITMAP(ctx, IPMMU_CTX_MAX); @@ -817,6 +822,21 @@ static void ipmmu_device_reset(struct ipmmu_vmsa_device *mmu) ipmmu_write(mmu, i * IM_CTX_SIZE + IMCTR, 0); } +static const struct ipmmu_features ipmmu_features_default = { + .use_ns_alias_offset = true, +}; + +static const struct of_device_id ipmmu_of_ids[] = { + { + .compatible = "renesas,ipmmu-vmsa", + .data = &ipmmu_features_default, + }, { + /* Terminator */ + }, +}; + +MODULE_DEVICE_TABLE(of, ipmmu_of_ids); + static int ipmmu_probe(struct platform_device *pdev) { struct ipmmu_vmsa_device *mmu; @@ -834,6 +854,7 @@ static int ipmmu_probe(struct platform_device *pdev) mmu->num_utlbs = 32; spin_lock_init(&mmu->lock); bitmap_zero(mmu->ctx, IPMMU_CTX_MAX); + mmu->features = of_device_get_match_data(&pdev->dev); /* Map I/O memory and request IRQ. */ res = platform_get_resource(pdev, IORESOURCE_MEM, 0); @@ -853,7 +874,8 @@ static int ipmmu_probe(struct platform_device *pdev) * Offset the registers base unconditionally to point to the non-secure * alias space for now. */ - mmu->base += IM_NS_ALIAS_OFFSET; + if (mmu->features->use_ns_alias_offset) + mmu->base += IM_NS_ALIAS_OFFSET; irq = platform_get_irq(pdev, 0); if (irq < 0) { @@ -907,11 +929,6 @@ static int ipmmu_remove(struct platform_device *pdev) return 0; } -static const struct of_device_id ipmmu_of_ids[] = { - { .compatible = "renesas,ipmmu-vmsa", }, - { } -}; - static struct platform_driver ipmmu_driver = { .driver = { .name = "ipmmu-vmsa", From fd5140e29a59e04a6c3e8cc56536bda3e60bbf49 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Mon, 16 Oct 2017 21:29:36 +0900 Subject: [PATCH 45/54] iommu/ipmmu-vmsa: Add optional root device feature Add root device handling to the IPMMU driver by allowing certain DT compat strings to enable has_cache_leaf_nodes that in turn will support both root devices with interrupts and leaf devices that face the actual IPMMU consumer devices. 
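Condensing the probe-time decision from the diff below: an instance is its own root unless the feature flag is set and the DT node references a main IPMMU, in which case the leaf has to wait for its root to probe first:

    if (!mmu->features->has_cache_leaf_nodes ||
        !of_find_property(pdev->dev.of_node, "renesas,ipmmu-main", NULL))
        mmu->root = mmu;                /* this instance is a root */
    else
        mmu->root = ipmmu_find_root();  /* leaf: look up the registered root */

    if (!mmu->root)
        return -EPROBE_DEFER;           /* root not probed yet, retry later */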
Signed-off-by: Magnus Damm Signed-off-by: Alex Williamson --- drivers/iommu/ipmmu-vmsa.c | 90 +++++++++++++++++++++++++++++++------- 1 file changed, 73 insertions(+), 17 deletions(-) diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index 5ce7879cb58d..5db853b92d3b 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -41,12 +41,14 @@ struct ipmmu_features { bool use_ns_alias_offset; + bool has_cache_leaf_nodes; }; struct ipmmu_vmsa_device { struct device *dev; void __iomem *base; struct iommu_device iommu; + struct ipmmu_vmsa_device *root; const struct ipmmu_features *features; unsigned int num_utlbs; spinlock_t lock; /* Protects ctx and domains[] */ @@ -198,6 +200,36 @@ static struct ipmmu_vmsa_device *to_ipmmu(struct device *dev) #define IMUASID_ASID0_MASK (0xff << 0) #define IMUASID_ASID0_SHIFT 0 +/* ----------------------------------------------------------------------------- + * Root device handling + */ + +static struct platform_driver ipmmu_driver; + +static bool ipmmu_is_root(struct ipmmu_vmsa_device *mmu) +{ + return mmu->root == mmu; +} + +static int __ipmmu_check_device(struct device *dev, void *data) +{ + struct ipmmu_vmsa_device *mmu = dev_get_drvdata(dev); + struct ipmmu_vmsa_device **rootp = data; + + if (ipmmu_is_root(mmu)) + *rootp = mmu; + + return 0; +} + +static struct ipmmu_vmsa_device *ipmmu_find_root(void) +{ + struct ipmmu_vmsa_device *root = NULL; + + return driver_for_each_device(&ipmmu_driver.driver, NULL, &root, + __ipmmu_check_device) == 0 ? root : NULL; +} + /* ----------------------------------------------------------------------------- * Read/Write Access */ @@ -215,13 +247,15 @@ static void ipmmu_write(struct ipmmu_vmsa_device *mmu, unsigned int offset, static u32 ipmmu_ctx_read(struct ipmmu_vmsa_domain *domain, unsigned int reg) { - return ipmmu_read(domain->mmu, domain->context_id * IM_CTX_SIZE + reg); + return ipmmu_read(domain->mmu->root, + domain->context_id * IM_CTX_SIZE + reg); } static void ipmmu_ctx_write(struct ipmmu_vmsa_domain *domain, unsigned int reg, u32 data) { - ipmmu_write(domain->mmu, domain->context_id * IM_CTX_SIZE + reg, data); + ipmmu_write(domain->mmu->root, + domain->context_id * IM_CTX_SIZE + reg, data); } /* ----------------------------------------------------------------------------- @@ -369,12 +403,12 @@ static int ipmmu_domain_init_context(struct ipmmu_vmsa_domain *domain) * TODO: Add support for coherent walk through CCI with DVM and remove * cache handling. For now, delegate it to the io-pgtable code. */ - domain->cfg.iommu_dev = domain->mmu->dev; + domain->cfg.iommu_dev = domain->mmu->root->dev; /* * Find an unused context. 
*/ - ret = ipmmu_domain_allocate_context(domain->mmu, domain); + ret = ipmmu_domain_allocate_context(domain->mmu->root, domain); if (ret == IPMMU_CTX_MAX) return -EBUSY; @@ -383,7 +417,8 @@ static int ipmmu_domain_init_context(struct ipmmu_vmsa_domain *domain) domain->iop = alloc_io_pgtable_ops(ARM_32_LPAE_S1, &domain->cfg, domain); if (!domain->iop) { - ipmmu_domain_free_context(domain->mmu, domain->context_id); + ipmmu_domain_free_context(domain->mmu->root, + domain->context_id); return -EINVAL; } @@ -437,7 +472,7 @@ static void ipmmu_domain_destroy_context(struct ipmmu_vmsa_domain *domain) */ ipmmu_ctx_write(domain, IMCTR, IMCTR_FLUSH); ipmmu_tlb_sync(domain); - ipmmu_domain_free_context(domain->mmu, domain->context_id); + ipmmu_domain_free_context(domain->mmu->root, domain->context_id); } /* ----------------------------------------------------------------------------- @@ -824,6 +859,7 @@ static void ipmmu_device_reset(struct ipmmu_vmsa_device *mmu) static const struct ipmmu_features ipmmu_features_default = { .use_ns_alias_offset = true, + .has_cache_leaf_nodes = false, }; static const struct of_device_id ipmmu_of_ids[] = { @@ -878,19 +914,39 @@ static int ipmmu_probe(struct platform_device *pdev) mmu->base += IM_NS_ALIAS_OFFSET; irq = platform_get_irq(pdev, 0); - if (irq < 0) { - dev_err(&pdev->dev, "no IRQ found\n"); - return irq; - } - ret = devm_request_irq(&pdev->dev, irq, ipmmu_irq, 0, - dev_name(&pdev->dev), mmu); - if (ret < 0) { - dev_err(&pdev->dev, "failed to request IRQ %d\n", irq); - return ret; - } + /* + * Determine if this IPMMU instance is a root device by checking for + * the lack of has_cache_leaf_nodes flag or renesas,ipmmu-main property. + */ + if (!mmu->features->has_cache_leaf_nodes || + !of_find_property(pdev->dev.of_node, "renesas,ipmmu-main", NULL)) + mmu->root = mmu; + else + mmu->root = ipmmu_find_root(); - ipmmu_device_reset(mmu); + /* + * Wait until the root device has been registered for sure. + */ + if (!mmu->root) + return -EPROBE_DEFER; + + /* Root devices have mandatory IRQs */ + if (ipmmu_is_root(mmu)) { + if (irq < 0) { + dev_err(&pdev->dev, "no IRQ found\n"); + return irq; + } + + ret = devm_request_irq(&pdev->dev, irq, ipmmu_irq, 0, + dev_name(&pdev->dev), mmu); + if (ret < 0) { + dev_err(&pdev->dev, "failed to request IRQ %d\n", irq); + return ret; + } + + ipmmu_device_reset(mmu); + } ret = iommu_device_sysfs_add(&mmu->iommu, &pdev->dev, NULL, dev_name(&pdev->dev)); From 5fd163416fb7b6592521c39f867d5ae6360e7924 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Mon, 16 Oct 2017 21:29:46 +0900 Subject: [PATCH 46/54] iommu/ipmmu-vmsa: Enable multi context support Add support for up to 8 contexts. Each context is mapped to one domain. One domain is assigned one or more slave devices. Contexts are allocated dynamically and slave devices are grouped together based on which IPMMU device they are connected to. This makes slave devices tied to the same IPMMU device share the same IOVA space. 
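The allocator itself is a small bitmap search under the per-instance lock; an annotated sketch of the hunk in the diff below:

    spin_lock_irqsave(&mmu->lock, flags);

    ret = find_first_zero_bit(mmu->ctx, mmu->num_ctx);
    if (ret != mmu->num_ctx) {
        mmu->domains[ret] = domain;     /* context 'ret' now backs this domain */
        set_bit(ret, mmu->ctx);
    } else {
        ret = -EBUSY;                   /* all hardware contexts are in use */
    }

    spin_unlock_irqrestore(&mmu->lock, flags);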
Signed-off-by: Magnus Damm Signed-off-by: Alex Williamson --- drivers/iommu/ipmmu-vmsa.c | 30 ++++++++++++++++++++++-------- 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index 5db853b92d3b..c70efd80f740 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -37,11 +37,12 @@ #include "io-pgtable.h" -#define IPMMU_CTX_MAX 1 +#define IPMMU_CTX_MAX 8 struct ipmmu_features { bool use_ns_alias_offset; bool has_cache_leaf_nodes; + unsigned int number_of_contexts; }; struct ipmmu_vmsa_device { @@ -51,6 +52,7 @@ struct ipmmu_vmsa_device { struct ipmmu_vmsa_device *root; const struct ipmmu_features *features; unsigned int num_utlbs; + unsigned int num_ctx; spinlock_t lock; /* Protects ctx and domains[] */ DECLARE_BITMAP(ctx, IPMMU_CTX_MAX); struct ipmmu_vmsa_domain *domains[IPMMU_CTX_MAX]; @@ -352,11 +354,12 @@ static int ipmmu_domain_allocate_context(struct ipmmu_vmsa_device *mmu, spin_lock_irqsave(&mmu->lock, flags); - ret = find_first_zero_bit(mmu->ctx, IPMMU_CTX_MAX); - if (ret != IPMMU_CTX_MAX) { + ret = find_first_zero_bit(mmu->ctx, mmu->num_ctx); + if (ret != mmu->num_ctx) { mmu->domains[ret] = domain; set_bit(ret, mmu->ctx); - } + } else + ret = -EBUSY; spin_unlock_irqrestore(&mmu->lock, flags); @@ -409,8 +412,8 @@ static int ipmmu_domain_init_context(struct ipmmu_vmsa_domain *domain) * Find an unused context. */ ret = ipmmu_domain_allocate_context(domain->mmu->root, domain); - if (ret == IPMMU_CTX_MAX) - return -EBUSY; + if (ret < 0) + return ret; domain->context_id = ret; @@ -539,7 +542,7 @@ static irqreturn_t ipmmu_irq(int irq, void *dev) /* * Check interrupts for all active contexts. */ - for (i = 0; i < IPMMU_CTX_MAX; i++) { + for (i = 0; i < mmu->num_ctx; i++) { if (!mmu->domains[i]) continue; if (ipmmu_domain_irq(mmu->domains[i]) == IRQ_HANDLED) @@ -624,6 +627,13 @@ static int ipmmu_attach_device(struct iommu_domain *io_domain, /* The domain hasn't been used yet, initialize it. */ domain->mmu = mmu; ret = ipmmu_domain_init_context(domain); + if (ret < 0) { + dev_err(dev, "Unable to initialize IPMMU context\n"); + domain->mmu = NULL; + } else { + dev_info(dev, "Using IPMMU context %u\n", + domain->context_id); + } } else if (domain->mmu != mmu) { /* * Something is wrong, we can't attach two devices using @@ -853,13 +863,14 @@ static void ipmmu_device_reset(struct ipmmu_vmsa_device *mmu) unsigned int i; /* Disable all contexts. */ - for (i = 0; i < 4; ++i) + for (i = 0; i < mmu->num_ctx; ++i) ipmmu_write(mmu, i * IM_CTX_SIZE + IMCTR, 0); } static const struct ipmmu_features ipmmu_features_default = { .use_ns_alias_offset = true, .has_cache_leaf_nodes = false, + .number_of_contexts = 1, /* software only tested with one context */ }; static const struct of_device_id ipmmu_of_ids[] = { @@ -913,6 +924,9 @@ static int ipmmu_probe(struct platform_device *pdev) if (mmu->features->use_ns_alias_offset) mmu->base += IM_NS_ALIAS_OFFSET; + mmu->num_ctx = min_t(unsigned int, IPMMU_CTX_MAX, + mmu->features->number_of_contexts); + irq = platform_get_irq(pdev, 0); /* From cda52fcd999f389c6f24f079910a62e53912d411 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Mon, 16 Oct 2017 21:29:57 +0900 Subject: [PATCH 47/54] iommu/ipmmu-vmsa: Make use of IOMMU_OF_DECLARE() Hook up IOMMU_OF_DECLARE() support when CONFIG_IOMMU_DMA is enabled. The only currently supported case for 32-bit ARM is disabled; for 64-bit ARM, however, usage of OF is required.
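Because the early OF hook and the regular initcall can now both reach the init path, it has to be idempotent; a sketch of the guard from the diff below:

    static int __init ipmmu_init(void)
    {
        static bool setup_done;
        int ret;

        if (setup_done)         /* already ran via IOMMU_OF_DECLARE() */
            return 0;

        ret = platform_driver_register(&ipmmu_driver);
        if (ret < 0)
            return ret;

        setup_done = true;
        return 0;
    }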
Signed-off-by: Magnus Damm Signed-off-by: Alex Williamson --- drivers/iommu/ipmmu-vmsa.c | 50 +++++++++++++++++++++++++++++++------- 1 file changed, 41 insertions(+), 9 deletions(-) diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index c70efd80f740..9cde61970bb5 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -962,17 +963,30 @@ static int ipmmu_probe(struct platform_device *pdev) ipmmu_device_reset(mmu); } - ret = iommu_device_sysfs_add(&mmu->iommu, &pdev->dev, NULL, - dev_name(&pdev->dev)); - if (ret) - return ret; + /* + * Register the IPMMU to the IOMMU subsystem in the following cases: + * - R-Car Gen2 IPMMU (all devices registered) + * - R-Car Gen3 IPMMU (leaf devices only - skip root IPMMU-MM device) + */ + if (!mmu->features->has_cache_leaf_nodes || !ipmmu_is_root(mmu)) { + ret = iommu_device_sysfs_add(&mmu->iommu, &pdev->dev, NULL, + dev_name(&pdev->dev)); + if (ret) + return ret; - iommu_device_set_ops(&mmu->iommu, &ipmmu_ops); - iommu_device_set_fwnode(&mmu->iommu, &pdev->dev.of_node->fwnode); + iommu_device_set_ops(&mmu->iommu, &ipmmu_ops); + iommu_device_set_fwnode(&mmu->iommu, + &pdev->dev.of_node->fwnode); - ret = iommu_device_register(&mmu->iommu); - if (ret) - return ret; + ret = iommu_device_register(&mmu->iommu); + if (ret) + return ret; + +#if defined(CONFIG_IOMMU_DMA) + if (!iommu_present(&platform_bus_type)) + bus_set_iommu(&platform_bus_type, &ipmmu_ops); +#endif + } /* * We can't create the ARM mapping here as it requires the bus to have @@ -1010,15 +1024,22 @@ static struct platform_driver ipmmu_driver = { static int __init ipmmu_init(void) { + static bool setup_done; int ret; + if (setup_done) + return 0; + ret = platform_driver_register(&ipmmu_driver); if (ret < 0) return ret; +#if defined(CONFIG_ARM) && !defined(CONFIG_IOMMU_DMA) if (!iommu_present(&platform_bus_type)) bus_set_iommu(&platform_bus_type, &ipmmu_ops); +#endif + setup_done = true; return 0; } @@ -1030,6 +1051,17 @@ static void __exit ipmmu_exit(void) subsys_initcall(ipmmu_init); module_exit(ipmmu_exit); +#ifdef CONFIG_IOMMU_DMA +static int __init ipmmu_vmsa_iommu_of_setup(struct device_node *np) +{ + ipmmu_init(); + return 0; +} + +IOMMU_OF_DECLARE(ipmmu_vmsa_iommu_of, "renesas,ipmmu-vmsa", + ipmmu_vmsa_iommu_of_setup); +#endif + MODULE_DESCRIPTION("IOMMU API for Renesas VMSA-compatible IPMMU"); MODULE_AUTHOR("Laurent Pinchart "); MODULE_LICENSE("GPL v2"); From 1c894225bf5b1cdffac0c6ef935b61273203d7d5 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Mon, 16 Oct 2017 21:30:07 +0900 Subject: [PATCH 48/54] iommu/ipmmu-vmsa: IPMMU device is 40-bit bus master The r8a7795 IPMMU supports 40-bit bus mastering. Both the coherent DMA mask and the streaming DMA mask are set to unlock the 40-bit address space for coherent allocations and streaming operations. Signed-off-by: Magnus Damm Signed-off-by: Alex Williamson --- drivers/iommu/ipmmu-vmsa.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index 9cde61970bb5..6b74ec62f4b4 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -903,6 +903,7 @@ static int ipmmu_probe(struct platform_device *pdev) spin_lock_init(&mmu->lock); bitmap_zero(mmu->ctx, IPMMU_CTX_MAX); mmu->features = of_device_get_match_data(&pdev->dev); + dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(40)); /* Map I/O memory and request IRQ. 
*/ res = platform_get_resource(pdev, IORESOURCE_MEM, 0); From d574893aee991efa67fefa849347c49de5df8108 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Mon, 16 Oct 2017 21:30:18 +0900 Subject: [PATCH 49/54] iommu/ipmmu-vmsa: Write IMCTR twice Write IMCTR both in the root device and the leaf node. To allow access of IMCTR introduce the following function: - ipmmu_ctx_write_all() While at it also rename context functions: - ipmmu_ctx_read() -> ipmmu_ctx_read_root() - ipmmu_ctx_write() -> ipmmu_ctx_write_root() Signed-off-by: Magnus Damm Signed-off-by: Alex Williamson --- drivers/iommu/ipmmu-vmsa.c | 56 ++++++++++++++++++++++++-------------- 1 file changed, 35 insertions(+), 21 deletions(-) diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index 6b74ec62f4b4..7587017972b0 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -248,19 +248,31 @@ static void ipmmu_write(struct ipmmu_vmsa_device *mmu, unsigned int offset, iowrite32(data, mmu->base + offset); } -static u32 ipmmu_ctx_read(struct ipmmu_vmsa_domain *domain, unsigned int reg) +static u32 ipmmu_ctx_read_root(struct ipmmu_vmsa_domain *domain, + unsigned int reg) { return ipmmu_read(domain->mmu->root, domain->context_id * IM_CTX_SIZE + reg); } -static void ipmmu_ctx_write(struct ipmmu_vmsa_domain *domain, unsigned int reg, - u32 data) +static void ipmmu_ctx_write_root(struct ipmmu_vmsa_domain *domain, + unsigned int reg, u32 data) { ipmmu_write(domain->mmu->root, domain->context_id * IM_CTX_SIZE + reg, data); } +static void ipmmu_ctx_write_all(struct ipmmu_vmsa_domain *domain, + unsigned int reg, u32 data) +{ + if (domain->mmu != domain->mmu->root) + ipmmu_write(domain->mmu, + domain->context_id * IM_CTX_SIZE + reg, data); + + ipmmu_write(domain->mmu->root, + domain->context_id * IM_CTX_SIZE + reg, data); +} + /* ----------------------------------------------------------------------------- * TLB and microTLB Management */ @@ -270,7 +282,7 @@ static void ipmmu_tlb_sync(struct ipmmu_vmsa_domain *domain) { unsigned int count = 0; - while (ipmmu_ctx_read(domain, IMCTR) & IMCTR_FLUSH) { + while (ipmmu_ctx_read_root(domain, IMCTR) & IMCTR_FLUSH) { cpu_relax(); if (++count == TLB_LOOP_TIMEOUT) { dev_err_ratelimited(domain->mmu->dev, @@ -285,9 +297,9 @@ static void ipmmu_tlb_invalidate(struct ipmmu_vmsa_domain *domain) { u32 reg; - reg = ipmmu_ctx_read(domain, IMCTR); + reg = ipmmu_ctx_read_root(domain, IMCTR); reg |= IMCTR_FLUSH; - ipmmu_ctx_write(domain, IMCTR, reg); + ipmmu_ctx_write_all(domain, IMCTR, reg); ipmmu_tlb_sync(domain); } @@ -428,31 +440,32 @@ static int ipmmu_domain_init_context(struct ipmmu_vmsa_domain *domain) /* TTBR0 */ ttbr = domain->cfg.arm_lpae_s1_cfg.ttbr[0]; - ipmmu_ctx_write(domain, IMTTLBR0, ttbr); - ipmmu_ctx_write(domain, IMTTUBR0, ttbr >> 32); + ipmmu_ctx_write_root(domain, IMTTLBR0, ttbr); + ipmmu_ctx_write_root(domain, IMTTUBR0, ttbr >> 32); /* * TTBCR * We use long descriptors with inner-shareable WBWA tables and allocate * the whole 32-bit VA space to TTBR0. 
*/ - ipmmu_ctx_write(domain, IMTTBCR, IMTTBCR_EAE | - IMTTBCR_SH0_INNER_SHAREABLE | IMTTBCR_ORGN0_WB_WA | - IMTTBCR_IRGN0_WB_WA | IMTTBCR_SL0_LVL_1); + ipmmu_ctx_write_root(domain, IMTTBCR, IMTTBCR_EAE | + IMTTBCR_SH0_INNER_SHAREABLE | IMTTBCR_ORGN0_WB_WA | + IMTTBCR_IRGN0_WB_WA | IMTTBCR_SL0_LVL_1); /* MAIR0 */ - ipmmu_ctx_write(domain, IMMAIR0, domain->cfg.arm_lpae_s1_cfg.mair[0]); + ipmmu_ctx_write_root(domain, IMMAIR0, + domain->cfg.arm_lpae_s1_cfg.mair[0]); /* IMBUSCR */ - ipmmu_ctx_write(domain, IMBUSCR, - ipmmu_ctx_read(domain, IMBUSCR) & - ~(IMBUSCR_DVM | IMBUSCR_BUSSEL_MASK)); + ipmmu_ctx_write_root(domain, IMBUSCR, + ipmmu_ctx_read_root(domain, IMBUSCR) & + ~(IMBUSCR_DVM | IMBUSCR_BUSSEL_MASK)); /* * IMSTR * Clear all interrupt flags. */ - ipmmu_ctx_write(domain, IMSTR, ipmmu_ctx_read(domain, IMSTR)); + ipmmu_ctx_write_root(domain, IMSTR, ipmmu_ctx_read_root(domain, IMSTR)); /* * IMCTR @@ -461,7 +474,8 @@ static int ipmmu_domain_init_context(struct ipmmu_vmsa_domain *domain) * software management as we have no use for it. Flush the TLB as * required when modifying the context registers. */ - ipmmu_ctx_write(domain, IMCTR, IMCTR_INTEN | IMCTR_FLUSH | IMCTR_MMUEN); + ipmmu_ctx_write_all(domain, IMCTR, + IMCTR_INTEN | IMCTR_FLUSH | IMCTR_MMUEN); return 0; } @@ -474,7 +488,7 @@ static void ipmmu_domain_destroy_context(struct ipmmu_vmsa_domain *domain) * * TODO: Is TLB flush really needed ? */ - ipmmu_ctx_write(domain, IMCTR, IMCTR_FLUSH); + ipmmu_ctx_write_all(domain, IMCTR, IMCTR_FLUSH); ipmmu_tlb_sync(domain); ipmmu_domain_free_context(domain->mmu->root, domain->context_id); } @@ -490,11 +504,11 @@ static irqreturn_t ipmmu_domain_irq(struct ipmmu_vmsa_domain *domain) u32 status; u32 iova; - status = ipmmu_ctx_read(domain, IMSTR); + status = ipmmu_ctx_read_root(domain, IMSTR); if (!(status & err_mask)) return IRQ_NONE; - iova = ipmmu_ctx_read(domain, IMEAR); + iova = ipmmu_ctx_read_root(domain, IMEAR); /* * Clear the error status flags. Unlike traditional interrupt flag @@ -502,7 +516,7 @@ static irqreturn_t ipmmu_domain_irq(struct ipmmu_vmsa_domain *domain) * seems to require 0. The error address register must be read before, * otherwise its value will be 0. */ - ipmmu_ctx_write(domain, IMSTR, 0); + ipmmu_ctx_write_root(domain, IMSTR, 0); /* Log fatal errors. */ if (status & IMSTR_MHIT) From f5c858912acd2b17059ebe6f34abac183bdfbf80 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Mon, 16 Oct 2017 21:30:28 +0900 Subject: [PATCH 50/54] iommu/ipmmu-vmsa: Make IMBUSCTR setup optional Introduce a feature to allow opt-out of setting up IMBUSCR. The default case is unchanged. 
Signed-off-by: Magnus Damm Signed-off-by: Alex Williamson --- drivers/iommu/ipmmu-vmsa.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index 7587017972b0..49f2c697b108 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -44,6 +44,7 @@ struct ipmmu_features { bool use_ns_alias_offset; bool has_cache_leaf_nodes; unsigned int number_of_contexts; + bool setup_imbuscr; }; struct ipmmu_vmsa_device { @@ -457,9 +458,10 @@ static int ipmmu_domain_init_context(struct ipmmu_vmsa_domain *domain) domain->cfg.arm_lpae_s1_cfg.mair[0]); /* IMBUSCR */ - ipmmu_ctx_write_root(domain, IMBUSCR, - ipmmu_ctx_read_root(domain, IMBUSCR) & - ~(IMBUSCR_DVM | IMBUSCR_BUSSEL_MASK)); + if (domain->mmu->features->setup_imbuscr) + ipmmu_ctx_write_root(domain, IMBUSCR, + ipmmu_ctx_read_root(domain, IMBUSCR) & + ~(IMBUSCR_DVM | IMBUSCR_BUSSEL_MASK)); /* * IMSTR @@ -886,6 +888,7 @@ static const struct ipmmu_features ipmmu_features_default = { .use_ns_alias_offset = true, .has_cache_leaf_nodes = false, .number_of_contexts = 1, /* software only tested with one context */ + .setup_imbuscr = true, }; static const struct of_device_id ipmmu_of_ids[] = { From c295f504fb5a38abbb4094e687ee333a75613a0c Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Mon, 16 Oct 2017 21:30:39 +0900 Subject: [PATCH 51/54] iommu/ipmmu-vmsa: Allow two bit SL0 Introduce support for two bit SL0 bitfield in IMTTBCR by using a separate feature flag. Signed-off-by: Magnus Damm Signed-off-by: Alex Williamson --- drivers/iommu/ipmmu-vmsa.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index 49f2c697b108..65ad6910cb70 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -45,6 +45,7 @@ struct ipmmu_features { bool has_cache_leaf_nodes; unsigned int number_of_contexts; bool setup_imbuscr; + bool twobit_imttbcr_sl0; }; struct ipmmu_vmsa_device { @@ -144,6 +145,10 @@ static struct ipmmu_vmsa_device *to_ipmmu(struct device *dev) #define IMTTBCR_TSZ0_MASK (7 << 0) #define IMTTBCR_TSZ0_SHIFT O +#define IMTTBCR_SL0_TWOBIT_LVL_3 (0 << 6) +#define IMTTBCR_SL0_TWOBIT_LVL_2 (1 << 6) +#define IMTTBCR_SL0_TWOBIT_LVL_1 (2 << 6) + #define IMBUSCR 0x000c #define IMBUSCR_DVM (1 << 2) #define IMBUSCR_BUSSEL_SYS (0 << 0) @@ -396,6 +401,7 @@ static void ipmmu_domain_free_context(struct ipmmu_vmsa_device *mmu, static int ipmmu_domain_init_context(struct ipmmu_vmsa_domain *domain) { u64 ttbr; + u32 tmp; int ret; /* @@ -449,9 +455,14 @@ static int ipmmu_domain_init_context(struct ipmmu_vmsa_domain *domain) * We use long descriptors with inner-shareable WBWA tables and allocate * the whole 32-bit VA space to TTBR0. 
*/ + if (domain->mmu->features->twobit_imttbcr_sl0) + tmp = IMTTBCR_SL0_TWOBIT_LVL_1; + else + tmp = IMTTBCR_SL0_LVL_1; + ipmmu_ctx_write_root(domain, IMTTBCR, IMTTBCR_EAE | IMTTBCR_SH0_INNER_SHAREABLE | IMTTBCR_ORGN0_WB_WA | - IMTTBCR_IRGN0_WB_WA | IMTTBCR_SL0_LVL_1); + IMTTBCR_IRGN0_WB_WA | tmp); /* MAIR0 */ ipmmu_ctx_write_root(domain, IMMAIR0, @@ -889,6 +900,7 @@ static const struct ipmmu_features ipmmu_features_default = { .has_cache_leaf_nodes = false, .number_of_contexts = 1, /* software only tested with one context */ .setup_imbuscr = true, + .twobit_imttbcr_sl0 = false, }; From 58b8e8bf409236cdea379b8a3ab5d7b85a003d22 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Mon, 16 Oct 2017 21:30:50 +0900 Subject: [PATCH 52/54] iommu/ipmmu-vmsa: Hook up r8a7795 DT matching code Tie in r8a7795 features and add an IOMMU_OF_DECLARE() entry for the new compat string. Signed-off-by: Magnus Damm Signed-off-by: Alex Williamson --- drivers/iommu/ipmmu-vmsa.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index 65ad6910cb70..8dce3a9de9d8 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -25,6 +25,7 @@ #include #include #include +#include #if defined(CONFIG_ARM) && !defined(CONFIG_IOMMU_DMA) #include @@ -749,9 +750,24 @@ static int ipmmu_init_platform_device(struct device *dev, return 0; } +static bool ipmmu_slave_whitelist(struct device *dev) +{ + /* By default, do not allow use of IPMMU */ + return false; +} + +static const struct soc_device_attribute soc_r8a7795[] = { + { .soc_id = "r8a7795", }, + { /* sentinel */ } }; + static int ipmmu_of_xlate(struct device *dev, struct of_phandle_args *spec) { + /* For R-Car Gen3 use a white list to opt-in slave devices */ + if (soc_device_match(soc_r8a7795) && !ipmmu_slave_whitelist(dev)) + return -ENODEV; + iommu_fwspec_add_ids(dev, spec->args, 1); /* Initialize once - xlate() will call multiple times */ @@ -903,10 +919,21 @@ static const struct ipmmu_features ipmmu_features_default = { .twobit_imttbcr_sl0 = false, }; +static const struct ipmmu_features ipmmu_features_r8a7795 = { + .use_ns_alias_offset = false, + .has_cache_leaf_nodes = true, + .number_of_contexts = 8, + .setup_imbuscr = false, + .twobit_imttbcr_sl0 = true, +}; + static const struct of_device_id ipmmu_of_ids[] = { { .compatible = "renesas,ipmmu-vmsa", .data = &ipmmu_features_default, }, { + .compatible = "renesas,ipmmu-r8a7795", + .data = &ipmmu_features_r8a7795, }, { /* Terminator */ }, @@ -1090,6 +1117,8 @@ static int __init ipmmu_vmsa_iommu_of_setup(struct device_node *np) IOMMU_OF_DECLARE(ipmmu_vmsa_iommu_of, "renesas,ipmmu-vmsa", ipmmu_vmsa_iommu_of_setup); +IOMMU_OF_DECLARE(ipmmu_r8a7795_iommu_of, "renesas,ipmmu-r8a7795", + ipmmu_vmsa_iommu_of_setup); #endif MODULE_DESCRIPTION("IOMMU API for Renesas VMSA-compatible IPMMU"); MODULE_AUTHOR("Laurent Pinchart "); MODULE_LICENSE("GPL v2"); From 395df08d2e1de238a9c8c33fdcd0e2160efd63a9 Mon Sep 17 00:00:00 2001 From: Matthias Brugger Date: Mon, 30 Oct 2017 12:37:55 +0100 Subject: [PATCH 53/54] iommu/mediatek: Fix driver name There exist two Mediatek iommu drivers for the two different generations of the device, but both have the same name "mtk-iommu". This breaks the registration of the second driver: Error: Driver 'mtk-iommu' is already registered, aborting... Fix this by changing the name for the first generation to "mtk-iommu-v1".
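The driver core refuses a second registration under an existing name, which is exactly the quoted error; a minimal sketch of the fixed situation (assuming, as the commit implies, that the second-generation driver keeps the old name):

    /* Generation 1 driver (mtk_iommu_v1.c) after the fix: */
    static struct platform_driver mtk_iommu_v1_driver = {
        .driver = { .name = "mtk-iommu-v1" },   /* was "mtk-iommu" */
    };

    /* Generation 2 driver (mtk_iommu.c), unchanged: */
    static struct platform_driver mtk_iommu_driver = {
        .driver = { .name = "mtk-iommu" },
    };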
Fixes: b17336c55d89 ("iommu/mediatek: add support for mtk iommu generation one HW") Signed-off-by: Matthias Brugger Signed-off-by: Alex Williamson --- drivers/iommu/mtk_iommu_v1.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c index bc1efbfb9ddf..542930cd183d 100644 --- a/drivers/iommu/mtk_iommu_v1.c +++ b/drivers/iommu/mtk_iommu_v1.c @@ -708,7 +708,7 @@ static struct platform_driver mtk_iommu_driver = { .probe = mtk_iommu_probe, .remove = mtk_iommu_remove, .driver = { - .name = "mtk-iommu", + .name = "mtk-iommu-v1", .of_match_table = mtk_iommu_of_ids, .pm = &mtk_iommu_pm_ops, } From 94e2cc4dba39efbb63e84cd4f7243627c16ceac5 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 21 Sep 2017 17:21:40 +0200 Subject: [PATCH 54/54] iommu/iova: Use raw_cpu_ptr() instead of get_cpu_ptr() for ->fq get_cpu_ptr() disables preemption and returns the ->fq object of the current CPU. raw_cpu_ptr() does the same, except that it does not disable preemption, which means the scheduler can move the task to another CPU after it has obtained the per-CPU object. Here this is not a problem, because the data structure itself is protected with a spin_lock. This change shouldn't matter in general, but on RT it does, because a sleeping lock can't be acquired with preemption disabled. Cc: Joerg Roedel Cc: iommu@lists.linux-foundation.org Reported-by: vinadhy@gmail.com Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Alex Williamson --- drivers/iommu/iova.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index 84bda3a4dafc..466aaa8ba841 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -542,7 +542,7 @@ void queue_iova(struct iova_domain *iovad, unsigned long pfn, unsigned long pages, unsigned long data) { - struct iova_fq *fq = get_cpu_ptr(iovad->fq); + struct iova_fq *fq = raw_cpu_ptr(iovad->fq); unsigned long flags; unsigned idx; @@ -572,8 +572,6 @@ void queue_iova(struct iova_domain *iovad, if (atomic_cmpxchg(&iovad->fq_timer_on, 0, 1) == 0) mod_timer(&iovad->fq_timer, jiffies + msecs_to_jiffies(IOVA_FQ_TIMEOUT)); - - put_cpu_ptr(iovad->fq); } EXPORT_SYMBOL_GPL(queue_iova);
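The semantic difference in one place, as a sketch (both accessors are real per-CPU APIs; the comments summarize the commit's reasoning):

    struct iova_fq *fq;

    fq = get_cpu_ptr(iovad->fq);    /* old: disables preemption ... */
    /* ... queue work ... */
    put_cpu_ptr(iovad->fq);         /* ... and must be paired with this */

    fq = raw_cpu_ptr(iovad->fq);    /* new: preemption untouched; a later
                                     * migration is harmless because the
                                     * flush queue is spinlock-protected */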