From 4781bc427b0d23bfde4675d210fd35debee2c9de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Glisse?= Date: Mon, 31 Aug 2015 18:13:03 -0400 Subject: [PATCH 01/41] iommu/amd: Return positive value in amd_iommu_detect() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix amd_iommu_detect() to return positive value on success, like intended, and not zero. This will not change anything in the end as AMD IOMMU disable swiotlb and properly associate itself with devices even if detect() doesn't return a positive value. Signed-off-by: Jérôme Glisse Cc: Joerg Roedel Cc: iommu@lists.linux-foundation.org --- drivers/iommu/amd_iommu_init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index 5ef347a13cb5..ea2afefe8e57 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -2198,7 +2198,7 @@ int __init amd_iommu_detect(void) iommu_detected = 1; x86_init.iommu.iommu_init = amd_iommu_init; - return 0; + return 1; } /**************************************************************************** From a591989a7c162587f24305c3fe3bd8f055ed3329 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sun, 13 Sep 2015 14:15:31 +0200 Subject: [PATCH 02/41] iommu/amd: Drop null test before destroy functions Remove unneeded NULL test. The semantic patch that makes this change is as follows: (http://coccinelle.lip6.fr/) // @@ expression x; @@ -if (x != NULL) { \(kmem_cache_destroy\|mempool_destroy\|dma_pool_destroy\)(x); x = NULL; -} // Signed-off-by: Julia Lawall Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu_init.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index ea2afefe8e57..3ba1ee709824 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -1755,11 +1755,8 @@ static void __init free_on_init_error(void) free_pages((unsigned long)irq_lookup_table, get_order(rlookup_table_size)); - if (amd_iommu_irq_cache) { - kmem_cache_destroy(amd_iommu_irq_cache); - amd_iommu_irq_cache = NULL; - - } + kmem_cache_destroy(amd_iommu_irq_cache); + amd_iommu_irq_cache = NULL; free_pages((unsigned long)amd_iommu_rlookup_table, get_order(rlookup_table_size)); From d66ce54b4664a0d66429a4de996741581d71cf90 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 23 Sep 2015 19:00:10 +0200 Subject: [PATCH 03/41] iommu/vt-d: Split iommu_prepare_identity_map Split the part of the function that fetches the domain out and put the rest into into a domain_prepare_identity_map, so that the code can also be used with when the domain is already known. Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 42 +++++++++++++++++++------------------ 1 file changed, 22 insertions(+), 20 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 2d7349a3ee14..24d31090b140 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -2429,17 +2429,11 @@ static int iommu_domain_identity_map(struct dmar_domain *domain, DMA_PTE_READ|DMA_PTE_WRITE); } -static int iommu_prepare_identity_map(struct device *dev, - unsigned long long start, - unsigned long long end) +static int domain_prepare_identity_map(struct device *dev, + struct dmar_domain *domain, + unsigned long long start, + unsigned long long end) { - struct dmar_domain *domain; - int ret; - - domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH); - if (!domain) - return -ENOMEM; - /* For _hardware_ passthrough, don't bother. But for software passthrough, we do it anyway -- it may indicate a memory range which is reserved in E820, so which didn't get set @@ -2459,8 +2453,7 @@ static int iommu_prepare_identity_map(struct device *dev, dmi_get_system_info(DMI_BIOS_VENDOR), dmi_get_system_info(DMI_BIOS_VERSION), dmi_get_system_info(DMI_PRODUCT_VERSION)); - ret = -EIO; - goto error; + return -EIO; } if (end >> agaw_to_width(domain->agaw)) { @@ -2470,18 +2463,27 @@ static int iommu_prepare_identity_map(struct device *dev, dmi_get_system_info(DMI_BIOS_VENDOR), dmi_get_system_info(DMI_BIOS_VERSION), dmi_get_system_info(DMI_PRODUCT_VERSION)); - ret = -EIO; - goto error; + return -EIO; } - ret = iommu_domain_identity_map(domain, start, end); + return iommu_domain_identity_map(domain, start, end); +} + +static int iommu_prepare_identity_map(struct device *dev, + unsigned long long start, + unsigned long long end) +{ + struct dmar_domain *domain; + int ret; + + domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH); + if (!domain) + return -ENOMEM; + + ret = domain_prepare_identity_map(dev, domain, start, end); if (ret) - goto error; + domain_exit(domain); - return 0; - - error: - domain_exit(domain); return ret; } From b1ce5b79aec8d8cd8bcd076d8cce8bc3cd690051 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 23 Sep 2015 19:16:01 +0200 Subject: [PATCH 04/41] iommu/vt-d: Create RMRR mappings in newly allocated domains Currently the RMRR entries are created only at boot time. This means they will vanish when the domain allocated at boot time is destroyed. This patch makes sure that also newly allocated domains will get RMRR mappings. Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 24d31090b140..6ac6e741c46e 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -3241,7 +3241,10 @@ static struct iova *intel_alloc_iova(struct device *dev, static struct dmar_domain *__get_valid_domain_for_dev(struct device *dev) { + struct dmar_rmrr_unit *rmrr; struct dmar_domain *domain; + struct device *i_dev; + int i, ret; domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH); if (!domain) { @@ -3250,6 +3253,23 @@ static struct dmar_domain *__get_valid_domain_for_dev(struct device *dev) return NULL; } + /* We have a new domain - setup possible RMRRs for the device */ + rcu_read_lock(); + for_each_rmrr_units(rmrr) { + for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt, + i, i_dev) { + if (i_dev != dev) + continue; + + ret = domain_prepare_identity_map(dev, domain, + rmrr->base_address, + rmrr->end_address); + if (ret) + dev_err(dev, "Mapping reserved region failed\n"); + } + } + rcu_read_unlock(); + return domain; } From 8128f23c436d0dd4f72412e1bf9256e424479dc3 Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Thu, 27 Aug 2015 15:33:03 +0200 Subject: [PATCH 05/41] iommu/s390: Add iommu api for s390 pci devices This adds an IOMMU API implementation for s390 PCI devices. Reviewed-by: Sebastian Ott Signed-off-by: Gerald Schaefer Signed-off-by: Joerg Roedel --- MAINTAINERS | 7 + arch/s390/Kconfig | 1 + arch/s390/include/asm/pci.h | 4 + arch/s390/include/asm/pci_dma.h | 5 +- arch/s390/pci/pci_dma.c | 37 ++-- drivers/iommu/Kconfig | 7 + drivers/iommu/Makefile | 1 + drivers/iommu/s390-iommu.c | 337 ++++++++++++++++++++++++++++++++ 8 files changed, 386 insertions(+), 13 deletions(-) create mode 100644 drivers/iommu/s390-iommu.c diff --git a/MAINTAINERS b/MAINTAINERS index 797236befd27..8dfe79589681 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8943,6 +8943,13 @@ F: drivers/s390/net/*iucv* F: include/net/iucv/ F: net/iucv/ +S390 IOMMU (PCI) +M: Gerald Schaefer +L: linux-s390@vger.kernel.org +W: http://www.ibm.com/developerworks/linux/linux390/ +S: Supported +F: drivers/iommu/s390-iommu.c + S3C24XX SD/MMC Driver M: Ben Dooks L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 1d57000b1b24..74db332c59b8 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -582,6 +582,7 @@ menuconfig PCI bool "PCI support" select HAVE_DMA_ATTRS select PCI_MSI + select IOMMU_SUPPORT help Enable PCI support. diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index 34d960353a08..c873e682b67f 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -62,6 +62,8 @@ struct zpci_bar_struct { u8 size; /* order 2 exponent */ }; +struct s390_domain; + /* Private data per function */ struct zpci_dev { struct pci_dev *pdev; @@ -118,6 +120,8 @@ struct zpci_dev { struct dentry *debugfs_dev; struct dentry *debugfs_perf; + + struct s390_domain *s390_domain; /* s390 IOMMU domain data */ }; static inline bool zdev_enabled(struct zpci_dev *zdev) diff --git a/arch/s390/include/asm/pci_dma.h b/arch/s390/include/asm/pci_dma.h index 30b4c179c38c..7a7abf1a5537 100644 --- a/arch/s390/include/asm/pci_dma.h +++ b/arch/s390/include/asm/pci_dma.h @@ -192,5 +192,8 @@ static inline unsigned long *get_st_pto(unsigned long entry) /* Prototypes */ int zpci_dma_init_device(struct zpci_dev *); void zpci_dma_exit_device(struct zpci_dev *); - +void dma_free_seg_table(unsigned long); +unsigned long *dma_alloc_cpu_table(void); +void dma_cleanup_tables(unsigned long *); +void dma_update_cpu_trans(unsigned long *, void *, dma_addr_t, int); #endif diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c index 37505b8b4093..37d10f74425a 100644 --- a/arch/s390/pci/pci_dma.c +++ b/arch/s390/pci/pci_dma.c @@ -24,7 +24,7 @@ static int zpci_refresh_global(struct zpci_dev *zdev) zdev->iommu_pages * PAGE_SIZE); } -static unsigned long *dma_alloc_cpu_table(void) +unsigned long *dma_alloc_cpu_table(void) { unsigned long *table, *entry; @@ -114,12 +114,12 @@ static unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr return &pto[px]; } -static void dma_update_cpu_trans(struct zpci_dev *zdev, void *page_addr, - dma_addr_t dma_addr, int flags) +void dma_update_cpu_trans(unsigned long *dma_table, void *page_addr, + dma_addr_t dma_addr, int flags) { unsigned long *entry; - entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr); + entry = dma_walk_cpu_trans(dma_table, dma_addr); if (!entry) { WARN_ON_ONCE(1); return; @@ -156,7 +156,8 @@ static int dma_update_trans(struct zpci_dev *zdev, unsigned long pa, goto no_refresh; for (i = 0; i < nr_pages; i++) { - dma_update_cpu_trans(zdev, page_addr, dma_addr, flags); + dma_update_cpu_trans(zdev->dma_table, page_addr, dma_addr, + flags); page_addr += PAGE_SIZE; dma_addr += PAGE_SIZE; } @@ -181,7 +182,7 @@ no_refresh: return rc; } -static void dma_free_seg_table(unsigned long entry) +void dma_free_seg_table(unsigned long entry) { unsigned long *sto = get_rt_sto(entry); int sx; @@ -193,21 +194,18 @@ static void dma_free_seg_table(unsigned long entry) dma_free_cpu_table(sto); } -static void dma_cleanup_tables(struct zpci_dev *zdev) +void dma_cleanup_tables(unsigned long *table) { - unsigned long *table; int rtx; - if (!zdev || !zdev->dma_table) + if (!table) return; - table = zdev->dma_table; for (rtx = 0; rtx < ZPCI_TABLE_ENTRIES; rtx++) if (reg_entry_isvalid(table[rtx])) dma_free_seg_table(table[rtx]); dma_free_cpu_table(table); - zdev->dma_table = NULL; } static unsigned long __dma_alloc_iommu(struct zpci_dev *zdev, @@ -416,6 +414,13 @@ int zpci_dma_init_device(struct zpci_dev *zdev) { int rc; + /* + * At this point, if the device is part of an IOMMU domain, this would + * be a strong hint towards a bug in the IOMMU API (common) code and/or + * simultaneous access via IOMMU and DMA API. So let's issue a warning. + */ + WARN_ON(zdev->s390_domain); + spin_lock_init(&zdev->iommu_bitmap_lock); spin_lock_init(&zdev->dma_table_lock); @@ -450,8 +455,16 @@ out_clean: void zpci_dma_exit_device(struct zpci_dev *zdev) { + /* + * At this point, if the device is part of an IOMMU domain, this would + * be a strong hint towards a bug in the IOMMU API (common) code and/or + * simultaneous access via IOMMU and DMA API. So let's issue a warning. + */ + WARN_ON(zdev->s390_domain); + zpci_unregister_ioat(zdev, 0); - dma_cleanup_tables(zdev); + dma_cleanup_tables(zdev->dma_table); + zdev->dma_table = NULL; vfree(zdev->iommu_bitmap); zdev->iommu_bitmap = NULL; zdev->next_bit = 0; diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index d9da766719c8..5feb70a91a8c 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -369,4 +369,11 @@ config ARM_SMMU_V3 Say Y here if your system includes an IOMMU device implementing the ARM SMMUv3 architecture. +config S390_IOMMU + def_bool y if S390 && PCI + depends on S390 && PCI + select IOMMU_API + help + Support for the IOMMU API for s390 PCI devices. + endif # IOMMU_SUPPORT diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile index c6dcc513d711..a2056d35420b 100644 --- a/drivers/iommu/Makefile +++ b/drivers/iommu/Makefile @@ -23,3 +23,4 @@ obj-$(CONFIG_EXYNOS_IOMMU) += exynos-iommu.o obj-$(CONFIG_SHMOBILE_IOMMU) += shmobile-iommu.o obj-$(CONFIG_SHMOBILE_IPMMU) += shmobile-ipmmu.o obj-$(CONFIG_FSL_PAMU) += fsl_pamu.o fsl_pamu_domain.o +obj-$(CONFIG_S390_IOMMU) += s390-iommu.o diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c new file mode 100644 index 000000000000..cbe198cb3699 --- /dev/null +++ b/drivers/iommu/s390-iommu.c @@ -0,0 +1,337 @@ +/* + * IOMMU API for s390 PCI devices + * + * Copyright IBM Corp. 2015 + * Author(s): Gerald Schaefer + */ + +#include +#include +#include +#include +#include +#include + +/* + * Physically contiguous memory regions can be mapped with 4 KiB alignment, + * we allow all page sizes that are an order of 4KiB (no special large page + * support so far). + */ +#define S390_IOMMU_PGSIZES (~0xFFFUL) + +struct s390_domain { + struct iommu_domain domain; + struct list_head devices; + unsigned long *dma_table; + spinlock_t dma_table_lock; + spinlock_t list_lock; +}; + +struct s390_domain_device { + struct list_head list; + struct zpci_dev *zdev; +}; + +static struct s390_domain *to_s390_domain(struct iommu_domain *dom) +{ + return container_of(dom, struct s390_domain, domain); +} + +static bool s390_iommu_capable(enum iommu_cap cap) +{ + switch (cap) { + case IOMMU_CAP_CACHE_COHERENCY: + return true; + case IOMMU_CAP_INTR_REMAP: + return true; + default: + return false; + } +} + +struct iommu_domain *s390_domain_alloc(unsigned domain_type) +{ + struct s390_domain *s390_domain; + + if (domain_type != IOMMU_DOMAIN_UNMANAGED) + return NULL; + + s390_domain = kzalloc(sizeof(*s390_domain), GFP_KERNEL); + if (!s390_domain) + return NULL; + + s390_domain->dma_table = dma_alloc_cpu_table(); + if (!s390_domain->dma_table) { + kfree(s390_domain); + return NULL; + } + + spin_lock_init(&s390_domain->dma_table_lock); + spin_lock_init(&s390_domain->list_lock); + INIT_LIST_HEAD(&s390_domain->devices); + + return &s390_domain->domain; +} + +void s390_domain_free(struct iommu_domain *domain) +{ + struct s390_domain *s390_domain = to_s390_domain(domain); + + dma_cleanup_tables(s390_domain->dma_table); + kfree(s390_domain); +} + +static int s390_iommu_attach_device(struct iommu_domain *domain, + struct device *dev) +{ + struct s390_domain *s390_domain = to_s390_domain(domain); + struct zpci_dev *zdev = to_pci_dev(dev)->sysdata; + struct s390_domain_device *domain_device; + unsigned long flags; + int rc; + + if (!zdev) + return -ENODEV; + + domain_device = kzalloc(sizeof(*domain_device), GFP_KERNEL); + if (!domain_device) + return -ENOMEM; + + if (zdev->dma_table) + zpci_dma_exit_device(zdev); + + zdev->dma_table = s390_domain->dma_table; + rc = zpci_register_ioat(zdev, 0, zdev->start_dma + PAGE_OFFSET, + zdev->start_dma + zdev->iommu_size - 1, + (u64) zdev->dma_table); + if (rc) + goto out_restore; + + spin_lock_irqsave(&s390_domain->list_lock, flags); + /* First device defines the DMA range limits */ + if (list_empty(&s390_domain->devices)) { + domain->geometry.aperture_start = zdev->start_dma; + domain->geometry.aperture_end = zdev->end_dma; + domain->geometry.force_aperture = true; + /* Allow only devices with identical DMA range limits */ + } else if (domain->geometry.aperture_start != zdev->start_dma || + domain->geometry.aperture_end != zdev->end_dma) { + rc = -EINVAL; + spin_unlock_irqrestore(&s390_domain->list_lock, flags); + goto out_restore; + } + domain_device->zdev = zdev; + zdev->s390_domain = s390_domain; + list_add(&domain_device->list, &s390_domain->devices); + spin_unlock_irqrestore(&s390_domain->list_lock, flags); + + return 0; + +out_restore: + zpci_dma_init_device(zdev); + kfree(domain_device); + + return rc; +} + +static void s390_iommu_detach_device(struct iommu_domain *domain, + struct device *dev) +{ + struct s390_domain *s390_domain = to_s390_domain(domain); + struct zpci_dev *zdev = to_pci_dev(dev)->sysdata; + struct s390_domain_device *domain_device, *tmp; + unsigned long flags; + int found = 0; + + if (!zdev) + return; + + spin_lock_irqsave(&s390_domain->list_lock, flags); + list_for_each_entry_safe(domain_device, tmp, &s390_domain->devices, + list) { + if (domain_device->zdev == zdev) { + list_del(&domain_device->list); + kfree(domain_device); + found = 1; + break; + } + } + spin_unlock_irqrestore(&s390_domain->list_lock, flags); + + if (found) { + zdev->s390_domain = NULL; + zpci_unregister_ioat(zdev, 0); + zpci_dma_init_device(zdev); + } +} + +static int s390_iommu_add_device(struct device *dev) +{ + struct iommu_group *group; + int rc; + + group = iommu_group_get(dev); + if (!group) { + group = iommu_group_alloc(); + if (IS_ERR(group)) + return PTR_ERR(group); + } + + rc = iommu_group_add_device(group, dev); + iommu_group_put(group); + + return rc; +} + +static void s390_iommu_remove_device(struct device *dev) +{ + struct zpci_dev *zdev = to_pci_dev(dev)->sysdata; + struct iommu_domain *domain; + + /* + * This is a workaround for a scenario where the IOMMU API common code + * "forgets" to call the detach_dev callback: After binding a device + * to vfio-pci and completing the VFIO_SET_IOMMU ioctl (which triggers + * the attach_dev), removing the device via + * "echo 1 > /sys/bus/pci/devices/.../remove" won't trigger detach_dev, + * only remove_device will be called via the BUS_NOTIFY_REMOVED_DEVICE + * notifier. + * + * So let's call detach_dev from here if it hasn't been called before. + */ + if (zdev && zdev->s390_domain) { + domain = iommu_get_domain_for_dev(dev); + if (domain) + s390_iommu_detach_device(domain, dev); + } + + iommu_group_remove_device(dev); +} + +static int s390_iommu_update_trans(struct s390_domain *s390_domain, + unsigned long pa, dma_addr_t dma_addr, + size_t size, int flags) +{ + struct s390_domain_device *domain_device; + u8 *page_addr = (u8 *) (pa & PAGE_MASK); + dma_addr_t start_dma_addr = dma_addr; + unsigned long irq_flags, nr_pages, i; + int rc = 0; + + if (dma_addr < s390_domain->domain.geometry.aperture_start || + dma_addr + size > s390_domain->domain.geometry.aperture_end) + return -EINVAL; + + nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT; + if (!nr_pages) + return 0; + + spin_lock_irqsave(&s390_domain->dma_table_lock, irq_flags); + for (i = 0; i < nr_pages; i++) { + dma_update_cpu_trans(s390_domain->dma_table, page_addr, + dma_addr, flags); + page_addr += PAGE_SIZE; + dma_addr += PAGE_SIZE; + } + + spin_lock(&s390_domain->list_lock); + list_for_each_entry(domain_device, &s390_domain->devices, list) { + rc = zpci_refresh_trans((u64) domain_device->zdev->fh << 32, + start_dma_addr, nr_pages * PAGE_SIZE); + if (rc) + break; + } + spin_unlock(&s390_domain->list_lock); + spin_unlock_irqrestore(&s390_domain->dma_table_lock, irq_flags); + + return rc; +} + +static int s390_iommu_map(struct iommu_domain *domain, unsigned long iova, + phys_addr_t paddr, size_t size, int prot) +{ + struct s390_domain *s390_domain = to_s390_domain(domain); + int flags = ZPCI_PTE_VALID, rc = 0; + + if (!(prot & IOMMU_READ)) + return -EINVAL; + + if (!(prot & IOMMU_WRITE)) + flags |= ZPCI_TABLE_PROTECTED; + + rc = s390_iommu_update_trans(s390_domain, (unsigned long) paddr, iova, + size, flags); + + return rc; +} + +static phys_addr_t s390_iommu_iova_to_phys(struct iommu_domain *domain, + dma_addr_t iova) +{ + struct s390_domain *s390_domain = to_s390_domain(domain); + unsigned long *sto, *pto, *rto, flags; + unsigned int rtx, sx, px; + phys_addr_t phys = 0; + + if (iova < domain->geometry.aperture_start || + iova > domain->geometry.aperture_end) + return 0; + + rtx = calc_rtx(iova); + sx = calc_sx(iova); + px = calc_px(iova); + rto = s390_domain->dma_table; + + spin_lock_irqsave(&s390_domain->dma_table_lock, flags); + if (rto && reg_entry_isvalid(rto[rtx])) { + sto = get_rt_sto(rto[rtx]); + if (sto && reg_entry_isvalid(sto[sx])) { + pto = get_st_pto(sto[sx]); + if (pto && pt_entry_isvalid(pto[px])) + phys = pto[px] & ZPCI_PTE_ADDR_MASK; + } + } + spin_unlock_irqrestore(&s390_domain->dma_table_lock, flags); + + return phys; +} + +static size_t s390_iommu_unmap(struct iommu_domain *domain, + unsigned long iova, size_t size) +{ + struct s390_domain *s390_domain = to_s390_domain(domain); + int flags = ZPCI_PTE_INVALID; + phys_addr_t paddr; + int rc; + + paddr = s390_iommu_iova_to_phys(domain, iova); + if (!paddr) + return 0; + + rc = s390_iommu_update_trans(s390_domain, (unsigned long) paddr, iova, + size, flags); + if (rc) + return 0; + + return size; +} + +static struct iommu_ops s390_iommu_ops = { + .capable = s390_iommu_capable, + .domain_alloc = s390_domain_alloc, + .domain_free = s390_domain_free, + .attach_dev = s390_iommu_attach_device, + .detach_dev = s390_iommu_detach_device, + .map = s390_iommu_map, + .unmap = s390_iommu_unmap, + .iova_to_phys = s390_iommu_iova_to_phys, + .add_device = s390_iommu_add_device, + .remove_device = s390_iommu_remove_device, + .pgsize_bitmap = S390_IOMMU_PGSIZES, +}; + +static int __init s390_iommu_init(void) +{ + return bus_set_iommu(&pci_bus_type, &s390_iommu_ops); +} +subsys_initcall(s390_iommu_init); From c0e44929b571c468c4eca2ffb0fc196b090193f1 Mon Sep 17 00:00:00 2001 From: Suman Anna Date: Fri, 2 Oct 2015 18:02:43 -0500 Subject: [PATCH 06/41] Documentation: dt: Update OMAP iommu bindings for DRA7 DSPs The DSP processor sub-systems on DRA7xx have two MMU instances each, one for the processor core and the other for an internal EDMA block. These MMUs need an additional shared register to be programmed in the DSP_SYSTEM sub-module to be enabled properly. The OMAP IOMMU bindings is updated to account for this additional syscon property required for these DSP IOMMU instances on DRA7xx SoCs. A new compatible "ti,dra7-dsp-iommu" is also defined to distinguish these devices specifically from other DRA7 IOMMU devices. An example of the DRA7 DSP IOMMU nodes is also added to the document for clarity. Signed-off-by: Suman Anna Signed-off-by: Joerg Roedel --- .../bindings/iommu/ti,omap-iommu.txt | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/Documentation/devicetree/bindings/iommu/ti,omap-iommu.txt b/Documentation/devicetree/bindings/iommu/ti,omap-iommu.txt index 869699925fd5..4bd10dd881b8 100644 --- a/Documentation/devicetree/bindings/iommu/ti,omap-iommu.txt +++ b/Documentation/devicetree/bindings/iommu/ti,omap-iommu.txt @@ -4,6 +4,7 @@ Required properties: - compatible : Should be one of, "ti,omap2-iommu" for OMAP2/OMAP3 IOMMU instances "ti,omap4-iommu" for OMAP4/OMAP5 IOMMU instances + "ti,dra7-dsp-iommu" for DRA7xx DSP IOMMU instances "ti,dra7-iommu" for DRA7xx IOMMU instances - ti,hwmods : Name of the hwmod associated with the IOMMU instance - reg : Address space for the configuration registers @@ -19,6 +20,13 @@ Optional properties: Should be either 8 or 32 (default: 32) - ti,iommu-bus-err-back : Indicates the IOMMU instance supports throwing back a bus error response on MMU faults. +- ti,syscon-mmuconfig : Should be a pair of the phandle to the DSP_SYSTEM + syscon node that contains the additional control + register for enabling the MMU, and the MMU instance + number (0-indexed) within the sub-system. This property + is required for DSP IOMMU instances on DRA7xx SoCs. The + instance number should be 0 for DSP MDMA MMUs and 1 for + DSP EDMA MMUs. Example: /* OMAP3 ISP MMU */ @@ -30,3 +38,22 @@ Example: ti,hwmods = "mmu_isp"; ti,#tlb-entries = <8>; }; + + /* DRA74x DSP2 MMUs */ + mmu0_dsp2: mmu@41501000 { + compatible = "ti,dra7-dsp-iommu"; + reg = <0x41501000 0x100>; + interrupts = ; + ti,hwmods = "mmu0_dsp2"; + #iommu-cells = <0>; + ti,syscon-mmuconfig = <&dsp2_system 0x0>; + }; + + mmu1_dsp2: mmu@41502000 { + compatible = "ti,dra7-dsp-iommu"; + reg = <0x41502000 0x100>; + interrupts = ; + ti,hwmods = "mmu1_dsp2"; + #iommu-cells = <0>; + ti,syscon-mmuconfig = <&dsp2_system 0x1>; + }; From 3ca9299e7dc65f2d8242cd7804818a8e840b5a26 Mon Sep 17 00:00:00 2001 From: Suman Anna Date: Fri, 2 Oct 2015 18:02:44 -0500 Subject: [PATCH 07/41] iommu/omap: Add support for configuring dsp iommus on DRA7xx The DSP MMUs on DRA7xx SoC requires configuring an additional MMU_CONFIG register present in the DSP_SYSTEM sub module. This setting dictates whether the DSP Core's MDMA and EDMA traffic is routed through the respective MMU or not. Add the support to the OMAP iommu driver so that the traffic is not bypassed when enabling the MMUs. The MMU_CONFIG register has two different bits for enabling each of these two MMUs present in the DSP processor sub-system on DRA7xx. An id field is added to the OMAP iommu object to identify and enable each IOMMU. The id information and the DSP_SYSTEM.MMU_CONFIG register programming is achieved through the processing of the optional "ti,syscon-mmuconfig" property. A proper value is assigned to the id field only when this property is present. Signed-off-by: Suman Anna Signed-off-by: Joerg Roedel --- drivers/iommu/omap-iommu.c | 58 ++++++++++++++++++++++++++++++++++++++ drivers/iommu/omap-iommu.h | 9 ++++++ 2 files changed, 67 insertions(+) diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c index 36d0033c2ccb..3dc5b65f3990 100644 --- a/drivers/iommu/omap-iommu.c +++ b/drivers/iommu/omap-iommu.c @@ -26,6 +26,8 @@ #include #include #include +#include +#include #include @@ -112,6 +114,18 @@ void omap_iommu_restore_ctx(struct device *dev) } EXPORT_SYMBOL_GPL(omap_iommu_restore_ctx); +static void dra7_cfg_dspsys_mmu(struct omap_iommu *obj, bool enable) +{ + u32 val, mask; + + if (!obj->syscfg) + return; + + mask = (1 << (obj->id * DSP_SYS_MMU_CONFIG_EN_SHIFT)); + val = enable ? mask : 0; + regmap_update_bits(obj->syscfg, DSP_SYS_MMU_CONFIG, mask, val); +} + static void __iommu_set_twl(struct omap_iommu *obj, bool on) { u32 l = iommu_read_reg(obj, MMU_CNTL); @@ -147,6 +161,8 @@ static int omap2_iommu_enable(struct omap_iommu *obj) iommu_write_reg(obj, pa, MMU_TTB); + dra7_cfg_dspsys_mmu(obj, true); + if (obj->has_bus_err_back) iommu_write_reg(obj, MMU_GP_REG_BUS_ERR_BACK_EN, MMU_GP_REG); @@ -161,6 +177,7 @@ static void omap2_iommu_disable(struct omap_iommu *obj) l &= ~MMU_CNTL_MASK; iommu_write_reg(obj, l, MMU_CNTL); + dra7_cfg_dspsys_mmu(obj, false); dev_dbg(obj->dev, "%s is shutting down\n", obj->name); } @@ -864,6 +881,42 @@ static void omap_iommu_detach(struct omap_iommu *obj) dev_dbg(obj->dev, "%s: %s\n", __func__, obj->name); } +static int omap_iommu_dra7_get_dsp_system_cfg(struct platform_device *pdev, + struct omap_iommu *obj) +{ + struct device_node *np = pdev->dev.of_node; + int ret; + + if (!of_device_is_compatible(np, "ti,dra7-dsp-iommu")) + return 0; + + if (!of_property_read_bool(np, "ti,syscon-mmuconfig")) { + dev_err(&pdev->dev, "ti,syscon-mmuconfig property is missing\n"); + return -EINVAL; + } + + obj->syscfg = + syscon_regmap_lookup_by_phandle(np, "ti,syscon-mmuconfig"); + if (IS_ERR(obj->syscfg)) { + /* can fail with -EPROBE_DEFER */ + ret = PTR_ERR(obj->syscfg); + return ret; + } + + if (of_property_read_u32_index(np, "ti,syscon-mmuconfig", 1, + &obj->id)) { + dev_err(&pdev->dev, "couldn't get the IOMMU instance id within subsystem\n"); + return -EINVAL; + } + + if (obj->id != 0 && obj->id != 1) { + dev_err(&pdev->dev, "invalid IOMMU instance id\n"); + return -EINVAL; + } + + return 0; +} + /* * OMAP Device MMU(IOMMU) detection */ @@ -907,6 +960,10 @@ static int omap_iommu_probe(struct platform_device *pdev) if (IS_ERR(obj->regbase)) return PTR_ERR(obj->regbase); + err = omap_iommu_dra7_get_dsp_system_cfg(pdev, obj); + if (err) + return err; + irq = platform_get_irq(pdev, 0); if (irq < 0) return -ENODEV; @@ -943,6 +1000,7 @@ static const struct of_device_id omap_iommu_of_match[] = { { .compatible = "ti,omap2-iommu" }, { .compatible = "ti,omap4-iommu" }, { .compatible = "ti,dra7-iommu" }, + { .compatible = "ti,dra7-dsp-iommu" }, {}, }; diff --git a/drivers/iommu/omap-iommu.h b/drivers/iommu/omap-iommu.h index a656df2f9e03..59628e5017b4 100644 --- a/drivers/iommu/omap-iommu.h +++ b/drivers/iommu/omap-iommu.h @@ -30,6 +30,7 @@ struct iotlb_entry { struct omap_iommu { const char *name; void __iomem *regbase; + struct regmap *syscfg; struct device *dev; struct iommu_domain *domain; struct dentry *debug_dir; @@ -48,6 +49,7 @@ struct omap_iommu { void *ctx; /* iommu context: registres saved area */ int has_bus_err_back; + u32 id; }; struct cr_regs { @@ -158,6 +160,13 @@ static inline struct omap_iommu *dev_to_omap_iommu(struct device *dev) ((pgsz) == MMU_CAM_PGSZ_64K) ? 0xffff0000 : \ ((pgsz) == MMU_CAM_PGSZ_4K) ? 0xfffff000 : 0) +/* + * DSP_SYSTEM registers and bit definitions (applicable only for DRA7xx DSP) + */ +#define DSP_SYS_REVISION 0x00 +#define DSP_SYS_MMU_CONFIG 0x18 +#define DSP_SYS_MMU_CONFIG_EN_SHIFT 4 + /* * utilities for super page(16MB, 1MB, 64KB and 4KB) */ From 941a802d939221028baf6d19afd6dc8652219a81 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 11 Aug 2015 16:25:10 +0100 Subject: [PATCH 08/41] iommu/arm-smmu: Use drvdata instead of maintaining smmu_devices list Rather than keep a private list of struct arm_smmu_device and searching this whenever we need to look up the correct SMMU instance, instead use the drvdata field in the struct device to take care of the mapping for us. Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu-v3.c | 42 ++++++++----------------------------- 1 file changed, 9 insertions(+), 33 deletions(-) diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index dafaf59dc3b8..637014ab8965 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -568,7 +569,6 @@ struct arm_smmu_device { unsigned int sid_bits; struct arm_smmu_strtab_cfg strtab_cfg; - struct list_head list; }; /* SMMU private data for an IOMMU group */ @@ -603,10 +603,6 @@ struct arm_smmu_domain { struct iommu_domain domain; }; -/* Our list of SMMU instances */ -static DEFINE_SPINLOCK(arm_smmu_devices_lock); -static LIST_HEAD(arm_smmu_devices); - struct arm_smmu_option_prop { u32 opt; const char *prop; @@ -1722,7 +1718,8 @@ static void __arm_smmu_release_pci_iommudata(void *data) static struct arm_smmu_device *arm_smmu_get_for_pci_dev(struct pci_dev *pdev) { struct device_node *of_node; - struct arm_smmu_device *curr, *smmu = NULL; + struct platform_device *smmu_pdev; + struct arm_smmu_device *smmu = NULL; struct pci_bus *bus = pdev->bus; /* Walk up to the root bus */ @@ -1735,14 +1732,10 @@ static struct arm_smmu_device *arm_smmu_get_for_pci_dev(struct pci_dev *pdev) return NULL; /* See if we can find an SMMU corresponding to the phandle */ - spin_lock(&arm_smmu_devices_lock); - list_for_each_entry(curr, &arm_smmu_devices, list) { - if (curr->dev->of_node == of_node) { - smmu = curr; - break; - } - } - spin_unlock(&arm_smmu_devices_lock); + smmu_pdev = of_find_device_by_node(of_node); + if (smmu_pdev) + smmu = platform_get_drvdata(smmu_pdev); + of_node_put(of_node); return smmu; } @@ -2609,10 +2602,7 @@ static int arm_smmu_device_dt_probe(struct platform_device *pdev) goto out_free_structures; /* Record our private device structure */ - INIT_LIST_HEAD(&smmu->list); - spin_lock(&arm_smmu_devices_lock); - list_add(&smmu->list, &arm_smmu_devices); - spin_unlock(&arm_smmu_devices_lock); + platform_set_drvdata(pdev, smmu); return 0; out_free_structures: @@ -2622,21 +2612,7 @@ out_free_structures: static int arm_smmu_device_remove(struct platform_device *pdev) { - struct arm_smmu_device *curr, *smmu = NULL; - struct device *dev = &pdev->dev; - - spin_lock(&arm_smmu_devices_lock); - list_for_each_entry(curr, &arm_smmu_devices, list) { - if (curr->dev == dev) { - smmu = curr; - list_del(&smmu->list); - break; - } - } - spin_unlock(&arm_smmu_devices_lock); - - if (!smmu) - return -ENODEV; + struct arm_smmu_device *smmu = platform_get_drvdata(pdev); arm_smmu_device_disable(smmu); arm_smmu_free_structures(smmu); From 077124c98da3aa5aaf498fd0980ed14eaf777c09 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Tue, 18 Aug 2015 17:12:24 +0100 Subject: [PATCH 09/41] iommu/arm-smmu: Remove unneeded '0x' annotation '%pad' automatically prints with '0x', so remove the explicit '0x' annotation. Signed-off-by: Fabio Estevam Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 48a39dfa9777..677fba9e6b11 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -1222,7 +1222,7 @@ static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain, if (readl_poll_timeout_atomic(cb_base + ARM_SMMU_CB_ATSR, tmp, !(tmp & ATSR_ACTIVE), 5, 50)) { dev_err(dev, - "iova to phys timed out on 0x%pad. Falling back to software table walk.\n", + "iova to phys timed out on %pad. Falling back to software table walk.\n", &iova); return ops->iova_to_phys(ops, iova); } From 668b4ada1cdf406dac9f72503fa2f69f31bed0c5 Mon Sep 17 00:00:00 2001 From: Tirumalesh Chalamarla Date: Wed, 19 Aug 2015 00:40:30 +0100 Subject: [PATCH 10/41] iommu/arm-smmu: ThunderX mis-extends 64bit registers The SMMU architecture defines two different behaviors when 64-bit registers are written with 32-bit writes. The first behavior causes zero extension into the upper 32-bits. The second behavior splits a 64-bit register into "normal" 32-bit register pairs. On some buggy implementations, registers incorrectly zero extended when they should instead behave as normal 32-bit register pairs. Signed-off-by: Tirumalesh Chalamarla [will: removed redundant macro parameters] Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu.c | 48 ++++++++++++++++++++++------------------ 1 file changed, 26 insertions(+), 22 deletions(-) diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 677fba9e6b11..9aadf36a0747 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -70,6 +70,18 @@ ((smmu->options & ARM_SMMU_OPT_SECURE_CFG_ACCESS) \ ? 0x400 : 0)) +#ifdef CONFIG_64BIT +#define smmu_writeq writeq_relaxed +#else +#define smmu_writeq(reg64, addr) \ + do { \ + u64 __val = (reg64); \ + void __iomem *__addr = (addr); \ + writel_relaxed(__val >> 32, __addr + 4); \ + writel_relaxed(__val, __addr); \ + } while (0) +#endif + /* Configuration registers */ #define ARM_SMMU_GR0_sCR0 0x0 #define sCR0_CLIENTPD (1 << 0) @@ -185,10 +197,8 @@ #define ARM_SMMU_CB_SCTLR 0x0 #define ARM_SMMU_CB_RESUME 0x8 #define ARM_SMMU_CB_TTBCR2 0x10 -#define ARM_SMMU_CB_TTBR0_LO 0x20 -#define ARM_SMMU_CB_TTBR0_HI 0x24 -#define ARM_SMMU_CB_TTBR1_LO 0x28 -#define ARM_SMMU_CB_TTBR1_HI 0x2c +#define ARM_SMMU_CB_TTBR0 0x20 +#define ARM_SMMU_CB_TTBR1 0x28 #define ARM_SMMU_CB_TTBCR 0x30 #define ARM_SMMU_CB_S1_MAIR0 0x38 #define ARM_SMMU_CB_S1_MAIR1 0x3c @@ -226,7 +236,7 @@ #define TTBCR2_SEP_SHIFT 15 #define TTBCR2_SEP_UPSTREAM (0x7 << TTBCR2_SEP_SHIFT) -#define TTBRn_HI_ASID_SHIFT 16 +#define TTBRn_ASID_SHIFT 48 #define FSR_MULTI (1 << 31) #define FSR_SS (1 << 30) @@ -695,6 +705,7 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain, struct io_pgtable_cfg *pgtbl_cfg) { u32 reg; + u64 reg64; bool stage1; struct arm_smmu_cfg *cfg = &smmu_domain->cfg; struct arm_smmu_device *smmu = smmu_domain->smmu; @@ -738,22 +749,17 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain, /* TTBRs */ if (stage1) { - reg = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0]; - writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0_LO); - reg = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0] >> 32; - reg |= ARM_SMMU_CB_ASID(cfg) << TTBRn_HI_ASID_SHIFT; - writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0_HI); + reg64 = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0]; - reg = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1]; - writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR1_LO); - reg = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1] >> 32; - reg |= ARM_SMMU_CB_ASID(cfg) << TTBRn_HI_ASID_SHIFT; - writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR1_HI); + reg64 |= ((u64)ARM_SMMU_CB_ASID(cfg)) << TTBRn_ASID_SHIFT; + smmu_writeq(reg64, cb_base + ARM_SMMU_CB_TTBR0); + + reg64 = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1]; + reg64 |= ((u64)ARM_SMMU_CB_ASID(cfg)) << TTBRn_ASID_SHIFT; + smmu_writeq(reg64, cb_base + ARM_SMMU_CB_TTBR1); } else { - reg = pgtbl_cfg->arm_lpae_s2_cfg.vttbr; - writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0_LO); - reg = pgtbl_cfg->arm_lpae_s2_cfg.vttbr >> 32; - writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0_HI); + reg64 = pgtbl_cfg->arm_lpae_s2_cfg.vttbr; + smmu_writeq(reg64, cb_base + ARM_SMMU_CB_TTBR0); } /* TTBCR */ @@ -1212,11 +1218,9 @@ static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain, /* ATS1 registers can only be written atomically */ va = iova & ~0xfffUL; -#ifdef CONFIG_64BIT if (smmu->version == ARM_SMMU_V2) - writeq_relaxed(va, cb_base + ARM_SMMU_CB_ATS1PR); + smmu_writeq(va, cb_base + ARM_SMMU_CB_ATS1PR); else -#endif writel_relaxed(va, cb_base + ARM_SMMU_CB_ATS1PR); if (readl_poll_timeout_atomic(cb_base + ARM_SMMU_CB_ATSR, tmp, From dfddb969edf021f21a45fc6fd019db4f99d12308 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 9 Oct 2015 18:16:46 -0400 Subject: [PATCH 11/41] iommu/vt-d: Switch from ioremap_cache to memremap In preparation for deprecating ioremap_cache() convert its usage in intel-iommu to memremap. This also eliminates the mishandling of the __iomem annotation in the implementation. Cc: David Woodhouse Signed-off-by: Dan Williams Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 20 +++++++++++--------- drivers/iommu/intel_irq_remapping.c | 8 ++++---- 2 files changed, 15 insertions(+), 13 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 6ac6e741c46e..c5426e847255 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include @@ -2809,18 +2810,18 @@ static void intel_iommu_init_qi(struct intel_iommu *iommu) } static int copy_context_table(struct intel_iommu *iommu, - struct root_entry __iomem *old_re, + struct root_entry *old_re, struct context_entry **tbl, int bus, bool ext) { int tbl_idx, pos = 0, idx, devfn, ret = 0, did; - struct context_entry __iomem *old_ce = NULL; struct context_entry *new_ce = NULL, ce; + struct context_entry *old_ce = NULL; struct root_entry re; phys_addr_t old_ce_phys; tbl_idx = ext ? bus * 2 : bus; - memcpy_fromio(&re, old_re, sizeof(re)); + memcpy(&re, old_re, sizeof(re)); for (devfn = 0; devfn < 256; devfn++) { /* First calculate the correct index */ @@ -2855,7 +2856,8 @@ static int copy_context_table(struct intel_iommu *iommu, } ret = -ENOMEM; - old_ce = ioremap_cache(old_ce_phys, PAGE_SIZE); + old_ce = memremap(old_ce_phys, PAGE_SIZE, + MEMREMAP_WB); if (!old_ce) goto out; @@ -2867,7 +2869,7 @@ static int copy_context_table(struct intel_iommu *iommu, } /* Now copy the context entry */ - memcpy_fromio(&ce, old_ce + idx, sizeof(ce)); + memcpy(&ce, old_ce + idx, sizeof(ce)); if (!__context_present(&ce)) continue; @@ -2903,7 +2905,7 @@ static int copy_context_table(struct intel_iommu *iommu, __iommu_flush_cache(iommu, new_ce, VTD_PAGE_SIZE); out_unmap: - iounmap(old_ce); + memunmap(old_ce); out: return ret; @@ -2911,8 +2913,8 @@ out: static int copy_translation_tables(struct intel_iommu *iommu) { - struct root_entry __iomem *old_rt; struct context_entry **ctxt_tbls; + struct root_entry *old_rt; phys_addr_t old_rt_phys; int ctxt_table_entries; unsigned long flags; @@ -2937,7 +2939,7 @@ static int copy_translation_tables(struct intel_iommu *iommu) if (!old_rt_phys) return -EINVAL; - old_rt = ioremap_cache(old_rt_phys, PAGE_SIZE); + old_rt = memremap(old_rt_phys, PAGE_SIZE, MEMREMAP_WB); if (!old_rt) return -ENOMEM; @@ -2986,7 +2988,7 @@ static int copy_translation_tables(struct intel_iommu *iommu) ret = 0; out_unmap: - iounmap(old_rt); + memunmap(old_rt); return ret; } diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c index 9ec4e0d94ffd..bdc52cc6ed23 100644 --- a/drivers/iommu/intel_irq_remapping.c +++ b/drivers/iommu/intel_irq_remapping.c @@ -384,7 +384,7 @@ static int set_msi_sid(struct irte *irte, struct pci_dev *dev) static int iommu_load_old_irte(struct intel_iommu *iommu) { - struct irte __iomem *old_ir_table; + struct irte *old_ir_table; phys_addr_t irt_phys; unsigned int i; size_t size; @@ -408,12 +408,12 @@ static int iommu_load_old_irte(struct intel_iommu *iommu) size = INTR_REMAP_TABLE_ENTRIES*sizeof(struct irte); /* Map the old IR table */ - old_ir_table = ioremap_cache(irt_phys, size); + old_ir_table = memremap(irt_phys, size, MEMREMAP_WB); if (!old_ir_table) return -ENOMEM; /* Copy data over */ - memcpy_fromio(iommu->ir_table->base, old_ir_table, size); + memcpy(iommu->ir_table->base, old_ir_table, size); __iommu_flush_cache(iommu, iommu->ir_table->base, size); @@ -426,7 +426,7 @@ static int iommu_load_old_irte(struct intel_iommu *iommu) bitmap_set(iommu->ir_table->bitmap, i, 1); } - iounmap(old_ir_table); + memunmap(old_ir_table); return 0; } From 344cb4e0b6f3a0dbef0643eacb4946338eb228c0 Mon Sep 17 00:00:00 2001 From: Feng Wu Date: Thu, 15 Oct 2015 10:19:11 +0800 Subject: [PATCH 12/41] iommu/vt-d: Use cmpxchg16b to update posted format IRTE atomically If IRTE is in posted format, the 'pda' field goes across the 64-bit boundary, we need use cmpxchg16b to atomically update it. We only expose posted-interrupt when X86_FEATURE_CX16 is supported and use to update it atomically. Signed-off-by: Feng Wu Signed-off-by: Joerg Roedel --- drivers/iommu/intel_irq_remapping.c | 33 ++++++++++++++++++++++++++--- 1 file changed, 30 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c index bdc52cc6ed23..a50468e7d8c2 100644 --- a/drivers/iommu/intel_irq_remapping.c +++ b/drivers/iommu/intel_irq_remapping.c @@ -169,8 +169,26 @@ static int modify_irte(struct irq_2_iommu *irq_iommu, index = irq_iommu->irte_index + irq_iommu->sub_handle; irte = &iommu->ir_table->base[index]; - set_64bit(&irte->low, irte_modified->low); - set_64bit(&irte->high, irte_modified->high); +#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) + if ((irte->pst == 1) || (irte_modified->pst == 1)) { + bool ret; + + ret = cmpxchg_double(&irte->low, &irte->high, + irte->low, irte->high, + irte_modified->low, irte_modified->high); + /* + * We use cmpxchg16 to atomically update the 128-bit IRTE, + * and it cannot be updated by the hardware or other processors + * behind us, so the return value of cmpxchg16 should be the + * same as the old value. + */ + WARN_ON(!ret); + } else +#endif + { + set_64bit(&irte->low, irte_modified->low); + set_64bit(&irte->high, irte_modified->high); + } __iommu_flush_cache(iommu, irte, sizeof(*irte)); rc = qi_flush_iec(iommu, index, 0); @@ -727,7 +745,16 @@ static inline void set_irq_posting_cap(void) struct intel_iommu *iommu; if (!disable_irq_post) { - intel_irq_remap_ops.capability |= 1 << IRQ_POSTING_CAP; + /* + * If IRTE is in posted format, the 'pda' field goes across the + * 64-bit boundary, we need use cmpxchg16b to atomically update + * it. We only expose posted-interrupt when X86_FEATURE_CX16 + * is supported. Actually, hardware platforms supporting PI + * should have X86_FEATURE_CX16 support, this has been confirmed + * with Intel hardware guys. + */ + if ( cpu_has_cx16 ) + intel_irq_remap_ops.capability |= 1 << IRQ_POSTING_CAP; for_each_iommu(iommu, drhd) if (!cap_pi_support(iommu->cap)) { From 0db2e5d18f76a66ca945447d9f610bed0a94ca5a Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 1 Oct 2015 20:13:58 +0100 Subject: [PATCH 13/41] iommu: Implement common IOMMU ops for DMA mapping Taking inspiration from the existing arch/arm code, break out some generic functions to interface the DMA-API to the IOMMU-API. This will do the bulk of the heavy lifting for IOMMU-backed dma-mapping. Since associating an IOVA allocator with an IOMMU domain is a fairly common need, rather than introduce yet another private structure just to do this for ourselves, extend the top-level struct iommu_domain with the notion. A simple opaque cookie allows reuse by other IOMMU API users with their various different incompatible allocator types. Signed-off-by: Robin Murphy Signed-off-by: Joerg Roedel --- drivers/iommu/Kconfig | 7 + drivers/iommu/Makefile | 1 + drivers/iommu/dma-iommu.c | 524 ++++++++++++++++++++++++++++++++++++++ include/linux/dma-iommu.h | 85 +++++++ include/linux/iommu.h | 1 + 5 files changed, 618 insertions(+) create mode 100644 drivers/iommu/dma-iommu.c create mode 100644 include/linux/dma-iommu.h diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index d9da766719c8..7414f33acfba 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -49,6 +49,13 @@ config OF_IOMMU def_bool y depends on OF && IOMMU_API +# IOMMU-agnostic DMA-mapping layer +config IOMMU_DMA + bool + depends on NEED_SG_DMA_LENGTH + select IOMMU_API + select IOMMU_IOVA + config FSL_PAMU bool "Freescale IOMMU support" depends on PPC32 diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile index c6dcc513d711..f465cfbdb183 100644 --- a/drivers/iommu/Makefile +++ b/drivers/iommu/Makefile @@ -1,6 +1,7 @@ obj-$(CONFIG_IOMMU_API) += iommu.o obj-$(CONFIG_IOMMU_API) += iommu-traces.o obj-$(CONFIG_IOMMU_API) += iommu-sysfs.o +obj-$(CONFIG_IOMMU_DMA) += dma-iommu.o obj-$(CONFIG_IOMMU_IO_PGTABLE) += io-pgtable.o obj-$(CONFIG_IOMMU_IO_PGTABLE_LPAE) += io-pgtable-arm.o obj-$(CONFIG_IOMMU_IOVA) += iova.o diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c new file mode 100644 index 000000000000..3a20db4f8604 --- /dev/null +++ b/drivers/iommu/dma-iommu.c @@ -0,0 +1,524 @@ +/* + * A fairly generic DMA-API to IOMMU-API glue layer. + * + * Copyright (C) 2014-2015 ARM Ltd. + * + * based in part on arch/arm/mm/dma-mapping.c: + * Copyright (C) 2000-2004 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include +#include +#include +#include + +int iommu_dma_init(void) +{ + return iova_cache_get(); +} + +/** + * iommu_get_dma_cookie - Acquire DMA-API resources for a domain + * @domain: IOMMU domain to prepare for DMA-API usage + * + * IOMMU drivers should normally call this from their domain_alloc + * callback when domain->type == IOMMU_DOMAIN_DMA. + */ +int iommu_get_dma_cookie(struct iommu_domain *domain) +{ + struct iova_domain *iovad; + + if (domain->iova_cookie) + return -EEXIST; + + iovad = kzalloc(sizeof(*iovad), GFP_KERNEL); + domain->iova_cookie = iovad; + + return iovad ? 0 : -ENOMEM; +} +EXPORT_SYMBOL(iommu_get_dma_cookie); + +/** + * iommu_put_dma_cookie - Release a domain's DMA mapping resources + * @domain: IOMMU domain previously prepared by iommu_get_dma_cookie() + * + * IOMMU drivers should normally call this from their domain_free callback. + */ +void iommu_put_dma_cookie(struct iommu_domain *domain) +{ + struct iova_domain *iovad = domain->iova_cookie; + + if (!iovad) + return; + + put_iova_domain(iovad); + kfree(iovad); + domain->iova_cookie = NULL; +} +EXPORT_SYMBOL(iommu_put_dma_cookie); + +/** + * iommu_dma_init_domain - Initialise a DMA mapping domain + * @domain: IOMMU domain previously prepared by iommu_get_dma_cookie() + * @base: IOVA at which the mappable address space starts + * @size: Size of IOVA space + * + * @base and @size should be exact multiples of IOMMU page granularity to + * avoid rounding surprises. If necessary, we reserve the page at address 0 + * to ensure it is an invalid IOVA. It is safe to reinitialise a domain, but + * any change which could make prior IOVAs invalid will fail. + */ +int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base, u64 size) +{ + struct iova_domain *iovad = domain->iova_cookie; + unsigned long order, base_pfn, end_pfn; + + if (!iovad) + return -ENODEV; + + /* Use the smallest supported page size for IOVA granularity */ + order = __ffs(domain->ops->pgsize_bitmap); + base_pfn = max_t(unsigned long, 1, base >> order); + end_pfn = (base + size - 1) >> order; + + /* Check the domain allows at least some access to the device... */ + if (domain->geometry.force_aperture) { + if (base > domain->geometry.aperture_end || + base + size <= domain->geometry.aperture_start) { + pr_warn("specified DMA range outside IOMMU capability\n"); + return -EFAULT; + } + /* ...then finally give it a kicking to make sure it fits */ + base_pfn = max_t(unsigned long, base_pfn, + domain->geometry.aperture_start >> order); + end_pfn = min_t(unsigned long, end_pfn, + domain->geometry.aperture_end >> order); + } + + /* All we can safely do with an existing domain is enlarge it */ + if (iovad->start_pfn) { + if (1UL << order != iovad->granule || + base_pfn != iovad->start_pfn || + end_pfn < iovad->dma_32bit_pfn) { + pr_warn("Incompatible range for DMA domain\n"); + return -EFAULT; + } + iovad->dma_32bit_pfn = end_pfn; + } else { + init_iova_domain(iovad, 1UL << order, base_pfn, end_pfn); + } + return 0; +} +EXPORT_SYMBOL(iommu_dma_init_domain); + +/** + * dma_direction_to_prot - Translate DMA API directions to IOMMU API page flags + * @dir: Direction of DMA transfer + * @coherent: Is the DMA master cache-coherent? + * + * Return: corresponding IOMMU API page protection flags + */ +int dma_direction_to_prot(enum dma_data_direction dir, bool coherent) +{ + int prot = coherent ? IOMMU_CACHE : 0; + + switch (dir) { + case DMA_BIDIRECTIONAL: + return prot | IOMMU_READ | IOMMU_WRITE; + case DMA_TO_DEVICE: + return prot | IOMMU_READ; + case DMA_FROM_DEVICE: + return prot | IOMMU_WRITE; + default: + return 0; + } +} + +static struct iova *__alloc_iova(struct iova_domain *iovad, size_t size, + dma_addr_t dma_limit) +{ + unsigned long shift = iova_shift(iovad); + unsigned long length = iova_align(iovad, size) >> shift; + + /* + * Enforce size-alignment to be safe - there could perhaps be an + * attribute to control this per-device, or at least per-domain... + */ + return alloc_iova(iovad, length, dma_limit >> shift, true); +} + +/* The IOVA allocator knows what we mapped, so just unmap whatever that was */ +static void __iommu_dma_unmap(struct iommu_domain *domain, dma_addr_t dma_addr) +{ + struct iova_domain *iovad = domain->iova_cookie; + unsigned long shift = iova_shift(iovad); + unsigned long pfn = dma_addr >> shift; + struct iova *iova = find_iova(iovad, pfn); + size_t size; + + if (WARN_ON(!iova)) + return; + + size = iova_size(iova) << shift; + size -= iommu_unmap(domain, pfn << shift, size); + /* ...and if we can't, then something is horribly, horribly wrong */ + WARN_ON(size > 0); + __free_iova(iovad, iova); +} + +static void __iommu_dma_free_pages(struct page **pages, int count) +{ + while (count--) + __free_page(pages[count]); + kvfree(pages); +} + +static struct page **__iommu_dma_alloc_pages(unsigned int count, gfp_t gfp) +{ + struct page **pages; + unsigned int i = 0, array_size = count * sizeof(*pages); + + if (array_size <= PAGE_SIZE) + pages = kzalloc(array_size, GFP_KERNEL); + else + pages = vzalloc(array_size); + if (!pages) + return NULL; + + /* IOMMU can map any pages, so himem can also be used here */ + gfp |= __GFP_NOWARN | __GFP_HIGHMEM; + + while (count) { + struct page *page = NULL; + int j, order = __fls(count); + + /* + * Higher-order allocations are a convenience rather + * than a necessity, hence using __GFP_NORETRY until + * falling back to single-page allocations. + */ + for (order = min(order, MAX_ORDER); order > 0; order--) { + page = alloc_pages(gfp | __GFP_NORETRY, order); + if (!page) + continue; + if (PageCompound(page)) { + if (!split_huge_page(page)) + break; + __free_pages(page, order); + } else { + split_page(page, order); + break; + } + } + if (!page) + page = alloc_page(gfp); + if (!page) { + __iommu_dma_free_pages(pages, i); + return NULL; + } + j = 1 << order; + count -= j; + while (j--) + pages[i++] = page++; + } + return pages; +} + +/** + * iommu_dma_free - Free a buffer allocated by iommu_dma_alloc() + * @dev: Device which owns this buffer + * @pages: Array of buffer pages as returned by iommu_dma_alloc() + * @size: Size of buffer in bytes + * @handle: DMA address of buffer + * + * Frees both the pages associated with the buffer, and the array + * describing them + */ +void iommu_dma_free(struct device *dev, struct page **pages, size_t size, + dma_addr_t *handle) +{ + __iommu_dma_unmap(iommu_get_domain_for_dev(dev), *handle); + __iommu_dma_free_pages(pages, PAGE_ALIGN(size) >> PAGE_SHIFT); + *handle = DMA_ERROR_CODE; +} + +/** + * iommu_dma_alloc - Allocate and map a buffer contiguous in IOVA space + * @dev: Device to allocate memory for. Must be a real device + * attached to an iommu_dma_domain + * @size: Size of buffer in bytes + * @gfp: Allocation flags + * @prot: IOMMU mapping flags + * @handle: Out argument for allocated DMA handle + * @flush_page: Arch callback which must ensure PAGE_SIZE bytes from the + * given VA/PA are visible to the given non-coherent device. + * + * If @size is less than PAGE_SIZE, then a full CPU page will be allocated, + * but an IOMMU which supports smaller pages might not map the whole thing. + * + * Return: Array of struct page pointers describing the buffer, + * or NULL on failure. + */ +struct page **iommu_dma_alloc(struct device *dev, size_t size, + gfp_t gfp, int prot, dma_addr_t *handle, + void (*flush_page)(struct device *, const void *, phys_addr_t)) +{ + struct iommu_domain *domain = iommu_get_domain_for_dev(dev); + struct iova_domain *iovad = domain->iova_cookie; + struct iova *iova; + struct page **pages; + struct sg_table sgt; + dma_addr_t dma_addr; + unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; + + *handle = DMA_ERROR_CODE; + + pages = __iommu_dma_alloc_pages(count, gfp); + if (!pages) + return NULL; + + iova = __alloc_iova(iovad, size, dev->coherent_dma_mask); + if (!iova) + goto out_free_pages; + + size = iova_align(iovad, size); + if (sg_alloc_table_from_pages(&sgt, pages, count, 0, size, GFP_KERNEL)) + goto out_free_iova; + + if (!(prot & IOMMU_CACHE)) { + struct sg_mapping_iter miter; + /* + * The CPU-centric flushing implied by SG_MITER_TO_SG isn't + * sufficient here, so skip it by using the "wrong" direction. + */ + sg_miter_start(&miter, sgt.sgl, sgt.orig_nents, SG_MITER_FROM_SG); + while (sg_miter_next(&miter)) + flush_page(dev, miter.addr, page_to_phys(miter.page)); + sg_miter_stop(&miter); + } + + dma_addr = iova_dma_addr(iovad, iova); + if (iommu_map_sg(domain, dma_addr, sgt.sgl, sgt.orig_nents, prot) + < size) + goto out_free_sg; + + *handle = dma_addr; + sg_free_table(&sgt); + return pages; + +out_free_sg: + sg_free_table(&sgt); +out_free_iova: + __free_iova(iovad, iova); +out_free_pages: + __iommu_dma_free_pages(pages, count); + return NULL; +} + +/** + * iommu_dma_mmap - Map a buffer into provided user VMA + * @pages: Array representing buffer from iommu_dma_alloc() + * @size: Size of buffer in bytes + * @vma: VMA describing requested userspace mapping + * + * Maps the pages of the buffer in @pages into @vma. The caller is responsible + * for verifying the correct size and protection of @vma beforehand. + */ + +int iommu_dma_mmap(struct page **pages, size_t size, struct vm_area_struct *vma) +{ + unsigned long uaddr = vma->vm_start; + unsigned int i, count = PAGE_ALIGN(size) >> PAGE_SHIFT; + int ret = -ENXIO; + + for (i = vma->vm_pgoff; i < count && uaddr < vma->vm_end; i++) { + ret = vm_insert_page(vma, uaddr, pages[i]); + if (ret) + break; + uaddr += PAGE_SIZE; + } + return ret; +} + +dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, int prot) +{ + dma_addr_t dma_addr; + struct iommu_domain *domain = iommu_get_domain_for_dev(dev); + struct iova_domain *iovad = domain->iova_cookie; + phys_addr_t phys = page_to_phys(page) + offset; + size_t iova_off = iova_offset(iovad, phys); + size_t len = iova_align(iovad, size + iova_off); + struct iova *iova = __alloc_iova(iovad, len, dma_get_mask(dev)); + + if (!iova) + return DMA_ERROR_CODE; + + dma_addr = iova_dma_addr(iovad, iova); + if (iommu_map(domain, dma_addr, phys - iova_off, len, prot)) { + __free_iova(iovad, iova); + return DMA_ERROR_CODE; + } + return dma_addr + iova_off; +} + +void iommu_dma_unmap_page(struct device *dev, dma_addr_t handle, size_t size, + enum dma_data_direction dir, struct dma_attrs *attrs) +{ + __iommu_dma_unmap(iommu_get_domain_for_dev(dev), handle); +} + +/* + * Prepare a successfully-mapped scatterlist to give back to the caller. + * Handling IOVA concatenation can come later, if needed + */ +static int __finalise_sg(struct device *dev, struct scatterlist *sg, int nents, + dma_addr_t dma_addr) +{ + struct scatterlist *s; + int i; + + for_each_sg(sg, s, nents, i) { + /* Un-swizzling the fields here, hence the naming mismatch */ + unsigned int s_offset = sg_dma_address(s); + unsigned int s_length = sg_dma_len(s); + unsigned int s_dma_len = s->length; + + s->offset = s_offset; + s->length = s_length; + sg_dma_address(s) = dma_addr + s_offset; + dma_addr += s_dma_len; + } + return i; +} + +/* + * If mapping failed, then just restore the original list, + * but making sure the DMA fields are invalidated. + */ +static void __invalidate_sg(struct scatterlist *sg, int nents) +{ + struct scatterlist *s; + int i; + + for_each_sg(sg, s, nents, i) { + if (sg_dma_address(s) != DMA_ERROR_CODE) + s->offset = sg_dma_address(s); + if (sg_dma_len(s)) + s->length = sg_dma_len(s); + sg_dma_address(s) = DMA_ERROR_CODE; + sg_dma_len(s) = 0; + } +} + +/* + * The DMA API client is passing in a scatterlist which could describe + * any old buffer layout, but the IOMMU API requires everything to be + * aligned to IOMMU pages. Hence the need for this complicated bit of + * impedance-matching, to be able to hand off a suitably-aligned list, + * but still preserve the original offsets and sizes for the caller. + */ +int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg, + int nents, int prot) +{ + struct iommu_domain *domain = iommu_get_domain_for_dev(dev); + struct iova_domain *iovad = domain->iova_cookie; + struct iova *iova; + struct scatterlist *s, *prev = NULL; + dma_addr_t dma_addr; + size_t iova_len = 0; + int i; + + /* + * Work out how much IOVA space we need, and align the segments to + * IOVA granules for the IOMMU driver to handle. With some clever + * trickery we can modify the list in-place, but reversibly, by + * hiding the original data in the as-yet-unused DMA fields. + */ + for_each_sg(sg, s, nents, i) { + size_t s_offset = iova_offset(iovad, s->offset); + size_t s_length = s->length; + + sg_dma_address(s) = s->offset; + sg_dma_len(s) = s_length; + s->offset -= s_offset; + s_length = iova_align(iovad, s_length + s_offset); + s->length = s_length; + + /* + * The simple way to avoid the rare case of a segment + * crossing the boundary mask is to pad the previous one + * to end at a naturally-aligned IOVA for this one's size, + * at the cost of potentially over-allocating a little. + */ + if (prev) { + size_t pad_len = roundup_pow_of_two(s_length); + + pad_len = (pad_len - iova_len) & (pad_len - 1); + prev->length += pad_len; + iova_len += pad_len; + } + + iova_len += s_length; + prev = s; + } + + iova = __alloc_iova(iovad, iova_len, dma_get_mask(dev)); + if (!iova) + goto out_restore_sg; + + /* + * We'll leave any physical concatenation to the IOMMU driver's + * implementation - it knows better than we do. + */ + dma_addr = iova_dma_addr(iovad, iova); + if (iommu_map_sg(domain, dma_addr, sg, nents, prot) < iova_len) + goto out_free_iova; + + return __finalise_sg(dev, sg, nents, dma_addr); + +out_free_iova: + __free_iova(iovad, iova); +out_restore_sg: + __invalidate_sg(sg, nents); + return 0; +} + +void iommu_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction dir, struct dma_attrs *attrs) +{ + /* + * The scatterlist segments are mapped into a single + * contiguous IOVA allocation, so this is incredibly easy. + */ + __iommu_dma_unmap(iommu_get_domain_for_dev(dev), sg_dma_address(sg)); +} + +int iommu_dma_supported(struct device *dev, u64 mask) +{ + /* + * 'Special' IOMMUs which don't have the same addressing capability + * as the CPU will have to wait until we have some way to query that + * before they'll be able to use this framework. + */ + return 1; +} + +int iommu_dma_mapping_error(struct device *dev, dma_addr_t dma_addr) +{ + return dma_addr == DMA_ERROR_CODE; +} diff --git a/include/linux/dma-iommu.h b/include/linux/dma-iommu.h new file mode 100644 index 000000000000..fc481037478a --- /dev/null +++ b/include/linux/dma-iommu.h @@ -0,0 +1,85 @@ +/* + * Copyright (C) 2014-2015 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +#ifndef __DMA_IOMMU_H +#define __DMA_IOMMU_H + +#ifdef __KERNEL__ +#include + +#ifdef CONFIG_IOMMU_DMA +#include + +int iommu_dma_init(void); + +/* Domain management interface for IOMMU drivers */ +int iommu_get_dma_cookie(struct iommu_domain *domain); +void iommu_put_dma_cookie(struct iommu_domain *domain); + +/* Setup call for arch DMA mapping code */ +int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base, u64 size); + +/* General helpers for DMA-API <-> IOMMU-API interaction */ +int dma_direction_to_prot(enum dma_data_direction dir, bool coherent); + +/* + * These implement the bulk of the relevant DMA mapping callbacks, but require + * the arch code to take care of attributes and cache maintenance + */ +struct page **iommu_dma_alloc(struct device *dev, size_t size, + gfp_t gfp, int prot, dma_addr_t *handle, + void (*flush_page)(struct device *, const void *, phys_addr_t)); +void iommu_dma_free(struct device *dev, struct page **pages, size_t size, + dma_addr_t *handle); + +int iommu_dma_mmap(struct page **pages, size_t size, struct vm_area_struct *vma); + +dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, int prot); +int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg, + int nents, int prot); + +/* + * Arch code with no special attribute handling may use these + * directly as DMA mapping callbacks for simplicity + */ +void iommu_dma_unmap_page(struct device *dev, dma_addr_t handle, size_t size, + enum dma_data_direction dir, struct dma_attrs *attrs); +void iommu_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction dir, struct dma_attrs *attrs); +int iommu_dma_supported(struct device *dev, u64 mask); +int iommu_dma_mapping_error(struct device *dev, dma_addr_t dma_addr); + +#else + +struct iommu_domain; + +static inline int iommu_dma_init(void) +{ + return 0; +} + +static inline int iommu_get_dma_cookie(struct iommu_domain *domain) +{ + return -ENODEV; +} + +static inline void iommu_put_dma_cookie(struct iommu_domain *domain) +{ +} + +#endif /* CONFIG_IOMMU_DMA */ +#endif /* __KERNEL__ */ +#endif /* __DMA_IOMMU_H */ diff --git a/include/linux/iommu.h b/include/linux/iommu.h index f9c1b6d0f2e4..f174506a5343 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -81,6 +81,7 @@ struct iommu_domain { iommu_fault_handler_t handler; void *handler_token; struct iommu_domain_geometry geometry; + void *iova_cookie; }; enum iommu_cap { From 13b8629f651164d71f4d38b821925f93ba4236c8 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 1 Oct 2015 20:13:59 +0100 Subject: [PATCH 14/41] arm64: Add IOMMU dma_ops Taking some inspiration from the arch/arm code, implement the arch-specific side of the DMA mapping ops using the new IOMMU-DMA layer. Since there is still work to do elsewhere to make DMA configuration happen in a more appropriate order and properly support platform devices in the IOMMU core, the device setup code unfortunately starts out carrying some workarounds to ensure it works correctly in the current state of things. Signed-off-by: Robin Murphy Acked-by: Catalin Marinas Signed-off-by: Joerg Roedel --- arch/arm64/mm/dma-mapping.c | 435 ++++++++++++++++++++++++++++++++++++ 1 file changed, 435 insertions(+) diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 99224dcebdc5..322404193044 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -533,3 +533,438 @@ static int __init dma_debug_do_init(void) return 0; } fs_initcall(dma_debug_do_init); + + +#ifdef CONFIG_IOMMU_DMA +#include +#include +#include + +/* Thankfully, all cache ops are by VA so we can ignore phys here */ +static void flush_page(struct device *dev, const void *virt, phys_addr_t phys) +{ + __dma_flush_range(virt, virt + PAGE_SIZE); +} + +static void *__iommu_alloc_attrs(struct device *dev, size_t size, + dma_addr_t *handle, gfp_t gfp, + struct dma_attrs *attrs) +{ + bool coherent = is_device_dma_coherent(dev); + int ioprot = dma_direction_to_prot(DMA_BIDIRECTIONAL, coherent); + void *addr; + + if (WARN(!dev, "cannot create IOMMU mapping for unknown device\n")) + return NULL; + /* + * Some drivers rely on this, and we probably don't want the + * possibility of stale kernel data being read by devices anyway. + */ + gfp |= __GFP_ZERO; + + if (gfp & __GFP_WAIT) { + struct page **pages; + pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL, coherent); + + pages = iommu_dma_alloc(dev, size, gfp, ioprot, handle, + flush_page); + if (!pages) + return NULL; + + addr = dma_common_pages_remap(pages, size, VM_USERMAP, prot, + __builtin_return_address(0)); + if (!addr) + iommu_dma_free(dev, pages, size, handle); + } else { + struct page *page; + /* + * In atomic context we can't remap anything, so we'll only + * get the virtually contiguous buffer we need by way of a + * physically contiguous allocation. + */ + if (coherent) { + page = alloc_pages(gfp, get_order(size)); + addr = page ? page_address(page) : NULL; + } else { + addr = __alloc_from_pool(size, &page, gfp); + } + if (!addr) + return NULL; + + *handle = iommu_dma_map_page(dev, page, 0, size, ioprot); + if (iommu_dma_mapping_error(dev, *handle)) { + if (coherent) + __free_pages(page, get_order(size)); + else + __free_from_pool(addr, size); + addr = NULL; + } + } + return addr; +} + +static void __iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr, + dma_addr_t handle, struct dma_attrs *attrs) +{ + /* + * @cpu_addr will be one of 3 things depending on how it was allocated: + * - A remapped array of pages from iommu_dma_alloc(), for all + * non-atomic allocations. + * - A non-cacheable alias from the atomic pool, for atomic + * allocations by non-coherent devices. + * - A normal lowmem address, for atomic allocations by + * coherent devices. + * Hence how dodgy the below logic looks... + */ + if (__in_atomic_pool(cpu_addr, size)) { + iommu_dma_unmap_page(dev, handle, size, 0, NULL); + __free_from_pool(cpu_addr, size); + } else if (is_vmalloc_addr(cpu_addr)){ + struct vm_struct *area = find_vm_area(cpu_addr); + + if (WARN_ON(!area || !area->pages)) + return; + iommu_dma_free(dev, area->pages, size, &handle); + dma_common_free_remap(cpu_addr, size, VM_USERMAP); + } else { + iommu_dma_unmap_page(dev, handle, size, 0, NULL); + __free_pages(virt_to_page(cpu_addr), get_order(size)); + } +} + +static int __iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t dma_addr, size_t size, + struct dma_attrs *attrs) +{ + struct vm_struct *area; + int ret; + + vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot, + is_device_dma_coherent(dev)); + + if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret)) + return ret; + + area = find_vm_area(cpu_addr); + if (WARN_ON(!area || !area->pages)) + return -ENXIO; + + return iommu_dma_mmap(area->pages, size, vma); +} + +static int __iommu_get_sgtable(struct device *dev, struct sg_table *sgt, + void *cpu_addr, dma_addr_t dma_addr, + size_t size, struct dma_attrs *attrs) +{ + unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; + struct vm_struct *area = find_vm_area(cpu_addr); + + if (WARN_ON(!area || !area->pages)) + return -ENXIO; + + return sg_alloc_table_from_pages(sgt, area->pages, count, 0, size, + GFP_KERNEL); +} + +static void __iommu_sync_single_for_cpu(struct device *dev, + dma_addr_t dev_addr, size_t size, + enum dma_data_direction dir) +{ + phys_addr_t phys; + + if (is_device_dma_coherent(dev)) + return; + + phys = iommu_iova_to_phys(iommu_get_domain_for_dev(dev), dev_addr); + __dma_unmap_area(phys_to_virt(phys), size, dir); +} + +static void __iommu_sync_single_for_device(struct device *dev, + dma_addr_t dev_addr, size_t size, + enum dma_data_direction dir) +{ + phys_addr_t phys; + + if (is_device_dma_coherent(dev)) + return; + + phys = iommu_iova_to_phys(iommu_get_domain_for_dev(dev), dev_addr); + __dma_map_area(phys_to_virt(phys), size, dir); +} + +static dma_addr_t __iommu_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, + enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + bool coherent = is_device_dma_coherent(dev); + int prot = dma_direction_to_prot(dir, coherent); + dma_addr_t dev_addr = iommu_dma_map_page(dev, page, offset, size, prot); + + if (!iommu_dma_mapping_error(dev, dev_addr) && + !dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs)) + __iommu_sync_single_for_device(dev, dev_addr, size, dir); + + return dev_addr; +} + +static void __iommu_unmap_page(struct device *dev, dma_addr_t dev_addr, + size_t size, enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + if (!dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs)) + __iommu_sync_single_for_cpu(dev, dev_addr, size, dir); + + iommu_dma_unmap_page(dev, dev_addr, size, dir, attrs); +} + +static void __iommu_sync_sg_for_cpu(struct device *dev, + struct scatterlist *sgl, int nelems, + enum dma_data_direction dir) +{ + struct scatterlist *sg; + int i; + + if (is_device_dma_coherent(dev)) + return; + + for_each_sg(sgl, sg, nelems, i) + __dma_unmap_area(sg_virt(sg), sg->length, dir); +} + +static void __iommu_sync_sg_for_device(struct device *dev, + struct scatterlist *sgl, int nelems, + enum dma_data_direction dir) +{ + struct scatterlist *sg; + int i; + + if (is_device_dma_coherent(dev)) + return; + + for_each_sg(sgl, sg, nelems, i) + __dma_map_area(sg_virt(sg), sg->length, dir); +} + +static int __iommu_map_sg_attrs(struct device *dev, struct scatterlist *sgl, + int nelems, enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + bool coherent = is_device_dma_coherent(dev); + + if (!dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs)) + __iommu_sync_sg_for_device(dev, sgl, nelems, dir); + + return iommu_dma_map_sg(dev, sgl, nelems, + dma_direction_to_prot(dir, coherent)); +} + +static void __iommu_unmap_sg_attrs(struct device *dev, + struct scatterlist *sgl, int nelems, + enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + if (!dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs)) + __iommu_sync_sg_for_cpu(dev, sgl, nelems, dir); + + iommu_dma_unmap_sg(dev, sgl, nelems, dir, attrs); +} + +static struct dma_map_ops iommu_dma_ops = { + .alloc = __iommu_alloc_attrs, + .free = __iommu_free_attrs, + .mmap = __iommu_mmap_attrs, + .get_sgtable = __iommu_get_sgtable, + .map_page = __iommu_map_page, + .unmap_page = __iommu_unmap_page, + .map_sg = __iommu_map_sg_attrs, + .unmap_sg = __iommu_unmap_sg_attrs, + .sync_single_for_cpu = __iommu_sync_single_for_cpu, + .sync_single_for_device = __iommu_sync_single_for_device, + .sync_sg_for_cpu = __iommu_sync_sg_for_cpu, + .sync_sg_for_device = __iommu_sync_sg_for_device, + .dma_supported = iommu_dma_supported, + .mapping_error = iommu_dma_mapping_error, +}; + +/* + * TODO: Right now __iommu_setup_dma_ops() gets called too early to do + * everything it needs to - the device is only partially created and the + * IOMMU driver hasn't seen it yet, so it can't have a group. Thus we + * need this delayed attachment dance. Once IOMMU probe ordering is sorted + * to move the arch_setup_dma_ops() call later, all the notifier bits below + * become unnecessary, and will go away. + */ +struct iommu_dma_notifier_data { + struct list_head list; + struct device *dev; + const struct iommu_ops *ops; + u64 dma_base; + u64 size; +}; +static LIST_HEAD(iommu_dma_masters); +static DEFINE_MUTEX(iommu_dma_notifier_lock); + +/* + * Temporarily "borrow" a domain feature flag to to tell if we had to resort + * to creating our own domain here, in case we need to clean it up again. + */ +#define __IOMMU_DOMAIN_FAKE_DEFAULT (1U << 31) + +static bool do_iommu_attach(struct device *dev, const struct iommu_ops *ops, + u64 dma_base, u64 size) +{ + struct iommu_domain *domain = iommu_get_domain_for_dev(dev); + + /* + * Best case: The device is either part of a group which was + * already attached to a domain in a previous call, or it's + * been put in a default DMA domain by the IOMMU core. + */ + if (!domain) { + /* + * Urgh. The IOMMU core isn't going to do default domains + * for non-PCI devices anyway, until it has some means of + * abstracting the entirely implementation-specific + * sideband data/SoC topology/unicorn dust that may or + * may not differentiate upstream masters. + * So until then, HORRIBLE HACKS! + */ + domain = ops->domain_alloc(IOMMU_DOMAIN_DMA); + if (!domain) + goto out_no_domain; + + domain->ops = ops; + domain->type = IOMMU_DOMAIN_DMA | __IOMMU_DOMAIN_FAKE_DEFAULT; + + if (iommu_attach_device(domain, dev)) + goto out_put_domain; + } + + if (iommu_dma_init_domain(domain, dma_base, size)) + goto out_detach; + + dev->archdata.dma_ops = &iommu_dma_ops; + return true; + +out_detach: + iommu_detach_device(domain, dev); +out_put_domain: + if (domain->type & __IOMMU_DOMAIN_FAKE_DEFAULT) + iommu_domain_free(domain); +out_no_domain: + pr_warn("Failed to set up IOMMU for device %s; retaining platform DMA ops\n", + dev_name(dev)); + return false; +} + +static void queue_iommu_attach(struct device *dev, const struct iommu_ops *ops, + u64 dma_base, u64 size) +{ + struct iommu_dma_notifier_data *iommudata; + + iommudata = kzalloc(sizeof(*iommudata), GFP_KERNEL); + if (!iommudata) + return; + + iommudata->dev = dev; + iommudata->ops = ops; + iommudata->dma_base = dma_base; + iommudata->size = size; + + mutex_lock(&iommu_dma_notifier_lock); + list_add(&iommudata->list, &iommu_dma_masters); + mutex_unlock(&iommu_dma_notifier_lock); +} + +static int __iommu_attach_notifier(struct notifier_block *nb, + unsigned long action, void *data) +{ + struct iommu_dma_notifier_data *master, *tmp; + + if (action != BUS_NOTIFY_ADD_DEVICE) + return 0; + + mutex_lock(&iommu_dma_notifier_lock); + list_for_each_entry_safe(master, tmp, &iommu_dma_masters, list) { + if (do_iommu_attach(master->dev, master->ops, + master->dma_base, master->size)) { + list_del(&master->list); + kfree(master); + } + } + mutex_unlock(&iommu_dma_notifier_lock); + return 0; +} + +static int register_iommu_dma_ops_notifier(struct bus_type *bus) +{ + struct notifier_block *nb = kzalloc(sizeof(*nb), GFP_KERNEL); + int ret; + + if (!nb) + return -ENOMEM; + /* + * The device must be attached to a domain before the driver probe + * routine gets a chance to start allocating DMA buffers. However, + * the IOMMU driver also needs a chance to configure the iommu_group + * via its add_device callback first, so we need to make the attach + * happen between those two points. Since the IOMMU core uses a bus + * notifier with default priority for add_device, do the same but + * with a lower priority to ensure the appropriate ordering. + */ + nb->notifier_call = __iommu_attach_notifier; + nb->priority = -100; + + ret = bus_register_notifier(bus, nb); + if (ret) { + pr_warn("Failed to register DMA domain notifier; IOMMU DMA ops unavailable on bus '%s'\n", + bus->name); + kfree(nb); + } + return ret; +} + +static int __init __iommu_dma_init(void) +{ + int ret; + + ret = iommu_dma_init(); + if (!ret) + ret = register_iommu_dma_ops_notifier(&platform_bus_type); + if (!ret) + ret = register_iommu_dma_ops_notifier(&amba_bustype); + return ret; +} +arch_initcall(__iommu_dma_init); + +static void __iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, + const struct iommu_ops *ops) +{ + struct iommu_group *group; + + if (!ops) + return; + /* + * TODO: As a concession to the future, we're ready to handle being + * called both early and late (i.e. after bus_add_device). Once all + * the platform bus code is reworked to call us late and the notifier + * junk above goes away, move the body of do_iommu_attach here. + */ + group = iommu_group_get(dev); + if (group) { + do_iommu_attach(dev, ops, dma_base, size); + iommu_group_put(group); + } else { + queue_iommu_attach(dev, ops, dma_base, size); + } +} + +#else + +static void __iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, + struct iommu_ops *iommu) +{ } + +#endif /* CONFIG_IOMMU_DMA */ + From 876945dbf6497c7539ef958fee7ade970fbbe17a Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 1 Oct 2015 20:14:00 +0100 Subject: [PATCH 15/41] arm64: Hook up IOMMU dma_ops With iommu_dma_ops in place, hook them up to the configuration code, so IOMMU-fronted devices will get them automatically. Acked-by: Catalin Marinas Signed-off-by: Robin Murphy Signed-off-by: Joerg Roedel --- arch/arm64/Kconfig | 1 + arch/arm64/include/asm/dma-mapping.h | 15 +++++++-------- arch/arm64/mm/dma-mapping.c | 22 ++++++++++++++++++++++ 3 files changed, 30 insertions(+), 8 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 07d1811aa03f..1f823d1d551d 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -75,6 +75,7 @@ config ARM64 select HAVE_PERF_USER_STACK_DUMP select HAVE_RCU_TABLE_FREE select HAVE_SYSCALL_TRACEPOINTS + select IOMMU_DMA if IOMMU_SUPPORT select IRQ_DOMAIN select IRQ_FORCED_THREADING select MODULES_USE_ELF_RELA diff --git a/arch/arm64/include/asm/dma-mapping.h b/arch/arm64/include/asm/dma-mapping.h index cfdb34bedbcd..54d0ead41afc 100644 --- a/arch/arm64/include/asm/dma-mapping.h +++ b/arch/arm64/include/asm/dma-mapping.h @@ -54,16 +54,15 @@ static inline struct dma_map_ops *get_dma_ops(struct device *dev) return __generic_dma_ops(dev); } -static inline void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, - struct iommu_ops *iommu, bool coherent) -{ - if (!acpi_disabled && !dev->archdata.dma_ops) - dev->archdata.dma_ops = dma_ops; - - dev->archdata.dma_coherent = coherent; -} +void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, + struct iommu_ops *iommu, bool coherent); #define arch_setup_dma_ops arch_setup_dma_ops +#ifdef CONFIG_IOMMU_DMA +void arch_teardown_dma_ops(struct device *dev); +#define arch_teardown_dma_ops arch_teardown_dma_ops +#endif + /* do not use this function in a driver */ static inline bool is_device_dma_coherent(struct device *dev) { diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 322404193044..6320361d8d4c 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -960,6 +960,19 @@ static void __iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, } } +void arch_teardown_dma_ops(struct device *dev) +{ + struct iommu_domain *domain = iommu_get_domain_for_dev(dev); + + if (domain) { + iommu_detach_device(domain, dev); + if (domain->type & __IOMMU_DOMAIN_FAKE_DEFAULT) + iommu_domain_free(domain); + } + + dev->archdata.dma_ops = NULL; +} + #else static void __iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, @@ -968,3 +981,12 @@ static void __iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, #endif /* CONFIG_IOMMU_DMA */ +void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, + struct iommu_ops *iommu, bool coherent) +{ + if (!acpi_disabled && !dev->archdata.dma_ops) + dev->archdata.dma_ops = dma_ops; + + dev->archdata.dma_coherent = coherent; + __iommu_setup_dma_ops(dev, dma_base, size, iommu); +} From c0733a2cf30c1e7923b6ad4f8df67941502923de Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 13 Oct 2015 17:51:14 +0100 Subject: [PATCH 16/41] iommu/arm-smmu: Fix error checking for ASID and VMID allocation The bitmap allocator returns an int, which is one of the standard negative values on failure. Rather than assigning this straight to a u16 (like we do for the ASID and VMID callers), which means that we won't detect failure correctly, use an int for the purposes of error checking. Cc: Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu-v3.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index 637014ab8965..970d78dcac53 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -1419,7 +1419,7 @@ static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain, struct io_pgtable_cfg *pgtbl_cfg) { int ret; - u16 asid; + int asid; struct arm_smmu_device *smmu = smmu_domain->smmu; struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg; @@ -1431,10 +1431,11 @@ static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain, &cfg->cdptr_dma, GFP_KERNEL); if (!cfg->cdptr) { dev_warn(smmu->dev, "failed to allocate context descriptor\n"); + ret = -ENOMEM; goto out_free_asid; } - cfg->cd.asid = asid; + cfg->cd.asid = (u16)asid; cfg->cd.ttbr = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0]; cfg->cd.tcr = pgtbl_cfg->arm_lpae_s1_cfg.tcr; cfg->cd.mair = pgtbl_cfg->arm_lpae_s1_cfg.mair[0]; @@ -1448,7 +1449,7 @@ out_free_asid: static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain, struct io_pgtable_cfg *pgtbl_cfg) { - u16 vmid; + int vmid; struct arm_smmu_device *smmu = smmu_domain->smmu; struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg; @@ -1456,7 +1457,7 @@ static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain, if (IS_ERR_VALUE(vmid)) return vmid; - cfg->vmid = vmid; + cfg->vmid = (u16)vmid; cfg->vttbr = pgtbl_cfg->arm_lpae_s2_cfg.vttbr; cfg->vtcr = pgtbl_cfg->arm_lpae_s2_cfg.vtcr; return 0; From c88ae5de71629e4b5e4075897d9980a7b079d122 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 13 Oct 2015 17:53:24 +0100 Subject: [PATCH 17/41] iommu/arm-smmu: Remove redundant calculation of gr0 base address Since commit 1463fe44fd0f ("iommu/arm-smmu: Don't use VMIDs for stage-1 translations"), we don't need the GR0 base address when initialising a context bank, so remove the useless local variable and its init code. Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 9aadf36a0747..7c20a68b5a95 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -709,9 +709,8 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain, bool stage1; struct arm_smmu_cfg *cfg = &smmu_domain->cfg; struct arm_smmu_device *smmu = smmu_domain->smmu; - void __iomem *cb_base, *gr0_base, *gr1_base; + void __iomem *cb_base, *gr1_base; - gr0_base = ARM_SMMU_GR0(smmu); gr1_base = ARM_SMMU_GR1(smmu); stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS; cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx); From 166bdbd23161160f2abcea70621adba179050bee Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 13 Oct 2015 18:32:30 +0100 Subject: [PATCH 18/41] iommu/arm-smmu: Add support for MSI on SMMUv3 Despite being a platform device, the SMMUv3 is capable of signaling interrupts using MSIs. Hook it into the platform MSI framework and enjoy faults being reported in a new and exciting way. Signed-off-by: Marc Zyngier [will: tidied up the binding example and reworked most of the code] Signed-off-by: Will Deacon --- .../devicetree/bindings/iommu/arm,smmu-v3.txt | 19 ++++ drivers/iommu/Kconfig | 1 + drivers/iommu/arm-smmu-v3.c | 105 ++++++++++++++++-- 3 files changed, 117 insertions(+), 8 deletions(-) diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt b/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt index 3443e0f838df..947863acc2d4 100644 --- a/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt +++ b/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt @@ -36,5 +36,24 @@ the PCIe specification. NOTE: this only applies to the SMMU itself, not masters connected upstream of the SMMU. +- msi-parent : See the generic MSI binding described in + devicetree/bindings/interrupt-controller/msi.txt + for a description of the msi-parent property. + - hisilicon,broken-prefetch-cmd : Avoid sending CMD_PREFETCH_* commands to the SMMU. + +** Example + + smmu@2b400000 { + compatible = "arm,smmu-v3"; + reg = <0x0 0x2b400000 0x0 0x20000>; + interrupts = , + , + , + ; + interrupt-names = "eventq", "priq", "cmdq-sync", "gerror"; + dma-coherent; + #iommu-cells = <0>; + msi-parent = <&its 0xff0000>; + }; diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index d9da766719c8..cc2dfa572d07 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -362,6 +362,7 @@ config ARM_SMMU_V3 depends on ARM64 && PCI select IOMMU_API select IOMMU_IO_PGTABLE_LPAE + select GENERIC_MSI_IRQ_DOMAIN help Support for implementations of the ARM System MMU architecture version 3 providing translation support to a PCIe root complex. diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index 970d78dcac53..5a36ef70f6f4 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -402,6 +403,31 @@ enum pri_resp { PRI_RESP_SUCC, }; +enum arm_smmu_msi_index { + EVTQ_MSI_INDEX, + GERROR_MSI_INDEX, + PRIQ_MSI_INDEX, + ARM_SMMU_MAX_MSIS, +}; + +static phys_addr_t arm_smmu_msi_cfg[ARM_SMMU_MAX_MSIS][3] = { + [EVTQ_MSI_INDEX] = { + ARM_SMMU_EVTQ_IRQ_CFG0, + ARM_SMMU_EVTQ_IRQ_CFG1, + ARM_SMMU_EVTQ_IRQ_CFG2, + }, + [GERROR_MSI_INDEX] = { + ARM_SMMU_GERROR_IRQ_CFG0, + ARM_SMMU_GERROR_IRQ_CFG1, + ARM_SMMU_GERROR_IRQ_CFG2, + }, + [PRIQ_MSI_INDEX] = { + ARM_SMMU_PRIQ_IRQ_CFG0, + ARM_SMMU_PRIQ_IRQ_CFG1, + ARM_SMMU_PRIQ_IRQ_CFG2, + }, +}; + struct arm_smmu_cmdq_ent { /* Common fields */ u8 opcode; @@ -2176,6 +2202,72 @@ static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val, 1, ARM_SMMU_POLL_TIMEOUT_US); } +static void arm_smmu_free_msis(void *data) +{ + struct device *dev = data; + platform_msi_domain_free_irqs(dev); +} + +static void arm_smmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg) +{ + phys_addr_t doorbell; + struct device *dev = msi_desc_to_dev(desc); + struct arm_smmu_device *smmu = dev_get_drvdata(dev); + phys_addr_t *cfg = arm_smmu_msi_cfg[desc->platform.msi_index]; + + doorbell = (((u64)msg->address_hi) << 32) | msg->address_lo; + doorbell &= MSI_CFG0_ADDR_MASK << MSI_CFG0_ADDR_SHIFT; + + writeq_relaxed(doorbell, smmu->base + cfg[0]); + writel_relaxed(msg->data, smmu->base + cfg[1]); + writel_relaxed(MSI_CFG2_MEMATTR_DEVICE_nGnRE, smmu->base + cfg[2]); +} + +static void arm_smmu_setup_msis(struct arm_smmu_device *smmu) +{ + struct msi_desc *desc; + int ret, nvec = ARM_SMMU_MAX_MSIS; + struct device *dev = smmu->dev; + + /* Clear the MSI address regs */ + writeq_relaxed(0, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0); + writeq_relaxed(0, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0); + + if (smmu->features & ARM_SMMU_FEAT_PRI) + writeq_relaxed(0, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0); + else + nvec--; + + if (!(smmu->features & ARM_SMMU_FEAT_MSI)) + return; + + /* Allocate MSIs for evtq, gerror and priq. Ignore cmdq */ + ret = platform_msi_domain_alloc_irqs(dev, nvec, arm_smmu_write_msi_msg); + if (ret) { + dev_warn(dev, "failed to allocate MSIs\n"); + return; + } + + for_each_msi_entry(desc, dev) { + switch (desc->platform.msi_index) { + case EVTQ_MSI_INDEX: + smmu->evtq.q.irq = desc->irq; + break; + case GERROR_MSI_INDEX: + smmu->gerr_irq = desc->irq; + break; + case PRIQ_MSI_INDEX: + smmu->priq.q.irq = desc->irq; + break; + default: /* Unknown */ + continue; + } + } + + /* Add callback to free MSIs on teardown */ + devm_add_action(dev, arm_smmu_free_msis, dev); +} + static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu) { int ret, irq; @@ -2189,11 +2281,9 @@ static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu) return ret; } - /* Clear the MSI address regs */ - writeq_relaxed(0, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0); - writeq_relaxed(0, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0); + arm_smmu_setup_msis(smmu); - /* Request wired interrupt lines */ + /* Request interrupt lines */ irq = smmu->evtq.q.irq; if (irq) { ret = devm_request_threaded_irq(smmu->dev, irq, @@ -2222,8 +2312,6 @@ static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu) } if (smmu->features & ARM_SMMU_FEAT_PRI) { - writeq_relaxed(0, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0); - irq = smmu->priq.q.irq; if (irq) { ret = devm_request_threaded_irq(smmu->dev, irq, @@ -2597,13 +2685,14 @@ static int arm_smmu_device_dt_probe(struct platform_device *pdev) if (ret) return ret; + /* Record our private device structure */ + platform_set_drvdata(pdev, smmu); + /* Reset the device */ ret = arm_smmu_device_reset(smmu); if (ret) goto out_free_structures; - /* Record our private device structure */ - platform_set_drvdata(pdev, smmu); return 0; out_free_structures: From f34c73f55a06ac443c492fc11b85f7a44dfdc112 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 20 Oct 2015 17:33:34 +0200 Subject: [PATCH 19/41] iommu/amd: Do not BUG_ON in __detach_device() The condition in the BUG_ON is an indicator of a BUG, but no reason to kill the code path. Turn it into a WARN_ON and bail out if it is hit. Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index f82060e778a2..6070b1504497 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -2204,7 +2204,8 @@ static void __detach_device(struct iommu_dev_data *dev_data) struct protection_domain *domain; unsigned long flags; - BUG_ON(!dev_data->domain); + if (WARN_ON(!dev_data->domain)) + return; domain = dev_data->domain; From 150952f96941cb13371770ce0cf47906647d8bba Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 20 Oct 2015 17:33:35 +0200 Subject: [PATCH 20/41] iommu/amd: Do not iterate over alias-list in __[attach|detach]_device The alias list is handled aleady by iommu core code. No need anymore to handle it in this part of the AMD IOMMU code Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 26 +++----------------------- 1 file changed, 3 insertions(+), 23 deletions(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 6070b1504497..e8443e6ff1b1 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -2028,29 +2028,17 @@ static void do_detach(struct iommu_dev_data *dev_data) static int __attach_device(struct iommu_dev_data *dev_data, struct protection_domain *domain) { - struct iommu_dev_data *head, *entry; int ret; /* lock domain */ spin_lock(&domain->lock); - head = dev_data; - - if (head->alias_data != NULL) - head = head->alias_data; - - /* Now we have the root of the alias group, if any */ - ret = -EBUSY; - if (head->domain != NULL) + if (dev_data->domain != NULL) goto out_unlock; /* Attach alias group root */ - do_attach(head, domain); - - /* Attach other devices in the alias group */ - list_for_each_entry(entry, &head->alias_list, alias_list) - do_attach(entry, domain); + do_attach(dev_data, domain); ret = 0; @@ -2200,7 +2188,6 @@ static int attach_device(struct device *dev, */ static void __detach_device(struct iommu_dev_data *dev_data) { - struct iommu_dev_data *head, *entry; struct protection_domain *domain; unsigned long flags; @@ -2211,14 +2198,7 @@ static void __detach_device(struct iommu_dev_data *dev_data) spin_lock_irqsave(&domain->lock, flags); - head = dev_data; - if (head->alias_data != NULL) - head = head->alias_data; - - list_for_each_entry(entry, &head->alias_list, alias_list) - do_detach(entry); - - do_detach(head); + do_detach(dev_data); spin_unlock_irqrestore(&domain->lock, flags); } From f1dd0a8bcd67e34537fe02fa9cda15aa46368d11 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 20 Oct 2015 17:33:36 +0200 Subject: [PATCH 21/41] iommu/amd: Don't disable IRQs in __detach_device This function is already called with IRQs disabled already. So no need to disable them again. Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index e8443e6ff1b1..07f491c2dc64 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -2189,18 +2189,17 @@ static int attach_device(struct device *dev, static void __detach_device(struct iommu_dev_data *dev_data) { struct protection_domain *domain; - unsigned long flags; if (WARN_ON(!dev_data->domain)) return; domain = dev_data->domain; - spin_lock_irqsave(&domain->lock, flags); + spin_lock(&domain->lock); do_detach(dev_data); - spin_unlock_irqrestore(&domain->lock, flags); + spin_unlock(&domain->lock); } /* From 272e4f99e966989394b167b695ab489c60fe1243 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 20 Oct 2015 17:33:37 +0200 Subject: [PATCH 22/41] iommu/amd: WARN when __[attach|detach]_device are called with irqs enabled These functions rely on being called with IRQs disabled. Add a WARN_ON to detect early when its not. Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 07f491c2dc64..f8f54a4b1d8f 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -2030,6 +2030,12 @@ static int __attach_device(struct iommu_dev_data *dev_data, { int ret; + /* + * Must be called with IRQs disabled. Warn here to detect early + * when its not. + */ + WARN_ON(!irqs_disabled()); + /* lock domain */ spin_lock(&domain->lock); @@ -2190,6 +2196,12 @@ static void __detach_device(struct iommu_dev_data *dev_data) { struct protection_domain *domain; + /* + * Must be called with IRQs disabled. Warn here to detect early + * when its not. + */ + WARN_ON(!irqs_disabled()); + if (WARN_ON(!dev_data->domain)) return; From e25bfb56ea7f046b71414e02f80f620deb5c6362 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 20 Oct 2015 17:33:38 +0200 Subject: [PATCH 23/41] iommu/amd: Set alias DTE in do_attach/do_detach With this we don't have to create dev_data entries for non-existent devices (which only exist as request-ids). Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index f8f54a4b1d8f..68b8ecf075ca 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -1114,11 +1114,15 @@ static int device_flush_iotlb(struct iommu_dev_data *dev_data, static int device_flush_dte(struct iommu_dev_data *dev_data) { struct amd_iommu *iommu; + u16 alias; int ret; iommu = amd_iommu_rlookup_table[dev_data->devid]; + alias = amd_iommu_alias_table[dev_data->devid]; ret = iommu_flush_dte(iommu, dev_data->devid); + if (!ret && alias != dev_data->devid) + ret = iommu_flush_dte(iommu, alias); if (ret) return ret; @@ -1984,29 +1988,36 @@ static void do_attach(struct iommu_dev_data *dev_data, struct protection_domain *domain) { struct amd_iommu *iommu; + u16 alias; bool ats; iommu = amd_iommu_rlookup_table[dev_data->devid]; + alias = amd_iommu_alias_table[dev_data->devid]; ats = dev_data->ats.enabled; /* Update data structures */ dev_data->domain = domain; list_add(&dev_data->list, &domain->dev_list); - set_dte_entry(dev_data->devid, domain, ats); /* Do reference counting */ domain->dev_iommu[iommu->index] += 1; domain->dev_cnt += 1; - /* Flush the DTE entry */ + /* Update device table */ + set_dte_entry(dev_data->devid, domain, ats); + if (alias != dev_data->devid) + set_dte_entry(dev_data->devid, domain, ats); + device_flush_dte(dev_data); } static void do_detach(struct iommu_dev_data *dev_data) { struct amd_iommu *iommu; + u16 alias; iommu = amd_iommu_rlookup_table[dev_data->devid]; + alias = amd_iommu_alias_table[dev_data->devid]; /* decrease reference counters */ dev_data->domain->dev_iommu[iommu->index] -= 1; @@ -2016,6 +2027,8 @@ static void do_detach(struct iommu_dev_data *dev_data) dev_data->domain = NULL; list_del(&dev_data->list); clear_dte_entry(dev_data->devid); + if (alias != dev_data->devid) + clear_dte_entry(alias); /* Flush the DTE entry */ device_flush_dte(dev_data); From 61289cbaf6c854a493ad0fa405c3dd39e7a384f3 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 20 Oct 2015 17:33:39 +0200 Subject: [PATCH 24/41] iommu/amd: Remove old alias handling code This mostly removes the code to create dev_data structures for alias device ids. They are not necessary anymore, as they were only created for device ids which have no struct pci_dev associated with it. But these device ids are handled in a simpler way now, so there is no need for this code anymore. Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 99 --------------------------------------- 1 file changed, 99 deletions(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 68b8ecf075ca..fe112bbaae88 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -89,8 +89,6 @@ static struct dma_map_ops amd_iommu_dma_ops; struct iommu_dev_data { struct list_head list; /* For domain->dev_list */ struct list_head dev_data_list; /* For global dev_data_list */ - struct list_head alias_list; /* Link alias-groups together */ - struct iommu_dev_data *alias_data;/* The alias dev_data */ struct protection_domain *domain; /* Domain the device is bound to */ u16 devid; /* PCI Device ID */ bool iommu_v2; /* Device can make use of IOMMUv2 */ @@ -136,8 +134,6 @@ static struct iommu_dev_data *alloc_dev_data(u16 devid) if (!dev_data) return NULL; - INIT_LIST_HEAD(&dev_data->alias_list); - dev_data->devid = devid; spin_lock_irqsave(&dev_data_list_lock, flags); @@ -147,17 +143,6 @@ static struct iommu_dev_data *alloc_dev_data(u16 devid) return dev_data; } -static void free_dev_data(struct iommu_dev_data *dev_data) -{ - unsigned long flags; - - spin_lock_irqsave(&dev_data_list_lock, flags); - list_del(&dev_data->dev_data_list); - spin_unlock_irqrestore(&dev_data_list_lock, flags); - - kfree(dev_data); -} - static struct iommu_dev_data *search_dev_data(u16 devid) { struct iommu_dev_data *dev_data; @@ -311,73 +296,10 @@ out: iommu_group_put(group); } -static int __last_alias(struct pci_dev *pdev, u16 alias, void *data) -{ - *(u16 *)data = alias; - return 0; -} - -static u16 get_alias(struct device *dev) -{ - struct pci_dev *pdev = to_pci_dev(dev); - u16 devid, ivrs_alias, pci_alias; - - devid = get_device_id(dev); - ivrs_alias = amd_iommu_alias_table[devid]; - pci_for_each_dma_alias(pdev, __last_alias, &pci_alias); - - if (ivrs_alias == pci_alias) - return ivrs_alias; - - /* - * DMA alias showdown - * - * The IVRS is fairly reliable in telling us about aliases, but it - * can't know about every screwy device. If we don't have an IVRS - * reported alias, use the PCI reported alias. In that case we may - * still need to initialize the rlookup and dev_table entries if the - * alias is to a non-existent device. - */ - if (ivrs_alias == devid) { - if (!amd_iommu_rlookup_table[pci_alias]) { - amd_iommu_rlookup_table[pci_alias] = - amd_iommu_rlookup_table[devid]; - memcpy(amd_iommu_dev_table[pci_alias].data, - amd_iommu_dev_table[devid].data, - sizeof(amd_iommu_dev_table[pci_alias].data)); - } - - return pci_alias; - } - - pr_info("AMD-Vi: Using IVRS reported alias %02x:%02x.%d " - "for device %s[%04x:%04x], kernel reported alias " - "%02x:%02x.%d\n", PCI_BUS_NUM(ivrs_alias), PCI_SLOT(ivrs_alias), - PCI_FUNC(ivrs_alias), dev_name(dev), pdev->vendor, pdev->device, - PCI_BUS_NUM(pci_alias), PCI_SLOT(pci_alias), - PCI_FUNC(pci_alias)); - - /* - * If we don't have a PCI DMA alias and the IVRS alias is on the same - * bus, then the IVRS table may know about a quirk that we don't. - */ - if (pci_alias == devid && - PCI_BUS_NUM(ivrs_alias) == pdev->bus->number) { - pdev->dev_flags |= PCI_DEV_FLAGS_DMA_ALIAS_DEVFN; - pdev->dma_alias_devfn = ivrs_alias & 0xff; - pr_info("AMD-Vi: Added PCI DMA alias %02x.%d for %s\n", - PCI_SLOT(ivrs_alias), PCI_FUNC(ivrs_alias), - dev_name(dev)); - } - - return ivrs_alias; -} - static int iommu_init_device(struct device *dev) { struct pci_dev *pdev = to_pci_dev(dev); struct iommu_dev_data *dev_data; - u16 alias; if (dev->archdata.iommu) return 0; @@ -386,24 +308,6 @@ static int iommu_init_device(struct device *dev) if (!dev_data) return -ENOMEM; - alias = get_alias(dev); - - if (alias != dev_data->devid) { - struct iommu_dev_data *alias_data; - - alias_data = find_dev_data(alias); - if (alias_data == NULL) { - pr_err("AMD-Vi: Warning: Unhandled device %s\n", - dev_name(dev)); - free_dev_data(dev_data); - return -ENOTSUPP; - } - dev_data->alias_data = alias_data; - - /* Add device to the alias_list */ - list_add(&dev_data->alias_list, &alias_data->alias_list); - } - if (pci_iommuv2_capable(pdev)) { struct amd_iommu *iommu; @@ -445,9 +349,6 @@ static void iommu_uninit_device(struct device *dev) iommu_group_remove_device(dev); - /* Unlink from alias, it may change if another device is re-plugged */ - dev_data->alias_data = NULL; - /* Remove dma-ops */ dev->archdata.dma_ops = NULL; From ca9cab3a5a9ea2ff03d761494b002897e2e3c4b5 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 20 Oct 2015 17:33:40 +0200 Subject: [PATCH 25/41] iommu/amd: Align DTE flag definitions No functional change. Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu_types.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h index f65908841be0..3026b3448b98 100644 --- a/drivers/iommu/amd_iommu_types.h +++ b/drivers/iommu/amd_iommu_types.h @@ -295,8 +295,8 @@ #define IOMMU_PTE_IR (1ULL << 61) #define IOMMU_PTE_IW (1ULL << 62) -#define DTE_FLAG_IOTLB (0x01UL << 32) -#define DTE_FLAG_GV (0x01ULL << 55) +#define DTE_FLAG_IOTLB (1ULL << 32) +#define DTE_FLAG_GV (1ULL << 55) #define DTE_GLX_SHIFT (56) #define DTE_GLX_MASK (3) From deba4bce168a87ef90211ba69850d3428b453765 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 20 Oct 2015 17:33:41 +0200 Subject: [PATCH 26/41] iommu/amd: Remove cmd_buf_size and evt_buf_size from struct amd_iommu The driver always uses a constant size for these buffers anyway, so there is no need to waste memory to store the sizes. Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 10 ++++------ drivers/iommu/amd_iommu_init.c | 8 +------- drivers/iommu/amd_iommu_types.h | 4 ---- 3 files changed, 5 insertions(+), 17 deletions(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index fe112bbaae88..02b8ad761600 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -534,7 +534,7 @@ static void iommu_poll_events(struct amd_iommu *iommu) while (head != tail) { iommu_print_event(iommu, iommu->evt_buf + head); - head = (head + EVENT_ENTRY_SIZE) % iommu->evt_buf_size; + head = (head + EVENT_ENTRY_SIZE) % EVT_BUFFER_SIZE; } writel(head, iommu->mmio_base + MMIO_EVT_HEAD_OFFSET); @@ -684,7 +684,7 @@ static void copy_cmd_to_buffer(struct amd_iommu *iommu, u8 *target; target = iommu->cmd_buf + tail; - tail = (tail + sizeof(*cmd)) % iommu->cmd_buf_size; + tail = (tail + sizeof(*cmd)) % CMD_BUFFER_SIZE; /* Copy command to buffer */ memcpy(target, cmd, sizeof(*cmd)); @@ -851,15 +851,13 @@ static int iommu_queue_command_sync(struct amd_iommu *iommu, u32 left, tail, head, next_tail; unsigned long flags; - WARN_ON(iommu->cmd_buf_size & CMD_BUFFER_UNINITIALIZED); - again: spin_lock_irqsave(&iommu->lock, flags); head = readl(iommu->mmio_base + MMIO_CMD_HEAD_OFFSET); tail = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); - next_tail = (tail + sizeof(*cmd)) % iommu->cmd_buf_size; - left = (head - next_tail) % iommu->cmd_buf_size; + next_tail = (tail + sizeof(*cmd)) % CMD_BUFFER_SIZE; + left = (head - next_tail) % CMD_BUFFER_SIZE; if (left <= 2) { struct iommu_cmd sync_cmd; diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index 3ba1ee709824..3a977a751dad 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -521,8 +521,6 @@ static u8 * __init alloc_command_buffer(struct amd_iommu *iommu) if (cmd_buf == NULL) return NULL; - iommu->cmd_buf_size = CMD_BUFFER_SIZE | CMD_BUFFER_UNINITIALIZED; - return cmd_buf; } @@ -557,13 +555,11 @@ static void iommu_enable_command_buffer(struct amd_iommu *iommu) &entry, sizeof(entry)); amd_iommu_reset_cmd_buffer(iommu); - iommu->cmd_buf_size &= ~(CMD_BUFFER_UNINITIALIZED); } static void __init free_command_buffer(struct amd_iommu *iommu) { - free_pages((unsigned long)iommu->cmd_buf, - get_order(iommu->cmd_buf_size & ~(CMD_BUFFER_UNINITIALIZED))); + free_pages((unsigned long)iommu->cmd_buf, get_order(CMD_BUFFER_SIZE)); } /* allocates the memory where the IOMMU will log its events to */ @@ -575,8 +571,6 @@ static u8 * __init alloc_event_buffer(struct amd_iommu *iommu) if (iommu->evt_buf == NULL) return NULL; - iommu->evt_buf_size = EVT_BUFFER_SIZE; - return iommu->evt_buf; } diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h index 3026b3448b98..921b2e95161b 100644 --- a/drivers/iommu/amd_iommu_types.h +++ b/drivers/iommu/amd_iommu_types.h @@ -528,11 +528,7 @@ struct amd_iommu { /* command buffer virtual address */ u8 *cmd_buf; - /* size of command buffer */ - u32 cmd_buf_size; - /* size of event buffer */ - u32 evt_buf_size; /* event buffer virtual address */ u8 *evt_buf; From f2c2db53b99eb6c48e259a722eef724d6b53b156 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 20 Oct 2015 17:33:42 +0200 Subject: [PATCH 27/41] iommu/amd: Cleanup buffer allocation Clean up the functions to allocate the command, event and ppr-log buffers. Remove redundant code and change the return value to int. Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu_init.c | 46 ++++++++++++---------------------- 1 file changed, 16 insertions(+), 30 deletions(-) diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index 3a977a751dad..9c6efcb6527e 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -513,15 +513,12 @@ static int __init find_last_devid_acpi(struct acpi_table_header *table) * write commands to that buffer later and the IOMMU will execute them * asynchronously */ -static u8 * __init alloc_command_buffer(struct amd_iommu *iommu) +static int __init alloc_command_buffer(struct amd_iommu *iommu) { - u8 *cmd_buf = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, - get_order(CMD_BUFFER_SIZE)); + iommu->cmd_buf = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, + get_order(CMD_BUFFER_SIZE)); - if (cmd_buf == NULL) - return NULL; - - return cmd_buf; + return iommu->cmd_buf ? 0 : -ENOMEM; } /* @@ -563,15 +560,12 @@ static void __init free_command_buffer(struct amd_iommu *iommu) } /* allocates the memory where the IOMMU will log its events to */ -static u8 * __init alloc_event_buffer(struct amd_iommu *iommu) +static int __init alloc_event_buffer(struct amd_iommu *iommu) { - iommu->evt_buf = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, - get_order(EVT_BUFFER_SIZE)); + iommu->evt_buf = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, + get_order(EVT_BUFFER_SIZE)); - if (iommu->evt_buf == NULL) - return NULL; - - return iommu->evt_buf; + return iommu->evt_buf ? 0 : -ENOMEM; } static void iommu_enable_event_buffer(struct amd_iommu *iommu) @@ -598,15 +592,12 @@ static void __init free_event_buffer(struct amd_iommu *iommu) } /* allocates the memory where the IOMMU will log its events to */ -static u8 * __init alloc_ppr_log(struct amd_iommu *iommu) +static int __init alloc_ppr_log(struct amd_iommu *iommu) { - iommu->ppr_log = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, - get_order(PPR_LOG_SIZE)); + iommu->ppr_log = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, + get_order(PPR_LOG_SIZE)); - if (iommu->ppr_log == NULL) - return NULL; - - return iommu->ppr_log; + return iommu->ppr_log ? 0 : -ENOMEM; } static void iommu_enable_ppr_log(struct amd_iommu *iommu) @@ -1105,12 +1096,10 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) if (!iommu->mmio_base) return -ENOMEM; - iommu->cmd_buf = alloc_command_buffer(iommu); - if (!iommu->cmd_buf) + if (alloc_command_buffer(iommu)) return -ENOMEM; - iommu->evt_buf = alloc_event_buffer(iommu); - if (!iommu->evt_buf) + if (alloc_event_buffer(iommu)) return -ENOMEM; iommu->int_enabled = false; @@ -1299,11 +1288,8 @@ static int iommu_init_pci(struct amd_iommu *iommu) amd_iommu_v2_present = true; } - if (iommu_feature(iommu, FEATURE_PPR)) { - iommu->ppr_log = alloc_ppr_log(iommu); - if (!iommu->ppr_log) - return -ENOMEM; - } + if (iommu_feature(iommu, FEATURE_PPR) && alloc_ppr_log(iommu)) + return -ENOMEM; if (iommu->cap & (1UL << IOMMU_CAP_NPCACHE)) amd_iommu_np_cache = true; From d1259416985513ba97f75a63ecf5bc75592a4b8d Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 20 Oct 2015 17:33:43 +0200 Subject: [PATCH 28/41] iommu/amd: Initialize amd_iommu_last_bdf for DEV_ALL Also initialize the amd_iommu_last_bdf variable when a IVHD_DEV_ALL entry is found in the ACPI table. Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu_init.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index 9c6efcb6527e..eca678242e4d 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -441,6 +441,10 @@ static int __init find_last_devid_from_ivhd(struct ivhd_header *h) while (p < end) { dev = (struct ivhd_entry *)p; switch (dev->type) { + case IVHD_DEV_ALL: + /* Use maximum BDF value for DEV_ALL */ + update_last_devid(0xffff); + break; case IVHD_DEV_SELECT: case IVHD_DEV_RANGE_END: case IVHD_DEV_ALIAS: From 226e889b20a99c073615ff5f5b6ea0bbccf25c5f Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 20 Oct 2015 17:33:44 +0200 Subject: [PATCH 29/41] iommu/amd: Remove first/last_device handling The code is buggy and the values read from PCI are not reliable anyway, so it is the best to just remove this code. Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu_init.c | 34 +++------------------------------ drivers/iommu/amd_iommu_types.h | 5 ----- 2 files changed, 3 insertions(+), 36 deletions(-) diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index eca678242e4d..71734f7bc248 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -824,20 +824,10 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu, switch (e->type) { case IVHD_DEV_ALL: - DUMP_printk(" DEV_ALL\t\t\t first devid: %02x:%02x.%x" - " last device %02x:%02x.%x flags: %02x\n", - PCI_BUS_NUM(iommu->first_device), - PCI_SLOT(iommu->first_device), - PCI_FUNC(iommu->first_device), - PCI_BUS_NUM(iommu->last_device), - PCI_SLOT(iommu->last_device), - PCI_FUNC(iommu->last_device), - e->flags); + DUMP_printk(" DEV_ALL\t\t\tflags: %02x\n", e->flags); - for (dev_i = iommu->first_device; - dev_i <= iommu->last_device; ++dev_i) - set_dev_entry_from_acpi(iommu, dev_i, - e->flags, 0); + for (dev_i = 0; dev_i <= amd_iommu_last_bdf; ++dev_i) + set_dev_entry_from_acpi(iommu, dev_i, e->flags, 0); break; case IVHD_DEV_SELECT: @@ -993,17 +983,6 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu, return 0; } -/* Initializes the device->iommu mapping for the driver */ -static int __init init_iommu_devices(struct amd_iommu *iommu) -{ - u32 i; - - for (i = iommu->first_device; i <= iommu->last_device; ++i) - set_iommu_for_device(iommu, i); - - return 0; -} - static void __init free_iommu_one(struct amd_iommu *iommu) { free_command_buffer(iommu); @@ -1122,8 +1101,6 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) */ amd_iommu_rlookup_table[iommu->devid] = NULL; - init_iommu_devices(iommu); - return 0; } @@ -1250,11 +1227,6 @@ static int iommu_init_pci(struct amd_iommu *iommu) pci_read_config_dword(iommu->dev, cap_ptr + MMIO_MISC_OFFSET, &misc); - iommu->first_device = PCI_DEVID(MMIO_GET_BUS(range), - MMIO_GET_FD(range)); - iommu->last_device = PCI_DEVID(MMIO_GET_BUS(range), - MMIO_GET_LD(range)); - if (!(iommu->cap & (1 << IOMMU_CAP_IOTLB))) amd_iommu_iotlb_sup = false; diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h index 921b2e95161b..fc9501dc7067 100644 --- a/drivers/iommu/amd_iommu_types.h +++ b/drivers/iommu/amd_iommu_types.h @@ -516,11 +516,6 @@ struct amd_iommu { /* pci domain of this IOMMU */ u16 pci_seg; - /* first device this IOMMU handles. read from PCI */ - u16 first_device; - /* last device this IOMMU handles. read from PCI */ - u16 last_device; - /* start of exclusion range of that IOMMU */ u64 exclusion_start; /* length of exclusion range of that IOMMU */ From 393c092a4d3ace7b8cef633ede2171104dd504d7 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 20 Oct 2015 17:33:45 +0200 Subject: [PATCH 30/41] iommu/amd: Remove find_last_devid_on_pci() The value read from the PCI header is not reliable, so remove this code. Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu_init.c | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index 71734f7bc248..dd92a8d8f0ca 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -407,20 +407,6 @@ static inline int ivhd_entry_length(u8 *ivhd) return 0x04 << (*ivhd >> 6); } -/* - * This function reads the last device id the IOMMU has to handle from the PCI - * capability header for this IOMMU - */ -static int __init find_last_devid_on_pci(int bus, int dev, int fn, int cap_ptr) -{ - u32 cap; - - cap = read_pci_config(bus, dev, fn, cap_ptr+MMIO_RANGE_OFFSET); - update_last_devid(PCI_DEVID(MMIO_GET_BUS(cap), MMIO_GET_LD(cap))); - - return 0; -} - /* * After reading the highest device id from the IOMMU PCI capability header * this function looks if there is a higher device id defined in the ACPI table @@ -433,11 +419,6 @@ static int __init find_last_devid_from_ivhd(struct ivhd_header *h) p += sizeof(*h); end += h->length; - find_last_devid_on_pci(PCI_BUS_NUM(h->devid), - PCI_SLOT(h->devid), - PCI_FUNC(h->devid), - h->cap_ptr); - while (p < end) { dev = (struct ivhd_entry *)p; switch (dev->type) { From 46c6b2bc88a729366605d0dedb6a35b8cf7cc4f0 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 21 Oct 2015 23:51:36 +0200 Subject: [PATCH 31/41] iommu: Revive device_group iommu-ops call-back That call-back is currently unused, change it into a call-back function for finding the right IOMMU group for a device. This is a first step to remove the hard-coded PCI dependency in the iommu-group code. Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 9 ++++++--- include/linux/iommu.h | 2 +- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 049df495c274..563746383973 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -814,6 +814,7 @@ static struct iommu_group *iommu_group_get_for_pci_dev(struct pci_dev *pdev) */ struct iommu_group *iommu_group_get_for_dev(struct device *dev) { + const struct iommu_ops *ops = dev->bus->iommu_ops; struct iommu_group *group; int ret; @@ -821,10 +822,12 @@ struct iommu_group *iommu_group_get_for_dev(struct device *dev) if (group) return group; - if (!dev_is_pci(dev)) - return ERR_PTR(-EINVAL); + group = ERR_PTR(-EINVAL); - group = iommu_group_get_for_pci_dev(to_pci_dev(dev)); + if (ops && ops->device_group) + group = ops->device_group(dev); + else if (dev_is_pci(dev)) + group = iommu_group_get_for_pci_dev(to_pci_dev(dev)); if (IS_ERR(group)) return group; diff --git a/include/linux/iommu.h b/include/linux/iommu.h index f174506a5343..a1e6e8914c17 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -168,7 +168,7 @@ struct iommu_ops { phys_addr_t (*iova_to_phys)(struct iommu_domain *domain, dma_addr_t iova); int (*add_device)(struct device *dev); void (*remove_device)(struct device *dev); - int (*device_group)(struct device *dev, unsigned int *groupid); + struct iommu_group *(*device_group)(struct device *dev); int (*domain_get_attr)(struct iommu_domain *domain, enum iommu_attr attr, void *data); int (*domain_set_attr)(struct iommu_domain *domain, From 5e62292bad10cff25ff75d136c54e62b43bfb0fa Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 21 Oct 2015 23:51:37 +0200 Subject: [PATCH 32/41] iommu: Export and rename iommu_group_get_for_pci_dev() Rename that function to pci_device_group() and export it, so that IOMMU drivers can use it as their device_group call-back. Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 8 ++++++-- include/linux/iommu.h | 3 +++ 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 563746383973..fdea700ca12c 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -731,13 +731,17 @@ static int get_pci_alias_or_group(struct pci_dev *pdev, u16 alias, void *opaque) * Use standard PCI bus topology, isolation features, and DMA alias quirks * to find or create an IOMMU group for a device. */ -static struct iommu_group *iommu_group_get_for_pci_dev(struct pci_dev *pdev) +struct iommu_group *pci_device_group(struct device *dev) { + struct pci_dev *pdev = to_pci_dev(dev); struct group_for_pci_data data; struct pci_bus *bus; struct iommu_group *group = NULL; u64 devfns[4] = { 0 }; + if (WARN_ON(!dev_is_pci(dev))) + return ERR_PTR(-EINVAL); + /* * Find the upstream DMA alias for the device. A device must not * be aliased due to topology in order to have its own IOMMU group. @@ -827,7 +831,7 @@ struct iommu_group *iommu_group_get_for_dev(struct device *dev) if (ops && ops->device_group) group = ops->device_group(dev); else if (dev_is_pci(dev)) - group = iommu_group_get_for_pci_dev(to_pci_dev(dev)); + group = pci_device_group(dev); if (IS_ERR(group)) return group; diff --git a/include/linux/iommu.h b/include/linux/iommu.h index a1e6e8914c17..a1a06639a64a 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -317,6 +317,9 @@ static inline size_t iommu_map_sg(struct iommu_domain *domain, return domain->ops->map_sg(domain, iova, sg, nents, prot); } +/* PCI device grouping function */ +extern struct iommu_group *pci_device_group(struct device *dev); + #else /* CONFIG_IOMMU_API */ struct iommu_ops {}; From 6eab556a40384de94c2d03c8d9d632e5154367f5 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 21 Oct 2015 23:51:38 +0200 Subject: [PATCH 33/41] iommu: Add generic_device_group() function This function can be used as a device_group call-back and just allocates one iommu-group per device. Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 15 +++++++++++++++ include/linux/iommu.h | 2 ++ 2 files changed, 17 insertions(+) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index fdea700ca12c..a80c9c5c2650 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -727,6 +727,21 @@ static int get_pci_alias_or_group(struct pci_dev *pdev, u16 alias, void *opaque) return data->group != NULL; } +/* + * Generic device_group call-back function. It just allocates one + * iommu-group per device. + */ +struct iommu_group *generic_device_group(struct device *dev) +{ + struct iommu_group *group; + + group = iommu_group_alloc(); + if (IS_ERR(group)) + return NULL; + + return group; +} + /* * Use standard PCI bus topology, isolation features, and DMA alias quirks * to find or create an IOMMU group for a device. diff --git a/include/linux/iommu.h b/include/linux/iommu.h index a1a06639a64a..f28dff313b07 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -319,6 +319,8 @@ static inline size_t iommu_map_sg(struct iommu_domain *domain, /* PCI device grouping function */ extern struct iommu_group *pci_device_group(struct device *dev); +/* Generic device grouping function */ +extern struct iommu_group *generic_device_group(struct device *dev); #else /* CONFIG_IOMMU_API */ From a960fadbe66e332b82595b3f26f00078caf0310f Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 21 Oct 2015 23:51:39 +0200 Subject: [PATCH 34/41] iommu: Add device_group call-back to x86 iommu drivers Set the device_group call-back to pci_device_group() for the Intel VT-d and the AMD IOMMU driver. Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 1 + drivers/iommu/intel-iommu.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index f82060e778a2..8d74e38914dd 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -3189,6 +3189,7 @@ static const struct iommu_ops amd_iommu_ops = { .iova_to_phys = amd_iommu_iova_to_phys, .add_device = amd_iommu_add_device, .remove_device = amd_iommu_remove_device, + .device_group = pci_device_group, .get_dm_regions = amd_iommu_get_dm_regions, .put_dm_regions = amd_iommu_put_dm_regions, .pgsize_bitmap = AMD_IOMMU_PGSIZES, diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 041bc1810a86..884d9133c636 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -4872,6 +4872,7 @@ static const struct iommu_ops intel_iommu_ops = { .iova_to_phys = intel_iommu_iova_to_phys, .add_device = intel_iommu_add_device, .remove_device = intel_iommu_remove_device, + .device_group = pci_device_group, .pgsize_bitmap = INTEL_IOMMU_PGSIZES, }; From d5e582971576698ab3e0236359f1ab5b91686047 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 21 Oct 2015 23:51:40 +0200 Subject: [PATCH 35/41] iommu/fsl: Convert to device_group call-back Convert the fsl pamu driver to make use of the new device_group call-back. Cc: Varun Sethi Signed-off-by: Joerg Roedel --- drivers/iommu/fsl_pamu_domain.c | 41 +++++++++++++-------------------- 1 file changed, 16 insertions(+), 25 deletions(-) diff --git a/drivers/iommu/fsl_pamu_domain.c b/drivers/iommu/fsl_pamu_domain.c index 1d452930c890..da0e1e30ef37 100644 --- a/drivers/iommu/fsl_pamu_domain.c +++ b/drivers/iommu/fsl_pamu_domain.c @@ -923,7 +923,7 @@ static struct iommu_group *get_pci_device_group(struct pci_dev *pdev) pci_endpt_partioning = check_pci_ctl_endpt_part(pci_ctl); /* We can partition PCIe devices so assign device group to the device */ if (pci_endpt_partioning) { - group = iommu_group_get_for_dev(&pdev->dev); + group = pci_device_group(&pdev->dev); /* * PCIe controller is not a paritionable entity @@ -956,44 +956,34 @@ static struct iommu_group *get_pci_device_group(struct pci_dev *pdev) return group; } -static int fsl_pamu_add_device(struct device *dev) +static struct iommu_group *fsl_pamu_device_group(struct device *dev) { struct iommu_group *group = ERR_PTR(-ENODEV); - struct pci_dev *pdev; - const u32 *prop; - int ret = 0, len; + int len; /* * For platform devices we allocate a separate group for * each of the devices. */ - if (dev_is_pci(dev)) { - pdev = to_pci_dev(dev); - /* Don't create device groups for virtual PCI bridges */ - if (pdev->subordinate) - return 0; + if (dev_is_pci(dev)) + group = get_pci_device_group(to_pci_dev(dev)); + else if (of_get_property(dev->of_node, "fsl,liodn", &len)) + group = get_device_iommu_group(dev); - group = get_pci_device_group(pdev); + return group; +} - } else { - prop = of_get_property(dev->of_node, "fsl,liodn", &len); - if (prop) - group = get_device_iommu_group(dev); - } +static int fsl_pamu_add_device(struct device *dev) +{ + struct iommu_group *group; + group = iommu_group_get_for_dev(dev); if (IS_ERR(group)) return PTR_ERR(group); - /* - * Check if device has already been added to an iommu group. - * Group could have already been created for a PCI device in - * the iommu_group_get_for_dev path. - */ - if (!dev->iommu_group) - ret = iommu_group_add_device(group, dev); - iommu_group_put(group); - return ret; + + return 0; } static void fsl_pamu_remove_device(struct device *dev) @@ -1072,6 +1062,7 @@ static const struct iommu_ops fsl_pamu_ops = { .domain_get_attr = fsl_pamu_get_domain_attr, .add_device = fsl_pamu_add_device, .remove_device = fsl_pamu_remove_device, + .device_group = fsl_pamu_device_group, }; int __init pamu_domain_init(void) From af65993224c1cfd40b81080c95c6c68d41fc46c7 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 21 Oct 2015 23:51:41 +0200 Subject: [PATCH 36/41] iommu/arm-smmu: Switch to device_group call-back This converts the ARM SMMU and the SMMUv3 driver to use the new device_group call-back. Cc: Will Deacon Signed-off-by: Joerg Roedel --- drivers/iommu/arm-smmu-v3.c | 1 + drivers/iommu/arm-smmu.c | 77 ++++++++++++++++++++++--------------- 2 files changed, 47 insertions(+), 31 deletions(-) diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index dafaf59dc3b8..fbd4fedd5162 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -1898,6 +1898,7 @@ static struct iommu_ops arm_smmu_ops = { .iova_to_phys = arm_smmu_iova_to_phys, .add_device = arm_smmu_add_device, .remove_device = arm_smmu_remove_device, + .device_group = pci_device_group, .domain_get_attr = arm_smmu_domain_get_attr, .domain_set_attr = arm_smmu_domain_set_attr, .pgsize_bitmap = -1UL, /* Restricted during device attach */ diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 48a39dfa9777..b4c0629d7f7d 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -1292,33 +1292,25 @@ static void __arm_smmu_release_pci_iommudata(void *data) kfree(data); } -static int arm_smmu_add_pci_device(struct pci_dev *pdev) +static int arm_smmu_init_pci_device(struct pci_dev *pdev, + struct iommu_group *group) { - int i, ret; - u16 sid; - struct iommu_group *group; struct arm_smmu_master_cfg *cfg; - - group = iommu_group_get_for_dev(&pdev->dev); - if (IS_ERR(group)) - return PTR_ERR(group); + u16 sid; + int i; cfg = iommu_group_get_iommudata(group); if (!cfg) { cfg = kzalloc(sizeof(*cfg), GFP_KERNEL); - if (!cfg) { - ret = -ENOMEM; - goto out_put_group; - } + if (!cfg) + return -ENOMEM; iommu_group_set_iommudata(group, cfg, __arm_smmu_release_pci_iommudata); } - if (cfg->num_streamids >= MAX_MASTER_STREAMIDS) { - ret = -ENOSPC; - goto out_put_group; - } + if (cfg->num_streamids >= MAX_MASTER_STREAMIDS) + return -ENOSPC; /* * Assume Stream ID == Requester ID for now. @@ -1334,16 +1326,13 @@ static int arm_smmu_add_pci_device(struct pci_dev *pdev) cfg->streamids[cfg->num_streamids++] = sid; return 0; -out_put_group: - iommu_group_put(group); - return ret; } -static int arm_smmu_add_platform_device(struct device *dev) +static int arm_smmu_init_platform_device(struct device *dev, + struct iommu_group *group) { - struct iommu_group *group; - struct arm_smmu_master *master; struct arm_smmu_device *smmu = find_smmu_for_device(dev); + struct arm_smmu_master *master; if (!smmu) return -ENODEV; @@ -1352,21 +1341,20 @@ static int arm_smmu_add_platform_device(struct device *dev) if (!master) return -ENODEV; - /* No automatic group creation for platform devices */ - group = iommu_group_alloc(); - if (IS_ERR(group)) - return PTR_ERR(group); - iommu_group_set_iommudata(group, &master->cfg, NULL); - return iommu_group_add_device(group, dev); + + return 0; } static int arm_smmu_add_device(struct device *dev) { - if (dev_is_pci(dev)) - return arm_smmu_add_pci_device(to_pci_dev(dev)); + struct iommu_group *group; - return arm_smmu_add_platform_device(dev); + group = iommu_group_get_for_dev(dev); + if (IS_ERR(group)) + return PTR_ERR(group); + + return 0; } static void arm_smmu_remove_device(struct device *dev) @@ -1374,6 +1362,32 @@ static void arm_smmu_remove_device(struct device *dev) iommu_group_remove_device(dev); } +static struct iommu_group *arm_smmu_device_group(struct device *dev) +{ + struct iommu_group *group; + int ret; + + if (dev_is_pci(dev)) + group = pci_device_group(dev); + else + group = generic_device_group(dev); + + if (IS_ERR(group)) + return group; + + if (dev_is_pci(dev)) + ret = arm_smmu_init_pci_device(to_pci_dev(dev), group); + else + ret = arm_smmu_init_platform_device(dev, group); + + if (ret) { + iommu_group_put(group); + group = ERR_PTR(ret); + } + + return group; +} + static int arm_smmu_domain_get_attr(struct iommu_domain *domain, enum iommu_attr attr, void *data) { @@ -1430,6 +1444,7 @@ static struct iommu_ops arm_smmu_ops = { .iova_to_phys = arm_smmu_iova_to_phys, .add_device = arm_smmu_add_device, .remove_device = arm_smmu_remove_device, + .device_group = arm_smmu_device_group, .domain_get_attr = arm_smmu_domain_get_attr, .domain_set_attr = arm_smmu_domain_set_attr, .pgsize_bitmap = -1UL, /* Restricted during device attach */ From 391811e185408671180745dfd30914bb64f6368e Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 21 Oct 2015 23:51:42 +0200 Subject: [PATCH 37/41] iommu: Remove is_pci_dev() fall-back from iommu_group_get_for_dev All callers of iommu_group_get_for_dev() provide a device_group call-back now, so this fall-back is no longer needed. Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index a80c9c5c2650..e2b5526506fd 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -845,8 +845,6 @@ struct iommu_group *iommu_group_get_for_dev(struct device *dev) if (ops && ops->device_group) group = ops->device_group(dev); - else if (dev_is_pci(dev)) - group = pci_device_group(dev); if (IS_ERR(group)) return group; From 1228236de5f978970fb814cc27138cdb00cbb48d Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 21 Oct 2015 23:51:43 +0200 Subject: [PATCH 38/41] iommu: Move default domain allocation to iommu_group_get_for_dev() Now that the iommu core support for iommu groups is not pci-centric anymore, we can move default domain allocation to the bus independent iommu_group_get_for_dev() function. Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index e2b5526506fd..abae363c7b9b 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -810,14 +810,6 @@ struct iommu_group *pci_device_group(struct device *dev) if (IS_ERR(group)) return NULL; - /* - * Try to allocate a default domain - needs support from the - * IOMMU driver. - */ - group->default_domain = __iommu_domain_alloc(pdev->dev.bus, - IOMMU_DOMAIN_DMA); - group->domain = group->default_domain; - return group; } @@ -849,6 +841,16 @@ struct iommu_group *iommu_group_get_for_dev(struct device *dev) if (IS_ERR(group)) return group; + /* + * Try to allocate a default domain - needs support from the + * IOMMU driver. + */ + if (!group->default_domain) { + group->default_domain = __iommu_domain_alloc(dev->bus, + IOMMU_DOMAIN_DMA); + group->domain = group->default_domain; + } + ret = iommu_group_add_device(group, dev); if (ret) { iommu_group_put(group); From a13c8f27e4d79420ae7bcb48123fa13851026428 Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Thu, 22 Oct 2015 14:00:51 +0800 Subject: [PATCH 39/41] iommu/vt-d: Adjust the return value of the parse_ioapics_under_ir Adjust the return value of parse_ioapics_under_ir as negative value representing failure and "0" representing succcess. Just make it consistent with other function implementations, and we can judge if calling is successfull by if (!parse_ioapics_under_ir()) style. Signed-off-by: Baoquan He Signed-off-by: Joerg Roedel --- drivers/iommu/intel_irq_remapping.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c index a50468e7d8c2..7843252afda8 100644 --- a/drivers/iommu/intel_irq_remapping.c +++ b/drivers/iommu/intel_irq_remapping.c @@ -690,7 +690,7 @@ static int __init intel_prepare_irq_remapping(void) if (!dmar_ir_support()) return -ENODEV; - if (parse_ioapics_under_ir() != 1) { + if (!parse_ioapics_under_ir()) { pr_info("Not enabling interrupt remapping\n"); goto error; } @@ -943,7 +943,7 @@ static int __init parse_ioapics_under_ir(void) } if (!ir_supported) - return 0; + return -ENODEV; for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++) { int ioapic_id = mpc_ioapic_id(ioapic_idx); @@ -955,7 +955,7 @@ static int __init parse_ioapics_under_ir(void) } } - return 1; + return 0; } static int __init ir_dev_scope_init(void) From 66ef950d1841487c852d68d00977726f1cd9f053 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 23 Oct 2015 11:57:13 +0200 Subject: [PATCH 40/41] iommu/vt-d: Propagate error-value from ir_parse_ioapic_hpet_scope() Propagate the error-value from the function ir_parse_ioapic_hpet_scope() in parse_ioapics_under_ir() and cleanup its calling loop. Signed-off-by: Joerg Roedel --- drivers/iommu/intel_irq_remapping.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c index 7843252afda8..f3a9bd1bea43 100644 --- a/drivers/iommu/intel_irq_remapping.c +++ b/drivers/iommu/intel_irq_remapping.c @@ -934,13 +934,18 @@ static int __init parse_ioapics_under_ir(void) bool ir_supported = false; int ioapic_idx; - for_each_iommu(iommu, drhd) - if (ecap_ir_support(iommu->ecap)) { - if (ir_parse_ioapic_hpet_scope(drhd->hdr, iommu)) - return -1; + for_each_iommu(iommu, drhd) { + int ret; - ir_supported = true; - } + if (!ecap_ir_support(iommu->ecap)) + continue; + + ret = ir_parse_ioapic_hpet_scope(drhd->hdr, iommu); + if (ret) + return ret; + + ir_supported = true; + } if (!ir_supported) return -ENODEV; From b61e5e80e3cd37f0183cdc04b03861a0467d9f52 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 2 Nov 2015 19:57:31 +0900 Subject: [PATCH 41/41] iommu/vt-d: Fix return value check of parse_ioapics_under_ir() The function returns 0 on success, so check for the right value. Signed-off-by: Joerg Roedel --- drivers/iommu/intel_irq_remapping.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c index f3a9bd1bea43..1fae1881648c 100644 --- a/drivers/iommu/intel_irq_remapping.c +++ b/drivers/iommu/intel_irq_remapping.c @@ -690,7 +690,7 @@ static int __init intel_prepare_irq_remapping(void) if (!dmar_ir_support()) return -ENODEV; - if (!parse_ioapics_under_ir()) { + if (parse_ioapics_under_ir()) { pr_info("Not enabling interrupt remapping\n"); goto error; }