From 1c98025c6c95bc057a25e2c6596de23288c68160 Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Fri, 8 Aug 2014 18:40:42 -0500 Subject: [PATCH 001/130] powerpc: Dynamic DMA zone limits Platform code can call limit_zone_pfn() to set appropriate limits for ZONE_DMA and ZONE_DMA32, and dma_direct_alloc_coherent() will select a suitable zone based on a device's mask and the pfn limits that platform code has configured. Signed-off-by: Scott Wood Cc: Shaohui Xie --- arch/powerpc/Kconfig | 4 ++ arch/powerpc/include/asm/pgtable.h | 3 ++ arch/powerpc/kernel/dma.c | 20 ++++++++++ arch/powerpc/mm/mem.c | 61 +++++++++++++++++++++++++++--- 4 files changed, 83 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index bd6dd6ed3a9f..d003409b5f98 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -285,6 +285,10 @@ config PPC_EMULATE_SSTEP bool default y if KPROBES || UPROBES || XMON || HAVE_HW_BREAKPOINT +config ZONE_DMA32 + bool + default y if PPC64 + source "init/Kconfig" source "kernel/Kconfig.freezer" diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h index d98c1ecc3266..6d74167bb6bf 100644 --- a/arch/powerpc/include/asm/pgtable.h +++ b/arch/powerpc/include/asm/pgtable.h @@ -4,6 +4,7 @@ #ifndef __ASSEMBLY__ #include +#include #include /* For TASK_SIZE */ #include #include @@ -281,6 +282,8 @@ extern unsigned long empty_zero_page[]; extern pgd_t swapper_pg_dir[]; +void limit_zone_pfn(enum zone_type zone, unsigned long max_pfn); +int dma_pfn_limit_to_zone(u64 pfn_limit); extern void paging_init(void); /* diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c index ee78f6e49d64..dfd99ef8c63d 100644 --- a/arch/powerpc/kernel/dma.c +++ b/arch/powerpc/kernel/dma.c @@ -40,6 +40,26 @@ void *dma_direct_alloc_coherent(struct device *dev, size_t size, #else struct page *page; int node = dev_to_node(dev); + u64 pfn = (dev->coherent_dma_mask >> PAGE_SHIFT) + 1; + int zone; + + zone = dma_pfn_limit_to_zone(pfn); + if (zone < 0) { + dev_err(dev, "%s: No suitable zone for pfn %#llx\n", + __func__, pfn); + return NULL; + } + + switch (zone) { + case ZONE_DMA: + flag |= GFP_DMA; + break; +#ifdef CONFIG_ZONE_DMA32 + case ZONE_DMA32: + flag |= GFP_DMA32; + break; +#endif + }; /* ignore region specifiers */ flag &= ~(__GFP_HIGHMEM); diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 2c8e90f5789e..687e7f7f7751 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -260,6 +260,54 @@ static int __init mark_nonram_nosave(void) return 0; } +static bool zone_limits_final; + +static unsigned long max_zone_pfns[MAX_NR_ZONES] = { + [0 ... MAX_NR_ZONES - 1] = ~0UL +}; + +/* + * Restrict the specified zone and all more restrictive zones + * to be below the specified pfn. May not be called after + * paging_init(). + */ +void __init limit_zone_pfn(enum zone_type zone, unsigned long pfn_limit) +{ + int i; + + if (WARN_ON(zone_limits_final)) + return; + + for (i = zone; i >= 0; i--) { + if (max_zone_pfns[i] > pfn_limit) + max_zone_pfns[i] = pfn_limit; + } +} + +/* + * Find the least restrictive zone that is entirely below the + * specified pfn limit. Returns < 0 if no suitable zone is found. + * + * pfn_limit must be u64 because it can exceed 32 bits even on 32-bit + * systems -- the DMA limit can be higher than any possible real pfn. + */ +int dma_pfn_limit_to_zone(u64 pfn_limit) +{ + enum zone_type top_zone = ZONE_NORMAL; + int i; + +#ifdef CONFIG_HIGHMEM + top_zone = ZONE_HIGHMEM; +#endif + + for (i = top_zone; i >= 0; i--) { + if (max_zone_pfns[i] <= pfn_limit) + return i; + } + + return -EPERM; +} + /* * paging_init() sets up the page tables - in fact we've already done this. */ @@ -267,7 +315,7 @@ void __init paging_init(void) { unsigned long long total_ram = memblock_phys_mem_size(); phys_addr_t top_of_ram = memblock_end_of_DRAM(); - unsigned long max_zone_pfns[MAX_NR_ZONES]; + enum zone_type top_zone; #ifdef CONFIG_PPC32 unsigned long v = __fix_to_virt(__end_of_fixed_addresses - 1); @@ -289,13 +337,16 @@ void __init paging_init(void) (unsigned long long)top_of_ram, total_ram); printk(KERN_DEBUG "Memory hole size: %ldMB\n", (long int)((top_of_ram - total_ram) >> 20)); - memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); + #ifdef CONFIG_HIGHMEM - max_zone_pfns[ZONE_DMA] = lowmem_end_addr >> PAGE_SHIFT; - max_zone_pfns[ZONE_HIGHMEM] = top_of_ram >> PAGE_SHIFT; + top_zone = ZONE_HIGHMEM; + limit_zone_pfn(ZONE_NORMAL, lowmem_end_addr >> PAGE_SHIFT); #else - max_zone_pfns[ZONE_DMA] = top_of_ram >> PAGE_SHIFT; + top_zone = ZONE_NORMAL; #endif + + limit_zone_pfn(top_zone, top_of_ram >> PAGE_SHIFT); + zone_limits_final = true; free_area_init_nodes(max_zone_pfns); mark_nonram_nosave(); From 6397fc3fb05e73062b9790cfe2760e209ebd7b95 Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Fri, 8 Aug 2014 18:40:43 -0500 Subject: [PATCH 002/130] powerpc/64: Honor swiotlb limit in coherent allocations FSL PCI cannot directly address the whole lower 4 GiB due to conflicts with PCICSRBAR and outbound windows, and thus max_direct_dma_addr is less than 4GiB. Honor that limit in dma_direct_alloc_coherent(). Note that setting the DMA mask to 31 bits is not an option, since many PCI drivers would fail if we reject 32-bit DMA in dma_supported(), and we have no control over the setting of coherent_dma_mask if dma_supported() returns true. Signed-off-by: Scott Wood Cc: Shaohui Xie --- arch/powerpc/kernel/dma.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c index dfd99ef8c63d..a7b0156586dd 100644 --- a/arch/powerpc/kernel/dma.c +++ b/arch/powerpc/kernel/dma.c @@ -15,6 +15,7 @@ #include #include #include +#include /* * Generic direct DMA implementation @@ -25,6 +26,18 @@ * default the offset is PCI_DRAM_OFFSET. */ +static u64 __maybe_unused get_pfn_limit(struct device *dev) +{ + u64 pfn = (dev->coherent_dma_mask >> PAGE_SHIFT) + 1; + struct dev_archdata __maybe_unused *sd = &dev->archdata; + +#ifdef CONFIG_SWIOTLB + if (sd->max_direct_dma_addr && sd->dma_ops == &swiotlb_dma_ops) + pfn = min_t(u64, pfn, sd->max_direct_dma_addr >> PAGE_SHIFT); +#endif + + return pfn; +} void *dma_direct_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flag, @@ -40,7 +53,7 @@ void *dma_direct_alloc_coherent(struct device *dev, size_t size, #else struct page *page; int node = dev_to_node(dev); - u64 pfn = (dev->coherent_dma_mask >> PAGE_SHIFT) + 1; + u64 pfn = get_pfn_limit(dev); int zone; zone = dma_pfn_limit_to_zone(pfn); From cf5621032fbe7354c0c3fff17958b7029bbc9f30 Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Fri, 8 Aug 2014 18:40:44 -0500 Subject: [PATCH 003/130] powerpc/64: Limit ZONE_DMA32 to 4GiB in swiotlb_detect_4g() A DMA zone is still needed with swiotlb, for coherent allocations. This doesn't affect platforms that don't use swiotlb or that don't call swiotlb_detect_4g(). Signed-off-by: Scott Wood Cc: Shaohui Xie --- arch/powerpc/kernel/dma-swiotlb.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/dma-swiotlb.c b/arch/powerpc/kernel/dma-swiotlb.c index bd1a2aba599f..735979764cd4 100644 --- a/arch/powerpc/kernel/dma-swiotlb.c +++ b/arch/powerpc/kernel/dma-swiotlb.c @@ -106,10 +106,14 @@ int __init swiotlb_setup_bus_notifier(void) return 0; } -void swiotlb_detect_4g(void) +void __init swiotlb_detect_4g(void) { - if ((memblock_end_of_DRAM() - 1) > 0xffffffff) + if ((memblock_end_of_DRAM() - 1) > 0xffffffff) { ppc_swiotlb_enable = 1; +#ifdef CONFIG_ZONE_DMA32 + limit_zone_pfn(ZONE_DMA32, (1ULL << 32) >> PAGE_SHIFT); +#endif + } } static int __init swiotlb_late_init(void) From 84f44cc56c0994df7778bde55c8d5e9aa9fff8f5 Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Fri, 8 Aug 2014 18:40:45 -0500 Subject: [PATCH 004/130] powerpc/fsl-pci: Limit ZONE_DMA32 to 2GiB on 64-bit platforms FSL PCI cannot directly address the whole lower 4 GiB due to conflicts with PCICSRBAR and outbound windows. By the time max_direct_dma_addr is set to the precise limit, it will be too late to alter the zone limits, but we should always have at least 2 GiB mapped (unless RAM is smaller than that). Signed-off-by: Scott Wood Cc: Shaohui Xie --- arch/powerpc/platforms/85xx/corenet_generic.c | 11 +++++++++++ arch/powerpc/platforms/85xx/qemu_e500.c | 10 ++++++++++ 2 files changed, 21 insertions(+) diff --git a/arch/powerpc/platforms/85xx/corenet_generic.c b/arch/powerpc/platforms/85xx/corenet_generic.c index d22dd85e50bf..c2adb00228c5 100644 --- a/arch/powerpc/platforms/85xx/corenet_generic.c +++ b/arch/powerpc/platforms/85xx/corenet_generic.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -67,6 +68,16 @@ void __init corenet_gen_setup_arch(void) swiotlb_detect_4g(); +#if defined(CONFIG_FSL_PCI) && defined(CONFIG_ZONE_DMA32) + /* + * Inbound windows don't cover the full lower 4 GiB + * due to conflicts with PCICSRBAR and outbound windows, + * so limit the DMA32 zone to 2 GiB, to allow consistent + * allocations to succeed. + */ + limit_zone_pfn(ZONE_DMA32, 1UL << (31 - PAGE_SHIFT)); +#endif + pr_info("%s board\n", ppc_md.name); mpc85xx_qe_init(); diff --git a/arch/powerpc/platforms/85xx/qemu_e500.c b/arch/powerpc/platforms/85xx/qemu_e500.c index 7f2673293549..8ad2fe6f200a 100644 --- a/arch/powerpc/platforms/85xx/qemu_e500.c +++ b/arch/powerpc/platforms/85xx/qemu_e500.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -44,6 +45,15 @@ static void __init qemu_e500_setup_arch(void) fsl_pci_assign_primary(); swiotlb_detect_4g(); +#if defined(CONFIG_FSL_PCI) && defined(CONFIG_ZONE_DMA32) + /* + * Inbound windows don't cover the full lower 4 GiB + * due to conflicts with PCICSRBAR and outbound windows, + * so limit the DMA32 zone to 2 GiB, to allow consistent + * allocations to succeed. + */ + limit_zone_pfn(ZONE_DMA32, 1UL << (31 - PAGE_SHIFT)); +#endif mpc85xx_smp_init(); } From 67e35c3a79b7349a9b0dbe1dd0bf82def0296714 Mon Sep 17 00:00:00 2001 From: Tudor Laurentiu Date: Wed, 13 Aug 2014 16:55:13 +0300 Subject: [PATCH 005/130] powerpc/fsl_msi: support vmpic msi with mpic 4.3 The new MSI block in MPIC 4.3 added the MSIIR1 register, with a different layout, in order to support 16 MSIR registers. The msi binding was also updated so that the "reg" reflects the newly introduced MSIIR1 register. Virtual machines advertise these msi nodes by using the compatible "fsl,vmpic-msi-v4.3" so add support for it. Signed-off-by: Laurentiu Tudor Cc: Scott Wood Signed-off-by: Scott Wood --- arch/powerpc/sysdev/fsl_msi.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c index 77efbaec7b9c..9fadc6e4dc47 100644 --- a/arch/powerpc/sysdev/fsl_msi.c +++ b/arch/powerpc/sysdev/fsl_msi.c @@ -180,7 +180,8 @@ static int fsl_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) np = of_parse_phandle(hose->dn, "fsl,msi", 0); if (np) { if (of_device_is_compatible(np, "fsl,mpic-msi") || - of_device_is_compatible(np, "fsl,vmpic-msi")) + of_device_is_compatible(np, "fsl,vmpic-msi") || + of_device_is_compatible(np, "fsl,vmpic-msi-v4.3")) phandle = np->phandle; else { dev_err(&pdev->dev, @@ -466,7 +467,8 @@ static int fsl_of_msi_probe(struct platform_device *dev) p = of_get_property(dev->dev.of_node, "msi-available-ranges", &len); - if (of_device_is_compatible(dev->dev.of_node, "fsl,mpic-msi-v4.3")) { + if (of_device_is_compatible(dev->dev.of_node, "fsl,mpic-msi-v4.3") || + of_device_is_compatible(dev->dev.of_node, "fsl,vmpic-msi-v4.3")) { msi->srs_shift = MSIIR1_SRS_SHIFT; msi->ibs_shift = MSIIR1_IBS_SHIFT; if (p) @@ -572,6 +574,10 @@ static const struct of_device_id fsl_of_msi_ids[] = { .compatible = "fsl,vmpic-msi", .data = &vmpic_msi_feature, }, + { + .compatible = "fsl,vmpic-msi-v4.3", + .data = &vmpic_msi_feature, + }, #endif {} }; From 26ae4980b5e4739af93543a147facb421fb78ae8 Mon Sep 17 00:00:00 2001 From: Aaron Sierra Date: Fri, 15 Aug 2014 16:07:48 -0500 Subject: [PATCH 006/130] fsl_ifc: Fix csor_ext position in fsl_ifc_regs According to Freescale manuals, the IFC_CSORn_EXT register is located immediately _after_ the bank's IFC_CSORn register. This patch adjusts the csor_ext member of and reserved register arrays immediately surrounding the csor_cs structure to provide proper access to this register. Signed-off-by: Aaron Sierra Acked-by: Prabhakar Kushwaha Signed-off-by: Scott Wood --- include/linux/fsl_ifc.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/linux/fsl_ifc.h b/include/linux/fsl_ifc.h index f49ddb1b2273..84d60cb841b1 100644 --- a/include/linux/fsl_ifc.h +++ b/include/linux/fsl_ifc.h @@ -781,13 +781,13 @@ struct fsl_ifc_regs { __be32 amask; u32 res4[0x2]; } amask_cs[FSL_IFC_BANK_COUNT]; - u32 res5[0x17]; + u32 res5[0x18]; struct { - __be32 csor_ext; __be32 csor; + __be32 csor_ext; u32 res6; } csor_cs[FSL_IFC_BANK_COUNT]; - u32 res7[0x19]; + u32 res7[0x18]; struct { __be32 ftim[4]; u32 res8[0x8]; From 7b0e6d6f6de5916aee2a972a4e3a622009b9743a Mon Sep 17 00:00:00 2001 From: Nikhil Badola Date: Mon, 25 Aug 2014 16:40:05 +0530 Subject: [PATCH 007/130] powerpc: configs: Add VFAT file-system configs Add CONFIG_NLS_CODEPAGE_437, CONFIG_NLS_CODEPAGE_850, CONFIG_NLS_ISO8859_1 in default configs for 85xx and 86xx socs. Required for mounting vfat file-systems on USB devices Signed-off-by: Ramneek Mehresh Signed-off-by: Nikhil Badola Signed-off-by: Scott Wood --- arch/powerpc/configs/corenet32_smp_defconfig | 2 ++ arch/powerpc/configs/corenet64_smp_defconfig | 2 ++ arch/powerpc/configs/mpc85xx_defconfig | 3 +++ arch/powerpc/configs/mpc85xx_smp_defconfig | 3 +++ arch/powerpc/configs/mpc86xx_defconfig | 3 +++ 5 files changed, 13 insertions(+) diff --git a/arch/powerpc/configs/corenet32_smp_defconfig b/arch/powerpc/configs/corenet32_smp_defconfig index 6a3c58adf253..688e9e4d29a1 100644 --- a/arch/powerpc/configs/corenet32_smp_defconfig +++ b/arch/powerpc/configs/corenet32_smp_defconfig @@ -165,6 +165,8 @@ CONFIG_NFS_FS=y CONFIG_NFS_V4=y CONFIG_ROOT_NFS=y CONFIG_NFSD=m +CONFIG_NLS_CODEPAGE_437=y +CONFIG_NLS_CODEPAGE_850=y CONFIG_NLS_ISO8859_1=y CONFIG_NLS_UTF8=m CONFIG_MAGIC_SYSRQ=y diff --git a/arch/powerpc/configs/corenet64_smp_defconfig b/arch/powerpc/configs/corenet64_smp_defconfig index 4b07bade1ba9..f54eb448002b 100644 --- a/arch/powerpc/configs/corenet64_smp_defconfig +++ b/arch/powerpc/configs/corenet64_smp_defconfig @@ -157,6 +157,8 @@ CONFIG_NFS_FS=y CONFIG_NFS_V4=y CONFIG_ROOT_NFS=y CONFIG_NFSD=m +CONFIG_NLS_CODEPAGE_437=y +CONFIG_NLS_CODEPAGE_850=y CONFIG_NLS_ISO8859_1=y CONFIG_NLS_UTF8=m CONFIG_CRC_T10DIF=y diff --git a/arch/powerpc/configs/mpc85xx_defconfig b/arch/powerpc/configs/mpc85xx_defconfig index fa1bfd37f1ec..8b2d759e69f6 100644 --- a/arch/powerpc/configs/mpc85xx_defconfig +++ b/arch/powerpc/configs/mpc85xx_defconfig @@ -227,6 +227,9 @@ CONFIG_UDF_FS=m CONFIG_MSDOS_FS=m CONFIG_VFAT_FS=y CONFIG_NTFS_FS=y +CONFIG_NLS_CODEPAGE_437=y +CONFIG_NLS_CODEPAGE_850=y +CONFIG_NLS_ISO8859_1=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_HUGETLBFS=y diff --git a/arch/powerpc/configs/mpc85xx_smp_defconfig b/arch/powerpc/configs/mpc85xx_smp_defconfig index 0b452ebd8b3d..a3003b2e29f8 100644 --- a/arch/powerpc/configs/mpc85xx_smp_defconfig +++ b/arch/powerpc/configs/mpc85xx_smp_defconfig @@ -228,6 +228,9 @@ CONFIG_UDF_FS=m CONFIG_MSDOS_FS=m CONFIG_VFAT_FS=y CONFIG_NTFS_FS=y +CONFIG_NLS_CODEPAGE_437=y +CONFIG_NLS_CODEPAGE_850=y +CONFIG_NLS_ISO8859_1=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_HUGETLBFS=y diff --git a/arch/powerpc/configs/mpc86xx_defconfig b/arch/powerpc/configs/mpc86xx_defconfig index 35595ea74ff4..fc58aa8a89e4 100644 --- a/arch/powerpc/configs/mpc86xx_defconfig +++ b/arch/powerpc/configs/mpc86xx_defconfig @@ -145,6 +145,9 @@ CONFIG_UDF_FS=m CONFIG_MSDOS_FS=m CONFIG_VFAT_FS=y CONFIG_NTFS_FS=y +CONFIG_NLS_CODEPAGE_437=y +CONFIG_NLS_CODEPAGE_850=y +CONFIG_NLS_ISO8859_1=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_ADFS_FS=m From 00406e8772c61feb57c1baeb97531aa199614e65 Mon Sep 17 00:00:00 2001 From: Aaron Sierra Date: Tue, 26 Aug 2014 16:46:11 -0500 Subject: [PATCH 008/130] powerpc: fsl_pci: Add forced PCI Agent enumeration The following commit prevents the MPC8548E on the XPedite5200 PrPMC module from enumerating its PCI/PCI-X bus: powerpc/fsl-pci: use 'Header Type' to identify PCIE mode The previous patch prevents any Freescale PCI-X bridge from enumerating the bus, if it is hardware strapped into Agent mode. In PCI-X, the Host is responsible for driving the PCI-X initialization pattern to devices on the bus, so that they know whether to operate in conventional PCI or PCI-X mode as well as what the bus timing will be. For a PCI-X PrPMC, the pattern is driven by the mezzanine carrier it is installed onto. Therefore, PrPMCs are PCI-X Agents, but one per system may still enumerate the bus. This patch causes the device node of any PCI/PCI-X bridge strapped into Agent mode to be checked for the fsl,pci-agent-force-enum property. If the property is present in the node, the bridge will be allowed to enumerate the bus. Cc: Minghuan Lian Signed-off-by: Aaron Sierra Signed-off-by: Scott Wood --- .../devicetree/bindings/pci/fsl,pci.txt | 27 +++++++++++++++++++ arch/powerpc/sysdev/fsl_pci.c | 3 ++- 2 files changed, 29 insertions(+), 1 deletion(-) create mode 100644 Documentation/devicetree/bindings/pci/fsl,pci.txt diff --git a/Documentation/devicetree/bindings/pci/fsl,pci.txt b/Documentation/devicetree/bindings/pci/fsl,pci.txt new file mode 100644 index 000000000000..d8ac4a768e7e --- /dev/null +++ b/Documentation/devicetree/bindings/pci/fsl,pci.txt @@ -0,0 +1,27 @@ +* Bus Enumeration by Freescale PCI-X Agent + +Typically any Freescale PCI-X bridge hardware strapped into Agent mode +is prevented from enumerating the bus. The PrPMC form-factor requires +all mezzanines to be PCI-X Agents, but one per system may still +enumerate the bus. + +The property defined below will allow a PCI-X bridge to be used for bus +enumeration despite being strapped into Agent mode. + +Required properties: +- fsl,pci-agent-force-enum : There is no value associated with this + property. The property itself is treated as a boolean. + +Example: + + /* PCI-X bridge known to be PrPMC Monarch */ + pci0: pci@ef008000 { + fsl,pci-agent-force-enum; + #interrupt-cells = <1>; + #size-cells = <2>; + #address-cells = <3>; + compatible = "fsl,mpc8540-pcix", "fsl,mpc8540-pci"; + device_type = "pci"; + ... + ... + }; diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c index c5077673bd94..65d2ed4549e6 100644 --- a/arch/powerpc/sysdev/fsl_pci.c +++ b/arch/powerpc/sysdev/fsl_pci.c @@ -522,7 +522,8 @@ int fsl_add_bridge(struct platform_device *pdev, int is_primary) } else { /* For PCI read PROG to identify controller mode */ early_read_config_byte(hose, 0, 0, PCI_CLASS_PROG, &progif); - if ((progif & 1) == 1) + if ((progif & 1) && + !of_property_read_bool(dev, "fsl,pci-agent-force-enum")) goto no_bridge; } From 26a047ab101a6ab765b805c7417e82e1ed413ff2 Mon Sep 17 00:00:00 2001 From: Nikhil Badola Date: Thu, 28 Aug 2014 09:21:46 +0530 Subject: [PATCH 009/130] powerpc: dts: t4240: Change T4240 USB controller version Change USB controller version to 2.5 in compatible string for T4240 Signed-off-by: Nikhil Badola Signed-off-by: Scott Wood --- arch/powerpc/boot/dts/fsl/t4240si-post.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/boot/dts/fsl/t4240si-post.dtsi b/arch/powerpc/boot/dts/fsl/t4240si-post.dtsi index a3d582e0361a..7e2fc7cdce48 100644 --- a/arch/powerpc/boot/dts/fsl/t4240si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/t4240si-post.dtsi @@ -498,13 +498,13 @@ /include/ "qoriq-gpio-3.dtsi" /include/ "qoriq-usb2-mph-0.dtsi" usb0: usb@210000 { - compatible = "fsl-usb2-mph-v2.4", "fsl-usb2-mph"; + compatible = "fsl-usb2-mph-v2.5", "fsl-usb2-mph"; phy_type = "utmi"; port0; }; /include/ "qoriq-usb2-dr-0.dtsi" usb1: usb@211000 { - compatible = "fsl-usb2-dr-v2.4", "fsl-usb2-dr"; + compatible = "fsl-usb2-dr-v2.5", "fsl-usb2-dr"; dr_mode = "host"; phy_type = "utmi"; }; From 834952314c8bae7331b0797a071958dda9bec60d Mon Sep 17 00:00:00 2001 From: Tudor Laurentiu Date: Tue, 19 Aug 2014 14:25:01 +0300 Subject: [PATCH 010/130] powerpc/fsl_msi: reorganize structs to improve clarity and flexibility Store cascade_data in an array inside the driver data for later use. Get rid of the msi_virq array since now we can encapsulate the virqs in the cascade_data directly and access them through the array mentioned earlier. Signed-off-by: Laurentiu Tudor Cc: Scott Wood Cc: Mihai Caraman Signed-off-by: Scott Wood --- arch/powerpc/sysdev/fsl_msi.c | 17 +++++++++++------ arch/powerpc/sysdev/fsl_msi.h | 4 +++- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c index 9fadc6e4dc47..05a0dd918de2 100644 --- a/arch/powerpc/sysdev/fsl_msi.c +++ b/arch/powerpc/sysdev/fsl_msi.c @@ -50,6 +50,7 @@ struct fsl_msi_feature { struct fsl_msi_cascade_data { struct fsl_msi *msi_data; int index; + int virq; }; static inline u32 fsl_msi_read(u32 __iomem *base, unsigned int reg) @@ -327,15 +328,18 @@ static int fsl_of_msi_remove(struct platform_device *ofdev) { struct fsl_msi *msi = platform_get_drvdata(ofdev); int virq, i; - struct fsl_msi_cascade_data *cascade_data; if (msi->list.prev != NULL) list_del(&msi->list); for (i = 0; i < NR_MSI_REG_MAX; i++) { - virq = msi->msi_virqs[i]; - if (virq != NO_IRQ) { - cascade_data = irq_get_handler_data(virq); - kfree(cascade_data); + if (msi->cascade_array[i]) { + virq = msi->cascade_array[i]->virq; + + BUG_ON(virq == NO_IRQ); + BUG_ON(msi->cascade_array[i] != + irq_get_handler_data(virq)); + + kfree(msi->cascade_array[i]); irq_dispose_mapping(virq); } } @@ -369,9 +373,10 @@ static int fsl_msi_setup_hwirq(struct fsl_msi *msi, struct platform_device *dev, return -ENOMEM; } irq_set_lockdep_class(virt_msir, &fsl_msi_irq_class); - msi->msi_virqs[irq_index] = virt_msir; cascade_data->index = offset; cascade_data->msi_data = msi; + cascade_data->virq = virt_msir; + msi->cascade_array[irq_index] = cascade_data; irq_set_handler_data(virt_msir, cascade_data); irq_set_chained_handler(virt_msir, fsl_msi_cascade); diff --git a/arch/powerpc/sysdev/fsl_msi.h b/arch/powerpc/sysdev/fsl_msi.h index df9aa9fe0933..420cfcbdac01 100644 --- a/arch/powerpc/sysdev/fsl_msi.h +++ b/arch/powerpc/sysdev/fsl_msi.h @@ -27,6 +27,8 @@ #define FSL_PIC_IP_IPIC 0x00000002 #define FSL_PIC_IP_VMPIC 0x00000003 +struct fsl_msi_cascade_data; + struct fsl_msi { struct irq_domain *irqhost; @@ -37,7 +39,7 @@ struct fsl_msi { u32 srs_shift; /* Shift of the shared interrupt register select */ void __iomem *msi_regs; u32 feature; - int msi_virqs[NR_MSI_REG_MAX]; + struct fsl_msi_cascade_data *cascade_array[NR_MSI_REG_MAX]; struct msi_bitmap bitmap; From 543c043cbae79164aa087f96294cb37fc4a19a59 Mon Sep 17 00:00:00 2001 From: Tudor Laurentiu Date: Tue, 19 Aug 2014 14:25:03 +0300 Subject: [PATCH 011/130] powerpc/fsl_msi: change the irq handler from chained to normal As we do for other fsl-mpic related cascaded irqchips (e.g. error ints, mpic timers), use a normal irq handler for msi irqs too. This brings some advantages such as mask/unmask/ack/eoi and irq state taken care behind the scenes, kstats updates a.s.o plus access to features provided by mpic, such as affinity. Signed-off-by: Laurentiu Tudor Cc: Scott Wood Cc: Mihai Caraman Signed-off-by: Scott Wood --- arch/powerpc/sysdev/fsl_msi.c | 56 +++++++++++------------------------ 1 file changed, 18 insertions(+), 38 deletions(-) diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c index 05a0dd918de2..0cfc32a63039 100644 --- a/arch/powerpc/sysdev/fsl_msi.c +++ b/arch/powerpc/sysdev/fsl_msi.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -241,40 +242,24 @@ out_free: return rc; } -static void fsl_msi_cascade(unsigned int irq, struct irq_desc *desc) +static irqreturn_t fsl_msi_cascade(int irq, void *data) { - struct irq_chip *chip = irq_desc_get_chip(desc); - struct irq_data *idata = irq_desc_get_irq_data(desc); unsigned int cascade_irq; struct fsl_msi *msi_data; int msir_index = -1; u32 msir_value = 0; u32 intr_index; u32 have_shift = 0; - struct fsl_msi_cascade_data *cascade_data; + struct fsl_msi_cascade_data *cascade_data = data; + irqreturn_t ret = IRQ_NONE; - cascade_data = irq_get_handler_data(irq); msi_data = cascade_data->msi_data; - raw_spin_lock(&desc->lock); - if ((msi_data->feature & FSL_PIC_IP_MASK) == FSL_PIC_IP_IPIC) { - if (chip->irq_mask_ack) - chip->irq_mask_ack(idata); - else { - chip->irq_mask(idata); - chip->irq_ack(idata); - } - } - - if (unlikely(irqd_irq_inprogress(idata))) - goto unlock; - msir_index = cascade_data->index; if (msir_index >= NR_MSI_REG_MAX) cascade_irq = NO_IRQ; - irqd_set_chained_irq_inprogress(idata); switch (msi_data->feature & FSL_PIC_IP_MASK) { case FSL_PIC_IP_MPIC: msir_value = fsl_msi_read(msi_data->msi_regs, @@ -303,25 +288,15 @@ static void fsl_msi_cascade(unsigned int irq, struct irq_desc *desc) cascade_irq = irq_linear_revmap(msi_data->irqhost, msi_hwirq(msi_data, msir_index, intr_index + have_shift)); - if (cascade_irq != NO_IRQ) + if (cascade_irq != NO_IRQ) { generic_handle_irq(cascade_irq); + ret = IRQ_HANDLED; + } have_shift += intr_index + 1; msir_value = msir_value >> (intr_index + 1); } - irqd_clr_chained_irq_inprogress(idata); - switch (msi_data->feature & FSL_PIC_IP_MASK) { - case FSL_PIC_IP_MPIC: - case FSL_PIC_IP_VMPIC: - chip->irq_eoi(idata); - break; - case FSL_PIC_IP_IPIC: - if (!irqd_irq_disabled(idata) && chip->irq_unmask) - chip->irq_unmask(idata); - break; - } -unlock: - raw_spin_unlock(&desc->lock); + return ret; } static int fsl_of_msi_remove(struct platform_device *ofdev) @@ -336,9 +311,8 @@ static int fsl_of_msi_remove(struct platform_device *ofdev) virq = msi->cascade_array[i]->virq; BUG_ON(virq == NO_IRQ); - BUG_ON(msi->cascade_array[i] != - irq_get_handler_data(virq)); + free_irq(virq, msi->cascade_array[i]); kfree(msi->cascade_array[i]); irq_dispose_mapping(virq); } @@ -358,7 +332,7 @@ static int fsl_msi_setup_hwirq(struct fsl_msi *msi, struct platform_device *dev, int offset, int irq_index) { struct fsl_msi_cascade_data *cascade_data = NULL; - int virt_msir, i; + int virt_msir, i, ret; virt_msir = irq_of_parse_and_map(dev->dev.of_node, irq_index); if (virt_msir == NO_IRQ) { @@ -377,8 +351,14 @@ static int fsl_msi_setup_hwirq(struct fsl_msi *msi, struct platform_device *dev, cascade_data->msi_data = msi; cascade_data->virq = virt_msir; msi->cascade_array[irq_index] = cascade_data; - irq_set_handler_data(virt_msir, cascade_data); - irq_set_chained_handler(virt_msir, fsl_msi_cascade); + + ret = request_irq(virt_msir, fsl_msi_cascade, 0, + "fsl-msi-cascade", cascade_data); + if (ret) { + dev_err(&dev->dev, "failed to request_irq(%d), ret = %d\n", + virt_msir, ret); + return ret; + } /* Release the hwirqs corresponding to this MSI register */ for (i = 0; i < IRQS_PER_MSI_REG; i++) From de99f53d3af0e1bb9bc6c9e3107d72f3af6e05e9 Mon Sep 17 00:00:00 2001 From: Tudor Laurentiu Date: Tue, 19 Aug 2014 14:25:05 +0300 Subject: [PATCH 012/130] powerpc/fsl_msi: show more meaningful names in /proc/interrupts Rename the irq controller associated with a MSI interrupt to fsl-msi-, where is the virq of the cascade irq backing up this MSI interrupt. This way, one can set the affinity of a MSI through the cascade irq associated with said MSI interrupt. Given this example /proc/interrupts snippet: CPU0 CPU1 CPU2 CPU3 16: 0 0 0 0 OpenPIC 16 Edge mpic-error-int 17: 0 4 0 0 fsl-msi-224 0 Edge eth0-rx-0 18: 0 5 0 0 fsl-msi-225 1 Edge eth0-tx-0 19: 0 2 0 0 fsl-msi-226 2 Edge eth0 [...] 224: 0 11 0 0 OpenPIC 224 Edge fsl-msi-cascade 225: 0 0 0 0 OpenPIC 225 Edge fsl-msi-cascade 226: 0 0 0 0 OpenPIC 226 Edge fsl-msi-cascade [...] To change the affinity of MSI interrupt 17 (having the irq controller named "fsl-msi-224") instead of writing /proc/irq/17/smp_affinity, use the associated MSI cascade irq, in this case, interrupt 224, e.g.: echo 6 > /proc/irq/224/smp_affinity Note that a MSI cascade irq covers several MSI interrupts, so changing the affinity on the cascade will impact all of the associated MSI interrupts. Signed-off-by: Laurentiu Tudor Cc: Scott Wood Cc: Mihai Caraman Signed-off-by: Scott Wood --- arch/powerpc/sysdev/fsl_msi.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c index 0cfc32a63039..e2ee226464f8 100644 --- a/arch/powerpc/sysdev/fsl_msi.c +++ b/arch/powerpc/sysdev/fsl_msi.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -67,11 +68,24 @@ static void fsl_msi_end_irq(struct irq_data *d) { } +static void fsl_msi_print_chip(struct irq_data *irqd, struct seq_file *p) +{ + struct fsl_msi *msi_data = irqd->domain->host_data; + irq_hw_number_t hwirq = irqd_to_hwirq(irqd); + int cascade_virq, srs; + + srs = (hwirq >> msi_data->srs_shift) & MSI_SRS_MASK; + cascade_virq = msi_data->cascade_array[srs]->virq; + + seq_printf(p, " fsl-msi-%d", cascade_virq); +} + + static struct irq_chip fsl_msi_chip = { .irq_mask = mask_msi_irq, .irq_unmask = unmask_msi_irq, .irq_ack = fsl_msi_end_irq, - .name = "FSL-MSI", + .irq_print_chip = fsl_msi_print_chip, }; static int fsl_msi_host_map(struct irq_domain *h, unsigned int virq, From c822e73731fce3b49a4887140878d084d8a44c08 Mon Sep 17 00:00:00 2001 From: Tudor Laurentiu Date: Thu, 21 Aug 2014 12:33:53 +0300 Subject: [PATCH 013/130] powerpc/fsl_msi: spread msi ints across different MSIRs Allocate msis such that each time a new interrupt is requested, the SRS (MSIR register select) to be used is allocated in a round-robin fashion. The end result is that the msi interrupts will be spread across distinct MSIRs with the main benefit that now users can set affinity to each msi int through the mpic irq backing up the MSIR register. This is achieved with the help of a newly introduced msi bitmap api that allows specifying the starting point when searching for a free msi interrupt. Signed-off-by: Laurentiu Tudor Cc: Scott Wood Cc: Mihai Caraman Signed-off-by: Scott Wood --- arch/powerpc/include/asm/msi_bitmap.h | 2 ++ arch/powerpc/sysdev/fsl_msi.c | 31 ++++++++++++++++++++++++++- arch/powerpc/sysdev/fsl_msi.h | 5 +++++ arch/powerpc/sysdev/msi_bitmap.c | 25 ++++++++++++--------- 4 files changed, 52 insertions(+), 11 deletions(-) diff --git a/arch/powerpc/include/asm/msi_bitmap.h b/arch/powerpc/include/asm/msi_bitmap.h index 97ac3f46ae0d..96c2f9500574 100644 --- a/arch/powerpc/include/asm/msi_bitmap.h +++ b/arch/powerpc/include/asm/msi_bitmap.h @@ -25,6 +25,8 @@ int msi_bitmap_alloc_hwirqs(struct msi_bitmap *bmp, int num); void msi_bitmap_free_hwirqs(struct msi_bitmap *bmp, unsigned int offset, unsigned int num); void msi_bitmap_reserve_hwirq(struct msi_bitmap *bmp, unsigned int hwirq); +int msi_bitmap_alloc_hwirqs_from_offset(struct msi_bitmap *bmp, int offset, + int num); int msi_bitmap_reserve_dt_hwirqs(struct msi_bitmap *bmp); diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c index e2ee226464f8..37254eff7324 100644 --- a/arch/powerpc/sysdev/fsl_msi.c +++ b/arch/powerpc/sysdev/fsl_msi.c @@ -213,6 +213,8 @@ static int fsl_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) * available interrupt. */ list_for_each_entry(msi_data, &msi_head, list) { + int off; + /* * If the PCI node has an fsl,msi property, then we * restrict our search to the corresponding MSI node. @@ -224,7 +226,28 @@ static int fsl_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) if (phandle && (phandle != msi_data->phandle)) continue; - hwirq = msi_bitmap_alloc_hwirqs(&msi_data->bitmap, 1); + /* + * Allocate the msi message so that it fits on distinct + * MSIR registers. Obviously, since MSIR registers are + * limited they will overlap at one point. + * + * Due to the format of the newly introduced MSIIR1 in + * mpic 4.3, consecutive msi message values map to + * distinct MSIRs, thus distinct msi irq cascades, so + * nothing special needs to be done in this case. + * On older mpic versions the chose distinct SRS + * values by aligning the msi message value to the + * SRS field shift. + */ + if (msi_data->feature & FSL_PIC_FTR_MPIC_4_3) { + off = 0; + } else { + off = atomic_inc_return(&msi_data->msi_alloc_cnt) % + msi_data->msir_num; + off <<= msi_data->srs_shift; + } + hwirq = msi_bitmap_alloc_hwirqs_from_offset( + &msi_data->bitmap, off, 1); if (hwirq >= 0) break; } @@ -464,12 +487,17 @@ static int fsl_of_msi_probe(struct platform_device *dev) goto error_out; } + atomic_set(&msi->msi_alloc_cnt, -1); + p = of_get_property(dev->dev.of_node, "msi-available-ranges", &len); if (of_device_is_compatible(dev->dev.of_node, "fsl,mpic-msi-v4.3") || of_device_is_compatible(dev->dev.of_node, "fsl,vmpic-msi-v4.3")) { msi->srs_shift = MSIIR1_SRS_SHIFT; msi->ibs_shift = MSIIR1_IBS_SHIFT; + msi->msir_num = NR_MSI_REG_MSIIR1; + msi->feature |= FSL_PIC_FTR_MPIC_4_3; + if (p) dev_warn(&dev->dev, "%s: dose not support msi-available-ranges property\n", __func__); @@ -487,6 +515,7 @@ static int fsl_of_msi_probe(struct platform_device *dev) msi->srs_shift = MSIIR_SRS_SHIFT; msi->ibs_shift = MSIIR_IBS_SHIFT; + msi->msir_num = NR_MSI_REG_MSIIR; if (p && len % (2 * sizeof(u32)) != 0) { dev_err(&dev->dev, "%s: Malformed msi-available-ranges property\n", diff --git a/arch/powerpc/sysdev/fsl_msi.h b/arch/powerpc/sysdev/fsl_msi.h index 420cfcbdac01..50ec4b04c732 100644 --- a/arch/powerpc/sysdev/fsl_msi.h +++ b/arch/powerpc/sysdev/fsl_msi.h @@ -15,6 +15,7 @@ #include #include +#include #define NR_MSI_REG_MSIIR 8 /* MSIIR can index 8 MSI registers */ #define NR_MSI_REG_MSIIR1 16 /* MSIIR1 can index 16 MSI registers */ @@ -27,6 +28,8 @@ #define FSL_PIC_IP_IPIC 0x00000002 #define FSL_PIC_IP_VMPIC 0x00000003 +#define FSL_PIC_FTR_MPIC_4_3 0x00000010 + struct fsl_msi_cascade_data; struct fsl_msi { @@ -37,6 +40,8 @@ struct fsl_msi { u32 msiir_offset; /* Offset of MSIIR, relative to start of CCSR */ u32 ibs_shift; /* Shift of interrupt bit select */ u32 srs_shift; /* Shift of the shared interrupt register select */ + u32 msir_num; /* Number of available MSIR regs */ + atomic_t msi_alloc_cnt; /* Counter for MSI hwirq allocations */ void __iomem *msi_regs; u32 feature; struct fsl_msi_cascade_data *cascade_array[NR_MSI_REG_MAX]; diff --git a/arch/powerpc/sysdev/msi_bitmap.c b/arch/powerpc/sysdev/msi_bitmap.c index 2ff630267e9e..8b7d8fc2b120 100644 --- a/arch/powerpc/sysdev/msi_bitmap.c +++ b/arch/powerpc/sysdev/msi_bitmap.c @@ -14,23 +14,28 @@ #include #include -int msi_bitmap_alloc_hwirqs(struct msi_bitmap *bmp, int num) +int msi_bitmap_alloc_hwirqs_from_offset(struct msi_bitmap *bmp, int offset, + int num) { unsigned long flags; - int offset, order = get_count_order(num); + int index; + int order = get_count_order(num); spin_lock_irqsave(&bmp->lock, flags); - /* - * This is fast, but stricter than we need. We might want to add - * a fallback routine which does a linear search with no alignment. - */ - offset = bitmap_find_free_region(bmp->bitmap, bmp->irq_count, order); + index = bitmap_find_next_zero_area(bmp->bitmap, bmp->irq_count, + offset, num, (1 << order) - 1); + bitmap_set(bmp->bitmap, index, num); spin_unlock_irqrestore(&bmp->lock, flags); - pr_debug("msi_bitmap: allocated 0x%x (2^%d) at offset 0x%x\n", - num, order, offset); + pr_debug("msi_bitmap: found %d free bits starting from offset %d at index %d\n", + num, offset, index); - return offset; + return index; +} + +int msi_bitmap_alloc_hwirqs(struct msi_bitmap *bmp, int num) +{ + return msi_bitmap_alloc_hwirqs_from_offset(bmp, 0, num); } void msi_bitmap_free_hwirqs(struct msi_bitmap *bmp, unsigned int offset, From ae466bde19752f8638fd458225fb65ded5db5e16 Mon Sep 17 00:00:00 2001 From: LEROY Christophe Date: Fri, 29 Aug 2014 11:14:37 +0200 Subject: [PATCH 014/130] powerpc/8xx: Declare SPRG2 as a SCRATCH register Since commit 469d62be9263b92f2c3329540cbb1c076111f4f3, SPRG2 is used as a scratch register just like SPRG0 and SPRG1. So Declare it as such and fix the comment which is not valid anymore since that commit. Signed-off-by: Christophe Leroy Signed-off-by: Scott Wood --- arch/powerpc/include/asm/reg.h | 3 ++- arch/powerpc/kernel/head_8xx.S | 10 +++++----- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index bffd89d27301..1ff8ba94b9f4 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -950,7 +950,7 @@ * 32-bit 8xx: * - SPRG0 scratch for exception vectors * - SPRG1 scratch for exception vectors - * - SPRG2 apparently unused but initialized + * - SPRG2 scratch for exception vectors * */ #ifdef CONFIG_PPC64 @@ -1060,6 +1060,7 @@ #ifdef CONFIG_8xx #define SPRN_SPRG_SCRATCH0 SPRN_SPRG0 #define SPRN_SPRG_SCRATCH1 SPRN_SPRG1 +#define SPRN_SPRG_SCRATCH2 SPRN_SPRG2 #endif diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 7ee876d2adb5..9db2e4491922 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -301,7 +301,7 @@ InstructionTLBMiss: stw r11, 4(r0) #else mtspr SPRN_DAR, r10 - mtspr SPRN_SPRG2, r11 + mtspr SPRN_SPRG_SCRATCH2, r11 #endif mfspr r10, SPRN_SRR0 /* Get effective address of fault */ #ifdef CONFIG_8xx_CPU15 @@ -363,7 +363,7 @@ InstructionTLBMiss: mfspr r10, SPRN_DAR mtcr r10 mtspr SPRN_DAR, r11 /* Tag DAR */ - mfspr r11, SPRN_SPRG2 + mfspr r11, SPRN_SPRG_SCRATCH2 #else lwz r11, 0(r0) mtcr r11 @@ -386,7 +386,7 @@ InstructionTLBMiss: mtcr r10 li r11, 0x00f0 mtspr SPRN_DAR, r11 /* Tag DAR */ - mfspr r11, SPRN_SPRG2 + mfspr r11, SPRN_SPRG_SCRATCH2 #else lwz r11, 0(r0) mtcr r11 @@ -409,7 +409,7 @@ DataStoreTLBMiss: stw r11, 4(r0) #else mtspr SPRN_DAR, r10 - mtspr SPRN_SPRG2, r11 + mtspr SPRN_SPRG_SCRATCH2, r11 #endif mfspr r10, SPRN_M_TWB /* Get level 1 table entry address */ @@ -487,7 +487,7 @@ DataStoreTLBMiss: mfspr r10, SPRN_DAR mtcr r10 mtspr SPRN_DAR, r11 /* Tag DAR */ - mfspr r11, SPRN_SPRG2 + mfspr r11, SPRN_SPRG_SCRATCH2 #else mtspr SPRN_DAR, r11 /* Tag DAR */ lwz r11, 0(r0) From 92625d491e59719f5241bad31eb0f2295479b019 Mon Sep 17 00:00:00 2001 From: LEROY Christophe Date: Fri, 29 Aug 2014 11:14:37 +0200 Subject: [PATCH 015/130] powerpc/8xx: Use SCRATCH0 and SCRATCH1 also for TLB handlers SCRATCH0 and SCRATCH1 are only used in Exceptions prologs where no other exception can happen. There is therefore no need to preserve them accross TLB handlers, we can use them there as in other exceptions. One of the advantages is that they do not suffer CPU6 errata unlike M_TW register. Signed-off-by: Christophe Leroy Signed-off-by: Scott Wood --- arch/powerpc/kernel/head_8xx.S | 104 ++++++++++++--------------------- 1 file changed, 36 insertions(+), 68 deletions(-) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 9db2e4491922..17158089833b 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -104,12 +104,15 @@ turn_on_mmu: * task's thread_struct. */ #define EXCEPTION_PROLOG \ - mtspr SPRN_SPRG_SCRATCH0,r10; \ - mtspr SPRN_SPRG_SCRATCH1,r11; \ - mfcr r10; \ + EXCEPTION_PROLOG_0; \ EXCEPTION_PROLOG_1; \ EXCEPTION_PROLOG_2 +#define EXCEPTION_PROLOG_0 \ + mtspr SPRN_SPRG_SCRATCH0,r10; \ + mtspr SPRN_SPRG_SCRATCH1,r11; \ + mfcr r10 + #define EXCEPTION_PROLOG_1 \ mfspr r11,SPRN_SRR1; /* check whether user or kernel */ \ andi. r11,r11,MSR_PR; \ @@ -144,6 +147,14 @@ turn_on_mmu: SAVE_4GPRS(3, r11); \ SAVE_2GPRS(7, r11) +/* + * Exception exit code. + */ +#define EXCEPTION_EPILOG_0 \ + mtcr r10; \ + mfspr r10,SPRN_SPRG_SCRATCH0; \ + mfspr r11,SPRN_SPRG_SCRATCH1 + /* * Note: code which follows this uses cr0.eq (set if from kernel), * r11, r12 (SRR0), and r9 (SRR1). @@ -293,16 +304,8 @@ InstructionTLBMiss: #ifdef CONFIG_8xx_CPU6 stw r3, 8(r0) #endif - DO_8xx_CPU6(0x3f80, r3) - mtspr SPRN_M_TW, r10 /* Save a couple of working registers */ - mfcr r10 -#ifdef CONFIG_8xx_CPU6 - stw r10, 0(r0) - stw r11, 4(r0) -#else - mtspr SPRN_DAR, r10 - mtspr SPRN_SPRG_SCRATCH2, r11 -#endif + EXCEPTION_PROLOG_0 + mtspr SPRN_SPRG_SCRATCH2, r10 mfspr r10, SPRN_SRR0 /* Get effective address of fault */ #ifdef CONFIG_8xx_CPU15 addi r11, r10, 0x1000 @@ -359,18 +362,11 @@ InstructionTLBMiss: mtspr SPRN_MI_RPN, r10 /* Update TLB entry */ /* Restore registers */ -#ifndef CONFIG_8xx_CPU6 - mfspr r10, SPRN_DAR - mtcr r10 - mtspr SPRN_DAR, r11 /* Tag DAR */ - mfspr r11, SPRN_SPRG_SCRATCH2 -#else - lwz r11, 0(r0) - mtcr r11 - lwz r11, 4(r0) +#ifdef CONFIG_8xx_CPU6 lwz r3, 8(r0) #endif - mfspr r10, SPRN_M_TW + mfspr r10, SPRN_SPRG_SCRATCH2 + EXCEPTION_EPILOG_0 rfi 2: mfspr r11, SPRN_SRR1 @@ -381,19 +377,11 @@ InstructionTLBMiss: mtspr SPRN_SRR1, r11 /* Restore registers */ -#ifndef CONFIG_8xx_CPU6 - mfspr r10, SPRN_DAR - mtcr r10 - li r11, 0x00f0 - mtspr SPRN_DAR, r11 /* Tag DAR */ - mfspr r11, SPRN_SPRG_SCRATCH2 -#else - lwz r11, 0(r0) - mtcr r11 - lwz r11, 4(r0) +#ifdef CONFIG_8xx_CPU6 lwz r3, 8(r0) #endif - mfspr r10, SPRN_M_TW + mfspr r10, SPRN_SPRG_SCRATCH2 + EXCEPTION_EPILOG_0 b InstructionAccess . = 0x1200 @@ -401,16 +389,8 @@ DataStoreTLBMiss: #ifdef CONFIG_8xx_CPU6 stw r3, 8(r0) #endif - DO_8xx_CPU6(0x3f80, r3) - mtspr SPRN_M_TW, r10 /* Save a couple of working registers */ - mfcr r10 -#ifdef CONFIG_8xx_CPU6 - stw r10, 0(r0) - stw r11, 4(r0) -#else - mtspr SPRN_DAR, r10 - mtspr SPRN_SPRG_SCRATCH2, r11 -#endif + EXCEPTION_PROLOG_0 + mtspr SPRN_SPRG_SCRATCH2, r10 mfspr r10, SPRN_M_TWB /* Get level 1 table entry address */ /* If we are faulting a kernel address, we have to use the @@ -483,19 +463,12 @@ DataStoreTLBMiss: mtspr SPRN_MD_RPN, r10 /* Update TLB entry */ /* Restore registers */ -#ifndef CONFIG_8xx_CPU6 - mfspr r10, SPRN_DAR - mtcr r10 - mtspr SPRN_DAR, r11 /* Tag DAR */ - mfspr r11, SPRN_SPRG_SCRATCH2 -#else - mtspr SPRN_DAR, r11 /* Tag DAR */ - lwz r11, 0(r0) - mtcr r11 - lwz r11, 4(r0) +#ifdef CONFIG_8xx_CPU6 lwz r3, 8(r0) #endif - mfspr r10, SPRN_M_TW + mtspr SPRN_DAR, r11 /* Tag DAR */ + mfspr r10, SPRN_SPRG_SCRATCH2 + EXCEPTION_EPILOG_0 rfi /* This is an instruction TLB error on the MPC8xx. This could be due @@ -519,23 +492,18 @@ DataTLBError: #ifdef CONFIG_8xx_CPU6 stw r3, 8(r0) #endif - DO_8xx_CPU6(0x3f80, r3) - mtspr SPRN_M_TW, r10 /* Save a couple of working registers */ - mfcr r10 - stw r10, 0(r0) - stw r11, 4(r0) + EXCEPTION_PROLOG_0 + mtspr SPRN_SPRG_SCRATCH2, r10 mfspr r10, SPRN_DAR cmpwi cr0, r10, 0x00f0 beq- FixupDAR /* must be a buggy dcbX, icbi insn. */ DARFixed:/* Return from dcbx instruction bug workaround, r10 holds value of DAR */ - mfspr r10, SPRN_M_TW /* Restore registers */ - lwz r11, 0(r0) - mtcr r11 - lwz r11, 4(r0) #ifdef CONFIG_8xx_CPU6 lwz r3, 8(r0) #endif + mfspr r10,SPRN_SPRG_SCRATCH2 + EXCEPTION_EPILOG_0 b DataAccess EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_EE) @@ -619,8 +587,8 @@ FixupDAR:/* Entry point for dcbx workaround. */ stw r11,0(r10) /* store add/and instruction */ dcbf 0,r10 /* flush new instr. to memory. */ icbi 0,r10 /* invalidate instr. cache line */ - lwz r11, 4(r0) /* restore r11 from memory */ - mfspr r10, SPRN_M_TW /* restore r10 from M_TW */ + mfspr r11, SPRN_SPRG_SCRATCH1 /* restore r11 */ + mfspr r10, SPRN_SPRG_SCRATCH0 /* restore r10 */ isync /* Wait until new instr is loaded from memory */ modified_instr: .space 4 /* this is where the add instr. is stored */ @@ -683,9 +651,9 @@ modified_instr: b DARFixed /* Go back to normal TLB handling */ /* special handling for r10,r11 since these are modified already */ -153: lwz r11, 4(r0) /* load r11 from memory */ +153: mfspr r11, SPRN_SPRG_SCRATCH1 /* load r11 from SPRN_SPRG_SCRATCH1 */ b 155f -154: mfspr r11, SPRN_M_TW /* load r10 from M_TW */ +154: mfspr r11, SPRN_SPRG_SCRATCH0 /* load r10 from SPRN_SPRG_SCRATCH0 */ 155: add r10, r10, r11 /* add it */ mfctr r11 /* restore r11 */ b 151b From 3e43640346507caaa0b3b03882a93f641ace4e58 Mon Sep 17 00:00:00 2001 From: LEROY Christophe Date: Fri, 29 Aug 2014 11:14:37 +0200 Subject: [PATCH 016/130] powerpc/8xx: Remove loading of r10 at end of FixupDAR Since commit 2321f33790a6c5b80322d907a92d5739e7521a13, r10 is not used anymore after FixupDAR. There is therefore no need to set it up with the value of DAR. Signed-off-by: Christophe Leroy Signed-off-by: Scott Wood --- arch/powerpc/kernel/head_8xx.S | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 17158089833b..0bed748bbb85 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -498,7 +498,7 @@ DataTLBError: mfspr r10, SPRN_DAR cmpwi cr0, r10, 0x00f0 beq- FixupDAR /* must be a buggy dcbX, icbi insn. */ -DARFixed:/* Return from dcbx instruction bug workaround, r10 holds value of DAR */ +DARFixed:/* Return from dcbx instruction bug workaround */ #ifdef CONFIG_8xx_CPU6 lwz r3, 8(r0) #endif @@ -527,7 +527,7 @@ DARFixed:/* Return from dcbx instruction bug workaround, r10 holds value of DAR /* This is the procedure to calculate the data EA for buggy dcbx,dcbi instructions * by decoding the registers used by the dcbx instruction and adding them. - * DAR is set to the calculated address and r10 also holds the EA on exit. + * DAR is set to the calculated address. */ /* define if you don't want to use self modifying code */ #define NO_SELF_MODIFYING_CODE @@ -567,8 +567,7 @@ FixupDAR:/* Entry point for dcbx workaround. */ beq+ 142f cmpwi cr0, r10, 1964 /* Is icbi? */ beq+ 142f -141: mfspr r10, SPRN_DAR /* r10 must hold DAR at exit */ - b DARFixed /* Nope, go back to normal TLB processing */ +141: b DARFixed /* Nope, go back to normal TLB processing */ 144: mfspr r10, SPRN_DSISR rlwinm r10, r10,0,7,5 /* Clear store bit for buggy dcbst insn */ From 140a6a60ba18da6b36e7fe1cafbffd09bc805f21 Mon Sep 17 00:00:00 2001 From: LEROY Christophe Date: Fri, 29 Aug 2014 11:14:38 +0200 Subject: [PATCH 017/130] powerpc/8xx: Fix comment about DIRTY update Since commit 2321f33790a6c5b80322d907a92d5739e7521a13, dirty handling is not handled here anymore. So we fix the comment. Signed-off-by: Christophe Leroy Signed-off-by: Scott Wood --- arch/powerpc/kernel/head_8xx.S | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 0bed748bbb85..899c70174db4 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -480,12 +480,8 @@ InstructionTLBError: b InstructionAccess /* This is the data TLB error on the MPC8xx. This could be due to - * many reasons, including a dirty update to a pte. We can catch that - * one here, but anything else is an error. First, we track down the - * Linux pte. If it is valid, write access is allowed, but the - * page dirty bit is not set, we will set it and reload the TLB. For - * any other case, we bail out to a higher level function that can - * handle it. + * many reasons, including a dirty update to a pte. We bail out to + * a higher level function that can handle it. */ . = 0x1400 DataTLBError: From 5bcbe24f6c7aec918b8cb75e53f8f7f897e3a091 Mon Sep 17 00:00:00 2001 From: LEROY Christophe Date: Fri, 29 Aug 2014 11:14:38 +0200 Subject: [PATCH 018/130] powerpc/8xx: No need to save r10 and r3 when not calling FixupDAR r10 and r3 are only used inside FixupDAR function. So lets save them inside that function only. Signed-off-by: Christophe Leroy Signed-off-by: Scott Wood --- arch/powerpc/kernel/head_8xx.S | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 899c70174db4..6f544eab983e 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -485,20 +485,12 @@ InstructionTLBError: */ . = 0x1400 DataTLBError: -#ifdef CONFIG_8xx_CPU6 - stw r3, 8(r0) -#endif EXCEPTION_PROLOG_0 - mtspr SPRN_SPRG_SCRATCH2, r10 - mfspr r10, SPRN_DAR - cmpwi cr0, r10, 0x00f0 + mfspr r11, SPRN_DAR + cmpwi cr0, r11, 0x00f0 beq- FixupDAR /* must be a buggy dcbX, icbi insn. */ DARFixed:/* Return from dcbx instruction bug workaround */ -#ifdef CONFIG_8xx_CPU6 - lwz r3, 8(r0) -#endif - mfspr r10,SPRN_SPRG_SCRATCH2 EXCEPTION_EPILOG_0 b DataAccess @@ -528,6 +520,10 @@ DARFixed:/* Return from dcbx instruction bug workaround */ /* define if you don't want to use self modifying code */ #define NO_SELF_MODIFYING_CODE FixupDAR:/* Entry point for dcbx workaround. */ +#ifdef CONFIG_8xx_CPU6 + stw r3, 8(r0) +#endif + mtspr SPRN_SPRG_SCRATCH2, r10 /* fetch instruction from memory. */ mfspr r10, SPRN_SRR0 andis. r11, r10, 0x8000 /* Address >= 0x80000000 */ @@ -543,6 +539,9 @@ FixupDAR:/* Entry point for dcbx workaround. */ mtspr SPRN_MD_TWC, r11 /* Load pte table base address */ mfspr r11, SPRN_MD_TWC /* ....and get the pte address */ lwz r11, 0(r11) /* Get the pte */ +#ifdef CONFIG_8xx_CPU6 + lwz r3, 8(r0) /* restore r3 from memory */ +#endif /* concat physical page address(r11) and page offset(r10) */ rlwimi r11, r10, 0, 20, 31 lwz r11,0(r11) @@ -563,15 +562,13 @@ FixupDAR:/* Entry point for dcbx workaround. */ beq+ 142f cmpwi cr0, r10, 1964 /* Is icbi? */ beq+ 142f -141: b DARFixed /* Nope, go back to normal TLB processing */ +141: mfspr r10,SPRN_SPRG_SCRATCH2 + b DARFixed /* Nope, go back to normal TLB processing */ 144: mfspr r10, SPRN_DSISR rlwinm r10, r10,0,7,5 /* Clear store bit for buggy dcbst insn */ mtspr SPRN_DSISR, r10 142: /* continue, it was a dcbx, dcbi instruction. */ -#ifdef CONFIG_8xx_CPU6 - lwz r3, 8(r0) /* restore r3 from memory */ -#endif #ifndef NO_SELF_MODIFYING_CODE andis. r10,r11,0x1f /* test if reg RA is r0 */ li r10,modified_instr@l @@ -590,6 +587,7 @@ modified_instr: bne+ 143f subf r10,r0,r10 /* r10=r10-r0, only if reg RA is r0 */ 143: mtdar r10 /* store faulting EA in DAR */ + mfspr r10,SPRN_SPRG_SCRATCH2 b DARFixed /* Go back to normal TLB handling */ #else mfctr r10 @@ -643,6 +641,7 @@ modified_instr: mfdar r11 mtctr r11 /* restore ctr reg from DAR */ mtdar r10 /* save fault EA to DAR */ + mfspr r10,SPRN_SPRG_SCRATCH2 b DARFixed /* Go back to normal TLB handling */ /* special handling for r10,r11 since these are modified already */ From 41cacac63c98803e9fbb054db0b74f92a8c082e4 Mon Sep 17 00:00:00 2001 From: LEROY Christophe Date: Fri, 29 Aug 2014 11:14:38 +0200 Subject: [PATCH 019/130] powerpc/8xx: Optimize verification in FixupDAR By XORing the upper part of the instruction code, we get a value that can directly be verified with the second test and we can remove the first test. Signed-off-by: Christophe Leroy Signed-off-by: Scott Wood --- arch/powerpc/kernel/head_8xx.S | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 6f544eab983e..1bdd7c13ddc1 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -548,10 +548,8 @@ FixupDAR:/* Entry point for dcbx workaround. */ /* Check if it really is a dcbx instruction. */ /* dcbt and dcbtst does not generate DTLB Misses/Errors, * no need to include them here */ - srwi r10, r11, 26 /* check if major OP code is 31 */ - cmpwi cr0, r10, 31 - bne- 141f - rlwinm r10, r11, 0, 21, 30 + xoris r10, r11, 0x7c00 /* check if major OP code is 31 */ + rlwinm r10, r10, 0, 21, 5 cmpwi cr0, r10, 2028 /* Is dcbz? */ beq+ 142f cmpwi cr0, r10, 940 /* Is dcbi? */ From 111e32b2f6b3f9d348b549013dab8f5053acdf19 Mon Sep 17 00:00:00 2001 From: LEROY Christophe Date: Fri, 29 Aug 2014 11:14:39 +0200 Subject: [PATCH 020/130] powerpc/8xx: Duplicate two insns instead of branching Branching takes two cycles on MPC8xx. Lets duplicate the two instructions and avoid the branching. Signed-off-by: Christophe Leroy Signed-off-by: Scott Wood --- arch/powerpc/kernel/head_8xx.S | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 1bdd7c13ddc1..fafff8dbd5d9 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -644,9 +644,11 @@ modified_instr: /* special handling for r10,r11 since these are modified already */ 153: mfspr r11, SPRN_SPRG_SCRATCH1 /* load r11 from SPRN_SPRG_SCRATCH1 */ - b 155f + add r10, r10, r11 /* add it */ + mfctr r11 /* restore r11 */ + b 151b 154: mfspr r11, SPRN_SPRG_SCRATCH0 /* load r10 from SPRN_SPRG_SCRATCH0 */ -155: add r10, r10, r11 /* add it */ + add r10, r10, r11 /* add it */ mfctr r11 /* restore r11 */ b 151b #endif From 0babcd1c1489ae31bf0cea34fad24b3f047ad79c Mon Sep 17 00:00:00 2001 From: Priyanka Jain Date: Fri, 5 Sep 2014 16:14:40 +0530 Subject: [PATCH 021/130] powerpc/fsl-booke: Add initial T1040/T1042 RDB board support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit T1040/T1042RDB is Freescale Reference Design Board. The board can support both T1040/T1042 QorIQ Power Architecture™ processor. T1040/T1042RDB board Overview ----------------------- - SERDES Connections, 8 lanes supporting: - PCI - SGMII - QSGMII - SATA 2.0 - DDR Controller - Supports rates of up to 1600 MHz data-rate - Supports one DDR3LP UDIMM -IFC/Local Bus - NAND flash: 1GB 8-bit NAND flash - NOR: 128MB 16-bit NOR Flash - Ethernet - Two on-board RGMII 10/100/1G ethernet ports. - PHY #0 remains powered up during deep-sleep - CPLD - Clocks - System and DDR clock (SYSCLK, “DDRCLK”) - SERDES clocks - Power Supplies - USB - Supports two USB 2.0 ports with integrated PHYs - Two type A ports with 5V@1.5A per port. - SDHC - SDHC/SDXC connector - SPI - On-board 64MB SPI flash - I2C - Devices connected: EEPROM, thermal monitor, VID controller - Other IO - Two Serial ports - ProfiBus port Add support for T1040/T1042 RDB board: -add device tree -add entry in Kconfig to build -Add entry in corenet_generic.c, as it is similar to other corenet platforms Signed-off-by: Priyanka Jain Signed-off-by: Poonam Aggrwal Signed-off-by: Prabhakar Kushwaha Signed-off-by: Scott Wood --- arch/powerpc/boot/dts/t1040rdb.dts | 48 ++++++ arch/powerpc/boot/dts/t1042rdb.dts | 48 ++++++ arch/powerpc/boot/dts/t104xrdb.dtsi | 156 ++++++++++++++++++ arch/powerpc/platforms/85xx/Kconfig | 2 +- arch/powerpc/platforms/85xx/corenet_generic.c | 2 + 5 files changed, 255 insertions(+), 1 deletion(-) create mode 100644 arch/powerpc/boot/dts/t1040rdb.dts create mode 100644 arch/powerpc/boot/dts/t1042rdb.dts create mode 100644 arch/powerpc/boot/dts/t104xrdb.dtsi diff --git a/arch/powerpc/boot/dts/t1040rdb.dts b/arch/powerpc/boot/dts/t1040rdb.dts new file mode 100644 index 000000000000..79a0bed04c1a --- /dev/null +++ b/arch/powerpc/boot/dts/t1040rdb.dts @@ -0,0 +1,48 @@ +/* + * T1040RDB Device Tree Source + * + * Copyright 2014 Freescale Semiconductor Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Freescale Semiconductor nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * + * ALTERNATIVELY, this software may be distributed under the terms of the + * GNU General Public License ("GPL") as published by the Free Software + * Foundation, either version 2 of that License or (at your option) any + * later version. + * + * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/include/ "fsl/t104xsi-pre.dtsi" +/include/ "t104xrdb.dtsi" + +/ { + model = "fsl,T1040RDB"; + compatible = "fsl,T1040RDB"; + ifc: localbus@ffe124000 { + cpld@3,0 { + compatible = "fsl,t1040rdb-cpld"; + }; + }; +}; + +/include/ "fsl/t1040si-post.dtsi" diff --git a/arch/powerpc/boot/dts/t1042rdb.dts b/arch/powerpc/boot/dts/t1042rdb.dts new file mode 100644 index 000000000000..738c23790e94 --- /dev/null +++ b/arch/powerpc/boot/dts/t1042rdb.dts @@ -0,0 +1,48 @@ +/* + * T1042RDB Device Tree Source + * + * Copyright 2014 Freescale Semiconductor Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Freescale Semiconductor nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * + * ALTERNATIVELY, this software may be distributed under the terms of the + * GNU General Public License ("GPL") as published by the Free Software + * Foundation, either version 2 of that License or (at your option) any + * later version. + * + * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/include/ "fsl/t104xsi-pre.dtsi" +/include/ "t104xrdb.dtsi" + +/ { + model = "fsl,T1042RDB"; + compatible = "fsl,T1042RDB"; + ifc: localbus@ffe124000 { + cpld@3,0 { + compatible = "fsl,t1042rdb-cpld"; + }; + }; +}; + +/include/ "fsl/t1042si-post.dtsi" diff --git a/arch/powerpc/boot/dts/t104xrdb.dtsi b/arch/powerpc/boot/dts/t104xrdb.dtsi new file mode 100644 index 000000000000..1cf0f3c5f7e5 --- /dev/null +++ b/arch/powerpc/boot/dts/t104xrdb.dtsi @@ -0,0 +1,156 @@ +/* + * T1040RDB/T1042RDB Device Tree Source + * + * Copyright 2014 Freescale Semiconductor Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Freescale Semiconductor nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * + * ALTERNATIVELY, this software may be distributed under the terms of the + * GNU General Public License ("GPL") as published by the Free Software + * Foundation, either version 2 of that License or (at your option) any + * later version. + * + * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/ { + + ifc: localbus@ffe124000 { + reg = <0xf 0xfe124000 0 0x2000>; + ranges = <0 0 0xf 0xe8000000 0x08000000 + 2 0 0xf 0xff800000 0x00010000 + 3 0 0xf 0xffdf0000 0x00008000>; + + nor@0,0 { + #address-cells = <1>; + #size-cells = <1>; + compatible = "cfi-flash"; + reg = <0x0 0x0 0x8000000>; + bank-width = <2>; + device-width = <1>; + }; + + nand@2,0 { + #address-cells = <1>; + #size-cells = <1>; + compatible = "fsl,ifc-nand"; + reg = <0x2 0x0 0x10000>; + }; + + cpld@3,0 { + reg = <3 0 0x300>; + }; + }; + + memory { + device_type = "memory"; + }; + + dcsr: dcsr@f00000000 { + ranges = <0x00000000 0xf 0x00000000 0x01072000>; + }; + + soc: soc@ffe000000 { + ranges = <0x00000000 0xf 0xfe000000 0x1000000>; + reg = <0xf 0xfe000000 0 0x00001000>; + + spi@110000 { + flash@0 { + #address-cells = <1>; + #size-cells = <1>; + compatible = "micron,n25q512a"; + reg = <0>; + spi-max-frequency = <10000000>; /* input clock */ + }; + }; + + i2c@118100 { + pca9546@77 { + compatible = "nxp,pca9546"; + reg = <0x77>; + #address-cells = <1>; + #size-cells = <0>; + }; + }; + + }; + + pci0: pcie@ffe240000 { + reg = <0xf 0xfe240000 0 0x10000>; + ranges = <0x02000000 0 0xe0000000 0xc 0x00000000 0x0 0x10000000 + 0x01000000 0 0x00000000 0xf 0xf8000000 0x0 0x00010000>; + pcie@0 { + ranges = <0x02000000 0 0xe0000000 + 0x02000000 0 0xe0000000 + 0 0x10000000 + + 0x01000000 0 0x00000000 + 0x01000000 0 0x00000000 + 0 0x00010000>; + }; + }; + + pci1: pcie@ffe250000 { + reg = <0xf 0xfe250000 0 0x10000>; + ranges = <0x02000000 0x0 0xe0000000 0xc 0x10000000 0x0 0x10000000 + 0x01000000 0x0 0x00000000 0xf 0xf8010000 0x0 0x00010000>; + pcie@0 { + ranges = <0x02000000 0 0xe0000000 + 0x02000000 0 0xe0000000 + 0 0x10000000 + + 0x01000000 0 0x00000000 + 0x01000000 0 0x00000000 + 0 0x00010000>; + }; + }; + + pci2: pcie@ffe260000 { + reg = <0xf 0xfe260000 0 0x10000>; + ranges = <0x02000000 0 0xe0000000 0xc 0x20000000 0 0x10000000 + 0x01000000 0 0x00000000 0xf 0xf8020000 0 0x00010000>; + pcie@0 { + ranges = <0x02000000 0 0xe0000000 + 0x02000000 0 0xe0000000 + 0 0x10000000 + + 0x01000000 0 0x00000000 + 0x01000000 0 0x00000000 + 0 0x00010000>; + }; + }; + + pci3: pcie@ffe270000 { + reg = <0xf 0xfe270000 0 0x10000>; + ranges = <0x02000000 0 0xe0000000 0xc 0x30000000 0 0x10000000 + 0x01000000 0 0x00000000 0xf 0xf8030000 0 0x00010000>; + pcie@0 { + ranges = <0x02000000 0 0xe0000000 + 0x02000000 0 0xe0000000 + 0 0x10000000 + + 0x01000000 0 0x00000000 + 0x01000000 0 0x00000000 + 0 0x00010000>; + }; + }; +}; diff --git a/arch/powerpc/platforms/85xx/Kconfig b/arch/powerpc/platforms/85xx/Kconfig index 0c1e6903597e..f22635a71d01 100644 --- a/arch/powerpc/platforms/85xx/Kconfig +++ b/arch/powerpc/platforms/85xx/Kconfig @@ -276,7 +276,7 @@ config CORENET_GENERIC For 64bit kernel, the following boards are supported: T208x QDS/RDB, T4240 QDS/RDB and B4 QDS The following boards are supported for both 32bit and 64bit kernel: - P5020 DS, P5040 DS and T104xQDS + P5020 DS, P5040 DS and T104xQDS/RDB endif # FSL_SOC_BOOKE diff --git a/arch/powerpc/platforms/85xx/corenet_generic.c b/arch/powerpc/platforms/85xx/corenet_generic.c index c2adb00228c5..cc289783dbfe 100644 --- a/arch/powerpc/platforms/85xx/corenet_generic.c +++ b/arch/powerpc/platforms/85xx/corenet_generic.c @@ -140,6 +140,8 @@ static const char * const boards[] __initconst = { "fsl,B4220QDS", "fsl,T1040QDS", "fsl,T1042QDS", + "fsl,T1040RDB", + "fsl,T1042RDB", "keymile,kmcoge4", NULL }; From 667680f6841bb5cc239382fea916bf4ab6803842 Mon Sep 17 00:00:00 2001 From: Priyanka Jain Date: Fri, 5 Sep 2014 16:15:00 +0530 Subject: [PATCH 022/130] powerpc/fsl-booke: Add initial T1042RDB_PI board support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit T1042RDB_PI is Freescale Reference Design Board supporting the T1042 QorIQ Power Architecture™ processor. T1042 is a reduced personality of T1040 SoC without Integrated 8-port Gigabit. The board is designed with low power features targeted for Printing Image Market. T1042RDB_PI is similar to T1040RDB board with few differences like it has video interface, supports T1042 personality only T1042RDB_PI board Overview ----------------------- - SERDES Connections, 8 lanes supporting: - PCI - SATA 2.0 - DDR Controller - Supports rates of up to 1600 MHz data-rate - Supports one DDR3LP UDIMM -IFC/Local Bus - NAND flash: 1GB 8-bit NAND flash - NOR: 128MB 16-bit NOR Flash - Ethernet - Two on-board RGMII 10/100/1G ethernet ports. - PHY #0 remains powered up during deep-sleep - CPLD - Clocks - System and DDR clock (SYSCLK, “DDRCLK”) - SERDES clocks - Power Supplies - USB - Supports two USB 2.0 ports with integrated PHYs - Two type A ports with 5V@1.5A per port. - SDHC - SDHC/SDXC connector - SPI - On-board 64MB SPI flash - I2C - Device connected: EEPROM, thermal monitor, VID controller, RTC - Other IO - Two Serial ports - ProfiBus port Add support for T1042RDB_PI board: -add device tree -Add entry in corenet_generic.c, as it is similar to other corenet platforms Signed-off-by: Poonam Aggrwal Signed-off-by: Prabhakar Kushwaha Signed-off-by: Priyanka Jain Signed-off-by: Scott Wood --- arch/powerpc/boot/dts/t1042rdb_pi.dts | 57 +++++++++++++++++++ arch/powerpc/platforms/85xx/corenet_generic.c | 1 + 2 files changed, 58 insertions(+) create mode 100644 arch/powerpc/boot/dts/t1042rdb_pi.dts diff --git a/arch/powerpc/boot/dts/t1042rdb_pi.dts b/arch/powerpc/boot/dts/t1042rdb_pi.dts new file mode 100644 index 000000000000..634f751fa6d3 --- /dev/null +++ b/arch/powerpc/boot/dts/t1042rdb_pi.dts @@ -0,0 +1,57 @@ +/* + * T1042RDB_PI Device Tree Source + * + * Copyright 2014 Freescale Semiconductor Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Freescale Semiconductor nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * + * ALTERNATIVELY, this software may be distributed under the terms of the + * GNU General Public License ("GPL") as published by the Free Software + * Foundation, either version 2 of that License or (at your option) any + * later version. + * + * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/include/ "fsl/t104xsi-pre.dtsi" +/include/ "t104xrdb.dtsi" + +/ { + model = "fsl,T1042RDB_PI"; + compatible = "fsl,T1042RDB_PI"; + ifc: localbus@ffe124000 { + cpld@3,0 { + compatible = "fsl,t1042rdb_pi-cpld"; + }; + }; + soc: soc@ffe000000 { + i2c@118000 { + rtc@68 { + compatible = "dallas,ds1337"; + reg = <0x68>; + interrupts = <0x2 0x1 0 0>; + }; + }; + }; +}; + +/include/ "fsl/t1042si-post.dtsi" diff --git a/arch/powerpc/platforms/85xx/corenet_generic.c b/arch/powerpc/platforms/85xx/corenet_generic.c index cc289783dbfe..e56b89a792ed 100644 --- a/arch/powerpc/platforms/85xx/corenet_generic.c +++ b/arch/powerpc/platforms/85xx/corenet_generic.c @@ -142,6 +142,7 @@ static const char * const boards[] __initconst = { "fsl,T1042QDS", "fsl,T1040RDB", "fsl,T1042RDB", + "fsl,T1042RDB_PI", "keymile,kmcoge4", NULL }; From a7d6e223f5999df8d9ceb545459b5f726d86ce6a Mon Sep 17 00:00:00 2001 From: Tudor Laurentiu Date: Wed, 13 Aug 2014 18:40:25 +0300 Subject: [PATCH 023/130] powerpc/fsl-booke64: add missing virtualization options in defconfig The 32-bit defconfig version has these enabled for years so make the 64-bit defconfig have them too. This patch only adds CONFIG_VIRT_DRIVERS, CONFIG_FSL_HV_MANAGER and CONFIG_PPC_EPAPR_HV_BYTECHAN other changes being "make savedefconfig" artifacts. Signed-off-by: Laurentiu Tudor Signed-off-by: Scott Wood --- arch/powerpc/configs/corenet64_smp_defconfig | 44 ++++---------------- 1 file changed, 8 insertions(+), 36 deletions(-) diff --git a/arch/powerpc/configs/corenet64_smp_defconfig b/arch/powerpc/configs/corenet64_smp_defconfig index f54eb448002b..2eb27046434e 100644 --- a/arch/powerpc/configs/corenet64_smp_defconfig +++ b/arch/powerpc/configs/corenet64_smp_defconfig @@ -49,7 +49,6 @@ CONFIG_NET_IPIP=y CONFIG_IP_MROUTE=y CONFIG_IP_PIMSM_V1=y CONFIG_IP_PIMSM_V2=y -CONFIG_ARPD=y CONFIG_INET_ESP=y # CONFIG_INET_XFRM_MODE_BEET is not set # CONFIG_INET_LRO is not set @@ -59,33 +58,17 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y CONFIG_MTD=y -CONFIG_MTD_OF_PARTS=y CONFIG_MTD_CMDLINE_PARTS=y -CONFIG_MTD_CHAR=y -CONFIG_MTD_BLKDEVS=y CONFIG_MTD_BLOCK=y CONFIG_FTL=y CONFIG_MTD_CFI=y -CONFIG_MTD_GEN_PROBE=y -CONFIG_MTD_MAP_BANK_WIDTH_1=y -CONFIG_MTD_MAP_BANK_WIDTH_2=y -CONFIG_MTD_MAP_BANK_WIDTH_4=y -CONFIG_MTD_CFI_I1=y -CONFIG_MTD_CFI_I2=y CONFIG_MTD_CFI_INTELEXT=y CONFIG_MTD_CFI_AMDSTD=y CONFIG_MTD_PHYSMAP_OF=y -CONFIG_MTD_M25P80=y -CONFIG_MTD_CFI_UTIL=y -CONFIG_MTD_NAND_ECC=y CONFIG_MTD_NAND=y -CONFIG_MTD_NAND_IDS=y CONFIG_MTD_NAND_FSL_ELBC=y CONFIG_MTD_NAND_FSL_IFC=y CONFIG_MTD_UBI=y -CONFIG_MTD_UBI_WL_THRESHOLD=4096 -CONFIG_MTD_UBI_BEB_RESERVE=1 -CONFIG_PROC_DEVICETREE=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=131072 @@ -101,6 +84,7 @@ CONFIG_INPUT_FF_MEMLESS=m # CONFIG_INPUT_KEYBOARD is not set # CONFIG_INPUT_MOUSE is not set CONFIG_SERIO_LIBPS2=y +CONFIG_PPC_EPAPR_HV_BYTECHAN=y CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y CONFIG_SERIAL_8250_MANY_PORTS=y @@ -114,7 +98,6 @@ CONFIG_SPI_GPIO=y CONFIG_SPI_FSL_SPI=y CONFIG_SPI_FSL_ESPI=y # CONFIG_HWMON is not set -CONFIG_VIDEO_OUTPUT_CONTROL=y CONFIG_USB_HID=m CONFIG_USB=y CONFIG_USB_MON=y @@ -123,14 +106,17 @@ CONFIG_USB_EHCI_FSL=y CONFIG_USB_STORAGE=y CONFIG_MMC=y CONFIG_MMC_SDHCI=y +CONFIG_EDAC=y +CONFIG_EDAC_MM_EDAC=y CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_DS1307=y CONFIG_RTC_DRV_DS1374=y CONFIG_RTC_DRV_DS3232=y -CONFIG_EDAC=y -CONFIG_EDAC_MM_EDAC=y CONFIG_DMADEVICES=y CONFIG_FSL_DMA=y +CONFIG_VIRT_DRIVERS=y +CONFIG_FSL_HV_MANAGER=y +CONFIG_FSL_CORENET_CF=y CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y CONFIG_ISO9660_FS=m @@ -143,16 +129,9 @@ CONFIG_NTFS_FS=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_HUGETLBFS=y -CONFIG_MISC_FILESYSTEMS=y CONFIG_JFFS2_FS=y CONFIG_JFFS2_FS_DEBUG=1 -CONFIG_JFFS2_FS_WRITEBUFFER=y -CONFIG_JFFS2_ZLIB=y -CONFIG_JFFS2_RTIME=y CONFIG_UBIFS_FS=y -CONFIG_UBIFS_FS_XATTR=y -CONFIG_UBIFS_FS_LZO=y -CONFIG_UBIFS_FS_ZLIB=y CONFIG_NFS_FS=y CONFIG_NFS_V4=y CONFIG_ROOT_NFS=y @@ -162,18 +141,12 @@ CONFIG_NLS_CODEPAGE_850=y CONFIG_NLS_ISO8859_1=y CONFIG_NLS_UTF8=m CONFIG_CRC_T10DIF=y -CONFIG_CRC16=y -CONFIG_ZLIB_DEFLATE=y -CONFIG_LZO_COMPRESS=y -CONFIG_LZO_DECOMPRESS=y -CONFIG_CRYPTO_DEFLATE=y -CONFIG_CRYPTO_LZO=y +CONFIG_DEBUG_INFO=y CONFIG_FRAME_WARN=1024 -CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_FS=y +CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_SHIRQ=y CONFIG_DETECT_HUNG_TASK=y -CONFIG_DEBUG_INFO=y CONFIG_CRYPTO_NULL=y CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_MD4=y @@ -181,4 +154,3 @@ CONFIG_CRYPTO_SHA256=y CONFIG_CRYPTO_SHA512=y # CONFIG_CRYPTO_ANSI_CPRNG is not set CONFIG_CRYPTO_DEV_FSL_CAAM=y -CONFIG_FSL_CORENET_CF=y From c22b47f03cdf01c7c4859b818b2a413251dde28d Mon Sep 17 00:00:00 2001 From: Nikhil Badola Date: Thu, 21 Aug 2014 16:01:30 +0530 Subject: [PATCH 024/130] powerpc: dts: t208x: Change T208x USB controller version Change USB controller version to 2.5 in compatible string for T2080/T2081 Signed-off-by: Nikhil Badola Signed-off-by: Scott Wood --- arch/powerpc/boot/dts/fsl/t2081si-post.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/boot/dts/fsl/t2081si-post.dtsi b/arch/powerpc/boot/dts/fsl/t2081si-post.dtsi index 97479f0ce630..aecee9690a88 100644 --- a/arch/powerpc/boot/dts/fsl/t2081si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/t2081si-post.dtsi @@ -410,7 +410,7 @@ /include/ "qoriq-gpio-3.dtsi" /include/ "qoriq-usb2-mph-0.dtsi" usb0: usb@210000 { - compatible = "fsl-usb2-mph-v2.4", "fsl-usb2-mph"; + compatible = "fsl-usb2-mph-v2.5", "fsl-usb2-mph"; fsl,iommu-parent = <&pamu1>; fsl,liodn-reg = <&guts 0x520>; /* USB1LIODNR */ phy_type = "utmi"; @@ -418,7 +418,7 @@ }; /include/ "qoriq-usb2-dr-0.dtsi" usb1: usb@211000 { - compatible = "fsl-usb2-dr-v2.4", "fsl-usb2-dr"; + compatible = "fsl-usb2-dr-v2.5", "fsl-usb2-dr"; fsl,iommu-parent = <&pamu1>; fsl,liodn-reg = <&guts 0x524>; /* USB1LIODNR */ dr_mode = "host"; From 94105a762016aea3284c07c813017b90a6325f10 Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Tue, 16 Sep 2014 19:47:00 -0500 Subject: [PATCH 025/130] powerpc/85xx/defconfig: Remove duplicate CONFIG_RTC_DRV_DS1307 Commit a95e8c28b3dc "powerpc/defconfig: update RTC support" duplicated the CONFIG_RTC_DRV_DS1307 symbol in mpc85xx_defconfig and mpc85xx_smp_defconfig, resulting in this: arch/powerpc/configs/mpc85xx_smp_defconfig:217:warning: override: reassigning to symbol RTC_DRV_DS1307 Fixes: a95e8c28b3dc "powerpc/defconfig: update RTC support" Cc: Shengzhou Liu Signed-off-by: Scott Wood --- arch/powerpc/configs/mpc85xx_defconfig | 1 - arch/powerpc/configs/mpc85xx_smp_defconfig | 1 - 2 files changed, 2 deletions(-) diff --git a/arch/powerpc/configs/mpc85xx_defconfig b/arch/powerpc/configs/mpc85xx_defconfig index 8b2d759e69f6..d2c415489f72 100644 --- a/arch/powerpc/configs/mpc85xx_defconfig +++ b/arch/powerpc/configs/mpc85xx_defconfig @@ -213,7 +213,6 @@ CONFIG_RTC_DRV_DS1307=y CONFIG_RTC_DRV_DS1374=y CONFIG_RTC_DRV_DS3232=y CONFIG_RTC_DRV_CMOS=y -CONFIG_RTC_DRV_DS1307=y CONFIG_DMADEVICES=y CONFIG_FSL_DMA=y # CONFIG_NET_DMA is not set diff --git a/arch/powerpc/configs/mpc85xx_smp_defconfig b/arch/powerpc/configs/mpc85xx_smp_defconfig index a3003b2e29f8..87460083dbc7 100644 --- a/arch/powerpc/configs/mpc85xx_smp_defconfig +++ b/arch/powerpc/configs/mpc85xx_smp_defconfig @@ -214,7 +214,6 @@ CONFIG_RTC_DRV_DS1307=y CONFIG_RTC_DRV_DS1374=y CONFIG_RTC_DRV_DS3232=y CONFIG_RTC_DRV_CMOS=y -CONFIG_RTC_DRV_DS1307=y CONFIG_DMADEVICES=y CONFIG_FSL_DMA=y # CONFIG_NET_DMA is not set From 6db35ad2373eed5deb3b105ae7c1e9de3e34ae94 Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Thu, 18 Sep 2014 14:05:02 -0500 Subject: [PATCH 026/130] powerpc/mm: Use common paging_init() for NUMA Commit 1c98025c6c95bc057a25e2c6596de23288c68160 "powerpc: Dynamic DMA zone limits" updated how zones are created in paging_init(), but missed the NUMA version of paging_init(). This was noticed via a linker error, since dma_pfn_limit_to_zone() was, like the non-NUMA paging_init(), limited by #ifndef CONFIG_NEED_MULTIPLE_NODES. It turns out that the NUMA paging_init() was not actually doing anything different from the standard paging_init(), other than a couple debug prints, a couple 32-bit-only ifdef sections, and a call to mark_nonram_nosave(). It's not clear whether mark_nonram_nosave() is inherently wrong to do for NUMA, or just not useful on targets that have NUMA, but for now I'm preserving the existing behavior. Fixes: 1c98025c6c9 "powerpc: Dynamic DMA zone limits" Reported-by: Stephen Rothwell Signed-off-by: Scott Wood --- arch/powerpc/mm/mem.c | 7 ++++++- arch/powerpc/mm/numa.c | 8 -------- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 687e7f7f7751..420dfff22ba5 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -259,6 +259,12 @@ static int __init mark_nonram_nosave(void) } return 0; } +#else /* CONFIG_NEED_MULTIPLE_NODES */ +static int __init mark_nonram_nosave(void) +{ + return 0; +} +#endif static bool zone_limits_final; @@ -351,7 +357,6 @@ void __init paging_init(void) mark_nonram_nosave(); } -#endif /* ! CONFIG_NEED_MULTIPLE_NODES */ static void __init register_page_bootmem_info(void) { diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 3b181b22cd46..5eb07f332de1 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -1126,14 +1126,6 @@ void __init do_init_bootmem(void) (void *)(unsigned long)boot_cpuid); } -void __init paging_init(void) -{ - unsigned long max_zone_pfns[MAX_NR_ZONES]; - memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); - max_zone_pfns[ZONE_DMA] = memblock_end_of_DRAM() >> PAGE_SHIFT; - free_area_init_nodes(max_zone_pfns); -} - static int __init early_numa(char *p) { if (!p) From cb0446c1b625326682ec4f9d1dd10779433646bc Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Fri, 19 Sep 2014 15:20:36 -0500 Subject: [PATCH 027/130] Revert "powerpc/fsl_msi: spread msi ints across different MSIRs" This reverts commit c822e73731fce3b49a4887140878d084d8a44c08. This commit conflicted with a bitmap allocator change that partially accomplishes the same thing, but which does so more correctly. Revert this one until it can be respun on top of the correct change. Signed-off-by: Scott Wood --- arch/powerpc/include/asm/msi_bitmap.h | 2 -- arch/powerpc/sysdev/fsl_msi.c | 31 +----------------------- arch/powerpc/sysdev/fsl_msi.h | 5 ---- arch/powerpc/sysdev/msi_bitmap.c | 35 ++++++++++++--------------- 4 files changed, 16 insertions(+), 57 deletions(-) diff --git a/arch/powerpc/include/asm/msi_bitmap.h b/arch/powerpc/include/asm/msi_bitmap.h index 96c2f9500574..97ac3f46ae0d 100644 --- a/arch/powerpc/include/asm/msi_bitmap.h +++ b/arch/powerpc/include/asm/msi_bitmap.h @@ -25,8 +25,6 @@ int msi_bitmap_alloc_hwirqs(struct msi_bitmap *bmp, int num); void msi_bitmap_free_hwirqs(struct msi_bitmap *bmp, unsigned int offset, unsigned int num); void msi_bitmap_reserve_hwirq(struct msi_bitmap *bmp, unsigned int hwirq); -int msi_bitmap_alloc_hwirqs_from_offset(struct msi_bitmap *bmp, int offset, - int num); int msi_bitmap_reserve_dt_hwirqs(struct msi_bitmap *bmp); diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c index 37254eff7324..e2ee226464f8 100644 --- a/arch/powerpc/sysdev/fsl_msi.c +++ b/arch/powerpc/sysdev/fsl_msi.c @@ -213,8 +213,6 @@ static int fsl_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) * available interrupt. */ list_for_each_entry(msi_data, &msi_head, list) { - int off; - /* * If the PCI node has an fsl,msi property, then we * restrict our search to the corresponding MSI node. @@ -226,28 +224,7 @@ static int fsl_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) if (phandle && (phandle != msi_data->phandle)) continue; - /* - * Allocate the msi message so that it fits on distinct - * MSIR registers. Obviously, since MSIR registers are - * limited they will overlap at one point. - * - * Due to the format of the newly introduced MSIIR1 in - * mpic 4.3, consecutive msi message values map to - * distinct MSIRs, thus distinct msi irq cascades, so - * nothing special needs to be done in this case. - * On older mpic versions the chose distinct SRS - * values by aligning the msi message value to the - * SRS field shift. - */ - if (msi_data->feature & FSL_PIC_FTR_MPIC_4_3) { - off = 0; - } else { - off = atomic_inc_return(&msi_data->msi_alloc_cnt) % - msi_data->msir_num; - off <<= msi_data->srs_shift; - } - hwirq = msi_bitmap_alloc_hwirqs_from_offset( - &msi_data->bitmap, off, 1); + hwirq = msi_bitmap_alloc_hwirqs(&msi_data->bitmap, 1); if (hwirq >= 0) break; } @@ -487,17 +464,12 @@ static int fsl_of_msi_probe(struct platform_device *dev) goto error_out; } - atomic_set(&msi->msi_alloc_cnt, -1); - p = of_get_property(dev->dev.of_node, "msi-available-ranges", &len); if (of_device_is_compatible(dev->dev.of_node, "fsl,mpic-msi-v4.3") || of_device_is_compatible(dev->dev.of_node, "fsl,vmpic-msi-v4.3")) { msi->srs_shift = MSIIR1_SRS_SHIFT; msi->ibs_shift = MSIIR1_IBS_SHIFT; - msi->msir_num = NR_MSI_REG_MSIIR1; - msi->feature |= FSL_PIC_FTR_MPIC_4_3; - if (p) dev_warn(&dev->dev, "%s: dose not support msi-available-ranges property\n", __func__); @@ -515,7 +487,6 @@ static int fsl_of_msi_probe(struct platform_device *dev) msi->srs_shift = MSIIR_SRS_SHIFT; msi->ibs_shift = MSIIR_IBS_SHIFT; - msi->msir_num = NR_MSI_REG_MSIIR; if (p && len % (2 * sizeof(u32)) != 0) { dev_err(&dev->dev, "%s: Malformed msi-available-ranges property\n", diff --git a/arch/powerpc/sysdev/fsl_msi.h b/arch/powerpc/sysdev/fsl_msi.h index 50ec4b04c732..420cfcbdac01 100644 --- a/arch/powerpc/sysdev/fsl_msi.h +++ b/arch/powerpc/sysdev/fsl_msi.h @@ -15,7 +15,6 @@ #include #include -#include #define NR_MSI_REG_MSIIR 8 /* MSIIR can index 8 MSI registers */ #define NR_MSI_REG_MSIIR1 16 /* MSIIR1 can index 16 MSI registers */ @@ -28,8 +27,6 @@ #define FSL_PIC_IP_IPIC 0x00000002 #define FSL_PIC_IP_VMPIC 0x00000003 -#define FSL_PIC_FTR_MPIC_4_3 0x00000010 - struct fsl_msi_cascade_data; struct fsl_msi { @@ -40,8 +37,6 @@ struct fsl_msi { u32 msiir_offset; /* Offset of MSIIR, relative to start of CCSR */ u32 ibs_shift; /* Shift of interrupt bit select */ u32 srs_shift; /* Shift of the shared interrupt register select */ - u32 msir_num; /* Number of available MSIR regs */ - atomic_t msi_alloc_cnt; /* Counter for MSI hwirq allocations */ void __iomem *msi_regs; u32 feature; struct fsl_msi_cascade_data *cascade_array[NR_MSI_REG_MAX]; diff --git a/arch/powerpc/sysdev/msi_bitmap.c b/arch/powerpc/sysdev/msi_bitmap.c index 8b7d8fc2b120..2ff630267e9e 100644 --- a/arch/powerpc/sysdev/msi_bitmap.c +++ b/arch/powerpc/sysdev/msi_bitmap.c @@ -14,28 +14,23 @@ #include #include -int msi_bitmap_alloc_hwirqs_from_offset(struct msi_bitmap *bmp, int offset, - int num) -{ - unsigned long flags; - int index; - int order = get_count_order(num); - - spin_lock_irqsave(&bmp->lock, flags); - index = bitmap_find_next_zero_area(bmp->bitmap, bmp->irq_count, - offset, num, (1 << order) - 1); - bitmap_set(bmp->bitmap, index, num); - spin_unlock_irqrestore(&bmp->lock, flags); - - pr_debug("msi_bitmap: found %d free bits starting from offset %d at index %d\n", - num, offset, index); - - return index; -} - int msi_bitmap_alloc_hwirqs(struct msi_bitmap *bmp, int num) { - return msi_bitmap_alloc_hwirqs_from_offset(bmp, 0, num); + unsigned long flags; + int offset, order = get_count_order(num); + + spin_lock_irqsave(&bmp->lock, flags); + /* + * This is fast, but stricter than we need. We might want to add + * a fallback routine which does a linear search with no alignment. + */ + offset = bitmap_find_free_region(bmp->bitmap, bmp->irq_count, order); + spin_unlock_irqrestore(&bmp->lock, flags); + + pr_debug("msi_bitmap: allocated 0x%x (2^%d) at offset 0x%x\n", + num, order, offset); + + return offset; } void msi_bitmap_free_hwirqs(struct msi_bitmap *bmp, unsigned int offset, From 2172d6606843cbad58498310a28125e7a700e658 Mon Sep 17 00:00:00 2001 From: Himangi Saraogi Date: Wed, 13 Aug 2014 14:48:47 +0530 Subject: [PATCH 028/130] powerpc/pseries: Drop unnecessary continue Continue is not needed at the bottom of a loop. The Coccinelle semantic patch implementing this change is: @@ @@ for (...;...;...) { ... if (...) { ... - continue; } } Signed-off-by: Himangi Saraogi Acked-by: Julia Lawall Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/pseries/cmm.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/powerpc/platforms/pseries/cmm.c b/arch/powerpc/platforms/pseries/cmm.c index 2d8bf15879fd..fc44ad0475f8 100644 --- a/arch/powerpc/platforms/pseries/cmm.c +++ b/arch/powerpc/platforms/pseries/cmm.c @@ -555,7 +555,6 @@ static int cmm_mem_going_offline(void *arg) pa_last = pa_last->next; free_page((unsigned long)cmm_page_list); cmm_page_list = pa_last; - continue; } } pa_curr = pa_curr->next; From cdd91b89adedb77e3e581c40788620790edc33b5 Mon Sep 17 00:00:00 2001 From: Vasant Hegde Date: Thu, 14 Aug 2014 12:16:39 +0530 Subject: [PATCH 029/130] powerpc/powernv: Improve error messages in dump code Presently we only support initiating Service Processor dump from host. Hence update sysfs message. Also update couple of other error/info messages. Signed-off-by: Vasant Hegde Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/opal-dump.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/platforms/powernv/opal-dump.c b/arch/powerpc/platforms/powernv/opal-dump.c index 85bb8fff7947..3b819959337c 100644 --- a/arch/powerpc/platforms/powernv/opal-dump.c +++ b/arch/powerpc/platforms/powernv/opal-dump.c @@ -112,7 +112,7 @@ static ssize_t init_dump_show(struct dump_obj *dump_obj, struct dump_attribute *attr, char *buf) { - return sprintf(buf, "1 - initiate dump\n"); + return sprintf(buf, "1 - initiate Service Processor(FSP) dump\n"); } static int64_t dump_fips_init(uint8_t type) @@ -121,7 +121,7 @@ static int64_t dump_fips_init(uint8_t type) rc = opal_dump_init(type); if (rc) - pr_warn("%s: Failed to initiate FipS dump (%d)\n", + pr_warn("%s: Failed to initiate FSP dump (%d)\n", __func__, rc); return rc; } @@ -131,8 +131,12 @@ static ssize_t init_dump_store(struct dump_obj *dump_obj, const char *buf, size_t count) { - dump_fips_init(DUMP_TYPE_FSP); - pr_info("%s: Initiated FSP dump\n", __func__); + int rc; + + rc = dump_fips_init(DUMP_TYPE_FSP); + if (rc == OPAL_SUCCESS) + pr_info("%s: Initiated FSP dump\n", __func__); + return count; } @@ -297,7 +301,7 @@ static ssize_t dump_attr_read(struct file *filep, struct kobject *kobj, * and rely on userspace to ask us to try * again. */ - pr_info("%s: Platform dump partially read.ID = 0x%x\n", + pr_info("%s: Platform dump partially read. ID = 0x%x\n", __func__, dump->id); return -EIO; } From c913e5f95e546d8d3a9f99ba9908f7e095cbc1fb Mon Sep 17 00:00:00 2001 From: Tony Breeds Date: Thu, 14 Aug 2014 17:05:04 +1000 Subject: [PATCH 030/130] powerpc/boot: Don't install zImage.* from make install in commit 29f1aff2c (powerpc: Copy bootable images in the default install script) we changed to copying all the built boot targets based on the assumption that it's backwards compatible. It turns out that debian devived installkernel scripts will barf if not given exactly 4 args. This change reverts make install to just install the vmlinux (we can change the dfault in a seperate patch) and introduces a new make zInstall which works with a more flexible installkernel script. Cc: Grant Likely Signed-off-by: Tony Breeds Signed-off-by: Michael Ellerman --- arch/powerpc/boot/Makefile | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index ccc25eddbcb8..8a5bc1cfc6aa 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -389,7 +389,12 @@ $(obj)/zImage: $(addprefix $(obj)/, $(image-y)) $(obj)/zImage.initrd: $(addprefix $(obj)/, $(initrd-y)) @rm -f $@; ln $< $@ +# Only install the vmlinux install: $(CONFIGURE) $(addprefix $(obj)/, $(image-y)) + sh -x $(srctree)/$(src)/install.sh "$(KERNELRELEASE)" vmlinux System.map "$(INSTALL_PATH)" + +# Install the vmlinux and other built boot targets. +zInstall: $(CONFIGURE) $(addprefix $(obj)/, $(image-y)) sh -x $(srctree)/$(src)/install.sh "$(KERNELRELEASE)" vmlinux System.map "$(INSTALL_PATH)" $^ # anything not in $(targets) From 3484a31fce22348b9034024cf9368ab2dbbfd0ab Mon Sep 17 00:00:00 2001 From: Pranith Kumar Date: Mon, 18 Aug 2014 17:13:41 -0400 Subject: [PATCH 031/130] powerpc: Fix build error with CONFIG_PCI=n Fix ppc 32 build failure as reported here: http://kisskb.ellerman.id.au/kisskb/buildresult/11663513/ The error is as follows: arch/powerpc/include/asm/floppy.h:142:20: error: 'isa_bridge_pcidev' undeclared (first use in this function) This is happening since floppy.o is enabled by BLK_DEV_FD which depends on ARCH_MAY_HAVE_PC_FDC which is in-turn enabled if PPC_PSERIES=n. The following commit changes the dependency so that ARCH_MAY_HAVE_PC_FDC is dependent exclusively on PCI since otherwise it will not compile. Signed-off-by: Pranith Kumar Reported-by: Geert Uytterhoeven CC: Andrew Morton Signed-off-by: Michael Ellerman --- arch/powerpc/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 4bc7b62fb4b6..ece1c01ddb2f 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -182,7 +182,7 @@ config SCHED_OMIT_FRAME_POINTER config ARCH_MAY_HAVE_PC_FDC bool - default !PPC_PSERIES || PCI + default PCI config PPC_OF def_bool y From bffe6bda342578deea0b74f2d9cb97cc40585a1b Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Tue, 19 Aug 2014 14:47:59 +1000 Subject: [PATCH 032/130] powerpc/powernv: Add OPAL check token call Currently there is no way to generically check if an OPAL call exists or not from the host kernel. This adds an OPAL call opal_check_token() which tells you if the given token is present in OPAL or not. Signed-off-by: Michael Neuling Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/opal.h | 2 ++ arch/powerpc/platforms/powernv/opal-wrappers.S | 1 + 2 files changed, 3 insertions(+) diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index 86055e598269..7127b87a548d 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -135,6 +135,7 @@ struct opal_sg_list { #define OPAL_FLASH_MANAGE 77 #define OPAL_FLASH_UPDATE 78 #define OPAL_RESYNC_TIMEBASE 79 +#define OPAL_CHECK_TOKEN 80 #define OPAL_DUMP_INIT 81 #define OPAL_DUMP_INFO 82 #define OPAL_DUMP_READ 83 @@ -887,6 +888,7 @@ int64_t opal_pci_next_error(uint64_t phb_id, __be64 *first_frozen_pe, __be16 *pci_error_type, __be16 *severity); int64_t opal_pci_poll(uint64_t phb_id); int64_t opal_return_cpu(void); +int64_t opal_check_token(uint64_t token); int64_t opal_reinit_cpus(uint64_t flags); int64_t opal_xscom_read(uint32_t gcid, uint64_t pcb_addr, __be64 *val); diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S index 2e6ce1b8dc8f..571885556dfc 100644 --- a/arch/powerpc/platforms/powernv/opal-wrappers.S +++ b/arch/powerpc/platforms/powernv/opal-wrappers.S @@ -232,6 +232,7 @@ OPAL_CALL(opal_validate_flash, OPAL_FLASH_VALIDATE); OPAL_CALL(opal_manage_flash, OPAL_FLASH_MANAGE); OPAL_CALL(opal_update_flash, OPAL_FLASH_UPDATE); OPAL_CALL(opal_resync_timebase, OPAL_RESYNC_TIMEBASE); +OPAL_CALL(opal_check_token, OPAL_CHECK_TOKEN); OPAL_CALL(opal_dump_init, OPAL_DUMP_INIT); OPAL_CALL(opal_dump_info, OPAL_DUMP_INFO); OPAL_CALL(opal_dump_info2, OPAL_DUMP_INFO2); From 035ed26fb090ff3277900259f19d57e54da2e116 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Tue, 19 Aug 2014 14:48:00 +1000 Subject: [PATCH 033/130] powerpc/powernv: Check OPAL RTC calls exists before using Check that the OPAL_RTC_READ token exists before we use the OPAL RTC. Refactors the code a little to merge error paths. This avoids littering the OPAL console with: "OPAL: Called with bad token 3". Signed-off-by: Michael Neuling Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/opal-rtc.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/platforms/powernv/opal-rtc.c b/arch/powerpc/platforms/powernv/opal-rtc.c index b1885db8fdf3..499707ddaa9c 100644 --- a/arch/powerpc/platforms/powernv/opal-rtc.c +++ b/arch/powerpc/platforms/powernv/opal-rtc.c @@ -42,6 +42,9 @@ unsigned long __init opal_get_boot_time(void) __be64 __h_m_s_ms; long rc = OPAL_BUSY; + if (!opal_check_token(OPAL_RTC_READ)) + goto out; + while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) { rc = opal_rtc_read(&__y_m_d, &__h_m_s_ms); if (rc == OPAL_BUSY_EVENT) @@ -49,16 +52,18 @@ unsigned long __init opal_get_boot_time(void) else mdelay(10); } - if (rc != OPAL_SUCCESS) { - ppc_md.get_rtc_time = NULL; - ppc_md.set_rtc_time = NULL; - return 0; - } + if (rc != OPAL_SUCCESS) + goto out; + y_m_d = be32_to_cpu(__y_m_d); h_m_s_ms = be64_to_cpu(__h_m_s_ms); opal_to_tm(y_m_d, h_m_s_ms, &tm); return mktime(tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday, tm.tm_hour, tm.tm_min, tm.tm_sec); +out: + ppc_md.get_rtc_time = NULL; + ppc_md.set_rtc_time = NULL; + return 0; } void opal_get_rtc_time(struct rtc_time *tm) From 7dc992ec7b3fd875b05f49f454a922ee94af330b Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Tue, 19 Aug 2014 14:48:01 +1000 Subject: [PATCH 034/130] powerpc/powernv: Check OPAL elog calls exist before using Check that the OPAL_ELOG_READ token exists before initalising the elog infrastructure. This avoids littering the OPAL console with: "OPAL: Called with bad token 74" Signed-off-by: Michael Neuling Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/opal-elog.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/powerpc/platforms/powernv/opal-elog.c b/arch/powerpc/platforms/powernv/opal-elog.c index bbdb3ffaab98..518fe95dbf24 100644 --- a/arch/powerpc/platforms/powernv/opal-elog.c +++ b/arch/powerpc/platforms/powernv/opal-elog.c @@ -295,6 +295,10 @@ int __init opal_elog_init(void) { int rc = 0; + /* ELOG not supported by firmware */ + if (!opal_check_token(OPAL_ELOG_READ)) + return -1; + elog_kset = kset_create_and_add("elog", NULL, opal_kobj); if (!elog_kset) { pr_warn("%s: failed to create elog kset\n", __func__); From 831cf65b0295de75f40f8cf52ce62e5d261dab4f Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Tue, 19 Aug 2014 14:48:02 +1000 Subject: [PATCH 035/130] powerpc/powernv: Check OPAL dump calls exist before using Check that the OPAL_DUMP_READ token exists before initalising the elog infrastructure. This avoids littering the OPAL console with: "OPAL: Called with bad token 91" Signed-off-by: Michael Neuling Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/opal-dump.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/powerpc/platforms/powernv/opal-dump.c b/arch/powerpc/platforms/powernv/opal-dump.c index 3b819959337c..23260f7dfa7a 100644 --- a/arch/powerpc/platforms/powernv/opal-dump.c +++ b/arch/powerpc/platforms/powernv/opal-dump.c @@ -427,6 +427,10 @@ void __init opal_platform_dump_init(void) { int rc; + /* ELOG not supported by firmware */ + if (!opal_check_token(OPAL_DUMP_READ)) + return; + dump_kset = kset_create_and_add("dump", NULL, opal_kobj); if (!dump_kset) { pr_warn("%s: Failed to create dump kset\n", __func__); From 370a3abdbba85f5d7b8571850fa666e240893c79 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 20 Aug 2014 08:00:00 +1000 Subject: [PATCH 036/130] powerpc: Move adb symbol exports next to function definitions Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/ppc_ksyms.c | 7 ------- drivers/macintosh/adb.c | 5 +++++ 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c index 48d17d6fca5b..351f447654cc 100644 --- a/arch/powerpc/kernel/ppc_ksyms.c +++ b/arch/powerpc/kernel/ppc_ksyms.c @@ -128,13 +128,6 @@ EXPORT_SYMBOL(smp_hw_index); #endif #endif -#ifdef CONFIG_ADB -EXPORT_SYMBOL(adb_request); -EXPORT_SYMBOL(adb_register); -EXPORT_SYMBOL(adb_unregister); -EXPORT_SYMBOL(adb_poll); -EXPORT_SYMBOL(adb_try_handler_change); -#endif /* CONFIG_ADB */ #ifdef CONFIG_ADB_CUDA EXPORT_SYMBOL(cuda_request); EXPORT_SYMBOL(cuda_poll); diff --git a/drivers/macintosh/adb.c b/drivers/macintosh/adb.c index 9e9c56758a08..226179b975a0 100644 --- a/drivers/macintosh/adb.c +++ b/drivers/macintosh/adb.c @@ -411,6 +411,7 @@ adb_poll(void) return; adb_controller->poll(); } +EXPORT_SYMBOL(adb_poll); static void adb_sync_req_done(struct adb_request *req) { @@ -460,6 +461,7 @@ adb_request(struct adb_request *req, void (*done)(struct adb_request *), return rc; } +EXPORT_SYMBOL(adb_request); /* Ultimately this should return the number of devices with the given default id. @@ -495,6 +497,7 @@ adb_register(int default_id, int handler_id, struct adb_ids *ids, mutex_unlock(&adb_handler_mutex); return ids->nids; } +EXPORT_SYMBOL(adb_register); int adb_unregister(int index) @@ -516,6 +519,7 @@ adb_unregister(int index) mutex_unlock(&adb_handler_mutex); return ret; } +EXPORT_SYMBOL(adb_unregister); void adb_input(unsigned char *buf, int nb, int autopoll) @@ -582,6 +586,7 @@ adb_try_handler_change(int address, int new_id) mutex_unlock(&adb_handler_mutex); return ret; } +EXPORT_SYMBOL(adb_try_handler_change); int adb_get_infos(int address, int *original_address, int *handler_id) From 4a1b08e84454f9eb46bcb1aaf307421a4df46de2 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 20 Aug 2014 08:00:01 +1000 Subject: [PATCH 037/130] powerpc: Move via-cuda symbol exports next to function definitions Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/ppc_ksyms.c | 4 ---- drivers/macintosh/via-cuda.c | 2 ++ 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c index 351f447654cc..521291db59f8 100644 --- a/arch/powerpc/kernel/ppc_ksyms.c +++ b/arch/powerpc/kernel/ppc_ksyms.c @@ -128,10 +128,6 @@ EXPORT_SYMBOL(smp_hw_index); #endif #endif -#ifdef CONFIG_ADB_CUDA -EXPORT_SYMBOL(cuda_request); -EXPORT_SYMBOL(cuda_poll); -#endif /* CONFIG_ADB_CUDA */ EXPORT_SYMBOL(to_tm); #ifdef CONFIG_PPC32 diff --git a/drivers/macintosh/via-cuda.c b/drivers/macintosh/via-cuda.c index d61f271d2207..bad18130f125 100644 --- a/drivers/macintosh/via-cuda.c +++ b/drivers/macintosh/via-cuda.c @@ -379,6 +379,7 @@ cuda_request(struct adb_request *req, void (*done)(struct adb_request *), req->reply_expected = 1; return cuda_write(req); } +EXPORT_SYMBOL(cuda_request); static int cuda_write(struct adb_request *req) @@ -441,6 +442,7 @@ cuda_poll(void) if (cuda_irq) enable_irq(cuda_irq); } +EXPORT_SYMBOL(cuda_poll); static irqreturn_t cuda_interrupt(int irq, void *arg) From e1802b065d189cdfa25eaf6d019c222a91618b9c Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 20 Aug 2014 08:00:02 +1000 Subject: [PATCH 038/130] powerpc: Move more symbol exports next to function definitions Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/ppc_ksyms.c | 14 -------------- arch/powerpc/kernel/process.c | 2 ++ arch/powerpc/kernel/setup-common.c | 3 +++ arch/powerpc/kernel/time.c | 1 + arch/powerpc/mm/hash_utils_64.c | 1 + 5 files changed, 7 insertions(+), 14 deletions(-) diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c index 521291db59f8..4a42a1fc09e3 100644 --- a/arch/powerpc/kernel/ppc_ksyms.c +++ b/arch/powerpc/kernel/ppc_ksyms.c @@ -5,7 +5,6 @@ #include #include #include -#include #include #include #include @@ -96,8 +95,6 @@ EXPORT_SYMBOL(isa_mem_base); EXPORT_SYMBOL(pci_dram_offset); #endif /* CONFIG_PCI */ -EXPORT_SYMBOL(start_thread); - #ifdef CONFIG_PPC_FPU EXPORT_SYMBOL(giveup_fpu); EXPORT_SYMBOL(load_fp_state); @@ -109,7 +106,6 @@ EXPORT_SYMBOL(load_vr_state); EXPORT_SYMBOL(store_vr_state); #endif /* CONFIG_ALTIVEC */ #ifdef CONFIG_VSX -EXPORT_SYMBOL(giveup_vsx); EXPORT_SYMBOL_GPL(__giveup_vsx); #endif /* CONFIG_VSX */ #ifdef CONFIG_SPE @@ -128,8 +124,6 @@ EXPORT_SYMBOL(smp_hw_index); #endif #endif -EXPORT_SYMBOL(to_tm); - #ifdef CONFIG_PPC32 long long __ashrdi3(long long, int); long long __ashldi3(long long, int); @@ -150,10 +144,6 @@ EXPORT_SYMBOL(memmove); EXPORT_SYMBOL(memcmp); EXPORT_SYMBOL(memchr); -#if defined(CONFIG_FB_VGA16_MODULE) -EXPORT_SYMBOL(screen_info); -#endif - #ifdef CONFIG_PPC32 EXPORT_SYMBOL(timer_interrupt); EXPORT_SYMBOL(tb_ticks_per_jiffy); @@ -189,10 +179,6 @@ EXPORT_SYMBOL(__arch_hweight32); EXPORT_SYMBOL(__arch_hweight64); #endif -#ifdef CONFIG_PPC_BOOK3S_64 -EXPORT_SYMBOL_GPL(mmu_psize_defs); -#endif - #ifdef CONFIG_EPAPR_PARAVIRT EXPORT_SYMBOL(epapr_hypercall_start); #endif diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index bf44ae962ab8..aa1df89c8b2a 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -228,6 +228,7 @@ void giveup_vsx(struct task_struct *tsk) giveup_altivec_maybe_transactional(tsk); __giveup_vsx(tsk); } +EXPORT_SYMBOL(giveup_vsx); void flush_vsx_to_thread(struct task_struct *tsk) { @@ -1316,6 +1317,7 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp) current->thread.tm_tfiar = 0; #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ } +EXPORT_SYMBOL(start_thread); #define PR_FP_ALL_EXCEPT (PR_FP_EXC_DIV | PR_FP_EXC_OVF | PR_FP_EXC_UND \ | PR_FP_EXC_RES | PR_FP_EXC_INV) diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 1b0e26013a62..c933acd4e2bd 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -94,6 +94,9 @@ struct screen_info screen_info = { .orig_video_isVGA = 1, .orig_video_points = 16 }; +#if defined(CONFIG_FB_VGA16_MODULE) +EXPORT_SYMBOL(screen_info); +#endif /* Variables required to store legacy IO irq routing */ int of_i8042_kbd_irq; diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 368ab374d33c..055558b953e3 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -1024,6 +1024,7 @@ void to_tm(int tim, struct rtc_time * tm) */ GregorianDay(tm); } +EXPORT_SYMBOL(to_tm); /* * Divide a 128-bit dividend by a 32-bit divisor, leaving a 128 bit diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index daee7f4e5a14..bc6cc2ad037b 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -92,6 +92,7 @@ extern unsigned long dart_tablebase; static unsigned long _SDR1; struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT]; +EXPORT_SYMBOL_GPL(mmu_psize_defs); struct hash_pte *htab_address; unsigned long htab_size_bytes; From 5889bafa933e9f096c82cc46c9b134d2562b114b Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 20 Aug 2014 08:00:03 +1000 Subject: [PATCH 039/130] powerpc: Remove unused 32bit symbol exports Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/ppc_ksyms.c | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c index 4a42a1fc09e3..ab4f0bcfce28 100644 --- a/arch/powerpc/kernel/ppc_ksyms.c +++ b/arch/powerpc/kernel/ppc_ksyms.c @@ -45,26 +45,10 @@ #include #ifdef CONFIG_PPC32 -extern void transfer_to_handler(void); -extern void do_IRQ(struct pt_regs *regs); -extern void machine_check_exception(struct pt_regs *regs); -extern void alignment_exception(struct pt_regs *regs); -extern void program_check_exception(struct pt_regs *regs); -extern void single_step_exception(struct pt_regs *regs); -extern int sys_sigreturn(struct pt_regs *regs); - EXPORT_SYMBOL(clear_pages); EXPORT_SYMBOL(ISA_DMA_THRESHOLD); EXPORT_SYMBOL(DMA_MODE_READ); EXPORT_SYMBOL(DMA_MODE_WRITE); - -EXPORT_SYMBOL(transfer_to_handler); -EXPORT_SYMBOL(do_IRQ); -EXPORT_SYMBOL(machine_check_exception); -EXPORT_SYMBOL(alignment_exception); -EXPORT_SYMBOL(program_check_exception); -EXPORT_SYMBOL(single_step_exception); -EXPORT_SYMBOL(sys_sigreturn); #endif #ifdef CONFIG_FUNCTION_TRACER From 7b20a955c3eec58e87f3d32cc1438eab6447ff4c Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 20 Aug 2014 08:00:04 +1000 Subject: [PATCH 040/130] powerpc: Move lib symbol exports into arch/powerpc/lib/ppc_ksyms.c Move the lib symbol exports closer to their function definitions Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/ppc_ksyms.c | 32 --------------------------- arch/powerpc/lib/Makefile | 2 +- arch/powerpc/lib/ppc_ksyms.c | 39 +++++++++++++++++++++++++++++++++ 3 files changed, 40 insertions(+), 33 deletions(-) create mode 100644 arch/powerpc/lib/ppc_ksyms.c diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c index ab4f0bcfce28..aba41f344936 100644 --- a/arch/powerpc/kernel/ppc_ksyms.c +++ b/arch/powerpc/kernel/ppc_ksyms.c @@ -55,24 +55,6 @@ EXPORT_SYMBOL(DMA_MODE_WRITE); EXPORT_SYMBOL(_mcount); #endif -EXPORT_SYMBOL(strcpy); -EXPORT_SYMBOL(strncpy); -EXPORT_SYMBOL(strcat); -EXPORT_SYMBOL(strlen); -EXPORT_SYMBOL(strcmp); -EXPORT_SYMBOL(strncmp); - -#ifndef CONFIG_GENERIC_CSUM -EXPORT_SYMBOL(csum_partial); -EXPORT_SYMBOL(csum_partial_copy_generic); -EXPORT_SYMBOL(ip_fast_csum); -EXPORT_SYMBOL(csum_tcpudp_magic); -#endif - -EXPORT_SYMBOL(__copy_tofrom_user); -EXPORT_SYMBOL(__clear_user); -EXPORT_SYMBOL(copy_page); - #if defined(CONFIG_PCI) && defined(CONFIG_PPC32) EXPORT_SYMBOL(isa_io_base); EXPORT_SYMBOL(isa_mem_base); @@ -122,17 +104,10 @@ EXPORT_SYMBOL(__cmpdi2); #endif long long __bswapdi2(long long); EXPORT_SYMBOL(__bswapdi2); -EXPORT_SYMBOL(memcpy); -EXPORT_SYMBOL(memset); -EXPORT_SYMBOL(memmove); -EXPORT_SYMBOL(memcmp); -EXPORT_SYMBOL(memchr); #ifdef CONFIG_PPC32 EXPORT_SYMBOL(timer_interrupt); EXPORT_SYMBOL(tb_ticks_per_jiffy); -EXPORT_SYMBOL(cacheable_memcpy); -EXPORT_SYMBOL(cacheable_memzero); #endif #ifdef CONFIG_PPC32 @@ -156,13 +131,6 @@ EXPORT_SYMBOL(__mfdcr); #endif EXPORT_SYMBOL(empty_zero_page); -#ifdef CONFIG_PPC64 -EXPORT_SYMBOL(__arch_hweight8); -EXPORT_SYMBOL(__arch_hweight16); -EXPORT_SYMBOL(__arch_hweight32); -EXPORT_SYMBOL(__arch_hweight64); -#endif - #ifdef CONFIG_EPAPR_PARAVIRT EXPORT_SYMBOL(epapr_hypercall_start); #endif diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile index 59fa2de9546d..9f342f134ae4 100644 --- a/arch/powerpc/lib/Makefile +++ b/arch/powerpc/lib/Makefile @@ -10,7 +10,7 @@ CFLAGS_REMOVE_code-patching.o = -pg CFLAGS_REMOVE_feature-fixups.o = -pg obj-y := string.o alloc.o \ - crtsavres.o + crtsavres.o ppc_ksyms.o obj-$(CONFIG_PPC32) += div64.o copy_32.o obj-$(CONFIG_HAS_IOMEM) += devres.o diff --git a/arch/powerpc/lib/ppc_ksyms.c b/arch/powerpc/lib/ppc_ksyms.c new file mode 100644 index 000000000000..f993959647b5 --- /dev/null +++ b/arch/powerpc/lib/ppc_ksyms.c @@ -0,0 +1,39 @@ +#include +#include +#include +#include + +EXPORT_SYMBOL(memcpy); +EXPORT_SYMBOL(memset); +EXPORT_SYMBOL(memmove); +EXPORT_SYMBOL(memcmp); +EXPORT_SYMBOL(memchr); +#ifdef CONFIG_PPC32 +EXPORT_SYMBOL(cacheable_memcpy); +EXPORT_SYMBOL(cacheable_memzero); +#endif + +EXPORT_SYMBOL(strcpy); +EXPORT_SYMBOL(strncpy); +EXPORT_SYMBOL(strcat); +EXPORT_SYMBOL(strlen); +EXPORT_SYMBOL(strcmp); +EXPORT_SYMBOL(strncmp); + +#ifndef CONFIG_GENERIC_CSUM +EXPORT_SYMBOL(csum_partial); +EXPORT_SYMBOL(csum_partial_copy_generic); +EXPORT_SYMBOL(ip_fast_csum); +EXPORT_SYMBOL(csum_tcpudp_magic); +#endif + +EXPORT_SYMBOL(__copy_tofrom_user); +EXPORT_SYMBOL(__clear_user); +EXPORT_SYMBOL(copy_page); + +#ifdef CONFIG_PPC64 +EXPORT_SYMBOL(__arch_hweight8); +EXPORT_SYMBOL(__arch_hweight16); +EXPORT_SYMBOL(__arch_hweight32); +EXPORT_SYMBOL(__arch_hweight64); +#endif From 5144b6bfe25fda7c51f6f9d8d79cf0fe61df5c28 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 20 Aug 2014 08:00:05 +1000 Subject: [PATCH 041/130] powerpc: Separate ppc32 symbol exports into ppc_ksyms_32.c Simplify things considerably by moving all the ppc32 specific symbol exports into its own file. Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/Makefile | 3 + arch/powerpc/kernel/ppc_ksyms.c | 123 ++++------------------------- arch/powerpc/kernel/ppc_ksyms_32.c | 61 ++++++++++++++ 3 files changed, 79 insertions(+), 108 deletions(-) create mode 100644 arch/powerpc/kernel/ppc_ksyms_32.c diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 670c312d914e..502cf69b6c89 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -93,6 +93,9 @@ obj-$(CONFIG_PPC32) += entry_32.o setup_32.o obj-$(CONFIG_PPC64) += dma-iommu.o iommu.o obj-$(CONFIG_KGDB) += kgdb.o obj-$(CONFIG_MODULES) += ppc_ksyms.o +ifeq ($(CONFIG_PPC32),y) +obj-$(CONFIG_MODULES) += ppc_ksyms_32.o +endif obj-$(CONFIG_BOOTX_TEXT) += btext.o obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_KPROBES) += kprobes.o diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c index aba41f344936..c4dfff6c2719 100644 --- a/arch/powerpc/kernel/ppc_ksyms.c +++ b/arch/powerpc/kernel/ppc_ksyms.c @@ -1,135 +1,42 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include +#include -#include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include #include +#include #include -#ifdef CONFIG_PPC32 -EXPORT_SYMBOL(clear_pages); -EXPORT_SYMBOL(ISA_DMA_THRESHOLD); -EXPORT_SYMBOL(DMA_MODE_READ); -EXPORT_SYMBOL(DMA_MODE_WRITE); -#endif +EXPORT_SYMBOL(flush_dcache_range); +EXPORT_SYMBOL(flush_icache_range); + +EXPORT_SYMBOL(empty_zero_page); + +long long __bswapdi2(long long); +EXPORT_SYMBOL(__bswapdi2); #ifdef CONFIG_FUNCTION_TRACER EXPORT_SYMBOL(_mcount); #endif -#if defined(CONFIG_PCI) && defined(CONFIG_PPC32) -EXPORT_SYMBOL(isa_io_base); -EXPORT_SYMBOL(isa_mem_base); -EXPORT_SYMBOL(pci_dram_offset); -#endif /* CONFIG_PCI */ - #ifdef CONFIG_PPC_FPU EXPORT_SYMBOL(giveup_fpu); EXPORT_SYMBOL(load_fp_state); EXPORT_SYMBOL(store_fp_state); #endif + #ifdef CONFIG_ALTIVEC EXPORT_SYMBOL(giveup_altivec); EXPORT_SYMBOL(load_vr_state); EXPORT_SYMBOL(store_vr_state); -#endif /* CONFIG_ALTIVEC */ +#endif + #ifdef CONFIG_VSX EXPORT_SYMBOL_GPL(__giveup_vsx); -#endif /* CONFIG_VSX */ +#endif + #ifdef CONFIG_SPE EXPORT_SYMBOL(giveup_spe); -#endif /* CONFIG_SPE */ - -#ifndef CONFIG_PPC64 -EXPORT_SYMBOL(flush_instruction_cache); #endif -EXPORT_SYMBOL(flush_dcache_range); -EXPORT_SYMBOL(flush_icache_range); - -#ifdef CONFIG_SMP -#ifdef CONFIG_PPC32 -EXPORT_SYMBOL(smp_hw_index); -#endif -#endif - -#ifdef CONFIG_PPC32 -long long __ashrdi3(long long, int); -long long __ashldi3(long long, int); -long long __lshrdi3(long long, int); -EXPORT_SYMBOL(__ashrdi3); -EXPORT_SYMBOL(__ashldi3); -EXPORT_SYMBOL(__lshrdi3); -int __ucmpdi2(unsigned long long, unsigned long long); -EXPORT_SYMBOL(__ucmpdi2); -int __cmpdi2(long long, long long); -EXPORT_SYMBOL(__cmpdi2); -#endif -long long __bswapdi2(long long); -EXPORT_SYMBOL(__bswapdi2); - -#ifdef CONFIG_PPC32 -EXPORT_SYMBOL(timer_interrupt); -EXPORT_SYMBOL(tb_ticks_per_jiffy); -#endif - -#ifdef CONFIG_PPC32 -EXPORT_SYMBOL(switch_mmu_context); -#endif - -#ifdef CONFIG_PPC_STD_MMU_32 -extern long mol_trampoline; -EXPORT_SYMBOL(mol_trampoline); /* For MOL */ -EXPORT_SYMBOL(flush_hash_pages); /* For MOL */ -#ifdef CONFIG_SMP -extern int mmu_hash_lock; -EXPORT_SYMBOL(mmu_hash_lock); /* For MOL */ -#endif /* CONFIG_SMP */ -extern long *intercept_table; -EXPORT_SYMBOL(intercept_table); -#endif /* CONFIG_PPC_STD_MMU_32 */ -#ifdef CONFIG_PPC_DCR_NATIVE -EXPORT_SYMBOL(__mtdcr); -EXPORT_SYMBOL(__mfdcr); -#endif -EXPORT_SYMBOL(empty_zero_page); #ifdef CONFIG_EPAPR_PARAVIRT EXPORT_SYMBOL(epapr_hypercall_start); diff --git a/arch/powerpc/kernel/ppc_ksyms_32.c b/arch/powerpc/kernel/ppc_ksyms_32.c new file mode 100644 index 000000000000..30ddd8a24eee --- /dev/null +++ b/arch/powerpc/kernel/ppc_ksyms_32.c @@ -0,0 +1,61 @@ +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +EXPORT_SYMBOL(clear_pages); +EXPORT_SYMBOL(ISA_DMA_THRESHOLD); +EXPORT_SYMBOL(DMA_MODE_READ); +EXPORT_SYMBOL(DMA_MODE_WRITE); + +#if defined(CONFIG_PCI) +EXPORT_SYMBOL(isa_io_base); +EXPORT_SYMBOL(isa_mem_base); +EXPORT_SYMBOL(pci_dram_offset); +#endif + +#ifdef CONFIG_SMP +EXPORT_SYMBOL(smp_hw_index); +#endif + +long long __ashrdi3(long long, int); +long long __ashldi3(long long, int); +long long __lshrdi3(long long, int); +int __ucmpdi2(unsigned long long, unsigned long long); +int __cmpdi2(long long, long long); +EXPORT_SYMBOL(__ashrdi3); +EXPORT_SYMBOL(__ashldi3); +EXPORT_SYMBOL(__lshrdi3); +EXPORT_SYMBOL(__ucmpdi2); +EXPORT_SYMBOL(__cmpdi2); + +EXPORT_SYMBOL(timer_interrupt); +EXPORT_SYMBOL(tb_ticks_per_jiffy); + +EXPORT_SYMBOL(switch_mmu_context); + +#ifdef CONFIG_PPC_STD_MMU_32 +extern long mol_trampoline; +EXPORT_SYMBOL(mol_trampoline); /* For MOL */ +EXPORT_SYMBOL(flush_hash_pages); /* For MOL */ +#ifdef CONFIG_SMP +extern int mmu_hash_lock; +EXPORT_SYMBOL(mmu_hash_lock); /* For MOL */ +#endif /* CONFIG_SMP */ +extern long *intercept_table; +EXPORT_SYMBOL(intercept_table); +#endif /* CONFIG_PPC_STD_MMU_32 */ + +#ifdef CONFIG_PPC_DCR_NATIVE +EXPORT_SYMBOL(__mtdcr); +EXPORT_SYMBOL(__mfdcr); +#endif + +EXPORT_SYMBOL(flush_instruction_cache); From e51df2c170efaeadce4d416e1825b0830de0a795 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 20 Aug 2014 08:55:18 +1000 Subject: [PATCH 042/130] powerpc: Make a bunch of things static Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/hw_breakpoint.c | 2 +- arch/powerpc/kernel/nvram_64.c | 2 +- arch/powerpc/kernel/pci-common.c | 2 +- arch/powerpc/kernel/pci_of_scan.c | 2 +- arch/powerpc/kernel/prom.c | 5 +++-- arch/powerpc/kernel/ptrace.c | 2 +- arch/powerpc/kernel/rtasd.c | 2 +- arch/powerpc/kernel/time.c | 4 ++-- arch/powerpc/lib/feature-fixups.c | 2 +- arch/powerpc/mm/hash_utils_64.c | 2 +- arch/powerpc/mm/pgtable.c | 2 +- arch/powerpc/perf/core-book3s.c | 18 +++++++++--------- arch/powerpc/platforms/powernv/eeh-ioda.c | 4 ++-- arch/powerpc/platforms/powernv/pci-ioda.c | 6 +++--- arch/powerpc/platforms/powernv/setup.c | 2 +- arch/powerpc/platforms/powernv/smp.c | 2 +- arch/powerpc/platforms/pseries/nvram.c | 12 +++++++----- arch/powerpc/platforms/pseries/ras.c | 2 +- arch/powerpc/platforms/pseries/setup.c | 2 +- arch/powerpc/sysdev/mpic.c | 2 +- arch/powerpc/sysdev/msi_bitmap.c | 6 +++--- 21 files changed, 43 insertions(+), 40 deletions(-) diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c index 0bb5918faaaf..1f7d84e2e8b2 100644 --- a/arch/powerpc/kernel/hw_breakpoint.c +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -293,7 +293,7 @@ out: /* * Handle single-step exceptions following a DABR hit. */ -int __kprobes single_step_dabr_instruction(struct die_args *args) +static int __kprobes single_step_dabr_instruction(struct die_args *args) { struct pt_regs *regs = args->regs; struct perf_event *bp = NULL; diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c index 28b898e68185..34f7c9b7cd96 100644 --- a/arch/powerpc/kernel/nvram_64.c +++ b/arch/powerpc/kernel/nvram_64.c @@ -567,7 +567,7 @@ static int __init nvram_init(void) return rc; } -void __exit nvram_cleanup(void) +static void __exit nvram_cleanup(void) { misc_deregister( &nvram_dev ); } diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index b2814e23e1ed..bd84771f89fc 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -1140,7 +1140,7 @@ static int reparent_resources(struct resource *parent, * as well. */ -void pcibios_allocate_bus_resources(struct pci_bus *bus) +static void pcibios_allocate_bus_resources(struct pci_bus *bus) { struct pci_bus *b; int i; diff --git a/arch/powerpc/kernel/pci_of_scan.c b/arch/powerpc/kernel/pci_of_scan.c index 44562aa97f16..e6245e9c7d8d 100644 --- a/arch/powerpc/kernel/pci_of_scan.c +++ b/arch/powerpc/kernel/pci_of_scan.c @@ -38,7 +38,7 @@ static u32 get_int_prop(struct device_node *np, const char *name, u32 def) * @addr0: value of 1st cell of a device tree PCI address. * @bridge: Set this flag if the address is from a bridge 'ranges' property */ -unsigned int pci_parse_of_flags(u32 addr0, int bridge) +static unsigned int pci_parse_of_flags(u32 addr0, int bridge) { unsigned int flags = 0; diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 4e139f8a69ef..cb35c5b3efca 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -386,8 +386,9 @@ static int __init early_init_dt_scan_cpus(unsigned long node, return 0; } -int __init early_init_dt_scan_chosen_ppc(unsigned long node, const char *uname, - int depth, void *data) +static int __init early_init_dt_scan_chosen_ppc(unsigned long node, + const char *uname, + int depth, void *data) { const unsigned long *lprop; /* All these set by kernel, so no need to convert endian */ diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index 2e3d2bf536c5..cdb404ea3468 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -932,7 +932,7 @@ void ptrace_triggered(struct perf_event *bp, } #endif /* CONFIG_HAVE_HW_BREAKPOINT */ -int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, +static int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, unsigned long data) { #ifdef CONFIG_HAVE_HW_BREAKPOINT diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c index e736387fee6a..5a2c049c1c61 100644 --- a/arch/powerpc/kernel/rtasd.c +++ b/arch/powerpc/kernel/rtasd.c @@ -286,7 +286,7 @@ static void prrn_work_fn(struct work_struct *work) static DECLARE_WORK(prrn_work, prrn_work_fn); -void prrn_schedule_update(u32 scope) +static void prrn_schedule_update(u32 scope) { flush_work(&prrn_work); prrn_update_scope = scope; diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 055558b953e3..7505599c2593 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -479,7 +479,7 @@ void arch_irq_work_raise(void) #endif /* CONFIG_IRQ_WORK */ -void __timer_interrupt(void) +static void __timer_interrupt(void) { struct pt_regs *regs = get_irq_regs(); u64 *next_tb = &__get_cpu_var(decrementers_next_tb); @@ -643,7 +643,7 @@ static int __init get_freq(char *name, int cells, unsigned long *val) return found; } -void start_cpu_decrementer(void) +static void start_cpu_decrementer(void) { #if defined(CONFIG_BOOKE) || defined(CONFIG_40x) /* Clear any pending timer interrupts */ diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c index 7a8a7487cee8..7ce3870d7ddd 100644 --- a/arch/powerpc/lib/feature-fixups.c +++ b/arch/powerpc/lib/feature-fixups.c @@ -164,7 +164,7 @@ static long calc_offset(struct fixup_entry *entry, unsigned int *p) return (unsigned long)p - (unsigned long)entry; } -void test_basic_patching(void) +static void test_basic_patching(void) { extern unsigned int ftr_fixup_test1; extern unsigned int end_ftr_fixup_test1; diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index bc6cc2ad037b..5786f2a81efe 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -868,7 +868,7 @@ unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap) } #ifdef CONFIG_PPC_MM_SLICES -unsigned int get_paca_psize(unsigned long addr) +static unsigned int get_paca_psize(unsigned long addr) { u64 lpsizes; unsigned char *hpsizes; diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c index c695943a513c..c90e602677c9 100644 --- a/arch/powerpc/mm/pgtable.c +++ b/arch/powerpc/mm/pgtable.c @@ -48,7 +48,7 @@ static inline int pte_looks_normal(pte_t pte) (_PAGE_PRESENT | _PAGE_USER); } -struct page * maybe_pte_to_page(pte_t pte) +static struct page *maybe_pte_to_page(pte_t pte) { unsigned long pfn = pte_pfn(pte); struct page *page; diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index b7cd00b0171e..a6995d4e93d4 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -59,9 +59,9 @@ struct cpu_hw_events { struct perf_branch_entry bhrb_entries[BHRB_MAX_ENTRIES]; }; -DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events); +static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events); -struct power_pmu *ppmu; +static struct power_pmu *ppmu; /* * Normally, to ignore kernel events we set the FCS (freeze counters @@ -124,7 +124,7 @@ static unsigned long ebb_switch_in(bool ebb, struct cpu_hw_events *cpuhw) static inline void power_pmu_bhrb_enable(struct perf_event *event) {} static inline void power_pmu_bhrb_disable(struct perf_event *event) {} -void power_pmu_flush_branch_stack(void) {} +static void power_pmu_flush_branch_stack(void) {} static inline void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw) {} static void pmao_restore_workaround(bool ebb) { } #endif /* CONFIG_PPC32 */ @@ -375,7 +375,7 @@ static void power_pmu_bhrb_disable(struct perf_event *event) /* Called from ctxsw to prevent one process's branch entries to * mingle with the other process's entries during context switch. */ -void power_pmu_flush_branch_stack(void) +static void power_pmu_flush_branch_stack(void) { if (ppmu->bhrb_nr) power_pmu_bhrb_reset(); @@ -408,7 +408,7 @@ static __u64 power_pmu_bhrb_to(u64 addr) } /* Processing BHRB entries */ -void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw) +static void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw) { u64 val; u64 addr; @@ -1573,7 +1573,7 @@ static void power_pmu_stop(struct perf_event *event, int ef_flags) * Set the flag to make pmu::enable() not perform the * schedulability test, it will be performed at commit time */ -void power_pmu_start_txn(struct pmu *pmu) +static void power_pmu_start_txn(struct pmu *pmu) { struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); @@ -1587,7 +1587,7 @@ void power_pmu_start_txn(struct pmu *pmu) * Clear the flag and pmu::enable() will perform the * schedulability test. */ -void power_pmu_cancel_txn(struct pmu *pmu) +static void power_pmu_cancel_txn(struct pmu *pmu) { struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); @@ -1600,7 +1600,7 @@ void power_pmu_cancel_txn(struct pmu *pmu) * Perform the group schedulability test as a whole * Return 0 if success */ -int power_pmu_commit_txn(struct pmu *pmu) +static int power_pmu_commit_txn(struct pmu *pmu) { struct cpu_hw_events *cpuhw; long i, n; @@ -1888,7 +1888,7 @@ ssize_t power_events_sysfs_show(struct device *dev, return sprintf(page, "event=0x%02llx\n", pmu_attr->id); } -struct pmu power_pmu = { +static struct pmu power_pmu = { .pmu_enable = power_pmu_enable, .pmu_disable = power_pmu_disable, .event_init = power_pmu_event_init, diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c b/arch/powerpc/platforms/powernv/eeh-ioda.c index c945bed4dc9e..df5c2cc55285 100644 --- a/arch/powerpc/platforms/powernv/eeh-ioda.c +++ b/arch/powerpc/platforms/powernv/eeh-ioda.c @@ -628,8 +628,8 @@ static int ioda_eeh_reset(struct eeh_pe *pe, int option) * Retrieve error log, which contains log from device driver * and firmware. */ -int ioda_eeh_get_log(struct eeh_pe *pe, int severity, - char *drv_log, unsigned long len) +static int ioda_eeh_get_log(struct eeh_pe *pe, int severity, + char *drv_log, unsigned long len) { pnv_pci_dump_phb_diag_data(pe->phb, pe->data); diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index df241b11d4f7..4441bfa84c06 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -385,7 +385,7 @@ static void pnv_ioda_freeze_pe(struct pnv_phb *phb, int pe_no) } } -int pnv_ioda_unfreeze_pe(struct pnv_phb *phb, int pe_no, int opt) +static int pnv_ioda_unfreeze_pe(struct pnv_phb *phb, int pe_no, int opt) { struct pnv_ioda_pe *pe, *slave; s64 rc; @@ -1631,8 +1631,8 @@ static void pnv_pci_ioda_shutdown(struct pnv_phb *phb) OPAL_ASSERT_RESET); } -void __init pnv_pci_init_ioda_phb(struct device_node *np, - u64 hub_id, int ioda_type) +static void __init pnv_pci_init_ioda_phb(struct device_node *np, + u64 hub_id, int ioda_type) { struct pci_controller *hose; struct pnv_phb *phb; diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index 5a0e2dc6de5f..bb1fc9b8d55e 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -307,7 +307,7 @@ static int __init pnv_probe(void) * Returns the cpu frequency for 'cpu' in Hz. This is used by * /proc/cpuinfo */ -unsigned long pnv_get_proc_freq(unsigned int cpu) +static unsigned long pnv_get_proc_freq(unsigned int cpu) { unsigned long ret_freq; diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c index 5fcfcf44e3a9..b73adc573031 100644 --- a/arch/powerpc/platforms/powernv/smp.c +++ b/arch/powerpc/platforms/powernv/smp.c @@ -54,7 +54,7 @@ static void pnv_smp_setup_cpu(int cpu) #endif } -int pnv_smp_kick_cpu(int nr) +static int pnv_smp_kick_cpu(int nr) { unsigned int pcpu = get_hard_smp_processor_id(nr); unsigned long start_here = diff --git a/arch/powerpc/platforms/pseries/nvram.c b/arch/powerpc/platforms/pseries/nvram.c index 0cc240b7f694..11a3b617ef5d 100644 --- a/arch/powerpc/platforms/pseries/nvram.c +++ b/arch/powerpc/platforms/pseries/nvram.c @@ -276,8 +276,10 @@ static ssize_t pSeries_nvram_get_size(void) * sequence #: The unique sequence # for each event. (until it wraps) * error log: The error log from event_scan */ -int nvram_write_os_partition(struct nvram_os_partition *part, char * buff, - int length, unsigned int err_type, unsigned int error_log_cnt) +static int nvram_write_os_partition(struct nvram_os_partition *part, + char *buff, int length, + unsigned int err_type, + unsigned int error_log_cnt) { int rc; loff_t tmp_index; @@ -330,9 +332,9 @@ int nvram_write_error_log(char * buff, int length, * * Reads nvram partition for at most 'length' */ -int nvram_read_partition(struct nvram_os_partition *part, char *buff, - int length, unsigned int *err_type, - unsigned int *error_log_cnt) +static int nvram_read_partition(struct nvram_os_partition *part, char *buff, + int length, unsigned int *err_type, + unsigned int *error_log_cnt) { int rc; loff_t tmp_index; diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index dff05b9eb946..5a4d0fc03b03 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -126,7 +126,7 @@ struct epow_errorlog { #define EPOW_MAIN_ENCLOSURE 5 #define EPOW_POWER_OFF 7 -void rtas_parse_epow_errlog(struct rtas_error_log *log) +static void rtas_parse_epow_errlog(struct rtas_error_log *log) { struct pseries_errorlog *pseries_log; struct epow_errorlog *epow_log; diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index e724d3186e73..125c589eeef5 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -561,7 +561,7 @@ void pSeries_coalesce_init(void) * fw_cmo_feature_init - FW_FEATURE_CMO is not stored in ibm,hypertas-functions, * handle that here. (Stolen from parse_system_parameter_string) */ -void pSeries_cmo_feature_init(void) +static void pSeries_cmo_feature_init(void) { char *ptr, *key, *value, *end; int call_status; diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c index be33c9768ea1..89cec0ed6a58 100644 --- a/arch/powerpc/sysdev/mpic.c +++ b/arch/powerpc/sysdev/mpic.c @@ -960,7 +960,7 @@ void mpic_set_vector(unsigned int virq, unsigned int vector) mpic_irq_write(src, MPIC_INFO(IRQ_VECTOR_PRI), vecpri); } -void mpic_set_destination(unsigned int virq, unsigned int cpuid) +static void mpic_set_destination(unsigned int virq, unsigned int cpuid) { struct mpic *mpic = mpic_from_irq(virq); unsigned int src = virq_to_hw(virq); diff --git a/arch/powerpc/sysdev/msi_bitmap.c b/arch/powerpc/sysdev/msi_bitmap.c index 2ff630267e9e..a7c7a9fc7530 100644 --- a/arch/powerpc/sysdev/msi_bitmap.c +++ b/arch/powerpc/sysdev/msi_bitmap.c @@ -143,7 +143,7 @@ void msi_bitmap_free(struct msi_bitmap *bmp) #define check(x) \ if (!(x)) printk("msi_bitmap: test failed at line %d\n", __LINE__); -void __init test_basics(void) +static void __init test_basics(void) { struct msi_bitmap bmp; int i, size = 512; @@ -188,7 +188,7 @@ void __init test_basics(void) kfree(bmp.bitmap); } -void __init test_of_node(void) +static void __init test_of_node(void) { u32 prop_data[] = { 10, 10, 25, 3, 40, 1, 100, 100, 200, 20 }; const char *expected_str = "0-9,20-24,28-39,41-99,220-255"; @@ -236,7 +236,7 @@ void __init test_of_node(void) kfree(bmp.bitmap); } -int __init msi_bitmap_selftest(void) +static int __init msi_bitmap_selftest(void) { printk(KERN_DEBUG "Running MSI bitmap self-tests ...\n"); From 1217d34b531c76362217057ca70a8ce8950574e0 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 20 Aug 2014 08:55:19 +1000 Subject: [PATCH 043/130] powerpc: Ensure global functions include their prototype Fix a number of places where global functions were not including their prototype. This ensures the prototype and the function match. Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/smp.c | 1 + arch/powerpc/mm/slice.c | 2 ++ arch/powerpc/oprofile/backtrace.c | 1 + arch/powerpc/platforms/powernv/subcore.c | 1 + arch/powerpc/platforms/pseries/dlpar.c | 1 + arch/powerpc/platforms/pseries/hotplug-memory.c | 1 + arch/powerpc/platforms/pseries/pci.c | 1 + 7 files changed, 8 insertions(+) diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index a0738af4aba6..4866d5dbd420 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -52,6 +52,7 @@ #endif #include #include +#include #ifdef DEBUG #include diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c index b0c75cc15efc..86f6a755af0b 100644 --- a/arch/powerpc/mm/slice.c +++ b/arch/powerpc/mm/slice.c @@ -30,9 +30,11 @@ #include #include #include +#include #include #include #include +#include /* some sanity checks */ #if (PGTABLE_RANGE >> 43) > SLICE_MASK_SIZE diff --git a/arch/powerpc/oprofile/backtrace.c b/arch/powerpc/oprofile/backtrace.c index f75301f2c85f..6adf55fa5d88 100644 --- a/arch/powerpc/oprofile/backtrace.c +++ b/arch/powerpc/oprofile/backtrace.c @@ -12,6 +12,7 @@ #include #include #include +#include #define STACK_SP(STACK) *(STACK) diff --git a/arch/powerpc/platforms/powernv/subcore.c b/arch/powerpc/platforms/powernv/subcore.c index 894ecb3eb596..c87f96b79d1a 100644 --- a/arch/powerpc/platforms/powernv/subcore.c +++ b/arch/powerpc/platforms/powernv/subcore.c @@ -24,6 +24,7 @@ #include #include "subcore.h" +#include "powernv.h" /* diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c index a2450b8a50a5..55a1b2f681e0 100644 --- a/arch/powerpc/platforms/pseries/dlpar.c +++ b/arch/powerpc/platforms/pseries/dlpar.c @@ -17,6 +17,7 @@ #include #include #include "offline_states.h" +#include "pseries.h" #include #include diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index 17ee193960a0..187ecfab8362 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -20,6 +20,7 @@ #include #include #include +#include "pseries.h" unsigned long pseries_memory_block_size(void) { diff --git a/arch/powerpc/platforms/pseries/pci.c b/arch/powerpc/platforms/pseries/pci.c index c413ec158ff5..67e48594040c 100644 --- a/arch/powerpc/platforms/pseries/pci.c +++ b/arch/powerpc/platforms/pseries/pci.c @@ -29,6 +29,7 @@ #include #include #include +#include "pseries.h" #if 0 void pcibios_name_device(struct pci_dev *dev) From a38efcea56988761f89a3134145f0d5f9ea68076 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 20 Aug 2014 08:55:20 +1000 Subject: [PATCH 044/130] powerpc: Remove stale function prototypes There were a number of prototypes for functions that no longer exist. Remove them. Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/bug.h | 1 - arch/powerpc/include/asm/hydra.h | 1 - arch/powerpc/include/asm/irq.h | 5 ----- arch/powerpc/include/asm/kexec.h | 1 - arch/powerpc/include/asm/page_64.h | 1 - arch/powerpc/include/asm/pgtable-ppc32.h | 2 -- arch/powerpc/include/asm/prom.h | 2 -- arch/powerpc/include/asm/rio.h | 1 - arch/powerpc/include/asm/tsi108.h | 4 ---- arch/powerpc/include/asm/udbg.h | 1 - arch/powerpc/platforms/pseries/lpar.c | 2 -- 11 files changed, 21 deletions(-) diff --git a/arch/powerpc/include/asm/bug.h b/arch/powerpc/include/asm/bug.h index 3eb53d741070..3a39283333c3 100644 --- a/arch/powerpc/include/asm/bug.h +++ b/arch/powerpc/include/asm/bug.h @@ -133,7 +133,6 @@ extern int do_page_fault(struct pt_regs *, unsigned long, unsigned long); extern void bad_page_fault(struct pt_regs *, unsigned long, int); extern void _exception(int, struct pt_regs *, int, unsigned long); extern void die(const char *, struct pt_regs *, long); -extern void print_backtrace(unsigned long *); #endif /* !__ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/hydra.h b/arch/powerpc/include/asm/hydra.h index 5b0c98bd46ab..1cb39c96d155 100644 --- a/arch/powerpc/include/asm/hydra.h +++ b/arch/powerpc/include/asm/hydra.h @@ -95,7 +95,6 @@ extern volatile struct Hydra __iomem *Hydra; #define HYDRA_INT_SPARE 19 extern int hydra_init(void); -extern void macio_adb_init(void); #endif /* __KERNEL__ */ diff --git a/arch/powerpc/include/asm/irq.h b/arch/powerpc/include/asm/irq.h index 41f13cec8a8f..e8e3a0a04eb0 100644 --- a/arch/powerpc/include/asm/irq.h +++ b/arch/powerpc/include/asm/irq.h @@ -31,11 +31,6 @@ extern atomic_t ppc_n_lost_interrupts; extern irq_hw_number_t virq_to_hw(unsigned int virq); -/** - * irq_early_init - Init irq remapping subsystem - */ -extern void irq_early_init(void); - static __inline__ int irq_canonicalize(int irq) { return irq; diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h index 16d7e33d35e9..19c36cba37c4 100644 --- a/arch/powerpc/include/asm/kexec.h +++ b/arch/powerpc/include/asm/kexec.h @@ -81,7 +81,6 @@ extern void default_machine_crash_shutdown(struct pt_regs *regs); extern int crash_shutdown_register(crash_shutdown_t handler); extern int crash_shutdown_unregister(crash_shutdown_t handler); -extern void machine_kexec_simple(struct kimage *image); extern void crash_kexec_secondary(struct pt_regs *regs); extern int overlaps_crashkernel(unsigned long start, unsigned long size); extern void reserve_crashkernel(void); diff --git a/arch/powerpc/include/asm/page_64.h b/arch/powerpc/include/asm/page_64.h index 88693cef4f3d..d0d6afb353d4 100644 --- a/arch/powerpc/include/asm/page_64.h +++ b/arch/powerpc/include/asm/page_64.h @@ -104,7 +104,6 @@ extern unsigned long slice_get_unmapped_area(unsigned long addr, extern unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr); -extern void slice_init_context(struct mm_struct *mm, unsigned int psize); extern void slice_set_user_psize(struct mm_struct *mm, unsigned int psize); extern void slice_set_range_psize(struct mm_struct *mm, unsigned long start, unsigned long len, unsigned int psize); diff --git a/arch/powerpc/include/asm/pgtable-ppc32.h b/arch/powerpc/include/asm/pgtable-ppc32.h index 47edde8c3556..622672fef309 100644 --- a/arch/powerpc/include/asm/pgtable-ppc32.h +++ b/arch/powerpc/include/asm/pgtable-ppc32.h @@ -8,8 +8,6 @@ #include #include /* For sub-arch specific PPC_PIN_SIZE */ -extern unsigned long va_to_phys(unsigned long address); -extern pte_t *va_to_pte(unsigned long address); extern unsigned long ioremap_bot; #ifdef CONFIG_44x diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h index 74b79f07f041..7f436ba1b56f 100644 --- a/arch/powerpc/include/asm/prom.h +++ b/arch/powerpc/include/asm/prom.h @@ -76,8 +76,6 @@ void of_parse_dma_window(struct device_node *dn, const __be32 *dma_window, unsigned long *busno, unsigned long *phys, unsigned long *size); -extern void kdump_move_device_tree(void); - extern void of_instantiate_rtc(void); extern int of_get_ibm_chip_id(struct device_node *np); diff --git a/arch/powerpc/include/asm/rio.h b/arch/powerpc/include/asm/rio.h index b1d2deceeedb..ec800f28fec5 100644 --- a/arch/powerpc/include/asm/rio.h +++ b/arch/powerpc/include/asm/rio.h @@ -13,7 +13,6 @@ #ifndef ASM_PPC_RIO_H #define ASM_PPC_RIO_H -extern void platform_rio_init(void); #ifdef CONFIG_FSL_RIO extern int fsl_rio_mcheck_exception(struct pt_regs *); #else diff --git a/arch/powerpc/include/asm/tsi108.h b/arch/powerpc/include/asm/tsi108.h index f8b60793b7a9..d531d9e173ef 100644 --- a/arch/powerpc/include/asm/tsi108.h +++ b/arch/powerpc/include/asm/tsi108.h @@ -84,10 +84,6 @@ extern u32 tsi108_pci_cfg_base; /* Exported functions */ -extern int tsi108_bridge_init(struct pci_controller *hose, uint phys_csr_base); -extern unsigned long tsi108_get_mem_size(void); -extern unsigned long tsi108_get_cpu_clk(void); -extern unsigned long tsi108_get_sdc_clk(void); extern int tsi108_direct_write_config(struct pci_bus *bus, unsigned int devfn, int offset, int len, u32 val); extern int tsi108_direct_read_config(struct pci_bus *bus, unsigned int devfn, diff --git a/arch/powerpc/include/asm/udbg.h b/arch/powerpc/include/asm/udbg.h index b51fba10e733..78f2675f2aac 100644 --- a/arch/powerpc/include/asm/udbg.h +++ b/arch/powerpc/include/asm/udbg.h @@ -52,7 +52,6 @@ extern void __init udbg_init_44x_as1(void); extern void __init udbg_init_40x_realmode(void); extern void __init udbg_init_cpm(void); extern void __init udbg_init_usbgecko(void); -extern void __init udbg_init_wsp(void); extern void __init udbg_init_memcons(void); extern void __init udbg_init_ehv_bc(void); extern void __init udbg_init_ps3gelic(void); diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 34e64237fff9..6affea7f0eba 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -59,8 +59,6 @@ EXPORT_SYMBOL(plpar_hcall); EXPORT_SYMBOL(plpar_hcall9); EXPORT_SYMBOL(plpar_hcall_norets); -extern void pSeries_find_serial_port(void); - void vpa_init(int cpu) { int hwcpu = get_hard_smp_processor_id(cpu); From f6026df1a4997db64e8201627421758585a15f55 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 20 Aug 2014 08:55:21 +1000 Subject: [PATCH 045/130] powerpc: Move htab_remove_mapping function prototype into header file A recent patch added a function prototype for htab_remove_mapping in c code. Fix it. Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/mmu-hash64.h | 2 ++ arch/powerpc/mm/init_64.c | 3 --- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/include/asm/mmu-hash64.h b/arch/powerpc/include/asm/mmu-hash64.h index d76514487d6f..92bc3a637923 100644 --- a/arch/powerpc/include/asm/mmu-hash64.h +++ b/arch/powerpc/include/asm/mmu-hash64.h @@ -342,6 +342,8 @@ extern void hash_failure_debug(unsigned long ea, unsigned long access, extern int htab_bolt_mapping(unsigned long vstart, unsigned long vend, unsigned long pstart, unsigned long prot, int psize, int ssize); +int htab_remove_mapping(unsigned long vstart, unsigned long vend, + int psize, int ssize); extern void add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages); extern void demote_segment_4k(struct mm_struct *mm, unsigned long addr); diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index 253b4b971c8a..3481556a1880 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -233,9 +233,6 @@ static void __meminit vmemmap_create_mapping(unsigned long start, } #ifdef CONFIG_MEMORY_HOTPLUG -extern int htab_remove_mapping(unsigned long vstart, unsigned long vend, - int psize, int ssize); - static void vmemmap_remove_mapping(unsigned long start, unsigned long page_size) { From 22e55fcfd6002accfe1e57e59cf429e6700d4ab4 Mon Sep 17 00:00:00 2001 From: Pranith Kumar Date: Tue, 19 Aug 2014 23:24:15 -0400 Subject: [PATCH 046/130] powerpc: Export dcr_ind_lock to fix build error Fix build error caused by missing export: ERROR: "dcr_ind_lock" [drivers/net/ethernet/ibm/emac/ibm_emac.ko] undefined! Signed-off-by: Pranith Kumar Signed-off-by: Michael Ellerman --- arch/powerpc/sysdev/dcr.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/sysdev/dcr.c b/arch/powerpc/sysdev/dcr.c index e9056e438575..2d8a101b6b9e 100644 --- a/arch/powerpc/sysdev/dcr.c +++ b/arch/powerpc/sysdev/dcr.c @@ -230,5 +230,6 @@ EXPORT_SYMBOL_GPL(dcr_unmap_mmio); #ifdef CONFIG_PPC_DCR_NATIVE DEFINE_SPINLOCK(dcr_ind_lock); +EXPORT_SYMBOL_GPL(dcr_ind_lock); #endif /* defined(CONFIG_PPC_DCR_NATIVE) */ From d4fe0965e20820f3dd05bcc4d89de3da29bb83aa Mon Sep 17 00:00:00 2001 From: Zhouyi Zhou Date: Thu, 21 Aug 2014 10:41:23 +0800 Subject: [PATCH 047/130] powerpc/jump_label: use HAVE_JUMP_LABEL? CONFIG_JUMP_LABEL doesn't ensure HAVE_JUMP_LABEL, if it is not the case use maintainers's own mutex to guard the modification of global values. Signed-off-by: Zhouyi Zhou Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/opal-tracepoints.c | 2 +- arch/powerpc/platforms/pseries/lpar.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/powernv/opal-tracepoints.c b/arch/powerpc/platforms/powernv/opal-tracepoints.c index d8a000a9988b..ae14c40b4b1c 100644 --- a/arch/powerpc/platforms/powernv/opal-tracepoints.c +++ b/arch/powerpc/platforms/powernv/opal-tracepoints.c @@ -2,7 +2,7 @@ #include #include -#ifdef CONFIG_JUMP_LABEL +#ifdef HAVE_JUMP_LABEL struct static_key opal_tracepoint_key = STATIC_KEY_INIT; void opal_tracepoint_regfunc(void) diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 6affea7f0eba..8c509d5397c6 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -640,7 +640,7 @@ EXPORT_SYMBOL(arch_free_page); #endif #ifdef CONFIG_TRACEPOINTS -#ifdef CONFIG_JUMP_LABEL +#ifdef HAVE_JUMP_LABEL struct static_key hcall_tracepoint_key = STATIC_KEY_INIT; void hcall_tracepoint_regfunc(void) From ce6d73c94d7cfbdc3dc31860fe14af2f37d1b4fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Wed, 10 Sep 2014 21:56:38 +0200 Subject: [PATCH 048/130] powerpc: make of_device_ids const MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit of_device_ids (i.e. compatible strings and the respective data) are not supposed to change at runtime. All functions working with of_device_ids provided by work with const of_device_ids. This allows to mark all struct of_device_id const, too. While touching these line also put the __init annotation at the right position where necessary. Signed-off-by: Uwe Kleine-König Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/ibmebus.c | 2 +- arch/powerpc/kernel/legacy_serial.c | 2 +- arch/powerpc/kernel/of_platform.c | 2 +- arch/powerpc/platforms/40x/ep405.c | 2 +- arch/powerpc/platforms/40x/ppc40x_simple.c | 2 +- arch/powerpc/platforms/40x/virtex.c | 2 +- arch/powerpc/platforms/40x/walnut.c | 2 +- arch/powerpc/platforms/44x/canyonlands.c | 2 +- arch/powerpc/platforms/44x/ebony.c | 2 +- arch/powerpc/platforms/44x/iss4xx.c | 2 +- arch/powerpc/platforms/44x/ppc44x_simple.c | 2 +- arch/powerpc/platforms/44x/ppc476.c | 2 +- arch/powerpc/platforms/44x/sam440ep.c | 2 +- arch/powerpc/platforms/44x/virtex.c | 2 +- arch/powerpc/platforms/44x/warp.c | 2 +- arch/powerpc/platforms/512x/mpc512x_shared.c | 2 +- arch/powerpc/platforms/52xx/lite5200.c | 4 ++-- arch/powerpc/platforms/52xx/media5200.c | 2 +- arch/powerpc/platforms/52xx/mpc52xx_common.c | 12 ++++++------ arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c | 2 +- arch/powerpc/platforms/52xx/mpc52xx_pic.c | 4 ++-- arch/powerpc/platforms/82xx/ep8248e.c | 2 +- arch/powerpc/platforms/82xx/km82xx.c | 2 +- arch/powerpc/platforms/82xx/mpc8272_ads.c | 2 +- arch/powerpc/platforms/82xx/pq2fads.c | 2 +- arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c | 2 +- arch/powerpc/platforms/83xx/misc.c | 2 +- arch/powerpc/platforms/83xx/mpc834x_itx.c | 2 +- arch/powerpc/platforms/83xx/suspend.c | 4 ++-- arch/powerpc/platforms/85xx/common.c | 2 +- arch/powerpc/platforms/85xx/ppa8548.c | 2 +- arch/powerpc/platforms/85xx/sgy_cts1000.c | 4 ++-- arch/powerpc/platforms/86xx/gef_ppc9a.c | 2 +- arch/powerpc/platforms/86xx/gef_sbc310.c | 2 +- arch/powerpc/platforms/86xx/gef_sbc610.c | 2 +- arch/powerpc/platforms/86xx/mpc8610_hpcd.c | 2 +- arch/powerpc/platforms/86xx/mpc86xx_hpcn.c | 2 +- arch/powerpc/platforms/86xx/sbc8641d.c | 2 +- arch/powerpc/platforms/8xx/adder875.c | 2 +- arch/powerpc/platforms/8xx/ep88xc.c | 2 +- arch/powerpc/platforms/8xx/mpc86xads_setup.c | 2 +- arch/powerpc/platforms/8xx/mpc885ads_setup.c | 2 +- arch/powerpc/platforms/8xx/tqm8xx_setup.c | 2 +- arch/powerpc/platforms/cell/celleb_pci.c | 2 +- arch/powerpc/platforms/cell/celleb_setup.c | 2 +- arch/powerpc/platforms/embedded6xx/gamecube.c | 2 +- arch/powerpc/platforms/embedded6xx/linkstation.c | 2 +- arch/powerpc/platforms/embedded6xx/mvme5100.c | 2 +- arch/powerpc/platforms/embedded6xx/storcenter.c | 2 +- arch/powerpc/platforms/embedded6xx/wii.c | 2 +- arch/powerpc/platforms/pasemi/gpio_mdio.c | 2 +- arch/powerpc/platforms/pasemi/setup.c | 2 +- arch/powerpc/sysdev/axonram.c | 2 +- arch/powerpc/sysdev/fsl_85xx_l2ctlr.c | 2 +- arch/powerpc/sysdev/mv64x60_dev.c | 2 +- arch/powerpc/sysdev/pmi.c | 2 +- arch/powerpc/sysdev/xilinx_intc.c | 2 +- arch/powerpc/sysdev/xilinx_pci.c | 2 +- 58 files changed, 67 insertions(+), 67 deletions(-) diff --git a/arch/powerpc/kernel/ibmebus.c b/arch/powerpc/kernel/ibmebus.c index 1114d13ac19f..ac86c53e2542 100644 --- a/arch/powerpc/kernel/ibmebus.c +++ b/arch/powerpc/kernel/ibmebus.c @@ -55,7 +55,7 @@ static struct device ibmebus_bus_device = { /* fake "parent" device */ struct bus_type ibmebus_bus_type; /* These devices will automatically be added to the bus during init */ -static struct of_device_id __initdata ibmebus_matches[] = { +static const struct of_device_id ibmebus_matches[] __initconst = { { .compatible = "IBM,lhca" }, { .compatible = "IBM,lhea" }, {}, diff --git a/arch/powerpc/kernel/legacy_serial.c b/arch/powerpc/kernel/legacy_serial.c index 936258881c98..7b750c4ed5c7 100644 --- a/arch/powerpc/kernel/legacy_serial.c +++ b/arch/powerpc/kernel/legacy_serial.c @@ -35,7 +35,7 @@ static struct legacy_serial_info { phys_addr_t taddr; } legacy_serial_infos[MAX_LEGACY_SERIAL_PORTS]; -static struct of_device_id legacy_serial_parents[] __initdata = { +static const struct of_device_id legacy_serial_parents[] __initconst = { {.type = "soc",}, {.type = "tsi-bridge",}, {.type = "opb", }, diff --git a/arch/powerpc/kernel/of_platform.c b/arch/powerpc/kernel/of_platform.c index a7b743076720..f87bc1b4bdda 100644 --- a/arch/powerpc/kernel/of_platform.c +++ b/arch/powerpc/kernel/of_platform.c @@ -97,7 +97,7 @@ static int of_pci_phb_probe(struct platform_device *dev) return 0; } -static struct of_device_id of_pci_phb_ids[] = { +static const struct of_device_id of_pci_phb_ids[] = { { .type = "pci", }, { .type = "pcix", }, { .type = "pcie", }, diff --git a/arch/powerpc/platforms/40x/ep405.c b/arch/powerpc/platforms/40x/ep405.c index b0389bbe4f94..ddc12a1926ef 100644 --- a/arch/powerpc/platforms/40x/ep405.c +++ b/arch/powerpc/platforms/40x/ep405.c @@ -49,7 +49,7 @@ static void __iomem *bcsr_regs; /* there's more, can't be bothered typing them tho */ -static __initdata struct of_device_id ep405_of_bus[] = { +static const struct of_device_id ep405_of_bus[] __initconst = { { .compatible = "ibm,plb3", }, { .compatible = "ibm,opb", }, { .compatible = "ibm,ebc", }, diff --git a/arch/powerpc/platforms/40x/ppc40x_simple.c b/arch/powerpc/platforms/40x/ppc40x_simple.c index 8f3920e5a046..b0c46375dd95 100644 --- a/arch/powerpc/platforms/40x/ppc40x_simple.c +++ b/arch/powerpc/platforms/40x/ppc40x_simple.c @@ -24,7 +24,7 @@ #include #include -static __initdata struct of_device_id ppc40x_of_bus[] = { +static const struct of_device_id ppc40x_of_bus[] __initconst = { { .compatible = "ibm,plb3", }, { .compatible = "ibm,plb4", }, { .compatible = "ibm,opb", }, diff --git a/arch/powerpc/platforms/40x/virtex.c b/arch/powerpc/platforms/40x/virtex.c index d0fc6866b00c..9aa7ae2f4164 100644 --- a/arch/powerpc/platforms/40x/virtex.c +++ b/arch/powerpc/platforms/40x/virtex.c @@ -17,7 +17,7 @@ #include #include -static struct of_device_id xilinx_of_bus_ids[] __initdata = { +static const struct of_device_id xilinx_of_bus_ids[] __initconst = { { .compatible = "xlnx,plb-v46-1.00.a", }, { .compatible = "xlnx,plb-v34-1.01.a", }, { .compatible = "xlnx,plb-v34-1.02.a", }, diff --git a/arch/powerpc/platforms/40x/walnut.c b/arch/powerpc/platforms/40x/walnut.c index 8b691df72f74..f7ac2d0fcb44 100644 --- a/arch/powerpc/platforms/40x/walnut.c +++ b/arch/powerpc/platforms/40x/walnut.c @@ -28,7 +28,7 @@ #include #include -static __initdata struct of_device_id walnut_of_bus[] = { +static const struct of_device_id walnut_of_bus[] __initconst = { { .compatible = "ibm,plb3", }, { .compatible = "ibm,opb", }, { .compatible = "ibm,ebc", }, diff --git a/arch/powerpc/platforms/44x/canyonlands.c b/arch/powerpc/platforms/44x/canyonlands.c index e300dd4c89bf..22ca5430c9cb 100644 --- a/arch/powerpc/platforms/44x/canyonlands.c +++ b/arch/powerpc/platforms/44x/canyonlands.c @@ -33,7 +33,7 @@ #define BCSR_USB_EN 0x11 -static __initdata struct of_device_id ppc460ex_of_bus[] = { +static const struct of_device_id ppc460ex_of_bus[] __initconst = { { .compatible = "ibm,plb4", }, { .compatible = "ibm,opb", }, { .compatible = "ibm,ebc", }, diff --git a/arch/powerpc/platforms/44x/ebony.c b/arch/powerpc/platforms/44x/ebony.c index 6a4232bbdf88..ae893226392d 100644 --- a/arch/powerpc/platforms/44x/ebony.c +++ b/arch/powerpc/platforms/44x/ebony.c @@ -28,7 +28,7 @@ #include #include -static __initdata struct of_device_id ebony_of_bus[] = { +static const struct of_device_id ebony_of_bus[] __initconst = { { .compatible = "ibm,plb4", }, { .compatible = "ibm,opb", }, { .compatible = "ibm,ebc", }, diff --git a/arch/powerpc/platforms/44x/iss4xx.c b/arch/powerpc/platforms/44x/iss4xx.c index 4241bc825800..c7c6758b3cfe 100644 --- a/arch/powerpc/platforms/44x/iss4xx.c +++ b/arch/powerpc/platforms/44x/iss4xx.c @@ -32,7 +32,7 @@ #include #include -static __initdata struct of_device_id iss4xx_of_bus[] = { +static const struct of_device_id iss4xx_of_bus[] __initconst = { { .compatible = "ibm,plb4", }, { .compatible = "ibm,plb6", }, { .compatible = "ibm,opb", }, diff --git a/arch/powerpc/platforms/44x/ppc44x_simple.c b/arch/powerpc/platforms/44x/ppc44x_simple.c index 3ffb915446e3..573c3d2689c6 100644 --- a/arch/powerpc/platforms/44x/ppc44x_simple.c +++ b/arch/powerpc/platforms/44x/ppc44x_simple.c @@ -24,7 +24,7 @@ #include #include -static __initdata struct of_device_id ppc44x_of_bus[] = { +static const struct of_device_id ppc44x_of_bus[] __initconst = { { .compatible = "ibm,plb4", }, { .compatible = "ibm,opb", }, { .compatible = "ibm,ebc", }, diff --git a/arch/powerpc/platforms/44x/ppc476.c b/arch/powerpc/platforms/44x/ppc476.c index 33986c1a05da..58db9d083969 100644 --- a/arch/powerpc/platforms/44x/ppc476.c +++ b/arch/powerpc/platforms/44x/ppc476.c @@ -38,7 +38,7 @@ #include #include -static struct of_device_id ppc47x_of_bus[] __initdata = { +static const struct of_device_id ppc47x_of_bus[] __initconst = { { .compatible = "ibm,plb4", }, { .compatible = "ibm,plb6", }, { .compatible = "ibm,opb", }, diff --git a/arch/powerpc/platforms/44x/sam440ep.c b/arch/powerpc/platforms/44x/sam440ep.c index 9e09b835758b..3ee4a03c1496 100644 --- a/arch/powerpc/platforms/44x/sam440ep.c +++ b/arch/powerpc/platforms/44x/sam440ep.c @@ -29,7 +29,7 @@ #include #include -static __initdata struct of_device_id sam440ep_of_bus[] = { +static const struct of_device_id sam440ep_of_bus[] __initconst = { { .compatible = "ibm,plb4", }, { .compatible = "ibm,opb", }, { .compatible = "ibm,ebc", }, diff --git a/arch/powerpc/platforms/44x/virtex.c b/arch/powerpc/platforms/44x/virtex.c index cf96ccaa760c..ad272c17c640 100644 --- a/arch/powerpc/platforms/44x/virtex.c +++ b/arch/powerpc/platforms/44x/virtex.c @@ -21,7 +21,7 @@ #include #include "44x.h" -static struct of_device_id xilinx_of_bus_ids[] __initdata = { +static const struct of_device_id xilinx_of_bus_ids[] __initconst = { { .compatible = "simple-bus", }, { .compatible = "xlnx,plb-v46-1.00.a", }, { .compatible = "xlnx,plb-v46-1.02.a", }, diff --git a/arch/powerpc/platforms/44x/warp.c b/arch/powerpc/platforms/44x/warp.c index 3a104284b338..501333cf42cf 100644 --- a/arch/powerpc/platforms/44x/warp.c +++ b/arch/powerpc/platforms/44x/warp.c @@ -28,7 +28,7 @@ #include -static __initdata struct of_device_id warp_of_bus[] = { +static const struct of_device_id warp_of_bus[] __initconst = { { .compatible = "ibm,plb4", }, { .compatible = "ibm,opb", }, { .compatible = "ibm,ebc", }, diff --git a/arch/powerpc/platforms/512x/mpc512x_shared.c b/arch/powerpc/platforms/512x/mpc512x_shared.c index adb95f03d4d4..e996e007bc44 100644 --- a/arch/powerpc/platforms/512x/mpc512x_shared.c +++ b/arch/powerpc/platforms/512x/mpc512x_shared.c @@ -337,7 +337,7 @@ void __init mpc512x_init_IRQ(void) /* * Nodes to do bus probe on, soc and localbus */ -static struct of_device_id __initdata of_bus_ids[] = { +static const struct of_device_id of_bus_ids[] __initconst = { { .compatible = "fsl,mpc5121-immr", }, { .compatible = "fsl,mpc5121-localbus", }, { .compatible = "fsl,mpc5121-mbx", }, diff --git a/arch/powerpc/platforms/52xx/lite5200.c b/arch/powerpc/platforms/52xx/lite5200.c index 1843bc932011..7492de3cf6d0 100644 --- a/arch/powerpc/platforms/52xx/lite5200.c +++ b/arch/powerpc/platforms/52xx/lite5200.c @@ -34,13 +34,13 @@ */ /* mpc5200 device tree match tables */ -static struct of_device_id mpc5200_cdm_ids[] __initdata = { +static const struct of_device_id mpc5200_cdm_ids[] __initconst = { { .compatible = "fsl,mpc5200-cdm", }, { .compatible = "mpc5200-cdm", }, {} }; -static struct of_device_id mpc5200_gpio_ids[] __initdata = { +static const struct of_device_id mpc5200_gpio_ids[] __initconst = { { .compatible = "fsl,mpc5200-gpio", }, { .compatible = "mpc5200-gpio", }, {} diff --git a/arch/powerpc/platforms/52xx/media5200.c b/arch/powerpc/platforms/52xx/media5200.c index 070d315dd6cd..32cae33c4266 100644 --- a/arch/powerpc/platforms/52xx/media5200.c +++ b/arch/powerpc/platforms/52xx/media5200.c @@ -30,7 +30,7 @@ #include #include -static struct of_device_id mpc5200_gpio_ids[] __initdata = { +static const struct of_device_id mpc5200_gpio_ids[] __initconst = { { .compatible = "fsl,mpc5200-gpio", }, { .compatible = "mpc5200-gpio", }, {} diff --git a/arch/powerpc/platforms/52xx/mpc52xx_common.c b/arch/powerpc/platforms/52xx/mpc52xx_common.c index d7e94f49532a..26993826a797 100644 --- a/arch/powerpc/platforms/52xx/mpc52xx_common.c +++ b/arch/powerpc/platforms/52xx/mpc52xx_common.c @@ -23,12 +23,12 @@ #include /* MPC5200 device tree match tables */ -static struct of_device_id mpc52xx_xlb_ids[] __initdata = { +static const struct of_device_id mpc52xx_xlb_ids[] __initconst = { { .compatible = "fsl,mpc5200-xlb", }, { .compatible = "mpc5200-xlb", }, {} }; -static struct of_device_id mpc52xx_bus_ids[] __initdata = { +static const struct of_device_id mpc52xx_bus_ids[] __initconst = { { .compatible = "fsl,mpc5200-immr", }, { .compatible = "fsl,mpc5200b-immr", }, { .compatible = "simple-bus", }, @@ -108,21 +108,21 @@ void __init mpc52xx_declare_of_platform_devices(void) /* * match tables used by mpc52xx_map_common_devices() */ -static struct of_device_id mpc52xx_gpt_ids[] __initdata = { +static const struct of_device_id mpc52xx_gpt_ids[] __initconst = { { .compatible = "fsl,mpc5200-gpt", }, { .compatible = "mpc5200-gpt", }, /* old */ {} }; -static struct of_device_id mpc52xx_cdm_ids[] __initdata = { +static const struct of_device_id mpc52xx_cdm_ids[] __initconst = { { .compatible = "fsl,mpc5200-cdm", }, { .compatible = "mpc5200-cdm", }, /* old */ {} }; -static const struct of_device_id mpc52xx_gpio_simple[] = { +static const struct of_device_id mpc52xx_gpio_simple[] __initconst = { { .compatible = "fsl,mpc5200-gpio", }, {} }; -static const struct of_device_id mpc52xx_gpio_wkup[] = { +static const struct of_device_id mpc52xx_gpio_wkup[] __initconst = { { .compatible = "fsl,mpc5200-gpio-wkup", }, {} }; diff --git a/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c b/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c index 37f7a89c10f2..f8f0081759fb 100644 --- a/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c +++ b/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c @@ -564,7 +564,7 @@ static int mpc52xx_lpbfifo_remove(struct platform_device *op) return 0; } -static struct of_device_id mpc52xx_lpbfifo_match[] = { +static const struct of_device_id mpc52xx_lpbfifo_match[] = { { .compatible = "fsl,mpc5200-lpbfifo", }, {}, }; diff --git a/arch/powerpc/platforms/52xx/mpc52xx_pic.c b/arch/powerpc/platforms/52xx/mpc52xx_pic.c index 2898b737deb7..2944bc84b9d6 100644 --- a/arch/powerpc/platforms/52xx/mpc52xx_pic.c +++ b/arch/powerpc/platforms/52xx/mpc52xx_pic.c @@ -119,12 +119,12 @@ /* MPC5200 device tree match tables */ -static struct of_device_id mpc52xx_pic_ids[] __initdata = { +static const struct of_device_id mpc52xx_pic_ids[] __initconst = { { .compatible = "fsl,mpc5200-pic", }, { .compatible = "mpc5200-pic", }, {} }; -static struct of_device_id mpc52xx_sdma_ids[] __initdata = { +static const struct of_device_id mpc52xx_sdma_ids[] __initconst = { { .compatible = "fsl,mpc5200-bestcomm", }, { .compatible = "mpc5200-bestcomm", }, {} diff --git a/arch/powerpc/platforms/82xx/ep8248e.c b/arch/powerpc/platforms/82xx/ep8248e.c index 79799b29ffe2..3d0c3a01143d 100644 --- a/arch/powerpc/platforms/82xx/ep8248e.c +++ b/arch/powerpc/platforms/82xx/ep8248e.c @@ -298,7 +298,7 @@ static void __init ep8248e_setup_arch(void) ppc_md.progress("ep8248e_setup_arch(), finish", 0); } -static __initdata struct of_device_id of_bus_ids[] = { +static const struct of_device_id of_bus_ids[] __initconst = { { .compatible = "simple-bus", }, { .compatible = "fsl,ep8248e-bcsr", }, {}, diff --git a/arch/powerpc/platforms/82xx/km82xx.c b/arch/powerpc/platforms/82xx/km82xx.c index 058cc1895c88..387b446f4161 100644 --- a/arch/powerpc/platforms/82xx/km82xx.c +++ b/arch/powerpc/platforms/82xx/km82xx.c @@ -180,7 +180,7 @@ static void __init km82xx_setup_arch(void) ppc_md.progress("km82xx_setup_arch(), finish", 0); } -static __initdata struct of_device_id of_bus_ids[] = { +static const struct of_device_id of_bus_ids[] __initconst = { { .compatible = "simple-bus", }, {}, }; diff --git a/arch/powerpc/platforms/82xx/mpc8272_ads.c b/arch/powerpc/platforms/82xx/mpc8272_ads.c index 6a14cf50f4a2..d24deacf07d0 100644 --- a/arch/powerpc/platforms/82xx/mpc8272_ads.c +++ b/arch/powerpc/platforms/82xx/mpc8272_ads.c @@ -181,7 +181,7 @@ static void __init mpc8272_ads_setup_arch(void) ppc_md.progress("mpc8272_ads_setup_arch(), finish", 0); } -static struct of_device_id __initdata of_bus_ids[] = { +static const struct of_device_id of_bus_ids[] __initconst = { { .name = "soc", }, { .name = "cpm", }, { .name = "localbus", }, diff --git a/arch/powerpc/platforms/82xx/pq2fads.c b/arch/powerpc/platforms/82xx/pq2fads.c index e5f82ec8df17..3a5164ad10ad 100644 --- a/arch/powerpc/platforms/82xx/pq2fads.c +++ b/arch/powerpc/platforms/82xx/pq2fads.c @@ -168,7 +168,7 @@ static int __init pq2fads_probe(void) return of_flat_dt_is_compatible(root, "fsl,pq2fads"); } -static struct of_device_id __initdata of_bus_ids[] = { +static const struct of_device_id of_bus_ids[] __initconst = { { .name = "soc", }, { .name = "cpm", }, { .name = "localbus", }, diff --git a/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c b/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c index e238b6a55b15..31053eec5cd0 100644 --- a/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c +++ b/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c @@ -213,7 +213,7 @@ static const struct i2c_device_id mcu_ids[] = { }; MODULE_DEVICE_TABLE(i2c, mcu_ids); -static struct of_device_id mcu_of_match_table[] = { +static const struct of_device_id mcu_of_match_table[] = { { .compatible = "fsl,mcu-mpc8349emitx", }, { }, }; diff --git a/arch/powerpc/platforms/83xx/misc.c b/arch/powerpc/platforms/83xx/misc.c index 125336f750c6..ef9d01a049c1 100644 --- a/arch/powerpc/platforms/83xx/misc.c +++ b/arch/powerpc/platforms/83xx/misc.c @@ -114,7 +114,7 @@ void __init mpc83xx_ipic_and_qe_init_IRQ(void) } #endif /* CONFIG_QUICC_ENGINE */ -static struct of_device_id __initdata of_bus_ids[] = { +static const struct of_device_id of_bus_ids[] __initconst = { { .type = "soc", }, { .compatible = "soc", }, { .compatible = "simple-bus" }, diff --git a/arch/powerpc/platforms/83xx/mpc834x_itx.c b/arch/powerpc/platforms/83xx/mpc834x_itx.c index a494fa57bdf9..80aea8c4b5a3 100644 --- a/arch/powerpc/platforms/83xx/mpc834x_itx.c +++ b/arch/powerpc/platforms/83xx/mpc834x_itx.c @@ -38,7 +38,7 @@ #include "mpc83xx.h" -static struct of_device_id __initdata mpc834x_itx_ids[] = { +static const struct of_device_id mpc834x_itx_ids[] __initconst = { { .compatible = "fsl,pq2pro-localbus", }, {}, }; diff --git a/arch/powerpc/platforms/83xx/suspend.c b/arch/powerpc/platforms/83xx/suspend.c index 4b4c081df94d..eeb80e25214d 100644 --- a/arch/powerpc/platforms/83xx/suspend.c +++ b/arch/powerpc/platforms/83xx/suspend.c @@ -321,7 +321,7 @@ static const struct platform_suspend_ops mpc83xx_suspend_ops = { .end = mpc83xx_suspend_end, }; -static struct of_device_id pmc_match[]; +static const struct of_device_id pmc_match[]; static int pmc_probe(struct platform_device *ofdev) { const struct of_device_id *match; @@ -420,7 +420,7 @@ static struct pmc_type pmc_types[] = { } }; -static struct of_device_id pmc_match[] = { +static const struct of_device_id pmc_match[] = { { .compatible = "fsl,mpc8313-pmc", .data = &pmc_types[0], diff --git a/arch/powerpc/platforms/85xx/common.c b/arch/powerpc/platforms/85xx/common.c index b564b5e23f7c..4a9ad871a168 100644 --- a/arch/powerpc/platforms/85xx/common.c +++ b/arch/powerpc/platforms/85xx/common.c @@ -14,7 +14,7 @@ #include "mpc85xx.h" -static struct of_device_id __initdata mpc85xx_common_ids[] = { +static const struct of_device_id mpc85xx_common_ids[] __initconst = { { .type = "soc", }, { .compatible = "soc", }, { .compatible = "simple-bus", }, diff --git a/arch/powerpc/platforms/85xx/ppa8548.c b/arch/powerpc/platforms/85xx/ppa8548.c index 3daff7c63569..12019f17f297 100644 --- a/arch/powerpc/platforms/85xx/ppa8548.c +++ b/arch/powerpc/platforms/85xx/ppa8548.c @@ -59,7 +59,7 @@ static void ppa8548_show_cpuinfo(struct seq_file *m) seq_printf(m, "PLL setting\t: 0x%x\n", ((phid1 >> 24) & 0x3f)); } -static struct of_device_id __initdata of_bus_ids[] = { +static const struct of_device_id of_bus_ids[] __initconst = { { .name = "soc", }, { .type = "soc", }, { .compatible = "simple-bus", }, diff --git a/arch/powerpc/platforms/85xx/sgy_cts1000.c b/arch/powerpc/platforms/85xx/sgy_cts1000.c index bb75add67084..8162b0412117 100644 --- a/arch/powerpc/platforms/85xx/sgy_cts1000.c +++ b/arch/powerpc/platforms/85xx/sgy_cts1000.c @@ -24,7 +24,7 @@ static struct device_node *halt_node; -static struct of_device_id child_match[] = { +static const struct of_device_id child_match[] = { { .compatible = "sgy,gpio-halt", }, @@ -147,7 +147,7 @@ static int gpio_halt_remove(struct platform_device *pdev) return 0; } -static struct of_device_id gpio_halt_match[] = { +static const struct of_device_id gpio_halt_match[] = { /* We match on the gpio bus itself and scan the children since they * wont be matched against us. We know the bus wont match until it * has been registered too. */ diff --git a/arch/powerpc/platforms/86xx/gef_ppc9a.c b/arch/powerpc/platforms/86xx/gef_ppc9a.c index c23f3443880a..bf17933b20f3 100644 --- a/arch/powerpc/platforms/86xx/gef_ppc9a.c +++ b/arch/powerpc/platforms/86xx/gef_ppc9a.c @@ -213,7 +213,7 @@ static long __init mpc86xx_time_init(void) return 0; } -static __initdata struct of_device_id of_bus_ids[] = { +static const struct of_device_id of_bus_ids[] __initconst = { { .compatible = "simple-bus", }, { .compatible = "gianfar", }, { .compatible = "fsl,mpc8641-pcie", }, diff --git a/arch/powerpc/platforms/86xx/gef_sbc310.c b/arch/powerpc/platforms/86xx/gef_sbc310.c index 8a6ac20686ea..8facf5873866 100644 --- a/arch/powerpc/platforms/86xx/gef_sbc310.c +++ b/arch/powerpc/platforms/86xx/gef_sbc310.c @@ -200,7 +200,7 @@ static long __init mpc86xx_time_init(void) return 0; } -static __initdata struct of_device_id of_bus_ids[] = { +static const struct of_device_id of_bus_ids[] __initconst = { { .compatible = "simple-bus", }, { .compatible = "gianfar", }, { .compatible = "fsl,mpc8641-pcie", }, diff --git a/arch/powerpc/platforms/86xx/gef_sbc610.c b/arch/powerpc/platforms/86xx/gef_sbc610.c index 06c72636f299..8c9058df5642 100644 --- a/arch/powerpc/platforms/86xx/gef_sbc610.c +++ b/arch/powerpc/platforms/86xx/gef_sbc610.c @@ -190,7 +190,7 @@ static long __init mpc86xx_time_init(void) return 0; } -static __initdata struct of_device_id of_bus_ids[] = { +static const struct of_device_id of_bus_ids[] __initconst = { { .compatible = "simple-bus", }, { .compatible = "gianfar", }, { .compatible = "fsl,mpc8641-pcie", }, diff --git a/arch/powerpc/platforms/86xx/mpc8610_hpcd.c b/arch/powerpc/platforms/86xx/mpc8610_hpcd.c index d479d68fbb2b..55413a547ea8 100644 --- a/arch/powerpc/platforms/86xx/mpc8610_hpcd.c +++ b/arch/powerpc/platforms/86xx/mpc8610_hpcd.c @@ -85,7 +85,7 @@ static void __init mpc8610_suspend_init(void) static inline void mpc8610_suspend_init(void) { } #endif /* CONFIG_SUSPEND */ -static struct of_device_id __initdata mpc8610_ids[] = { +static const struct of_device_id mpc8610_ids[] __initconst = { { .compatible = "fsl,mpc8610-immr", }, { .compatible = "fsl,mpc8610-guts", }, { .compatible = "simple-bus", }, diff --git a/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c b/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c index e8bf3fae5606..07ccb1b0cc7d 100644 --- a/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c +++ b/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c @@ -127,7 +127,7 @@ mpc86xx_time_init(void) return 0; } -static __initdata struct of_device_id of_bus_ids[] = { +static const struct of_device_id of_bus_ids[] __initconst = { { .compatible = "simple-bus", }, { .compatible = "fsl,srio", }, { .compatible = "gianfar", }, diff --git a/arch/powerpc/platforms/86xx/sbc8641d.c b/arch/powerpc/platforms/86xx/sbc8641d.c index b47a8fd0f3d3..6810b71d54a7 100644 --- a/arch/powerpc/platforms/86xx/sbc8641d.c +++ b/arch/powerpc/platforms/86xx/sbc8641d.c @@ -92,7 +92,7 @@ mpc86xx_time_init(void) return 0; } -static __initdata struct of_device_id of_bus_ids[] = { +static const struct of_device_id of_bus_ids[] __initconst = { { .compatible = "simple-bus", }, { .compatible = "gianfar", }, { .compatible = "fsl,mpc8641-pcie", }, diff --git a/arch/powerpc/platforms/8xx/adder875.c b/arch/powerpc/platforms/8xx/adder875.c index 82363e98f50e..61cae4c1edb8 100644 --- a/arch/powerpc/platforms/8xx/adder875.c +++ b/arch/powerpc/platforms/8xx/adder875.c @@ -92,7 +92,7 @@ static int __init adder875_probe(void) return of_flat_dt_is_compatible(root, "analogue-and-micro,adder875"); } -static __initdata struct of_device_id of_bus_ids[] = { +static const struct of_device_id of_bus_ids[] __initconst = { { .compatible = "simple-bus", }, {}, }; diff --git a/arch/powerpc/platforms/8xx/ep88xc.c b/arch/powerpc/platforms/8xx/ep88xc.c index e62166681d08..2bedeb7d5f8f 100644 --- a/arch/powerpc/platforms/8xx/ep88xc.c +++ b/arch/powerpc/platforms/8xx/ep88xc.c @@ -147,7 +147,7 @@ static int __init ep88xc_probe(void) return of_flat_dt_is_compatible(root, "fsl,ep88xc"); } -static struct of_device_id __initdata of_bus_ids[] = { +static const struct of_device_id of_bus_ids[] __initconst = { { .name = "soc", }, { .name = "cpm", }, { .name = "localbus", }, diff --git a/arch/powerpc/platforms/8xx/mpc86xads_setup.c b/arch/powerpc/platforms/8xx/mpc86xads_setup.c index 63084640c5c5..78180c5e73ff 100644 --- a/arch/powerpc/platforms/8xx/mpc86xads_setup.c +++ b/arch/powerpc/platforms/8xx/mpc86xads_setup.c @@ -122,7 +122,7 @@ static int __init mpc86xads_probe(void) return of_flat_dt_is_compatible(root, "fsl,mpc866ads"); } -static struct of_device_id __initdata of_bus_ids[] = { +static const struct of_device_id of_bus_ids[] __initconst = { { .name = "soc", }, { .name = "cpm", }, { .name = "localbus", }, diff --git a/arch/powerpc/platforms/8xx/mpc885ads_setup.c b/arch/powerpc/platforms/8xx/mpc885ads_setup.c index 5921dcb498fd..4d62bf9dc789 100644 --- a/arch/powerpc/platforms/8xx/mpc885ads_setup.c +++ b/arch/powerpc/platforms/8xx/mpc885ads_setup.c @@ -197,7 +197,7 @@ static int __init mpc885ads_probe(void) return of_flat_dt_is_compatible(root, "fsl,mpc885ads"); } -static struct of_device_id __initdata of_bus_ids[] = { +static const struct of_device_id of_bus_ids[] __initconst = { { .name = "soc", }, { .name = "cpm", }, { .name = "localbus", }, diff --git a/arch/powerpc/platforms/8xx/tqm8xx_setup.c b/arch/powerpc/platforms/8xx/tqm8xx_setup.c index dda607807def..bee47a2b23e6 100644 --- a/arch/powerpc/platforms/8xx/tqm8xx_setup.c +++ b/arch/powerpc/platforms/8xx/tqm8xx_setup.c @@ -124,7 +124,7 @@ static int __init tqm8xx_probe(void) return of_flat_dt_is_compatible(node, "tqc,tqm8xx"); } -static struct of_device_id __initdata of_bus_ids[] = { +static const struct of_device_id of_bus_ids[] __initconst = { { .name = "soc", }, { .name = "cpm", }, { .name = "localbus", }, diff --git a/arch/powerpc/platforms/cell/celleb_pci.c b/arch/powerpc/platforms/cell/celleb_pci.c index 173568140a32..2b98a36ef8fb 100644 --- a/arch/powerpc/platforms/cell/celleb_pci.c +++ b/arch/powerpc/platforms/cell/celleb_pci.c @@ -454,7 +454,7 @@ static struct celleb_phb_spec celleb_fake_pci_spec __initdata = { .setup = celleb_setup_fake_pci, }; -static struct of_device_id celleb_phb_match[] __initdata = { +static const struct of_device_id celleb_phb_match[] __initconst = { { .name = "pci-pseudo", .data = &celleb_fake_pci_spec, diff --git a/arch/powerpc/platforms/cell/celleb_setup.c b/arch/powerpc/platforms/cell/celleb_setup.c index 1d5a4d8ddad9..34e8ce2976aa 100644 --- a/arch/powerpc/platforms/cell/celleb_setup.c +++ b/arch/powerpc/platforms/cell/celleb_setup.c @@ -102,7 +102,7 @@ static void __init celleb_setup_arch_common(void) #endif } -static struct of_device_id celleb_bus_ids[] __initdata = { +static const struct of_device_id celleb_bus_ids[] __initconst = { { .type = "scc", }, { .type = "ioif", }, /* old style */ {}, diff --git a/arch/powerpc/platforms/embedded6xx/gamecube.c b/arch/powerpc/platforms/embedded6xx/gamecube.c index a138e14bad2e..bd4ba5d7d568 100644 --- a/arch/powerpc/platforms/embedded6xx/gamecube.c +++ b/arch/powerpc/platforms/embedded6xx/gamecube.c @@ -90,7 +90,7 @@ define_machine(gamecube) { }; -static struct of_device_id gamecube_of_bus[] = { +static const struct of_device_id gamecube_of_bus[] = { { .compatible = "nintendo,flipper", }, { }, }; diff --git a/arch/powerpc/platforms/embedded6xx/linkstation.c b/arch/powerpc/platforms/embedded6xx/linkstation.c index 455e7c087422..168e1d80b2e5 100644 --- a/arch/powerpc/platforms/embedded6xx/linkstation.c +++ b/arch/powerpc/platforms/embedded6xx/linkstation.c @@ -21,7 +21,7 @@ #include "mpc10x.h" -static __initdata struct of_device_id of_bus_ids[] = { +static const struct of_device_id of_bus_ids[] __initconst = { { .type = "soc", }, { .compatible = "simple-bus", }, {}, diff --git a/arch/powerpc/platforms/embedded6xx/mvme5100.c b/arch/powerpc/platforms/embedded6xx/mvme5100.c index 25e3bfb64efb..1613303177e6 100644 --- a/arch/powerpc/platforms/embedded6xx/mvme5100.c +++ b/arch/powerpc/platforms/embedded6xx/mvme5100.c @@ -149,7 +149,7 @@ static int __init mvme5100_add_bridge(struct device_node *dev) return 0; } -static struct of_device_id mvme5100_of_bus_ids[] __initdata = { +static const struct of_device_id mvme5100_of_bus_ids[] __initconst = { { .compatible = "hawk-bridge", }, {}, }; diff --git a/arch/powerpc/platforms/embedded6xx/storcenter.c b/arch/powerpc/platforms/embedded6xx/storcenter.c index c458b60d14c4..d572833ebd00 100644 --- a/arch/powerpc/platforms/embedded6xx/storcenter.c +++ b/arch/powerpc/platforms/embedded6xx/storcenter.c @@ -24,7 +24,7 @@ #include "mpc10x.h" -static __initdata struct of_device_id storcenter_of_bus[] = { +static const struct of_device_id storcenter_of_bus[] __initconst = { { .name = "soc", }, {}, }; diff --git a/arch/powerpc/platforms/embedded6xx/wii.c b/arch/powerpc/platforms/embedded6xx/wii.c index 6d8dadf19f0b..388e29bab8f6 100644 --- a/arch/powerpc/platforms/embedded6xx/wii.c +++ b/arch/powerpc/platforms/embedded6xx/wii.c @@ -235,7 +235,7 @@ define_machine(wii) { .machine_shutdown = wii_shutdown, }; -static struct of_device_id wii_of_bus[] = { +static const struct of_device_id wii_of_bus[] = { { .compatible = "nintendo,hollywood", }, { }, }; diff --git a/arch/powerpc/platforms/pasemi/gpio_mdio.c b/arch/powerpc/platforms/pasemi/gpio_mdio.c index 15adee544638..ada33358950d 100644 --- a/arch/powerpc/platforms/pasemi/gpio_mdio.c +++ b/arch/powerpc/platforms/pasemi/gpio_mdio.c @@ -290,7 +290,7 @@ static int gpio_mdio_remove(struct platform_device *dev) return 0; } -static struct of_device_id gpio_mdio_match[] = +static const struct of_device_id gpio_mdio_match[] = { { .compatible = "gpio-mdio", diff --git a/arch/powerpc/platforms/pasemi/setup.c b/arch/powerpc/platforms/pasemi/setup.c index 8c54de6d8ec4..d71b2c7e8403 100644 --- a/arch/powerpc/platforms/pasemi/setup.c +++ b/arch/powerpc/platforms/pasemi/setup.c @@ -393,7 +393,7 @@ static inline void pasemi_pcmcia_init(void) #endif -static struct of_device_id pasemi_bus_ids[] = { +static const struct of_device_id pasemi_bus_ids[] = { /* Unfortunately needed for legacy firmwares */ { .type = "localbus", }, { .type = "sdc", }, diff --git a/arch/powerpc/sysdev/axonram.c b/arch/powerpc/sysdev/axonram.c index 47b6b9f81d43..ad56edc39919 100644 --- a/arch/powerpc/sysdev/axonram.c +++ b/arch/powerpc/sysdev/axonram.c @@ -314,7 +314,7 @@ axon_ram_remove(struct platform_device *device) return 0; } -static struct of_device_id axon_ram_device_id[] = { +static const struct of_device_id axon_ram_device_id[] = { { .type = "dma-memory" }, diff --git a/arch/powerpc/sysdev/fsl_85xx_l2ctlr.c b/arch/powerpc/sysdev/fsl_85xx_l2ctlr.c index afc2dbf37011..90545ad1626e 100644 --- a/arch/powerpc/sysdev/fsl_85xx_l2ctlr.c +++ b/arch/powerpc/sysdev/fsl_85xx_l2ctlr.c @@ -171,7 +171,7 @@ static int mpc85xx_l2ctlr_of_remove(struct platform_device *dev) return 0; } -static struct of_device_id mpc85xx_l2ctlr_of_match[] = { +static const struct of_device_id mpc85xx_l2ctlr_of_match[] = { { .compatible = "fsl,p2020-l2-cache-controller", }, diff --git a/arch/powerpc/sysdev/mv64x60_dev.c b/arch/powerpc/sysdev/mv64x60_dev.c index c2dba7db71ad..026bbc3b2c47 100644 --- a/arch/powerpc/sysdev/mv64x60_dev.c +++ b/arch/powerpc/sysdev/mv64x60_dev.c @@ -23,7 +23,7 @@ /* These functions provide the necessary setup for the mv64x60 drivers. */ -static struct of_device_id __initdata of_mv64x60_devices[] = { +static const struct of_device_id of_mv64x60_devices[] __initconst = { { .compatible = "marvell,mv64306-devctrl", }, {} }; diff --git a/arch/powerpc/sysdev/pmi.c b/arch/powerpc/sysdev/pmi.c index 5aaf86c03893..13e67d93a7c1 100644 --- a/arch/powerpc/sysdev/pmi.c +++ b/arch/powerpc/sysdev/pmi.c @@ -101,7 +101,7 @@ out: } -static struct of_device_id pmi_match[] = { +static const struct of_device_id pmi_match[] = { { .type = "ibm,pmi", .name = "ibm,pmi" }, { .type = "ibm,pmi" }, {}, diff --git a/arch/powerpc/sysdev/xilinx_intc.c b/arch/powerpc/sysdev/xilinx_intc.c index 83f943a8e0db..56f0524e47a6 100644 --- a/arch/powerpc/sysdev/xilinx_intc.c +++ b/arch/powerpc/sysdev/xilinx_intc.c @@ -265,7 +265,7 @@ static void __init xilinx_i8259_setup_cascade(void) static inline void xilinx_i8259_setup_cascade(void) { return; } #endif /* defined(CONFIG_PPC_I8259) */ -static struct of_device_id xilinx_intc_match[] __initconst = { +static const struct of_device_id xilinx_intc_match[] __initconst = { { .compatible = "xlnx,opb-intc-1.00.c", }, { .compatible = "xlnx,xps-intc-1.00.a", }, {} diff --git a/arch/powerpc/sysdev/xilinx_pci.c b/arch/powerpc/sysdev/xilinx_pci.c index 1453b0eed220..fea5667699ed 100644 --- a/arch/powerpc/sysdev/xilinx_pci.c +++ b/arch/powerpc/sysdev/xilinx_pci.c @@ -27,7 +27,7 @@ #define PCI_HOST_ENABLE_CMD PCI_COMMAND_SERR | PCI_COMMAND_PARITY | PCI_COMMAND_MASTER | PCI_COMMAND_MEMORY -static struct of_device_id xilinx_pci_match[] = { +static const struct of_device_id xilinx_pci_match[] = { { .compatible = "xlnx,plbv46-pci-1.03.a", }, {} }; From fe921c8c3957dec5022b12fb858f8cb489e1b2bb Mon Sep 17 00:00:00 2001 From: Andreas Schwab Date: Sat, 13 Sep 2014 10:20:17 +0200 Subject: [PATCH 049/130] powerpc: Simplify symbol check in prom_init_check.sh Signed-off-by: Andreas Schwab Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/prom_init_check.sh | 22 ++++++---------------- 1 file changed, 6 insertions(+), 16 deletions(-) diff --git a/arch/powerpc/kernel/prom_init_check.sh b/arch/powerpc/kernel/prom_init_check.sh index fe8e54b9ef7d..12640f7e726b 100644 --- a/arch/powerpc/kernel/prom_init_check.sh +++ b/arch/powerpc/kernel/prom_init_check.sh @@ -50,24 +50,14 @@ do done # ignore register save/restore funcitons - if [ "${UNDEF:0:9}" = "_restgpr_" ]; then + case $UNDEF in + _restgpr_*|_restgpr0_*|_rest32gpr_*) OK=1 - fi - if [ "${UNDEF:0:10}" = "_restgpr0_" ]; then + ;; + _savegpr_*|_savegpr0_*|_save32gpr_*) OK=1 - fi - if [ "${UNDEF:0:11}" = "_rest32gpr_" ]; then - OK=1 - fi - if [ "${UNDEF:0:9}" = "_savegpr_" ]; then - OK=1 - fi - if [ "${UNDEF:0:10}" = "_savegpr0_" ]; then - OK=1 - fi - if [ "${UNDEF:0:11}" = "_save32gpr_" ]; then - OK=1 - fi + ;; + esac if [ $OK -eq 0 ]; then ERROR=1 From 822e71224e07f07a07c385be869fe416ce436430 Mon Sep 17 00:00:00 2001 From: Thomas Falcon Date: Fri, 12 Sep 2014 14:11:41 -0500 Subject: [PATCH 050/130] pseries: Fix endian issues in onlining cpu threads The ibm,ppc-interrupt-server#s property is in big endian format. These values need to be converted when used by little endian architectures. Signed-off-by: Thomas Falcon Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/pseries/dlpar.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c index 55a1b2f681e0..096337853dda 100644 --- a/arch/powerpc/platforms/pseries/dlpar.c +++ b/arch/powerpc/platforms/pseries/dlpar.c @@ -364,7 +364,8 @@ static int dlpar_online_cpu(struct device_node *dn) int rc = 0; unsigned int cpu; int len, nthreads, i; - const u32 *intserv; + const __be32 *intserv; + u32 thread; intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s", &len); if (!intserv) @@ -374,8 +375,9 @@ static int dlpar_online_cpu(struct device_node *dn) cpu_maps_update_begin(); for (i = 0; i < nthreads; i++) { + thread = be32_to_cpu(intserv[i]); for_each_present_cpu(cpu) { - if (get_hard_smp_processor_id(cpu) != intserv[i]) + if (get_hard_smp_processor_id(cpu) != thread) continue; BUG_ON(get_cpu_current_state(cpu) != CPU_STATE_OFFLINE); @@ -389,7 +391,7 @@ static int dlpar_online_cpu(struct device_node *dn) } if (cpu == num_possible_cpus()) printk(KERN_WARNING "Could not find cpu to online " - "with physical id 0x%x\n", intserv[i]); + "with physical id 0x%x\n", thread); } cpu_maps_update_done(); From e36d1227776a2daa2c9aa7f997ac7083d6783f2c Mon Sep 17 00:00:00 2001 From: Thomas Falcon Date: Fri, 12 Sep 2014 14:11:42 -0500 Subject: [PATCH 051/130] pseries: Fix endian issues in cpu hot-removal When removing a cpu, this patch makes sure that values gotten from or passed to firmware are in the correct endian format. Signed-off-by: Thomas Falcon Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/pseries/dlpar.c | 20 +++++++++++--------- arch/powerpc/platforms/pseries/hotplug-cpu.c | 10 ++++++---- 2 files changed, 17 insertions(+), 13 deletions(-) diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c index 096337853dda..fdf01b660d59 100644 --- a/arch/powerpc/platforms/pseries/dlpar.c +++ b/arch/powerpc/platforms/pseries/dlpar.c @@ -445,7 +445,8 @@ static int dlpar_offline_cpu(struct device_node *dn) int rc = 0; unsigned int cpu; int len, nthreads, i; - const u32 *intserv; + const __be32 *intserv; + u32 thread; intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s", &len); if (!intserv) @@ -455,8 +456,9 @@ static int dlpar_offline_cpu(struct device_node *dn) cpu_maps_update_begin(); for (i = 0; i < nthreads; i++) { + thread = be32_to_cpu(intserv[i]); for_each_present_cpu(cpu) { - if (get_hard_smp_processor_id(cpu) != intserv[i]) + if (get_hard_smp_processor_id(cpu) != thread) continue; if (get_cpu_current_state(cpu) == CPU_STATE_OFFLINE) @@ -478,14 +480,14 @@ static int dlpar_offline_cpu(struct device_node *dn) * Upgrade it's state to CPU_STATE_OFFLINE. */ set_preferred_offline_state(cpu, CPU_STATE_OFFLINE); - BUG_ON(plpar_hcall_norets(H_PROD, intserv[i]) + BUG_ON(plpar_hcall_norets(H_PROD, thread) != H_SUCCESS); __cpu_die(cpu); break; } if (cpu == num_possible_cpus()) printk(KERN_WARNING "Could not find cpu to offline " - "with physical id 0x%x\n", intserv[i]); + "with physical id 0x%x\n", thread); } cpu_maps_update_done(); @@ -497,15 +499,15 @@ out: static ssize_t dlpar_cpu_release(const char *buf, size_t count) { struct device_node *dn; - const u32 *drc_index; + u32 drc_index; int rc; dn = of_find_node_by_path(buf); if (!dn) return -EINVAL; - drc_index = of_get_property(dn, "ibm,my-drc-index", NULL); - if (!drc_index) { + rc = of_property_read_u32(dn, "ibm,my-drc-index", &drc_index); + if (rc) { of_node_put(dn); return -EINVAL; } @@ -516,7 +518,7 @@ static ssize_t dlpar_cpu_release(const char *buf, size_t count) return -EINVAL; } - rc = dlpar_release_drc(*drc_index); + rc = dlpar_release_drc(drc_index); if (rc) { of_node_put(dn); return rc; @@ -524,7 +526,7 @@ static ssize_t dlpar_cpu_release(const char *buf, size_t count) rc = dlpar_detach_node(dn); if (rc) { - dlpar_acquire_drc(*drc_index); + dlpar_acquire_drc(drc_index); return rc; } diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c index 20d62975856f..b174fa751d26 100644 --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c @@ -90,7 +90,7 @@ static void rtas_stop_self(void) { static struct rtas_args args = { .nargs = 0, - .nret = 1, + .nret = cpu_to_be32(1), .rets = &args.args[0], }; @@ -312,7 +312,8 @@ static void pseries_remove_processor(struct device_node *np) { unsigned int cpu; int len, nthreads, i; - const u32 *intserv; + const __be32 *intserv; + u32 thread; intserv = of_get_property(np, "ibm,ppc-interrupt-server#s", &len); if (!intserv) @@ -322,8 +323,9 @@ static void pseries_remove_processor(struct device_node *np) cpu_maps_update_begin(); for (i = 0; i < nthreads; i++) { + thread = be32_to_cpu(intserv[i]); for_each_present_cpu(cpu) { - if (get_hard_smp_processor_id(cpu) != intserv[i]) + if (get_hard_smp_processor_id(cpu) != thread) continue; BUG_ON(cpu_online(cpu)); set_cpu_present(cpu, false); @@ -332,7 +334,7 @@ static void pseries_remove_processor(struct device_node *np) } if (cpu >= nr_cpu_ids) printk(KERN_WARNING "Could not find cpu to remove " - "with physical id 0x%x\n", intserv[i]); + "with physical id 0x%x\n", thread); } cpu_maps_update_done(); } From ff2e466aa2339238c941dee873e553687e941f3f Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Tue, 16 Sep 2014 10:47:49 +1000 Subject: [PATCH 052/130] powerpc: Add POWER8 CPU selection This allows the user to build a kernel targeted at POWER8 (ie gcc -mcpu=power8). Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman --- arch/powerpc/Makefile | 1 + arch/powerpc/platforms/Kconfig.cputype | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 5687e299d0a5..132d9c681d6a 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -135,6 +135,7 @@ CFLAGS-$(CONFIG_POWER4_CPU) += $(call cc-option,-mcpu=power4) CFLAGS-$(CONFIG_POWER5_CPU) += $(call cc-option,-mcpu=power5) CFLAGS-$(CONFIG_POWER6_CPU) += $(call cc-option,-mcpu=power6) CFLAGS-$(CONFIG_POWER7_CPU) += $(call cc-option,-mcpu=power7) +CFLAGS-$(CONFIG_POWER8_CPU) += $(call cc-option,-mcpu=power8) # Altivec option not allowed with e500mc64 in GCC. ifeq ($(CONFIG_ALTIVEC),y) diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index e8bc40869cbd..26703be24cf9 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -117,6 +117,10 @@ config POWER7_CPU bool "POWER7" depends on PPC_BOOK3S_64 +config POWER8_CPU + bool "POWER8" + depends on PPC_BOOK3S_64 + config E5500_CPU bool "Freescale e5500" depends on E500 From 423216ed3273dae18c347ce52c5ecc193cfdd4e5 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Tue, 16 Sep 2014 10:49:14 +1000 Subject: [PATCH 053/130] powerpc: Use CONFIG_ARCH_HAS_FAST_MULTIPLIER I ran some tests to compare hash_64 using shifts and multiplies. The results: POWER6: ~2x slower POWER7: ~2x faster POWER8: ~2x faster Now we have a proper config option, select CONFIG_ARCH_HAS_FAST_MULTIPLIER on POWER7 and POWER8. Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/Kconfig.cputype | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 26703be24cf9..35ae7ec392ec 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -116,10 +116,12 @@ config POWER6_CPU config POWER7_CPU bool "POWER7" depends on PPC_BOOK3S_64 + select ARCH_HAS_FAST_MULTIPLIER config POWER8_CPU bool "POWER8" depends on PPC_BOOK3S_64 + select ARCH_HAS_FAST_MULTIPLIER config E5500_CPU bool "Freescale e5500" From d6a4f70909d279004a2b3d539e240e07b1ecc1cb Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Tue, 2 Sep 2014 14:23:16 +1000 Subject: [PATCH 054/130] powerpc/powernv: Don't call generic code on offline cpus On PowerNV platforms, when a CPU is offline, we put it into nap mode. It's possible that the CPU wakes up from nap mode while it is still offline due to a stray IPI. A misdirected device interrupt could also potentially cause it to wake up. In that circumstance, we need to clear the interrupt so that the CPU can go back to nap mode. In the past the clearing of the interrupt was accomplished by briefly enabling interrupts and allowing the normal interrupt handling code (do_IRQ() etc.) to handle the interrupt. This has the problem that this code calls irq_enter() and irq_exit(), which call functions such as account_system_vtime() which use RCU internally. Use of RCU is not permitted on offline CPUs and will trigger errors if RCU checking is enabled. To avoid calling into any generic code which might use RCU, we adopt a different method of clearing interrupts on offline CPUs. Since we are on the PowerNV platform, we know that the system interrupt controller is a XICS being driven directly (i.e. not via hcalls) by the kernel. Hence this adds a new icp_native_flush_interrupt() function to the native-mode XICS driver and arranges to call that when an offline CPU is woken from nap. This new function reads the interrupt from the XICS. If it is an IPI, it clears the IPI; if it is a device interrupt, it prints a warning and disables the source. Then it does the end-of-interrupt processing for the interrupt. The other thing that briefly enabling interrupts did was to check and clear the irq_happened flag in this CPU's PACA. Therefore, after flushing the interrupt from the XICS, we also clear all bits except the PACA_IRQ_HARD_DIS (interrupts are hard disabled) bit from the irq_happened flag. The PACA_IRQ_HARD_DIS flag is set by power7_nap() and is left set to indicate that interrupts are hard disabled. This means we then have to ignore that flag in power7_nap(), which is reasonable since it doesn't indicate that any interrupt event needs servicing. Signed-off-by: Paul Mackerras Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/xics.h | 1 + arch/powerpc/kernel/idle_power7.S | 2 +- arch/powerpc/platforms/powernv/smp.c | 6 +++--- arch/powerpc/sysdev/xics/icp-native.c | 25 +++++++++++++++++++++++++ 4 files changed, 30 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/include/asm/xics.h b/arch/powerpc/include/asm/xics.h index 282d43a0c855..0d050ea37a04 100644 --- a/arch/powerpc/include/asm/xics.h +++ b/arch/powerpc/include/asm/xics.h @@ -29,6 +29,7 @@ /* Native ICP */ #ifdef CONFIG_PPC_ICP_NATIVE extern int icp_native_init(void); +extern void icp_native_flush_interrupt(void); #else static inline int icp_native_init(void) { return -ENODEV; } #endif diff --git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_power7.S index be05841396cf..c0754bbf8118 100644 --- a/arch/powerpc/kernel/idle_power7.S +++ b/arch/powerpc/kernel/idle_power7.S @@ -73,7 +73,7 @@ _GLOBAL(power7_powersave_common) /* Check if something happened while soft-disabled */ lbz r0,PACAIRQHAPPENED(r13) - cmpwi cr0,r0,0 + andi. r0,r0,~PACA_IRQ_HARD_DIS@l beq 1f cmpwi cr0,r4,0 beq 1f diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c index b73adc573031..4753958cd509 100644 --- a/arch/powerpc/platforms/powernv/smp.c +++ b/arch/powerpc/platforms/powernv/smp.c @@ -168,9 +168,9 @@ static void pnv_smp_cpu_kill_self(void) power7_nap(1); ppc64_runlatch_on(); - /* Reenable IRQs briefly to clear the IPI that woke us */ - local_irq_enable(); - local_irq_disable(); + /* Clear the IPI that woke us up */ + icp_native_flush_interrupt(); + local_paca->irq_happened &= PACA_IRQ_HARD_DIS; mb(); if (cpu_core_split_required()) diff --git a/arch/powerpc/sysdev/xics/icp-native.c b/arch/powerpc/sysdev/xics/icp-native.c index de8d9483bbe8..2fc4cf1b7557 100644 --- a/arch/powerpc/sysdev/xics/icp-native.c +++ b/arch/powerpc/sysdev/xics/icp-native.c @@ -155,6 +155,31 @@ static void icp_native_cause_ipi(int cpu, unsigned long data) icp_native_set_qirr(cpu, IPI_PRIORITY); } +/* + * Called when an interrupt is received on an off-line CPU to + * clear the interrupt, so that the CPU can go back to nap mode. + */ +void icp_native_flush_interrupt(void) +{ + unsigned int xirr = icp_native_get_xirr(); + unsigned int vec = xirr & 0x00ffffff; + + if (vec == XICS_IRQ_SPURIOUS) + return; + if (vec == XICS_IPI) { + /* Clear pending IPI */ + int cpu = smp_processor_id(); + kvmppc_set_host_ipi(cpu, 0); + icp_native_set_qirr(cpu, 0xff); + } else { + pr_err("XICS: hw interrupt 0x%x to offline cpu, disabling\n", + vec); + xics_mask_unknown_vec(vec); + } + /* EOI the interrupt */ + icp_native_set_xirr(xirr); +} + void xics_wake_cpu(int cpu) { icp_native_set_qirr(cpu, IPI_PRIORITY); From ad72a279a2b874828d1b5070ef01cf6ee6b1d62c Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 28 Aug 2014 18:40:47 +1000 Subject: [PATCH 055/130] powerpc: Check flat device tree version at boot In commit e6a6928c3ea1 "of/fdt: Convert FDT functions to use libfdt", the kernel stopped supporting old flat device tree formats. The minimum supported version is now 0x10. There was a checking function added, early_init_dt_verify(), but it's not called on powerpc. The result is, if you boot with an old flat device tree, the kernel will fail to parse it correctly, think you have no memory etc. and hilarity ensues. We can't really fix it, but we can at least catch the fact that the device tree is in an unsupported format and panic(). We can't call BUG(), it's too early. Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/prom.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index cb35c5b3efca..59576255d22b 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -642,6 +642,10 @@ void __init early_init_devtree(void *params) DBG(" -> early_init_devtree(%p)\n", params); + /* Too early to BUG_ON(), do it by hand */ + if (!early_init_dt_verify(params)) + panic("BUG: Failed verifying flat device tree, bad version?"); + /* Setup flat device-tree pointer */ initial_boot_params = params; From be96f63375a14ee8e690856ac77e579c75bd0bae Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Tue, 2 Sep 2014 14:35:07 +1000 Subject: [PATCH 056/130] powerpc: Split out instruction analysis part of emulate_step() This splits out the instruction analysis part of emulate_step() into a separate analyse_instr() function, which decodes the instruction, but doesn't execute any load or store instructions. It does execute integer instructions and branches which can be executed purely by updating register values in the pt_regs struct. For other instructions, it returns the instruction type and other details in a new instruction_op struct. emulate_step() then uses that information to execute loads, stores, cache operations, mfmsr, mtmsr[d], and (on 64-bit) sc instructions. The reason for doing this is so that the KVM code can use it instead of having its own separate instruction emulation code. Possibly the alignment interrupt handler could also use this. Signed-off-by: Paul Mackerras Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/sstep.h | 61 +++ arch/powerpc/lib/sstep.c | 897 ++++++++++++++++++------------- 2 files changed, 598 insertions(+), 360 deletions(-) diff --git a/arch/powerpc/include/asm/sstep.h b/arch/powerpc/include/asm/sstep.h index f593b0f9b627..1a693b13ddc5 100644 --- a/arch/powerpc/include/asm/sstep.h +++ b/arch/powerpc/include/asm/sstep.h @@ -25,3 +25,64 @@ struct pt_regs; /* Emulate instructions that cause a transfer of control. */ extern int emulate_step(struct pt_regs *regs, unsigned int instr); + +enum instruction_type { + COMPUTE, /* arith/logical/CR op, etc. */ + LOAD, + LOAD_MULTI, + LOAD_FP, + LOAD_VMX, + LOAD_VSX, + STORE, + STORE_MULTI, + STORE_FP, + STORE_VMX, + STORE_VSX, + LARX, + STCX, + BRANCH, + MFSPR, + MTSPR, + CACHEOP, + BARRIER, + SYSCALL, + MFMSR, + MTMSR, + RFI, + INTERRUPT, + UNKNOWN +}; + +#define INSTR_TYPE_MASK 0x1f + +/* Load/store flags, ORed in with type */ +#define SIGNEXT 0x20 +#define UPDATE 0x40 /* matches bit in opcode 31 instructions */ +#define BYTEREV 0x80 + +/* Cacheop values, ORed in with type */ +#define CACHEOP_MASK 0x700 +#define DCBST 0 +#define DCBF 0x100 +#define DCBTST 0x200 +#define DCBT 0x300 + +/* Size field in type word */ +#define SIZE(n) ((n) << 8) +#define GETSIZE(w) ((w) >> 8) + +#define MKOP(t, f, s) ((t) | (f) | SIZE(s)) + +struct instruction_op { + int type; + int reg; + unsigned long val; + /* For LOAD/STORE/LARX/STCX */ + unsigned long ea; + int update_reg; + /* For MFSPR */ + int spr; +}; + +extern int analyse_instr(struct instruction_op *op, struct pt_regs *regs, + unsigned int instr); diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index 5c09f365c842..3726a03179ab 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -98,13 +98,8 @@ static unsigned long __kprobes dform_ea(unsigned int instr, struct pt_regs *regs ra = (instr >> 16) & 0x1f; ea = (signed short) instr; /* sign-extend */ - if (ra) { + if (ra) ea += regs->gpr[ra]; - if (instr & 0x04000000) { /* update forms */ - if ((instr>>26) != 47) /* stmw is not an update form */ - regs->gpr[ra] = ea; - } - } return truncate_if_32bit(regs->msr, ea); } @@ -120,11 +115,8 @@ static unsigned long __kprobes dsform_ea(unsigned int instr, struct pt_regs *reg ra = (instr >> 16) & 0x1f; ea = (signed short) (instr & ~3); /* sign-extend */ - if (ra) { + if (ra) ea += regs->gpr[ra]; - if ((instr & 3) == 1) /* update forms */ - regs->gpr[ra] = ea; - } return truncate_if_32bit(regs->msr, ea); } @@ -133,8 +125,8 @@ static unsigned long __kprobes dsform_ea(unsigned int instr, struct pt_regs *reg /* * Calculate effective address for an X-form instruction */ -static unsigned long __kprobes xform_ea(unsigned int instr, struct pt_regs *regs, - int do_update) +static unsigned long __kprobes xform_ea(unsigned int instr, + struct pt_regs *regs) { int ra, rb; unsigned long ea; @@ -142,11 +134,8 @@ static unsigned long __kprobes xform_ea(unsigned int instr, struct pt_regs *regs ra = (instr >> 16) & 0x1f; rb = (instr >> 11) & 0x1f; ea = regs->gpr[rb]; - if (ra) { + if (ra) ea += regs->gpr[ra]; - if (do_update) /* update forms */ - regs->gpr[ra] = ea; - } return truncate_if_32bit(regs->msr, ea); } @@ -627,26 +616,27 @@ static void __kprobes do_cmp_unsigned(struct pt_regs *regs, unsigned long v1, #define ROTATE(x, n) ((n) ? (((x) << (n)) | ((x) >> (8 * sizeof(long) - (n)))) : (x)) /* - * Emulate instructions that cause a transfer of control, - * loads and stores, and a few other instructions. - * Returns 1 if the step was emulated, 0 if not, - * or -1 if the instruction is one that should not be stepped, - * such as an rfid, or a mtmsrd that would clear MSR_RI. + * Decode an instruction, and execute it if that can be done just by + * modifying *regs (i.e. integer arithmetic and logical instructions, + * branches, and barrier instructions). + * Returns 1 if the instruction has been executed, or 0 if not. + * Sets *op to indicate what the instruction does. */ -int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) +int __kprobes analyse_instr(struct instruction_op *op, struct pt_regs *regs, + unsigned int instr) { unsigned int opcode, ra, rb, rd, spr, u; unsigned long int imm; unsigned long int val, val2; - unsigned long int ea; - unsigned int cr, mb, me, sh; - int err; - unsigned long old_ra, val3; + unsigned int mb, me, sh; long ival; + op->type = COMPUTE; + opcode = instr >> 26; switch (opcode) { case 16: /* bc */ + op->type = BRANCH; imm = (signed short)(instr & 0xfffc); if ((instr & 2) == 0) imm += regs->nip; @@ -659,26 +649,14 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) return 1; #ifdef CONFIG_PPC64 case 17: /* sc */ - /* - * N.B. this uses knowledge about how the syscall - * entry code works. If that is changed, this will - * need to be changed also. - */ - if (regs->gpr[0] == 0x1ebe && - cpu_has_feature(CPU_FTR_REAL_LE)) { - regs->msr ^= MSR_LE; - goto instr_done; - } - regs->gpr[9] = regs->gpr[13]; - regs->gpr[10] = MSR_KERNEL; - regs->gpr[11] = regs->nip + 4; - regs->gpr[12] = regs->msr & MSR_MASK; - regs->gpr[13] = (unsigned long) get_paca(); - regs->nip = (unsigned long) &system_call_common; - regs->msr = MSR_KERNEL; - return 1; + if ((instr & 0xfe2) == 2) + op->type = SYSCALL; + else + op->type = UNKNOWN; + return 0; #endif case 18: /* b */ + op->type = BRANCH; imm = instr & 0x03fffffc; if (imm & 0x02000000) imm -= 0x04000000; @@ -693,6 +671,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) switch ((instr >> 1) & 0x3ff) { case 16: /* bclr */ case 528: /* bcctr */ + op->type = BRANCH; imm = (instr & 0x400)? regs->ctr: regs->link; regs->nip = truncate_if_32bit(regs->msr, regs->nip + 4); imm = truncate_if_32bit(regs->msr, imm); @@ -703,9 +682,13 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) return 1; case 18: /* rfid, scary */ - return -1; + if (regs->msr & MSR_PR) + goto priv; + op->type = RFI; + return 0; case 150: /* isync */ + op->type = BARRIER; isync(); goto instr_done; @@ -731,6 +714,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) case 31: switch ((instr >> 1) & 0x3ff) { case 598: /* sync */ + op->type = BARRIER; #ifdef __powerpc64__ switch ((instr >> 21) & 3) { case 1: /* lwsync */ @@ -745,6 +729,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) goto instr_done; case 854: /* eieio */ + op->type = BARRIER; eieio(); goto instr_done; } @@ -910,33 +895,30 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) switch ((instr >> 1) & 0x3ff) { case 83: /* mfmsr */ if (regs->msr & MSR_PR) - break; - regs->gpr[rd] = regs->msr & MSR_MASK; - goto instr_done; + goto priv; + op->type = MFMSR; + op->reg = rd; + return 0; case 146: /* mtmsr */ if (regs->msr & MSR_PR) - break; - imm = regs->gpr[rd]; - if ((imm & MSR_RI) == 0) - /* can't step mtmsr that would clear MSR_RI */ - return -1; - regs->msr = imm; - goto instr_done; + goto priv; + op->type = MTMSR; + op->reg = rd; + op->val = 0xffffffff & ~(MSR_ME | MSR_LE); + return 0; #ifdef CONFIG_PPC64 case 178: /* mtmsrd */ - /* only MSR_EE and MSR_RI get changed if bit 15 set */ - /* mtmsrd doesn't change MSR_HV and MSR_ME */ if (regs->msr & MSR_PR) - break; - imm = (instr & 0x10000)? 0x8002: 0xefffffffffffefffUL; - imm = (regs->msr & MSR_MASK & ~imm) - | (regs->gpr[rd] & imm); - if ((imm & MSR_RI) == 0) - /* can't step mtmsrd that would clear MSR_RI */ - return -1; - regs->msr = imm; - goto instr_done; + goto priv; + op->type = MTMSR; + op->reg = rd; + /* only MSR_EE and MSR_RI get changed if bit 15 set */ + /* mtmsrd doesn't change MSR_HV, MSR_ME or MSR_LE */ + imm = (instr & 0x10000)? 0x8002: 0xefffffffffffeffeUL; + op->val = imm; + return 0; #endif + case 19: /* mfcr */ regs->gpr[rd] = regs->ccr; regs->gpr[rd] &= 0xffffffffUL; @@ -954,33 +936,43 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) goto instr_done; case 339: /* mfspr */ - spr = (instr >> 11) & 0x3ff; + spr = ((instr >> 16) & 0x1f) | ((instr >> 6) & 0x3e0); switch (spr) { - case 0x20: /* mfxer */ + case SPRN_XER: /* mfxer */ regs->gpr[rd] = regs->xer; regs->gpr[rd] &= 0xffffffffUL; goto instr_done; - case 0x100: /* mflr */ + case SPRN_LR: /* mflr */ regs->gpr[rd] = regs->link; goto instr_done; - case 0x120: /* mfctr */ + case SPRN_CTR: /* mfctr */ regs->gpr[rd] = regs->ctr; goto instr_done; + default: + op->type = MFSPR; + op->reg = rd; + op->spr = spr; + return 0; } break; case 467: /* mtspr */ - spr = (instr >> 11) & 0x3ff; + spr = ((instr >> 16) & 0x1f) | ((instr >> 6) & 0x3e0); switch (spr) { - case 0x20: /* mtxer */ + case SPRN_XER: /* mtxer */ regs->xer = (regs->gpr[rd] & 0xffffffffUL); goto instr_done; - case 0x100: /* mtlr */ + case SPRN_LR: /* mtlr */ regs->link = regs->gpr[rd]; goto instr_done; - case 0x120: /* mtctr */ + case SPRN_CTR: /* mtctr */ regs->ctr = regs->gpr[rd]; goto instr_done; + default: + op->type = MTSPR; + op->val = regs->gpr[rd]; + op->spr = spr; + return 0; } break; @@ -1257,294 +1249,210 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) * Cache instructions */ case 54: /* dcbst */ - ea = xform_ea(instr, regs, 0); - if (!address_ok(regs, ea, 8)) - return 0; - err = 0; - __cacheop_user_asmx(ea, err, "dcbst"); - if (err) - return 0; - goto instr_done; + op->type = MKOP(CACHEOP, DCBST, 0); + op->ea = xform_ea(instr, regs); + return 0; case 86: /* dcbf */ - ea = xform_ea(instr, regs, 0); - if (!address_ok(regs, ea, 8)) - return 0; - err = 0; - __cacheop_user_asmx(ea, err, "dcbf"); - if (err) - return 0; - goto instr_done; + op->type = MKOP(CACHEOP, DCBF, 0); + op->ea = xform_ea(instr, regs); + return 0; case 246: /* dcbtst */ - if (rd == 0) { - ea = xform_ea(instr, regs, 0); - prefetchw((void *) ea); - } - goto instr_done; + op->type = MKOP(CACHEOP, DCBTST, 0); + op->ea = xform_ea(instr, regs); + op->reg = rd; + return 0; case 278: /* dcbt */ - if (rd == 0) { - ea = xform_ea(instr, regs, 0); - prefetch((void *) ea); - } - goto instr_done; - + op->type = MKOP(CACHEOP, DCBTST, 0); + op->ea = xform_ea(instr, regs); + op->reg = rd; + return 0; } break; } /* - * Following cases are for loads and stores, so bail out - * if we're in little-endian mode. + * Loads and stores. */ - if (regs->msr & MSR_LE) - return 0; - - /* - * Save register RA in case it's an update form load or store - * and the access faults. - */ - old_ra = regs->gpr[ra]; + op->type = UNKNOWN; + op->update_reg = ra; + op->reg = rd; + op->val = regs->gpr[rd]; + u = (instr >> 20) & UPDATE; switch (opcode) { case 31: - u = instr & 0x40; + u = instr & UPDATE; + op->ea = xform_ea(instr, regs); switch ((instr >> 1) & 0x3ff) { case 20: /* lwarx */ - ea = xform_ea(instr, regs, 0); - if (ea & 3) - break; /* can't handle misaligned */ - err = -EFAULT; - if (!address_ok(regs, ea, 4)) - goto ldst_done; - err = 0; - __get_user_asmx(val, ea, err, "lwarx"); - if (!err) - regs->gpr[rd] = val; - goto ldst_done; + op->type = MKOP(LARX, 0, 4); + break; case 150: /* stwcx. */ - ea = xform_ea(instr, regs, 0); - if (ea & 3) - break; /* can't handle misaligned */ - err = -EFAULT; - if (!address_ok(regs, ea, 4)) - goto ldst_done; - err = 0; - __put_user_asmx(regs->gpr[rd], ea, err, "stwcx.", cr); - if (!err) - regs->ccr = (regs->ccr & 0x0fffffff) | - (cr & 0xe0000000) | - ((regs->xer >> 3) & 0x10000000); - goto ldst_done; + op->type = MKOP(STCX, 0, 4); + break; #ifdef __powerpc64__ case 84: /* ldarx */ - ea = xform_ea(instr, regs, 0); - if (ea & 7) - break; /* can't handle misaligned */ - err = -EFAULT; - if (!address_ok(regs, ea, 8)) - goto ldst_done; - err = 0; - __get_user_asmx(val, ea, err, "ldarx"); - if (!err) - regs->gpr[rd] = val; - goto ldst_done; + op->type = MKOP(LARX, 0, 8); + break; case 214: /* stdcx. */ - ea = xform_ea(instr, regs, 0); - if (ea & 7) - break; /* can't handle misaligned */ - err = -EFAULT; - if (!address_ok(regs, ea, 8)) - goto ldst_done; - err = 0; - __put_user_asmx(regs->gpr[rd], ea, err, "stdcx.", cr); - if (!err) - regs->ccr = (regs->ccr & 0x0fffffff) | - (cr & 0xe0000000) | - ((regs->xer >> 3) & 0x10000000); - goto ldst_done; + op->type = MKOP(STCX, 0, 8); + break; case 21: /* ldx */ case 53: /* ldux */ - err = read_mem(®s->gpr[rd], xform_ea(instr, regs, u), - 8, regs); - goto ldst_done; + op->type = MKOP(LOAD, u, 8); + break; #endif case 23: /* lwzx */ case 55: /* lwzux */ - err = read_mem(®s->gpr[rd], xform_ea(instr, regs, u), - 4, regs); - goto ldst_done; + op->type = MKOP(LOAD, u, 4); + break; case 87: /* lbzx */ case 119: /* lbzux */ - err = read_mem(®s->gpr[rd], xform_ea(instr, regs, u), - 1, regs); - goto ldst_done; + op->type = MKOP(LOAD, u, 1); + break; #ifdef CONFIG_ALTIVEC case 103: /* lvx */ case 359: /* lvxl */ if (!(regs->msr & MSR_VEC)) - break; - ea = xform_ea(instr, regs, 0); - err = do_vec_load(rd, do_lvx, ea, regs); - goto ldst_done; + goto vecunavail; + op->type = MKOP(LOAD_VMX, 0, 16); + break; case 231: /* stvx */ case 487: /* stvxl */ if (!(regs->msr & MSR_VEC)) - break; - ea = xform_ea(instr, regs, 0); - err = do_vec_store(rd, do_stvx, ea, regs); - goto ldst_done; + goto vecunavail; + op->type = MKOP(STORE_VMX, 0, 16); + break; #endif /* CONFIG_ALTIVEC */ #ifdef __powerpc64__ case 149: /* stdx */ case 181: /* stdux */ - val = regs->gpr[rd]; - err = write_mem(val, xform_ea(instr, regs, u), 8, regs); - goto ldst_done; + op->type = MKOP(STORE, u, 8); + break; #endif case 151: /* stwx */ case 183: /* stwux */ - val = regs->gpr[rd]; - err = write_mem(val, xform_ea(instr, regs, u), 4, regs); - goto ldst_done; + op->type = MKOP(STORE, u, 4); + break; case 215: /* stbx */ case 247: /* stbux */ - val = regs->gpr[rd]; - err = write_mem(val, xform_ea(instr, regs, u), 1, regs); - goto ldst_done; + op->type = MKOP(STORE, u, 1); + break; case 279: /* lhzx */ case 311: /* lhzux */ - err = read_mem(®s->gpr[rd], xform_ea(instr, regs, u), - 2, regs); - goto ldst_done; + op->type = MKOP(LOAD, u, 2); + break; #ifdef __powerpc64__ case 341: /* lwax */ case 373: /* lwaux */ - err = read_mem(®s->gpr[rd], xform_ea(instr, regs, u), - 4, regs); - if (!err) - regs->gpr[rd] = (signed int) regs->gpr[rd]; - goto ldst_done; + op->type = MKOP(LOAD, SIGNEXT | u, 4); + break; #endif case 343: /* lhax */ case 375: /* lhaux */ - err = read_mem(®s->gpr[rd], xform_ea(instr, regs, u), - 2, regs); - if (!err) - regs->gpr[rd] = (signed short) regs->gpr[rd]; - goto ldst_done; + op->type = MKOP(LOAD, SIGNEXT | u, 2); + break; case 407: /* sthx */ case 439: /* sthux */ - val = regs->gpr[rd]; - err = write_mem(val, xform_ea(instr, regs, u), 2, regs); - goto ldst_done; + op->type = MKOP(STORE, u, 2); + break; #ifdef __powerpc64__ case 532: /* ldbrx */ - err = read_mem(&val, xform_ea(instr, regs, 0), 8, regs); - if (!err) - regs->gpr[rd] = byterev_8(val); - goto ldst_done; + op->type = MKOP(LOAD, BYTEREV, 8); + break; #endif case 534: /* lwbrx */ - err = read_mem(&val, xform_ea(instr, regs, 0), 4, regs); - if (!err) - regs->gpr[rd] = byterev_4(val); - goto ldst_done; + op->type = MKOP(LOAD, BYTEREV, 4); + break; #ifdef CONFIG_PPC_FPU case 535: /* lfsx */ case 567: /* lfsux */ if (!(regs->msr & MSR_FP)) - break; - ea = xform_ea(instr, regs, u); - err = do_fp_load(rd, do_lfs, ea, 4, regs); - goto ldst_done; + goto fpunavail; + op->type = MKOP(LOAD_FP, u, 4); + break; case 599: /* lfdx */ case 631: /* lfdux */ if (!(regs->msr & MSR_FP)) - break; - ea = xform_ea(instr, regs, u); - err = do_fp_load(rd, do_lfd, ea, 8, regs); - goto ldst_done; + goto fpunavail; + op->type = MKOP(LOAD_FP, u, 8); + break; case 663: /* stfsx */ case 695: /* stfsux */ if (!(regs->msr & MSR_FP)) - break; - ea = xform_ea(instr, regs, u); - err = do_fp_store(rd, do_stfs, ea, 4, regs); - goto ldst_done; + goto fpunavail; + op->type = MKOP(STORE_FP, u, 4); + break; case 727: /* stfdx */ case 759: /* stfdux */ if (!(regs->msr & MSR_FP)) - break; - ea = xform_ea(instr, regs, u); - err = do_fp_store(rd, do_stfd, ea, 8, regs); - goto ldst_done; + goto fpunavail; + op->type = MKOP(STORE_FP, u, 8); + break; #endif #ifdef __powerpc64__ case 660: /* stdbrx */ - val = byterev_8(regs->gpr[rd]); - err = write_mem(val, xform_ea(instr, regs, 0), 8, regs); - goto ldst_done; + op->type = MKOP(STORE, BYTEREV, 8); + op->val = byterev_8(regs->gpr[rd]); + break; #endif case 662: /* stwbrx */ - val = byterev_4(regs->gpr[rd]); - err = write_mem(val, xform_ea(instr, regs, 0), 4, regs); - goto ldst_done; + op->type = MKOP(STORE, BYTEREV, 4); + op->val = byterev_4(regs->gpr[rd]); + break; case 790: /* lhbrx */ - err = read_mem(&val, xform_ea(instr, regs, 0), 2, regs); - if (!err) - regs->gpr[rd] = byterev_2(val); - goto ldst_done; + op->type = MKOP(LOAD, BYTEREV, 2); + break; case 918: /* sthbrx */ - val = byterev_2(regs->gpr[rd]); - err = write_mem(val, xform_ea(instr, regs, 0), 2, regs); - goto ldst_done; + op->type = MKOP(STORE, BYTEREV, 2); + op->val = byterev_2(regs->gpr[rd]); + break; #ifdef CONFIG_VSX case 844: /* lxvd2x */ case 876: /* lxvd2ux */ if (!(regs->msr & MSR_VSX)) - break; - rd |= (instr & 1) << 5; - ea = xform_ea(instr, regs, u); - err = do_vsx_load(rd, do_lxvd2x, ea, regs); - goto ldst_done; + goto vsxunavail; + op->reg = rd | ((instr & 1) << 5); + op->type = MKOP(LOAD_VSX, u, 16); + break; case 972: /* stxvd2x */ case 1004: /* stxvd2ux */ if (!(regs->msr & MSR_VSX)) - break; - rd |= (instr & 1) << 5; - ea = xform_ea(instr, regs, u); - err = do_vsx_store(rd, do_stxvd2x, ea, regs); - goto ldst_done; + goto vsxunavail; + op->reg = rd | ((instr & 1) << 5); + op->type = MKOP(STORE_VSX, u, 16); + break; #endif /* CONFIG_VSX */ } @@ -1552,178 +1460,124 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) case 32: /* lwz */ case 33: /* lwzu */ - err = read_mem(®s->gpr[rd], dform_ea(instr, regs), 4, regs); - goto ldst_done; + op->type = MKOP(LOAD, u, 4); + op->ea = dform_ea(instr, regs); + break; case 34: /* lbz */ case 35: /* lbzu */ - err = read_mem(®s->gpr[rd], dform_ea(instr, regs), 1, regs); - goto ldst_done; + op->type = MKOP(LOAD, u, 1); + op->ea = dform_ea(instr, regs); + break; case 36: /* stw */ - val = regs->gpr[rd]; - err = write_mem(val, dform_ea(instr, regs), 4, regs); - goto ldst_done; - case 37: /* stwu */ - val = regs->gpr[rd]; - val3 = dform_ea(instr, regs); - /* - * For PPC32 we always use stwu to change stack point with r1. So - * this emulated store may corrupt the exception frame, now we - * have to provide the exception frame trampoline, which is pushed - * below the kprobed function stack. So we only update gpr[1] but - * don't emulate the real store operation. We will do real store - * operation safely in exception return code by checking this flag. - */ - if ((ra == 1) && !(regs->msr & MSR_PR) \ - && (val3 >= (regs->gpr[1] - STACK_INT_FRAME_SIZE))) { -#ifdef CONFIG_PPC32 - /* - * Check if we will touch kernel sack overflow - */ - if (val3 - STACK_INT_FRAME_SIZE <= current->thread.ksp_limit) { - printk(KERN_CRIT "Can't kprobe this since Kernel stack overflow.\n"); - err = -EINVAL; - break; - } -#endif /* CONFIG_PPC32 */ - /* - * Check if we already set since that means we'll - * lose the previous value. - */ - WARN_ON(test_thread_flag(TIF_EMULATE_STACK_STORE)); - set_thread_flag(TIF_EMULATE_STACK_STORE); - err = 0; - } else - err = write_mem(val, val3, 4, regs); - goto ldst_done; + op->type = MKOP(STORE, u, 4); + op->ea = dform_ea(instr, regs); + break; case 38: /* stb */ case 39: /* stbu */ - val = regs->gpr[rd]; - err = write_mem(val, dform_ea(instr, regs), 1, regs); - goto ldst_done; + op->type = MKOP(STORE, u, 1); + op->ea = dform_ea(instr, regs); + break; case 40: /* lhz */ case 41: /* lhzu */ - err = read_mem(®s->gpr[rd], dform_ea(instr, regs), 2, regs); - goto ldst_done; + op->type = MKOP(LOAD, u, 2); + op->ea = dform_ea(instr, regs); + break; case 42: /* lha */ case 43: /* lhau */ - err = read_mem(®s->gpr[rd], dform_ea(instr, regs), 2, regs); - if (!err) - regs->gpr[rd] = (signed short) regs->gpr[rd]; - goto ldst_done; + op->type = MKOP(LOAD, SIGNEXT | u, 2); + op->ea = dform_ea(instr, regs); + break; case 44: /* sth */ case 45: /* sthu */ - val = regs->gpr[rd]; - err = write_mem(val, dform_ea(instr, regs), 2, regs); - goto ldst_done; + op->type = MKOP(STORE, u, 2); + op->ea = dform_ea(instr, regs); + break; case 46: /* lmw */ ra = (instr >> 16) & 0x1f; if (ra >= rd) break; /* invalid form, ra in range to load */ - ea = dform_ea(instr, regs); - do { - err = read_mem(®s->gpr[rd], ea, 4, regs); - if (err) - return 0; - ea += 4; - } while (++rd < 32); - goto instr_done; + op->type = MKOP(LOAD_MULTI, 0, 4); + op->ea = dform_ea(instr, regs); + break; case 47: /* stmw */ - ea = dform_ea(instr, regs); - do { - err = write_mem(regs->gpr[rd], ea, 4, regs); - if (err) - return 0; - ea += 4; - } while (++rd < 32); - goto instr_done; + op->type = MKOP(STORE_MULTI, 0, 4); + op->ea = dform_ea(instr, regs); + break; #ifdef CONFIG_PPC_FPU case 48: /* lfs */ case 49: /* lfsu */ if (!(regs->msr & MSR_FP)) - break; - ea = dform_ea(instr, regs); - err = do_fp_load(rd, do_lfs, ea, 4, regs); - goto ldst_done; + goto fpunavail; + op->type = MKOP(LOAD_FP, u, 4); + op->ea = dform_ea(instr, regs); + break; case 50: /* lfd */ case 51: /* lfdu */ if (!(regs->msr & MSR_FP)) - break; - ea = dform_ea(instr, regs); - err = do_fp_load(rd, do_lfd, ea, 8, regs); - goto ldst_done; + goto fpunavail; + op->type = MKOP(LOAD_FP, u, 8); + op->ea = dform_ea(instr, regs); + break; case 52: /* stfs */ case 53: /* stfsu */ if (!(regs->msr & MSR_FP)) - break; - ea = dform_ea(instr, regs); - err = do_fp_store(rd, do_stfs, ea, 4, regs); - goto ldst_done; + goto fpunavail; + op->type = MKOP(STORE_FP, u, 4); + op->ea = dform_ea(instr, regs); + break; case 54: /* stfd */ case 55: /* stfdu */ if (!(regs->msr & MSR_FP)) - break; - ea = dform_ea(instr, regs); - err = do_fp_store(rd, do_stfd, ea, 8, regs); - goto ldst_done; + goto fpunavail; + op->type = MKOP(STORE_FP, u, 8); + op->ea = dform_ea(instr, regs); + break; #endif #ifdef __powerpc64__ case 58: /* ld[u], lwa */ + op->ea = dsform_ea(instr, regs); switch (instr & 3) { case 0: /* ld */ - err = read_mem(®s->gpr[rd], dsform_ea(instr, regs), - 8, regs); - goto ldst_done; + op->type = MKOP(LOAD, 0, 8); + break; case 1: /* ldu */ - err = read_mem(®s->gpr[rd], dsform_ea(instr, regs), - 8, regs); - goto ldst_done; + op->type = MKOP(LOAD, UPDATE, 8); + break; case 2: /* lwa */ - err = read_mem(®s->gpr[rd], dsform_ea(instr, regs), - 4, regs); - if (!err) - regs->gpr[rd] = (signed int) regs->gpr[rd]; - goto ldst_done; + op->type = MKOP(LOAD, SIGNEXT, 4); + break; } break; case 62: /* std[u] */ - val = regs->gpr[rd]; + op->ea = dsform_ea(instr, regs); switch (instr & 3) { case 0: /* std */ - err = write_mem(val, dsform_ea(instr, regs), 8, regs); - goto ldst_done; + op->type = MKOP(STORE, 0, 8); + break; case 1: /* stdu */ - err = write_mem(val, dsform_ea(instr, regs), 8, regs); - goto ldst_done; + op->type = MKOP(STORE, UPDATE, 8); + break; } break; #endif /* __powerpc64__ */ } - err = -EINVAL; - - ldst_done: - if (err) { - regs->gpr[ra] = old_ra; - return 0; /* invoke DSI if -EFAULT? */ - } - instr_done: - regs->nip = truncate_if_32bit(regs->msr, regs->nip + 4); - return 1; + return 0; logical_done: if (instr & 1) @@ -1733,5 +1587,328 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) arith_done: if (instr & 1) set_cr0(regs, rd); - goto instr_done; + + instr_done: + regs->nip = truncate_if_32bit(regs->msr, regs->nip + 4); + return 1; + + priv: + op->type = INTERRUPT | 0x700; + op->val = SRR1_PROGPRIV; + return 0; + +#ifdef CONFIG_PPC_FPU + fpunavail: + op->type = INTERRUPT | 0x800; + return 0; +#endif + +#ifdef CONFIG_ALTIVEC + vecunavail: + op->type = INTERRUPT | 0xf20; + return 0; +#endif + +#ifdef CONFIG_VSX + vsxunavail: + op->type = INTERRUPT | 0xf40; + return 0; +#endif +} +EXPORT_SYMBOL_GPL(analyse_instr); + +/* + * For PPC32 we always use stwu with r1 to change the stack pointer. + * So this emulated store may corrupt the exception frame, now we + * have to provide the exception frame trampoline, which is pushed + * below the kprobed function stack. So we only update gpr[1] but + * don't emulate the real store operation. We will do real store + * operation safely in exception return code by checking this flag. + */ +static __kprobes int handle_stack_update(unsigned long ea, struct pt_regs *regs) +{ +#ifdef CONFIG_PPC32 + /* + * Check if we will touch kernel stack overflow + */ + if (ea - STACK_INT_FRAME_SIZE <= current->thread.ksp_limit) { + printk(KERN_CRIT "Can't kprobe this since kernel stack would overflow.\n"); + return -EINVAL; + } +#endif /* CONFIG_PPC32 */ + /* + * Check if we already set since that means we'll + * lose the previous value. + */ + WARN_ON(test_thread_flag(TIF_EMULATE_STACK_STORE)); + set_thread_flag(TIF_EMULATE_STACK_STORE); + return 0; +} + +static __kprobes void do_signext(unsigned long *valp, int size) +{ + switch (size) { + case 2: + *valp = (signed short) *valp; + break; + case 4: + *valp = (signed int) *valp; + break; + } +} + +static __kprobes void do_byterev(unsigned long *valp, int size) +{ + switch (size) { + case 2: + *valp = byterev_2(*valp); + break; + case 4: + *valp = byterev_4(*valp); + break; +#ifdef __powerpc64__ + case 8: + *valp = byterev_8(*valp); + break; +#endif + } +} + +/* + * Emulate instructions that cause a transfer of control, + * loads and stores, and a few other instructions. + * Returns 1 if the step was emulated, 0 if not, + * or -1 if the instruction is one that should not be stepped, + * such as an rfid, or a mtmsrd that would clear MSR_RI. + */ +int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) +{ + struct instruction_op op; + int r, err, size; + unsigned long val; + unsigned int cr; + int rd; + + r = analyse_instr(&op, regs, instr); + if (r != 0) + return r; + + err = 0; + size = GETSIZE(op.type); + switch (op.type & INSTR_TYPE_MASK) { + case CACHEOP: + if (!address_ok(regs, op.ea, 8)) + return 0; + switch (op.type & CACHEOP_MASK) { + case DCBST: + __cacheop_user_asmx(op.ea, err, "dcbst"); + break; + case DCBF: + __cacheop_user_asmx(op.ea, err, "dcbf"); + break; + case DCBTST: + if (op.reg == 0) + prefetchw((void *) op.ea); + break; + case DCBT: + if (op.reg == 0) + prefetch((void *) op.ea); + break; + } + if (err) + return 0; + goto instr_done; + + case LARX: + if (regs->msr & MSR_LE) + return 0; + if (op.ea & (size - 1)) + break; /* can't handle misaligned */ + err = -EFAULT; + if (!address_ok(regs, op.ea, size)) + goto ldst_done; + err = 0; + switch (size) { + case 4: + __get_user_asmx(val, op.ea, err, "lwarx"); + break; + case 8: + __get_user_asmx(val, op.ea, err, "ldarx"); + break; + default: + return 0; + } + if (!err) + regs->gpr[op.reg] = val; + goto ldst_done; + + case STCX: + if (regs->msr & MSR_LE) + return 0; + if (op.ea & (size - 1)) + break; /* can't handle misaligned */ + err = -EFAULT; + if (!address_ok(regs, op.ea, size)) + goto ldst_done; + err = 0; + switch (size) { + case 4: + __put_user_asmx(op.val, op.ea, err, "stwcx.", cr); + break; + case 8: + __put_user_asmx(op.val, op.ea, err, "stdcx.", cr); + break; + default: + return 0; + } + if (!err) + regs->ccr = (regs->ccr & 0x0fffffff) | + (cr & 0xe0000000) | + ((regs->xer >> 3) & 0x10000000); + goto ldst_done; + + case LOAD: + if (regs->msr & MSR_LE) + return 0; + err = read_mem(®s->gpr[op.reg], op.ea, size, regs); + if (!err) { + if (op.type & SIGNEXT) + do_signext(®s->gpr[op.reg], size); + if (op.type & BYTEREV) + do_byterev(®s->gpr[op.reg], size); + } + goto ldst_done; + + case LOAD_FP: + if (regs->msr & MSR_LE) + return 0; + if (size == 4) + err = do_fp_load(op.reg, do_lfs, op.ea, size, regs); + else + err = do_fp_load(op.reg, do_lfd, op.ea, size, regs); + goto ldst_done; + +#ifdef CONFIG_ALTIVEC + case LOAD_VMX: + if (regs->msr & MSR_LE) + return 0; + err = do_vec_load(op.reg, do_lvx, op.ea & ~0xfUL, regs); + goto ldst_done; +#endif +#ifdef CONFIG_VSX + case LOAD_VSX: + if (regs->msr & MSR_LE) + return 0; + err = do_vsx_load(op.reg, do_lxvd2x, op.ea, regs); + goto ldst_done; +#endif + case LOAD_MULTI: + if (regs->msr & MSR_LE) + return 0; + rd = op.reg; + do { + err = read_mem(®s->gpr[rd], op.ea, 4, regs); + if (err) + return 0; + op.ea += 4; + } while (++rd < 32); + goto instr_done; + + case STORE: + if (regs->msr & MSR_LE) + return 0; + if ((op.type & UPDATE) && size == sizeof(long) && + op.reg == 1 && op.update_reg == 1 && + !(regs->msr & MSR_PR) && + op.ea >= regs->gpr[1] - STACK_INT_FRAME_SIZE) { + err = handle_stack_update(op.ea, regs); + goto ldst_done; + } + err = write_mem(op.val, op.ea, size, regs); + goto ldst_done; + + case STORE_FP: + if (regs->msr & MSR_LE) + return 0; + if (size == 4) + err = do_fp_store(op.reg, do_stfs, op.ea, size, regs); + else + err = do_fp_store(op.reg, do_stfd, op.ea, size, regs); + goto ldst_done; + +#ifdef CONFIG_ALTIVEC + case STORE_VMX: + if (regs->msr & MSR_LE) + return 0; + err = do_vec_store(op.reg, do_stvx, op.ea & ~0xfUL, regs); + goto ldst_done; +#endif +#ifdef CONFIG_VSX + case STORE_VSX: + if (regs->msr & MSR_LE) + return 0; + err = do_vsx_store(op.reg, do_stxvd2x, op.ea, regs); + goto ldst_done; +#endif + case STORE_MULTI: + if (regs->msr & MSR_LE) + return 0; + rd = op.reg; + do { + err = write_mem(regs->gpr[rd], op.ea, 4, regs); + if (err) + return 0; + op.ea += 4; + } while (++rd < 32); + goto instr_done; + + case MFMSR: + regs->gpr[op.reg] = regs->msr & MSR_MASK; + goto instr_done; + + case MTMSR: + val = regs->gpr[op.reg]; + if ((val & MSR_RI) == 0) + /* can't step mtmsr[d] that would clear MSR_RI */ + return -1; + /* here op.val is the mask of bits to change */ + regs->msr = (regs->msr & ~op.val) | (val & op.val); + goto instr_done; + +#ifdef CONFIG_PPC64 + case SYSCALL: /* sc */ + /* + * N.B. this uses knowledge about how the syscall + * entry code works. If that is changed, this will + * need to be changed also. + */ + if (regs->gpr[0] == 0x1ebe && + cpu_has_feature(CPU_FTR_REAL_LE)) { + regs->msr ^= MSR_LE; + goto instr_done; + } + regs->gpr[9] = regs->gpr[13]; + regs->gpr[10] = MSR_KERNEL; + regs->gpr[11] = regs->nip + 4; + regs->gpr[12] = regs->msr & MSR_MASK; + regs->gpr[13] = (unsigned long) get_paca(); + regs->nip = (unsigned long) &system_call_common; + regs->msr = MSR_KERNEL; + return 1; + + case RFI: + return -1; +#endif + } + return 0; + + ldst_done: + if (err) + return 0; + if (op.type & UPDATE) + regs->gpr[op.update_reg] = op.ea; + + instr_done: + regs->nip = truncate_if_32bit(regs->msr, regs->nip + 4); + return 1; } From cf87c3f6b64791ce5d4c7e591c915065d31a162d Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Tue, 2 Sep 2014 14:35:08 +1000 Subject: [PATCH 057/130] powerpc: Emulate icbi, mcrf and conditional-trap instructions This extends the instruction emulation done by analyse_instr() and emulate_step() to handle a few more instructions that are found in the kernel. Signed-off-by: Paul Mackerras Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/sstep.h | 1 + arch/powerpc/lib/sstep.c | 60 ++++++++++++++++++++++++++++++++ 2 files changed, 61 insertions(+) diff --git a/arch/powerpc/include/asm/sstep.h b/arch/powerpc/include/asm/sstep.h index 1a693b13ddc5..d3a42cc45a82 100644 --- a/arch/powerpc/include/asm/sstep.h +++ b/arch/powerpc/include/asm/sstep.h @@ -66,6 +66,7 @@ enum instruction_type { #define DCBF 0x100 #define DCBTST 0x200 #define DCBT 0x300 +#define ICBI 0x400 /* Size field in type word */ #define SIZE(n) ((n) << 8) diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index 3726a03179ab..209a506f8517 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -600,6 +600,23 @@ static void __kprobes do_cmp_unsigned(struct pt_regs *regs, unsigned long v1, regs->ccr = (regs->ccr & ~(0xf << shift)) | (crval << shift); } +static int __kprobes trap_compare(long v1, long v2) +{ + int ret = 0; + + if (v1 < v2) + ret |= 0x10; + else if (v1 > v2) + ret |= 0x08; + else + ret |= 0x04; + if ((unsigned long)v1 < (unsigned long)v2) + ret |= 0x02; + else if ((unsigned long)v1 > (unsigned long)v2) + ret |= 0x01; + return ret; +} + /* * Elements of 32-bit rotate and mask instructions. */ @@ -669,6 +686,13 @@ int __kprobes analyse_instr(struct instruction_op *op, struct pt_regs *regs, return 1; case 19: switch ((instr >> 1) & 0x3ff) { + case 0: /* mcrf */ + rd = (instr >> 21) & 0x1c; + ra = (instr >> 16) & 0x1c; + val = (regs->ccr >> ra) & 0xf; + regs->ccr = (regs->ccr & ~(0xfUL << rd)) | (val << rd); + goto instr_done; + case 16: /* bclr */ case 528: /* bcctr */ op->type = BRANCH; @@ -745,6 +769,17 @@ int __kprobes analyse_instr(struct instruction_op *op, struct pt_regs *regs, rb = (instr >> 11) & 0x1f; switch (opcode) { +#ifdef __powerpc64__ + case 2: /* tdi */ + if (rd & trap_compare(regs->gpr[ra], (short) instr)) + goto trap; + goto instr_done; +#endif + case 3: /* twi */ + if (rd & trap_compare((int)regs->gpr[ra], (short) instr)) + goto trap; + goto instr_done; + case 7: /* mulli */ regs->gpr[rd] = regs->gpr[ra] * (short) instr; goto instr_done; @@ -893,6 +928,18 @@ int __kprobes analyse_instr(struct instruction_op *op, struct pt_regs *regs, case 31: switch ((instr >> 1) & 0x3ff) { + case 4: /* tw */ + if (rd == 0x1f || + (rd & trap_compare((int)regs->gpr[ra], + (int)regs->gpr[rb]))) + goto trap; + goto instr_done; +#ifdef __powerpc64__ + case 68: /* td */ + if (rd & trap_compare(regs->gpr[ra], regs->gpr[rb])) + goto trap; + goto instr_done; +#endif case 83: /* mfmsr */ if (regs->msr & MSR_PR) goto priv; @@ -1269,6 +1316,11 @@ int __kprobes analyse_instr(struct instruction_op *op, struct pt_regs *regs, op->ea = xform_ea(instr, regs); op->reg = rd; return 0; + + case 982: /* icbi */ + op->type = MKOP(CACHEOP, ICBI, 0); + op->ea = xform_ea(instr, regs); + return 0; } break; } @@ -1597,6 +1649,11 @@ int __kprobes analyse_instr(struct instruction_op *op, struct pt_regs *regs, op->val = SRR1_PROGPRIV; return 0; + trap: + op->type = INTERRUPT | 0x700; + op->val = SRR1_PROGTRAP; + return 0; + #ifdef CONFIG_PPC_FPU fpunavail: op->type = INTERRUPT | 0x800; @@ -1714,6 +1771,9 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) if (op.reg == 0) prefetch((void *) op.ea); break; + case ICBI: + __cacheop_user_asmx(op.ea, err, "icbi"); + break; } if (err) return 0; From c9f6f4ed95d47e3319dedaf8cc31d744ac67fe6f Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Tue, 2 Sep 2014 14:35:09 +1000 Subject: [PATCH 058/130] powerpc: Implement emulation of string loads and stores The size field of the op.type word is now the total number of bytes to be loaded or stored. Signed-off-by: Paul Mackerras Signed-off-by: Michael Ellerman --- arch/powerpc/lib/sstep.c | 59 +++++++++++++++++++++++++++++++++------- 1 file changed, 49 insertions(+), 10 deletions(-) diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index 209a506f8517..54651fc2d412 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -1433,11 +1433,24 @@ int __kprobes analyse_instr(struct instruction_op *op, struct pt_regs *regs, break; #endif + case 533: /* lswx */ + op->type = MKOP(LOAD_MULTI, 0, regs->xer & 0x7f); + break; case 534: /* lwbrx */ op->type = MKOP(LOAD, BYTEREV, 4); break; + case 597: /* lswi */ + if (rb == 0) + rb = 32; /* # bytes to load */ + op->type = MKOP(LOAD_MULTI, 0, rb); + op->ea = 0; + if (ra) + op->ea = truncate_if_32bit(regs->msr, + regs->gpr[ra]); + break; + #ifdef CONFIG_PPC_FPU case 535: /* lfsx */ case 567: /* lfsux */ @@ -1475,11 +1488,25 @@ int __kprobes analyse_instr(struct instruction_op *op, struct pt_regs *regs, break; #endif + case 661: /* stswx */ + op->type = MKOP(STORE_MULTI, 0, regs->xer & 0x7f); + break; + case 662: /* stwbrx */ op->type = MKOP(STORE, BYTEREV, 4); op->val = byterev_4(regs->gpr[rd]); break; + case 725: + if (rb == 0) + rb = 32; /* # bytes to store */ + op->type = MKOP(STORE_MULTI, 0, rb); + op->ea = 0; + if (ra) + op->ea = truncate_if_32bit(regs->msr, + regs->gpr[ra]); + break; + case 790: /* lhbrx */ op->type = MKOP(LOAD, BYTEREV, 2); break; @@ -1553,15 +1580,14 @@ int __kprobes analyse_instr(struct instruction_op *op, struct pt_regs *regs, break; case 46: /* lmw */ - ra = (instr >> 16) & 0x1f; if (ra >= rd) break; /* invalid form, ra in range to load */ - op->type = MKOP(LOAD_MULTI, 0, 4); + op->type = MKOP(LOAD_MULTI, 0, 4 * (32 - rd)); op->ea = dform_ea(instr, regs); break; case 47: /* stmw */ - op->type = MKOP(STORE_MULTI, 0, 4); + op->type = MKOP(STORE_MULTI, 0, 4 * (32 - rd)); op->ea = dform_ea(instr, regs); break; @@ -1744,7 +1770,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) int r, err, size; unsigned long val; unsigned int cr; - int rd; + int i, rd, nb; r = analyse_instr(&op, regs, instr); if (r != 0) @@ -1866,12 +1892,18 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) if (regs->msr & MSR_LE) return 0; rd = op.reg; - do { - err = read_mem(®s->gpr[rd], op.ea, 4, regs); + for (i = 0; i < size; i += 4) { + nb = size - i; + if (nb > 4) + nb = 4; + err = read_mem(®s->gpr[rd], op.ea, nb, regs); if (err) return 0; + if (nb < 4) /* left-justify last bytes */ + regs->gpr[rd] <<= 32 - 8 * nb; op.ea += 4; - } while (++rd < 32); + ++rd; + } goto instr_done; case STORE: @@ -1914,12 +1946,19 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) if (regs->msr & MSR_LE) return 0; rd = op.reg; - do { - err = write_mem(regs->gpr[rd], op.ea, 4, regs); + for (i = 0; i < size; i += 4) { + val = regs->gpr[rd]; + nb = size - i; + if (nb > 4) + nb = 4; + else + val >>= 32 - 8 * nb; + err = write_mem(val, op.ea, nb, regs); if (err) return 0; op.ea += 4; - } while (++rd < 32); + ++rd; + } goto instr_done; case MFMSR: From 70ad237515d99595ed03848bd8e549e50e83c4f2 Mon Sep 17 00:00:00 2001 From: Li Zhong Date: Wed, 27 Aug 2014 17:33:59 +0800 Subject: [PATCH 059/130] powerpc: Fix warning reported by verify_cpu_node_mapping() With commit 2fabf084b6ad ("powerpc: reorder per-cpu NUMA information's initialization"), during boottime, cpu_numa_callback() is called earlier(before their online) for each cpu, and verify_cpu_node_mapping() uses cpu_to_node() to check whether siblings are in the same node. It skips the checking for siblings that are not online yet. So the only check done here is for the bootcpu, which is online at that time. But the per-cpu numa_node cpu_to_node() uses hasn't been set up yet (which will be set up in smp_prepare_cpus()). So I saw something like following reported: [ 0.000000] CPU thread siblings 1/2/3 and 0 don't belong to the same node! As we don't actually do the checking during this early stage, so maybe we could directly call numa_setup_cpu() in do_init_bootmem(). Cc: Nishanth Aravamudan Cc: Nathan Fontenot Signed-off-by: Li Zhong Acked-by: Nishanth Aravamudan Signed-off-by: Michael Ellerman --- arch/powerpc/mm/numa.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index d7737a542fd7..9918c0200857 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -1128,8 +1128,7 @@ void __init do_init_bootmem(void) * early in boot, cf. smp_prepare_cpus(). */ for_each_possible_cpu(cpu) { - cpu_numa_callback(&ppc64_numa_nb, CPU_UP_PREPARE, - (void *)(unsigned long)cpu); + numa_setup_cpu((unsigned long)cpu); } } From bc3c4327c92b9ceb9a6356ec64d1b2ab2dc851f9 Mon Sep 17 00:00:00 2001 From: Li Zhong Date: Wed, 27 Aug 2014 17:34:00 +0800 Subject: [PATCH 060/130] powerpc: Only set numa node information for present cpus at boottime As Nish suggested, it makes more sense to init the numa node informatiion for present cpus at boottime, which could also avoid WARN_ON(1) in numa_setup_cpu(). With this change, we also need to change the smp_prepare_cpus() to set up numa information only on present cpus. For those possible, but not present cpus, their numa information will be set up after they are started, as the original code did before commit 2fabf084b6ad. Cc: Nishanth Aravamudan Cc: Nathan Fontenot Signed-off-by: Li Zhong Acked-by: Nishanth Aravamudan Tested-by: Cyril Bur Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/smp.c | 10 ++++++++-- arch/powerpc/mm/numa.c | 2 +- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 4866d5dbd420..71e186d5f331 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -380,8 +380,11 @@ void __init smp_prepare_cpus(unsigned int max_cpus) /* * numa_node_id() works after this. */ - set_cpu_numa_node(cpu, numa_cpu_lookup_table[cpu]); - set_cpu_numa_mem(cpu, local_memory_node(numa_cpu_lookup_table[cpu])); + if (cpu_present(cpu)) { + set_cpu_numa_node(cpu, numa_cpu_lookup_table[cpu]); + set_cpu_numa_mem(cpu, + local_memory_node(numa_cpu_lookup_table[cpu])); + } } cpumask_set_cpu(boot_cpuid, cpu_sibling_mask(boot_cpuid)); @@ -729,6 +732,9 @@ void start_secondary(void *unused) } traverse_core_siblings(cpu, true); + set_numa_node(numa_cpu_lookup_table[cpu]); + set_numa_mem(local_memory_node(numa_cpu_lookup_table[cpu])); + smp_wmb(); notify_cpu_starting(cpu); set_cpu_online(cpu, true); diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 9918c0200857..3a9061e9f5dd 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -1127,7 +1127,7 @@ void __init do_init_bootmem(void) * even before we online them, so that we can use cpu_to_{node,mem} * early in boot, cf. smp_prepare_cpus(). */ - for_each_possible_cpu(cpu) { + for_each_present_cpu(cpu) { numa_setup_cpu((unsigned long)cpu); } } From 297cf5025b3bda59e15d6ba1f84022ebd409925b Mon Sep 17 00:00:00 2001 From: Li Zhong Date: Wed, 27 Aug 2014 17:34:01 +0800 Subject: [PATCH 061/130] powerpc: some changes in numa_setup_cpu() this patches changes some error handling logics in numa_setup_cpu(), when cpu node is not found, so: if the cpu is possible, but not present, -1 is kept in numa_cpu_lookup_table, so later, if the cpu is added, we could set correct numa information for it. if the cpu is present, then we set the first online node to numa_cpu_lookup_table instead of 0 ( in case 0 might not be an online node? ) Cc: Nishanth Aravamudan Cc: Nathan Fontenot Signed-off-by: Li Zhong Acked-by: Nishanth Aravamudan Signed-off-by: Michael Ellerman --- arch/powerpc/mm/numa.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 3a9061e9f5dd..ec32d463cad9 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -538,7 +538,7 @@ static int of_drconf_to_nid_single(struct of_drconf_cell *drmem, */ static int numa_setup_cpu(unsigned long lcpu) { - int nid; + int nid = -1; struct device_node *cpu; /* @@ -555,19 +555,21 @@ static int numa_setup_cpu(unsigned long lcpu) if (!cpu) { WARN_ON(1); - nid = 0; - goto out; + if (cpu_present(lcpu)) + goto out_present; + else + goto out; } nid = of_node_to_nid_single(cpu); +out_present: if (nid < 0 || !node_online(nid)) nid = first_online_node; -out: + map_cpu_to_node(lcpu, nid); - of_node_put(cpu); - +out: return nid; } From a9303e1bd05751649e5408e521aab19380c7fd8a Mon Sep 17 00:00:00 2001 From: Pranith Kumar Date: Thu, 21 Aug 2014 09:04:31 -0400 Subject: [PATCH 062/130] powerpc: Fix build failure on 44x Fix the following build failure drivers/built-in.o: In function `nhi_init': nhi.c:(.init.text+0x63390): undefined reference to `ehci_init_driver' by adding a dependency on USB_EHCI_HCD which supplies the ehci_init_driver(). Also we need to depend on USB_OHCI_HCD similarly Signed-off-by: Pranith Kumar Acked-by: Alistair Popple Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/44x/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/44x/Kconfig b/arch/powerpc/platforms/44x/Kconfig index 4d88f6a19058..3e7deb2aaf5a 100644 --- a/arch/powerpc/platforms/44x/Kconfig +++ b/arch/powerpc/platforms/44x/Kconfig @@ -216,8 +216,8 @@ config AKEBONO select IBM_EMAC_EMAC4 select IBM_EMAC_RGMII_WOL select USB - select USB_OHCI_HCD_PLATFORM - select USB_EHCI_HCD_PLATFORM + select USB_OHCI_HCD_PLATFORM if USB_OHCI_HCD + select USB_EHCI_HCD_PLATFORM if USB_EHCI_HCD select MMC_SDHCI select MMC_SDHCI_PLTFM select MMC_SDHCI_OF_476GTR From 92f792ece9c41befde64d8799fd3dde5d57407ab Mon Sep 17 00:00:00 2001 From: Pranith Kumar Date: Thu, 21 Aug 2014 09:16:04 -0400 Subject: [PATCH 063/130] powerpc: Fix build failure when CONFIG_USB=y We are enabling USB unconditionally which results in following build failure drivers/built-in.o: In function `tb_drom_read': (.text+0x1b62b70): undefined reference to `usb_speed_string' make: *** [vmlinux] Error Enable USB only if USB_SUPPORT is set to avoid such failures Signed-off-by: Pranith Kumar Acked-by: Alistair Popple Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/44x/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/44x/Kconfig b/arch/powerpc/platforms/44x/Kconfig index 3e7deb2aaf5a..82f2da28cd27 100644 --- a/arch/powerpc/platforms/44x/Kconfig +++ b/arch/powerpc/platforms/44x/Kconfig @@ -215,7 +215,7 @@ config AKEBONO select NET_VENDOR_IBM select IBM_EMAC_EMAC4 select IBM_EMAC_RGMII_WOL - select USB + select USB if USB_SUPPORT select USB_OHCI_HCD_PLATFORM if USB_OHCI_HCD select USB_EHCI_HCD_PLATFORM if USB_EHCI_HCD select MMC_SDHCI From bdce97e94b95db409264d5ae6badd0db7628681c Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 6 Aug 2014 19:08:21 +1000 Subject: [PATCH 064/130] powerpc/ppc64: Clean up the boot-time settings display At boot we display a bunch of low level settings which can be useful to know, and can help to spot bugs when things are fundamentally misconfigured. At the moment they are very widely spaced, so that we can accommodate the line: ppc64_caches.dcache_line_size = 0xYY But we only print that line when the cache line size is not 128, ie. almost never, so it just makes the display look odd usually. The ppc64_caches prefix is redundant so remove it, which means we can align things a bit closer for the common case. While we're there replace the last use of camelCase (physicalMemorySize), and use phys_mem_size. Before: Starting Linux PPC64 #104 SMP Wed Aug 6 18:41:34 EST 2014 ----------------------------------------------------- ppc64_pft_size = 0x1a physicalMemorySize = 0x200000000 ppc64_caches.dcache_line_size = 0xf0 ppc64_caches.icache_line_size = 0xf0 htab_address = 0xdeadbeef htab_hash_mask = 0x7ffff physical_start = 0xf000bar ----------------------------------------------------- After: Starting Linux PPC64 #103 SMP Wed Aug 6 18:38:04 EST 2014 ----------------------------------------------------- ppc64_pft_size = 0x1a phys_mem_size = 0x200000000 dcache_line_size = 0xf0 icache_line_size = 0xf0 htab_address = 0xdeadbeef htab_hash_mask = 0x7ffff physical_start = 0xf000bar ----------------------------------------------------- This patch is final, no bike shedding ;) Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/setup_64.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 75d62d63fe68..32f4c320b9dc 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -525,21 +525,23 @@ void __init setup_system(void) printk("Starting Linux PPC64 %s\n", init_utsname()->version); printk("-----------------------------------------------------\n"); - printk("ppc64_pft_size = 0x%llx\n", ppc64_pft_size); - printk("physicalMemorySize = 0x%llx\n", memblock_phys_mem_size()); + printk("ppc64_pft_size = 0x%llx\n", ppc64_pft_size); + printk("phys_mem_size = 0x%llx\n", memblock_phys_mem_size()); + if (ppc64_caches.dline_size != 0x80) - printk("ppc64_caches.dcache_line_size = 0x%x\n", - ppc64_caches.dline_size); + printk("dcache_line_size = 0x%x\n", ppc64_caches.dline_size); if (ppc64_caches.iline_size != 0x80) - printk("ppc64_caches.icache_line_size = 0x%x\n", - ppc64_caches.iline_size); + printk("icache_line_size = 0x%x\n", ppc64_caches.iline_size); + #ifdef CONFIG_PPC_STD_MMU_64 if (htab_address) - printk("htab_address = 0x%p\n", htab_address); - printk("htab_hash_mask = 0x%lx\n", htab_hash_mask); -#endif /* CONFIG_PPC_STD_MMU_64 */ + printk("htab_address = 0x%p\n", htab_address); + + printk("htab_hash_mask = 0x%lx\n", htab_hash_mask); +#endif + if (PHYSICAL_START > 0) - printk("physical_start = 0x%llx\n", + printk("physical_start = 0x%llx\n", (unsigned long long)PHYSICAL_START); printk("-----------------------------------------------------\n"); From 87d99c0e2c2f9d1386d8e284a5fbc13e96adbe25 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 6 Aug 2014 19:08:22 +1000 Subject: [PATCH 065/130] powerpc/ppc64: Print CPU/MMU/FW features at boot "Helps debug funky firmware issues". After: Starting Linux PPC64 #108 SMP Wed Aug 6 19:04:51 EST 2014 ----------------------------------------------------- ppc64_pft_size = 0x1a phys_mem_size = 0x200000000 cpu_features = 0x17fc7a6c18500249 possible = 0x1fffffff18700649 always = 0x0000000000000040 cpu_user_features = 0xdc0065c2 0xee000000 mmu_features = 0x5a000001 firmware_features = 0x00000001405a440b htab_hash_mask = 0x7ffff ----------------------------------------------------- Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/setup_64.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 32f4c320b9dc..2ef9893a06bd 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -533,6 +533,14 @@ void __init setup_system(void) if (ppc64_caches.iline_size != 0x80) printk("icache_line_size = 0x%x\n", ppc64_caches.iline_size); + printk("cpu_features = 0x%016lx\n", cur_cpu_spec->cpu_features); + printk(" possible = 0x%016lx\n", CPU_FTRS_POSSIBLE); + printk(" always = 0x%016lx\n", CPU_FTRS_ALWAYS); + printk("cpu_user_features = 0x%08x 0x%08x\n", cur_cpu_spec->cpu_user_features, + cur_cpu_spec->cpu_user_features2); + printk("mmu_features = 0x%08x\n", cur_cpu_spec->mmu_features); + printk("firmware_features = 0x%016lx\n", powerpc_firmware_features); + #ifdef CONFIG_PPC_STD_MMU_64 if (htab_address) printk("htab_address = 0x%p\n", htab_address); From 9e34992a622a15fb915471ccdebed19a87d8d531 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 7 Aug 2014 17:26:33 +1000 Subject: [PATCH 066/130] powerpc/mm: Unindent htab_dt_scan_page_sizes() We can unindent the bulk of htab_dt_scan_page_sizes() by returning early if the property is not found. That is nice in and of itself, but also has the advantage of making it clear that we always return success once we have found the ibm,segment-page-sizes property. Signed-off-by: Michael Ellerman --- arch/powerpc/mm/hash_utils_64.c | 119 ++++++++++++++++---------------- 1 file changed, 59 insertions(+), 60 deletions(-) diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 5786f2a81efe..dd73b63f9479 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -334,70 +334,69 @@ static int __init htab_dt_scan_page_sizes(unsigned long node, return 0; prop = of_get_flat_dt_prop(node, "ibm,segment-page-sizes", &size); - if (prop != NULL) { - pr_info("Page sizes from device-tree:\n"); - size /= 4; - cur_cpu_spec->mmu_features &= ~(MMU_FTR_16M_PAGE); - while(size > 0) { - unsigned int base_shift = be32_to_cpu(prop[0]); - unsigned int slbenc = be32_to_cpu(prop[1]); - unsigned int lpnum = be32_to_cpu(prop[2]); - struct mmu_psize_def *def; - int idx, base_idx; + if (!prop) + return 0; - size -= 3; prop += 3; - base_idx = get_idx_from_shift(base_shift); - if (base_idx < 0) { - /* - * skip the pte encoding also - */ - prop += lpnum * 2; size -= lpnum * 2; - continue; - } - def = &mmu_psize_defs[base_idx]; - if (base_idx == MMU_PAGE_16M) - cur_cpu_spec->mmu_features |= MMU_FTR_16M_PAGE; + pr_info("Page sizes from device-tree:\n"); + size /= 4; + cur_cpu_spec->mmu_features &= ~(MMU_FTR_16M_PAGE); + while(size > 0) { + unsigned int base_shift = be32_to_cpu(prop[0]); + unsigned int slbenc = be32_to_cpu(prop[1]); + unsigned int lpnum = be32_to_cpu(prop[2]); + struct mmu_psize_def *def; + int idx, base_idx; - def->shift = base_shift; - if (base_shift <= 23) - def->avpnm = 0; - else - def->avpnm = (1 << (base_shift - 23)) - 1; - def->sllp = slbenc; - /* - * We don't know for sure what's up with tlbiel, so - * for now we only set it for 4K and 64K pages - */ - if (base_idx == MMU_PAGE_4K || base_idx == MMU_PAGE_64K) - def->tlbiel = 1; - else - def->tlbiel = 0; - - while (size > 0 && lpnum) { - unsigned int shift = be32_to_cpu(prop[0]); - int penc = be32_to_cpu(prop[1]); - - prop += 2; size -= 2; - lpnum--; - - idx = get_idx_from_shift(shift); - if (idx < 0) - continue; - - if (penc == -1) - pr_err("Invalid penc for base_shift=%d " - "shift=%d\n", base_shift, shift); - - def->penc[idx] = penc; - pr_info("base_shift=%d: shift=%d, sllp=0x%04lx," - " avpnm=0x%08lx, tlbiel=%d, penc=%d\n", - base_shift, shift, def->sllp, - def->avpnm, def->tlbiel, def->penc[idx]); - } + size -= 3; prop += 3; + base_idx = get_idx_from_shift(base_shift); + if (base_idx < 0) { + /* skip the pte encoding also */ + prop += lpnum * 2; size -= lpnum * 2; + continue; + } + def = &mmu_psize_defs[base_idx]; + if (base_idx == MMU_PAGE_16M) + cur_cpu_spec->mmu_features |= MMU_FTR_16M_PAGE; + + def->shift = base_shift; + if (base_shift <= 23) + def->avpnm = 0; + else + def->avpnm = (1 << (base_shift - 23)) - 1; + def->sllp = slbenc; + /* + * We don't know for sure what's up with tlbiel, so + * for now we only set it for 4K and 64K pages + */ + if (base_idx == MMU_PAGE_4K || base_idx == MMU_PAGE_64K) + def->tlbiel = 1; + else + def->tlbiel = 0; + + while (size > 0 && lpnum) { + unsigned int shift = be32_to_cpu(prop[0]); + int penc = be32_to_cpu(prop[1]); + + prop += 2; size -= 2; + lpnum--; + + idx = get_idx_from_shift(shift); + if (idx < 0) + continue; + + if (penc == -1) + pr_err("Invalid penc for base_shift=%d " + "shift=%d\n", base_shift, shift); + + def->penc[idx] = penc; + pr_info("base_shift=%d: shift=%d, sllp=0x%04lx," + " avpnm=0x%08lx, tlbiel=%d, penc=%d\n", + base_shift, shift, def->sllp, + def->avpnm, def->tlbiel, def->penc[idx]); } - return 1; } - return 0; + + return 1; } #ifdef CONFIG_HUGETLB_PAGE From 2a58222f8220089c7b602cf5fae557e0bc0bc303 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Wed, 17 Sep 2014 10:48:26 +0800 Subject: [PATCH 067/130] powerpc/eeh: Fix kernel crash when passing through VF When doing vfio passthrough a VF, the kernel will crash with following message: [ 442.656459] Unable to handle kernel paging request for data at address 0x00000060 [ 442.656593] Faulting instruction address: 0xc000000000038b88 [ 442.656706] Oops: Kernel access of bad area, sig: 11 [#1] [ 442.656798] SMP NR_CPUS=1024 NUMA PowerNV [ 442.656890] Modules linked in: vfio_pci mlx4_core nf_conntrack_netbios_ns nf_conntrack_broadcast ipt_MASQUERADE ip6t_REJECT xt_conntrack bnep bluetooth rfkill ebtable_nat ebtable_broute bridge stp llc ebtable_filter ebtables ip6table_nat nf_conntrack_ipv6 nf_defrag_ipv6 nf_nat_ipv6 ip6table_mangle ip6table_security ip6table_raw ip6table_filter ip6_tables iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 nf_nat nf_conntrack iptable_mangle iptable_security iptable_raw tg3 nfsd be2net nfs_acl ses lockd ptp enclosure pps_core kvm_hv kvm_pr shpchp binfmt_misc kvm sunrpc uinput lpfc scsi_transport_fc ipr scsi_tgt [last unloaded: mlx4_core] [ 442.658152] CPU: 40 PID: 14948 Comm: qemu-system-ppc Not tainted 3.10.42yw-pkvm+ #37 [ 442.658219] task: c000000f7e2a9a00 ti: c000000f6dc3c000 task.ti: c000000f6dc3c000 [ 442.658287] NIP: c000000000038b88 LR: c0000000004435a8 CTR: c000000000455bc0 [ 442.658352] REGS: c000000f6dc3f580 TRAP: 0300 Not tainted (3.10.42yw-pkvm+) [ 442.658419] MSR: 9000000000009032 CR: 28004882 XER: 20000000 [ 442.658577] CFAR: c00000000000908c DAR: 0000000000000060 DSISR: 40000000 SOFTE: 1 GPR00: c0000000004435a8 c000000f6dc3f800 c0000000012b1c10 c00000000da24000 GPR04: 0000000000000003 0000000000001004 00000000000015b3 000000000000ffff GPR08: c00000000127f5d8 0000000000000000 000000000000ffff 0000000000000000 GPR12: c000000000068078 c00000000fdd6800 000001003c320c80 000001003c3607f0 GPR16: 0000000000000001 00000000105480c8 000000001055aaa8 000001003c31ab18 GPR20: 000001003c10fb40 000001003c360ae8 000000001063bcf0 000000001063bdb0 GPR24: 000001003c15ed70 0000000010548f40 c000001fe5514c88 c000001fe5514cb0 GPR28: c00000000da24000 0000000000000000 c00000000da24000 0000000000000003 [ 442.659471] NIP [c000000000038b88] .pcibios_set_pcie_reset_state+0x28/0x130 [ 442.659530] LR [c0000000004435a8] .pci_set_pcie_reset_state+0x28/0x40 [ 442.659585] Call Trace: [ 442.659610] [c000000f6dc3f800] [00000000000719e0] 0x719e0 (unreliable) [ 442.659677] [c000000f6dc3f880] [c0000000004435a8] .pci_set_pcie_reset_state+0x28/0x40 [ 442.659757] [c000000f6dc3f900] [c000000000455bf8] .reset_fundamental+0x38/0x80 [ 442.659835] [c000000f6dc3f980] [c0000000004562a8] .pci_dev_specific_reset+0xa8/0xf0 [ 442.659913] [c000000f6dc3fa00] [c0000000004448c4] .__pci_dev_reset+0x44/0x430 [ 442.659980] [c000000f6dc3fab0] [c000000000444d5c] .pci_reset_function+0x7c/0xc0 [ 442.660059] [c000000f6dc3fb30] [d00000001c141ab8] .vfio_pci_open+0xe8/0x2b0 [vfio_pci] [ 442.660139] [c000000f6dc3fbd0] [c000000000586c30] .vfio_group_fops_unl_ioctl+0x3a0/0x630 [ 442.660219] [c000000f6dc3fc90] [c000000000255fbc] .do_vfs_ioctl+0x4ec/0x7c0 [ 442.660286] [c000000f6dc3fd80] [c000000000256364] .SyS_ioctl+0xd4/0xf0 [ 442.660354] [c000000f6dc3fe30] [c000000000009e54] syscall_exit+0x0/0x98 [ 442.660420] Instruction dump: [ 442.660454] 4bfffce9 4bfffee4 7c0802a6 fbc1fff0 fbe1fff8 f8010010 f821ff81 7c7e1b78 [ 442.660566] 7c9f2378 60000000 60000000 e93e02c8 2fa30000 41de00c4 2b9f0002 [ 442.660679] ---[ end trace a64ac9546bcf0328 ]--- [ 442.660724] The reason is current VF is not EEH enabled. This patch introduces a macro to convert eeh_dev to eeh_pe. By doing so, it will prevent converting with NULL pointer. Signed-off-by: Wei Yang Acked-by: Gavin Shan CC: Michael Ellerman V3 -> V4: 1. move the macro definition from include/linux/pci.h to arch/powerpc/include/asm/eeh.h V2 -> V3: 1. rebased on 3.17-rc4 2. introduce a macro 3. use this macro in several other places V1 -> V2: 1. code style and patch subject adjustment Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/eeh.h | 5 +++++ arch/powerpc/kernel/eeh.c | 4 ++-- arch/powerpc/kernel/eeh_pe.c | 2 +- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index 9983c3d26bca..757014fe23d3 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -146,6 +146,11 @@ static inline struct pci_dev *eeh_dev_to_pci_dev(struct eeh_dev *edev) return edev ? edev->pdev : NULL; } +static inline struct eeh_pe *eeh_dev_to_pe(struct eeh_dev* edev) +{ + return edev ? edev->pe : NULL; +} + /* Return values from eeh_ops::next_error */ enum { EEH_NEXT_ERR_NONE = 0, diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 59a64f8dc85f..0f1b63714718 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -410,7 +410,7 @@ int eeh_dev_check_failure(struct eeh_dev *edev) } dn = eeh_dev_to_of_node(edev); dev = eeh_dev_to_pci_dev(edev); - pe = edev->pe; + pe = eeh_dev_to_pe(edev); /* Access to IO BARs might get this far and still not want checking. */ if (!pe) { @@ -634,7 +634,7 @@ int eeh_pci_enable(struct eeh_pe *pe, int function) int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state state) { struct eeh_dev *edev = pci_dev_to_eeh_dev(dev); - struct eeh_pe *pe = edev->pe; + struct eeh_pe *pe = eeh_dev_to_pe(edev); if (!pe) { pr_err("%s: No PE found on PCI device %s\n", diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c index 00e3844525a6..5864017e2bd9 100644 --- a/arch/powerpc/kernel/eeh_pe.c +++ b/arch/powerpc/kernel/eeh_pe.c @@ -428,7 +428,7 @@ int eeh_rmv_from_parent_pe(struct eeh_dev *edev) } /* Remove the EEH device */ - pe = edev->pe; + pe = eeh_dev_to_pe(edev); edev->pe = NULL; list_del(&edev->list); From 45eb47242df87acbf26c0c2c0a7ab677661153c6 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Sun, 21 Sep 2014 10:55:05 -0700 Subject: [PATCH 068/130] powerpc: pci-ioda: Remove unnecessary return value from printk The return value is unnecessary and unused, so make the functions void instead of int. Signed-off-by: Joe Perches Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/pci-ioda.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 4441bfa84c06..7518aa19a2ef 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -42,12 +42,11 @@ #include "pci.h" #define define_pe_printk_level(func, kern_level) \ -static int func(const struct pnv_ioda_pe *pe, const char *fmt, ...) \ +static void func(const struct pnv_ioda_pe *pe, const char *fmt, ...) \ { \ struct va_format vaf; \ va_list args; \ char pfix[32]; \ - int r; \ \ va_start(args, fmt); \ \ @@ -61,12 +60,10 @@ static int func(const struct pnv_ioda_pe *pe, const char *fmt, ...) \ sprintf(pfix, "%04x:%02x ", \ pci_domain_nr(pe->pbus), \ pe->pbus->number); \ - r = printk(kern_level "pci %s: [PE# %.3d] %pV", \ - pfix, pe->pe_number, &vaf); \ + printk(kern_level "pci %s: [PE# %.3d] %pV", \ + pfix, pe->pe_number, &vaf); \ \ va_end(args); \ - \ - return r; \ } \ define_pe_printk_level(pe_err, KERN_ERR); From 6d31c2fa0eee89e31ca48a66ccfaf71ef545c474 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Sun, 21 Sep 2014 10:55:06 -0700 Subject: [PATCH 069/130] powerpc: pci-ioda: Use a single function to emit logging messages No need for 3 functions when a single one will do. Modify the function declaring macros to call the single function. Reduces object code size a little: $ size arch/powerpc/platforms/powernv/pci-ioda.o* text data bss dec hex filename 22303 1073 6680 30056 7568 arch/powerpc/platforms/powernv/pci-ioda.o.new 22840 1121 6776 30737 7811 arch/powerpc/platforms/powernv/pci-ioda.o.old Signed-off-by: Joe Perches Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/pci-ioda.c | 56 ++++++++++++----------- 1 file changed, 29 insertions(+), 27 deletions(-) diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 7518aa19a2ef..bc79bbdbb6c7 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -41,34 +41,36 @@ #include "powernv.h" #include "pci.h" -#define define_pe_printk_level(func, kern_level) \ -static void func(const struct pnv_ioda_pe *pe, const char *fmt, ...) \ -{ \ - struct va_format vaf; \ - va_list args; \ - char pfix[32]; \ - \ - va_start(args, fmt); \ - \ - vaf.fmt = fmt; \ - vaf.va = &args; \ - \ - if (pe->pdev) \ - strlcpy(pfix, dev_name(&pe->pdev->dev), \ - sizeof(pfix)); \ - else \ - sprintf(pfix, "%04x:%02x ", \ - pci_domain_nr(pe->pbus), \ - pe->pbus->number); \ - printk(kern_level "pci %s: [PE# %.3d] %pV", \ - pfix, pe->pe_number, &vaf); \ - \ - va_end(args); \ -} \ +static void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level, + const char *fmt, ...) +{ + struct va_format vaf; + va_list args; + char pfix[32]; -define_pe_printk_level(pe_err, KERN_ERR); -define_pe_printk_level(pe_warn, KERN_WARNING); -define_pe_printk_level(pe_info, KERN_INFO); + va_start(args, fmt); + + vaf.fmt = fmt; + vaf.va = &args; + + if (pe->pdev) + strlcpy(pfix, dev_name(&pe->pdev->dev), sizeof(pfix)); + else + sprintf(pfix, "%04x:%02x ", + pci_domain_nr(pe->pbus), pe->pbus->number); + + printk("%spci %s: [PE# %.3d] %pV", + level, pfix, pe->pe_number, &vaf); + + va_end(args); +} + +#define pe_err(pe, fmt, ...) \ + pe_level_printk(pe, KERN_ERR, fmt, ##__VA_ARGS__) +#define pe_warn(pe, fmt, ...) \ + pe_level_printk(pe, KERN_WARNING, fmt, ##__VA_ARGS__) +#define pe_info(pe, fmt, ...) \ + pe_level_printk(pe, KERN_INFO, fmt, ##__VA_ARGS__) /* * stdcix is only supposed to be used in hypervisor real mode as per From de5946c03575fb8c222610a6ac6726a5deabad46 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Fri, 19 Sep 2014 09:40:19 +1000 Subject: [PATCH 070/130] powerpc: Implement load_unaligned_zeropad Implement a bi-arch and bi-endian version of load_unaligned_zeropad. Since the fallback case is so rare, a userspace test harness was used to test this on ppc64le, ppc64 and ppc32: http://ozlabs.org/~anton/junkcode/test_load_unaligned_zeropad.c It uses mprotect to force a SEGV across a page boundary, and a SEGV handler to lookup the exception tables and run the fixup routine. It also compares the result against a normal load. Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/word-at-a-time.h | 40 +++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/arch/powerpc/include/asm/word-at-a-time.h b/arch/powerpc/include/asm/word-at-a-time.h index 9a5c928bb3c6..07cc121e8a79 100644 --- a/arch/powerpc/include/asm/word-at-a-time.h +++ b/arch/powerpc/include/asm/word-at-a-time.h @@ -116,4 +116,44 @@ static inline unsigned long prep_zero_mask(unsigned long a, unsigned long bits, #endif +static inline unsigned long load_unaligned_zeropad(const void *addr) +{ + unsigned long ret, offset, tmp; + + asm( + "1: " PPC_LL "%[ret], 0(%[addr])\n" + "2:\n" + ".section .fixup,\"ax\"\n" + "3: " +#ifdef __powerpc64__ + "clrrdi %[tmp], %[addr], 3\n\t" + "clrlsldi %[offset], %[addr], 61, 3\n\t" + "ld %[ret], 0(%[tmp])\n\t" +#ifdef __BIG_ENDIAN__ + "sld %[ret], %[ret], %[offset]\n\t" +#else + "srd %[ret], %[ret], %[offset]\n\t" +#endif +#else + "clrrwi %[tmp], %[addr], 2\n\t" + "clrlslwi %[offset], %[addr], 30, 3\n\t" + "lwz %[ret], 0(%[tmp])\n\t" +#ifdef __BIG_ENDIAN__ + "slw %[ret], %[ret], %[offset]\n\t" +#else + "srw %[ret], %[ret], %[offset]\n\t" +#endif +#endif + "b 2b\n" + ".previous\n" + ".section __ex_table,\"a\"\n\t" + PPC_LONG_ALIGN "\n\t" + PPC_LONG "1b,3b\n" + ".previous" + : [tmp] "=&b" (tmp), [offset] "=&r" (offset), [ret] "=&r" (ret) + : [addr] "b" (addr), "m" (*(unsigned long *)addr)); + + return ret; +} + #endif /* _ASM_WORD_AT_A_TIME_H */ From fe2a1bb1dbff1bc7b8c24eb1f691a544488617fa Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 25 Sep 2014 16:45:11 +1000 Subject: [PATCH 071/130] selftests/powerpc: Add test of load_unaligned_zero_pad() It is a rarely exercised case, so we want to have a test to ensure it works as required. Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/word-at-a-time.h | 13 +- tools/testing/selftests/powerpc/Makefile | 2 +- .../selftests/powerpc/primitives/Makefile | 17 ++ .../powerpc/primitives/asm/asm-compat.h | 1 + .../powerpc/primitives/asm/ppc-opcode.h | 0 .../primitives/load_unaligned_zeropad.c | 147 ++++++++++++++++++ .../powerpc/primitives/word-at-a-time.h | 1 + 7 files changed, 179 insertions(+), 2 deletions(-) create mode 100644 tools/testing/selftests/powerpc/primitives/Makefile create mode 120000 tools/testing/selftests/powerpc/primitives/asm/asm-compat.h create mode 100644 tools/testing/selftests/powerpc/primitives/asm/ppc-opcode.h create mode 100644 tools/testing/selftests/powerpc/primitives/load_unaligned_zeropad.c create mode 120000 tools/testing/selftests/powerpc/primitives/word-at-a-time.h diff --git a/arch/powerpc/include/asm/word-at-a-time.h b/arch/powerpc/include/asm/word-at-a-time.h index 07cc121e8a79..ea52b51be401 100644 --- a/arch/powerpc/include/asm/word-at-a-time.h +++ b/arch/powerpc/include/asm/word-at-a-time.h @@ -116,6 +116,15 @@ static inline unsigned long prep_zero_mask(unsigned long a, unsigned long bits, #endif +/* + * We use load_unaligned_zero() in a selftest, which builds a userspace + * program. Some linker scripts seem to discard the .fixup section, so allow + * the test code to use a different section name. + */ +#ifndef FIXUP_SECTION +#define FIXUP_SECTION ".fixup" +#endif + static inline unsigned long load_unaligned_zeropad(const void *addr) { unsigned long ret, offset, tmp; @@ -123,7 +132,7 @@ static inline unsigned long load_unaligned_zeropad(const void *addr) asm( "1: " PPC_LL "%[ret], 0(%[addr])\n" "2:\n" - ".section .fixup,\"ax\"\n" + ".section " FIXUP_SECTION ",\"ax\"\n" "3: " #ifdef __powerpc64__ "clrrdi %[tmp], %[addr], 3\n\t" @@ -156,4 +165,6 @@ static inline unsigned long load_unaligned_zeropad(const void *addr) return ret; } +#undef FIXUP_SECTION + #endif /* _ASM_WORD_AT_A_TIME_H */ diff --git a/tools/testing/selftests/powerpc/Makefile b/tools/testing/selftests/powerpc/Makefile index 74a78cedce37..f6ff90a76bd7 100644 --- a/tools/testing/selftests/powerpc/Makefile +++ b/tools/testing/selftests/powerpc/Makefile @@ -13,7 +13,7 @@ CFLAGS := -Wall -O2 -flto -Wall -Werror -DGIT_VERSION='"$(GIT_VERSION)"' -I$(CUR export CC CFLAGS -TARGETS = pmu copyloops mm tm +TARGETS = pmu copyloops mm tm primitives endif diff --git a/tools/testing/selftests/powerpc/primitives/Makefile b/tools/testing/selftests/powerpc/primitives/Makefile new file mode 100644 index 000000000000..ea737ca01732 --- /dev/null +++ b/tools/testing/selftests/powerpc/primitives/Makefile @@ -0,0 +1,17 @@ +CFLAGS += -I$(CURDIR) + +PROGS := load_unaligned_zeropad + +all: $(PROGS) + +$(PROGS): ../harness.c + +run_tests: all + @-for PROG in $(PROGS); do \ + ./$$PROG; \ + done; + +clean: + rm -f $(PROGS) *.o + +.PHONY: all run_tests clean diff --git a/tools/testing/selftests/powerpc/primitives/asm/asm-compat.h b/tools/testing/selftests/powerpc/primitives/asm/asm-compat.h new file mode 120000 index 000000000000..b14255e15a25 --- /dev/null +++ b/tools/testing/selftests/powerpc/primitives/asm/asm-compat.h @@ -0,0 +1 @@ +../.././../../../../arch/powerpc/include/asm/asm-compat.h \ No newline at end of file diff --git a/tools/testing/selftests/powerpc/primitives/asm/ppc-opcode.h b/tools/testing/selftests/powerpc/primitives/asm/ppc-opcode.h new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tools/testing/selftests/powerpc/primitives/load_unaligned_zeropad.c b/tools/testing/selftests/powerpc/primitives/load_unaligned_zeropad.c new file mode 100644 index 000000000000..d1b647509596 --- /dev/null +++ b/tools/testing/selftests/powerpc/primitives/load_unaligned_zeropad.c @@ -0,0 +1,147 @@ +/* + * Userspace test harness for load_unaligned_zeropad. Creates two + * pages and uses mprotect to prevent access to the second page and + * a SEGV handler that walks the exception tables and runs the fixup + * routine. + * + * The results are compared against a normal load that is that is + * performed while access to the second page is enabled via mprotect. + * + * Copyright (C) 2014 Anton Blanchard , IBM + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include + +#define FIXUP_SECTION ".ex_fixup" + +#include "word-at-a-time.h" + +#include "utils.h" + + +static int page_size; +static char *mem_region; + +static int protect_region(void) +{ + if (mprotect(mem_region + page_size, page_size, PROT_NONE)) { + perror("mprotect"); + return 1; + } + + return 0; +} + +static int unprotect_region(void) +{ + if (mprotect(mem_region + page_size, page_size, PROT_READ|PROT_WRITE)) { + perror("mprotect"); + return 1; + } + + return 0; +} + +extern char __start___ex_table[]; +extern char __stop___ex_table[]; + +#if defined(__powerpc64__) +#define UCONTEXT_NIA(UC) (UC)->uc_mcontext.gp_regs[PT_NIP] +#elif defined(__powerpc__) +#define UCONTEXT_NIA(UC) (UC)->uc_mcontext.uc_regs->gregs[PT_NIP] +#else +#error implement UCONTEXT_NIA +#endif + +static int segv_error; + +static void segv_handler(int signr, siginfo_t *info, void *ptr) +{ + ucontext_t *uc = (ucontext_t *)ptr; + unsigned long addr = (unsigned long)info->si_addr; + unsigned long *ip = &UCONTEXT_NIA(uc); + unsigned long *ex_p = (unsigned long *)__start___ex_table; + + while (ex_p < (unsigned long *)__stop___ex_table) { + unsigned long insn, fixup; + + insn = *ex_p++; + fixup = *ex_p++; + + if (insn == *ip) { + *ip = fixup; + return; + } + } + + printf("No exception table match for NIA %lx ADDR %lx\n", *ip, addr); + segv_error++; +} + +static void setup_segv_handler(void) +{ + struct sigaction action; + + memset(&action, 0, sizeof(action)); + action.sa_sigaction = segv_handler; + action.sa_flags = SA_SIGINFO; + sigaction(SIGSEGV, &action, NULL); +} + +static int do_one_test(char *p, int page_offset) +{ + unsigned long should; + unsigned long got; + + FAIL_IF(unprotect_region()); + should = *(unsigned long *)p; + FAIL_IF(protect_region()); + + got = load_unaligned_zeropad(p); + + if (should != got) + printf("offset %u load_unaligned_zeropad returned 0x%lx, should be 0x%lx\n", page_offset, got, should); + + return 0; +} + +static int test_body(void) +{ + unsigned long i; + + page_size = getpagesize(); + mem_region = mmap(NULL, page_size * 2, PROT_READ|PROT_WRITE, + MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); + + FAIL_IF(mem_region == MAP_FAILED); + + for (i = 0; i < page_size; i++) + mem_region[i] = i; + + memset(mem_region+page_size, 0, page_size); + + setup_segv_handler(); + + for (i = 0; i < page_size; i++) + FAIL_IF(do_one_test(mem_region+i, i)); + + FAIL_IF(segv_error); + + return 0; +} + +int main(void) +{ + return test_harness(test_body, "load_unaligned_zeropad"); +} diff --git a/tools/testing/selftests/powerpc/primitives/word-at-a-time.h b/tools/testing/selftests/powerpc/primitives/word-at-a-time.h new file mode 120000 index 000000000000..eb74401b591f --- /dev/null +++ b/tools/testing/selftests/powerpc/primitives/word-at-a-time.h @@ -0,0 +1 @@ +../../../../../arch/powerpc/include/asm/word-at-a-time.h \ No newline at end of file From 8989aa4adacd02174d1f72a00af8d669934a2b7a Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Fri, 19 Sep 2014 09:40:20 +1000 Subject: [PATCH 072/130] powerpc: ppc64le optimised word at a time Use cmpb which compares each byte in two 64 bit values and for each matching byte places 0xff in the target and 0x00 otherwise. A simple hash_name microbenchmark: http://ozlabs.org/~anton/junkcode/hash_name_bench.c shows this version to be 10-20% faster than running the x86 version on POWER8, depending on the length. Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/word-at-a-time.h | 79 ++++++++++++++++------- 1 file changed, 56 insertions(+), 23 deletions(-) diff --git a/arch/powerpc/include/asm/word-at-a-time.h b/arch/powerpc/include/asm/word-at-a-time.h index ea52b51be401..5b3a903adae6 100644 --- a/arch/powerpc/include/asm/word-at-a-time.h +++ b/arch/powerpc/include/asm/word-at-a-time.h @@ -42,32 +42,65 @@ static inline bool has_zero(unsigned long val, unsigned long *data, const struct #else +#ifdef CONFIG_64BIT + +/* unused */ +struct word_at_a_time { +}; + +#define WORD_AT_A_TIME_CONSTANTS { } + +/* This will give us 0xff for a NULL char and 0x00 elsewhere */ +static inline unsigned long has_zero(unsigned long a, unsigned long *bits, const struct word_at_a_time *c) +{ + unsigned long ret; + unsigned long zero = 0; + + asm("cmpb %0,%1,%2" : "=r" (ret) : "r" (a), "r" (zero)); + *bits = ret; + + return ret; +} + +static inline unsigned long prep_zero_mask(unsigned long a, unsigned long bits, const struct word_at_a_time *c) +{ + return bits; +} + +/* Alan Modra's little-endian strlen tail for 64-bit */ +static inline unsigned long create_zero_mask(unsigned long bits) +{ + unsigned long leading_zero_bits; + long trailing_zero_bit_mask; + + asm("addi %1,%2,-1\n\t" + "andc %1,%1,%2\n\t" + "popcntd %0,%1" + : "=r" (leading_zero_bits), "=&r" (trailing_zero_bit_mask) + : "r" (bits)); + + return leading_zero_bits; +} + +static inline unsigned long find_zero(unsigned long mask) +{ + return mask >> 3; +} + +/* This assumes that we never ask for an all 1s bitmask */ +static inline unsigned long zero_bytemask(unsigned long mask) +{ + return (1UL << mask) - 1; +} + +#else /* 32-bit case */ + struct word_at_a_time { const unsigned long one_bits, high_bits; }; #define WORD_AT_A_TIME_CONSTANTS { REPEAT_BYTE(0x01), REPEAT_BYTE(0x80) } -#ifdef CONFIG_64BIT - -/* Alan Modra's little-endian strlen tail for 64-bit */ -#define create_zero_mask(mask) (mask) - -static inline unsigned long find_zero(unsigned long mask) -{ - unsigned long leading_zero_bits; - long trailing_zero_bit_mask; - - asm ("addi %1,%2,-1\n\t" - "andc %1,%1,%2\n\t" - "popcntd %0,%1" - : "=r" (leading_zero_bits), "=&r" (trailing_zero_bit_mask) - : "r" (mask)); - return leading_zero_bits >> 3; -} - -#else /* 32-bit case */ - /* * This is largely generic for little-endian machines, but the * optimal byte mask counting is probably going to be something @@ -96,8 +129,6 @@ static inline unsigned long find_zero(unsigned long mask) return count_masked_bytes(mask); } -#endif - /* Return nonzero if it has a zero */ static inline unsigned long has_zero(unsigned long a, unsigned long *bits, const struct word_at_a_time *c) { @@ -114,7 +145,9 @@ static inline unsigned long prep_zero_mask(unsigned long a, unsigned long bits, /* The mask we created is directly usable as a bytemask */ #define zero_bytemask(mask) (mask) -#endif +#endif /* CONFIG_64BIT */ + +#endif /* __BIG_ENDIAN__ */ /* * We use load_unaligned_zero() in a selftest, which builds a userspace From a75c380c7129c432f8ac9d42ebc170e5f7d9d31e Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Fri, 19 Sep 2014 09:40:21 +1000 Subject: [PATCH 073/130] powerpc: Enable DCACHE_WORD_ACCESS on ppc64le Enable on DCACHE_WORD_ACCESS on ppc64le. It should work on ppc64 and ppc32 but we need to do some testing first. A somewhat reasonable testcase used to show the performance improvement - a repeated stat of a 33 byte filename that doesn't exist: #include #include #include #define ITERATIONS 10000000 #define PATH "123456781234567812345678123456781" int main(void) { unsigned long i; struct stat buf; for (i = 0; i < ITERATIONS; i++) stat(PATH, &buf); return 0; } runs 27% faster on POWER8. Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman --- arch/powerpc/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index ece1c01ddb2f..56da47247fcc 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -147,6 +147,7 @@ config PPC select ARCH_USE_CMPXCHG_LOCKREF if PPC64 select HAVE_ARCH_AUDITSYSCALL select ARCH_SUPPORTS_ATOMIC_RMW + select DCACHE_WORD_ACCESS if PPC64 && CPU_LITTLE_ENDIAN config GENERIC_CSUM def_bool CPU_LITTLE_ENDIAN From 3e938052fb7655c91ff031dd93f064b4087a1387 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 30 Sep 2014 12:38:50 +1000 Subject: [PATCH 074/130] powerpc/eeh: Drop unused argument in eeh_check_failure() eeh_check_failure() is used to check frozen state of the PE which owns the indicated I/O address. The argument "val" of the function isn't used. The patch drops it and return the frozen state of the PE as expected. Cc: Vishal Mansur Signed-off-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/eeh.h | 29 ++++++++++++++--------------- arch/powerpc/kernel/eeh.c | 15 ++++++--------- 2 files changed, 20 insertions(+), 24 deletions(-) diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index 757014fe23d3..c79869d4a652 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -274,8 +274,7 @@ void eeh_dev_phb_init_dynamic(struct pci_controller *phb); int eeh_init(void); int __init eeh_ops_register(struct eeh_ops *ops); int __exit eeh_ops_unregister(const char *name); -unsigned long eeh_check_failure(const volatile void __iomem *token, - unsigned long val); +int eeh_check_failure(const volatile void __iomem *token); int eeh_dev_check_failure(struct eeh_dev *edev); void eeh_addr_cache_build(void); void eeh_add_device_early(struct device_node *); @@ -326,9 +325,9 @@ static inline void *eeh_dev_init(struct device_node *dn, void *data) static inline void eeh_dev_phb_init_dynamic(struct pci_controller *phb) { } -static inline unsigned long eeh_check_failure(const volatile void __iomem *token, unsigned long val) +static inline int eeh_check_failure(const volatile void __iomem *token) { - return val; + return 0; } #define eeh_dev_check_failure(x) (0) @@ -359,7 +358,7 @@ static inline u8 eeh_readb(const volatile void __iomem *addr) { u8 val = in_8(addr); if (EEH_POSSIBLE_ERROR(val, u8)) - return eeh_check_failure(addr, val); + eeh_check_failure(addr); return val; } @@ -367,7 +366,7 @@ static inline u16 eeh_readw(const volatile void __iomem *addr) { u16 val = in_le16(addr); if (EEH_POSSIBLE_ERROR(val, u16)) - return eeh_check_failure(addr, val); + eeh_check_failure(addr); return val; } @@ -375,7 +374,7 @@ static inline u32 eeh_readl(const volatile void __iomem *addr) { u32 val = in_le32(addr); if (EEH_POSSIBLE_ERROR(val, u32)) - return eeh_check_failure(addr, val); + eeh_check_failure(addr); return val; } @@ -383,7 +382,7 @@ static inline u64 eeh_readq(const volatile void __iomem *addr) { u64 val = in_le64(addr); if (EEH_POSSIBLE_ERROR(val, u64)) - return eeh_check_failure(addr, val); + eeh_check_failure(addr); return val; } @@ -391,7 +390,7 @@ static inline u16 eeh_readw_be(const volatile void __iomem *addr) { u16 val = in_be16(addr); if (EEH_POSSIBLE_ERROR(val, u16)) - return eeh_check_failure(addr, val); + eeh_check_failure(addr); return val; } @@ -399,7 +398,7 @@ static inline u32 eeh_readl_be(const volatile void __iomem *addr) { u32 val = in_be32(addr); if (EEH_POSSIBLE_ERROR(val, u32)) - return eeh_check_failure(addr, val); + eeh_check_failure(addr); return val; } @@ -407,7 +406,7 @@ static inline u64 eeh_readq_be(const volatile void __iomem *addr) { u64 val = in_be64(addr); if (EEH_POSSIBLE_ERROR(val, u64)) - return eeh_check_failure(addr, val); + eeh_check_failure(addr); return val; } @@ -421,7 +420,7 @@ static inline void eeh_memcpy_fromio(void *dest, const * were copied. Check all four bytes. */ if (n >= 4 && EEH_POSSIBLE_ERROR(*((u32 *)(dest + n - 4)), u32)) - eeh_check_failure(src, *((u32 *)(dest + n - 4))); + eeh_check_failure(src); } /* in-string eeh macros */ @@ -430,7 +429,7 @@ static inline void eeh_readsb(const volatile void __iomem *addr, void * buf, { _insb(addr, buf, ns); if (EEH_POSSIBLE_ERROR((*(((u8*)buf)+ns-1)), u8)) - eeh_check_failure(addr, *(u8*)buf); + eeh_check_failure(addr); } static inline void eeh_readsw(const volatile void __iomem *addr, void * buf, @@ -438,7 +437,7 @@ static inline void eeh_readsw(const volatile void __iomem *addr, void * buf, { _insw(addr, buf, ns); if (EEH_POSSIBLE_ERROR((*(((u16*)buf)+ns-1)), u16)) - eeh_check_failure(addr, *(u16*)buf); + eeh_check_failure(addr); } static inline void eeh_readsl(const volatile void __iomem *addr, void * buf, @@ -446,7 +445,7 @@ static inline void eeh_readsl(const volatile void __iomem *addr, void * buf, { _insl(addr, buf, nl); if (EEH_POSSIBLE_ERROR((*(((u32*)buf)+nl-1)), u32)) - eeh_check_failure(addr, *(u32*)buf); + eeh_check_failure(addr); } #endif /* CONFIG_PPC64 */ diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 0f1b63714718..db35c2722201 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -542,17 +542,16 @@ EXPORT_SYMBOL_GPL(eeh_dev_check_failure); /** * eeh_check_failure - Check if all 1's data is due to EEH slot freeze - * @token: I/O token, should be address in the form 0xA.... - * @val: value, should be all 1's (XXX why do we need this arg??) + * @token: I/O address * - * Check for an EEH failure at the given token address. Call this + * Check for an EEH failure at the given I/O address. Call this * routine if the result of a read was all 0xff's and you want to - * find out if this is due to an EEH slot freeze event. This routine + * find out if this is due to an EEH slot freeze event. This routine * will query firmware for the EEH status. * * Note this routine is safe to call in an interrupt context. */ -unsigned long eeh_check_failure(const volatile void __iomem *token, unsigned long val) +int eeh_check_failure(const volatile void __iomem *token) { unsigned long addr; struct eeh_dev *edev; @@ -562,13 +561,11 @@ unsigned long eeh_check_failure(const volatile void __iomem *token, unsigned lon edev = eeh_addr_cache_get_dev(addr); if (!edev) { eeh_stats.no_device++; - return val; + return 0; } - eeh_dev_check_failure(edev); - return val; + return eeh_dev_check_failure(edev); } - EXPORT_SYMBOL(eeh_check_failure); From 940376b3a463303787c6227c0327612653bf5600 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 30 Sep 2014 12:38:51 +1000 Subject: [PATCH 075/130] powerpc/eeh: Add eeh_pe_state sysfs entry The patch adds sysfs entry "eeh_pe_state". Reading on it returns the PE's state while writing to it clears the frozen state. It's used to check or clear the PE frozen state from userland for debugging purpose. The patch also replaces printk(KERN_WARNING ...) with pr_warn() in eeh_sysfs.c Signed-off-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/eeh_sysfs.c | 60 ++++++++++++++++++++++++++++++++- 1 file changed, 59 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/eeh_sysfs.c b/arch/powerpc/kernel/eeh_sysfs.c index e2595ba4b720..eb15be4d8849 100644 --- a/arch/powerpc/kernel/eeh_sysfs.c +++ b/arch/powerpc/kernel/eeh_sysfs.c @@ -54,6 +54,62 @@ EEH_SHOW_ATTR(eeh_mode, mode, "0x%x"); EEH_SHOW_ATTR(eeh_config_addr, config_addr, "0x%x"); EEH_SHOW_ATTR(eeh_pe_config_addr, pe_config_addr, "0x%x"); +static ssize_t eeh_pe_state_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct pci_dev *pdev = to_pci_dev(dev); + struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev); + int state; + + if (!edev || !edev->pe) + return -ENODEV; + + state = eeh_ops->get_state(edev->pe, NULL); + return sprintf(buf, "%08x %08x\n", + state, edev->pe->state); +} + +static ssize_t eeh_pe_state_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct pci_dev *pdev = to_pci_dev(dev); + struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev); + int ret; + + if (!edev || !edev->pe) + return -ENODEV; + + /* Nothing to do if it's not frozen */ + if (!(edev->pe->state & EEH_PE_ISOLATED)) + return count; + + /* Enable MMIO */ + ret = eeh_pci_enable(edev->pe, EEH_OPT_THAW_MMIO); + if (ret) { + pr_warn("%s: Failure %d enabling MMIO for PHB#%d-PE#%d\n", + __func__, ret, edev->pe->phb->global_number, + edev->pe->addr); + return -EIO; + } + + /* Enable DMA */ + ret = eeh_pci_enable(edev->pe, EEH_OPT_THAW_DMA); + if (ret) { + pr_warn("%s: Failure %d enabling DMA for PHB#%d-PE#%d\n", + __func__, ret, edev->pe->phb->global_number, + edev->pe->addr); + return -EIO; + } + + /* Clear software state */ + eeh_pe_state_clear(edev->pe, EEH_PE_ISOLATED); + + return count; +} + +static DEVICE_ATTR_RW(eeh_pe_state); + void eeh_sysfs_add_device(struct pci_dev *pdev) { struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev); @@ -68,9 +124,10 @@ void eeh_sysfs_add_device(struct pci_dev *pdev) rc += device_create_file(&pdev->dev, &dev_attr_eeh_mode); rc += device_create_file(&pdev->dev, &dev_attr_eeh_config_addr); rc += device_create_file(&pdev->dev, &dev_attr_eeh_pe_config_addr); + rc += device_create_file(&pdev->dev, &dev_attr_eeh_pe_state); if (rc) - printk(KERN_WARNING "EEH: Unable to create sysfs entries\n"); + pr_warn("EEH: Unable to create sysfs entries\n"); else if (edev) edev->mode |= EEH_DEV_SYSFS; } @@ -92,6 +149,7 @@ void eeh_sysfs_remove_device(struct pci_dev *pdev) device_remove_file(&pdev->dev, &dev_attr_eeh_mode); device_remove_file(&pdev->dev, &dev_attr_eeh_config_addr); device_remove_file(&pdev->dev, &dev_attr_eeh_pe_config_addr); + device_remove_file(&pdev->dev, &dev_attr_eeh_pe_state); if (edev) edev->mode &= ~EEH_DEV_SYSFS; From 0d5ee5205e62908172bf5e1a5fd171ba262fdb75 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 30 Sep 2014 12:38:52 +1000 Subject: [PATCH 076/130] powerpc/eeh: Freeze PE before PE reset The patch adds one more option (EEH_OPT_FREEZE_PE) to set_option() method to proactively freeze PE, which will be issued before resetting pass-throughed PE to drop MMIO access during reset because it's always contributing to recursive EEH error. Signed-off-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/eeh.h | 1 + arch/powerpc/kernel/eeh.c | 7 ++++ arch/powerpc/platforms/powernv/eeh-ioda.c | 43 +++++++++++++++----- arch/powerpc/platforms/pseries/eeh_pseries.c | 4 +- 4 files changed, 44 insertions(+), 11 deletions(-) diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index c79869d4a652..59d3e2a4fc42 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -172,6 +172,7 @@ enum { #define EEH_OPT_ENABLE 1 /* EEH enable */ #define EEH_OPT_THAW_MMIO 2 /* MMIO enable */ #define EEH_OPT_THAW_DMA 3 /* DMA enable */ +#define EEH_OPT_FREEZE_PE 4 /* Freeze PE */ #define EEH_STATE_UNAVAILABLE (1 << 0) /* State unavailable */ #define EEH_STATE_NOT_SUPPORT (1 << 1) /* EEH not supported */ #define EEH_STATE_RESET_ACTIVE (1 << 2) /* Active reset */ diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index db35c2722201..633f6bf965c3 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -1382,6 +1382,13 @@ int eeh_pe_reset(struct eeh_pe *pe, int option) break; case EEH_RESET_HOT: case EEH_RESET_FUNDAMENTAL: + /* + * Proactively freeze the PE to drop all MMIO access + * during reset, which should be banned as it's always + * cause recursive EEH error. + */ + eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE); + ret = eeh_ops->reset(pe, option); break; default: diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c b/arch/powerpc/platforms/powernv/eeh-ioda.c index df5c2cc55285..1d8f37b93457 100644 --- a/arch/powerpc/platforms/powernv/eeh-ioda.c +++ b/arch/powerpc/platforms/powernv/eeh-ioda.c @@ -189,6 +189,7 @@ static int ioda_eeh_set_option(struct eeh_pe *pe, int option) { struct pci_controller *hose = pe->phb; struct pnv_phb *phb = hose->private_data; + bool freeze_pe = false; int enable, ret = 0; s64 rc; @@ -212,6 +213,10 @@ static int ioda_eeh_set_option(struct eeh_pe *pe, int option) case EEH_OPT_THAW_DMA: enable = OPAL_EEH_ACTION_CLEAR_FREEZE_DMA; break; + case EEH_OPT_FREEZE_PE: + freeze_pe = true; + enable = OPAL_EEH_ACTION_SET_FREEZE_ALL; + break; default: pr_warn("%s: Invalid option %d\n", __func__, option); @@ -219,17 +224,35 @@ static int ioda_eeh_set_option(struct eeh_pe *pe, int option) } /* If PHB supports compound PE, to handle it */ - if (phb->unfreeze_pe) { - ret = phb->unfreeze_pe(phb, pe->addr, enable); + if (freeze_pe) { + if (phb->freeze_pe) { + phb->freeze_pe(phb, pe->addr); + } else { + rc = opal_pci_eeh_freeze_set(phb->opal_id, + pe->addr, + enable); + if (rc != OPAL_SUCCESS) { + pr_warn("%s: Failure %lld freezing " + "PHB#%x-PE#%x\n", + __func__, rc, + phb->hose->global_number, pe->addr); + ret = -EIO; + } + } } else { - rc = opal_pci_eeh_freeze_clear(phb->opal_id, - pe->addr, - enable); - if (rc != OPAL_SUCCESS) { - pr_warn("%s: Failure %lld enable %d for PHB#%x-PE#%x\n", - __func__, rc, option, phb->hose->global_number, - pe->addr); - ret = -EIO; + if (phb->unfreeze_pe) { + ret = phb->unfreeze_pe(phb, pe->addr, enable); + } else { + rc = opal_pci_eeh_freeze_clear(phb->opal_id, + pe->addr, + enable); + if (rc != OPAL_SUCCESS) { + pr_warn("%s: Failure %lld enable %d " + "for PHB#%x-PE#%x\n", + __func__, rc, option, + phb->hose->global_number, pe->addr); + ret = -EIO; + } } } diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c index b08053819d99..b645dc60e000 100644 --- a/arch/powerpc/platforms/pseries/eeh_pseries.c +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c @@ -349,7 +349,9 @@ static int pseries_eeh_set_option(struct eeh_pe *pe, int option) if (pe->addr) config_addr = pe->addr; break; - + case EEH_OPT_FREEZE_PE: + /* Not support */ + return 0; default: pr_err("%s: Invalid option %d\n", __func__, option); From 316233ff878451e198e3633fd9165c437007a309 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 30 Sep 2014 12:38:53 +1000 Subject: [PATCH 077/130] powerpc/eeh: Reenable PCI devices after reset The PCI devices that have been passed through are enabled before reset, we need restore to the enabled state after reset. Otherwise, MMIO access might be issued to disabled devices after reset and causes exceptional recursive EEH error. Signed-off-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/eeh.c | 62 ++++++++++++++++++++++++++++++--------- 1 file changed, 48 insertions(+), 14 deletions(-) diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 633f6bf965c3..6b4690f315d3 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -1342,6 +1342,53 @@ int eeh_pe_get_state(struct eeh_pe *pe) } EXPORT_SYMBOL_GPL(eeh_pe_get_state); +static int eeh_pe_reenable_devices(struct eeh_pe *pe) +{ + struct eeh_dev *edev, *tmp; + struct pci_dev *pdev; + int ret = 0; + + /* Restore config space */ + eeh_pe_restore_bars(pe); + + /* + * Reenable PCI devices as the devices passed + * through are always enabled before the reset. + */ + eeh_pe_for_each_dev(pe, edev, tmp) { + pdev = eeh_dev_to_pci_dev(edev); + if (!pdev) + continue; + + ret = pci_reenable_device(pdev); + if (ret) { + pr_warn("%s: Failure %d reenabling %s\n", + __func__, ret, pci_name(pdev)); + return ret; + } + } + + /* The PE is still in frozen state */ + ret = eeh_ops->set_option(pe, EEH_OPT_THAW_MMIO); + if (ret) { + pr_warn("%s: Failure %d enabling MMIO for PHB#%x-PE#%x\n", + __func__, ret, pe->phb->global_number, pe->addr); + return ret; + } + + ret = eeh_ops->set_option(pe, EEH_OPT_THAW_DMA); + if (ret) { + pr_warn("%s: Failure %d enabling DMA for PHB#%x-PE#%x\n", + __func__, ret, pe->phb->global_number, pe->addr); + return ret; + } + + /* Clear software isolated state */ + eeh_pe_state_clear(pe, EEH_PE_ISOLATED); + + return ret; +} + /** * eeh_pe_reset - Issue PE reset according to specified type * @pe: EEH PE @@ -1368,17 +1415,7 @@ int eeh_pe_reset(struct eeh_pe *pe, int option) if (ret) break; - /* - * The PE is still in frozen state and we need to clear - * that. It's good to clear frozen state after deassert - * to avoid messy IO access during reset, which might - * cause recursive frozen PE. - */ - ret = eeh_ops->set_option(pe, EEH_OPT_THAW_MMIO); - if (!ret) - ret = eeh_ops->set_option(pe, EEH_OPT_THAW_DMA); - if (!ret) - eeh_pe_state_clear(pe, EEH_PE_ISOLATED); + ret = eeh_pe_reenable_devices(pe); break; case EEH_RESET_HOT: case EEH_RESET_FUNDAMENTAL: @@ -1417,9 +1454,6 @@ int eeh_pe_configure(struct eeh_pe *pe) if (!pe) return -ENODEV; - /* Restore config space for the affected devices */ - eeh_pe_restore_bars(pe); - return ret; } EXPORT_SYMBOL_GPL(eeh_pe_configure); From 404079c87e2f390611b71c8f03b9f5ddb6241131 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 30 Sep 2014 12:38:54 +1000 Subject: [PATCH 078/130] powerpc/eeh: Clear frozen state on passing device When passing through device, its PE might have been put into frozen state. One obvious example would be: the passed PE is forced to be offline because of hitting maximal allowed EEH errors in userland. In that case, the frozen state won't be cleared and then the PE is returned back to host, which might not have chance detecting and recovering from it. The patch adds more check when passing through device and clear the PE frozen state if necessary. Signed-off-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/eeh.c | 36 +++++++++++++++++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 6b4690f315d3..f5677684429e 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -1150,6 +1150,8 @@ void eeh_remove_device(struct pci_dev *dev) int eeh_dev_open(struct pci_dev *pdev) { struct eeh_dev *edev; + int flag = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE); + int ret = -ENODEV; mutex_lock(&eeh_dev_mutex); @@ -1162,6 +1164,38 @@ int eeh_dev_open(struct pci_dev *pdev) if (!edev || !edev->pe) goto out; + /* + * The PE might have been put into frozen state, but we + * didn't detect that yet. The passed through PCI devices + * in frozen PE won't work properly. Clear the frozen state + * in advance. + */ + ret = eeh_ops->get_state(edev->pe, NULL); + if (ret > 0 && ret != EEH_STATE_NOT_SUPPORT && + (ret & flag) != flag) { + ret = eeh_ops->set_option(edev->pe, EEH_OPT_THAW_MMIO); + if (ret) { + pr_warn("%s: Failure %d enabling MMIO " + "for PHB#%x-PE#%x\n", + __func__, ret, edev->phb->global_number, + edev->pe->addr); + goto out; + } + + ret = eeh_ops->set_option(edev->pe, EEH_OPT_THAW_DMA); + if (ret) { + pr_warn("%s: Failure %d enabling DMA " + "for PHB#%x-PE#%x\n", + __func__, ret, edev->phb->global_number, + edev->pe->addr); + goto out; + } + } + + /* Clear software isolated state */ + if (edev->pe->state & EEH_PE_ISOLATED) + eeh_pe_state_clear(edev->pe, EEH_PE_ISOLATED); + /* Increase PE's pass through count */ atomic_inc(&edev->pe->pass_dev_cnt); mutex_unlock(&eeh_dev_mutex); @@ -1169,7 +1203,7 @@ int eeh_dev_open(struct pci_dev *pdev) return 0; out: mutex_unlock(&eeh_dev_mutex); - return -ENODEV; + return ret; } EXPORT_SYMBOL_GPL(eeh_dev_open); From 5b64234081fcbd48c4ae207ce2cc3c31f859f8a4 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 30 Sep 2014 12:38:55 +1000 Subject: [PATCH 079/130] powerpc/powernv: Sync header with firmware The patch synchronizes firmware header file (opal.h) for PCI error injection. Signed-off-by: Mike Qiu Signed-off-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/opal.h | 32 +++++++++++++++++++ .../powerpc/platforms/powernv/opal-wrappers.S | 1 + 2 files changed, 33 insertions(+) diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index 7127b87a548d..f1579419e5c8 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -148,6 +148,7 @@ struct opal_sg_list { #define OPAL_SET_PARAM 90 #define OPAL_DUMP_RESEND 91 #define OPAL_DUMP_INFO2 94 +#define OPAL_PCI_ERR_INJECT 96 #define OPAL_PCI_EEH_FREEZE_SET 97 #define OPAL_HANDLE_HMI 98 #define OPAL_REGISTER_DUMP_REGION 101 @@ -200,6 +201,35 @@ enum OpalPciErrorSeverity { OPAL_EEH_SEV_INF = 5 }; +enum OpalErrinjectType { + OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR = 0, + OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR64 = 1, +}; + +enum OpalErrinjectFunc { + /* IOA bus specific errors */ + OPAL_ERR_INJECT_FUNC_IOA_LD_MEM_ADDR = 0, + OPAL_ERR_INJECT_FUNC_IOA_LD_MEM_DATA = 1, + OPAL_ERR_INJECT_FUNC_IOA_LD_IO_ADDR = 2, + OPAL_ERR_INJECT_FUNC_IOA_LD_IO_DATA = 3, + OPAL_ERR_INJECT_FUNC_IOA_LD_CFG_ADDR = 4, + OPAL_ERR_INJECT_FUNC_IOA_LD_CFG_DATA = 5, + OPAL_ERR_INJECT_FUNC_IOA_ST_MEM_ADDR = 6, + OPAL_ERR_INJECT_FUNC_IOA_ST_MEM_DATA = 7, + OPAL_ERR_INJECT_FUNC_IOA_ST_IO_ADDR = 8, + OPAL_ERR_INJECT_FUNC_IOA_ST_IO_DATA = 9, + OPAL_ERR_INJECT_FUNC_IOA_ST_CFG_ADDR = 10, + OPAL_ERR_INJECT_FUNC_IOA_ST_CFG_DATA = 11, + OPAL_ERR_INJECT_FUNC_IOA_DMA_RD_ADDR = 12, + OPAL_ERR_INJECT_FUNC_IOA_DMA_RD_DATA = 13, + OPAL_ERR_INJECT_FUNC_IOA_DMA_RD_MASTER = 14, + OPAL_ERR_INJECT_FUNC_IOA_DMA_RD_TARGET = 15, + OPAL_ERR_INJECT_FUNC_IOA_DMA_WR_ADDR = 16, + OPAL_ERR_INJECT_FUNC_IOA_DMA_WR_DATA = 17, + OPAL_ERR_INJECT_FUNC_IOA_DMA_WR_MASTER = 18, + OPAL_ERR_INJECT_FUNC_IOA_DMA_WR_TARGET = 19, +}; + enum OpalShpcAction { OPAL_SHPC_GET_LINK_STATE = 0, OPAL_SHPC_GET_SLOT_STATE = 1 @@ -820,6 +850,8 @@ int64_t opal_pci_eeh_freeze_clear(uint64_t phb_id, uint64_t pe_number, uint64_t eeh_action_token); int64_t opal_pci_eeh_freeze_set(uint64_t phb_id, uint64_t pe_number, uint64_t eeh_action_token); +int64_t opal_pci_err_inject(uint64_t phb_id, uint32_t pe_no, uint32_t type, + uint32_t func, uint64_t addr, uint64_t mask); int64_t opal_pci_shpc(uint64_t phb_id, uint64_t shpc_action, uint8_t *state); diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S index 571885556dfc..15942b6ffd3a 100644 --- a/arch/powerpc/platforms/powernv/opal-wrappers.S +++ b/arch/powerpc/platforms/powernv/opal-wrappers.S @@ -184,6 +184,7 @@ OPAL_CALL(opal_register_exception_handler, OPAL_REGISTER_OPAL_EXCEPTION_HANDLER) OPAL_CALL(opal_pci_eeh_freeze_status, OPAL_PCI_EEH_FREEZE_STATUS); OPAL_CALL(opal_pci_eeh_freeze_clear, OPAL_PCI_EEH_FREEZE_CLEAR); OPAL_CALL(opal_pci_eeh_freeze_set, OPAL_PCI_EEH_FREEZE_SET); +OPAL_CALL(opal_pci_err_inject, OPAL_PCI_ERR_INJECT); OPAL_CALL(opal_pci_shpc, OPAL_PCI_SHPC); OPAL_CALL(opal_pci_phb_mmio_enable, OPAL_PCI_PHB_MMIO_ENABLE); OPAL_CALL(opal_pci_set_phb_mem_window, OPAL_PCI_SET_PHB_MEM_WINDOW); From 131c123abec375afc819da89925dbce97590278e Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 30 Sep 2014 12:38:56 +1000 Subject: [PATCH 080/130] powerpc/eeh: Introduce eeh_ops::err_inject The patch introduces eeh_ops::err_inject(), which allows to inject specified errors to indicated PE for testing purpose. The functionality isn't support on pSeries platform. On PowerNV, the functionality relies on OPAL API opal_pci_err_inject(). Signed-off-by: Mike Qiu Signed-off-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/eeh.h | 2 + arch/powerpc/platforms/powernv/eeh-ioda.c | 44 ++++++++++++++++++++ arch/powerpc/platforms/powernv/eeh-powernv.c | 26 ++++++++++++ arch/powerpc/platforms/powernv/pci.h | 2 + arch/powerpc/platforms/pseries/eeh_pseries.c | 1 + 5 files changed, 75 insertions(+) diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index 59d3e2a4fc42..4fa15796537a 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -204,6 +204,8 @@ struct eeh_ops { int (*wait_state)(struct eeh_pe *pe, int max_wait); int (*get_log)(struct eeh_pe *pe, int severity, char *drv_log, unsigned long len); int (*configure_bridge)(struct eeh_pe *pe); + int (*err_inject)(struct eeh_pe *pe, int type, int func, + unsigned long addr, unsigned long mask); int (*read_config)(struct device_node *dn, int where, int size, u32 *val); int (*write_config)(struct device_node *dn, int where, int size, u32 val); int (*next_error)(struct eeh_pe **pe); diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c b/arch/powerpc/platforms/powernv/eeh-ioda.c index 1d8f37b93457..1ac80db1489b 100644 --- a/arch/powerpc/platforms/powernv/eeh-ioda.c +++ b/arch/powerpc/platforms/powernv/eeh-ioda.c @@ -673,6 +673,49 @@ static int ioda_eeh_configure_bridge(struct eeh_pe *pe) return 0; } +static int ioda_eeh_err_inject(struct eeh_pe *pe, int type, int func, + unsigned long addr, unsigned long mask) +{ + struct pci_controller *hose = pe->phb; + struct pnv_phb *phb = hose->private_data; + s64 ret; + + /* Sanity check on error type */ + if (type != OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR && + type != OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR64) { + pr_warn("%s: Invalid error type %d\n", + __func__, type); + return -ERANGE; + } + + if (func < OPAL_ERR_INJECT_FUNC_IOA_LD_MEM_ADDR || + func > OPAL_ERR_INJECT_FUNC_IOA_DMA_WR_TARGET) { + pr_warn("%s: Invalid error function %d\n", + __func__, func); + return -ERANGE; + } + + /* Firmware supports error injection ? */ + if (!opal_check_token(OPAL_PCI_ERR_INJECT)) { + pr_warn("%s: Firmware doesn't support error injection\n", + __func__); + return -ENXIO; + } + + /* Do error injection */ + ret = opal_pci_err_inject(phb->opal_id, pe->addr, + type, func, addr, mask); + if (ret != OPAL_SUCCESS) { + pr_warn("%s: Failure %lld injecting error " + "%d-%d to PHB#%x-PE#%x\n", + __func__, ret, type, func, + hose->global_number, pe->addr); + return -EIO; + } + + return 0; +} + static void ioda_eeh_hub_diag_common(struct OpalIoP7IOCErrorData *data) { /* GEM */ @@ -994,5 +1037,6 @@ struct pnv_eeh_ops ioda_eeh_ops = { .reset = ioda_eeh_reset, .get_log = ioda_eeh_get_log, .configure_bridge = ioda_eeh_configure_bridge, + .err_inject = ioda_eeh_err_inject, .next_error = ioda_eeh_next_error }; diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c index fd7a16f855ed..3e89cbf55885 100644 --- a/arch/powerpc/platforms/powernv/eeh-powernv.c +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c @@ -358,6 +358,31 @@ static int powernv_eeh_configure_bridge(struct eeh_pe *pe) return ret; } +/** + * powernv_pe_err_inject - Inject specified error to the indicated PE + * @pe: the indicated PE + * @type: error type + * @func: specific error type + * @addr: address + * @mask: address mask + * + * The routine is called to inject specified error, which is + * determined by @type and @func, to the indicated PE for + * testing purpose. + */ +static int powernv_eeh_err_inject(struct eeh_pe *pe, int type, int func, + unsigned long addr, unsigned long mask) +{ + struct pci_controller *hose = pe->phb; + struct pnv_phb *phb = hose->private_data; + int ret = -EEXIST; + + if (phb->eeh_ops && phb->eeh_ops->err_inject) + ret = phb->eeh_ops->err_inject(pe, type, func, addr, mask); + + return ret; +} + /** * powernv_eeh_next_error - Retrieve next EEH error to handle * @pe: Affected PE @@ -414,6 +439,7 @@ static struct eeh_ops powernv_eeh_ops = { .wait_state = powernv_eeh_wait_state, .get_log = powernv_eeh_get_log, .configure_bridge = powernv_eeh_configure_bridge, + .err_inject = powernv_eeh_err_inject, .read_config = pnv_pci_cfg_read, .write_config = pnv_pci_cfg_write, .next_error = powernv_eeh_next_error, diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index 48494d4b6058..27594cf7f76d 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -85,6 +85,8 @@ struct pnv_eeh_ops { int (*get_log)(struct eeh_pe *pe, int severity, char *drv_log, unsigned long len); int (*configure_bridge)(struct eeh_pe *pe); + int (*err_inject)(struct eeh_pe *pe, int type, int func, + unsigned long addr, unsigned long mask); int (*next_error)(struct eeh_pe **pe); }; #endif /* CONFIG_EEH */ diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c index b645dc60e000..4fc5ff96f418 100644 --- a/arch/powerpc/platforms/pseries/eeh_pseries.c +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c @@ -731,6 +731,7 @@ static struct eeh_ops pseries_eeh_ops = { .wait_state = pseries_eeh_wait_state, .get_log = pseries_eeh_get_log, .configure_bridge = pseries_eeh_configure_bridge, + .err_inject = NULL, .read_config = pseries_eeh_read_config, .write_config = pseries_eeh_write_config, .next_error = NULL, From 7a062782295a896f697137aacbe23400fbbafa94 Mon Sep 17 00:00:00 2001 From: Mike Qiu Date: Tue, 30 Sep 2014 12:38:57 +1000 Subject: [PATCH 081/130] powerpc/powernv: Add PCI error injection debugfs entry The patch adds debugfs file (/sys/kernel/debug/powerpc/PCIxxxx/ err_injct), which accepts following formated string, to support error injection. It will be used to support userland utility "errinjct" in future. "pe_no:0:function:address:mask" - 32-bits PCI errors "pe_no:1:function:address:mask" - 64-bits PCI errors Signed-off-by: Mike Qiu Signed-off-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/eeh-ioda.c | 52 +++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c b/arch/powerpc/platforms/powernv/eeh-ioda.c index 1ac80db1489b..22609f23f455 100644 --- a/arch/powerpc/platforms/powernv/eeh-ioda.c +++ b/arch/powerpc/platforms/powernv/eeh-ioda.c @@ -66,6 +66,54 @@ static struct notifier_block ioda_eeh_nb = { }; #ifdef CONFIG_DEBUG_FS +static ssize_t ioda_eeh_ei_write(struct file *filp, + const char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct pci_controller *hose = filp->private_data; + struct pnv_phb *phb = hose->private_data; + struct eeh_dev *edev; + struct eeh_pe *pe; + int pe_no, type, func; + unsigned long addr, mask; + char buf[50]; + int ret; + + if (!phb->eeh_ops || !phb->eeh_ops->err_inject) + return -ENXIO; + + ret = simple_write_to_buffer(buf, sizeof(buf), ppos, user_buf, count); + if (!ret) + return -EFAULT; + + /* Retrieve parameters */ + ret = sscanf(buf, "%x:%x:%x:%lx:%lx", + &pe_no, &type, &func, &addr, &mask); + if (ret != 5) + return -EINVAL; + + /* Retrieve PE */ + edev = kzalloc(sizeof(*edev), GFP_KERNEL); + if (!edev) + return -ENOMEM; + edev->phb = hose; + edev->pe_config_addr = pe_no; + pe = eeh_pe_get(edev); + kfree(edev); + if (!pe) + return -ENODEV; + + /* Do error injection */ + ret = phb->eeh_ops->err_inject(pe, type, func, addr, mask); + return ret < 0 ? ret : count; +} + +static const struct file_operations ioda_eeh_ei_fops = { + .open = simple_open, + .llseek = no_llseek, + .write = ioda_eeh_ei_write, +}; + static int ioda_eeh_dbgfs_set(void *data, int offset, u64 val) { struct pci_controller *hose = data; @@ -152,6 +200,10 @@ static int ioda_eeh_post_init(struct pci_controller *hose) if (!phb->has_dbgfs && phb->dbgfs) { phb->has_dbgfs = 1; + debugfs_create_file("err_injct", 0200, + phb->dbgfs, hose, + &ioda_eeh_ei_fops); + debugfs_create_file("err_injct_outbound", 0600, phb->dbgfs, hose, &ioda_eeh_outb_dbgfs_ops); From d9df1b5da17cd328301def1d2ae2c2df35f3823c Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 30 Sep 2014 12:38:58 +1000 Subject: [PATCH 082/130] powerpc/powernv: Clear PAPR error injection registers The frozen state on one specific PE is probably caused by error injection, which is done with help of PAPR error injection registers. According to the hardware spec, those registers should be cleared automatically after one-shot frozen PE. However, that's not always true, at least on P7IOC of Firebird-L. So we have to clear them before doing PE reset to avoid recursive EEH errors at recovery stage. Signed-off-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/eeh-ioda.c | 25 +++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c b/arch/powerpc/platforms/powernv/eeh-ioda.c index 22609f23f455..d6fb90b32525 100644 --- a/arch/powerpc/platforms/powernv/eeh-ioda.c +++ b/arch/powerpc/platforms/powernv/eeh-ioda.c @@ -682,6 +682,31 @@ static int ioda_eeh_reset(struct eeh_pe *pe, int option) if (pe->type & EEH_PE_PHB) { ret = ioda_eeh_phb_reset(hose, option); } else { + struct pnv_phb *phb; + s64 rc; + + /* + * The frozen PE might be caused by PAPR error injection + * registers, which are expected to be cleared after hitting + * frozen PE as stated in the hardware spec. Unfortunately, + * that's not true on P7IOC. So we have to clear it manually + * to avoid recursive EEH errors during recovery. + */ + phb = hose->private_data; + if (phb->model == PNV_PHB_MODEL_P7IOC && + (option == EEH_RESET_HOT || + option == EEH_RESET_FUNDAMENTAL)) { + rc = opal_pci_reset(phb->opal_id, + OPAL_PHB_ERROR, + OPAL_ASSERT_RESET); + if (rc != OPAL_SUCCESS) { + pr_warn("%s: Failure %lld clearing " + "error injection registers\n", + __func__, rc); + return -EIO; + } + } + bus = eeh_pe_bus_get(pe); if (pci_is_root_bus(bus) || pci_is_root_bus(bus->parent)) From 22fca17924094113fe79c1db5135290e1a84ad4b Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 30 Sep 2014 12:38:59 +1000 Subject: [PATCH 083/130] powerpc/eeh: Clear frozen device state in time The problem was reported by Carol: In the scenario of passing mlx4 adapter to guest, EEH error could be recovered successfully. When returning the device back to host, the driver (mlx4_core.ko) couldn't be loaded successfully because of error number -5 (-EIO) returned from mlx4_get_ownership(), which hits offlined PCI device. The root cause is that we missed to put the affected devices into normal state on clearing PE isolated state right after PE reset. The patch fixes above issue by putting the affected devices to normal state when clearing PE isolated state in eeh_pe_state_clear(). Cc: stable@vger.kernel.org Reported-by: Carol L. Soto Signed-off-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/eeh_pe.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c index 5864017e2bd9..53dd0915e690 100644 --- a/arch/powerpc/kernel/eeh_pe.c +++ b/arch/powerpc/kernel/eeh_pe.c @@ -584,6 +584,8 @@ static void *__eeh_pe_state_clear(void *data, void *flag) { struct eeh_pe *pe = (struct eeh_pe *)data; int state = *((int *)flag); + struct eeh_dev *edev, *tmp; + struct pci_dev *pdev; /* Keep the state of permanently removed PE intact */ if ((pe->freeze_count > EEH_MAX_ALLOWED_FREEZES) && @@ -592,9 +594,22 @@ static void *__eeh_pe_state_clear(void *data, void *flag) pe->state &= ~state; - /* Clear check count since last isolation */ - if (state & EEH_PE_ISOLATED) - pe->check_count = 0; + /* + * Special treatment on clearing isolated state. Clear + * check count since last isolation and put all affected + * devices to normal state. + */ + if (!(state & EEH_PE_ISOLATED)) + return NULL; + + pe->check_count = 0; + eeh_pe_for_each_dev(pe, edev, tmp) { + pdev = eeh_dev_to_pci_dev(edev); + if (!pdev) + continue; + + pdev->error_state = pci_channel_io_normal; + } return NULL; } From 4d4f577e4b5ee1299096438bbcf743bbe14f33ab Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 30 Sep 2014 12:39:00 +1000 Subject: [PATCH 084/130] powerpc/eeh: Fix improper condition in eeh_pci_enable() The function eeh_pci_enable() is called to apply various requests to one particular PE: Enabling EEH, Disabling EEH, Enabling IO, Enabling DMA, Freezing PE. When enabling IO or DMA on one specific PE, we need check that IO or DMA isn't enabled previously. But the condition used to do the check isn't completely correct because one PE would be in DMA frozen state with workable IO path, or vice versa. The patch fixes the improper condition. Signed-off-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/eeh.c | 58 ++++++++++++++++++++++++++++----------- 1 file changed, 42 insertions(+), 16 deletions(-) diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index f5677684429e..b79a8331965f 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -579,25 +579,51 @@ EXPORT_SYMBOL(eeh_check_failure); */ int eeh_pci_enable(struct eeh_pe *pe, int function) { - int rc, flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE); + int active_flag, rc; /* * pHyp doesn't allow to enable IO or DMA on unfrozen PE. * Also, it's pointless to enable them on unfrozen PE. So - * we have the check here. + * we have to check before enabling IO or DMA. */ - if (function == EEH_OPT_THAW_MMIO || - function == EEH_OPT_THAW_DMA) { + switch (function) { + case EEH_OPT_THAW_MMIO: + active_flag = EEH_STATE_MMIO_ACTIVE; + break; + case EEH_OPT_THAW_DMA: + active_flag = EEH_STATE_DMA_ACTIVE; + break; + case EEH_OPT_DISABLE: + case EEH_OPT_ENABLE: + case EEH_OPT_FREEZE_PE: + active_flag = 0; + break; + default: + pr_warn("%s: Invalid function %d\n", + __func__, function); + return -EINVAL; + } + + /* + * Check if IO or DMA has been enabled before + * enabling them. + */ + if (active_flag) { rc = eeh_ops->get_state(pe, NULL); if (rc < 0) return rc; - /* Needn't to enable or already enabled */ - if ((rc == EEH_STATE_NOT_SUPPORT) || - ((rc & flags) == flags)) + /* Needn't enable it at all */ + if (rc == EEH_STATE_NOT_SUPPORT) + return 0; + + /* It's already enabled */ + if (rc & active_flag) return 0; } + + /* Issue the request */ rc = eeh_ops->set_option(pe, function); if (rc) pr_warn("%s: Unexpected state change %d on " @@ -605,17 +631,17 @@ int eeh_pci_enable(struct eeh_pe *pe, int function) __func__, function, pe->phb->global_number, pe->addr, rc); - rc = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC); - if (rc <= 0) - return rc; + /* Check if the request is finished successfully */ + if (active_flag) { + rc = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC); + if (rc <= 0) + return rc; - if ((function == EEH_OPT_THAW_MMIO) && - (rc & EEH_STATE_MMIO_ENABLED)) - return 0; + if (rc & active_flag) + return 0; - if ((function == EEH_OPT_THAW_DMA) && - (rc & EEH_STATE_DMA_ENABLED)) - return 0; + return -EIO; + } return rc; } From 4eeeff0ebcdeabf3f76c4eece0593e98c6619be8 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 30 Sep 2014 12:39:01 +1000 Subject: [PATCH 085/130] powerpc/eeh: Unfreeze PE on enabling EEH functionality When passing through PE to guest, that's possibly in frozen state. The driver for the pass-through devices on guest side can't be loaded successfully as reported. We already had one gate in eeh_dev_open() to clear PE frozen state accordingly, but that's not enough because the function is only called at QEMU startup for once. The patch adds another gate in eeh_pe_set_option() so that the PE frozen state can be cleared at QEMU restart time. Signed-off-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/eeh.h | 1 + arch/powerpc/kernel/eeh.c | 60 ++++++++++++++++++---------------- 2 files changed, 33 insertions(+), 28 deletions(-) diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index 4fa15796537a..b793fdfb37f3 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -286,6 +286,7 @@ void eeh_add_device_late(struct pci_dev *); void eeh_add_device_tree_late(struct pci_bus *); void eeh_add_sysfs_files(struct pci_bus *); void eeh_remove_device(struct pci_dev *); +int eeh_unfreeze_pe(struct eeh_pe *pe, bool sw_state); int eeh_dev_open(struct pci_dev *pdev); void eeh_dev_release(struct pci_dev *pdev); struct eeh_pe *eeh_iommu_group_to_pe(struct iommu_group *group); diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index b79a8331965f..b569ce2a8037 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -1164,6 +1164,31 @@ void eeh_remove_device(struct pci_dev *dev) edev->mode &= ~EEH_DEV_SYSFS; } +int eeh_unfreeze_pe(struct eeh_pe *pe, bool sw_state) +{ + int ret; + + ret = eeh_pci_enable(pe, EEH_OPT_THAW_MMIO); + if (ret) { + pr_warn("%s: Failure %d enabling IO on PHB#%x-PE#%x\n", + __func__, ret, pe->phb->global_number, pe->addr); + return ret; + } + + ret = eeh_pci_enable(pe, EEH_OPT_THAW_DMA); + if (ret) { + pr_warn("%s: Failure %d enabling DMA on PHB#%x-PE#%x\n", + __func__, ret, pe->phb->global_number, pe->addr); + return ret; + } + + /* Clear software isolated state */ + if (sw_state && (pe->state & EEH_PE_ISOLATED)) + eeh_pe_state_clear(pe, EEH_PE_ISOLATED); + + return ret; +} + /** * eeh_dev_open - Increase count of pass through devices for PE * @pdev: PCI device @@ -1176,7 +1201,6 @@ void eeh_remove_device(struct pci_dev *dev) int eeh_dev_open(struct pci_dev *pdev) { struct eeh_dev *edev; - int flag = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE); int ret = -ENODEV; mutex_lock(&eeh_dev_mutex); @@ -1196,31 +1220,9 @@ int eeh_dev_open(struct pci_dev *pdev) * in frozen PE won't work properly. Clear the frozen state * in advance. */ - ret = eeh_ops->get_state(edev->pe, NULL); - if (ret > 0 && ret != EEH_STATE_NOT_SUPPORT && - (ret & flag) != flag) { - ret = eeh_ops->set_option(edev->pe, EEH_OPT_THAW_MMIO); - if (ret) { - pr_warn("%s: Failure %d enabling MMIO " - "for PHB#%x-PE#%x\n", - __func__, ret, edev->phb->global_number, - edev->pe->addr); - goto out; - } - - ret = eeh_ops->set_option(edev->pe, EEH_OPT_THAW_DMA); - if (ret) { - pr_warn("%s: Failure %d enabling DMA " - "for PHB#%x-PE#%x\n", - __func__, ret, edev->phb->global_number, - edev->pe->addr); - goto out; - } - } - - /* Clear software isolated state */ - if (edev->pe->state & EEH_PE_ISOLATED) - eeh_pe_state_clear(edev->pe, EEH_PE_ISOLATED); + ret = eeh_unfreeze_pe(edev->pe, true); + if (ret) + goto out; /* Increase PE's pass through count */ atomic_inc(&edev->pe->pass_dev_cnt); @@ -1338,8 +1340,10 @@ int eeh_pe_set_option(struct eeh_pe *pe, int option) */ switch (option) { case EEH_OPT_ENABLE: - if (eeh_enabled()) + if (eeh_enabled()) { + ret = eeh_unfreeze_pe(pe, true); break; + } ret = -EIO; break; case EEH_OPT_DISABLE: @@ -1351,7 +1355,7 @@ int eeh_pe_set_option(struct eeh_pe *pe, int option) break; } - ret = eeh_ops->set_option(pe, option); + ret = eeh_pci_enable(pe, option); break; default: pr_debug("%s: Option %d out of range (%d, %d)\n", From c9dd0143978e1c473c08dafe8156259b895db3e2 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 30 Sep 2014 12:39:02 +1000 Subject: [PATCH 086/130] powerpc/eeh: Use eeh_unfreeze_pe() The patch uses eeh_unfreeze_pe() to replace the logic clearing frozen IO and DMA, in order to simplify the code. Signed-off-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/eeh.c | 19 +------------------ arch/powerpc/kernel/eeh_driver.c | 18 ++++++------------ arch/powerpc/kernel/eeh_sysfs.c | 21 +-------------------- 3 files changed, 8 insertions(+), 50 deletions(-) diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index b569ce2a8037..c79583fe1905 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -1433,24 +1433,7 @@ static int eeh_pe_reenable_devices(struct eeh_pe *pe) } /* The PE is still in frozen state */ - ret = eeh_ops->set_option(pe, EEH_OPT_THAW_MMIO); - if (ret) { - pr_warn("%s: Failure %d enabling MMIO for PHB#%x-PE#%x\n", - __func__, ret, pe->phb->global_number, pe->addr); - return ret; - } - - ret = eeh_ops->set_option(pe, EEH_OPT_THAW_DMA); - if (ret) { - pr_warn("%s: Failure %d enabling DMA for PHB#%x-PE#%x\n", - __func__, ret, pe->phb->global_number, pe->addr); - return ret; - } - - /* Clear software isolated state */ - eeh_pe_state_clear(pe, EEH_PE_ISOLATED); - - return ret; + return eeh_unfreeze_pe(pe, true); } /** diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 6a0dcee8e931..948e6f99089f 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -450,21 +450,15 @@ static void *eeh_pe_detach_dev(void *data, void *userdata) static void *__eeh_clear_pe_frozen_state(void *data, void *flag) { struct eeh_pe *pe = (struct eeh_pe *)data; - int i, rc; + int i, rc = 1; - for (i = 0; i < 3; i++) { - rc = eeh_pci_enable(pe, EEH_OPT_THAW_MMIO); - if (rc) - continue; - rc = eeh_pci_enable(pe, EEH_OPT_THAW_DMA); - if (!rc) - break; - } + for (i = 0; rc && i < 3; i++) + rc = eeh_unfreeze_pe(pe, false); - /* The PE has been isolated, clear it */ + /* Stop immediately on any errors */ if (rc) { - pr_warn("%s: Can't clear frozen PHB#%x-PE#%x (%d)\n", - __func__, pe->phb->global_number, pe->addr, rc); + pr_warn("%s: Failure %d unfreezing PHB#%x-PE#%x\n", + __func__, rc, pe->phb->global_number, pe->addr); return (void *)pe; } diff --git a/arch/powerpc/kernel/eeh_sysfs.c b/arch/powerpc/kernel/eeh_sysfs.c index eb15be4d8849..9a44010bd4b5 100644 --- a/arch/powerpc/kernel/eeh_sysfs.c +++ b/arch/powerpc/kernel/eeh_sysfs.c @@ -75,7 +75,6 @@ static ssize_t eeh_pe_state_store(struct device *dev, { struct pci_dev *pdev = to_pci_dev(dev); struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev); - int ret; if (!edev || !edev->pe) return -ENODEV; @@ -84,26 +83,8 @@ static ssize_t eeh_pe_state_store(struct device *dev, if (!(edev->pe->state & EEH_PE_ISOLATED)) return count; - /* Enable MMIO */ - ret = eeh_pci_enable(edev->pe, EEH_OPT_THAW_MMIO); - if (ret) { - pr_warn("%s: Failure %d enabling MMIO for PHB#%d-PE#%d\n", - __func__, ret, edev->pe->phb->global_number, - edev->pe->addr); + if (eeh_unfreeze_pe(edev->pe, true)) return -EIO; - } - - /* Enable DMA */ - ret = eeh_pci_enable(edev->pe, EEH_OPT_THAW_DMA); - if (ret) { - pr_warn("%s: Failure %d enabling DMA for PHB#%d-PE#%d\n", - __func__, ret, edev->pe->phb->global_number, - edev->pe->addr); - return -EIO; - } - - /* Clear software state */ - eeh_pe_state_clear(edev->pe, EEH_PE_ISOLATED); return count; } From 9372dddb189a7a7689b674ad2dd31117a407bfd5 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 30 Sep 2014 12:39:03 +1000 Subject: [PATCH 087/130] powerpc/eeh: Block PCI config access during reset Function pcibios_set_pcie_reset_state() can be used to do PCI reset. PCI config access during the reset usually causes EEH errors unexpectedly. In order to avoid the EEH error, the patch blocks PCI config access during reset with the help of flag EEH_PE_RESET, which is similar to what we did in EEH PE reset path. Signed-off-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/eeh.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index c79583fe1905..eb266f4ee9be 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -668,14 +668,18 @@ int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state stat switch (state) { case pcie_deassert_reset: eeh_ops->reset(pe, EEH_RESET_DEACTIVATE); + eeh_pe_state_clear(pe, EEH_PE_RESET); break; case pcie_hot_reset: + eeh_pe_state_mark(pe, EEH_PE_RESET); eeh_ops->reset(pe, EEH_RESET_HOT); break; case pcie_warm_reset: + eeh_pe_state_mark(pe, EEH_PE_RESET); eeh_ops->reset(pe, EEH_RESET_FUNDAMENTAL); break; default: + eeh_pe_state_clear(pe, EEH_PE_RESET); return -EINVAL; }; From 4ba5a0fc6441194cf238e1a049f548b5d45760c8 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 30 Sep 2014 12:39:04 +1000 Subject: [PATCH 088/130] powerpc/pseries: Decrease message level on EEH initialization As Anton suggested, the patch decreases the message level on EEH initialization to avoid unnecessary messages if required. Also, we have unified hint if any of needful RTAS calls is missed, and then we can check /proc/device-tree to figure out the missed RTAS calls. Suggested-by: Anton Blanchard Signed-off-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/pseries/eeh_pseries.c | 35 ++++++-------------- 1 file changed, 10 insertions(+), 25 deletions(-) diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c index 4fc5ff96f418..a6c7e19f5eb3 100644 --- a/arch/powerpc/platforms/pseries/eeh_pseries.c +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c @@ -88,29 +88,14 @@ static int pseries_eeh_init(void) * and its variant since the old firmware probably support address * of domain/bus/slot/function for EEH RTAS operations. */ - if (ibm_set_eeh_option == RTAS_UNKNOWN_SERVICE) { - pr_warn("%s: RTAS service invalid\n", - __func__); - return -EINVAL; - } else if (ibm_set_slot_reset == RTAS_UNKNOWN_SERVICE) { - pr_warn("%s: RTAS service invalid\n", - __func__); - return -EINVAL; - } else if (ibm_read_slot_reset_state2 == RTAS_UNKNOWN_SERVICE && - ibm_read_slot_reset_state == RTAS_UNKNOWN_SERVICE) { - pr_warn("%s: RTAS service and " - " invalid\n", - __func__); - return -EINVAL; - } else if (ibm_slot_error_detail == RTAS_UNKNOWN_SERVICE) { - pr_warn("%s: RTAS service invalid\n", - __func__); - return -EINVAL; - } else if (ibm_configure_pe == RTAS_UNKNOWN_SERVICE && - ibm_configure_bridge == RTAS_UNKNOWN_SERVICE) { - pr_warn("%s: RTAS service and " - " invalid\n", - __func__); + if (ibm_set_eeh_option == RTAS_UNKNOWN_SERVICE || + ibm_set_slot_reset == RTAS_UNKNOWN_SERVICE || + (ibm_read_slot_reset_state2 == RTAS_UNKNOWN_SERVICE && + ibm_read_slot_reset_state == RTAS_UNKNOWN_SERVICE) || + ibm_slot_error_detail == RTAS_UNKNOWN_SERVICE || + (ibm_configure_pe == RTAS_UNKNOWN_SERVICE && + ibm_configure_bridge == RTAS_UNKNOWN_SERVICE)) { + pr_info("EEH functionality not supported\n"); return -EINVAL; } @@ -118,11 +103,11 @@ static int pseries_eeh_init(void) spin_lock_init(&slot_errbuf_lock); eeh_error_buf_size = rtas_token("rtas-error-log-max"); if (eeh_error_buf_size == RTAS_UNKNOWN_SERVICE) { - pr_warn("%s: unknown EEH error log size\n", + pr_info("%s: unknown EEH error log size\n", __func__); eeh_error_buf_size = 1024; } else if (eeh_error_buf_size > RTAS_ERROR_LOG_MAX) { - pr_warn("%s: EEH error log size %d exceeds the maximal %d\n", + pr_info("%s: EEH error log size %d exceeds the maximal %d\n", __func__, eeh_error_buf_size, RTAS_ERROR_LOG_MAX); eeh_error_buf_size = RTAS_ERROR_LOG_MAX; } From d1a85eee35f15a20f508c42d7664dce335eefc2d Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 30 Sep 2014 12:39:05 +1000 Subject: [PATCH 089/130] powerpc/powernv: Sync OpalPciResetScope with firmware The names of PCI reset scopes aren't sychronized with firmware. The patch fixes it. Signed-off-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/opal.h | 9 ++++++--- arch/powerpc/platforms/powernv/eeh-ioda.c | 12 ++++++------ arch/powerpc/platforms/powernv/pci-ioda.c | 4 ++-- 3 files changed, 14 insertions(+), 11 deletions(-) diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index f1579419e5c8..a084cf5bd76c 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -387,9 +387,12 @@ enum OpalM64EnableAction { }; enum OpalPciResetScope { - OPAL_PHB_COMPLETE = 1, OPAL_PCI_LINK = 2, OPAL_PHB_ERROR = 3, - OPAL_PCI_HOT_RESET = 4, OPAL_PCI_FUNDAMENTAL_RESET = 5, - OPAL_PCI_IODA_TABLE_RESET = 6, + OPAL_RESET_PHB_COMPLETE = 1, + OPAL_RESET_PCI_LINK = 2, + OPAL_RESET_PHB_ERROR = 3, + OPAL_RESET_PCI_HOT = 4, + OPAL_RESET_PCI_FUNDAMENTAL = 5, + OPAL_RESET_PCI_IODA_TABLE = 6 }; enum OpalPciReinitScope { diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c b/arch/powerpc/platforms/powernv/eeh-ioda.c index d6fb90b32525..ecb293ef1fec 100644 --- a/arch/powerpc/platforms/powernv/eeh-ioda.c +++ b/arch/powerpc/platforms/powernv/eeh-ioda.c @@ -514,11 +514,11 @@ int ioda_eeh_phb_reset(struct pci_controller *hose, int option) if (option == EEH_RESET_FUNDAMENTAL || option == EEH_RESET_HOT) rc = opal_pci_reset(phb->opal_id, - OPAL_PHB_COMPLETE, + OPAL_RESET_PHB_COMPLETE, OPAL_ASSERT_RESET); else if (option == EEH_RESET_DEACTIVATE) rc = opal_pci_reset(phb->opal_id, - OPAL_PHB_COMPLETE, + OPAL_RESET_PHB_COMPLETE, OPAL_DEASSERT_RESET); if (rc < 0) goto out; @@ -558,15 +558,15 @@ static int ioda_eeh_root_reset(struct pci_controller *hose, int option) */ if (option == EEH_RESET_FUNDAMENTAL) rc = opal_pci_reset(phb->opal_id, - OPAL_PCI_FUNDAMENTAL_RESET, + OPAL_RESET_PCI_FUNDAMENTAL, OPAL_ASSERT_RESET); else if (option == EEH_RESET_HOT) rc = opal_pci_reset(phb->opal_id, - OPAL_PCI_HOT_RESET, + OPAL_RESET_PCI_HOT, OPAL_ASSERT_RESET); else if (option == EEH_RESET_DEACTIVATE) rc = opal_pci_reset(phb->opal_id, - OPAL_PCI_HOT_RESET, + OPAL_RESET_PCI_HOT, OPAL_DEASSERT_RESET); if (rc < 0) goto out; @@ -697,7 +697,7 @@ static int ioda_eeh_reset(struct eeh_pe *pe, int option) (option == EEH_RESET_HOT || option == EEH_RESET_FUNDAMENTAL)) { rc = opal_pci_reset(phb->opal_id, - OPAL_PHB_ERROR, + OPAL_RESET_PHB_ERROR, OPAL_ASSERT_RESET); if (rc != OPAL_SUCCESS) { pr_warn("%s: Failure %lld clearing " diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index bc79bbdbb6c7..8685dacccb0b 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1626,7 +1626,7 @@ static u32 pnv_ioda_bdfn_to_pe(struct pnv_phb *phb, struct pci_bus *bus, static void pnv_pci_ioda_shutdown(struct pnv_phb *phb) { - opal_pci_reset(phb->opal_id, OPAL_PCI_IODA_TABLE_RESET, + opal_pci_reset(phb->opal_id, OPAL_RESET_PCI_IODA_TABLE, OPAL_ASSERT_RESET); } @@ -1802,7 +1802,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, pci_add_flags(PCI_REASSIGN_ALL_RSRC); /* Reset IODA tables to a clean state */ - rc = opal_pci_reset(phb_id, OPAL_PCI_IODA_TABLE_RESET, OPAL_ASSERT_RESET); + rc = opal_pci_reset(phb_id, OPAL_RESET_PCI_IODA_TABLE, OPAL_ASSERT_RESET); if (rc) pr_warning(" OPAL Error %ld performing IODA table reset !\n", rc); From 93e8b36d7bf5c54f1c52d8b78e34f88e52a3dfa2 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 30 Sep 2014 12:39:06 +1000 Subject: [PATCH 090/130] powerpc/eeh: Tag reset state for user owned PE PE would be owned by userland, which probably request PE reset done in host side. During the reset, we should drop the PCI config accesses to the PE with help of flag EEH_PE_RESET. Signed-off-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/eeh.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index eb266f4ee9be..693690827785 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -1463,6 +1463,7 @@ int eeh_pe_reset(struct eeh_pe *pe, int option) switch (option) { case EEH_RESET_DEACTIVATE: ret = eeh_ops->reset(pe, option); + eeh_pe_state_clear(pe, EEH_PE_RESET); if (ret) break; @@ -1477,6 +1478,7 @@ int eeh_pe_reset(struct eeh_pe *pe, int option) */ eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE); + eeh_pe_state_mark(pe, EEH_PE_RESET); ret = eeh_ops->reset(pe, option); break; default: From 5cfb20b96f624e9852c4f3f1c4397e81ca28d5aa Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 30 Sep 2014 12:39:07 +1000 Subject: [PATCH 091/130] powerpc/eeh: Emulate EEH recovery for VFIO devices When enabling EEH functionality on passed through devices (PE) with VFIO, the devices in the PE would be removed permanently from guest side. In that case, the PE remains frozen state. When returning PE to host, or restarting the guest again, we had mechanism unfreezing the PE by clearing PESTA/B frozen bits. However, that's not enough for some adapters, which are indicated as following "lspci" shows. Those adapters require hot reset on the parent bus to bring their firmware back to workable state. Otherwise, those adaptrs won't be operative and the host (for returning case) or the guest will fail to load the drivers for those adapters without exception. 0000:01:00.0 Ethernet controller: Emulex Corporation OneConnect \ 10Gb NIC (be3) (rev 02) 0000:01:00.0 0200: 19a2:0710 (rev 02) 0001:03:00.0 Ethernet controller: Emulex Corporation OneConnect \ NIC (Lancer) (rev 10) 0001:03:00.0 0200: 10df:e220 (rev 10) The patch adds mechanism to emulate EEH recovery (for hot reset on parent PCI bus) on 3 gates to fix the issue: open/release one adapter of the PE, enable EEH functionality on one adapter of the PE. Reported-by: Murilo Fossa Vicentini Signed-off-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/eeh.h | 1 + arch/powerpc/kernel/eeh.c | 59 ++++++++++++++++++++- arch/powerpc/kernel/eeh_driver.c | 90 ++++++++++++++++++++++++++++++-- 3 files changed, 144 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index b793fdfb37f3..3b260efbfbf9 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -287,6 +287,7 @@ void eeh_add_device_tree_late(struct pci_bus *); void eeh_add_sysfs_files(struct pci_bus *); void eeh_remove_device(struct pci_dev *); int eeh_unfreeze_pe(struct eeh_pe *pe, bool sw_state); +int eeh_pe_reset_and_recover(struct eeh_pe *pe); int eeh_dev_open(struct pci_dev *pdev); void eeh_dev_release(struct pci_dev *pdev); struct eeh_pe *eeh_iommu_group_to_pe(struct iommu_group *group); diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 693690827785..3350b8490dbc 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -1193,6 +1193,60 @@ int eeh_unfreeze_pe(struct eeh_pe *pe, bool sw_state) return ret; } + +static struct pci_device_id eeh_reset_ids[] = { + { PCI_DEVICE(0x19a2, 0x0710) }, /* Emulex, BE */ + { PCI_DEVICE(0x10df, 0xe220) }, /* Emulex, Lancer */ + { 0 } +}; + +static int eeh_pe_change_owner(struct eeh_pe *pe) +{ + struct eeh_dev *edev, *tmp; + struct pci_dev *pdev; + struct pci_device_id *id; + int flags, ret; + + /* Check PE state */ + flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE); + ret = eeh_ops->get_state(pe, NULL); + if (ret < 0 || ret == EEH_STATE_NOT_SUPPORT) + return 0; + + /* Unfrozen PE, nothing to do */ + if ((ret & flags) == flags) + return 0; + + /* Frozen PE, check if it needs PE level reset */ + eeh_pe_for_each_dev(pe, edev, tmp) { + pdev = eeh_dev_to_pci_dev(edev); + if (!pdev) + continue; + + for (id = &eeh_reset_ids[0]; id->vendor != 0; id++) { + if (id->vendor != PCI_ANY_ID && + id->vendor != pdev->vendor) + continue; + if (id->device != PCI_ANY_ID && + id->device != pdev->device) + continue; + if (id->subvendor != PCI_ANY_ID && + id->subvendor != pdev->subsystem_vendor) + continue; + if (id->subdevice != PCI_ANY_ID && + id->subdevice != pdev->subsystem_device) + continue; + + goto reset; + } + } + + return eeh_unfreeze_pe(pe, true); + +reset: + return eeh_pe_reset_and_recover(pe); +} + /** * eeh_dev_open - Increase count of pass through devices for PE * @pdev: PCI device @@ -1224,7 +1278,7 @@ int eeh_dev_open(struct pci_dev *pdev) * in frozen PE won't work properly. Clear the frozen state * in advance. */ - ret = eeh_unfreeze_pe(edev->pe, true); + ret = eeh_pe_change_owner(edev->pe); if (ret) goto out; @@ -1265,6 +1319,7 @@ void eeh_dev_release(struct pci_dev *pdev) /* Decrease PE's pass through count */ atomic_dec(&edev->pe->pass_dev_cnt); WARN_ON(atomic_read(&edev->pe->pass_dev_cnt) < 0); + eeh_pe_change_owner(edev->pe); out: mutex_unlock(&eeh_dev_mutex); } @@ -1345,7 +1400,7 @@ int eeh_pe_set_option(struct eeh_pe *pe, int option) switch (option) { case EEH_OPT_ENABLE: if (eeh_enabled()) { - ret = eeh_unfreeze_pe(pe, true); + ret = eeh_pe_change_owner(pe); break; } ret = -EIO; diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 948e6f99089f..3fd514f8e4b2 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -180,6 +180,22 @@ static bool eeh_dev_removed(struct eeh_dev *edev) return false; } +static void *eeh_dev_save_state(void *data, void *userdata) +{ + struct eeh_dev *edev = data; + struct pci_dev *pdev; + + if (!edev) + return NULL; + + pdev = eeh_dev_to_pci_dev(edev); + if (!pdev) + return NULL; + + pci_save_state(pdev); + return NULL; +} + /** * eeh_report_error - Report pci error to each device driver * @data: eeh device @@ -303,6 +319,22 @@ static void *eeh_report_reset(void *data, void *userdata) return NULL; } +static void *eeh_dev_restore_state(void *data, void *userdata) +{ + struct eeh_dev *edev = data; + struct pci_dev *pdev; + + if (!edev) + return NULL; + + pdev = eeh_dev_to_pci_dev(edev); + if (!pdev) + return NULL; + + pci_restore_state(pdev); + return NULL; +} + /** * eeh_report_resume - Tell device to resume normal operations * @data: eeh device @@ -450,10 +482,11 @@ static void *eeh_pe_detach_dev(void *data, void *userdata) static void *__eeh_clear_pe_frozen_state(void *data, void *flag) { struct eeh_pe *pe = (struct eeh_pe *)data; + bool *clear_sw_state = flag; int i, rc = 1; for (i = 0; rc && i < 3; i++) - rc = eeh_unfreeze_pe(pe, false); + rc = eeh_unfreeze_pe(pe, clear_sw_state); /* Stop immediately on any errors */ if (rc) { @@ -465,17 +498,66 @@ static void *__eeh_clear_pe_frozen_state(void *data, void *flag) return NULL; } -static int eeh_clear_pe_frozen_state(struct eeh_pe *pe) +static int eeh_clear_pe_frozen_state(struct eeh_pe *pe, + bool clear_sw_state) { void *rc; - rc = eeh_pe_traverse(pe, __eeh_clear_pe_frozen_state, NULL); + rc = eeh_pe_traverse(pe, __eeh_clear_pe_frozen_state, &clear_sw_state); if (!rc) eeh_pe_state_clear(pe, EEH_PE_ISOLATED); return rc ? -EIO : 0; } +int eeh_pe_reset_and_recover(struct eeh_pe *pe) +{ + int result, ret; + + /* Bail if the PE is being recovered */ + if (pe->state & EEH_PE_RECOVERING) + return 0; + + /* Put the PE into recovery mode */ + eeh_pe_state_mark(pe, EEH_PE_RECOVERING); + + /* Save states */ + eeh_pe_dev_traverse(pe, eeh_dev_save_state, NULL); + + /* Report error */ + eeh_pe_dev_traverse(pe, eeh_report_error, &result); + + /* Issue reset */ + eeh_pe_state_mark(pe, EEH_PE_RESET); + ret = eeh_reset_pe(pe); + if (ret) { + eeh_pe_state_clear(pe, EEH_PE_RECOVERING | EEH_PE_RESET); + return ret; + } + eeh_pe_state_clear(pe, EEH_PE_RESET); + + /* Unfreeze the PE */ + ret = eeh_clear_pe_frozen_state(pe, true); + if (ret) { + eeh_pe_state_clear(pe, EEH_PE_RECOVERING); + return ret; + } + + /* Notify completion of reset */ + eeh_pe_dev_traverse(pe, eeh_report_reset, &result); + + /* Restore device state */ + eeh_pe_dev_traverse(pe, eeh_dev_restore_state, NULL); + + /* Resume */ + eeh_pe_dev_traverse(pe, eeh_report_resume, NULL); + + /* Clear recovery mode */ + eeh_pe_state_clear(pe, EEH_PE_RECOVERING); + + return 0; +} + /** * eeh_reset_device - Perform actual reset of a pci slot * @pe: EEH PE @@ -534,7 +616,7 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus) eeh_pe_state_clear(pe, EEH_PE_RESET); /* Clear frozen state */ - rc = eeh_clear_pe_frozen_state(pe); + rc = eeh_clear_pe_frozen_state(pe, false); if (rc) return rc; From f2e0be5e76dd626c70f5aa5c6165b4dfa1d14c64 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 30 Sep 2014 12:39:08 +1000 Subject: [PATCH 092/130] powerpc/eeh: Dump PCI config space for all child devices The PEs can be organized as nested. Current implementation doesn't dump PCI config space for subordinate devices of child PEs. However, the frozen PE could be caused by those subordinate devices of its child PEs. The patch dumps PCI config space for all subordinate devices of the problematic PE. Signed-off-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/eeh.c | 35 ++++++++++++++++++++--------------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 3350b8490dbc..d543e4179c18 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -117,7 +117,7 @@ static DEFINE_MUTEX(eeh_dev_mutex); * not dynamically alloced, so that it ends up in RMO where RTAS * can access it. */ -#define EEH_PCI_REGS_LOG_LEN 4096 +#define EEH_PCI_REGS_LOG_LEN 8192 static unsigned char pci_regs_buf[EEH_PCI_REGS_LOG_LEN]; /* @@ -148,16 +148,12 @@ static int __init eeh_setup(char *str) } __setup("eeh=", eeh_setup); -/** - * eeh_gather_pci_data - Copy assorted PCI config space registers to buff - * @edev: device to report data for - * @buf: point to buffer in which to log - * @len: amount of room in buffer - * - * This routine captures assorted PCI configuration space data, - * and puts them into a buffer for RTAS error logging. +/* + * This routine captures assorted PCI configuration space data + * for the indicated PCI device, and puts them into a buffer + * for RTAS error logging. */ -static size_t eeh_gather_pci_data(struct eeh_dev *edev, char *buf, size_t len) +static size_t eeh_dump_dev_log(struct eeh_dev *edev, char *buf, size_t len) { struct device_node *dn = eeh_dev_to_of_node(edev); u32 cfg; @@ -255,6 +251,19 @@ static size_t eeh_gather_pci_data(struct eeh_dev *edev, char *buf, size_t len) return n; } +static void *eeh_dump_pe_log(void *data, void *flag) +{ + struct eeh_pe *pe = data; + struct eeh_dev *edev, *tmp; + size_t *plen = flag; + + eeh_pe_for_each_dev(pe, edev, tmp) + *plen += eeh_dump_dev_log(edev, pci_regs_buf + *plen, + EEH_PCI_REGS_LOG_LEN - *plen); + + return NULL; +} + /** * eeh_slot_error_detail - Generate combined log including driver log and error log * @pe: EEH PE @@ -268,7 +277,6 @@ static size_t eeh_gather_pci_data(struct eeh_dev *edev, char *buf, size_t len) void eeh_slot_error_detail(struct eeh_pe *pe, int severity) { size_t loglen = 0; - struct eeh_dev *edev, *tmp; /* * When the PHB is fenced or dead, it's pointless to collect @@ -286,10 +294,7 @@ void eeh_slot_error_detail(struct eeh_pe *pe, int severity) eeh_pe_restore_bars(pe); pci_regs_buf[0] = 0; - eeh_pe_for_each_dev(pe, edev, tmp) { - loglen += eeh_gather_pci_data(edev, pci_regs_buf + loglen, - EEH_PCI_REGS_LOG_LEN - loglen); - } + eeh_pe_traverse(pe, eeh_dump_pe_log, &loglen); } eeh_ops->get_log(pe, severity, pci_regs_buf, loglen); From 372fb80db93af5d85c750515526d731856e1890c Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 30 Sep 2014 12:39:09 +1000 Subject: [PATCH 093/130] powerpc/powernv: Fetch frozen PE on top level It should have been part of commit 1ad7a72c5 ("powerpc/eeh: Report frozen parent PE prior to child PE"). There are 2 ways to report EEH errors: proactively polling because of 0xFF's returned from PCI config or IO read, or interrupt driven event. We missed to report and handle parent frozen PE prior to child frozen PE for the later case on PowerNV platform. Signed-off-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/eeh-ioda.c | 48 ++++++++++++++++------- 1 file changed, 34 insertions(+), 14 deletions(-) diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c b/arch/powerpc/platforms/powernv/eeh-ioda.c index ecb293ef1fec..426814a2ede3 100644 --- a/arch/powerpc/platforms/powernv/eeh-ioda.c +++ b/arch/powerpc/platforms/powernv/eeh-ioda.c @@ -886,14 +886,12 @@ static int ioda_eeh_get_pe(struct pci_controller *hose, * the master PE because slave PE is invisible * to EEH core. */ - if (phb->get_pe_state) { - pnv_pe = &phb->ioda.pe_array[pe_no]; - if (pnv_pe->flags & PNV_IODA_PE_SLAVE) { - pnv_pe = pnv_pe->master; - WARN_ON(!pnv_pe || - !(pnv_pe->flags & PNV_IODA_PE_MASTER)); - pe_no = pnv_pe->pe_number; - } + pnv_pe = &phb->ioda.pe_array[pe_no]; + if (pnv_pe->flags & PNV_IODA_PE_SLAVE) { + pnv_pe = pnv_pe->master; + WARN_ON(!pnv_pe || + !(pnv_pe->flags & PNV_IODA_PE_MASTER)); + pe_no = pnv_pe->pe_number; } /* Find the PE according to PE# */ @@ -904,15 +902,37 @@ static int ioda_eeh_get_pe(struct pci_controller *hose, if (!dev_pe) return -EEXIST; - /* - * At this point, we're sure the compound PE should - * be put into frozen state. - */ + /* Freeze the (compound) PE */ *pe = dev_pe; - if (phb->freeze_pe && - !(dev_pe->state & EEH_PE_ISOLATED)) + if (!(dev_pe->state & EEH_PE_ISOLATED)) phb->freeze_pe(phb, pe_no); + /* + * At this point, we're sure the (compound) PE should + * have been frozen. However, we still need poke until + * hitting the frozen PE on top level. + */ + dev_pe = dev_pe->parent; + while (dev_pe && !(dev_pe->type & EEH_PE_PHB)) { + int ret; + int active_flags = (EEH_STATE_MMIO_ACTIVE | + EEH_STATE_DMA_ACTIVE); + + ret = eeh_ops->get_state(dev_pe, NULL); + if (ret <= 0 || (ret & active_flags) == active_flags) { + dev_pe = dev_pe->parent; + continue; + } + + /* Frozen parent PE */ + *pe = dev_pe; + if (!(dev_pe->state & EEH_PE_ISOLATED)) + phb->freeze_pe(phb, dev_pe->addr); + + /* Next one */ + dev_pe = dev_pe->parent; + } + return 0; } From fe7e85c6f5ff63a8cd081deb35e58a0bd47589cd Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 30 Sep 2014 12:39:10 +1000 Subject: [PATCH 094/130] powerpc/powernv: Override dma_get_required_mask() The dma_get_required_mask() function is used by some drivers to query the platform about what DMA mask is needed to cover all of memory. This is a bit of a strange semantic when we have to choose between IOMMU translation or bypass, but essentially what it means is "what DMA mask will give best performances". Currently, our IOMMU backend always returns a 32-bit mask here, we don't do anything special to it when we have bypass available. This causes some drivers to choose a 32-bit mask, thus losing the ability to use the bypass window, thinking this is more efficient. The problem was reported from the driver of following device: 0004:03:00.0 0107: 1000:0087 (rev 05) 0004:03:00.0 Serial Attached SCSI controller: LSI Logic / Symbios \ Logic SAS2308 PCI-Express Fusion-MPT SAS-2 (rev 05) This patch adds an override of that function in order to, instead, return a 64-bit mask whenever a bypass window is available in order for drivers to prefer this configuration. Reported-by: Murali N. Iyer Suggested-by: Benjamin Herrenschmidt Signed-off-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/dma-mapping.h | 1 + arch/powerpc/kernel/dma.c | 14 ++++++++++---- arch/powerpc/platforms/powernv/pci-ioda.c | 23 +++++++++++++++++++++++ arch/powerpc/platforms/powernv/pci.c | 11 +++++++++++ arch/powerpc/platforms/powernv/pci.h | 2 ++ arch/powerpc/platforms/powernv/powernv.h | 6 ++++++ arch/powerpc/platforms/powernv/setup.c | 9 +++++++++ 7 files changed, 62 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/include/asm/dma-mapping.h b/arch/powerpc/include/asm/dma-mapping.h index 150866b2a3fe..894d538f3567 100644 --- a/arch/powerpc/include/asm/dma-mapping.h +++ b/arch/powerpc/include/asm/dma-mapping.h @@ -135,6 +135,7 @@ static inline int dma_supported(struct device *dev, u64 mask) extern int dma_set_mask(struct device *dev, u64 dma_mask); extern int __dma_set_mask(struct device *dev, u64 dma_mask); +extern u64 __dma_get_required_mask(struct device *dev); #define dma_alloc_coherent(d,s,h,f) dma_alloc_attrs(d,s,h,f,NULL) diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c index ee78f6e49d64..210ff9d3c182 100644 --- a/arch/powerpc/kernel/dma.c +++ b/arch/powerpc/kernel/dma.c @@ -202,6 +202,7 @@ int __dma_set_mask(struct device *dev, u64 dma_mask) *dev->dma_mask = dma_mask; return 0; } + int dma_set_mask(struct device *dev, u64 dma_mask) { if (ppc_md.dma_set_mask) @@ -210,13 +211,10 @@ int dma_set_mask(struct device *dev, u64 dma_mask) } EXPORT_SYMBOL(dma_set_mask); -u64 dma_get_required_mask(struct device *dev) +u64 __dma_get_required_mask(struct device *dev) { struct dma_map_ops *dma_ops = get_dma_ops(dev); - if (ppc_md.dma_get_required_mask) - return ppc_md.dma_get_required_mask(dev); - if (unlikely(dma_ops == NULL)) return 0; @@ -225,6 +223,14 @@ u64 dma_get_required_mask(struct device *dev) return DMA_BIT_MASK(8 * sizeof(dma_addr_t)); } + +u64 dma_get_required_mask(struct device *dev) +{ + if (ppc_md.dma_get_required_mask) + return ppc_md.dma_get_required_mask(dev); + + return __dma_get_required_mask(dev); +} EXPORT_SYMBOL_GPL(dma_get_required_mask); static int __init dma_init(void) diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 8685dacccb0b..0fd6bdb493c9 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -889,6 +889,28 @@ static int pnv_pci_ioda_dma_set_mask(struct pnv_phb *phb, return 0; } +static u64 pnv_pci_ioda_dma_get_required_mask(struct pnv_phb *phb, + struct pci_dev *pdev) +{ + struct pci_dn *pdn = pci_get_pdn(pdev); + struct pnv_ioda_pe *pe; + u64 end, mask; + + if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE)) + return 0; + + pe = &phb->ioda.pe_array[pdn->pe_number]; + if (!pe->tce_bypass_enabled) + return __dma_get_required_mask(&pdev->dev); + + + end = pe->tce_bypass_base + memblock_end_of_DRAM(); + mask = 1ULL << (fls64(end) - 1); + mask += mask - 1; + + return mask; +} + static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, struct pci_bus *bus, bool add_to_iommu_group) @@ -1781,6 +1803,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, /* Setup TCEs */ phb->dma_dev_setup = pnv_pci_ioda_dma_dev_setup; phb->dma_set_mask = pnv_pci_ioda_dma_set_mask; + phb->dma_get_required_mask = pnv_pci_ioda_dma_get_required_mask; /* Setup shutdown function for kexec */ phb->shutdown = pnv_pci_ioda_shutdown; diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index b854b57ed5e1..e9f509bbc078 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -761,6 +761,17 @@ int pnv_pci_dma_set_mask(struct pci_dev *pdev, u64 dma_mask) return __dma_set_mask(&pdev->dev, dma_mask); } +u64 pnv_pci_dma_get_required_mask(struct pci_dev *pdev) +{ + struct pci_controller *hose = pci_bus_to_host(pdev->bus); + struct pnv_phb *phb = hose->private_data; + + if (phb && phb->dma_get_required_mask) + return phb->dma_get_required_mask(phb, pdev); + + return __dma_get_required_mask(&pdev->dev); +} + void pnv_pci_shutdown(void) { struct pci_controller *hose; diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index 27594cf7f76d..34d29eb2a4de 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -124,6 +124,8 @@ struct pnv_phb { void (*dma_dev_setup)(struct pnv_phb *phb, struct pci_dev *pdev); int (*dma_set_mask)(struct pnv_phb *phb, struct pci_dev *pdev, u64 dma_mask); + u64 (*dma_get_required_mask)(struct pnv_phb *phb, + struct pci_dev *pdev); void (*fixup_phb)(struct pci_controller *hose); u32 (*bdfn_to_pe)(struct pnv_phb *phb, struct pci_bus *bus, u32 devfn); void (*shutdown)(struct pnv_phb *phb); diff --git a/arch/powerpc/platforms/powernv/powernv.h b/arch/powerpc/platforms/powernv/powernv.h index 75501bfede7f..6c8e2d188cd0 100644 --- a/arch/powerpc/platforms/powernv/powernv.h +++ b/arch/powerpc/platforms/powernv/powernv.h @@ -13,6 +13,7 @@ struct pci_dev; extern void pnv_pci_init(void); extern void pnv_pci_shutdown(void); extern int pnv_pci_dma_set_mask(struct pci_dev *pdev, u64 dma_mask); +extern u64 pnv_pci_dma_get_required_mask(struct pci_dev *pdev); #else static inline void pnv_pci_init(void) { } static inline void pnv_pci_shutdown(void) { } @@ -21,6 +22,11 @@ static inline int pnv_pci_dma_set_mask(struct pci_dev *pdev, u64 dma_mask) { return -ENODEV; } + +static inline u64 pnv_pci_dma_get_required_mask(struct pci_dev *pdev) +{ + return 0; +} #endif extern void pnv_lpc_init(void); diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index bb1fc9b8d55e..3f9546d8a51f 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -173,6 +173,14 @@ static int pnv_dma_set_mask(struct device *dev, u64 dma_mask) return __dma_set_mask(dev, dma_mask); } +static u64 pnv_dma_get_required_mask(struct device *dev) +{ + if (dev_is_pci(dev)) + return pnv_pci_dma_get_required_mask(to_pci_dev(dev)); + + return __dma_get_required_mask(dev); +} + static void pnv_shutdown(void) { /* Let the PCI code clear up IODA tables */ @@ -335,6 +343,7 @@ define_machine(powernv) { .power_save = power7_idle, .calibrate_decr = generic_calibrate_decr, .dma_set_mask = pnv_dma_set_mask, + .dma_get_required_mask = pnv_dma_get_required_mask, #ifdef CONFIG_KEXEC .kexec_cpu_down = pnv_kexec_cpu_down, #endif From 2013add4ce73c93ae2148969a9ec3ecc8b1e26fa Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Wed, 1 Oct 2014 14:34:51 +1000 Subject: [PATCH 095/130] powerpc/eeh: Show hex prefix for PE state sysfs As Michael suggested, the hex prefix for the output of EEH PE state sysfs entry (/sys/bus/pci/devices/xxx/eeh_pe_state) is always informative to users. Suggested-by: Michael Ellerman Signed-off-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/eeh_sysfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/eeh_sysfs.c b/arch/powerpc/kernel/eeh_sysfs.c index 9a44010bd4b5..f19b1e5cb060 100644 --- a/arch/powerpc/kernel/eeh_sysfs.c +++ b/arch/powerpc/kernel/eeh_sysfs.c @@ -65,7 +65,7 @@ static ssize_t eeh_pe_state_show(struct device *dev, return -ENODEV; state = eeh_ops->get_state(edev->pe, NULL); - return sprintf(buf, "%08x %08x\n", + return sprintf(buf, "%0x08x %0x08x\n", state, edev->pe->state); } From e35735b9a5d8d38d9ffe2f1f0cdcbb0d45c42eff Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Thu, 2 Oct 2014 15:44:21 +1000 Subject: [PATCH 096/130] powerpc: Speed up clear_page by unrolling it Unroll clear_page 8 times. A simple microbenchmark which allocates and frees a zeroed page: for (i = 0; i < iterations; i++) { unsigned long p = __get_free_page(GFP_KERNEL | __GFP_ZERO); free_page(p); } improves 20% on POWER8. This assumes cacheline sizes won't grow beyond 512 bytes or page sizes wont drop below 1kB, which is unlikely, but we could add a runtime check during early init if it makes people nervous. Michael found that some versions of gcc produce quite bad code (all multiplies), so we give gcc a hand by using shifts and adds. Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/page_64.h | 38 +++++++++++++++++++++++------- 1 file changed, 29 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/include/asm/page_64.h b/arch/powerpc/include/asm/page_64.h index d0d6afb353d4..d908a46d05c0 100644 --- a/arch/powerpc/include/asm/page_64.h +++ b/arch/powerpc/include/asm/page_64.h @@ -42,20 +42,40 @@ typedef unsigned long pte_basic_t; -static __inline__ void clear_page(void *addr) +static inline void clear_page(void *addr) { - unsigned long lines, line_size; + unsigned long iterations; + unsigned long onex, twox, fourx, eightx; - line_size = ppc64_caches.dline_size; - lines = ppc64_caches.dlines_per_page; + iterations = ppc64_caches.dlines_per_page / 8; - __asm__ __volatile__( + /* + * Some verisions of gcc use multiply instructions to + * calculate the offsets so lets give it a hand to + * do better. + */ + onex = ppc64_caches.dline_size; + twox = onex << 1; + fourx = onex << 2; + eightx = onex << 3; + + asm volatile( "mtctr %1 # clear_page\n\ -1: dcbz 0,%0\n\ - add %0,%0,%3\n\ + .balign 16\n\ +1: dcbz 0,%0\n\ + dcbz %3,%0\n\ + dcbz %4,%0\n\ + dcbz %5,%0\n\ + dcbz %6,%0\n\ + dcbz %7,%0\n\ + dcbz %8,%0\n\ + dcbz %9,%0\n\ + add %0,%0,%10\n\ bdnz+ 1b" - : "=r" (addr) - : "r" (lines), "0" (addr), "r" (line_size) + : "=&r" (addr) + : "r" (iterations), "0" (addr), "b" (onex), "b" (twox), + "b" (twox+onex), "b" (fourx), "b" (fourx+onex), + "b" (twox+fourx), "b" (eightx-onex), "r" (eightx) : "ctr", "memory"); } From 63af52629adcd1313c7db252f085263012ecd9db Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 24 Sep 2014 16:59:56 +1000 Subject: [PATCH 097/130] powerpc: Simplify do_sigbus Exit out early for a kernel fault, avoiding indenting of most of the function. Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman --- arch/powerpc/mm/fault.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 51ab9e7e6c39..abc8c816a326 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -120,16 +120,16 @@ static int do_sigbus(struct pt_regs *regs, unsigned long address) up_read(¤t->mm->mmap_sem); - if (user_mode(regs)) { - current->thread.trap_nr = BUS_ADRERR; - info.si_signo = SIGBUS; - info.si_errno = 0; - info.si_code = BUS_ADRERR; - info.si_addr = (void __user *)address; - force_sig_info(SIGBUS, &info, current); - return MM_FAULT_RETURN; - } - return MM_FAULT_ERR(SIGBUS); + if (!user_mode(regs)) + return MM_FAULT_ERR(SIGBUS); + + current->thread.trap_nr = BUS_ADRERR; + info.si_signo = SIGBUS; + info.si_errno = 0; + info.si_code = BUS_ADRERR; + info.si_addr = (void __user *)address; + force_sig_info(SIGBUS, &info, current); + return MM_FAULT_RETURN; } static int mm_fault_error(struct pt_regs *regs, unsigned long addr, int fault) From 3913fdd7a23d9d8480ce3a6ca9cdf78bf0dec5a0 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 24 Sep 2014 16:59:57 +1000 Subject: [PATCH 098/130] powerpc: Add VM_FAULT_HWPOISON handling to powerpc page fault handler do_page_fault was missing knowledge of HWPOISON, and we would oops if userspace tried to access a poisoned page: kernel BUG at arch/powerpc/mm/fault.c:180! Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman --- arch/powerpc/mm/fault.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index abc8c816a326..588b6ccc0569 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -114,7 +114,8 @@ static int store_updates_sp(struct pt_regs *regs) #define MM_FAULT_CONTINUE -1 #define MM_FAULT_ERR(sig) (sig) -static int do_sigbus(struct pt_regs *regs, unsigned long address) +static int do_sigbus(struct pt_regs *regs, unsigned long address, + unsigned int fault) { siginfo_t info; @@ -128,6 +129,13 @@ static int do_sigbus(struct pt_regs *regs, unsigned long address) info.si_errno = 0; info.si_code = BUS_ADRERR; info.si_addr = (void __user *)address; +#ifdef CONFIG_MEMORY_FAILURE + if (fault & (VM_FAULT_HWPOISON|VM_FAULT_HWPOISON_LARGE)) { + pr_err("MCE: Killing %s:%d due to hardware memory corruption fault at %lx\n", + current->comm, current->pid, address); + info.si_code = BUS_MCEERR_AR; + } +#endif force_sig_info(SIGBUS, &info, current); return MM_FAULT_RETURN; } @@ -170,11 +178,8 @@ static int mm_fault_error(struct pt_regs *regs, unsigned long addr, int fault) return MM_FAULT_RETURN; } - /* Bus error. x86 handles HWPOISON here, we'll add this if/when - * we support the feature in HW - */ - if (fault & VM_FAULT_SIGBUS) - return do_sigbus(regs, addr); + if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON|VM_FAULT_HWPOISON_LARGE)) + return do_sigbus(regs, addr, fault); /* We don't understand the fault code, this is fatal */ BUG(); From 9d57472f61acd7c3a33ebf5a79361e316d8ffbef Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 24 Sep 2014 16:59:58 +1000 Subject: [PATCH 099/130] powerpc: Fill in si_addr_lsb siginfo field Fill in the si_addr_lsb siginfo field so the hwpoison code can pass to userspace the length of memory that has been corrupted. Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman --- arch/powerpc/mm/fault.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 588b6ccc0569..24b3f4949df4 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include @@ -118,6 +119,7 @@ static int do_sigbus(struct pt_regs *regs, unsigned long address, unsigned int fault) { siginfo_t info; + unsigned int lsb = 0; up_read(¤t->mm->mmap_sem); @@ -135,7 +137,13 @@ static int do_sigbus(struct pt_regs *regs, unsigned long address, current->comm, current->pid, address); info.si_code = BUS_MCEERR_AR; } + + if (fault & VM_FAULT_HWPOISON_LARGE) + lsb = hstate_index_to_shift(VM_FAULT_GET_HINDEX(fault)); + if (fault & VM_FAULT_HWPOISON) + lsb = PAGE_SHIFT; #endif + info.si_addr_lsb = lsb; force_sig_info(SIGBUS, &info, current); return MM_FAULT_RETURN; } From c7d1f6afe062d2dc4bb8109856519570f2fe3c13 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 17 Sep 2014 14:39:35 +1000 Subject: [PATCH 100/130] powerpc: Use pr_fmt in module loader code Use pr_fmt to give some context to the error messages in the module code, and convert open coded debug printk to pr_debug. Use pr_err for error messages. Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/module_32.c | 31 +++++++++++++--------------- arch/powerpc/kernel/module_64.c | 36 ++++++++++++++++----------------- 2 files changed, 31 insertions(+), 36 deletions(-) diff --git a/arch/powerpc/kernel/module_32.c b/arch/powerpc/kernel/module_32.c index 6cff040bf456..c94d2e018d84 100644 --- a/arch/powerpc/kernel/module_32.c +++ b/arch/powerpc/kernel/module_32.c @@ -15,6 +15,9 @@ along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -28,12 +31,6 @@ #include #include -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(fmt , ...) -#endif - /* Count how many different relocations (different symbol, different addend) */ static unsigned int count_relocs(const Elf32_Rela *rela, unsigned int num) @@ -121,8 +118,8 @@ static unsigned long get_plt_size(const Elf32_Ehdr *hdr, continue; if (sechdrs[i].sh_type == SHT_RELA) { - DEBUGP("Found relocations in section %u\n", i); - DEBUGP("Ptr: %p. Number: %u\n", + pr_debug("Found relocations in section %u\n", i); + pr_debug("Ptr: %p. Number: %u\n", (void *)hdr + sechdrs[i].sh_offset, sechdrs[i].sh_size / sizeof(Elf32_Rela)); @@ -161,7 +158,7 @@ int module_frob_arch_sections(Elf32_Ehdr *hdr, me->arch.core_plt_section = i; } if (!me->arch.core_plt_section || !me->arch.init_plt_section) { - printk("Module doesn't contain .plt or .init.plt sections.\n"); + pr_err("Module doesn't contain .plt or .init.plt sections.\n"); return -ENOEXEC; } @@ -189,7 +186,7 @@ static uint32_t do_plt_call(void *location, { struct ppc_plt_entry *entry; - DEBUGP("Doing plt for call to 0x%x at 0x%x\n", val, (unsigned int)location); + pr_debug("Doing plt for call to 0x%x at 0x%x\n", val, (unsigned int)location); /* Init, or core PLT? */ if (location >= mod->module_core && location < mod->module_core + mod->core_size) @@ -208,7 +205,7 @@ static uint32_t do_plt_call(void *location, entry->jump[2] = 0x7d8903a6; /* mtctr r12 */ entry->jump[3] = 0x4e800420; /* bctr */ - DEBUGP("Initialized plt for 0x%x at %p\n", val, entry); + pr_debug("Initialized plt for 0x%x at %p\n", val, entry); return (uint32_t)entry; } @@ -224,7 +221,7 @@ int apply_relocate_add(Elf32_Shdr *sechdrs, uint32_t *location; uint32_t value; - DEBUGP("Applying ADD relocate section %u to %u\n", relsec, + pr_debug("Applying ADD relocate section %u to %u\n", relsec, sechdrs[relsec].sh_info); for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rela); i++) { /* This is where to make the change */ @@ -268,17 +265,17 @@ int apply_relocate_add(Elf32_Shdr *sechdrs, sechdrs, module); /* Only replace bits 2 through 26 */ - DEBUGP("REL24 value = %08X. location = %08X\n", + pr_debug("REL24 value = %08X. location = %08X\n", value, (uint32_t)location); - DEBUGP("Location before: %08X.\n", + pr_debug("Location before: %08X.\n", *(uint32_t *)location); *(uint32_t *)location = (*(uint32_t *)location & ~0x03fffffc) | ((value - (uint32_t)location) & 0x03fffffc); - DEBUGP("Location after: %08X.\n", + pr_debug("Location after: %08X.\n", *(uint32_t *)location); - DEBUGP("ie. jump to %08X+%08X = %08X\n", + pr_debug("ie. jump to %08X+%08X = %08X\n", *(uint32_t *)location & 0x03fffffc, (uint32_t)location, (*(uint32_t *)location & 0x03fffffc) @@ -291,7 +288,7 @@ int apply_relocate_add(Elf32_Shdr *sechdrs, break; default: - printk("%s: unknown ADD relocation: %u\n", + pr_err("%s: unknown ADD relocation: %u\n", module->name, ELF32_R_TYPE(rela[i].r_info)); return -ENOEXEC; diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index d807ee626af9..68384514506b 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -15,6 +15,9 @@ along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -36,11 +39,6 @@ Using a magic allocator which places modules within 32MB solves this, and makes other things simpler. Anton? --RR. */ -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(fmt , ...) -#endif #if defined(_CALL_ELF) && _CALL_ELF == 2 #define R2_STACK_OFFSET 24 @@ -279,8 +277,8 @@ static unsigned long get_stubs_size(const Elf64_Ehdr *hdr, /* Every relocated section... */ for (i = 1; i < hdr->e_shnum; i++) { if (sechdrs[i].sh_type == SHT_RELA) { - DEBUGP("Found relocations in section %u\n", i); - DEBUGP("Ptr: %p. Number: %lu\n", + pr_debug("Found relocations in section %u\n", i); + pr_debug("Ptr: %p. Number: %Lu\n", (void *)sechdrs[i].sh_addr, sechdrs[i].sh_size / sizeof(Elf64_Rela)); @@ -304,7 +302,7 @@ static unsigned long get_stubs_size(const Elf64_Ehdr *hdr, relocs++; #endif - DEBUGP("Looks like a total of %lu stubs, max\n", relocs); + pr_debug("Looks like a total of %lu stubs, max\n", relocs); return relocs * sizeof(struct ppc64_stub_entry); } @@ -390,7 +388,7 @@ int module_frob_arch_sections(Elf64_Ehdr *hdr, } if (!me->arch.stubs_section) { - printk("%s: doesn't contain .stubs.\n", me->name); + pr_err("%s: doesn't contain .stubs.\n", me->name); return -ENOEXEC; } @@ -434,11 +432,11 @@ static inline int create_stub(Elf64_Shdr *sechdrs, /* Stub uses address relative to r2. */ reladdr = (unsigned long)entry - my_r2(sechdrs, me); if (reladdr > 0x7FFFFFFF || reladdr < -(0x80000000L)) { - printk("%s: Address %p of stub out of range of %p.\n", + pr_err("%s: Address %p of stub out of range of %p.\n", me->name, (void *)reladdr, (void *)my_r2); return 0; } - DEBUGP("Stub %p get data from reladdr %li\n", entry, reladdr); + pr_debug("Stub %p get data from reladdr %li\n", entry, reladdr); entry->jump[0] |= PPC_HA(reladdr); entry->jump[1] |= PPC_LO(reladdr); @@ -477,7 +475,7 @@ static unsigned long stub_for_addr(Elf64_Shdr *sechdrs, static int restore_r2(u32 *instruction, struct module *me) { if (*instruction != PPC_INST_NOP) { - printk("%s: Expect noop after relocate, got %08x\n", + pr_err("%s: Expect noop after relocate, got %08x\n", me->name, *instruction); return 0; } @@ -498,7 +496,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, unsigned long *location; unsigned long value; - DEBUGP("Applying ADD relocate section %u to %u\n", relsec, + pr_debug("Applying ADD relocate section %u to %u\n", relsec, sechdrs[relsec].sh_info); /* First time we're called, we can fix up .TOC. */ @@ -519,7 +517,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, sym = (Elf64_Sym *)sechdrs[symindex].sh_addr + ELF64_R_SYM(rela[i].r_info); - DEBUGP("RELOC at %p: %li-type as %s (%lu) + %li\n", + pr_debug("RELOC at %p: %li-type as %s (0x%lx) + %li\n", location, (long)ELF64_R_TYPE(rela[i].r_info), strtab + sym->st_name, (unsigned long)sym->st_value, (long)rela[i].r_addend); @@ -546,7 +544,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, /* Subtract TOC pointer */ value -= my_r2(sechdrs, me); if (value + 0x8000 > 0xffff) { - printk("%s: bad TOC16 relocation (%lu)\n", + pr_err("%s: bad TOC16 relocation (0x%lx)\n", me->name, value); return -ENOEXEC; } @@ -567,7 +565,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, /* Subtract TOC pointer */ value -= my_r2(sechdrs, me); if ((value & 3) != 0 || value + 0x8000 > 0xffff) { - printk("%s: bad TOC16_DS relocation (%lu)\n", + pr_err("%s: bad TOC16_DS relocation (0x%lx)\n", me->name, value); return -ENOEXEC; } @@ -580,7 +578,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, /* Subtract TOC pointer */ value -= my_r2(sechdrs, me); if ((value & 3) != 0) { - printk("%s: bad TOC16_LO_DS relocation (%lu)\n", + pr_err("%s: bad TOC16_LO_DS relocation (0x%lx)\n", me->name, value); return -ENOEXEC; } @@ -613,7 +611,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, /* Convert value to relative */ value -= (unsigned long)location; if (value + 0x2000000 > 0x3ffffff || (value & 3) != 0){ - printk("%s: REL24 %li out of range!\n", + pr_err("%s: REL24 %li out of range!\n", me->name, (long int)value); return -ENOEXEC; } @@ -655,7 +653,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, break; default: - printk("%s: Unknown ADD relocation: %lu\n", + pr_err("%s: Unknown ADD relocation: %lu\n", me->name, (unsigned long)ELF64_R_TYPE(rela[i].r_info)); return -ENOEXEC; From 3e47d1474c2b4099f0fadd12a6553fdb2e8feaae Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 17 Sep 2014 14:39:36 +1000 Subject: [PATCH 101/130] powerpc: Remove powerpc specific cmd_line There is no need for yet another copy of the command line, just use boot_command_line like everyone else. Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/machdep.h | 2 -- arch/powerpc/kernel/prom.c | 4 +--- arch/powerpc/kernel/setup-common.c | 2 -- arch/powerpc/kernel/setup_32.c | 2 +- arch/powerpc/kernel/setup_64.c | 2 +- arch/powerpc/mm/init_32.c | 4 ++-- arch/powerpc/platforms/chrp/setup.c | 2 +- arch/powerpc/platforms/powermac/setup.c | 8 ++++---- drivers/cpufreq/pmac32-cpufreq.c | 2 +- drivers/tty/hvc/hvc_vio.c | 2 +- 10 files changed, 12 insertions(+), 18 deletions(-) diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index b125ceab149c..902ab203aeb2 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -330,8 +330,6 @@ extern struct machdep_calls *machine_id; extern void probe_machine(void); -extern char cmd_line[COMMAND_LINE_SIZE]; - #ifdef CONFIG_PPC_PMAC /* * Power macintoshes have either a CUDA, PMU or SMU controlling diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 59576255d22b..099f27e6d1b0 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -668,14 +668,12 @@ void __init early_init_devtree(void *params) * device-tree, including the platform type, initrd location and * size, TCE reserve, and more ... */ - of_scan_flat_dt(early_init_dt_scan_chosen_ppc, cmd_line); + of_scan_flat_dt(early_init_dt_scan_chosen_ppc, boot_command_line); /* Scan memory nodes and rebuild MEMBLOCKs */ of_scan_flat_dt(early_init_dt_scan_root, NULL); of_scan_flat_dt(early_init_dt_scan_memory_ppc, NULL); - /* Save command line for /proc/cmdline and then parse parameters */ - strlcpy(boot_command_line, cmd_line, COMMAND_LINE_SIZE); parse_early_param(); /* make sure we've parsed cmdline for mem= before this */ diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index c933acd4e2bd..0df37f5e2d54 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -81,8 +81,6 @@ EXPORT_SYMBOL_GPL(boot_cpuid); unsigned long klimit = (unsigned long) _end; -char cmd_line[COMMAND_LINE_SIZE]; - /* * This still seems to be needed... -- paulus */ diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index ea4fda60e57b..07831ed0d9ef 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -268,7 +268,7 @@ static void __init exc_lvl_early_init(void) /* Warning, IO base is not yet inited */ void __init setup_arch(char **cmdline_p) { - *cmdline_p = cmd_line; + *cmdline_p = boot_command_line; /* so udelay does something sensible, assume <= 1000 bogomips */ loops_per_jiffy = 500000000 / HZ; diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 2ef9893a06bd..cd07d79ad21c 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -667,7 +667,7 @@ void __init setup_arch(char **cmdline_p) { ppc64_boot_msg(0x12, "Setup Arch"); - *cmdline_p = cmd_line; + *cmdline_p = boot_command_line; /* * Set cache line size based on type of cpu as a default. diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c index cff59f1bec23..cad68ff8eca5 100644 --- a/arch/powerpc/mm/init_32.c +++ b/arch/powerpc/mm/init_32.c @@ -106,11 +106,11 @@ unsigned long __max_low_memory = MAX_LOW_MEM; void MMU_setup(void) { /* Check for nobats option (used in mapin_ram). */ - if (strstr(cmd_line, "nobats")) { + if (strstr(boot_command_line, "nobats")) { __map_without_bats = 1; } - if (strstr(cmd_line, "noltlbs")) { + if (strstr(boot_command_line, "noltlbs")) { __map_without_ltlbs = 1; } #ifdef CONFIG_DEBUG_PAGEALLOC diff --git a/arch/powerpc/platforms/chrp/setup.c b/arch/powerpc/platforms/chrp/setup.c index 7044fd36197b..5b77b1919fd2 100644 --- a/arch/powerpc/platforms/chrp/setup.c +++ b/arch/powerpc/platforms/chrp/setup.c @@ -258,7 +258,7 @@ static void chrp_init_early(void) struct device_node *node; const char *property; - if (strstr(cmd_line, "console=")) + if (strstr(boot_command_line, "console=")) return; /* find the boot console from /chosen/stdout */ if (!of_chosen) diff --git a/arch/powerpc/platforms/powermac/setup.c b/arch/powerpc/platforms/powermac/setup.c index 141f8899a633..b127a29ac526 100644 --- a/arch/powerpc/platforms/powermac/setup.c +++ b/arch/powerpc/platforms/powermac/setup.c @@ -336,7 +336,7 @@ static void __init pmac_setup_arch(void) #endif #ifdef CONFIG_ADB - if (strstr(cmd_line, "adb_sync")) { + if (strstr(boot_command_line, "adb_sync")) { extern int __adb_probe_sync; __adb_probe_sync = 1; } @@ -460,7 +460,7 @@ pmac_halt(void) static void __init pmac_init_early(void) { /* Enable early btext debug if requested */ - if (strstr(cmd_line, "btextdbg")) { + if (strstr(boot_command_line, "btextdbg")) { udbg_adb_init_early(); register_early_udbg_console(); } @@ -469,8 +469,8 @@ static void __init pmac_init_early(void) pmac_feature_init(); /* Initialize debug stuff */ - udbg_scc_init(!!strstr(cmd_line, "sccdbg")); - udbg_adb_init(!!strstr(cmd_line, "btextdbg")); + udbg_scc_init(!!strstr(boot_command_line, "sccdbg")); + udbg_adb_init(!!strstr(boot_command_line, "btextdbg")); #ifdef CONFIG_PPC64 iommu_init_early_dart(); diff --git a/drivers/cpufreq/pmac32-cpufreq.c b/drivers/cpufreq/pmac32-cpufreq.c index 7615180d7ee3..1f49d97a70ea 100644 --- a/drivers/cpufreq/pmac32-cpufreq.c +++ b/drivers/cpufreq/pmac32-cpufreq.c @@ -611,7 +611,7 @@ static int __init pmac_cpufreq_setup(void) struct device_node *cpunode; const u32 *value; - if (strstr(cmd_line, "nocpufreq")) + if (strstr(boot_command_line, "nocpufreq")) return 0; /* Get first CPU node */ diff --git a/drivers/tty/hvc/hvc_vio.c b/drivers/tty/hvc/hvc_vio.c index 5618b5fc7500..f575a9b5ede7 100644 --- a/drivers/tty/hvc/hvc_vio.c +++ b/drivers/tty/hvc/hvc_vio.c @@ -452,7 +452,7 @@ void __init hvc_vio_init_early(void) return; #endif /* Check whether the user has requested a different console. */ - if (!strstr(cmd_line, "console=")) + if (!strstr(boot_command_line, "console=")) add_preferred_console("hvc", 0, NULL); hvc_instantiate(0, 0, ops); } From 9a4f5cd0a550c755a3f2e0cdcc541493f98feb8e Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 17 Sep 2014 14:39:38 +1000 Subject: [PATCH 102/130] powerpc: Add printk levels to powernv platform code Add printk levels to powernv platform code, and convert to pr_err() etc while here. Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/opal-nvram.c | 2 +- arch/powerpc/platforms/powernv/opal.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/platforms/powernv/opal-nvram.c b/arch/powerpc/platforms/powernv/opal-nvram.c index acd9f7e96678..f9896fd5d04a 100644 --- a/arch/powerpc/platforms/powernv/opal-nvram.c +++ b/arch/powerpc/platforms/powernv/opal-nvram.c @@ -78,7 +78,7 @@ void __init opal_nvram_init(void) } nvram_size = be32_to_cpup(nbytes_p); - printk(KERN_INFO "OPAL nvram setup, %u bytes\n", nvram_size); + pr_info("OPAL nvram setup, %u bytes\n", nvram_size); of_node_put(np); ppc_md.nvram_read = opal_nvram_read; diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index b44eec3e8dbd..cf85ba86bf5e 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -105,12 +105,12 @@ int __init early_init_dt_scan_opal(unsigned long node, if (of_flat_dt_is_compatible(node, "ibm,opal-v3")) { powerpc_firmware_features |= FW_FEATURE_OPALv2; powerpc_firmware_features |= FW_FEATURE_OPALv3; - printk("OPAL V3 detected !\n"); + pr_info("OPAL V3 detected !\n"); } else if (of_flat_dt_is_compatible(node, "ibm,opal-v2")) { powerpc_firmware_features |= FW_FEATURE_OPALv2; - printk("OPAL V2 detected !\n"); + pr_info("OPAL V2 detected !\n"); } else { - printk("OPAL V1 detected !\n"); + pr_info("OPAL V1 detected !\n"); } /* Reinit all cores with the right endian */ From a7696b36c01316308f2b9f2009ce38cb01fd7a3f Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 17 Sep 2014 14:39:39 +1000 Subject: [PATCH 103/130] powerpc: Add printk levels to powerpc code Add printk levels to some places in the powerpc port. Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/pgtable-ppc32.h | 4 ++-- arch/powerpc/include/asm/pgtable-ppc64-4k.h | 2 +- arch/powerpc/include/asm/pgtable-ppc64.h | 6 +++--- arch/powerpc/kernel/irq.c | 6 +++--- arch/powerpc/kernel/setup-common.c | 2 +- 5 files changed, 10 insertions(+), 10 deletions(-) diff --git a/arch/powerpc/include/asm/pgtable-ppc32.h b/arch/powerpc/include/asm/pgtable-ppc32.h index 622672fef309..945e47adf7db 100644 --- a/arch/powerpc/include/asm/pgtable-ppc32.h +++ b/arch/powerpc/include/asm/pgtable-ppc32.h @@ -48,10 +48,10 @@ extern int icache_44x_need_flush; #define FIRST_USER_ADDRESS 0 #define pte_ERROR(e) \ - printk("%s:%d: bad pte %llx.\n", __FILE__, __LINE__, \ + pr_err("%s:%d: bad pte %llx.\n", __FILE__, __LINE__, \ (unsigned long long)pte_val(e)) #define pgd_ERROR(e) \ - printk("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e)) + pr_err("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e)) /* * This is the bottom of the PKMAP area with HIGHMEM or an arbitrary diff --git a/arch/powerpc/include/asm/pgtable-ppc64-4k.h b/arch/powerpc/include/asm/pgtable-ppc64-4k.h index 12798c9d4b4b..7b935683f268 100644 --- a/arch/powerpc/include/asm/pgtable-ppc64-4k.h +++ b/arch/powerpc/include/asm/pgtable-ppc64-4k.h @@ -64,7 +64,7 @@ (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))) #define pud_ERROR(e) \ - printk("%s:%d: bad pud %08lx.\n", __FILE__, __LINE__, pud_val(e)) + pr_err("%s:%d: bad pud %08lx.\n", __FILE__, __LINE__, pud_val(e)) /* * On all 4K setups, remap_4k_pfn() equates to remap_pfn_range() */ diff --git a/arch/powerpc/include/asm/pgtable-ppc64.h b/arch/powerpc/include/asm/pgtable-ppc64.h index 7b3d54fae46f..ae153c40ab7c 100644 --- a/arch/powerpc/include/asm/pgtable-ppc64.h +++ b/arch/powerpc/include/asm/pgtable-ppc64.h @@ -328,11 +328,11 @@ static inline void __ptep_set_access_flags(pte_t *ptep, pte_t entry) #define pte_same(A,B) (((pte_val(A) ^ pte_val(B)) & ~_PAGE_HPTEFLAGS) == 0) #define pte_ERROR(e) \ - printk("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e)) + pr_err("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e)) #define pmd_ERROR(e) \ - printk("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, pmd_val(e)) + pr_err("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, pmd_val(e)) #define pgd_ERROR(e) \ - printk("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e)) + pr_err("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e)) /* Encode and de-code a swap entry */ #define __swp_type(entry) (((entry).val >> 1) & 0x3f) diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 4c5891de162e..8eb857f216c1 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -444,13 +444,13 @@ void migrate_irqs(void) cpumask_and(mask, data->affinity, map); if (cpumask_any(mask) >= nr_cpu_ids) { - printk("Breaking affinity for irq %i\n", irq); + pr_warn("Breaking affinity for irq %i\n", irq); cpumask_copy(mask, map); } if (chip->irq_set_affinity) chip->irq_set_affinity(data, mask, true); else if (desc->action && !(warned++)) - printk("Cannot set affinity for irq %i\n", irq); + pr_err("Cannot set affinity for irq %i\n", irq); } free_cpumask_var(mask); @@ -470,7 +470,7 @@ static inline void check_stack_overflow(void) /* check for stack overflow: is there less than 2KB free? */ if (unlikely(sp < (sizeof(struct thread_info) + 2048))) { - printk("do_IRQ: stack overflow: %ld\n", + pr_err("do_IRQ: stack overflow: %ld\n", sp - sizeof(struct thread_info)); dump_stack(); } diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 0df37f5e2d54..1362cd62b3fa 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -383,7 +383,7 @@ void __init check_for_initrd(void) initrd_start = initrd_end = 0; if (initrd_start) - printk("Found initrd at 0x%lx:0x%lx\n", initrd_start, initrd_end); + pr_info("Found initrd at 0x%lx:0x%lx\n", initrd_start, initrd_end); DBG(" <- check_for_initrd()\n"); #endif /* CONFIG_BLK_DEV_INITRD */ From 9410e0185e65394c0c6d046033904b53b97a9423 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Thu, 25 Sep 2014 16:39:18 +1000 Subject: [PATCH 104/130] powerpc/iommu/ddw: Fix endianness rtas_call() accepts and returns values in CPU endianness. The ddw_query_response and ddw_create_response structs members are defined and treated as BE but as they are passed to rtas_call() as (u32 *) and they get byteswapped automatically, the data is CPU-endian. This fixes ddw_query_response and ddw_create_response definitions and use. of_read_number() is designed to work with device tree cells - it assumes the input is big-endian and returns data in CPU-endian. However due to the ddw_create_response struct fix, create.addr_hi/lo are already CPU-endian so do not byteswap them. ddw_avail is a pointer to the "ibm,ddw-applicable" property which contains 3 cells which are big-endian as it is a device tree. rtas_call() accepts a RTAS token in CPU-endian. This makes use of of_property_read_u32_array to byte swap and avoid the need for a number of be32_to_cpu calls. Cc: stable@vger.kernel.org # v3.13+ Cc: Benjamin Herrenschmidt [aik: folded Anton's patch with of_property_read_u32_array] Signed-off-by: Alexey Kardashevskiy Acked-by: Anton Blanchard Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/pseries/iommu.c | 51 ++++++++++++++------------ 1 file changed, 28 insertions(+), 23 deletions(-) diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 4642d6a4d356..de1ec54a2a57 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -329,16 +329,16 @@ struct direct_window { /* Dynamic DMA Window support */ struct ddw_query_response { - __be32 windows_available; - __be32 largest_available_block; - __be32 page_size; - __be32 migration_capable; + u32 windows_available; + u32 largest_available_block; + u32 page_size; + u32 migration_capable; }; struct ddw_create_response { - __be32 liobn; - __be32 addr_hi; - __be32 addr_lo; + u32 liobn; + u32 addr_hi; + u32 addr_lo; }; static LIST_HEAD(direct_window_list); @@ -725,16 +725,18 @@ static void remove_ddw(struct device_node *np, bool remove_prop) { struct dynamic_dma_window_prop *dwp; struct property *win64; - const u32 *ddw_avail; + u32 ddw_avail[3]; u64 liobn; - int len, ret = 0; + int ret = 0; + + ret = of_property_read_u32_array(np, "ibm,ddw-applicable", + &ddw_avail[0], 3); - ddw_avail = of_get_property(np, "ibm,ddw-applicable", &len); win64 = of_find_property(np, DIRECT64_PROPNAME, NULL); if (!win64) return; - if (!ddw_avail || len < 3 * sizeof(u32) || win64->length < sizeof(*dwp)) + if (ret || win64->length < sizeof(*dwp)) goto delprop; dwp = win64->value; @@ -872,8 +874,9 @@ static int create_ddw(struct pci_dev *dev, const u32 *ddw_avail, do { /* extra outputs are LIOBN and dma-addr (hi, lo) */ - ret = rtas_call(ddw_avail[1], 5, 4, (u32 *)create, cfg_addr, - BUID_HI(buid), BUID_LO(buid), page_shift, window_shift); + ret = rtas_call(ddw_avail[1], 5, 4, (u32 *)create, + cfg_addr, BUID_HI(buid), BUID_LO(buid), + page_shift, window_shift); } while (rtas_busy_delay(ret)); dev_info(&dev->dev, "ibm,create-pe-dma-window(%x) %x %x %x %x %x returned %d " @@ -910,7 +913,7 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn) int page_shift; u64 dma_addr, max_addr; struct device_node *dn; - const u32 *uninitialized_var(ddw_avail); + u32 ddw_avail[3]; struct direct_window *window; struct property *win64; struct dynamic_dma_window_prop *ddwprop; @@ -942,8 +945,9 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn) * for the given node in that order. * the property is actually in the parent, not the PE */ - ddw_avail = of_get_property(pdn, "ibm,ddw-applicable", &len); - if (!ddw_avail || len < 3 * sizeof(u32)) + ret = of_property_read_u32_array(pdn, "ibm,ddw-applicable", + &ddw_avail[0], 3); + if (ret) goto out_failed; /* @@ -966,11 +970,11 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn) dev_dbg(&dev->dev, "no free dynamic windows"); goto out_failed; } - if (be32_to_cpu(query.page_size) & 4) { + if (query.page_size & 4) { page_shift = 24; /* 16MB */ - } else if (be32_to_cpu(query.page_size) & 2) { + } else if (query.page_size & 2) { page_shift = 16; /* 64kB */ - } else if (be32_to_cpu(query.page_size) & 1) { + } else if (query.page_size & 1) { page_shift = 12; /* 4kB */ } else { dev_dbg(&dev->dev, "no supported direct page size in mask %x", @@ -980,7 +984,7 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn) /* verify the window * number of ptes will map the partition */ /* check largest block * page size > max memory hotplug addr */ max_addr = memory_hotplug_max(); - if (be32_to_cpu(query.largest_available_block) < (max_addr >> page_shift)) { + if (query.largest_available_block < (max_addr >> page_shift)) { dev_dbg(&dev->dev, "can't map partiton max 0x%llx with %u " "%llu-sized pages\n", max_addr, query.largest_available_block, 1ULL << page_shift); @@ -1006,8 +1010,9 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn) if (ret != 0) goto out_free_prop; - ddwprop->liobn = create.liobn; - ddwprop->dma_base = cpu_to_be64(of_read_number(&create.addr_hi, 2)); + ddwprop->liobn = cpu_to_be32(create.liobn); + ddwprop->dma_base = cpu_to_be64(((u64)create.addr_hi << 32) | + create.addr_lo); ddwprop->tce_shift = cpu_to_be32(page_shift); ddwprop->window_shift = cpu_to_be32(len); @@ -1039,7 +1044,7 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn) list_add(&window->list, &direct_window_list); spin_unlock(&direct_window_list_lock); - dma_addr = of_read_number(&create.addr_hi, 2); + dma_addr = be64_to_cpu(ddwprop->dma_base); goto out_unlock; out_free_window: From 8abf29f829de366b989b360af96b2ba871324ad6 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Fri, 19 Sep 2014 17:25:22 +0800 Subject: [PATCH 105/130] powerpc/pci: remove duplicate declaration of pci_bus_find_capability pci_bus_find_capability() is decleared in pci.h, so it is not necessary to do it again. This patch removes it. Signed-off-by: Wei Yang Reviewed-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/pci-common.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index bd84771f89fc..bd70a51d5747 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -1561,7 +1561,6 @@ EARLY_PCI_OP(write, byte, u8) EARLY_PCI_OP(write, word, u16) EARLY_PCI_OP(write, dword, u32) -extern int pci_bus_find_capability (struct pci_bus *bus, unsigned int devfn, int cap); int early_find_capability(struct pci_controller *hose, int bus, int devfn, int cap) { From d3b94e4b3b22b9f95c4d6a5f0c7fcf9288309c20 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 24 Sep 2014 15:57:10 +1000 Subject: [PATCH 106/130] powerpc: Don't build powernv for other platform defconfigs Because powernv arrived after these other platforms, the defconfigs didn't have PPC_POWERNV disabled, and being default y it gets turned on. If we're going to bother having defconfigs for the specific platforms then they should only build the code required for those platforms. The grab bag of everything config is ppc64_defconfig. Signed-off-by: Michael Ellerman --- arch/powerpc/configs/cell_defconfig | 1 + arch/powerpc/configs/celleb_defconfig | 1 + arch/powerpc/configs/g5_defconfig | 1 + arch/powerpc/configs/maple_defconfig | 1 + arch/powerpc/configs/pasemi_defconfig | 1 + 5 files changed, 5 insertions(+) diff --git a/arch/powerpc/configs/cell_defconfig b/arch/powerpc/configs/cell_defconfig index 45fd06cdc3e8..7a7b3c879f96 100644 --- a/arch/powerpc/configs/cell_defconfig +++ b/arch/powerpc/configs/cell_defconfig @@ -18,6 +18,7 @@ CONFIG_OPROFILE=m CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set +# CONFIG_PPC_POWERNV is not set # CONFIG_PPC_PSERIES is not set # CONFIG_PPC_PMAC is not set CONFIG_PPC_PS3=y diff --git a/arch/powerpc/configs/celleb_defconfig b/arch/powerpc/configs/celleb_defconfig index 77d7bf3ca2ac..acccbfde8a50 100644 --- a/arch/powerpc/configs/celleb_defconfig +++ b/arch/powerpc/configs/celleb_defconfig @@ -15,6 +15,7 @@ CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y CONFIG_MODVERSIONS=y CONFIG_MODULE_SRCVERSION_ALL=y +# CONFIG_PPC_POWERNV is not set # CONFIG_PPC_PSERIES is not set # CONFIG_PPC_PMAC is not set CONFIG_PPC_CELLEB=y diff --git a/arch/powerpc/configs/g5_defconfig b/arch/powerpc/configs/g5_defconfig index 7594c5ac6481..6fab06f7f411 100644 --- a/arch/powerpc/configs/g5_defconfig +++ b/arch/powerpc/configs/g5_defconfig @@ -16,6 +16,7 @@ CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y CONFIG_MODVERSIONS=y CONFIG_MODULE_SRCVERSION_ALL=y +# CONFIG_PPC_POWERNV is not set # CONFIG_PPC_PSERIES is not set CONFIG_CPU_FREQ=y CONFIG_CPU_FREQ_GOV_POWERSAVE=y diff --git a/arch/powerpc/configs/maple_defconfig b/arch/powerpc/configs/maple_defconfig index c8b6a9ddb21b..fbd9e4163311 100644 --- a/arch/powerpc/configs/maple_defconfig +++ b/arch/powerpc/configs/maple_defconfig @@ -16,6 +16,7 @@ CONFIG_MODULE_UNLOAD=y CONFIG_MODVERSIONS=y CONFIG_MODULE_SRCVERSION_ALL=y # CONFIG_BLK_DEV_BSG is not set +# CONFIG_PPC_POWERNV is not set # CONFIG_PPC_PSERIES is not set # CONFIG_PPC_PMAC is not set CONFIG_PPC_MAPLE=y diff --git a/arch/powerpc/configs/pasemi_defconfig b/arch/powerpc/configs/pasemi_defconfig index e5e7838af008..3e72c8c06a0d 100644 --- a/arch/powerpc/configs/pasemi_defconfig +++ b/arch/powerpc/configs/pasemi_defconfig @@ -14,6 +14,7 @@ CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set CONFIG_PARTITION_ADVANCED=y CONFIG_MAC_PARTITION=y +# CONFIG_PPC_POWERNV is not set # CONFIG_PPC_PSERIES is not set # CONFIG_PPC_PMAC is not set CONFIG_PPC_PASEMI=y From edcee77fef62c66db7b50c4883f5b0f94de9ae69 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 24 Sep 2014 15:57:12 +1000 Subject: [PATCH 107/130] powerpc/kdump: crash_dump.c needs to include io.h For __ioremap(). Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/crash_dump.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c index 7a13f378ca2c..c78e6dac4d7d 100644 --- a/arch/powerpc/kernel/crash_dump.c +++ b/arch/powerpc/kernel/crash_dump.c @@ -13,6 +13,7 @@ #include #include +#include #include #include #include From d0b7abb2c7c05552aca2a0358c53b6a48eab2843 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 24 Sep 2014 15:57:11 +1000 Subject: [PATCH 108/130] powerpc: Enable CONFIG_CRASH_DUMP=y for ppc64_defconfig It pulls in more code, including causing us to build a relocatable kernel, which is good for testing. The resulting kernel is still usable as a non-crash dump kernel. Signed-off-by: Michael Ellerman --- arch/powerpc/configs/ppc64_defconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig index f6c02f8cdc62..b9b769e4a23e 100644 --- a/arch/powerpc/configs/ppc64_defconfig +++ b/arch/powerpc/configs/ppc64_defconfig @@ -50,6 +50,7 @@ CONFIG_HZ_100=y CONFIG_BINFMT_MISC=m CONFIG_PPC_TRANSACTIONAL_MEM=y CONFIG_KEXEC=y +CONFIG_CRASH_DUMP=y CONFIG_IRQ_ALL_CPUS=y CONFIG_MEMORY_HOTREMOVE=y CONFIG_SCHED_SMT=y From bf7588a0859580a45c63cb082825d77c13eca357 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 3 Oct 2014 17:12:25 +1000 Subject: [PATCH 109/130] powerpc/powernv: Fix endian bug in LPC bus debugfs accessors When reading from the LPC, the OPAL FW calls return the value via pointer to a uint32_t which is always returned big endian. Our internal inb/outb implementation byteswaps that fine but our debugfs code is still broken. Signed-off-by: Benjamin Herrenschmidt CC: Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/opal-lpc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/powernv/opal-lpc.c b/arch/powerpc/platforms/powernv/opal-lpc.c index ad4b31df779a..dd2c285ad170 100644 --- a/arch/powerpc/platforms/powernv/opal-lpc.c +++ b/arch/powerpc/platforms/powernv/opal-lpc.c @@ -191,6 +191,7 @@ static ssize_t lpc_debug_read(struct file *filp, char __user *ubuf, { struct lpc_debugfs_entry *lpc = filp->private_data; u32 data, pos, len, todo; + __be32 bedata; int rc; if (!access_ok(VERIFY_WRITE, ubuf, count)) @@ -213,9 +214,10 @@ static ssize_t lpc_debug_read(struct file *filp, char __user *ubuf, len = 2; } rc = opal_lpc_read(opal_lpc_chip_id, lpc->lpc_type, pos, - &data, len); + &bedata, len); if (rc) return -ENXIO; + data = be32_to_cpu(bedata); switch(len) { case 4: rc = __put_user((u32)data, (u32 __user *)ubuf); From 48bee8a6c98e34367fa9d5e1be14109c92cbbb3b Mon Sep 17 00:00:00 2001 From: Cody P Schafer Date: Tue, 30 Sep 2014 23:03:17 -0700 Subject: [PATCH 110/130] powerpc/perf/hv-24x7: use kmem_cache instead of aligned stack allocations Ian pointed out the use of __aligned(4096) caused rather large stack consumption in single_24x7_request(), so use the kmem_cache hv_page_cache (which we've already got set up for other allocations) insead of allocating locally. CC: Haren Myneni Reported-by: Ian Munsie Signed-off-by: Cody P Schafer Signed-off-by: Sukadev Bhattiprolu Signed-off-by: Michael Ellerman --- arch/powerpc/perf/hv-24x7.c | 55 +++++++++++++++++++++++++------------ 1 file changed, 37 insertions(+), 18 deletions(-) diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c index 70d4f748b54b..697759d8eb16 100644 --- a/arch/powerpc/perf/hv-24x7.c +++ b/arch/powerpc/perf/hv-24x7.c @@ -294,7 +294,7 @@ static unsigned long single_24x7_request(u8 domain, u32 offset, u16 ix, u16 lpar, u64 *res, bool success_expected) { - unsigned long ret; + unsigned long ret = -ENOMEM; /* * request_buffer and result_buffer are not required to be 4k aligned, @@ -304,7 +304,27 @@ static unsigned long single_24x7_request(u8 domain, u32 offset, u16 ix, struct reqb { struct hv_24x7_request_buffer buf; struct hv_24x7_request req; - } __packed __aligned(4096) request_buffer = { + } __packed *request_buffer; + + struct { + struct hv_24x7_data_result_buffer buf; + struct hv_24x7_result res; + struct hv_24x7_result_element elem; + __be64 result; + } __packed *result_buffer; + + BUILD_BUG_ON(sizeof(*request_buffer) > 4096); + BUILD_BUG_ON(sizeof(*result_buffer) > 4096); + + request_buffer = kmem_cache_zalloc(hv_page_cache, GFP_USER); + if (!request_buffer) + goto out; + + result_buffer = kmem_cache_zalloc(hv_page_cache, GFP_USER); + if (!result_buffer) + goto out_free_request_buffer; + + *request_buffer = (struct reqb) { .buf = { .interface_version = HV_24X7_IF_VERSION_CURRENT, .num_requests = 1, @@ -320,28 +340,27 @@ static unsigned long single_24x7_request(u8 domain, u32 offset, u16 ix, } }; - struct resb { - struct hv_24x7_data_result_buffer buf; - struct hv_24x7_result res; - struct hv_24x7_result_element elem; - __be64 result; - } __packed __aligned(4096) result_buffer = {}; - ret = plpar_hcall_norets(H_GET_24X7_DATA, - virt_to_phys(&request_buffer), sizeof(request_buffer), - virt_to_phys(&result_buffer), sizeof(result_buffer)); + virt_to_phys(request_buffer), sizeof(*request_buffer), + virt_to_phys(result_buffer), sizeof(*result_buffer)); if (ret) { if (success_expected) - pr_err_ratelimited("hcall failed: %d %#x %#x %d => 0x%lx (%ld) detail=0x%x failing ix=%x\n", - domain, offset, ix, lpar, - ret, ret, - result_buffer.buf.detailed_rc, - result_buffer.buf.failing_request_ix); - return ret; + pr_err_ratelimited("hcall failed: %d %#x %#x %d => " + "0x%lx (%ld) detail=0x%x failing ix=%x\n", + domain, offset, ix, lpar, ret, ret, + result_buffer->buf.detailed_rc, + result_buffer->buf.failing_request_ix); + goto out_free_result_buffer; } - *res = be64_to_cpu(result_buffer.result); + *res = be64_to_cpu(result_buffer->result); + +out_free_result_buffer: + kfree(result_buffer); +out_free_request_buffer: + kfree(request_buffer); +out: return ret; } From 56f12bee55d740dc47eed0ca9d5c72cffdffd6cf Mon Sep 17 00:00:00 2001 From: "sukadev@linux.vnet.ibm.com" Date: Tue, 30 Sep 2014 23:03:18 -0700 Subject: [PATCH 111/130] powerpc/perf/hv-24x7: Simplify catalog_read() catalog_read() implements the read interface for the sysfs file /sys/bus/event_source/devices/hv_24x7/interface/catalog It essentially takes a buffer, an offset and count as parameters to the read() call. It makes a hypervisor call to read a specific page from the catalog and copy the required bytes into the given buffer. Each call to catalog_read() returns at most one 4K page. Given these requirements, we should be able to simplify the catalog_read(). Signed-off-by: Sukadev Bhattiprolu Signed-off-by: Michael Ellerman --- arch/powerpc/perf/hv-24x7.c | 101 +++++------------------------------- 1 file changed, 14 insertions(+), 87 deletions(-) diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c index 697759d8eb16..6c8710dd90c9 100644 --- a/arch/powerpc/perf/hv-24x7.c +++ b/arch/powerpc/perf/hv-24x7.c @@ -75,86 +75,6 @@ static struct attribute_group format_group = { static struct kmem_cache *hv_page_cache; -/* - * read_offset_data - copy data from one buffer to another while treating the - * source buffer as a small view on the total avaliable - * source data. - * - * @dest: buffer to copy into - * @dest_len: length of @dest in bytes - * @requested_offset: the offset within the source data we want. Must be > 0 - * @src: buffer to copy data from - * @src_len: length of @src in bytes - * @source_offset: the offset in the sorce data that (src,src_len) refers to. - * Must be > 0 - * - * returns the number of bytes copied. - * - * The following ascii art shows the various buffer possitioning we need to - * handle, assigns some arbitrary varibles to points on the buffer, and then - * shows how we fiddle with those values to get things we care about (copy - * start in src and copy len) - * - * s = @src buffer - * d = @dest buffer - * '.' areas in d are written to. - * - * u - * x w v z - * d |.........| - * s |----------------------| - * - * u - * x w z v - * d |........------| - * s |------------------| - * - * x w u,z,v - * d |........| - * s |------------------| - * - * x,w u,v,z - * d |..................| - * s |------------------| - * - * x u - * w v z - * d |........| - * s |------------------| - * - * x z w v - * d |------| - * s |------| - * - * x = source_offset - * w = requested_offset - * z = source_offset + src_len - * v = requested_offset + dest_len - * - * w_offset_in_s = w - x = requested_offset - source_offset - * z_offset_in_s = z - x = src_len - * v_offset_in_s = v - x = request_offset + dest_len - src_len - */ -static ssize_t read_offset_data(void *dest, size_t dest_len, - loff_t requested_offset, void *src, - size_t src_len, loff_t source_offset) -{ - size_t w_offset_in_s = requested_offset - source_offset; - size_t z_offset_in_s = src_len; - size_t v_offset_in_s = requested_offset + dest_len - src_len; - size_t u_offset_in_s = min(z_offset_in_s, v_offset_in_s); - size_t copy_len = u_offset_in_s - w_offset_in_s; - - if (requested_offset < 0 || source_offset < 0) - return -EINVAL; - - if (z_offset_in_s <= w_offset_in_s) - return 0; - - memcpy(dest, src + w_offset_in_s, copy_len); - return copy_len; -} - static unsigned long h_get_24x7_catalog_page_(unsigned long phys_4096, unsigned long version, unsigned long index) @@ -183,8 +103,10 @@ static ssize_t catalog_read(struct file *filp, struct kobject *kobj, { unsigned long hret; ssize_t ret = 0; - size_t catalog_len = 0, catalog_page_len = 0, page_count = 0; + size_t catalog_len = 0, catalog_page_len = 0; loff_t page_offset = 0; + loff_t offset_in_page; + size_t copy_len; uint64_t catalog_version_num = 0; void *page = kmem_cache_alloc(hv_page_cache, GFP_USER); struct hv_24x7_catalog_page_0 *page_0 = page; @@ -202,7 +124,7 @@ static ssize_t catalog_read(struct file *filp, struct kobject *kobj, catalog_len = catalog_page_len * 4096; page_offset = offset / 4096; - page_count = count / 4096; + offset_in_page = offset % 4096; if (page_offset >= catalog_page_len) goto e_free; @@ -216,8 +138,13 @@ static ssize_t catalog_read(struct file *filp, struct kobject *kobj, } } - ret = read_offset_data(buf, count, offset, - page, 4096, page_offset * 4096); + copy_len = 4096 - offset_in_page; + if (copy_len > count) + copy_len = count; + + memcpy(buf, page+offset_in_page, copy_len); + ret = copy_len; + e_free: if (hret) pr_err("h_get_24x7_catalog_page(ver=%lld, page=%lld) failed:" @@ -225,9 +152,9 @@ e_free: catalog_version_num, page_offset, hret); kmem_cache_free(hv_page_cache, page); - pr_devel("catalog_read: offset=%lld(%lld) count=%zu(%zu) catalog_len=%zu(%zu) => %zd\n", - offset, page_offset, count, page_count, catalog_len, - catalog_page_len, ret); + pr_devel("catalog_read: offset=%lld(%lld) count=%zu " + "catalog_len=%zu(%zu) => %zd\n", offset, page_offset, + count, catalog_len, catalog_page_len, ret); return ret; } From dfdac3932d769c2dcafbd37e3f231d5f50c1ac1c Mon Sep 17 00:00:00 2001 From: "sukadev@linux.vnet.ibm.com" Date: Tue, 30 Sep 2014 23:03:21 -0700 Subject: [PATCH 112/130] powerpc: Update contact info in Documentation files Cody's email address has changed. Update the contact information for the 24x7 and GPCI counters to the PowerPC developers mailing list. Signed-off-by: Sukadev Bhattiprolu Signed-off-by: Michael Ellerman --- .../testing/sysfs-bus-event_source-devices-hv_24x7 | 6 +++--- .../testing/sysfs-bus-event_source-devices-hv_gpci | 12 ++++++------ 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_24x7 b/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_24x7 index e78ee798d7bd..32f3f5f8bba2 100644 --- a/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_24x7 +++ b/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_24x7 @@ -1,6 +1,6 @@ What: /sys/bus/event_source/devices/hv_24x7/interface/catalog Date: February 2014 -Contact: Cody P Schafer +Contact: Linux on PowerPC Developer List Description: Provides access to the binary "24x7 catalog" provided by the hypervisor on POWER7 and 8 systems. This catalog lists events @@ -10,14 +10,14 @@ Description: What: /sys/bus/event_source/devices/hv_24x7/interface/catalog_length Date: February 2014 -Contact: Cody P Schafer +Contact: Linux on PowerPC Developer List Description: A number equal to the length in bytes of the catalog. This is also extractable from the provided binary "catalog" sysfs entry. What: /sys/bus/event_source/devices/hv_24x7/interface/catalog_version Date: February 2014 -Contact: Cody P Schafer +Contact: Linux on PowerPC Developer List Description: Exposes the "version" field of the 24x7 catalog. This is also extractable from the provided binary "catalog" sysfs entry. diff --git a/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_gpci b/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_gpci index 3fa58c23f13b..3ca4e554d2f9 100644 --- a/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_gpci +++ b/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_gpci @@ -1,6 +1,6 @@ What: /sys/bus/event_source/devices/hv_gpci/interface/collect_privileged Date: February 2014 -Contact: Cody P Schafer +Contact: Linux on PowerPC Developer List Description: '0' if the hypervisor is configured to forbid access to event counters being accumulated by other guests and to physical @@ -9,35 +9,35 @@ Description: What: /sys/bus/event_source/devices/hv_gpci/interface/ga Date: February 2014 -Contact: Cody P Schafer +Contact: Linux on PowerPC Developer List Description: 0 or 1. Indicates whether we have access to "GA" events (listed in arch/powerpc/perf/hv-gpci.h). What: /sys/bus/event_source/devices/hv_gpci/interface/expanded Date: February 2014 -Contact: Cody P Schafer +Contact: Linux on PowerPC Developer List Description: 0 or 1. Indicates whether we have access to "EXPANDED" events (listed in arch/powerpc/perf/hv-gpci.h). What: /sys/bus/event_source/devices/hv_gpci/interface/lab Date: February 2014 -Contact: Cody P Schafer +Contact: Linux on PowerPC Developer List Description: 0 or 1. Indicates whether we have access to "LAB" events (listed in arch/powerpc/perf/hv-gpci.h). What: /sys/bus/event_source/devices/hv_gpci/interface/version Date: February 2014 -Contact: Cody P Schafer +Contact: Linux on PowerPC Developer List Description: A number indicating the version of the gpci interface that the hypervisor reports supporting. What: /sys/bus/event_source/devices/hv_gpci/interface/kernel_version Date: February 2014 -Contact: Cody P Schafer +Contact: Linux on PowerPC Developer List Description: A number indicating the latest version of the gpci interface that the kernel is aware of. From 60666de2dac99777631d0df64257d7fd6a5118fe Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Thu, 29 May 2014 17:45:47 +1000 Subject: [PATCH 113/130] powerpc/pseries: Use new defines when calling H_SET_MODE Now that we define these in the KVM code, use these defines when we call H_SET_MODE. No functional change. Signed-off-by: Michael Neuling Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/plpar_wrappers.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/include/asm/plpar_wrappers.h b/arch/powerpc/include/asm/plpar_wrappers.h index 12c32c5f533d..67859edbf8fd 100644 --- a/arch/powerpc/include/asm/plpar_wrappers.h +++ b/arch/powerpc/include/asm/plpar_wrappers.h @@ -273,7 +273,7 @@ static inline long plpar_set_mode(unsigned long mflags, unsigned long resource, static inline long enable_reloc_on_exceptions(void) { /* mflags = 3: Exceptions at 0xC000000000004000 */ - return plpar_set_mode(3, 3, 0, 0); + return plpar_set_mode(3, H_SET_MODE_RESOURCE_ADDR_TRANS_MODE, 0, 0); } /* @@ -284,7 +284,7 @@ static inline long enable_reloc_on_exceptions(void) * returns H_SUCCESS. */ static inline long disable_reloc_on_exceptions(void) { - return plpar_set_mode(0, 3, 0, 0); + return plpar_set_mode(0, H_SET_MODE_RESOURCE_ADDR_TRANS_MODE, 0, 0); } /* @@ -297,7 +297,7 @@ static inline long disable_reloc_on_exceptions(void) { static inline long enable_big_endian_exceptions(void) { /* mflags = 0: big endian exceptions */ - return plpar_set_mode(0, 4, 0, 0); + return plpar_set_mode(0, H_SET_MODE_RESOURCE_LE, 0, 0); } /* @@ -310,17 +310,17 @@ static inline long enable_big_endian_exceptions(void) static inline long enable_little_endian_exceptions(void) { /* mflags = 1: little endian exceptions */ - return plpar_set_mode(1, 4, 0, 0); + return plpar_set_mode(1, H_SET_MODE_RESOURCE_LE, 0, 0); } static inline long plapr_set_ciabr(unsigned long ciabr) { - return plpar_set_mode(0, 1, ciabr, 0); + return plpar_set_mode(0, H_SET_MODE_RESOURCE_SET_CIABR, ciabr, 0); } static inline long plapr_set_watchpoint0(unsigned long dawr0, unsigned long dawrx0) { - return plpar_set_mode(0, 2, dawr0, dawrx0); + return plpar_set_mode(0, H_SET_MODE_RESOURCE_SET_DAWR, dawr0, dawrx0); } #endif /* _ASM_POWERPC_PLPAR_WRAPPERS_H */ From e83d01697583d8610d1d62279758c2a881e3396f Mon Sep 17 00:00:00 2001 From: Ian Munsie Date: Wed, 8 Oct 2014 19:54:50 +1100 Subject: [PATCH 114/130] powerpc/cell: Move spu_handle_mm_fault() out of cell platform Currently spu_handle_mm_fault() is in the cell platform. This code is generically useful for other non-cell co-processors on powerpc. This patch moves this function out of the cell platform into arch/powerpc/mm so that others may use it. Signed-off-by: Ian Munsie Signed-off-by: Michael Neuling Signed-off-by: Michael Ellerman --- arch/powerpc/Kconfig | 4 ++++ arch/powerpc/include/asm/copro.h | 16 ++++++++++++++++ arch/powerpc/include/asm/spu.h | 5 ++--- arch/powerpc/mm/Makefile | 1 + .../cell/spu_fault.c => mm/copro_fault.c} | 14 ++++++-------- arch/powerpc/platforms/cell/Kconfig | 1 + arch/powerpc/platforms/cell/Makefile | 2 +- arch/powerpc/platforms/cell/spufs/fault.c | 4 ++-- 8 files changed, 33 insertions(+), 14 deletions(-) create mode 100644 arch/powerpc/include/asm/copro.h rename arch/powerpc/{platforms/cell/spu_fault.c => mm/copro_fault.c} (89%) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 98ae8b714d31..88eace4e28c3 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -608,6 +608,10 @@ config PPC_SUBPAGE_PROT to set access permissions (read/write, readonly, or no access) on the 4k subpages of each 64k page. +config PPC_COPRO_BASE + bool + default n + config SCHED_SMT bool "SMT (Hyperthreading) scheduler support" depends on PPC64 && SMP diff --git a/arch/powerpc/include/asm/copro.h b/arch/powerpc/include/asm/copro.h new file mode 100644 index 000000000000..51cae85a50b4 --- /dev/null +++ b/arch/powerpc/include/asm/copro.h @@ -0,0 +1,16 @@ +/* + * Copyright 2014 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef _ASM_POWERPC_COPRO_H +#define _ASM_POWERPC_COPRO_H + +int copro_handle_mm_fault(struct mm_struct *mm, unsigned long ea, + unsigned long dsisr, unsigned *flt); + +#endif /* _ASM_POWERPC_COPRO_H */ diff --git a/arch/powerpc/include/asm/spu.h b/arch/powerpc/include/asm/spu.h index 37b7ca39ec9f..a6e6e2bf9d15 100644 --- a/arch/powerpc/include/asm/spu.h +++ b/arch/powerpc/include/asm/spu.h @@ -27,6 +27,8 @@ #include #include #include +#include +#include #define LS_SIZE (256 * 1024) #define LS_ADDR_MASK (LS_SIZE - 1) @@ -277,9 +279,6 @@ void spu_remove_dev_attr(struct device_attribute *attr); int spu_add_dev_attr_group(struct attribute_group *attrs); void spu_remove_dev_attr_group(struct attribute_group *attrs); -int spu_handle_mm_fault(struct mm_struct *mm, unsigned long ea, - unsigned long dsisr, unsigned *flt); - /* * Notifier blocks: * diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile index d0130fff20e5..325e861616a1 100644 --- a/arch/powerpc/mm/Makefile +++ b/arch/powerpc/mm/Makefile @@ -34,3 +34,4 @@ obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += hugepage-hash64.o obj-$(CONFIG_PPC_SUBPAGE_PROT) += subpage-prot.o obj-$(CONFIG_NOT_COHERENT_CACHE) += dma-noncoherent.o obj-$(CONFIG_HIGHMEM) += highmem.o +obj-$(CONFIG_PPC_COPRO_BASE) += copro_fault.o diff --git a/arch/powerpc/platforms/cell/spu_fault.c b/arch/powerpc/mm/copro_fault.c similarity index 89% rename from arch/powerpc/platforms/cell/spu_fault.c rename to arch/powerpc/mm/copro_fault.c index 641e7273d75a..ba7df14c6b82 100644 --- a/arch/powerpc/platforms/cell/spu_fault.c +++ b/arch/powerpc/mm/copro_fault.c @@ -1,5 +1,5 @@ /* - * SPU mm fault handler + * CoProcessor (SPU/AFU) mm fault handler * * (C) Copyright IBM Deutschland Entwicklung GmbH 2007 * @@ -23,16 +23,14 @@ #include #include #include - -#include -#include +#include /* * This ought to be kept in sync with the powerpc specific do_page_fault * function. Currently, there are a few corner cases that we haven't had * to handle fortunately. */ -int spu_handle_mm_fault(struct mm_struct *mm, unsigned long ea, +int copro_handle_mm_fault(struct mm_struct *mm, unsigned long ea, unsigned long dsisr, unsigned *flt) { struct vm_area_struct *vma; @@ -58,12 +56,12 @@ int spu_handle_mm_fault(struct mm_struct *mm, unsigned long ea, goto out_unlock; } - is_write = dsisr & MFC_DSISR_ACCESS_PUT; + is_write = dsisr & DSISR_ISSTORE; if (is_write) { if (!(vma->vm_flags & VM_WRITE)) goto out_unlock; } else { - if (dsisr & MFC_DSISR_ACCESS_DENIED) + if (dsisr & DSISR_PROTFAULT) goto out_unlock; if (!(vma->vm_flags & (VM_READ | VM_EXEC))) goto out_unlock; @@ -91,4 +89,4 @@ out_unlock: up_read(&mm->mmap_sem); return ret; } -EXPORT_SYMBOL_GPL(spu_handle_mm_fault); +EXPORT_SYMBOL_GPL(copro_handle_mm_fault); diff --git a/arch/powerpc/platforms/cell/Kconfig b/arch/powerpc/platforms/cell/Kconfig index 9978f594cac0..870b6dbd4d18 100644 --- a/arch/powerpc/platforms/cell/Kconfig +++ b/arch/powerpc/platforms/cell/Kconfig @@ -86,6 +86,7 @@ config SPU_FS_64K_LS config SPU_BASE bool default n + select PPC_COPRO_BASE config CBE_RAS bool "RAS features for bare metal Cell BE" diff --git a/arch/powerpc/platforms/cell/Makefile b/arch/powerpc/platforms/cell/Makefile index fe053e7c73ee..2d16884f67b9 100644 --- a/arch/powerpc/platforms/cell/Makefile +++ b/arch/powerpc/platforms/cell/Makefile @@ -20,7 +20,7 @@ spu-manage-$(CONFIG_PPC_CELL_COMMON) += spu_manage.o obj-$(CONFIG_SPU_BASE) += spu_callbacks.o spu_base.o \ spu_notify.o \ - spu_syscalls.o spu_fault.o \ + spu_syscalls.o \ $(spu-priv1-y) \ $(spu-manage-y) \ spufs/ diff --git a/arch/powerpc/platforms/cell/spufs/fault.c b/arch/powerpc/platforms/cell/spufs/fault.c index 8cb6260cc80f..e45894a08118 100644 --- a/arch/powerpc/platforms/cell/spufs/fault.c +++ b/arch/powerpc/platforms/cell/spufs/fault.c @@ -138,7 +138,7 @@ int spufs_handle_class1(struct spu_context *ctx) if (ctx->state == SPU_STATE_RUNNABLE) ctx->spu->stats.hash_flt++; - /* we must not hold the lock when entering spu_handle_mm_fault */ + /* we must not hold the lock when entering copro_handle_mm_fault */ spu_release(ctx); access = (_PAGE_PRESENT | _PAGE_USER); @@ -149,7 +149,7 @@ int spufs_handle_class1(struct spu_context *ctx) /* hashing failed, so try the actual fault handler */ if (ret) - ret = spu_handle_mm_fault(current->mm, ea, dsisr, &flt); + ret = copro_handle_mm_fault(current->mm, ea, dsisr, &flt); /* * This is nasty: we need the state_mutex for all the bookkeeping even From 73d16a6e0e51990cbe13f8d8f43bd5329bbab30a Mon Sep 17 00:00:00 2001 From: Ian Munsie Date: Wed, 8 Oct 2014 19:54:51 +1100 Subject: [PATCH 115/130] powerpc/cell: Move data segment faulting code out of cell platform __spu_trap_data_seg() currently contains code to determine the VSID and ESID required for a particular EA and mm struct. This code is generically useful for other co-processors. This moves the code of the cell platform so it can be used by other powerpc code. It also adds 1TB segment handling which Cell didn't support. The new function is called copro_calculate_slb(). This also moves the internal struct spu_slb to a generic struct copro_slb which is now used in the Cell and copro code. We use this new struct instead of passing around esid and vsid parameters. Signed-off-by: Ian Munsie Signed-off-by: Michael Neuling Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/copro.h | 7 ++++ arch/powerpc/include/asm/mmu-hash64.h | 7 ++++ arch/powerpc/mm/copro_fault.c | 46 +++++++++++++++++++++ arch/powerpc/mm/slb.c | 3 -- arch/powerpc/platforms/cell/spu_base.c | 55 +++++--------------------- 5 files changed, 69 insertions(+), 49 deletions(-) diff --git a/arch/powerpc/include/asm/copro.h b/arch/powerpc/include/asm/copro.h index 51cae85a50b4..b0e6a183181f 100644 --- a/arch/powerpc/include/asm/copro.h +++ b/arch/powerpc/include/asm/copro.h @@ -10,7 +10,14 @@ #ifndef _ASM_POWERPC_COPRO_H #define _ASM_POWERPC_COPRO_H +struct copro_slb +{ + u64 esid, vsid; +}; + int copro_handle_mm_fault(struct mm_struct *mm, unsigned long ea, unsigned long dsisr, unsigned *flt); +int copro_calculate_slb(struct mm_struct *mm, u64 ea, struct copro_slb *slb); + #endif /* _ASM_POWERPC_COPRO_H */ diff --git a/arch/powerpc/include/asm/mmu-hash64.h b/arch/powerpc/include/asm/mmu-hash64.h index 92bc3a637923..bd813c00ede2 100644 --- a/arch/powerpc/include/asm/mmu-hash64.h +++ b/arch/powerpc/include/asm/mmu-hash64.h @@ -190,6 +190,13 @@ static inline unsigned int mmu_psize_to_shift(unsigned int mmu_psize) #ifndef __ASSEMBLY__ +static inline int slb_vsid_shift(int ssize) +{ + if (ssize == MMU_SEGSIZE_256M) + return SLB_VSID_SHIFT; + return SLB_VSID_SHIFT_1T; +} + static inline int segment_shift(int ssize) { if (ssize == MMU_SEGSIZE_256M) diff --git a/arch/powerpc/mm/copro_fault.c b/arch/powerpc/mm/copro_fault.c index ba7df14c6b82..a15a23efc0e2 100644 --- a/arch/powerpc/mm/copro_fault.c +++ b/arch/powerpc/mm/copro_fault.c @@ -24,6 +24,7 @@ #include #include #include +#include /* * This ought to be kept in sync with the powerpc specific do_page_fault @@ -90,3 +91,48 @@ out_unlock: return ret; } EXPORT_SYMBOL_GPL(copro_handle_mm_fault); + +int copro_calculate_slb(struct mm_struct *mm, u64 ea, struct copro_slb *slb) +{ + u64 vsid; + int psize, ssize; + + slb->esid = (ea & ESID_MASK) | SLB_ESID_V; + + switch (REGION_ID(ea)) { + case USER_REGION_ID: + pr_devel("%s: 0x%llx -- USER_REGION_ID\n", __func__, ea); + psize = get_slice_psize(mm, ea); + ssize = user_segment_size(ea); + vsid = get_vsid(mm->context.id, ea, ssize); + break; + case VMALLOC_REGION_ID: + pr_devel("%s: 0x%llx -- VMALLOC_REGION_ID\n", __func__, ea); + if (ea < VMALLOC_END) + psize = mmu_vmalloc_psize; + else + psize = mmu_io_psize; + ssize = mmu_kernel_ssize; + vsid = get_kernel_vsid(ea, mmu_kernel_ssize); + break; + case KERNEL_REGION_ID: + pr_devel("%s: 0x%llx -- KERNEL_REGION_ID\n", __func__, ea); + psize = mmu_linear_psize; + ssize = mmu_kernel_ssize; + vsid = get_kernel_vsid(ea, mmu_kernel_ssize); + break; + default: + pr_debug("%s: invalid region access at %016llx\n", __func__, ea); + return 1; + } + + vsid = (vsid << slb_vsid_shift(ssize)) | SLB_VSID_USER; + + vsid |= mmu_psize_defs[psize].sllp | + ((ssize == MMU_SEGSIZE_1T) ? SLB_VSID_B_1T : 0); + + slb->vsid = vsid; + + return 0; +} +EXPORT_SYMBOL_GPL(copro_calculate_slb); diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c index 0399a6702958..6e450ca66526 100644 --- a/arch/powerpc/mm/slb.c +++ b/arch/powerpc/mm/slb.c @@ -46,9 +46,6 @@ static inline unsigned long mk_esid_data(unsigned long ea, int ssize, return (ea & slb_esid_mask(ssize)) | SLB_ESID_V | slot; } -#define slb_vsid_shift(ssize) \ - ((ssize) == MMU_SEGSIZE_256M? SLB_VSID_SHIFT: SLB_VSID_SHIFT_1T) - static inline unsigned long mk_vsid_data(unsigned long ea, int ssize, unsigned long flags) { diff --git a/arch/powerpc/platforms/cell/spu_base.c b/arch/powerpc/platforms/cell/spu_base.c index 2930d1e81a05..ffcbd242e669 100644 --- a/arch/powerpc/platforms/cell/spu_base.c +++ b/arch/powerpc/platforms/cell/spu_base.c @@ -76,10 +76,6 @@ static LIST_HEAD(spu_full_list); static DEFINE_SPINLOCK(spu_full_list_lock); static DEFINE_MUTEX(spu_full_list_mutex); -struct spu_slb { - u64 esid, vsid; -}; - void spu_invalidate_slbs(struct spu *spu) { struct spu_priv2 __iomem *priv2 = spu->priv2; @@ -149,7 +145,7 @@ static void spu_restart_dma(struct spu *spu) } } -static inline void spu_load_slb(struct spu *spu, int slbe, struct spu_slb *slb) +static inline void spu_load_slb(struct spu *spu, int slbe, struct copro_slb *slb) { struct spu_priv2 __iomem *priv2 = spu->priv2; @@ -167,45 +163,12 @@ static inline void spu_load_slb(struct spu *spu, int slbe, struct spu_slb *slb) static int __spu_trap_data_seg(struct spu *spu, unsigned long ea) { - struct mm_struct *mm = spu->mm; - struct spu_slb slb; - int psize; + struct copro_slb slb; + int ret; - pr_debug("%s\n", __func__); - - slb.esid = (ea & ESID_MASK) | SLB_ESID_V; - - switch(REGION_ID(ea)) { - case USER_REGION_ID: -#ifdef CONFIG_PPC_MM_SLICES - psize = get_slice_psize(mm, ea); -#else - psize = mm->context.user_psize; -#endif - slb.vsid = (get_vsid(mm->context.id, ea, MMU_SEGSIZE_256M) - << SLB_VSID_SHIFT) | SLB_VSID_USER; - break; - case VMALLOC_REGION_ID: - if (ea < VMALLOC_END) - psize = mmu_vmalloc_psize; - else - psize = mmu_io_psize; - slb.vsid = (get_kernel_vsid(ea, MMU_SEGSIZE_256M) - << SLB_VSID_SHIFT) | SLB_VSID_KERNEL; - break; - case KERNEL_REGION_ID: - psize = mmu_linear_psize; - slb.vsid = (get_kernel_vsid(ea, MMU_SEGSIZE_256M) - << SLB_VSID_SHIFT) | SLB_VSID_KERNEL; - break; - default: - /* Future: support kernel segments so that drivers - * can use SPUs. - */ - pr_debug("invalid region access at %016lx\n", ea); - return 1; - } - slb.vsid |= mmu_psize_defs[psize].sllp; + ret = copro_calculate_slb(spu->mm, ea, &slb); + if (ret) + return ret; spu_load_slb(spu, spu->slb_replace, &slb); @@ -253,7 +216,7 @@ static int __spu_trap_data_map(struct spu *spu, unsigned long ea, u64 dsisr) return 0; } -static void __spu_kernel_slb(void *addr, struct spu_slb *slb) +static void __spu_kernel_slb(void *addr, struct copro_slb *slb) { unsigned long ea = (unsigned long)addr; u64 llp; @@ -272,7 +235,7 @@ static void __spu_kernel_slb(void *addr, struct spu_slb *slb) * Given an array of @nr_slbs SLB entries, @slbs, return non-zero if the * address @new_addr is present. */ -static inline int __slb_present(struct spu_slb *slbs, int nr_slbs, +static inline int __slb_present(struct copro_slb *slbs, int nr_slbs, void *new_addr) { unsigned long ea = (unsigned long)new_addr; @@ -297,7 +260,7 @@ static inline int __slb_present(struct spu_slb *slbs, int nr_slbs, void spu_setup_kernel_slbs(struct spu *spu, struct spu_lscsa *lscsa, void *code, int code_size) { - struct spu_slb slbs[4]; + struct copro_slb slbs[4]; int i, nr_slbs = 0; /* start and end addresses of both mappings */ void *addrs[] = { From be3ebfe8215392b714349554c5138b8b6592fe20 Mon Sep 17 00:00:00 2001 From: Ian Munsie Date: Wed, 8 Oct 2014 19:54:52 +1100 Subject: [PATCH 116/130] powerpc/cell: Make spu_flush_all_slbs() generic This moves spu_flush_all_slbs() into a generic call copro_flush_all_slbs(). This will be useful when we add cxl which also needs a similar SLB flush call. Signed-off-by: Ian Munsie Signed-off-by: Michael Neuling Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/copro.h | 6 ++++++ arch/powerpc/mm/copro_fault.c | 9 +++++++++ arch/powerpc/mm/hash_utils_64.c | 10 +++------- arch/powerpc/mm/slice.c | 10 +++------- 4 files changed, 21 insertions(+), 14 deletions(-) diff --git a/arch/powerpc/include/asm/copro.h b/arch/powerpc/include/asm/copro.h index b0e6a183181f..ce216df31381 100644 --- a/arch/powerpc/include/asm/copro.h +++ b/arch/powerpc/include/asm/copro.h @@ -20,4 +20,10 @@ int copro_handle_mm_fault(struct mm_struct *mm, unsigned long ea, int copro_calculate_slb(struct mm_struct *mm, u64 ea, struct copro_slb *slb); + +#ifdef CONFIG_PPC_COPRO_BASE +void copro_flush_all_slbs(struct mm_struct *mm); +#else +static inline void copro_flush_all_slbs(struct mm_struct *mm) {} +#endif #endif /* _ASM_POWERPC_COPRO_H */ diff --git a/arch/powerpc/mm/copro_fault.c b/arch/powerpc/mm/copro_fault.c index a15a23efc0e2..f2aa5a81ad9c 100644 --- a/arch/powerpc/mm/copro_fault.c +++ b/arch/powerpc/mm/copro_fault.c @@ -25,6 +25,7 @@ #include #include #include +#include /* * This ought to be kept in sync with the powerpc specific do_page_fault @@ -136,3 +137,11 @@ int copro_calculate_slb(struct mm_struct *mm, u64 ea, struct copro_slb *slb) return 0; } EXPORT_SYMBOL_GPL(copro_calculate_slb); + +void copro_flush_all_slbs(struct mm_struct *mm) +{ +#ifdef CONFIG_SPU_BASE + spu_flush_all_slbs(mm); +#endif +} +EXPORT_SYMBOL_GPL(copro_flush_all_slbs); diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index dd73b63f9479..0842b726b03f 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -51,7 +51,7 @@ #include #include #include -#include +#include #include #include #include @@ -901,9 +901,7 @@ void demote_segment_4k(struct mm_struct *mm, unsigned long addr) if (get_slice_psize(mm, addr) == MMU_PAGE_4K) return; slice_set_range_psize(mm, addr, 1, MMU_PAGE_4K); -#ifdef CONFIG_SPU_BASE - spu_flush_all_slbs(mm); -#endif + copro_flush_all_slbs(mm); if (get_paca_psize(addr) != MMU_PAGE_4K) { get_paca()->context = mm->context; slb_flush_and_rebolt(); @@ -1141,9 +1139,7 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) "to 4kB pages because of " "non-cacheable mapping\n"); psize = mmu_vmalloc_psize = MMU_PAGE_4K; -#ifdef CONFIG_SPU_BASE - spu_flush_all_slbs(mm); -#endif + copro_flush_all_slbs(mm); } } diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c index 86f6a755af0b..8d7bda94d196 100644 --- a/arch/powerpc/mm/slice.c +++ b/arch/powerpc/mm/slice.c @@ -33,7 +33,7 @@ #include #include #include -#include +#include #include /* some sanity checks */ @@ -234,9 +234,7 @@ static void slice_convert(struct mm_struct *mm, struct slice_mask mask, int psiz spin_unlock_irqrestore(&slice_convert_lock, flags); -#ifdef CONFIG_SPU_BASE - spu_flush_all_slbs(mm); -#endif + copro_flush_all_slbs(mm); } /* @@ -673,9 +671,7 @@ void slice_set_psize(struct mm_struct *mm, unsigned long address, spin_unlock_irqrestore(&slice_convert_lock, flags); -#ifdef CONFIG_SPU_BASE - spu_flush_all_slbs(mm); -#endif + copro_flush_all_slbs(mm); } void slice_set_range_psize(struct mm_struct *mm, unsigned long start, From b0345bbc6d0980e63b9730b487d4f4ccb8e5cd12 Mon Sep 17 00:00:00 2001 From: Ian Munsie Date: Wed, 8 Oct 2014 19:54:53 +1100 Subject: [PATCH 117/130] powerpc/msi: Improve IRQ bitmap allocator Currently msi_bitmap_alloc_hwirqs() will round up any IRQ allocation requests to the nearest power of 2. eg. ask for 5 IRQs and you'll get 8. This wastes a lot of IRQs which can be a scarce resource. For cxl we may require multiple IRQs for every context that is attached to the accelerator. There may be 1000s of contexts attached, hence we can easily run out of IRQs, especially if we are needlessly wasting them. This changes the msi_bitmap_alloc_hwirqs() to allocate only the required number of IRQs, hence avoiding this wastage. It keeps the natural alignment requirement though. Signed-off-by: Ian Munsie Signed-off-by: Michael Neuling Signed-off-by: Michael Ellerman --- arch/powerpc/sysdev/msi_bitmap.c | 36 ++++++++++++++++++++++---------- 1 file changed, 25 insertions(+), 11 deletions(-) diff --git a/arch/powerpc/sysdev/msi_bitmap.c b/arch/powerpc/sysdev/msi_bitmap.c index a7c7a9fc7530..0c75214b6f92 100644 --- a/arch/powerpc/sysdev/msi_bitmap.c +++ b/arch/powerpc/sysdev/msi_bitmap.c @@ -20,32 +20,37 @@ int msi_bitmap_alloc_hwirqs(struct msi_bitmap *bmp, int num) int offset, order = get_count_order(num); spin_lock_irqsave(&bmp->lock, flags); - /* - * This is fast, but stricter than we need. We might want to add - * a fallback routine which does a linear search with no alignment. - */ - offset = bitmap_find_free_region(bmp->bitmap, bmp->irq_count, order); + + offset = bitmap_find_next_zero_area(bmp->bitmap, bmp->irq_count, 0, + num, (1 << order) - 1); + if (offset > bmp->irq_count) + goto err; + + bitmap_set(bmp->bitmap, offset, num); spin_unlock_irqrestore(&bmp->lock, flags); - pr_debug("msi_bitmap: allocated 0x%x (2^%d) at offset 0x%x\n", - num, order, offset); + pr_debug("msi_bitmap: allocated 0x%x at offset 0x%x\n", num, offset); return offset; +err: + spin_unlock_irqrestore(&bmp->lock, flags); + return -ENOMEM; } +EXPORT_SYMBOL(msi_bitmap_alloc_hwirqs); void msi_bitmap_free_hwirqs(struct msi_bitmap *bmp, unsigned int offset, unsigned int num) { unsigned long flags; - int order = get_count_order(num); - pr_debug("msi_bitmap: freeing 0x%x (2^%d) at offset 0x%x\n", - num, order, offset); + pr_debug("msi_bitmap: freeing 0x%x at offset 0x%x\n", + num, offset); spin_lock_irqsave(&bmp->lock, flags); - bitmap_release_region(bmp->bitmap, offset, order); + bitmap_clear(bmp->bitmap, offset, num); spin_unlock_irqrestore(&bmp->lock, flags); } +EXPORT_SYMBOL(msi_bitmap_free_hwirqs); void msi_bitmap_reserve_hwirq(struct msi_bitmap *bmp, unsigned int hwirq) { @@ -180,6 +185,15 @@ static void __init test_basics(void) msi_bitmap_free_hwirqs(&bmp, size / 2, 1); check(msi_bitmap_alloc_hwirqs(&bmp, 1) == size / 2); + /* Check we get a naturally aligned offset */ + check(msi_bitmap_alloc_hwirqs(&bmp, 2) % 2 == 0); + check(msi_bitmap_alloc_hwirqs(&bmp, 4) % 4 == 0); + check(msi_bitmap_alloc_hwirqs(&bmp, 8) % 8 == 0); + check(msi_bitmap_alloc_hwirqs(&bmp, 9) % 16 == 0); + check(msi_bitmap_alloc_hwirqs(&bmp, 3) % 4 == 0); + check(msi_bitmap_alloc_hwirqs(&bmp, 7) % 8 == 0); + check(msi_bitmap_alloc_hwirqs(&bmp, 121) % 128 == 0); + msi_bitmap_free(&bmp); /* Clients may check bitmap == NULL for "not-allocated" */ From 8ca7a82f7b64ad29206daba7bdaf2f8766437243 Mon Sep 17 00:00:00 2001 From: Ian Munsie Date: Wed, 8 Oct 2014 19:54:54 +1100 Subject: [PATCH 118/130] powerpc/mm: Export mmu_kernel_ssize and mmu_linear_psize Export mmu_kernel_ssize and mmu_linear_psize. These are needed by the cxl driver which has it's own MMU. To setup the MMU cxl needs access to these. Signed-off-by: Ian Munsie Signed-off-by: Michael Neuling Signed-off-by: Michael Ellerman --- arch/powerpc/mm/hash_utils_64.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 0842b726b03f..fe5609d7a9d7 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -99,6 +99,7 @@ unsigned long htab_size_bytes; unsigned long htab_hash_mask; EXPORT_SYMBOL_GPL(htab_hash_mask); int mmu_linear_psize = MMU_PAGE_4K; +EXPORT_SYMBOL_GPL(mmu_linear_psize); int mmu_virtual_psize = MMU_PAGE_4K; int mmu_vmalloc_psize = MMU_PAGE_4K; #ifdef CONFIG_SPARSEMEM_VMEMMAP @@ -106,6 +107,7 @@ int mmu_vmemmap_psize = MMU_PAGE_4K; #endif int mmu_io_psize = MMU_PAGE_4K; int mmu_kernel_ssize = MMU_SEGSIZE_256M; +EXPORT_SYMBOL_GPL(mmu_kernel_ssize); int mmu_highuser_ssize = MMU_SEGSIZE_256M; u16 mmu_slb_size = 64; EXPORT_SYMBOL_GPL(mmu_slb_size); From fd9a1c26ae7d70a2dc1eafad7b9bf076ad8b67d9 Mon Sep 17 00:00:00 2001 From: Ian Munsie Date: Wed, 8 Oct 2014 19:54:55 +1100 Subject: [PATCH 119/130] powerpc/powernv: Split out set MSI IRQ chip code Some of the MSI IRQ code in pnv_pci_ioda_msi_setup() is generically useful so split it out. This will be used by some of the cxl PCIe code later. Signed-off-by: Ian Munsie Signed-off-by: Michael Neuling Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/pci-ioda.c | 42 +++++++++++++---------- 1 file changed, 24 insertions(+), 18 deletions(-) diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 0fd6bdb493c9..56abb5240995 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1327,14 +1327,35 @@ static void pnv_ioda2_msi_eoi(struct irq_data *d) icp_native_eoi(d); } + +static void set_msi_irq_chip(struct pnv_phb *phb, unsigned int virq) +{ + struct irq_data *idata; + struct irq_chip *ichip; + + if (phb->type != PNV_PHB_IODA2) + return; + + if (!phb->ioda.irq_chip_init) { + /* + * First time we setup an MSI IRQ, we need to setup the + * corresponding IRQ chip to route correctly. + */ + idata = irq_get_irq_data(virq); + ichip = irq_data_get_irq_chip(idata); + phb->ioda.irq_chip_init = 1; + phb->ioda.irq_chip = *ichip; + phb->ioda.irq_chip.irq_eoi = pnv_ioda2_msi_eoi; + } + irq_set_chip(virq, &phb->ioda.irq_chip); +} + static int pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev, unsigned int hwirq, unsigned int virq, unsigned int is_64, struct msi_msg *msg) { struct pnv_ioda_pe *pe = pnv_ioda_get_pe(dev); struct pci_dn *pdn = pci_get_pdn(dev); - struct irq_data *idata; - struct irq_chip *ichip; unsigned int xive_num = hwirq - phb->msi_base; __be32 data; int rc; @@ -1386,22 +1407,7 @@ static int pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev, } msg->data = be32_to_cpu(data); - /* - * Change the IRQ chip for the MSI interrupts on PHB3. - * The corresponding IRQ chip should be populated for - * the first time. - */ - if (phb->type == PNV_PHB_IODA2) { - if (!phb->ioda.irq_chip_init) { - idata = irq_get_irq_data(virq); - ichip = irq_data_get_irq_chip(idata); - phb->ioda.irq_chip_init = 1; - phb->ioda.irq_chip = *ichip; - phb->ioda.irq_chip.irq_eoi = pnv_ioda2_msi_eoi; - } - - irq_set_chip(virq, &phb->ioda.irq_chip); - } + set_msi_irq_chip(phb, virq); pr_devel("%s: %s-bit MSI on hwirq %x (xive #%d)," " address=%x_%08x data=%x PE# %d\n", From 1cd258d7faccb330145f08d838608b2c6ad41604 Mon Sep 17 00:00:00 2001 From: Ian Munsie Date: Wed, 8 Oct 2014 19:54:56 +1100 Subject: [PATCH 120/130] cxl: Add new header for call backs and structs This new header adds callbacks and structs needed by the rest of the kernel to hook into the cxl infrastructure. This adds the cxl_ctx_in_use() function for use in the mm code to see if any cxl contexts are currently in use. This is used by the tlbie() to determine if it can do local TLB invalidations or not. This also adds get/put calls for the cxl driver module to refcount the active cxl contexts. cxl_ctx_get/put/in_use are static inlined here as they are called in tlbie which we want to be fast (mpe's suggestion). Empty functions are provided when CONFIG_CXL_BASE is not enabled. Signed-off-by: Ian Munsie Signed-off-by: Michael Neuling Signed-off-by: Michael Ellerman --- include/misc/cxl.h | 48 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) create mode 100644 include/misc/cxl.h diff --git a/include/misc/cxl.h b/include/misc/cxl.h new file mode 100644 index 000000000000..975cc7861f18 --- /dev/null +++ b/include/misc/cxl.h @@ -0,0 +1,48 @@ +/* + * Copyright 2014 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef _MISC_CXL_H +#define _MISC_CXL_H + +#ifdef CONFIG_CXL_BASE + +#define CXL_IRQ_RANGES 4 + +struct cxl_irq_ranges { + irq_hw_number_t offset[CXL_IRQ_RANGES]; + irq_hw_number_t range[CXL_IRQ_RANGES]; +}; + +extern atomic_t cxl_use_count; + +static inline bool cxl_ctx_in_use(void) +{ + return (atomic_read(&cxl_use_count) != 0); +} + +static inline void cxl_ctx_get(void) +{ + atomic_inc(&cxl_use_count); +} + +static inline void cxl_ctx_put(void) +{ + atomic_dec(&cxl_use_count); +} + +void cxl_slbia(struct mm_struct *mm); + +#else /* CONFIG_CXL_BASE */ + +static inline bool cxl_ctx_in_use(void) { return false; } +static inline void cxl_slbia(struct mm_struct *mm) {} + +#endif /* CONFIG_CXL_BASE */ + +#endif From 80c49c7e4a397bb245d72a78e41640eeb0b6e953 Mon Sep 17 00:00:00 2001 From: Ian Munsie Date: Wed, 8 Oct 2014 19:54:57 +1100 Subject: [PATCH 121/130] powerpc/powerpc: Add new PCIe functions for allocating cxl interrupts This adds a number of functions for allocating IRQs under powernv PCIe for cxl. Signed-off-by: Ian Munsie Signed-off-by: Michael Neuling Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/pnv-pci.h | 31 +++++ arch/powerpc/platforms/powernv/pci-ioda.c | 154 ++++++++++++++++++++++ 2 files changed, 185 insertions(+) create mode 100644 arch/powerpc/include/asm/pnv-pci.h diff --git a/arch/powerpc/include/asm/pnv-pci.h b/arch/powerpc/include/asm/pnv-pci.h new file mode 100644 index 000000000000..f09a22fa1bd7 --- /dev/null +++ b/arch/powerpc/include/asm/pnv-pci.h @@ -0,0 +1,31 @@ +/* + * Copyright 2014 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef _ASM_PNV_PCI_H +#define _ASM_PNV_PCI_H + +#include +#include + +int pnv_phb_to_cxl(struct pci_dev *dev); +int pnv_cxl_ioda_msi_setup(struct pci_dev *dev, unsigned int hwirq, + unsigned int virq); +int pnv_cxl_alloc_hwirqs(struct pci_dev *dev, int num); +void pnv_cxl_release_hwirqs(struct pci_dev *dev, int hwirq, int num); +int pnv_cxl_get_irq_count(struct pci_dev *dev); +struct device_node *pnv_pci_to_phb_node(struct pci_dev *dev); + +#ifdef CONFIG_CXL_BASE +int pnv_cxl_alloc_hwirq_ranges(struct cxl_irq_ranges *irqs, + struct pci_dev *dev, int num); +void pnv_cxl_release_hwirq_ranges(struct cxl_irq_ranges *irqs, + struct pci_dev *dev); +#endif + +#endif diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 56abb5240995..468a0f23c7f2 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -37,6 +37,9 @@ #include #include #include +#include + +#include #include "powernv.h" #include "pci.h" @@ -1350,6 +1353,157 @@ static void set_msi_irq_chip(struct pnv_phb *phb, unsigned int virq) irq_set_chip(virq, &phb->ioda.irq_chip); } +#ifdef CONFIG_CXL_BASE + +struct device_node *pnv_pci_to_phb_node(struct pci_dev *dev) +{ + struct pci_controller *hose = pci_bus_to_host(dev->bus); + + return hose->dn; +} +EXPORT_SYMBOL(pnv_pci_to_phb_node); + +int pnv_phb_to_cxl(struct pci_dev *dev) +{ + struct pci_controller *hose = pci_bus_to_host(dev->bus); + struct pnv_phb *phb = hose->private_data; + struct pnv_ioda_pe *pe; + int rc; + + pe = pnv_ioda_get_pe(dev); + if (!pe) + return -ENODEV; + + pe_info(pe, "Switching PHB to CXL\n"); + + rc = opal_pci_set_phb_cxl_mode(phb->opal_id, 1, pe->pe_number); + if (rc) + dev_err(&dev->dev, "opal_pci_set_phb_cxl_mode failed: %i\n", rc); + + return rc; +} +EXPORT_SYMBOL(pnv_phb_to_cxl); + +/* Find PHB for cxl dev and allocate MSI hwirqs? + * Returns the absolute hardware IRQ number + */ +int pnv_cxl_alloc_hwirqs(struct pci_dev *dev, int num) +{ + struct pci_controller *hose = pci_bus_to_host(dev->bus); + struct pnv_phb *phb = hose->private_data; + int hwirq = msi_bitmap_alloc_hwirqs(&phb->msi_bmp, num); + + if (hwirq < 0) { + dev_warn(&dev->dev, "Failed to find a free MSI\n"); + return -ENOSPC; + } + + return phb->msi_base + hwirq; +} +EXPORT_SYMBOL(pnv_cxl_alloc_hwirqs); + +void pnv_cxl_release_hwirqs(struct pci_dev *dev, int hwirq, int num) +{ + struct pci_controller *hose = pci_bus_to_host(dev->bus); + struct pnv_phb *phb = hose->private_data; + + msi_bitmap_free_hwirqs(&phb->msi_bmp, hwirq - phb->msi_base, num); +} +EXPORT_SYMBOL(pnv_cxl_release_hwirqs); + +void pnv_cxl_release_hwirq_ranges(struct cxl_irq_ranges *irqs, + struct pci_dev *dev) +{ + struct pci_controller *hose = pci_bus_to_host(dev->bus); + struct pnv_phb *phb = hose->private_data; + int i, hwirq; + + for (i = 1; i < CXL_IRQ_RANGES; i++) { + if (!irqs->range[i]) + continue; + pr_devel("cxl release irq range 0x%x: offset: 0x%lx limit: %ld\n", + i, irqs->offset[i], + irqs->range[i]); + hwirq = irqs->offset[i] - phb->msi_base; + msi_bitmap_free_hwirqs(&phb->msi_bmp, hwirq, + irqs->range[i]); + } +} +EXPORT_SYMBOL(pnv_cxl_release_hwirq_ranges); + +int pnv_cxl_alloc_hwirq_ranges(struct cxl_irq_ranges *irqs, + struct pci_dev *dev, int num) +{ + struct pci_controller *hose = pci_bus_to_host(dev->bus); + struct pnv_phb *phb = hose->private_data; + int i, hwirq, try; + + memset(irqs, 0, sizeof(struct cxl_irq_ranges)); + + /* 0 is reserved for the multiplexed PSL DSI interrupt */ + for (i = 1; i < CXL_IRQ_RANGES && num; i++) { + try = num; + while (try) { + hwirq = msi_bitmap_alloc_hwirqs(&phb->msi_bmp, try); + if (hwirq >= 0) + break; + try /= 2; + } + if (!try) + goto fail; + + irqs->offset[i] = phb->msi_base + hwirq; + irqs->range[i] = try; + pr_devel("cxl alloc irq range 0x%x: offset: 0x%lx limit: %li\n", + i, irqs->offset[i], irqs->range[i]); + num -= try; + } + if (num) + goto fail; + + return 0; +fail: + pnv_cxl_release_hwirq_ranges(irqs, dev); + return -ENOSPC; +} +EXPORT_SYMBOL(pnv_cxl_alloc_hwirq_ranges); + +int pnv_cxl_get_irq_count(struct pci_dev *dev) +{ + struct pci_controller *hose = pci_bus_to_host(dev->bus); + struct pnv_phb *phb = hose->private_data; + + return phb->msi_bmp.irq_count; +} +EXPORT_SYMBOL(pnv_cxl_get_irq_count); + +int pnv_cxl_ioda_msi_setup(struct pci_dev *dev, unsigned int hwirq, + unsigned int virq) +{ + struct pci_controller *hose = pci_bus_to_host(dev->bus); + struct pnv_phb *phb = hose->private_data; + unsigned int xive_num = hwirq - phb->msi_base; + struct pnv_ioda_pe *pe; + int rc; + + if (!(pe = pnv_ioda_get_pe(dev))) + return -ENODEV; + + /* Assign XIVE to PE */ + rc = opal_pci_set_xive_pe(phb->opal_id, pe->pe_number, xive_num); + if (rc) { + pe_warn(pe, "%s: OPAL error %d setting msi_base 0x%x " + "hwirq 0x%x XIVE 0x%x PE\n", + pci_name(dev), rc, phb->msi_base, hwirq, xive_num); + return -EIO; + } + set_msi_irq_chip(phb, virq); + + return 0; +} +EXPORT_SYMBOL(pnv_cxl_ioda_msi_setup); +#endif + static int pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev, unsigned int hwirq, unsigned int virq, unsigned int is_64, struct msi_msg *msg) From a1dca3465a7be53980abab1e0d6646176fdc075f Mon Sep 17 00:00:00 2001 From: Ian Munsie Date: Wed, 8 Oct 2014 19:54:58 +1100 Subject: [PATCH 122/130] powerpc/mm: Add new hash_page_mm() This adds a new function hash_page_mm() based on the existing hash_page(). This version allows any struct mm to be passed in, rather than assuming current. This is useful for servicing co-processor faults which are not in the context of the current running process. We need to be careful here as the current hash_page() assumes current in a few places. Signed-off-by: Ian Munsie Signed-off-by: Michael Neuling Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/mmu-hash64.h | 1 + arch/powerpc/mm/hash_utils_64.c | 24 +++++++++++++++++------- 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/include/asm/mmu-hash64.h b/arch/powerpc/include/asm/mmu-hash64.h index bd813c00ede2..aeebc94b2bce 100644 --- a/arch/powerpc/include/asm/mmu-hash64.h +++ b/arch/powerpc/include/asm/mmu-hash64.h @@ -324,6 +324,7 @@ extern int __hash_page_64K(unsigned long ea, unsigned long access, unsigned int local, int ssize); struct mm_struct; unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap); +extern int hash_page_mm(struct mm_struct *mm, unsigned long ea, unsigned long access, unsigned long trap); extern int hash_page(unsigned long ea, unsigned long access, unsigned long trap); int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, pte_t *ptep, unsigned long trap, int local, int ssize, diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index fe5609d7a9d7..d5339a3b9945 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -904,7 +904,7 @@ void demote_segment_4k(struct mm_struct *mm, unsigned long addr) return; slice_set_range_psize(mm, addr, 1, MMU_PAGE_4K); copro_flush_all_slbs(mm); - if (get_paca_psize(addr) != MMU_PAGE_4K) { + if ((get_paca_psize(addr) != MMU_PAGE_4K) && (current->mm == mm)) { get_paca()->context = mm->context; slb_flush_and_rebolt(); } @@ -989,12 +989,11 @@ static void check_paca_psize(unsigned long ea, struct mm_struct *mm, * -1 - critical hash insertion error * -2 - access not permitted by subpage protection mechanism */ -int hash_page(unsigned long ea, unsigned long access, unsigned long trap) +int hash_page_mm(struct mm_struct *mm, unsigned long ea, unsigned long access, unsigned long trap) { enum ctx_state prev_state = exception_enter(); pgd_t *pgdir; unsigned long vsid; - struct mm_struct *mm; pte_t *ptep; unsigned hugeshift; const struct cpumask *tmp; @@ -1008,7 +1007,6 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) switch (REGION_ID(ea)) { case USER_REGION_ID: user_region = 1; - mm = current->mm; if (! mm) { DBG_LOW(" user region with no mm !\n"); rc = 1; @@ -1019,7 +1017,6 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) vsid = get_vsid(mm->context.id, ea, ssize); break; case VMALLOC_REGION_ID: - mm = &init_mm; vsid = get_kernel_vsid(ea, mmu_kernel_ssize); if (ea < VMALLOC_END) psize = mmu_vmalloc_psize; @@ -1104,7 +1101,8 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) WARN_ON(1); } #endif - check_paca_psize(ea, mm, psize, user_region); + if (current->mm == mm) + check_paca_psize(ea, mm, psize, user_region); goto bail; } @@ -1145,7 +1143,8 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) } } - check_paca_psize(ea, mm, psize, user_region); + if (current->mm == mm) + check_paca_psize(ea, mm, psize, user_region); #endif /* CONFIG_PPC_64K_PAGES */ #ifdef CONFIG_PPC_HAS_HASH_64K @@ -1180,6 +1179,17 @@ bail: exception_exit(prev_state); return rc; } +EXPORT_SYMBOL_GPL(hash_page_mm); + +int hash_page(unsigned long ea, unsigned long access, unsigned long trap) +{ + struct mm_struct *mm = current->mm; + + if (REGION_ID(ea) == VMALLOC_REGION_ID) + mm = &init_mm; + + return hash_page_mm(mm, ea, access, trap); +} EXPORT_SYMBOL_GPL(hash_page); void hash_preload(struct mm_struct *mm, unsigned long ea, From 09521736011e83003ca3cf7d9a31c43148306a20 Mon Sep 17 00:00:00 2001 From: Ian Munsie Date: Wed, 8 Oct 2014 19:54:59 +1100 Subject: [PATCH 123/130] powerpc/opal: Add PHB to cxl mode call This adds the OPAL call to change a PHB into cxl mode. Signed-off-by: Ian Munsie Signed-off-by: Michael Neuling Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/opal.h | 2 ++ arch/powerpc/platforms/powernv/opal-wrappers.S | 1 + 2 files changed, 3 insertions(+) diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index a084cf5bd76c..9124b0ede1fc 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -147,6 +147,7 @@ struct opal_sg_list { #define OPAL_GET_PARAM 89 #define OPAL_SET_PARAM 90 #define OPAL_DUMP_RESEND 91 +#define OPAL_PCI_SET_PHB_CXL_MODE 93 #define OPAL_DUMP_INFO2 94 #define OPAL_PCI_ERR_INJECT 96 #define OPAL_PCI_EEH_FREEZE_SET 97 @@ -961,6 +962,7 @@ int64_t opal_sensor_read(uint32_t sensor_hndl, int token, __be32 *sensor_data); int64_t opal_handle_hmi(void); int64_t opal_register_dump_region(uint32_t id, uint64_t start, uint64_t end); int64_t opal_unregister_dump_region(uint32_t id); +int64_t opal_pci_set_phb_cxl_mode(uint64_t phb_id, uint64_t mode, uint64_t pe_number); /* Internal functions */ extern int early_init_dt_scan_opal(unsigned long node, const char *uname, diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S index 15942b6ffd3a..e9e2450c1fdd 100644 --- a/arch/powerpc/platforms/powernv/opal-wrappers.S +++ b/arch/powerpc/platforms/powernv/opal-wrappers.S @@ -249,3 +249,4 @@ OPAL_CALL(opal_set_param, OPAL_SET_PARAM); OPAL_CALL(opal_handle_hmi, OPAL_HANDLE_HMI); OPAL_CALL(opal_register_dump_region, OPAL_REGISTER_DUMP_REGION); OPAL_CALL(opal_unregister_dump_region, OPAL_UNREGISTER_DUMP_REGION); +OPAL_CALL(opal_pci_set_phb_cxl_mode, OPAL_PCI_SET_PHB_CXL_MODE); From 4c6d9acce1f4ca815881f0e3adca192795eca1c5 Mon Sep 17 00:00:00 2001 From: Ian Munsie Date: Wed, 8 Oct 2014 19:55:00 +1100 Subject: [PATCH 124/130] powerpc/mm: Add hooks for cxl This adds hooks into the core powerpc mm code for cxl. The core powerpc code sometimes uses local tlbie. Unfortunately this won't work with the current cxl driver as it relies on snooping tlbie broadcasts. The cxl hardware can have TLB entries invalidated via MMIO but this is not currently supported by the driver. In future we can make local tlbie smarter so that it invalidates cxl contexts via MMIO when it needs to but for now we have this workaround. This workaround checks for any active cxl contexts and if so, disables local tlbie. This also adds a hook for when SLBs are invalidated. This ensures any corresponding SLBs in cxl are also invalidated at the same time. This is required for segment demotion. Signed-off-by: Ian Munsie Signed-off-by: Michael Neuling Signed-off-by: Michael Ellerman --- arch/powerpc/mm/copro_fault.c | 2 ++ arch/powerpc/mm/hash_native_64.c | 6 +++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/mm/copro_fault.c b/arch/powerpc/mm/copro_fault.c index f2aa5a81ad9c..0f9939e693df 100644 --- a/arch/powerpc/mm/copro_fault.c +++ b/arch/powerpc/mm/copro_fault.c @@ -26,6 +26,7 @@ #include #include #include +#include /* * This ought to be kept in sync with the powerpc specific do_page_fault @@ -143,5 +144,6 @@ void copro_flush_all_slbs(struct mm_struct *mm) #ifdef CONFIG_SPU_BASE spu_flush_all_slbs(mm); #endif + cxl_slbia(mm); } EXPORT_SYMBOL_GPL(copro_flush_all_slbs); diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index afc0a8295f84..ae4962a06476 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -29,6 +29,8 @@ #include #include +#include + #ifdef DEBUG_LOW #define DBG_LOW(fmt...) udbg_printf(fmt) #else @@ -149,9 +151,11 @@ static inline void __tlbiel(unsigned long vpn, int psize, int apsize, int ssize) static inline void tlbie(unsigned long vpn, int psize, int apsize, int ssize, int local) { - unsigned int use_local = local && mmu_has_feature(MMU_FTR_TLBIEL); + unsigned int use_local; int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE); + use_local = local && mmu_has_feature(MMU_FTR_TLBIEL) && !cxl_ctx_in_use(); + if (use_local) use_local = mmu_psize_defs[psize].tlbiel; if (lock_tlbie && !use_local) From 10542ca0156f60571ef41799d44d40dd4cb0a473 Mon Sep 17 00:00:00 2001 From: Ian Munsie Date: Wed, 8 Oct 2014 19:55:01 +1100 Subject: [PATCH 125/130] cxl: Add base builtin support This adds the base cxl support that cannot be built as a module. Specifically it adds the cxl callbacks that are called from the core powerpc mm code which must always exist irrespective of if the cxl module is loaded or not. This is similar to how cell works with CONFIG_SPU_BASE. This adds a cxl_slbia() call (similar to spu_flush_all_slbs()) which checks if the cxl module is loaded and in use, returning immediately if it is not. If it is in use it calls into the cxl SLB invalidation code. Signed-off-by: Ian Munsie Signed-off-by: Michael Neuling Signed-off-by: Michael Ellerman --- drivers/misc/Kconfig | 1 + drivers/misc/Makefile | 1 + drivers/misc/cxl/Kconfig | 8 ++++ drivers/misc/cxl/Makefile | 1 + drivers/misc/cxl/base.c | 86 +++++++++++++++++++++++++++++++++++++++ 5 files changed, 97 insertions(+) create mode 100644 drivers/misc/cxl/Kconfig create mode 100644 drivers/misc/cxl/Makefile create mode 100644 drivers/misc/cxl/base.c diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig index b841180c7c74..bbeb4516facf 100644 --- a/drivers/misc/Kconfig +++ b/drivers/misc/Kconfig @@ -527,4 +527,5 @@ source "drivers/misc/vmw_vmci/Kconfig" source "drivers/misc/mic/Kconfig" source "drivers/misc/genwqe/Kconfig" source "drivers/misc/echo/Kconfig" +source "drivers/misc/cxl/Kconfig" endmenu diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile index 5497d026e651..7d5c4cd118c4 100644 --- a/drivers/misc/Makefile +++ b/drivers/misc/Makefile @@ -55,3 +55,4 @@ obj-y += mic/ obj-$(CONFIG_GENWQE) += genwqe/ obj-$(CONFIG_ECHO) += echo/ obj-$(CONFIG_VEXPRESS_SYSCFG) += vexpress-syscfg.o +obj-$(CONFIG_CXL_BASE) += cxl/ diff --git a/drivers/misc/cxl/Kconfig b/drivers/misc/cxl/Kconfig new file mode 100644 index 000000000000..5cdd3199f97d --- /dev/null +++ b/drivers/misc/cxl/Kconfig @@ -0,0 +1,8 @@ +# +# IBM Coherent Accelerator (CXL) compatible devices +# + +config CXL_BASE + bool + default n + select PPC_COPRO_BASE diff --git a/drivers/misc/cxl/Makefile b/drivers/misc/cxl/Makefile new file mode 100644 index 000000000000..e30ad0ab0cf5 --- /dev/null +++ b/drivers/misc/cxl/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_CXL_BASE) += base.o diff --git a/drivers/misc/cxl/base.c b/drivers/misc/cxl/base.c new file mode 100644 index 000000000000..0654ad83675e --- /dev/null +++ b/drivers/misc/cxl/base.c @@ -0,0 +1,86 @@ +/* + * Copyright 2014 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include "cxl.h" + +/* protected by rcu */ +static struct cxl_calls *cxl_calls; + +atomic_t cxl_use_count = ATOMIC_INIT(0); +EXPORT_SYMBOL(cxl_use_count); + +#ifdef CONFIG_CXL_MODULE + +static inline struct cxl_calls *cxl_calls_get(void) +{ + struct cxl_calls *calls = NULL; + + rcu_read_lock(); + calls = rcu_dereference(cxl_calls); + if (calls && !try_module_get(calls->owner)) + calls = NULL; + rcu_read_unlock(); + + return calls; +} + +static inline void cxl_calls_put(struct cxl_calls *calls) +{ + BUG_ON(calls != cxl_calls); + + /* we don't need to rcu this, as we hold a reference to the module */ + module_put(cxl_calls->owner); +} + +#else /* !defined CONFIG_CXL_MODULE */ + +static inline struct cxl_calls *cxl_calls_get(void) +{ + return cxl_calls; +} + +static inline void cxl_calls_put(struct cxl_calls *calls) { } + +#endif /* CONFIG_CXL_MODULE */ + +void cxl_slbia(struct mm_struct *mm) +{ + struct cxl_calls *calls; + + calls = cxl_calls_get(); + if (!calls) + return; + + if (cxl_ctx_in_use()) + calls->cxl_slbia(mm); + + cxl_calls_put(calls); +} + +int register_cxl_calls(struct cxl_calls *calls) +{ + if (cxl_calls) + return -EBUSY; + + rcu_assign_pointer(cxl_calls, calls); + return 0; +} +EXPORT_SYMBOL_GPL(register_cxl_calls); + +void unregister_cxl_calls(struct cxl_calls *calls) +{ + BUG_ON(cxl_calls->owner != calls->owner); + RCU_INIT_POINTER(cxl_calls, NULL); + synchronize_rcu(); +} +EXPORT_SYMBOL_GPL(unregister_cxl_calls); From f204e0b8cedd7da1dfcfd05ed6b7692737e24029 Mon Sep 17 00:00:00 2001 From: Ian Munsie Date: Wed, 8 Oct 2014 19:55:02 +1100 Subject: [PATCH 126/130] cxl: Driver code for powernv PCIe based cards for userspace access This is the core of the cxl driver. It adds support for using cxl cards in the powernv environment only (ie POWER8 bare metal). It allows access to cxl accelerators by userspace using the /dev/cxl/afuM.N char devices. The kernel driver has no knowledge of the function implemented by the accelerator. It provides services to userspace via the /dev/cxl/afuM.N devices. When a program opens this device and runs the start work IOCTL, the accelerator will have coherent access to that processes memory using the same virtual addresses. That process may mmap the device to access any MMIO space the accelerator provides. Also, reads on the device will allow interrupts to be received. These services are further documented in a later patch in Documentation/powerpc/cxl.txt. Documentation of the cxl hardware architecture and userspace API is provided in subsequent patches. Signed-off-by: Ian Munsie Signed-off-by: Michael Neuling Signed-off-by: Michael Ellerman --- drivers/misc/cxl/context.c | 193 +++++++ drivers/misc/cxl/cxl.h | 629 +++++++++++++++++++++++ drivers/misc/cxl/debugfs.c | 132 +++++ drivers/misc/cxl/fault.c | 291 +++++++++++ drivers/misc/cxl/file.c | 508 ++++++++++++++++++ drivers/misc/cxl/irq.c | 402 +++++++++++++++ drivers/misc/cxl/main.c | 230 +++++++++ drivers/misc/cxl/native.c | 683 ++++++++++++++++++++++++ drivers/misc/cxl/pci.c | 1000 ++++++++++++++++++++++++++++++++++++ drivers/misc/cxl/sysfs.c | 385 ++++++++++++++ 10 files changed, 4453 insertions(+) create mode 100644 drivers/misc/cxl/context.c create mode 100644 drivers/misc/cxl/cxl.h create mode 100644 drivers/misc/cxl/debugfs.c create mode 100644 drivers/misc/cxl/fault.c create mode 100644 drivers/misc/cxl/file.c create mode 100644 drivers/misc/cxl/irq.c create mode 100644 drivers/misc/cxl/main.c create mode 100644 drivers/misc/cxl/native.c create mode 100644 drivers/misc/cxl/pci.c create mode 100644 drivers/misc/cxl/sysfs.c diff --git a/drivers/misc/cxl/context.c b/drivers/misc/cxl/context.c new file mode 100644 index 000000000000..cca472109135 --- /dev/null +++ b/drivers/misc/cxl/context.c @@ -0,0 +1,193 @@ +/* + * Copyright 2014 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "cxl.h" + +/* + * Allocates space for a CXL context. + */ +struct cxl_context *cxl_context_alloc(void) +{ + return kzalloc(sizeof(struct cxl_context), GFP_KERNEL); +} + +/* + * Initialises a CXL context. + */ +int cxl_context_init(struct cxl_context *ctx, struct cxl_afu *afu, bool master) +{ + int i; + + spin_lock_init(&ctx->sste_lock); + ctx->afu = afu; + ctx->master = master; + ctx->pid = NULL; /* Set in start work ioctl */ + + /* + * Allocate the segment table before we put it in the IDR so that we + * can always access it when dereferenced from IDR. For the same + * reason, the segment table is only destroyed after the context is + * removed from the IDR. Access to this in the IOCTL is protected by + * Linux filesytem symantics (can't IOCTL until open is complete). + */ + i = cxl_alloc_sst(ctx); + if (i) + return i; + + INIT_WORK(&ctx->fault_work, cxl_handle_fault); + + init_waitqueue_head(&ctx->wq); + spin_lock_init(&ctx->lock); + + ctx->irq_bitmap = NULL; + ctx->pending_irq = false; + ctx->pending_fault = false; + ctx->pending_afu_err = false; + + /* + * When we have to destroy all contexts in cxl_context_detach_all() we + * end up with afu_release_irqs() called from inside a + * idr_for_each_entry(). Hence we need to make sure that anything + * dereferenced from this IDR is ok before we allocate the IDR here. + * This clears out the IRQ ranges to ensure this. + */ + for (i = 0; i < CXL_IRQ_RANGES; i++) + ctx->irqs.range[i] = 0; + + mutex_init(&ctx->status_mutex); + + ctx->status = OPENED; + + /* + * Allocating IDR! We better make sure everything's setup that + * dereferences from it. + */ + idr_preload(GFP_KERNEL); + spin_lock(&afu->contexts_lock); + i = idr_alloc(&ctx->afu->contexts_idr, ctx, 0, + ctx->afu->num_procs, GFP_NOWAIT); + spin_unlock(&afu->contexts_lock); + idr_preload_end(); + if (i < 0) + return i; + + ctx->pe = i; + ctx->elem = &ctx->afu->spa[i]; + ctx->pe_inserted = false; + return 0; +} + +/* + * Map a per-context mmio space into the given vma. + */ +int cxl_context_iomap(struct cxl_context *ctx, struct vm_area_struct *vma) +{ + u64 len = vma->vm_end - vma->vm_start; + len = min(len, ctx->psn_size); + + if (ctx->afu->current_mode == CXL_MODE_DEDICATED) { + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + return vm_iomap_memory(vma, ctx->afu->psn_phys, ctx->afu->adapter->ps_size); + } + + /* make sure there is a valid per process space for this AFU */ + if ((ctx->master && !ctx->afu->psa) || (!ctx->afu->pp_psa)) { + pr_devel("AFU doesn't support mmio space\n"); + return -EINVAL; + } + + /* Can't mmap until the AFU is enabled */ + if (!ctx->afu->enabled) + return -EBUSY; + + pr_devel("%s: mmio physical: %llx pe: %i master:%i\n", __func__, + ctx->psn_phys, ctx->pe , ctx->master); + + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + return vm_iomap_memory(vma, ctx->psn_phys, len); +} + +/* + * Detach a context from the hardware. This disables interrupts and doesn't + * return until all outstanding interrupts for this context have completed. The + * hardware should no longer access *ctx after this has returned. + */ +static void __detach_context(struct cxl_context *ctx) +{ + enum cxl_context_status status; + + mutex_lock(&ctx->status_mutex); + status = ctx->status; + ctx->status = CLOSED; + mutex_unlock(&ctx->status_mutex); + if (status != STARTED) + return; + + WARN_ON(cxl_detach_process(ctx)); + afu_release_irqs(ctx); + flush_work(&ctx->fault_work); /* Only needed for dedicated process */ + wake_up_all(&ctx->wq); +} + +/* + * Detach the given context from the AFU. This doesn't actually + * free the context but it should stop the context running in hardware + * (ie. prevent this context from generating any further interrupts + * so that it can be freed). + */ +void cxl_context_detach(struct cxl_context *ctx) +{ + __detach_context(ctx); +} + +/* + * Detach all contexts on the given AFU. + */ +void cxl_context_detach_all(struct cxl_afu *afu) +{ + struct cxl_context *ctx; + int tmp; + + rcu_read_lock(); + idr_for_each_entry(&afu->contexts_idr, ctx, tmp) + /* + * Anything done in here needs to be setup before the IDR is + * created and torn down after the IDR removed + */ + __detach_context(ctx); + rcu_read_unlock(); +} + +void cxl_context_free(struct cxl_context *ctx) +{ + spin_lock(&ctx->afu->contexts_lock); + idr_remove(&ctx->afu->contexts_idr, ctx->pe); + spin_unlock(&ctx->afu->contexts_lock); + synchronize_rcu(); + + free_page((u64)ctx->sstp); + ctx->sstp = NULL; + + put_pid(ctx->pid); + kfree(ctx); +} diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h new file mode 100644 index 000000000000..3d2b8677ec8a --- /dev/null +++ b/drivers/misc/cxl/cxl.h @@ -0,0 +1,629 @@ +/* + * Copyright 2014 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef _CXL_H_ +#define _CXL_H_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +extern uint cxl_verbose; + +#define CXL_TIMEOUT 5 + +/* + * Bump version each time a user API change is made, whether it is + * backwards compatible ot not. + */ +#define CXL_API_VERSION 1 +#define CXL_API_VERSION_COMPATIBLE 1 + +/* + * Opaque types to avoid accidentally passing registers for the wrong MMIO + * + * At the end of the day, I'm not married to using typedef here, but it might + * (and has!) help avoid bugs like mixing up CXL_PSL_CtxTime and + * CXL_PSL_CtxTime_An, or calling cxl_p1n_write instead of cxl_p1_write. + * + * I'm quite happy if these are changed back to #defines before upstreaming, it + * should be little more than a regexp search+replace operation in this file. + */ +typedef struct { + const int x; +} cxl_p1_reg_t; +typedef struct { + const int x; +} cxl_p1n_reg_t; +typedef struct { + const int x; +} cxl_p2n_reg_t; +#define cxl_reg_off(reg) \ + (reg.x) + +/* Memory maps. Ref CXL Appendix A */ + +/* PSL Privilege 1 Memory Map */ +/* Configuration and Control area */ +static const cxl_p1_reg_t CXL_PSL_CtxTime = {0x0000}; +static const cxl_p1_reg_t CXL_PSL_ErrIVTE = {0x0008}; +static const cxl_p1_reg_t CXL_PSL_KEY1 = {0x0010}; +static const cxl_p1_reg_t CXL_PSL_KEY2 = {0x0018}; +static const cxl_p1_reg_t CXL_PSL_Control = {0x0020}; +/* Downloading */ +static const cxl_p1_reg_t CXL_PSL_DLCNTL = {0x0060}; +static const cxl_p1_reg_t CXL_PSL_DLADDR = {0x0068}; + +/* PSL Lookaside Buffer Management Area */ +static const cxl_p1_reg_t CXL_PSL_LBISEL = {0x0080}; +static const cxl_p1_reg_t CXL_PSL_SLBIE = {0x0088}; +static const cxl_p1_reg_t CXL_PSL_SLBIA = {0x0090}; +static const cxl_p1_reg_t CXL_PSL_TLBIE = {0x00A0}; +static const cxl_p1_reg_t CXL_PSL_TLBIA = {0x00A8}; +static const cxl_p1_reg_t CXL_PSL_AFUSEL = {0x00B0}; + +/* 0x00C0:7EFF Implementation dependent area */ +static const cxl_p1_reg_t CXL_PSL_FIR1 = {0x0100}; +static const cxl_p1_reg_t CXL_PSL_FIR2 = {0x0108}; +static const cxl_p1_reg_t CXL_PSL_VERSION = {0x0118}; +static const cxl_p1_reg_t CXL_PSL_RESLCKTO = {0x0128}; +static const cxl_p1_reg_t CXL_PSL_FIR_CNTL = {0x0148}; +static const cxl_p1_reg_t CXL_PSL_DSNDCTL = {0x0150}; +static const cxl_p1_reg_t CXL_PSL_SNWRALLOC = {0x0158}; +static const cxl_p1_reg_t CXL_PSL_TRACE = {0x0170}; +/* 0x7F00:7FFF Reserved PCIe MSI-X Pending Bit Array area */ +/* 0x8000:FFFF Reserved PCIe MSI-X Table Area */ + +/* PSL Slice Privilege 1 Memory Map */ +/* Configuration Area */ +static const cxl_p1n_reg_t CXL_PSL_SR_An = {0x00}; +static const cxl_p1n_reg_t CXL_PSL_LPID_An = {0x08}; +static const cxl_p1n_reg_t CXL_PSL_AMBAR_An = {0x10}; +static const cxl_p1n_reg_t CXL_PSL_SPOffset_An = {0x18}; +static const cxl_p1n_reg_t CXL_PSL_ID_An = {0x20}; +static const cxl_p1n_reg_t CXL_PSL_SERR_An = {0x28}; +/* Memory Management and Lookaside Buffer Management */ +static const cxl_p1n_reg_t CXL_PSL_SDR_An = {0x30}; +static const cxl_p1n_reg_t CXL_PSL_AMOR_An = {0x38}; +/* Pointer Area */ +static const cxl_p1n_reg_t CXL_HAURP_An = {0x80}; +static const cxl_p1n_reg_t CXL_PSL_SPAP_An = {0x88}; +static const cxl_p1n_reg_t CXL_PSL_LLCMD_An = {0x90}; +/* Control Area */ +static const cxl_p1n_reg_t CXL_PSL_SCNTL_An = {0xA0}; +static const cxl_p1n_reg_t CXL_PSL_CtxTime_An = {0xA8}; +static const cxl_p1n_reg_t CXL_PSL_IVTE_Offset_An = {0xB0}; +static const cxl_p1n_reg_t CXL_PSL_IVTE_Limit_An = {0xB8}; +/* 0xC0:FF Implementation Dependent Area */ +static const cxl_p1n_reg_t CXL_PSL_FIR_SLICE_An = {0xC0}; +static const cxl_p1n_reg_t CXL_AFU_DEBUG_An = {0xC8}; +static const cxl_p1n_reg_t CXL_PSL_APCALLOC_A = {0xD0}; +static const cxl_p1n_reg_t CXL_PSL_COALLOC_A = {0xD8}; +static const cxl_p1n_reg_t CXL_PSL_RXCTL_A = {0xE0}; +static const cxl_p1n_reg_t CXL_PSL_SLICE_TRACE = {0xE8}; + +/* PSL Slice Privilege 2 Memory Map */ +/* Configuration and Control Area */ +static const cxl_p2n_reg_t CXL_PSL_PID_TID_An = {0x000}; +static const cxl_p2n_reg_t CXL_CSRP_An = {0x008}; +static const cxl_p2n_reg_t CXL_AURP0_An = {0x010}; +static const cxl_p2n_reg_t CXL_AURP1_An = {0x018}; +static const cxl_p2n_reg_t CXL_SSTP0_An = {0x020}; +static const cxl_p2n_reg_t CXL_SSTP1_An = {0x028}; +static const cxl_p2n_reg_t CXL_PSL_AMR_An = {0x030}; +/* Segment Lookaside Buffer Management */ +static const cxl_p2n_reg_t CXL_SLBIE_An = {0x040}; +static const cxl_p2n_reg_t CXL_SLBIA_An = {0x048}; +static const cxl_p2n_reg_t CXL_SLBI_Select_An = {0x050}; +/* Interrupt Registers */ +static const cxl_p2n_reg_t CXL_PSL_DSISR_An = {0x060}; +static const cxl_p2n_reg_t CXL_PSL_DAR_An = {0x068}; +static const cxl_p2n_reg_t CXL_PSL_DSR_An = {0x070}; +static const cxl_p2n_reg_t CXL_PSL_TFC_An = {0x078}; +static const cxl_p2n_reg_t CXL_PSL_PEHandle_An = {0x080}; +static const cxl_p2n_reg_t CXL_PSL_ErrStat_An = {0x088}; +/* AFU Registers */ +static const cxl_p2n_reg_t CXL_AFU_Cntl_An = {0x090}; +static const cxl_p2n_reg_t CXL_AFU_ERR_An = {0x098}; +/* Work Element Descriptor */ +static const cxl_p2n_reg_t CXL_PSL_WED_An = {0x0A0}; +/* 0x0C0:FFF Implementation Dependent Area */ + +#define CXL_PSL_SPAP_Addr 0x0ffffffffffff000ULL +#define CXL_PSL_SPAP_Size 0x0000000000000ff0ULL +#define CXL_PSL_SPAP_Size_Shift 4 +#define CXL_PSL_SPAP_V 0x0000000000000001ULL + +/****** CXL_PSL_DLCNTL *****************************************************/ +#define CXL_PSL_DLCNTL_D (0x1ull << (63-28)) +#define CXL_PSL_DLCNTL_C (0x1ull << (63-29)) +#define CXL_PSL_DLCNTL_E (0x1ull << (63-30)) +#define CXL_PSL_DLCNTL_S (0x1ull << (63-31)) +#define CXL_PSL_DLCNTL_CE (CXL_PSL_DLCNTL_C | CXL_PSL_DLCNTL_E) +#define CXL_PSL_DLCNTL_DCES (CXL_PSL_DLCNTL_D | CXL_PSL_DLCNTL_CE | CXL_PSL_DLCNTL_S) + +/****** CXL_PSL_SR_An ******************************************************/ +#define CXL_PSL_SR_An_SF MSR_SF /* 64bit */ +#define CXL_PSL_SR_An_TA (1ull << (63-1)) /* Tags active, GA1: 0 */ +#define CXL_PSL_SR_An_HV MSR_HV /* Hypervisor, GA1: 0 */ +#define CXL_PSL_SR_An_PR MSR_PR /* Problem state, GA1: 1 */ +#define CXL_PSL_SR_An_ISL (1ull << (63-53)) /* Ignore Segment Large Page */ +#define CXL_PSL_SR_An_TC (1ull << (63-54)) /* Page Table secondary hash */ +#define CXL_PSL_SR_An_US (1ull << (63-56)) /* User state, GA1: X */ +#define CXL_PSL_SR_An_SC (1ull << (63-58)) /* Segment Table secondary hash */ +#define CXL_PSL_SR_An_R MSR_DR /* Relocate, GA1: 1 */ +#define CXL_PSL_SR_An_MP (1ull << (63-62)) /* Master Process */ +#define CXL_PSL_SR_An_LE (1ull << (63-63)) /* Little Endian */ + +/****** CXL_PSL_LLCMD_An ****************************************************/ +#define CXL_LLCMD_TERMINATE 0x0001000000000000ULL +#define CXL_LLCMD_REMOVE 0x0002000000000000ULL +#define CXL_LLCMD_SUSPEND 0x0003000000000000ULL +#define CXL_LLCMD_RESUME 0x0004000000000000ULL +#define CXL_LLCMD_ADD 0x0005000000000000ULL +#define CXL_LLCMD_UPDATE 0x0006000000000000ULL +#define CXL_LLCMD_HANDLE_MASK 0x000000000000ffffULL + +/****** CXL_PSL_ID_An ****************************************************/ +#define CXL_PSL_ID_An_F (1ull << (63-31)) +#define CXL_PSL_ID_An_L (1ull << (63-30)) + +/****** CXL_PSL_SCNTL_An ****************************************************/ +#define CXL_PSL_SCNTL_An_CR (0x1ull << (63-15)) +/* Programming Modes: */ +#define CXL_PSL_SCNTL_An_PM_MASK (0xffffull << (63-31)) +#define CXL_PSL_SCNTL_An_PM_Shared (0x0000ull << (63-31)) +#define CXL_PSL_SCNTL_An_PM_OS (0x0001ull << (63-31)) +#define CXL_PSL_SCNTL_An_PM_Process (0x0002ull << (63-31)) +#define CXL_PSL_SCNTL_An_PM_AFU (0x0004ull << (63-31)) +#define CXL_PSL_SCNTL_An_PM_AFU_PBT (0x0104ull << (63-31)) +/* Purge Status (ro) */ +#define CXL_PSL_SCNTL_An_Ps_MASK (0x3ull << (63-39)) +#define CXL_PSL_SCNTL_An_Ps_Pending (0x1ull << (63-39)) +#define CXL_PSL_SCNTL_An_Ps_Complete (0x3ull << (63-39)) +/* Purge */ +#define CXL_PSL_SCNTL_An_Pc (0x1ull << (63-48)) +/* Suspend Status (ro) */ +#define CXL_PSL_SCNTL_An_Ss_MASK (0x3ull << (63-55)) +#define CXL_PSL_SCNTL_An_Ss_Pending (0x1ull << (63-55)) +#define CXL_PSL_SCNTL_An_Ss_Complete (0x3ull << (63-55)) +/* Suspend Control */ +#define CXL_PSL_SCNTL_An_Sc (0x1ull << (63-63)) + +/* AFU Slice Enable Status (ro) */ +#define CXL_AFU_Cntl_An_ES_MASK (0x7ull << (63-2)) +#define CXL_AFU_Cntl_An_ES_Disabled (0x0ull << (63-2)) +#define CXL_AFU_Cntl_An_ES_Enabled (0x4ull << (63-2)) +/* AFU Slice Enable */ +#define CXL_AFU_Cntl_An_E (0x1ull << (63-3)) +/* AFU Slice Reset status (ro) */ +#define CXL_AFU_Cntl_An_RS_MASK (0x3ull << (63-5)) +#define CXL_AFU_Cntl_An_RS_Pending (0x1ull << (63-5)) +#define CXL_AFU_Cntl_An_RS_Complete (0x2ull << (63-5)) +/* AFU Slice Reset */ +#define CXL_AFU_Cntl_An_RA (0x1ull << (63-7)) + +/****** CXL_SSTP0/1_An ******************************************************/ +/* These top bits are for the segment that CONTAINS the segment table */ +#define CXL_SSTP0_An_B_SHIFT SLB_VSID_SSIZE_SHIFT +#define CXL_SSTP0_An_KS (1ull << (63-2)) +#define CXL_SSTP0_An_KP (1ull << (63-3)) +#define CXL_SSTP0_An_N (1ull << (63-4)) +#define CXL_SSTP0_An_L (1ull << (63-5)) +#define CXL_SSTP0_An_C (1ull << (63-6)) +#define CXL_SSTP0_An_TA (1ull << (63-7)) +#define CXL_SSTP0_An_LP_SHIFT (63-9) /* 2 Bits */ +/* And finally, the virtual address & size of the segment table: */ +#define CXL_SSTP0_An_SegTableSize_SHIFT (63-31) /* 12 Bits */ +#define CXL_SSTP0_An_SegTableSize_MASK \ + (((1ull << 12) - 1) << CXL_SSTP0_An_SegTableSize_SHIFT) +#define CXL_SSTP0_An_STVA_U_MASK ((1ull << (63-49))-1) +#define CXL_SSTP1_An_STVA_L_MASK (~((1ull << (63-55))-1)) +#define CXL_SSTP1_An_V (1ull << (63-63)) + +/****** CXL_PSL_SLBIE_[An] **************************************************/ +/* write: */ +#define CXL_SLBIE_C PPC_BIT(36) /* Class */ +#define CXL_SLBIE_SS PPC_BITMASK(37, 38) /* Segment Size */ +#define CXL_SLBIE_SS_SHIFT PPC_BITLSHIFT(38) +#define CXL_SLBIE_TA PPC_BIT(38) /* Tags Active */ +/* read: */ +#define CXL_SLBIE_MAX PPC_BITMASK(24, 31) +#define CXL_SLBIE_PENDING PPC_BITMASK(56, 63) + +/****** Common to all CXL_TLBIA/SLBIA_[An] **********************************/ +#define CXL_TLB_SLB_P (1ull) /* Pending (read) */ + +/****** Common to all CXL_TLB/SLB_IA/IE_[An] registers **********************/ +#define CXL_TLB_SLB_IQ_ALL (0ull) /* Inv qualifier */ +#define CXL_TLB_SLB_IQ_LPID (1ull) /* Inv qualifier */ +#define CXL_TLB_SLB_IQ_LPIDPID (3ull) /* Inv qualifier */ + +/****** CXL_PSL_AFUSEL ******************************************************/ +#define CXL_PSL_AFUSEL_A (1ull << (63-55)) /* Adapter wide invalidates affect all AFUs */ + +/****** CXL_PSL_DSISR_An ****************************************************/ +#define CXL_PSL_DSISR_An_DS (1ull << (63-0)) /* Segment not found */ +#define CXL_PSL_DSISR_An_DM (1ull << (63-1)) /* PTE not found (See also: M) or protection fault */ +#define CXL_PSL_DSISR_An_ST (1ull << (63-2)) /* Segment Table PTE not found */ +#define CXL_PSL_DSISR_An_UR (1ull << (63-3)) /* AURP PTE not found */ +#define CXL_PSL_DSISR_TRANS (CXL_PSL_DSISR_An_DS | CXL_PSL_DSISR_An_DM | CXL_PSL_DSISR_An_ST | CXL_PSL_DSISR_An_UR) +#define CXL_PSL_DSISR_An_PE (1ull << (63-4)) /* PSL Error (implementation specific) */ +#define CXL_PSL_DSISR_An_AE (1ull << (63-5)) /* AFU Error */ +#define CXL_PSL_DSISR_An_OC (1ull << (63-6)) /* OS Context Warning */ +/* NOTE: Bits 32:63 are undefined if DSISR[DS] = 1 */ +#define CXL_PSL_DSISR_An_M DSISR_NOHPTE /* PTE not found */ +#define CXL_PSL_DSISR_An_P DSISR_PROTFAULT /* Storage protection violation */ +#define CXL_PSL_DSISR_An_A (1ull << (63-37)) /* AFU lock access to write through or cache inhibited storage */ +#define CXL_PSL_DSISR_An_S DSISR_ISSTORE /* Access was afu_wr or afu_zero */ +#define CXL_PSL_DSISR_An_K DSISR_KEYFAULT /* Access not permitted by virtual page class key protection */ + +/****** CXL_PSL_TFC_An ******************************************************/ +#define CXL_PSL_TFC_An_A (1ull << (63-28)) /* Acknowledge non-translation fault */ +#define CXL_PSL_TFC_An_C (1ull << (63-29)) /* Continue (abort transaction) */ +#define CXL_PSL_TFC_An_AE (1ull << (63-30)) /* Restart PSL with address error */ +#define CXL_PSL_TFC_An_R (1ull << (63-31)) /* Restart PSL transaction */ + +/* cxl_process_element->software_status */ +#define CXL_PE_SOFTWARE_STATE_V (1ul << (31 - 0)) /* Valid */ +#define CXL_PE_SOFTWARE_STATE_C (1ul << (31 - 29)) /* Complete */ +#define CXL_PE_SOFTWARE_STATE_S (1ul << (31 - 30)) /* Suspend */ +#define CXL_PE_SOFTWARE_STATE_T (1ul << (31 - 31)) /* Terminate */ + +/* SPA->sw_command_status */ +#define CXL_SPA_SW_CMD_MASK 0xffff000000000000ULL +#define CXL_SPA_SW_CMD_TERMINATE 0x0001000000000000ULL +#define CXL_SPA_SW_CMD_REMOVE 0x0002000000000000ULL +#define CXL_SPA_SW_CMD_SUSPEND 0x0003000000000000ULL +#define CXL_SPA_SW_CMD_RESUME 0x0004000000000000ULL +#define CXL_SPA_SW_CMD_ADD 0x0005000000000000ULL +#define CXL_SPA_SW_CMD_UPDATE 0x0006000000000000ULL +#define CXL_SPA_SW_STATE_MASK 0x0000ffff00000000ULL +#define CXL_SPA_SW_STATE_TERMINATED 0x0000000100000000ULL +#define CXL_SPA_SW_STATE_REMOVED 0x0000000200000000ULL +#define CXL_SPA_SW_STATE_SUSPENDED 0x0000000300000000ULL +#define CXL_SPA_SW_STATE_RESUMED 0x0000000400000000ULL +#define CXL_SPA_SW_STATE_ADDED 0x0000000500000000ULL +#define CXL_SPA_SW_STATE_UPDATED 0x0000000600000000ULL +#define CXL_SPA_SW_PSL_ID_MASK 0x00000000ffff0000ULL +#define CXL_SPA_SW_LINK_MASK 0x000000000000ffffULL + +#define CXL_MAX_SLICES 4 +#define MAX_AFU_MMIO_REGS 3 + +#define CXL_MODE_DEDICATED 0x1 +#define CXL_MODE_DIRECTED 0x2 +#define CXL_MODE_TIME_SLICED 0x4 +#define CXL_SUPPORTED_MODES (CXL_MODE_DEDICATED | CXL_MODE_DIRECTED) + +enum cxl_context_status { + CLOSED, + OPENED, + STARTED +}; + +enum prefault_modes { + CXL_PREFAULT_NONE, + CXL_PREFAULT_WED, + CXL_PREFAULT_ALL, +}; + +struct cxl_sste { + __be64 esid_data; + __be64 vsid_data; +}; + +#define to_cxl_adapter(d) container_of(d, struct cxl, dev) +#define to_cxl_afu(d) container_of(d, struct cxl_afu, dev) + +struct cxl_afu { + irq_hw_number_t psl_hwirq; + irq_hw_number_t serr_hwirq; + unsigned int serr_virq; + void __iomem *p1n_mmio; + void __iomem *p2n_mmio; + phys_addr_t psn_phys; + u64 pp_offset; + u64 pp_size; + void __iomem *afu_desc_mmio; + struct cxl *adapter; + struct device dev; + struct cdev afu_cdev_s, afu_cdev_m, afu_cdev_d; + struct device *chardev_s, *chardev_m, *chardev_d; + struct idr contexts_idr; + struct dentry *debugfs; + spinlock_t contexts_lock; + struct mutex spa_mutex; + spinlock_t afu_cntl_lock; + + /* + * Only the first part of the SPA is used for the process element + * linked list. The only other part that software needs to worry about + * is sw_command_status, which we store a separate pointer to. + * Everything else in the SPA is only used by hardware + */ + struct cxl_process_element *spa; + __be64 *sw_command_status; + unsigned int spa_size; + int spa_order; + int spa_max_procs; + unsigned int psl_virq; + + int pp_irqs; + int irqs_max; + int num_procs; + int max_procs_virtualised; + int slice; + int modes_supported; + int current_mode; + enum prefault_modes prefault_mode; + bool psa; + bool pp_psa; + bool enabled; +}; + +/* + * This is a cxl context. If the PSL is in dedicated mode, there will be one + * of these per AFU. If in AFU directed there can be lots of these. + */ +struct cxl_context { + struct cxl_afu *afu; + + /* Problem state MMIO */ + phys_addr_t psn_phys; + u64 psn_size; + + spinlock_t sste_lock; /* Protects segment table entries */ + struct cxl_sste *sstp; + u64 sstp0, sstp1; + unsigned int sst_size, sst_lru; + + wait_queue_head_t wq; + struct pid *pid; + spinlock_t lock; /* Protects pending_irq_mask, pending_fault and fault_addr */ + /* Only used in PR mode */ + u64 process_token; + + unsigned long *irq_bitmap; /* Accessed from IRQ context */ + struct cxl_irq_ranges irqs; + u64 fault_addr; + u64 fault_dsisr; + u64 afu_err; + + /* + * This status and it's lock pretects start and detach context + * from racing. It also prevents detach from racing with + * itself + */ + enum cxl_context_status status; + struct mutex status_mutex; + + + /* XXX: Is it possible to need multiple work items at once? */ + struct work_struct fault_work; + u64 dsisr; + u64 dar; + + struct cxl_process_element *elem; + + int pe; /* process element handle */ + u32 irq_count; + bool pe_inserted; + bool master; + bool kernel; + bool pending_irq; + bool pending_fault; + bool pending_afu_err; +}; + +struct cxl { + void __iomem *p1_mmio; + void __iomem *p2_mmio; + irq_hw_number_t err_hwirq; + unsigned int err_virq; + spinlock_t afu_list_lock; + struct cxl_afu *afu[CXL_MAX_SLICES]; + struct device dev; + struct dentry *trace; + struct dentry *psl_err_chk; + struct dentry *debugfs; + struct bin_attribute cxl_attr; + int adapter_num; + int user_irqs; + u64 afu_desc_off; + u64 afu_desc_size; + u64 ps_off; + u64 ps_size; + u16 psl_rev; + u16 base_image; + u8 vsec_status; + u8 caia_major; + u8 caia_minor; + u8 slices; + bool user_image_loaded; + bool perst_loads_image; + bool perst_select_user; +}; + +int cxl_alloc_one_irq(struct cxl *adapter); +void cxl_release_one_irq(struct cxl *adapter, int hwirq); +int cxl_alloc_irq_ranges(struct cxl_irq_ranges *irqs, struct cxl *adapter, unsigned int num); +void cxl_release_irq_ranges(struct cxl_irq_ranges *irqs, struct cxl *adapter); +int cxl_setup_irq(struct cxl *adapter, unsigned int hwirq, unsigned int virq); + +/* common == phyp + powernv */ +struct cxl_process_element_common { + __be32 tid; + __be32 pid; + __be64 csrp; + __be64 aurp0; + __be64 aurp1; + __be64 sstp0; + __be64 sstp1; + __be64 amr; + u8 reserved3[4]; + __be64 wed; +} __packed; + +/* just powernv */ +struct cxl_process_element { + __be64 sr; + __be64 SPOffset; + __be64 sdr; + __be64 haurp; + __be32 ctxtime; + __be16 ivte_offsets[4]; + __be16 ivte_ranges[4]; + __be32 lpid; + struct cxl_process_element_common common; + __be32 software_state; +} __packed; + +static inline void __iomem *_cxl_p1_addr(struct cxl *cxl, cxl_p1_reg_t reg) +{ + WARN_ON(!cpu_has_feature(CPU_FTR_HVMODE)); + return cxl->p1_mmio + cxl_reg_off(reg); +} + +#define cxl_p1_write(cxl, reg, val) \ + out_be64(_cxl_p1_addr(cxl, reg), val) +#define cxl_p1_read(cxl, reg) \ + in_be64(_cxl_p1_addr(cxl, reg)) + +static inline void __iomem *_cxl_p1n_addr(struct cxl_afu *afu, cxl_p1n_reg_t reg) +{ + WARN_ON(!cpu_has_feature(CPU_FTR_HVMODE)); + return afu->p1n_mmio + cxl_reg_off(reg); +} + +#define cxl_p1n_write(afu, reg, val) \ + out_be64(_cxl_p1n_addr(afu, reg), val) +#define cxl_p1n_read(afu, reg) \ + in_be64(_cxl_p1n_addr(afu, reg)) + +static inline void __iomem *_cxl_p2n_addr(struct cxl_afu *afu, cxl_p2n_reg_t reg) +{ + return afu->p2n_mmio + cxl_reg_off(reg); +} + +#define cxl_p2n_write(afu, reg, val) \ + out_be64(_cxl_p2n_addr(afu, reg), val) +#define cxl_p2n_read(afu, reg) \ + in_be64(_cxl_p2n_addr(afu, reg)) + +struct cxl_calls { + void (*cxl_slbia)(struct mm_struct *mm); + struct module *owner; +}; +int register_cxl_calls(struct cxl_calls *calls); +void unregister_cxl_calls(struct cxl_calls *calls); + +int cxl_alloc_adapter_nr(struct cxl *adapter); +void cxl_remove_adapter_nr(struct cxl *adapter); + +int cxl_file_init(void); +void cxl_file_exit(void); +int cxl_register_adapter(struct cxl *adapter); +int cxl_register_afu(struct cxl_afu *afu); +int cxl_chardev_d_afu_add(struct cxl_afu *afu); +int cxl_chardev_m_afu_add(struct cxl_afu *afu); +int cxl_chardev_s_afu_add(struct cxl_afu *afu); +void cxl_chardev_afu_remove(struct cxl_afu *afu); + +void cxl_context_detach_all(struct cxl_afu *afu); +void cxl_context_free(struct cxl_context *ctx); +void cxl_context_detach(struct cxl_context *ctx); + +int cxl_sysfs_adapter_add(struct cxl *adapter); +void cxl_sysfs_adapter_remove(struct cxl *adapter); +int cxl_sysfs_afu_add(struct cxl_afu *afu); +void cxl_sysfs_afu_remove(struct cxl_afu *afu); +int cxl_sysfs_afu_m_add(struct cxl_afu *afu); +void cxl_sysfs_afu_m_remove(struct cxl_afu *afu); + +int cxl_afu_activate_mode(struct cxl_afu *afu, int mode); +int _cxl_afu_deactivate_mode(struct cxl_afu *afu, int mode); +int cxl_afu_deactivate_mode(struct cxl_afu *afu); +int cxl_afu_select_best_mode(struct cxl_afu *afu); + +unsigned int cxl_map_irq(struct cxl *adapter, irq_hw_number_t hwirq, + irq_handler_t handler, void *cookie); +void cxl_unmap_irq(unsigned int virq, void *cookie); +int cxl_register_psl_irq(struct cxl_afu *afu); +void cxl_release_psl_irq(struct cxl_afu *afu); +int cxl_register_psl_err_irq(struct cxl *adapter); +void cxl_release_psl_err_irq(struct cxl *adapter); +int cxl_register_serr_irq(struct cxl_afu *afu); +void cxl_release_serr_irq(struct cxl_afu *afu); +int afu_register_irqs(struct cxl_context *ctx, u32 count); +void afu_release_irqs(struct cxl_context *ctx); +irqreturn_t cxl_slice_irq_err(int irq, void *data); + +int cxl_debugfs_init(void); +void cxl_debugfs_exit(void); +int cxl_debugfs_adapter_add(struct cxl *adapter); +void cxl_debugfs_adapter_remove(struct cxl *adapter); +int cxl_debugfs_afu_add(struct cxl_afu *afu); +void cxl_debugfs_afu_remove(struct cxl_afu *afu); + +void cxl_handle_fault(struct work_struct *work); +void cxl_prefault(struct cxl_context *ctx, u64 wed); + +struct cxl *get_cxl_adapter(int num); +int cxl_alloc_sst(struct cxl_context *ctx); + +void init_cxl_native(void); + +struct cxl_context *cxl_context_alloc(void); +int cxl_context_init(struct cxl_context *ctx, struct cxl_afu *afu, bool master); +void cxl_context_free(struct cxl_context *ctx); +int cxl_context_iomap(struct cxl_context *ctx, struct vm_area_struct *vma); + +/* This matches the layout of the H_COLLECT_CA_INT_INFO retbuf */ +struct cxl_irq_info { + u64 dsisr; + u64 dar; + u64 dsr; + u32 pid; + u32 tid; + u64 afu_err; + u64 errstat; + u64 padding[3]; /* to match the expected retbuf size for plpar_hcall9 */ +}; + +int cxl_attach_process(struct cxl_context *ctx, bool kernel, u64 wed, + u64 amr); +int cxl_detach_process(struct cxl_context *ctx); + +int cxl_get_irq(struct cxl_context *ctx, struct cxl_irq_info *info); +int cxl_ack_irq(struct cxl_context *ctx, u64 tfc, u64 psl_reset_mask); + +int cxl_check_error(struct cxl_afu *afu); +int cxl_afu_slbia(struct cxl_afu *afu); +int cxl_tlb_slb_invalidate(struct cxl *adapter); +int cxl_afu_disable(struct cxl_afu *afu); +int cxl_afu_reset(struct cxl_afu *afu); +int cxl_psl_purge(struct cxl_afu *afu); + +void cxl_stop_trace(struct cxl *cxl); + +extern struct pci_driver cxl_pci_driver; + +#endif diff --git a/drivers/misc/cxl/debugfs.c b/drivers/misc/cxl/debugfs.c new file mode 100644 index 000000000000..825c412580bc --- /dev/null +++ b/drivers/misc/cxl/debugfs.c @@ -0,0 +1,132 @@ +/* + * Copyright 2014 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include + +#include "cxl.h" + +static struct dentry *cxl_debugfs; + +void cxl_stop_trace(struct cxl *adapter) +{ + int slice; + + /* Stop the trace */ + cxl_p1_write(adapter, CXL_PSL_TRACE, 0x8000000000000017LL); + + /* Stop the slice traces */ + spin_lock(&adapter->afu_list_lock); + for (slice = 0; slice < adapter->slices; slice++) { + if (adapter->afu[slice]) + cxl_p1n_write(adapter->afu[slice], CXL_PSL_SLICE_TRACE, 0x8000000000000000LL); + } + spin_unlock(&adapter->afu_list_lock); +} + +/* Helpers to export CXL mmaped IO registers via debugfs */ +static int debugfs_io_u64_get(void *data, u64 *val) +{ + *val = in_be64((u64 __iomem *)data); + return 0; +} + +static int debugfs_io_u64_set(void *data, u64 val) +{ + out_be64((u64 __iomem *)data, val); + return 0; +} +DEFINE_SIMPLE_ATTRIBUTE(fops_io_x64, debugfs_io_u64_get, debugfs_io_u64_set, "0x%016llx\n"); + +static struct dentry *debugfs_create_io_x64(const char *name, umode_t mode, + struct dentry *parent, u64 __iomem *value) +{ + return debugfs_create_file(name, mode, parent, (void *)value, &fops_io_x64); +} + +int cxl_debugfs_adapter_add(struct cxl *adapter) +{ + struct dentry *dir; + char buf[32]; + + if (!cxl_debugfs) + return -ENODEV; + + snprintf(buf, 32, "card%i", adapter->adapter_num); + dir = debugfs_create_dir(buf, cxl_debugfs); + if (IS_ERR(dir)) + return PTR_ERR(dir); + adapter->debugfs = dir; + + debugfs_create_io_x64("fir1", S_IRUSR, dir, _cxl_p1_addr(adapter, CXL_PSL_FIR1)); + debugfs_create_io_x64("fir2", S_IRUSR, dir, _cxl_p1_addr(adapter, CXL_PSL_FIR2)); + debugfs_create_io_x64("fir_cntl", S_IRUSR, dir, _cxl_p1_addr(adapter, CXL_PSL_FIR_CNTL)); + debugfs_create_io_x64("err_ivte", S_IRUSR, dir, _cxl_p1_addr(adapter, CXL_PSL_ErrIVTE)); + + debugfs_create_io_x64("trace", S_IRUSR | S_IWUSR, dir, _cxl_p1_addr(adapter, CXL_PSL_TRACE)); + + return 0; +} + +void cxl_debugfs_adapter_remove(struct cxl *adapter) +{ + debugfs_remove_recursive(adapter->debugfs); +} + +int cxl_debugfs_afu_add(struct cxl_afu *afu) +{ + struct dentry *dir; + char buf[32]; + + if (!afu->adapter->debugfs) + return -ENODEV; + + snprintf(buf, 32, "psl%i.%i", afu->adapter->adapter_num, afu->slice); + dir = debugfs_create_dir(buf, afu->adapter->debugfs); + if (IS_ERR(dir)) + return PTR_ERR(dir); + afu->debugfs = dir; + + debugfs_create_io_x64("fir", S_IRUSR, dir, _cxl_p1n_addr(afu, CXL_PSL_FIR_SLICE_An)); + debugfs_create_io_x64("serr", S_IRUSR, dir, _cxl_p1n_addr(afu, CXL_PSL_SERR_An)); + debugfs_create_io_x64("afu_debug", S_IRUSR, dir, _cxl_p1n_addr(afu, CXL_AFU_DEBUG_An)); + debugfs_create_io_x64("sr", S_IRUSR, dir, _cxl_p1n_addr(afu, CXL_PSL_SR_An)); + + debugfs_create_io_x64("dsisr", S_IRUSR, dir, _cxl_p2n_addr(afu, CXL_PSL_DSISR_An)); + debugfs_create_io_x64("dar", S_IRUSR, dir, _cxl_p2n_addr(afu, CXL_PSL_DAR_An)); + debugfs_create_io_x64("sstp0", S_IRUSR, dir, _cxl_p2n_addr(afu, CXL_SSTP0_An)); + debugfs_create_io_x64("sstp1", S_IRUSR, dir, _cxl_p2n_addr(afu, CXL_SSTP1_An)); + debugfs_create_io_x64("err_status", S_IRUSR, dir, _cxl_p2n_addr(afu, CXL_PSL_ErrStat_An)); + + debugfs_create_io_x64("trace", S_IRUSR | S_IWUSR, dir, _cxl_p1n_addr(afu, CXL_PSL_SLICE_TRACE)); + + return 0; +} + +void cxl_debugfs_afu_remove(struct cxl_afu *afu) +{ + debugfs_remove_recursive(afu->debugfs); +} + +int __init cxl_debugfs_init(void) +{ + struct dentry *ent; + ent = debugfs_create_dir("cxl", NULL); + if (IS_ERR(ent)) + return PTR_ERR(ent); + cxl_debugfs = ent; + + return 0; +} + +void cxl_debugfs_exit(void) +{ + debugfs_remove_recursive(cxl_debugfs); +} diff --git a/drivers/misc/cxl/fault.c b/drivers/misc/cxl/fault.c new file mode 100644 index 000000000000..69506ebd4d07 --- /dev/null +++ b/drivers/misc/cxl/fault.c @@ -0,0 +1,291 @@ +/* + * Copyright 2014 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include + +#undef MODULE_PARAM_PREFIX +#define MODULE_PARAM_PREFIX "cxl" "." +#include +#include +#include + +#include "cxl.h" + +static struct cxl_sste* find_free_sste(struct cxl_sste *primary_group, + bool sec_hash, + struct cxl_sste *secondary_group, + unsigned int *lru) +{ + unsigned int i, entry; + struct cxl_sste *sste, *group = primary_group; + + for (i = 0; i < 2; i++) { + for (entry = 0; entry < 8; entry++) { + sste = group + entry; + if (!(be64_to_cpu(sste->esid_data) & SLB_ESID_V)) + return sste; + } + if (!sec_hash) + break; + group = secondary_group; + } + /* Nothing free, select an entry to cast out */ + if (sec_hash && (*lru & 0x8)) + sste = secondary_group + (*lru & 0x7); + else + sste = primary_group + (*lru & 0x7); + *lru = (*lru + 1) & 0xf; + + return sste; +} + +static void cxl_load_segment(struct cxl_context *ctx, struct copro_slb *slb) +{ + /* mask is the group index, we search primary and secondary here. */ + unsigned int mask = (ctx->sst_size >> 7)-1; /* SSTP0[SegTableSize] */ + bool sec_hash = 1; + struct cxl_sste *sste; + unsigned int hash; + unsigned long flags; + + + sec_hash = !!(cxl_p1n_read(ctx->afu, CXL_PSL_SR_An) & CXL_PSL_SR_An_SC); + + if (slb->vsid & SLB_VSID_B_1T) + hash = (slb->esid >> SID_SHIFT_1T) & mask; + else /* 256M */ + hash = (slb->esid >> SID_SHIFT) & mask; + + spin_lock_irqsave(&ctx->sste_lock, flags); + sste = find_free_sste(ctx->sstp + (hash << 3), sec_hash, + ctx->sstp + ((~hash & mask) << 3), &ctx->sst_lru); + + pr_devel("CXL Populating SST[%li]: %#llx %#llx\n", + sste - ctx->sstp, slb->vsid, slb->esid); + + sste->vsid_data = cpu_to_be64(slb->vsid); + sste->esid_data = cpu_to_be64(slb->esid); + spin_unlock_irqrestore(&ctx->sste_lock, flags); +} + +static int cxl_fault_segment(struct cxl_context *ctx, struct mm_struct *mm, + u64 ea) +{ + struct copro_slb slb = {0,0}; + int rc; + + if (!(rc = copro_calculate_slb(mm, ea, &slb))) { + cxl_load_segment(ctx, &slb); + } + + return rc; +} + +static void cxl_ack_ae(struct cxl_context *ctx) +{ + unsigned long flags; + + cxl_ack_irq(ctx, CXL_PSL_TFC_An_AE, 0); + + spin_lock_irqsave(&ctx->lock, flags); + ctx->pending_fault = true; + ctx->fault_addr = ctx->dar; + ctx->fault_dsisr = ctx->dsisr; + spin_unlock_irqrestore(&ctx->lock, flags); + + wake_up_all(&ctx->wq); +} + +static int cxl_handle_segment_miss(struct cxl_context *ctx, + struct mm_struct *mm, u64 ea) +{ + int rc; + + pr_devel("CXL interrupt: Segment fault pe: %i ea: %#llx\n", ctx->pe, ea); + + if ((rc = cxl_fault_segment(ctx, mm, ea))) + cxl_ack_ae(ctx); + else { + + mb(); /* Order seg table write to TFC MMIO write */ + cxl_ack_irq(ctx, CXL_PSL_TFC_An_R, 0); + } + + return IRQ_HANDLED; +} + +static void cxl_handle_page_fault(struct cxl_context *ctx, + struct mm_struct *mm, u64 dsisr, u64 dar) +{ + unsigned flt = 0; + int result; + unsigned long access, flags; + + if ((result = copro_handle_mm_fault(mm, dar, dsisr, &flt))) { + pr_devel("copro_handle_mm_fault failed: %#x\n", result); + return cxl_ack_ae(ctx); + } + + /* + * update_mmu_cache() will not have loaded the hash since current->trap + * is not a 0x400 or 0x300, so just call hash_page_mm() here. + */ + access = _PAGE_PRESENT; + if (dsisr & CXL_PSL_DSISR_An_S) + access |= _PAGE_RW; + if ((!ctx->kernel) || ~(dar & (1ULL << 63))) + access |= _PAGE_USER; + local_irq_save(flags); + hash_page_mm(mm, dar, access, 0x300); + local_irq_restore(flags); + + pr_devel("Page fault successfully handled for pe: %i!\n", ctx->pe); + cxl_ack_irq(ctx, CXL_PSL_TFC_An_R, 0); +} + +void cxl_handle_fault(struct work_struct *fault_work) +{ + struct cxl_context *ctx = + container_of(fault_work, struct cxl_context, fault_work); + u64 dsisr = ctx->dsisr; + u64 dar = ctx->dar; + struct task_struct *task; + struct mm_struct *mm; + + if (cxl_p2n_read(ctx->afu, CXL_PSL_DSISR_An) != dsisr || + cxl_p2n_read(ctx->afu, CXL_PSL_DAR_An) != dar || + cxl_p2n_read(ctx->afu, CXL_PSL_PEHandle_An) != ctx->pe) { + /* Most likely explanation is harmless - a dedicated process + * has detached and these were cleared by the PSL purge, but + * warn about it just in case */ + dev_notice(&ctx->afu->dev, "cxl_handle_fault: Translation fault regs changed\n"); + return; + } + + pr_devel("CXL BOTTOM HALF handling fault for afu pe: %i. " + "DSISR: %#llx DAR: %#llx\n", ctx->pe, dsisr, dar); + + if (!(task = get_pid_task(ctx->pid, PIDTYPE_PID))) { + pr_devel("cxl_handle_fault unable to get task %i\n", + pid_nr(ctx->pid)); + cxl_ack_ae(ctx); + return; + } + if (!(mm = get_task_mm(task))) { + pr_devel("cxl_handle_fault unable to get mm %i\n", + pid_nr(ctx->pid)); + cxl_ack_ae(ctx); + goto out; + } + + if (dsisr & CXL_PSL_DSISR_An_DS) + cxl_handle_segment_miss(ctx, mm, dar); + else if (dsisr & CXL_PSL_DSISR_An_DM) + cxl_handle_page_fault(ctx, mm, dsisr, dar); + else + WARN(1, "cxl_handle_fault has nothing to handle\n"); + + mmput(mm); +out: + put_task_struct(task); +} + +static void cxl_prefault_one(struct cxl_context *ctx, u64 ea) +{ + int rc; + struct task_struct *task; + struct mm_struct *mm; + + if (!(task = get_pid_task(ctx->pid, PIDTYPE_PID))) { + pr_devel("cxl_prefault_one unable to get task %i\n", + pid_nr(ctx->pid)); + return; + } + if (!(mm = get_task_mm(task))) { + pr_devel("cxl_prefault_one unable to get mm %i\n", + pid_nr(ctx->pid)); + put_task_struct(task); + return; + } + + rc = cxl_fault_segment(ctx, mm, ea); + + mmput(mm); + put_task_struct(task); +} + +static u64 next_segment(u64 ea, u64 vsid) +{ + if (vsid & SLB_VSID_B_1T) + ea |= (1ULL << 40) - 1; + else + ea |= (1ULL << 28) - 1; + + return ea + 1; +} + +static void cxl_prefault_vma(struct cxl_context *ctx) +{ + u64 ea, last_esid = 0; + struct copro_slb slb; + struct vm_area_struct *vma; + int rc; + struct task_struct *task; + struct mm_struct *mm; + + if (!(task = get_pid_task(ctx->pid, PIDTYPE_PID))) { + pr_devel("cxl_prefault_vma unable to get task %i\n", + pid_nr(ctx->pid)); + return; + } + if (!(mm = get_task_mm(task))) { + pr_devel("cxl_prefault_vm unable to get mm %i\n", + pid_nr(ctx->pid)); + goto out1; + } + + down_read(&mm->mmap_sem); + for (vma = mm->mmap; vma; vma = vma->vm_next) { + for (ea = vma->vm_start; ea < vma->vm_end; + ea = next_segment(ea, slb.vsid)) { + rc = copro_calculate_slb(mm, ea, &slb); + if (rc) + continue; + + if (last_esid == slb.esid) + continue; + + cxl_load_segment(ctx, &slb); + last_esid = slb.esid; + } + } + up_read(&mm->mmap_sem); + + mmput(mm); +out1: + put_task_struct(task); +} + +void cxl_prefault(struct cxl_context *ctx, u64 wed) +{ + switch (ctx->afu->prefault_mode) { + case CXL_PREFAULT_WED: + cxl_prefault_one(ctx, wed); + break; + case CXL_PREFAULT_ALL: + cxl_prefault_vma(ctx); + break; + default: + break; + } +} diff --git a/drivers/misc/cxl/file.c b/drivers/misc/cxl/file.c new file mode 100644 index 000000000000..847b7e646a7e --- /dev/null +++ b/drivers/misc/cxl/file.c @@ -0,0 +1,508 @@ +/* + * Copyright 2014 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "cxl.h" + +#define CXL_NUM_MINORS 256 /* Total to reserve */ +#define CXL_DEV_MINORS 13 /* 1 control + 4 AFUs * 3 (dedicated/master/shared) */ + +#define CXL_CARD_MINOR(adapter) (adapter->adapter_num * CXL_DEV_MINORS) +#define CXL_AFU_MINOR_D(afu) (CXL_CARD_MINOR(afu->adapter) + 1 + (3 * afu->slice)) +#define CXL_AFU_MINOR_M(afu) (CXL_AFU_MINOR_D(afu) + 1) +#define CXL_AFU_MINOR_S(afu) (CXL_AFU_MINOR_D(afu) + 2) +#define CXL_AFU_MKDEV_D(afu) MKDEV(MAJOR(cxl_dev), CXL_AFU_MINOR_D(afu)) +#define CXL_AFU_MKDEV_M(afu) MKDEV(MAJOR(cxl_dev), CXL_AFU_MINOR_M(afu)) +#define CXL_AFU_MKDEV_S(afu) MKDEV(MAJOR(cxl_dev), CXL_AFU_MINOR_S(afu)) + +#define CXL_DEVT_ADAPTER(dev) (MINOR(dev) / CXL_DEV_MINORS) +#define CXL_DEVT_AFU(dev) ((MINOR(dev) % CXL_DEV_MINORS - 1) / 3) + +#define CXL_DEVT_IS_CARD(dev) (MINOR(dev) % CXL_DEV_MINORS == 0) + +static dev_t cxl_dev; + +static struct class *cxl_class; + +static int __afu_open(struct inode *inode, struct file *file, bool master) +{ + struct cxl *adapter; + struct cxl_afu *afu; + struct cxl_context *ctx; + int adapter_num = CXL_DEVT_ADAPTER(inode->i_rdev); + int slice = CXL_DEVT_AFU(inode->i_rdev); + int rc = -ENODEV; + + pr_devel("afu_open afu%i.%i\n", slice, adapter_num); + + if (!(adapter = get_cxl_adapter(adapter_num))) + return -ENODEV; + + if (slice > adapter->slices) + goto err_put_adapter; + + spin_lock(&adapter->afu_list_lock); + if (!(afu = adapter->afu[slice])) { + spin_unlock(&adapter->afu_list_lock); + goto err_put_adapter; + } + get_device(&afu->dev); + spin_unlock(&adapter->afu_list_lock); + + if (!afu->current_mode) + goto err_put_afu; + + if (!(ctx = cxl_context_alloc())) { + rc = -ENOMEM; + goto err_put_afu; + } + + if ((rc = cxl_context_init(ctx, afu, master))) + goto err_put_afu; + + pr_devel("afu_open pe: %i\n", ctx->pe); + file->private_data = ctx; + cxl_ctx_get(); + + /* Our ref on the AFU will now hold the adapter */ + put_device(&adapter->dev); + + return 0; + +err_put_afu: + put_device(&afu->dev); +err_put_adapter: + put_device(&adapter->dev); + return rc; +} +static int afu_open(struct inode *inode, struct file *file) +{ + return __afu_open(inode, file, false); +} + +static int afu_master_open(struct inode *inode, struct file *file) +{ + return __afu_open(inode, file, true); +} + +static int afu_release(struct inode *inode, struct file *file) +{ + struct cxl_context *ctx = file->private_data; + + pr_devel("%s: closing cxl file descriptor. pe: %i\n", + __func__, ctx->pe); + cxl_context_detach(ctx); + + put_device(&ctx->afu->dev); + + /* + * At this this point all bottom halfs have finished and we should be + * getting no more IRQs from the hardware for this context. Once it's + * removed from the IDR (and RCU synchronised) it's safe to free the + * sstp and context. + */ + cxl_context_free(ctx); + + cxl_ctx_put(); + return 0; +} + +static long afu_ioctl_start_work(struct cxl_context *ctx, + struct cxl_ioctl_start_work __user *uwork) +{ + struct cxl_ioctl_start_work work; + u64 amr = 0; + int rc; + + pr_devel("%s: pe: %i\n", __func__, ctx->pe); + + mutex_lock(&ctx->status_mutex); + if (ctx->status != OPENED) { + rc = -EIO; + goto out; + } + + if (copy_from_user(&work, uwork, + sizeof(struct cxl_ioctl_start_work))) { + rc = -EFAULT; + goto out; + } + + /* + * if any of the reserved fields are set or any of the unused + * flags are set it's invalid + */ + if (work.reserved1 || work.reserved2 || work.reserved3 || + work.reserved4 || work.reserved5 || work.reserved6 || + (work.flags & ~CXL_START_WORK_ALL)) { + rc = -EINVAL; + goto out; + } + + if (!(work.flags & CXL_START_WORK_NUM_IRQS)) + work.num_interrupts = ctx->afu->pp_irqs; + else if ((work.num_interrupts < ctx->afu->pp_irqs) || + (work.num_interrupts > ctx->afu->irqs_max)) { + rc = -EINVAL; + goto out; + } + if ((rc = afu_register_irqs(ctx, work.num_interrupts))) + goto out; + + if (work.flags & CXL_START_WORK_AMR) + amr = work.amr & mfspr(SPRN_UAMOR); + + /* + * We grab the PID here and not in the file open to allow for the case + * where a process (master, some daemon, etc) has opened the chardev on + * behalf of another process, so the AFU's mm gets bound to the process + * that performs this ioctl and not the process that opened the file. + */ + ctx->pid = get_pid(get_task_pid(current, PIDTYPE_PID)); + + if ((rc = cxl_attach_process(ctx, false, work.work_element_descriptor, + amr))) + goto out; + + ctx->status = STARTED; + rc = 0; +out: + mutex_unlock(&ctx->status_mutex); + return rc; +} +static long afu_ioctl_process_element(struct cxl_context *ctx, + int __user *upe) +{ + pr_devel("%s: pe: %i\n", __func__, ctx->pe); + + if (copy_to_user(upe, &ctx->pe, sizeof(__u32))) + return -EFAULT; + + return 0; +} + +static long afu_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + struct cxl_context *ctx = file->private_data; + + if (ctx->status == CLOSED) + return -EIO; + + pr_devel("afu_ioctl\n"); + switch (cmd) { + case CXL_IOCTL_START_WORK: + return afu_ioctl_start_work(ctx, (struct cxl_ioctl_start_work __user *)arg); + case CXL_IOCTL_GET_PROCESS_ELEMENT: + return afu_ioctl_process_element(ctx, (__u32 __user *)arg); + } + return -EINVAL; +} + +static long afu_compat_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + return afu_ioctl(file, cmd, arg); +} + +static int afu_mmap(struct file *file, struct vm_area_struct *vm) +{ + struct cxl_context *ctx = file->private_data; + + /* AFU must be started before we can MMIO */ + if (ctx->status != STARTED) + return -EIO; + + return cxl_context_iomap(ctx, vm); +} + +static unsigned int afu_poll(struct file *file, struct poll_table_struct *poll) +{ + struct cxl_context *ctx = file->private_data; + int mask = 0; + unsigned long flags; + + + poll_wait(file, &ctx->wq, poll); + + pr_devel("afu_poll wait done pe: %i\n", ctx->pe); + + spin_lock_irqsave(&ctx->lock, flags); + if (ctx->pending_irq || ctx->pending_fault || + ctx->pending_afu_err) + mask |= POLLIN | POLLRDNORM; + else if (ctx->status == CLOSED) + /* Only error on closed when there are no futher events pending + */ + mask |= POLLERR; + spin_unlock_irqrestore(&ctx->lock, flags); + + pr_devel("afu_poll pe: %i returning %#x\n", ctx->pe, mask); + + return mask; +} + +static inline int ctx_event_pending(struct cxl_context *ctx) +{ + return (ctx->pending_irq || ctx->pending_fault || + ctx->pending_afu_err || (ctx->status == CLOSED)); +} + +static ssize_t afu_read(struct file *file, char __user *buf, size_t count, + loff_t *off) +{ + struct cxl_context *ctx = file->private_data; + struct cxl_event event; + unsigned long flags; + DEFINE_WAIT(wait); + + if (count < CXL_READ_MIN_SIZE) + return -EINVAL; + + spin_lock_irqsave(&ctx->lock, flags); + + for (;;) { + prepare_to_wait(&ctx->wq, &wait, TASK_INTERRUPTIBLE); + if (ctx_event_pending(ctx)) + break; + + spin_unlock_irqrestore(&ctx->lock, flags); + if (file->f_flags & O_NONBLOCK) + return -EAGAIN; + + if (signal_pending(current)) + return -ERESTARTSYS; + + pr_devel("afu_read going to sleep...\n"); + schedule(); + pr_devel("afu_read woken up\n"); + spin_lock_irqsave(&ctx->lock, flags); + } + + finish_wait(&ctx->wq, &wait); + + memset(&event, 0, sizeof(event)); + event.header.process_element = ctx->pe; + event.header.size = sizeof(struct cxl_event_header); + if (ctx->pending_irq) { + pr_devel("afu_read delivering AFU interrupt\n"); + event.header.size += sizeof(struct cxl_event_afu_interrupt); + event.header.type = CXL_EVENT_AFU_INTERRUPT; + event.irq.irq = find_first_bit(ctx->irq_bitmap, ctx->irq_count) + 1; + clear_bit(event.irq.irq - 1, ctx->irq_bitmap); + if (bitmap_empty(ctx->irq_bitmap, ctx->irq_count)) + ctx->pending_irq = false; + } else if (ctx->pending_fault) { + pr_devel("afu_read delivering data storage fault\n"); + event.header.size += sizeof(struct cxl_event_data_storage); + event.header.type = CXL_EVENT_DATA_STORAGE; + event.fault.addr = ctx->fault_addr; + event.fault.dsisr = ctx->fault_dsisr; + ctx->pending_fault = false; + } else if (ctx->pending_afu_err) { + pr_devel("afu_read delivering afu error\n"); + event.header.size += sizeof(struct cxl_event_afu_error); + event.header.type = CXL_EVENT_AFU_ERROR; + event.afu_error.error = ctx->afu_err; + ctx->pending_afu_err = false; + } else if (ctx->status == CLOSED) { + pr_devel("afu_read fatal error\n"); + spin_unlock_irqrestore(&ctx->lock, flags); + return -EIO; + } else + WARN(1, "afu_read must be buggy\n"); + + spin_unlock_irqrestore(&ctx->lock, flags); + + if (copy_to_user(buf, &event, event.header.size)) + return -EFAULT; + return event.header.size; +} + +static const struct file_operations afu_fops = { + .owner = THIS_MODULE, + .open = afu_open, + .poll = afu_poll, + .read = afu_read, + .release = afu_release, + .unlocked_ioctl = afu_ioctl, + .compat_ioctl = afu_compat_ioctl, + .mmap = afu_mmap, +}; + +static const struct file_operations afu_master_fops = { + .owner = THIS_MODULE, + .open = afu_master_open, + .poll = afu_poll, + .read = afu_read, + .release = afu_release, + .unlocked_ioctl = afu_ioctl, + .compat_ioctl = afu_compat_ioctl, + .mmap = afu_mmap, +}; + + +static char *cxl_devnode(struct device *dev, umode_t *mode) +{ + if (CXL_DEVT_IS_CARD(dev->devt)) { + /* + * These minor numbers will eventually be used to program the + * PSL and AFUs once we have dynamic reprogramming support + */ + return NULL; + } + return kasprintf(GFP_KERNEL, "cxl/%s", dev_name(dev)); +} + +extern struct class *cxl_class; + +static int cxl_add_chardev(struct cxl_afu *afu, dev_t devt, struct cdev *cdev, + struct device **chardev, char *postfix, char *desc, + const struct file_operations *fops) +{ + struct device *dev; + int rc; + + cdev_init(cdev, fops); + if ((rc = cdev_add(cdev, devt, 1))) { + dev_err(&afu->dev, "Unable to add %s chardev: %i\n", desc, rc); + return rc; + } + + dev = device_create(cxl_class, &afu->dev, devt, afu, + "afu%i.%i%s", afu->adapter->adapter_num, afu->slice, postfix); + if (IS_ERR(dev)) { + dev_err(&afu->dev, "Unable to create %s chardev in sysfs: %i\n", desc, rc); + rc = PTR_ERR(dev); + goto err; + } + + *chardev = dev; + + return 0; +err: + cdev_del(cdev); + return rc; +} + +int cxl_chardev_d_afu_add(struct cxl_afu *afu) +{ + return cxl_add_chardev(afu, CXL_AFU_MKDEV_D(afu), &afu->afu_cdev_d, + &afu->chardev_d, "d", "dedicated", + &afu_master_fops); /* Uses master fops */ +} + +int cxl_chardev_m_afu_add(struct cxl_afu *afu) +{ + return cxl_add_chardev(afu, CXL_AFU_MKDEV_M(afu), &afu->afu_cdev_m, + &afu->chardev_m, "m", "master", + &afu_master_fops); +} + +int cxl_chardev_s_afu_add(struct cxl_afu *afu) +{ + return cxl_add_chardev(afu, CXL_AFU_MKDEV_S(afu), &afu->afu_cdev_s, + &afu->chardev_s, "s", "shared", + &afu_fops); +} + +void cxl_chardev_afu_remove(struct cxl_afu *afu) +{ + if (afu->chardev_d) { + cdev_del(&afu->afu_cdev_d); + device_unregister(afu->chardev_d); + afu->chardev_d = NULL; + } + if (afu->chardev_m) { + cdev_del(&afu->afu_cdev_m); + device_unregister(afu->chardev_m); + afu->chardev_m = NULL; + } + if (afu->chardev_s) { + cdev_del(&afu->afu_cdev_s); + device_unregister(afu->chardev_s); + afu->chardev_s = NULL; + } +} + +int cxl_register_afu(struct cxl_afu *afu) +{ + afu->dev.class = cxl_class; + + return device_register(&afu->dev); +} + +int cxl_register_adapter(struct cxl *adapter) +{ + adapter->dev.class = cxl_class; + + /* + * Future: When we support dynamically reprogramming the PSL & AFU we + * will expose the interface to do that via a chardev: + * adapter->dev.devt = CXL_CARD_MKDEV(adapter); + */ + + return device_register(&adapter->dev); +} + +int __init cxl_file_init(void) +{ + int rc; + + /* + * If these change we really need to update API. Either change some + * flags or update API version number CXL_API_VERSION. + */ + BUILD_BUG_ON(CXL_API_VERSION != 1); + BUILD_BUG_ON(sizeof(struct cxl_ioctl_start_work) != 64); + BUILD_BUG_ON(sizeof(struct cxl_event_header) != 8); + BUILD_BUG_ON(sizeof(struct cxl_event_afu_interrupt) != 8); + BUILD_BUG_ON(sizeof(struct cxl_event_data_storage) != 32); + BUILD_BUG_ON(sizeof(struct cxl_event_afu_error) != 16); + + if ((rc = alloc_chrdev_region(&cxl_dev, 0, CXL_NUM_MINORS, "cxl"))) { + pr_err("Unable to allocate CXL major number: %i\n", rc); + return rc; + } + + pr_devel("CXL device allocated, MAJOR %i\n", MAJOR(cxl_dev)); + + cxl_class = class_create(THIS_MODULE, "cxl"); + if (IS_ERR(cxl_class)) { + pr_err("Unable to create CXL class\n"); + rc = PTR_ERR(cxl_class); + goto err; + } + cxl_class->devnode = cxl_devnode; + + return 0; + +err: + unregister_chrdev_region(cxl_dev, CXL_NUM_MINORS); + return rc; +} + +void cxl_file_exit(void) +{ + unregister_chrdev_region(cxl_dev, CXL_NUM_MINORS); + class_destroy(cxl_class); +} diff --git a/drivers/misc/cxl/irq.c b/drivers/misc/cxl/irq.c new file mode 100644 index 000000000000..336020c8e1af --- /dev/null +++ b/drivers/misc/cxl/irq.c @@ -0,0 +1,402 @@ +/* + * Copyright 2014 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "cxl.h" + +/* XXX: This is implementation specific */ +static irqreturn_t handle_psl_slice_error(struct cxl_context *ctx, u64 dsisr, u64 errstat) +{ + u64 fir1, fir2, fir_slice, serr, afu_debug; + + fir1 = cxl_p1_read(ctx->afu->adapter, CXL_PSL_FIR1); + fir2 = cxl_p1_read(ctx->afu->adapter, CXL_PSL_FIR2); + fir_slice = cxl_p1n_read(ctx->afu, CXL_PSL_FIR_SLICE_An); + serr = cxl_p1n_read(ctx->afu, CXL_PSL_SERR_An); + afu_debug = cxl_p1n_read(ctx->afu, CXL_AFU_DEBUG_An); + + dev_crit(&ctx->afu->dev, "PSL ERROR STATUS: 0x%.16llx\n", errstat); + dev_crit(&ctx->afu->dev, "PSL_FIR1: 0x%.16llx\n", fir1); + dev_crit(&ctx->afu->dev, "PSL_FIR2: 0x%.16llx\n", fir2); + dev_crit(&ctx->afu->dev, "PSL_SERR_An: 0x%.16llx\n", serr); + dev_crit(&ctx->afu->dev, "PSL_FIR_SLICE_An: 0x%.16llx\n", fir_slice); + dev_crit(&ctx->afu->dev, "CXL_PSL_AFU_DEBUG_An: 0x%.16llx\n", afu_debug); + + dev_crit(&ctx->afu->dev, "STOPPING CXL TRACE\n"); + cxl_stop_trace(ctx->afu->adapter); + + return cxl_ack_irq(ctx, 0, errstat); +} + +irqreturn_t cxl_slice_irq_err(int irq, void *data) +{ + struct cxl_afu *afu = data; + u64 fir_slice, errstat, serr, afu_debug; + + WARN(irq, "CXL SLICE ERROR interrupt %i\n", irq); + + serr = cxl_p1n_read(afu, CXL_PSL_SERR_An); + fir_slice = cxl_p1n_read(afu, CXL_PSL_FIR_SLICE_An); + errstat = cxl_p2n_read(afu, CXL_PSL_ErrStat_An); + afu_debug = cxl_p1n_read(afu, CXL_AFU_DEBUG_An); + dev_crit(&afu->dev, "PSL_SERR_An: 0x%.16llx\n", serr); + dev_crit(&afu->dev, "PSL_FIR_SLICE_An: 0x%.16llx\n", fir_slice); + dev_crit(&afu->dev, "CXL_PSL_ErrStat_An: 0x%.16llx\n", errstat); + dev_crit(&afu->dev, "CXL_PSL_AFU_DEBUG_An: 0x%.16llx\n", afu_debug); + + cxl_p1n_write(afu, CXL_PSL_SERR_An, serr); + + return IRQ_HANDLED; +} + +static irqreturn_t cxl_irq_err(int irq, void *data) +{ + struct cxl *adapter = data; + u64 fir1, fir2, err_ivte; + + WARN(1, "CXL ERROR interrupt %i\n", irq); + + err_ivte = cxl_p1_read(adapter, CXL_PSL_ErrIVTE); + dev_crit(&adapter->dev, "PSL_ErrIVTE: 0x%.16llx\n", err_ivte); + + dev_crit(&adapter->dev, "STOPPING CXL TRACE\n"); + cxl_stop_trace(adapter); + + fir1 = cxl_p1_read(adapter, CXL_PSL_FIR1); + fir2 = cxl_p1_read(adapter, CXL_PSL_FIR2); + + dev_crit(&adapter->dev, "PSL_FIR1: 0x%.16llx\nPSL_FIR2: 0x%.16llx\n", fir1, fir2); + + return IRQ_HANDLED; +} + +static irqreturn_t schedule_cxl_fault(struct cxl_context *ctx, u64 dsisr, u64 dar) +{ + ctx->dsisr = dsisr; + ctx->dar = dar; + schedule_work(&ctx->fault_work); + return IRQ_HANDLED; +} + +static irqreturn_t cxl_irq(int irq, void *data) +{ + struct cxl_context *ctx = data; + struct cxl_irq_info irq_info; + u64 dsisr, dar; + int result; + + if ((result = cxl_get_irq(ctx, &irq_info))) { + WARN(1, "Unable to get CXL IRQ Info: %i\n", result); + return IRQ_HANDLED; + } + + dsisr = irq_info.dsisr; + dar = irq_info.dar; + + pr_devel("CXL interrupt %i for afu pe: %i DSISR: %#llx DAR: %#llx\n", irq, ctx->pe, dsisr, dar); + + if (dsisr & CXL_PSL_DSISR_An_DS) { + /* + * We don't inherently need to sleep to handle this, but we do + * need to get a ref to the task's mm, which we can't do from + * irq context without the potential for a deadlock since it + * takes the task_lock. An alternate option would be to keep a + * reference to the task's mm the entire time it has cxl open, + * but to do that we need to solve the issue where we hold a + * ref to the mm, but the mm can hold a ref to the fd after an + * mmap preventing anything from being cleaned up. + */ + pr_devel("Scheduling segment miss handling for later pe: %i\n", ctx->pe); + return schedule_cxl_fault(ctx, dsisr, dar); + } + + if (dsisr & CXL_PSL_DSISR_An_M) + pr_devel("CXL interrupt: PTE not found\n"); + if (dsisr & CXL_PSL_DSISR_An_P) + pr_devel("CXL interrupt: Storage protection violation\n"); + if (dsisr & CXL_PSL_DSISR_An_A) + pr_devel("CXL interrupt: AFU lock access to write through or cache inhibited storage\n"); + if (dsisr & CXL_PSL_DSISR_An_S) + pr_devel("CXL interrupt: Access was afu_wr or afu_zero\n"); + if (dsisr & CXL_PSL_DSISR_An_K) + pr_devel("CXL interrupt: Access not permitted by virtual page class key protection\n"); + + if (dsisr & CXL_PSL_DSISR_An_DM) { + /* + * In some cases we might be able to handle the fault + * immediately if hash_page would succeed, but we still need + * the task's mm, which as above we can't get without a lock + */ + pr_devel("Scheduling page fault handling for later pe: %i\n", ctx->pe); + return schedule_cxl_fault(ctx, dsisr, dar); + } + if (dsisr & CXL_PSL_DSISR_An_ST) + WARN(1, "CXL interrupt: Segment Table PTE not found\n"); + if (dsisr & CXL_PSL_DSISR_An_UR) + pr_devel("CXL interrupt: AURP PTE not found\n"); + if (dsisr & CXL_PSL_DSISR_An_PE) + return handle_psl_slice_error(ctx, dsisr, irq_info.errstat); + if (dsisr & CXL_PSL_DSISR_An_AE) { + pr_devel("CXL interrupt: AFU Error %.llx\n", irq_info.afu_err); + + if (ctx->pending_afu_err) { + /* + * This shouldn't happen - the PSL treats these errors + * as fatal and will have reset the AFU, so there's not + * much point buffering multiple AFU errors. + * OTOH if we DO ever see a storm of these come in it's + * probably best that we log them somewhere: + */ + dev_err_ratelimited(&ctx->afu->dev, "CXL AFU Error " + "undelivered to pe %i: %.llx\n", + ctx->pe, irq_info.afu_err); + } else { + spin_lock(&ctx->lock); + ctx->afu_err = irq_info.afu_err; + ctx->pending_afu_err = 1; + spin_unlock(&ctx->lock); + + wake_up_all(&ctx->wq); + } + + cxl_ack_irq(ctx, CXL_PSL_TFC_An_A, 0); + } + if (dsisr & CXL_PSL_DSISR_An_OC) + pr_devel("CXL interrupt: OS Context Warning\n"); + + WARN(1, "Unhandled CXL PSL IRQ\n"); + return IRQ_HANDLED; +} + +static irqreturn_t cxl_irq_multiplexed(int irq, void *data) +{ + struct cxl_afu *afu = data; + struct cxl_context *ctx; + int ph = cxl_p2n_read(afu, CXL_PSL_PEHandle_An) & 0xffff; + int ret; + + rcu_read_lock(); + ctx = idr_find(&afu->contexts_idr, ph); + if (ctx) { + ret = cxl_irq(irq, ctx); + rcu_read_unlock(); + return ret; + } + rcu_read_unlock(); + + WARN(1, "Unable to demultiplex CXL PSL IRQ\n"); + return IRQ_HANDLED; +} + +static irqreturn_t cxl_irq_afu(int irq, void *data) +{ + struct cxl_context *ctx = data; + irq_hw_number_t hwirq = irqd_to_hwirq(irq_get_irq_data(irq)); + int irq_off, afu_irq = 1; + __u16 range; + int r; + + for (r = 1; r < CXL_IRQ_RANGES; r++) { + irq_off = hwirq - ctx->irqs.offset[r]; + range = ctx->irqs.range[r]; + if (irq_off >= 0 && irq_off < range) { + afu_irq += irq_off; + break; + } + afu_irq += range; + } + if (unlikely(r >= CXL_IRQ_RANGES)) { + WARN(1, "Recieved AFU IRQ out of range for pe %i (virq %i hwirq %lx)\n", + ctx->pe, irq, hwirq); + return IRQ_HANDLED; + } + + pr_devel("Received AFU interrupt %i for pe: %i (virq %i hwirq %lx)\n", + afu_irq, ctx->pe, irq, hwirq); + + if (unlikely(!ctx->irq_bitmap)) { + WARN(1, "Recieved AFU IRQ for context with no IRQ bitmap\n"); + return IRQ_HANDLED; + } + spin_lock(&ctx->lock); + set_bit(afu_irq - 1, ctx->irq_bitmap); + ctx->pending_irq = true; + spin_unlock(&ctx->lock); + + wake_up_all(&ctx->wq); + + return IRQ_HANDLED; +} + +unsigned int cxl_map_irq(struct cxl *adapter, irq_hw_number_t hwirq, + irq_handler_t handler, void *cookie) +{ + unsigned int virq; + int result; + + /* IRQ Domain? */ + virq = irq_create_mapping(NULL, hwirq); + if (!virq) { + dev_warn(&adapter->dev, "cxl_map_irq: irq_create_mapping failed\n"); + return 0; + } + + cxl_setup_irq(adapter, hwirq, virq); + + pr_devel("hwirq %#lx mapped to virq %u\n", hwirq, virq); + + result = request_irq(virq, handler, 0, "cxl", cookie); + if (result) { + dev_warn(&adapter->dev, "cxl_map_irq: request_irq failed: %i\n", result); + return 0; + } + + return virq; +} + +void cxl_unmap_irq(unsigned int virq, void *cookie) +{ + free_irq(virq, cookie); + irq_dispose_mapping(virq); +} + +static int cxl_register_one_irq(struct cxl *adapter, + irq_handler_t handler, + void *cookie, + irq_hw_number_t *dest_hwirq, + unsigned int *dest_virq) +{ + int hwirq, virq; + + if ((hwirq = cxl_alloc_one_irq(adapter)) < 0) + return hwirq; + + if (!(virq = cxl_map_irq(adapter, hwirq, handler, cookie))) + goto err; + + *dest_hwirq = hwirq; + *dest_virq = virq; + + return 0; + +err: + cxl_release_one_irq(adapter, hwirq); + return -ENOMEM; +} + +int cxl_register_psl_err_irq(struct cxl *adapter) +{ + int rc; + + if ((rc = cxl_register_one_irq(adapter, cxl_irq_err, adapter, + &adapter->err_hwirq, + &adapter->err_virq))) + return rc; + + cxl_p1_write(adapter, CXL_PSL_ErrIVTE, adapter->err_hwirq & 0xffff); + + return 0; +} + +void cxl_release_psl_err_irq(struct cxl *adapter) +{ + cxl_p1_write(adapter, CXL_PSL_ErrIVTE, 0x0000000000000000); + cxl_unmap_irq(adapter->err_virq, adapter); + cxl_release_one_irq(adapter, adapter->err_hwirq); +} + +int cxl_register_serr_irq(struct cxl_afu *afu) +{ + u64 serr; + int rc; + + if ((rc = cxl_register_one_irq(afu->adapter, cxl_slice_irq_err, afu, + &afu->serr_hwirq, + &afu->serr_virq))) + return rc; + + serr = cxl_p1n_read(afu, CXL_PSL_SERR_An); + serr = (serr & 0x00ffffffffff0000ULL) | (afu->serr_hwirq & 0xffff); + cxl_p1n_write(afu, CXL_PSL_SERR_An, serr); + + return 0; +} + +void cxl_release_serr_irq(struct cxl_afu *afu) +{ + cxl_p1n_write(afu, CXL_PSL_SERR_An, 0x0000000000000000); + cxl_unmap_irq(afu->serr_virq, afu); + cxl_release_one_irq(afu->adapter, afu->serr_hwirq); +} + +int cxl_register_psl_irq(struct cxl_afu *afu) +{ + return cxl_register_one_irq(afu->adapter, cxl_irq_multiplexed, afu, + &afu->psl_hwirq, &afu->psl_virq); +} + +void cxl_release_psl_irq(struct cxl_afu *afu) +{ + cxl_unmap_irq(afu->psl_virq, afu); + cxl_release_one_irq(afu->adapter, afu->psl_hwirq); +} + +int afu_register_irqs(struct cxl_context *ctx, u32 count) +{ + irq_hw_number_t hwirq; + int rc, r, i; + + if ((rc = cxl_alloc_irq_ranges(&ctx->irqs, ctx->afu->adapter, count))) + return rc; + + /* Multiplexed PSL Interrupt */ + ctx->irqs.offset[0] = ctx->afu->psl_hwirq; + ctx->irqs.range[0] = 1; + + ctx->irq_count = count; + ctx->irq_bitmap = kcalloc(BITS_TO_LONGS(count), + sizeof(*ctx->irq_bitmap), GFP_KERNEL); + if (!ctx->irq_bitmap) + return -ENOMEM; + for (r = 1; r < CXL_IRQ_RANGES; r++) { + hwirq = ctx->irqs.offset[r]; + for (i = 0; i < ctx->irqs.range[r]; hwirq++, i++) { + cxl_map_irq(ctx->afu->adapter, hwirq, + cxl_irq_afu, ctx); + } + } + + return 0; +} + +void afu_release_irqs(struct cxl_context *ctx) +{ + irq_hw_number_t hwirq; + unsigned int virq; + int r, i; + + for (r = 1; r < CXL_IRQ_RANGES; r++) { + hwirq = ctx->irqs.offset[r]; + for (i = 0; i < ctx->irqs.range[r]; hwirq++, i++) { + virq = irq_find_mapping(NULL, hwirq); + if (virq) + cxl_unmap_irq(virq, ctx); + } + } + + cxl_release_irq_ranges(&ctx->irqs, ctx->afu->adapter); +} diff --git a/drivers/misc/cxl/main.c b/drivers/misc/cxl/main.c new file mode 100644 index 000000000000..4cde9b661642 --- /dev/null +++ b/drivers/misc/cxl/main.c @@ -0,0 +1,230 @@ +/* + * Copyright 2014 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "cxl.h" + +static DEFINE_SPINLOCK(adapter_idr_lock); +static DEFINE_IDR(cxl_adapter_idr); + +uint cxl_verbose; +module_param_named(verbose, cxl_verbose, uint, 0600); +MODULE_PARM_DESC(verbose, "Enable verbose dmesg output"); + +static inline void _cxl_slbia(struct cxl_context *ctx, struct mm_struct *mm) +{ + struct task_struct *task; + unsigned long flags; + if (!(task = get_pid_task(ctx->pid, PIDTYPE_PID))) { + pr_devel("%s unable to get task %i\n", + __func__, pid_nr(ctx->pid)); + return; + } + + if (task->mm != mm) + goto out_put; + + pr_devel("%s matched mm - card: %i afu: %i pe: %i\n", __func__, + ctx->afu->adapter->adapter_num, ctx->afu->slice, ctx->pe); + + spin_lock_irqsave(&ctx->sste_lock, flags); + memset(ctx->sstp, 0, ctx->sst_size); + spin_unlock_irqrestore(&ctx->sste_lock, flags); + mb(); + cxl_afu_slbia(ctx->afu); +out_put: + put_task_struct(task); +} + +static inline void cxl_slbia_core(struct mm_struct *mm) +{ + struct cxl *adapter; + struct cxl_afu *afu; + struct cxl_context *ctx; + int card, slice, id; + + pr_devel("%s called\n", __func__); + + spin_lock(&adapter_idr_lock); + idr_for_each_entry(&cxl_adapter_idr, adapter, card) { + /* XXX: Make this lookup faster with link from mm to ctx */ + spin_lock(&adapter->afu_list_lock); + for (slice = 0; slice < adapter->slices; slice++) { + afu = adapter->afu[slice]; + if (!afu->enabled) + continue; + rcu_read_lock(); + idr_for_each_entry(&afu->contexts_idr, ctx, id) + _cxl_slbia(ctx, mm); + rcu_read_unlock(); + } + spin_unlock(&adapter->afu_list_lock); + } + spin_unlock(&adapter_idr_lock); +} + +static struct cxl_calls cxl_calls = { + .cxl_slbia = cxl_slbia_core, + .owner = THIS_MODULE, +}; + +int cxl_alloc_sst(struct cxl_context *ctx) +{ + unsigned long vsid; + u64 ea_mask, size, sstp0, sstp1; + + sstp0 = 0; + sstp1 = 0; + + ctx->sst_size = PAGE_SIZE; + ctx->sst_lru = 0; + ctx->sstp = (struct cxl_sste *)get_zeroed_page(GFP_KERNEL); + if (!ctx->sstp) { + pr_err("cxl_alloc_sst: Unable to allocate segment table\n"); + return -ENOMEM; + } + pr_devel("SSTP allocated at 0x%p\n", ctx->sstp); + + vsid = get_kernel_vsid((u64)ctx->sstp, mmu_kernel_ssize) << 12; + + sstp0 |= (u64)mmu_kernel_ssize << CXL_SSTP0_An_B_SHIFT; + sstp0 |= (SLB_VSID_KERNEL | mmu_psize_defs[mmu_linear_psize].sllp) << 50; + + size = (((u64)ctx->sst_size >> 8) - 1) << CXL_SSTP0_An_SegTableSize_SHIFT; + if (unlikely(size & ~CXL_SSTP0_An_SegTableSize_MASK)) { + WARN(1, "Impossible segment table size\n"); + return -EINVAL; + } + sstp0 |= size; + + if (mmu_kernel_ssize == MMU_SEGSIZE_256M) + ea_mask = 0xfffff00ULL; + else + ea_mask = 0xffffffff00ULL; + + sstp0 |= vsid >> (50-14); /* Top 14 bits of VSID */ + sstp1 |= (vsid << (64-(50-14))) & ~ea_mask; + sstp1 |= (u64)ctx->sstp & ea_mask; + sstp1 |= CXL_SSTP1_An_V; + + pr_devel("Looked up %#llx: slbfee. %#llx (ssize: %x, vsid: %#lx), copied to SSTP0: %#llx, SSTP1: %#llx\n", + (u64)ctx->sstp, (u64)ctx->sstp & ESID_MASK, mmu_kernel_ssize, vsid, sstp0, sstp1); + + /* Store calculated sstp hardware points for use later */ + ctx->sstp0 = sstp0; + ctx->sstp1 = sstp1; + + return 0; +} + +/* Find a CXL adapter by it's number and increase it's refcount */ +struct cxl *get_cxl_adapter(int num) +{ + struct cxl *adapter; + + spin_lock(&adapter_idr_lock); + if ((adapter = idr_find(&cxl_adapter_idr, num))) + get_device(&adapter->dev); + spin_unlock(&adapter_idr_lock); + + return adapter; +} + +int cxl_alloc_adapter_nr(struct cxl *adapter) +{ + int i; + + idr_preload(GFP_KERNEL); + spin_lock(&adapter_idr_lock); + i = idr_alloc(&cxl_adapter_idr, adapter, 0, 0, GFP_NOWAIT); + spin_unlock(&adapter_idr_lock); + idr_preload_end(); + if (i < 0) + return i; + + adapter->adapter_num = i; + + return 0; +} + +void cxl_remove_adapter_nr(struct cxl *adapter) +{ + idr_remove(&cxl_adapter_idr, adapter->adapter_num); +} + +int cxl_afu_select_best_mode(struct cxl_afu *afu) +{ + if (afu->modes_supported & CXL_MODE_DIRECTED) + return cxl_afu_activate_mode(afu, CXL_MODE_DIRECTED); + + if (afu->modes_supported & CXL_MODE_DEDICATED) + return cxl_afu_activate_mode(afu, CXL_MODE_DEDICATED); + + dev_warn(&afu->dev, "No supported programming modes available\n"); + /* We don't fail this so the user can inspect sysfs */ + return 0; +} + +static int __init init_cxl(void) +{ + int rc = 0; + + if (!cpu_has_feature(CPU_FTR_HVMODE)) + return -EPERM; + + if ((rc = cxl_file_init())) + return rc; + + cxl_debugfs_init(); + + if ((rc = register_cxl_calls(&cxl_calls))) + goto err; + + if ((rc = pci_register_driver(&cxl_pci_driver))) + goto err1; + + return 0; +err1: + unregister_cxl_calls(&cxl_calls); +err: + cxl_debugfs_exit(); + cxl_file_exit(); + + return rc; +} + +static void exit_cxl(void) +{ + pci_unregister_driver(&cxl_pci_driver); + + cxl_debugfs_exit(); + cxl_file_exit(); + unregister_cxl_calls(&cxl_calls); +} + +module_init(init_cxl); +module_exit(exit_cxl); + +MODULE_DESCRIPTION("IBM Coherent Accelerator"); +MODULE_AUTHOR("Ian Munsie "); +MODULE_LICENSE("GPL"); diff --git a/drivers/misc/cxl/native.c b/drivers/misc/cxl/native.c new file mode 100644 index 000000000000..623286a77114 --- /dev/null +++ b/drivers/misc/cxl/native.c @@ -0,0 +1,683 @@ +/* + * Copyright 2014 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "cxl.h" + +static int afu_control(struct cxl_afu *afu, u64 command, + u64 result, u64 mask, bool enabled) +{ + u64 AFU_Cntl = cxl_p2n_read(afu, CXL_AFU_Cntl_An); + unsigned long timeout = jiffies + (HZ * CXL_TIMEOUT); + + spin_lock(&afu->afu_cntl_lock); + pr_devel("AFU command starting: %llx\n", command); + + cxl_p2n_write(afu, CXL_AFU_Cntl_An, AFU_Cntl | command); + + AFU_Cntl = cxl_p2n_read(afu, CXL_AFU_Cntl_An); + while ((AFU_Cntl & mask) != result) { + if (time_after_eq(jiffies, timeout)) { + dev_warn(&afu->dev, "WARNING: AFU control timed out!\n"); + spin_unlock(&afu->afu_cntl_lock); + return -EBUSY; + } + pr_devel_ratelimited("AFU control... (0x%.16llx)\n", + AFU_Cntl | command); + cpu_relax(); + AFU_Cntl = cxl_p2n_read(afu, CXL_AFU_Cntl_An); + }; + pr_devel("AFU command complete: %llx\n", command); + afu->enabled = enabled; + spin_unlock(&afu->afu_cntl_lock); + + return 0; +} + +static int afu_enable(struct cxl_afu *afu) +{ + pr_devel("AFU enable request\n"); + + return afu_control(afu, CXL_AFU_Cntl_An_E, + CXL_AFU_Cntl_An_ES_Enabled, + CXL_AFU_Cntl_An_ES_MASK, true); +} + +int cxl_afu_disable(struct cxl_afu *afu) +{ + pr_devel("AFU disable request\n"); + + return afu_control(afu, 0, CXL_AFU_Cntl_An_ES_Disabled, + CXL_AFU_Cntl_An_ES_MASK, false); +} + +/* This will disable as well as reset */ +int cxl_afu_reset(struct cxl_afu *afu) +{ + pr_devel("AFU reset request\n"); + + return afu_control(afu, CXL_AFU_Cntl_An_RA, + CXL_AFU_Cntl_An_RS_Complete | CXL_AFU_Cntl_An_ES_Disabled, + CXL_AFU_Cntl_An_RS_MASK | CXL_AFU_Cntl_An_ES_MASK, + false); +} + +static int afu_check_and_enable(struct cxl_afu *afu) +{ + if (afu->enabled) + return 0; + return afu_enable(afu); +} + +int cxl_psl_purge(struct cxl_afu *afu) +{ + u64 PSL_CNTL = cxl_p1n_read(afu, CXL_PSL_SCNTL_An); + u64 AFU_Cntl = cxl_p2n_read(afu, CXL_AFU_Cntl_An); + u64 dsisr, dar; + u64 start, end; + unsigned long timeout = jiffies + (HZ * CXL_TIMEOUT); + + pr_devel("PSL purge request\n"); + + if ((AFU_Cntl & CXL_AFU_Cntl_An_ES_MASK) != CXL_AFU_Cntl_An_ES_Disabled) { + WARN(1, "psl_purge request while AFU not disabled!\n"); + cxl_afu_disable(afu); + } + + cxl_p1n_write(afu, CXL_PSL_SCNTL_An, + PSL_CNTL | CXL_PSL_SCNTL_An_Pc); + start = local_clock(); + PSL_CNTL = cxl_p1n_read(afu, CXL_PSL_SCNTL_An); + while ((PSL_CNTL & CXL_PSL_SCNTL_An_Ps_MASK) + == CXL_PSL_SCNTL_An_Ps_Pending) { + if (time_after_eq(jiffies, timeout)) { + dev_warn(&afu->dev, "WARNING: PSL Purge timed out!\n"); + return -EBUSY; + } + dsisr = cxl_p2n_read(afu, CXL_PSL_DSISR_An); + pr_devel_ratelimited("PSL purging... PSL_CNTL: 0x%.16llx PSL_DSISR: 0x%.16llx\n", PSL_CNTL, dsisr); + if (dsisr & CXL_PSL_DSISR_TRANS) { + dar = cxl_p2n_read(afu, CXL_PSL_DAR_An); + dev_notice(&afu->dev, "PSL purge terminating pending translation, DSISR: 0x%.16llx, DAR: 0x%.16llx\n", dsisr, dar); + cxl_p2n_write(afu, CXL_PSL_TFC_An, CXL_PSL_TFC_An_AE); + } else if (dsisr) { + dev_notice(&afu->dev, "PSL purge acknowledging pending non-translation fault, DSISR: 0x%.16llx\n", dsisr); + cxl_p2n_write(afu, CXL_PSL_TFC_An, CXL_PSL_TFC_An_A); + } else { + cpu_relax(); + } + PSL_CNTL = cxl_p1n_read(afu, CXL_PSL_SCNTL_An); + }; + end = local_clock(); + pr_devel("PSL purged in %lld ns\n", end - start); + + cxl_p1n_write(afu, CXL_PSL_SCNTL_An, + PSL_CNTL & ~CXL_PSL_SCNTL_An_Pc); + return 0; +} + +static int spa_max_procs(int spa_size) +{ + /* + * From the CAIA: + * end_of_SPA_area = SPA_Base + ((n+4) * 128) + (( ((n*8) + 127) >> 7) * 128) + 255 + * Most of that junk is really just an overly-complicated way of saying + * the last 256 bytes are __aligned(128), so it's really: + * end_of_SPA_area = end_of_PSL_queue_area + __aligned(128) 255 + * and + * end_of_PSL_queue_area = SPA_Base + ((n+4) * 128) + (n*8) - 1 + * so + * sizeof(SPA) = ((n+4) * 128) + (n*8) + __aligned(128) 256 + * Ignore the alignment (which is safe in this case as long as we are + * careful with our rounding) and solve for n: + */ + return ((spa_size / 8) - 96) / 17; +} + +static int alloc_spa(struct cxl_afu *afu) +{ + u64 spap; + + /* Work out how many pages to allocate */ + afu->spa_order = 0; + do { + afu->spa_order++; + afu->spa_size = (1 << afu->spa_order) * PAGE_SIZE; + afu->spa_max_procs = spa_max_procs(afu->spa_size); + } while (afu->spa_max_procs < afu->num_procs); + + WARN_ON(afu->spa_size > 0x100000); /* Max size supported by the hardware */ + + if (!(afu->spa = (struct cxl_process_element *) + __get_free_pages(GFP_KERNEL | __GFP_ZERO, afu->spa_order))) { + pr_err("cxl_alloc_spa: Unable to allocate scheduled process area\n"); + return -ENOMEM; + } + pr_devel("spa pages: %i afu->spa_max_procs: %i afu->num_procs: %i\n", + 1<spa_order, afu->spa_max_procs, afu->num_procs); + + afu->sw_command_status = (__be64 *)((char *)afu->spa + + ((afu->spa_max_procs + 3) * 128)); + + spap = virt_to_phys(afu->spa) & CXL_PSL_SPAP_Addr; + spap |= ((afu->spa_size >> (12 - CXL_PSL_SPAP_Size_Shift)) - 1) & CXL_PSL_SPAP_Size; + spap |= CXL_PSL_SPAP_V; + pr_devel("cxl: SPA allocated at 0x%p. Max processes: %i, sw_command_status: 0x%p CXL_PSL_SPAP_An=0x%016llx\n", afu->spa, afu->spa_max_procs, afu->sw_command_status, spap); + cxl_p1n_write(afu, CXL_PSL_SPAP_An, spap); + + return 0; +} + +static void release_spa(struct cxl_afu *afu) +{ + free_pages((unsigned long) afu->spa, afu->spa_order); +} + +int cxl_tlb_slb_invalidate(struct cxl *adapter) +{ + unsigned long timeout = jiffies + (HZ * CXL_TIMEOUT); + + pr_devel("CXL adapter wide TLBIA & SLBIA\n"); + + cxl_p1_write(adapter, CXL_PSL_AFUSEL, CXL_PSL_AFUSEL_A); + + cxl_p1_write(adapter, CXL_PSL_TLBIA, CXL_TLB_SLB_IQ_ALL); + while (cxl_p1_read(adapter, CXL_PSL_TLBIA) & CXL_TLB_SLB_P) { + if (time_after_eq(jiffies, timeout)) { + dev_warn(&adapter->dev, "WARNING: CXL adapter wide TLBIA timed out!\n"); + return -EBUSY; + } + cpu_relax(); + } + + cxl_p1_write(adapter, CXL_PSL_SLBIA, CXL_TLB_SLB_IQ_ALL); + while (cxl_p1_read(adapter, CXL_PSL_SLBIA) & CXL_TLB_SLB_P) { + if (time_after_eq(jiffies, timeout)) { + dev_warn(&adapter->dev, "WARNING: CXL adapter wide SLBIA timed out!\n"); + return -EBUSY; + } + cpu_relax(); + } + return 0; +} + +int cxl_afu_slbia(struct cxl_afu *afu) +{ + unsigned long timeout = jiffies + (HZ * CXL_TIMEOUT); + + pr_devel("cxl_afu_slbia issuing SLBIA command\n"); + cxl_p2n_write(afu, CXL_SLBIA_An, CXL_TLB_SLB_IQ_ALL); + while (cxl_p2n_read(afu, CXL_SLBIA_An) & CXL_TLB_SLB_P) { + if (time_after_eq(jiffies, timeout)) { + dev_warn(&afu->dev, "WARNING: CXL AFU SLBIA timed out!\n"); + return -EBUSY; + } + cpu_relax(); + } + return 0; +} + +static int cxl_write_sstp(struct cxl_afu *afu, u64 sstp0, u64 sstp1) +{ + int rc; + + /* 1. Disable SSTP by writing 0 to SSTP1[V] */ + cxl_p2n_write(afu, CXL_SSTP1_An, 0); + + /* 2. Invalidate all SLB entries */ + if ((rc = cxl_afu_slbia(afu))) + return rc; + + /* 3. Set SSTP0_An */ + cxl_p2n_write(afu, CXL_SSTP0_An, sstp0); + + /* 4. Set SSTP1_An */ + cxl_p2n_write(afu, CXL_SSTP1_An, sstp1); + + return 0; +} + +/* Using per slice version may improve performance here. (ie. SLBIA_An) */ +static void slb_invalid(struct cxl_context *ctx) +{ + struct cxl *adapter = ctx->afu->adapter; + u64 slbia; + + WARN_ON(!mutex_is_locked(&ctx->afu->spa_mutex)); + + cxl_p1_write(adapter, CXL_PSL_LBISEL, + ((u64)be32_to_cpu(ctx->elem->common.pid) << 32) | + be32_to_cpu(ctx->elem->lpid)); + cxl_p1_write(adapter, CXL_PSL_SLBIA, CXL_TLB_SLB_IQ_LPIDPID); + + while (1) { + slbia = cxl_p1_read(adapter, CXL_PSL_SLBIA); + if (!(slbia & CXL_TLB_SLB_P)) + break; + cpu_relax(); + } +} + +static int do_process_element_cmd(struct cxl_context *ctx, + u64 cmd, u64 pe_state) +{ + u64 state; + + WARN_ON(!ctx->afu->enabled); + + ctx->elem->software_state = cpu_to_be32(pe_state); + smp_wmb(); + *(ctx->afu->sw_command_status) = cpu_to_be64(cmd | 0 | ctx->pe); + smp_mb(); + cxl_p1n_write(ctx->afu, CXL_PSL_LLCMD_An, cmd | ctx->pe); + while (1) { + state = be64_to_cpup(ctx->afu->sw_command_status); + if (state == ~0ULL) { + pr_err("cxl: Error adding process element to AFU\n"); + return -1; + } + if ((state & (CXL_SPA_SW_CMD_MASK | CXL_SPA_SW_STATE_MASK | CXL_SPA_SW_LINK_MASK)) == + (cmd | (cmd >> 16) | ctx->pe)) + break; + /* + * The command won't finish in the PSL if there are + * outstanding DSIs. Hence we need to yield here in + * case there are outstanding DSIs that we need to + * service. Tuning possiblity: we could wait for a + * while before sched + */ + schedule(); + + } + return 0; +} + +static int add_process_element(struct cxl_context *ctx) +{ + int rc = 0; + + mutex_lock(&ctx->afu->spa_mutex); + pr_devel("%s Adding pe: %i started\n", __func__, ctx->pe); + if (!(rc = do_process_element_cmd(ctx, CXL_SPA_SW_CMD_ADD, CXL_PE_SOFTWARE_STATE_V))) + ctx->pe_inserted = true; + pr_devel("%s Adding pe: %i finished\n", __func__, ctx->pe); + mutex_unlock(&ctx->afu->spa_mutex); + return rc; +} + +static int terminate_process_element(struct cxl_context *ctx) +{ + int rc = 0; + + /* fast path terminate if it's already invalid */ + if (!(ctx->elem->software_state & cpu_to_be32(CXL_PE_SOFTWARE_STATE_V))) + return rc; + + mutex_lock(&ctx->afu->spa_mutex); + pr_devel("%s Terminate pe: %i started\n", __func__, ctx->pe); + rc = do_process_element_cmd(ctx, CXL_SPA_SW_CMD_TERMINATE, + CXL_PE_SOFTWARE_STATE_V | CXL_PE_SOFTWARE_STATE_T); + ctx->elem->software_state = 0; /* Remove Valid bit */ + pr_devel("%s Terminate pe: %i finished\n", __func__, ctx->pe); + mutex_unlock(&ctx->afu->spa_mutex); + return rc; +} + +static int remove_process_element(struct cxl_context *ctx) +{ + int rc = 0; + + mutex_lock(&ctx->afu->spa_mutex); + pr_devel("%s Remove pe: %i started\n", __func__, ctx->pe); + if (!(rc = do_process_element_cmd(ctx, CXL_SPA_SW_CMD_REMOVE, 0))) + ctx->pe_inserted = false; + slb_invalid(ctx); + pr_devel("%s Remove pe: %i finished\n", __func__, ctx->pe); + mutex_unlock(&ctx->afu->spa_mutex); + + return rc; +} + + +static void assign_psn_space(struct cxl_context *ctx) +{ + if (!ctx->afu->pp_size || ctx->master) { + ctx->psn_phys = ctx->afu->psn_phys; + ctx->psn_size = ctx->afu->adapter->ps_size; + } else { + ctx->psn_phys = ctx->afu->psn_phys + + (ctx->afu->pp_offset + ctx->afu->pp_size * ctx->pe); + ctx->psn_size = ctx->afu->pp_size; + } +} + +static int activate_afu_directed(struct cxl_afu *afu) +{ + int rc; + + dev_info(&afu->dev, "Activating AFU directed mode\n"); + + if (alloc_spa(afu)) + return -ENOMEM; + + cxl_p1n_write(afu, CXL_PSL_SCNTL_An, CXL_PSL_SCNTL_An_PM_AFU); + cxl_p1n_write(afu, CXL_PSL_AMOR_An, 0xFFFFFFFFFFFFFFFFULL); + cxl_p1n_write(afu, CXL_PSL_ID_An, CXL_PSL_ID_An_F | CXL_PSL_ID_An_L); + + afu->current_mode = CXL_MODE_DIRECTED; + afu->num_procs = afu->max_procs_virtualised; + + if ((rc = cxl_chardev_m_afu_add(afu))) + return rc; + + if ((rc = cxl_sysfs_afu_m_add(afu))) + goto err; + + if ((rc = cxl_chardev_s_afu_add(afu))) + goto err1; + + return 0; +err1: + cxl_sysfs_afu_m_remove(afu); +err: + cxl_chardev_afu_remove(afu); + return rc; +} + +#ifdef CONFIG_CPU_LITTLE_ENDIAN +#define set_endian(sr) ((sr) |= CXL_PSL_SR_An_LE) +#else +#define set_endian(sr) ((sr) &= ~(CXL_PSL_SR_An_LE)) +#endif + +static int attach_afu_directed(struct cxl_context *ctx, u64 wed, u64 amr) +{ + u64 sr; + int r, result; + + assign_psn_space(ctx); + + ctx->elem->ctxtime = 0; /* disable */ + ctx->elem->lpid = cpu_to_be32(mfspr(SPRN_LPID)); + ctx->elem->haurp = 0; /* disable */ + ctx->elem->sdr = cpu_to_be64(mfspr(SPRN_SDR1)); + + sr = CXL_PSL_SR_An_SC; + if (ctx->master) + sr |= CXL_PSL_SR_An_MP; + if (mfspr(SPRN_LPCR) & LPCR_TC) + sr |= CXL_PSL_SR_An_TC; + /* HV=0, PR=1, R=1 for userspace + * For kernel contexts: this would need to change + */ + sr |= CXL_PSL_SR_An_PR | CXL_PSL_SR_An_R; + set_endian(sr); + sr &= ~(CXL_PSL_SR_An_HV); + if (!test_tsk_thread_flag(current, TIF_32BIT)) + sr |= CXL_PSL_SR_An_SF; + ctx->elem->common.pid = cpu_to_be32(current->pid); + ctx->elem->common.tid = 0; + ctx->elem->sr = cpu_to_be64(sr); + + ctx->elem->common.csrp = 0; /* disable */ + ctx->elem->common.aurp0 = 0; /* disable */ + ctx->elem->common.aurp1 = 0; /* disable */ + + cxl_prefault(ctx, wed); + + ctx->elem->common.sstp0 = cpu_to_be64(ctx->sstp0); + ctx->elem->common.sstp1 = cpu_to_be64(ctx->sstp1); + + for (r = 0; r < CXL_IRQ_RANGES; r++) { + ctx->elem->ivte_offsets[r] = cpu_to_be16(ctx->irqs.offset[r]); + ctx->elem->ivte_ranges[r] = cpu_to_be16(ctx->irqs.range[r]); + } + + ctx->elem->common.amr = cpu_to_be64(amr); + ctx->elem->common.wed = cpu_to_be64(wed); + + /* first guy needs to enable */ + if ((result = afu_check_and_enable(ctx->afu))) + return result; + + add_process_element(ctx); + + return 0; +} + +static int deactivate_afu_directed(struct cxl_afu *afu) +{ + dev_info(&afu->dev, "Deactivating AFU directed mode\n"); + + afu->current_mode = 0; + afu->num_procs = 0; + + cxl_sysfs_afu_m_remove(afu); + cxl_chardev_afu_remove(afu); + + cxl_afu_reset(afu); + cxl_afu_disable(afu); + cxl_psl_purge(afu); + + release_spa(afu); + + return 0; +} + +static int activate_dedicated_process(struct cxl_afu *afu) +{ + dev_info(&afu->dev, "Activating dedicated process mode\n"); + + cxl_p1n_write(afu, CXL_PSL_SCNTL_An, CXL_PSL_SCNTL_An_PM_Process); + + cxl_p1n_write(afu, CXL_PSL_CtxTime_An, 0); /* disable */ + cxl_p1n_write(afu, CXL_PSL_SPAP_An, 0); /* disable */ + cxl_p1n_write(afu, CXL_PSL_AMOR_An, 0xFFFFFFFFFFFFFFFFULL); + cxl_p1n_write(afu, CXL_PSL_LPID_An, mfspr(SPRN_LPID)); + cxl_p1n_write(afu, CXL_HAURP_An, 0); /* disable */ + cxl_p1n_write(afu, CXL_PSL_SDR_An, mfspr(SPRN_SDR1)); + + cxl_p2n_write(afu, CXL_CSRP_An, 0); /* disable */ + cxl_p2n_write(afu, CXL_AURP0_An, 0); /* disable */ + cxl_p2n_write(afu, CXL_AURP1_An, 0); /* disable */ + + afu->current_mode = CXL_MODE_DEDICATED; + afu->num_procs = 1; + + return cxl_chardev_d_afu_add(afu); +} + +static int attach_dedicated(struct cxl_context *ctx, u64 wed, u64 amr) +{ + struct cxl_afu *afu = ctx->afu; + u64 sr; + int rc; + + sr = CXL_PSL_SR_An_SC; + set_endian(sr); + if (ctx->master) + sr |= CXL_PSL_SR_An_MP; + if (mfspr(SPRN_LPCR) & LPCR_TC) + sr |= CXL_PSL_SR_An_TC; + sr |= CXL_PSL_SR_An_PR | CXL_PSL_SR_An_R; + if (!test_tsk_thread_flag(current, TIF_32BIT)) + sr |= CXL_PSL_SR_An_SF; + cxl_p2n_write(afu, CXL_PSL_PID_TID_An, (u64)current->pid << 32); + cxl_p1n_write(afu, CXL_PSL_SR_An, sr); + + if ((rc = cxl_write_sstp(afu, ctx->sstp0, ctx->sstp1))) + return rc; + + cxl_prefault(ctx, wed); + + cxl_p1n_write(afu, CXL_PSL_IVTE_Offset_An, + (((u64)ctx->irqs.offset[0] & 0xffff) << 48) | + (((u64)ctx->irqs.offset[1] & 0xffff) << 32) | + (((u64)ctx->irqs.offset[2] & 0xffff) << 16) | + ((u64)ctx->irqs.offset[3] & 0xffff)); + cxl_p1n_write(afu, CXL_PSL_IVTE_Limit_An, (u64) + (((u64)ctx->irqs.range[0] & 0xffff) << 48) | + (((u64)ctx->irqs.range[1] & 0xffff) << 32) | + (((u64)ctx->irqs.range[2] & 0xffff) << 16) | + ((u64)ctx->irqs.range[3] & 0xffff)); + + cxl_p2n_write(afu, CXL_PSL_AMR_An, amr); + + /* master only context for dedicated */ + assign_psn_space(ctx); + + if ((rc = cxl_afu_reset(afu))) + return rc; + + cxl_p2n_write(afu, CXL_PSL_WED_An, wed); + + return afu_enable(afu); +} + +static int deactivate_dedicated_process(struct cxl_afu *afu) +{ + dev_info(&afu->dev, "Deactivating dedicated process mode\n"); + + afu->current_mode = 0; + afu->num_procs = 0; + + cxl_chardev_afu_remove(afu); + + return 0; +} + +int _cxl_afu_deactivate_mode(struct cxl_afu *afu, int mode) +{ + if (mode == CXL_MODE_DIRECTED) + return deactivate_afu_directed(afu); + if (mode == CXL_MODE_DEDICATED) + return deactivate_dedicated_process(afu); + return 0; +} + +int cxl_afu_deactivate_mode(struct cxl_afu *afu) +{ + return _cxl_afu_deactivate_mode(afu, afu->current_mode); +} + +int cxl_afu_activate_mode(struct cxl_afu *afu, int mode) +{ + if (!mode) + return 0; + if (!(mode & afu->modes_supported)) + return -EINVAL; + + if (mode == CXL_MODE_DIRECTED) + return activate_afu_directed(afu); + if (mode == CXL_MODE_DEDICATED) + return activate_dedicated_process(afu); + + return -EINVAL; +} + +int cxl_attach_process(struct cxl_context *ctx, bool kernel, u64 wed, u64 amr) +{ + ctx->kernel = kernel; + if (ctx->afu->current_mode == CXL_MODE_DIRECTED) + return attach_afu_directed(ctx, wed, amr); + + if (ctx->afu->current_mode == CXL_MODE_DEDICATED) + return attach_dedicated(ctx, wed, amr); + + return -EINVAL; +} + +static inline int detach_process_native_dedicated(struct cxl_context *ctx) +{ + cxl_afu_reset(ctx->afu); + cxl_afu_disable(ctx->afu); + cxl_psl_purge(ctx->afu); + return 0; +} + +/* + * TODO: handle case when this is called inside a rcu_read_lock() which may + * happen when we unbind the driver (ie. cxl_context_detach_all()) . Terminate + * & remove use a mutex lock and schedule which will not good with lock held. + * May need to write do_process_element_cmd() that handles outstanding page + * faults synchronously. + */ +static inline int detach_process_native_afu_directed(struct cxl_context *ctx) +{ + if (!ctx->pe_inserted) + return 0; + if (terminate_process_element(ctx)) + return -1; + if (remove_process_element(ctx)) + return -1; + + return 0; +} + +int cxl_detach_process(struct cxl_context *ctx) +{ + if (ctx->afu->current_mode == CXL_MODE_DEDICATED) + return detach_process_native_dedicated(ctx); + + return detach_process_native_afu_directed(ctx); +} + +int cxl_get_irq(struct cxl_context *ctx, struct cxl_irq_info *info) +{ + u64 pidtid; + + info->dsisr = cxl_p2n_read(ctx->afu, CXL_PSL_DSISR_An); + info->dar = cxl_p2n_read(ctx->afu, CXL_PSL_DAR_An); + info->dsr = cxl_p2n_read(ctx->afu, CXL_PSL_DSR_An); + pidtid = cxl_p2n_read(ctx->afu, CXL_PSL_PID_TID_An); + info->pid = pidtid >> 32; + info->tid = pidtid & 0xffffffff; + info->afu_err = cxl_p2n_read(ctx->afu, CXL_AFU_ERR_An); + info->errstat = cxl_p2n_read(ctx->afu, CXL_PSL_ErrStat_An); + + return 0; +} + +static void recover_psl_err(struct cxl_afu *afu, u64 errstat) +{ + u64 dsisr; + + pr_devel("RECOVERING FROM PSL ERROR... (0x%.16llx)\n", errstat); + + /* Clear PSL_DSISR[PE] */ + dsisr = cxl_p2n_read(afu, CXL_PSL_DSISR_An); + cxl_p2n_write(afu, CXL_PSL_DSISR_An, dsisr & ~CXL_PSL_DSISR_An_PE); + + /* Write 1s to clear error status bits */ + cxl_p2n_write(afu, CXL_PSL_ErrStat_An, errstat); +} + +int cxl_ack_irq(struct cxl_context *ctx, u64 tfc, u64 psl_reset_mask) +{ + if (tfc) + cxl_p2n_write(ctx->afu, CXL_PSL_TFC_An, tfc); + if (psl_reset_mask) + recover_psl_err(ctx->afu, psl_reset_mask); + + return 0; +} + +int cxl_check_error(struct cxl_afu *afu) +{ + return (cxl_p1n_read(afu, CXL_PSL_SCNTL_An) == ~0ULL); +} diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c new file mode 100644 index 000000000000..10c98ab7f46e --- /dev/null +++ b/drivers/misc/cxl/pci.c @@ -0,0 +1,1000 @@ +/* + * Copyright 2014 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include /* for struct pci_controller */ +#include + +#include "cxl.h" + + +#define CXL_PCI_VSEC_ID 0x1280 +#define CXL_VSEC_MIN_SIZE 0x80 + +#define CXL_READ_VSEC_LENGTH(dev, vsec, dest) \ + { \ + pci_read_config_word(dev, vsec + 0x6, dest); \ + *dest >>= 4; \ + } +#define CXL_READ_VSEC_NAFUS(dev, vsec, dest) \ + pci_read_config_byte(dev, vsec + 0x8, dest) + +#define CXL_READ_VSEC_STATUS(dev, vsec, dest) \ + pci_read_config_byte(dev, vsec + 0x9, dest) +#define CXL_STATUS_SECOND_PORT 0x80 +#define CXL_STATUS_MSI_X_FULL 0x40 +#define CXL_STATUS_MSI_X_SINGLE 0x20 +#define CXL_STATUS_FLASH_RW 0x08 +#define CXL_STATUS_FLASH_RO 0x04 +#define CXL_STATUS_LOADABLE_AFU 0x02 +#define CXL_STATUS_LOADABLE_PSL 0x01 +/* If we see these features we won't try to use the card */ +#define CXL_UNSUPPORTED_FEATURES \ + (CXL_STATUS_MSI_X_FULL | CXL_STATUS_MSI_X_SINGLE) + +#define CXL_READ_VSEC_MODE_CONTROL(dev, vsec, dest) \ + pci_read_config_byte(dev, vsec + 0xa, dest) +#define CXL_WRITE_VSEC_MODE_CONTROL(dev, vsec, val) \ + pci_write_config_byte(dev, vsec + 0xa, val) +#define CXL_VSEC_PROTOCOL_MASK 0xe0 +#define CXL_VSEC_PROTOCOL_1024TB 0x80 +#define CXL_VSEC_PROTOCOL_512TB 0x40 +#define CXL_VSEC_PROTOCOL_256TB 0x20 /* Power 8 uses this */ +#define CXL_VSEC_PROTOCOL_ENABLE 0x01 + +#define CXL_READ_VSEC_PSL_REVISION(dev, vsec, dest) \ + pci_read_config_word(dev, vsec + 0xc, dest) +#define CXL_READ_VSEC_CAIA_MINOR(dev, vsec, dest) \ + pci_read_config_byte(dev, vsec + 0xe, dest) +#define CXL_READ_VSEC_CAIA_MAJOR(dev, vsec, dest) \ + pci_read_config_byte(dev, vsec + 0xf, dest) +#define CXL_READ_VSEC_BASE_IMAGE(dev, vsec, dest) \ + pci_read_config_word(dev, vsec + 0x10, dest) + +#define CXL_READ_VSEC_IMAGE_STATE(dev, vsec, dest) \ + pci_read_config_byte(dev, vsec + 0x13, dest) +#define CXL_WRITE_VSEC_IMAGE_STATE(dev, vsec, val) \ + pci_write_config_byte(dev, vsec + 0x13, val) +#define CXL_VSEC_USER_IMAGE_LOADED 0x80 /* RO */ +#define CXL_VSEC_PERST_LOADS_IMAGE 0x20 /* RW */ +#define CXL_VSEC_PERST_SELECT_USER 0x10 /* RW */ + +#define CXL_READ_VSEC_AFU_DESC_OFF(dev, vsec, dest) \ + pci_read_config_dword(dev, vsec + 0x20, dest) +#define CXL_READ_VSEC_AFU_DESC_SIZE(dev, vsec, dest) \ + pci_read_config_dword(dev, vsec + 0x24, dest) +#define CXL_READ_VSEC_PS_OFF(dev, vsec, dest) \ + pci_read_config_dword(dev, vsec + 0x28, dest) +#define CXL_READ_VSEC_PS_SIZE(dev, vsec, dest) \ + pci_read_config_dword(dev, vsec + 0x2c, dest) + + +/* This works a little different than the p1/p2 register accesses to make it + * easier to pull out individual fields */ +#define AFUD_READ(afu, off) in_be64(afu->afu_desc_mmio + off) +#define EXTRACT_PPC_BIT(val, bit) (!!(val & PPC_BIT(bit))) +#define EXTRACT_PPC_BITS(val, bs, be) ((val & PPC_BITMASK(bs, be)) >> PPC_BITLSHIFT(be)) + +#define AFUD_READ_INFO(afu) AFUD_READ(afu, 0x0) +#define AFUD_NUM_INTS_PER_PROC(val) EXTRACT_PPC_BITS(val, 0, 15) +#define AFUD_NUM_PROCS(val) EXTRACT_PPC_BITS(val, 16, 31) +#define AFUD_NUM_CRS(val) EXTRACT_PPC_BITS(val, 32, 47) +#define AFUD_MULTIMODE(val) EXTRACT_PPC_BIT(val, 48) +#define AFUD_PUSH_BLOCK_TRANSFER(val) EXTRACT_PPC_BIT(val, 55) +#define AFUD_DEDICATED_PROCESS(val) EXTRACT_PPC_BIT(val, 59) +#define AFUD_AFU_DIRECTED(val) EXTRACT_PPC_BIT(val, 61) +#define AFUD_TIME_SLICED(val) EXTRACT_PPC_BIT(val, 63) +#define AFUD_READ_CR(afu) AFUD_READ(afu, 0x20) +#define AFUD_CR_LEN(val) EXTRACT_PPC_BITS(val, 8, 63) +#define AFUD_READ_CR_OFF(afu) AFUD_READ(afu, 0x28) +#define AFUD_READ_PPPSA(afu) AFUD_READ(afu, 0x30) +#define AFUD_PPPSA_PP(val) EXTRACT_PPC_BIT(val, 6) +#define AFUD_PPPSA_PSA(val) EXTRACT_PPC_BIT(val, 7) +#define AFUD_PPPSA_LEN(val) EXTRACT_PPC_BITS(val, 8, 63) +#define AFUD_READ_PPPSA_OFF(afu) AFUD_READ(afu, 0x38) +#define AFUD_READ_EB(afu) AFUD_READ(afu, 0x40) +#define AFUD_EB_LEN(val) EXTRACT_PPC_BITS(val, 8, 63) +#define AFUD_READ_EB_OFF(afu) AFUD_READ(afu, 0x48) + +static DEFINE_PCI_DEVICE_TABLE(cxl_pci_tbl) = { + { PCI_DEVICE(PCI_VENDOR_ID_IBM, 0x0477), }, + { PCI_DEVICE(PCI_VENDOR_ID_IBM, 0x044b), }, + { PCI_DEVICE(PCI_VENDOR_ID_IBM, 0x04cf), }, + { PCI_DEVICE_CLASS(0x120000, ~0), }, + + { } +}; +MODULE_DEVICE_TABLE(pci, cxl_pci_tbl); + + +/* + * Mostly using these wrappers to avoid confusion: + * priv 1 is BAR2, while priv 2 is BAR0 + */ +static inline resource_size_t p1_base(struct pci_dev *dev) +{ + return pci_resource_start(dev, 2); +} + +static inline resource_size_t p1_size(struct pci_dev *dev) +{ + return pci_resource_len(dev, 2); +} + +static inline resource_size_t p2_base(struct pci_dev *dev) +{ + return pci_resource_start(dev, 0); +} + +static inline resource_size_t p2_size(struct pci_dev *dev) +{ + return pci_resource_len(dev, 0); +} + +static int find_cxl_vsec(struct pci_dev *dev) +{ + int vsec = 0; + u16 val; + + while ((vsec = pci_find_next_ext_capability(dev, vsec, PCI_EXT_CAP_ID_VNDR))) { + pci_read_config_word(dev, vsec + 0x4, &val); + if (val == CXL_PCI_VSEC_ID) + return vsec; + } + return 0; + +} + +static void dump_cxl_config_space(struct pci_dev *dev) +{ + int vsec; + u32 val; + + dev_info(&dev->dev, "dump_cxl_config_space\n"); + + pci_read_config_dword(dev, PCI_BASE_ADDRESS_0, &val); + dev_info(&dev->dev, "BAR0: %#.8x\n", val); + pci_read_config_dword(dev, PCI_BASE_ADDRESS_1, &val); + dev_info(&dev->dev, "BAR1: %#.8x\n", val); + pci_read_config_dword(dev, PCI_BASE_ADDRESS_2, &val); + dev_info(&dev->dev, "BAR2: %#.8x\n", val); + pci_read_config_dword(dev, PCI_BASE_ADDRESS_3, &val); + dev_info(&dev->dev, "BAR3: %#.8x\n", val); + pci_read_config_dword(dev, PCI_BASE_ADDRESS_4, &val); + dev_info(&dev->dev, "BAR4: %#.8x\n", val); + pci_read_config_dword(dev, PCI_BASE_ADDRESS_5, &val); + dev_info(&dev->dev, "BAR5: %#.8x\n", val); + + dev_info(&dev->dev, "p1 regs: %#llx, len: %#llx\n", + p1_base(dev), p1_size(dev)); + dev_info(&dev->dev, "p2 regs: %#llx, len: %#llx\n", + p1_base(dev), p2_size(dev)); + dev_info(&dev->dev, "BAR 4/5: %#llx, len: %#llx\n", + pci_resource_start(dev, 4), pci_resource_len(dev, 4)); + + if (!(vsec = find_cxl_vsec(dev))) + return; + +#define show_reg(name, what) \ + dev_info(&dev->dev, "cxl vsec: %30s: %#x\n", name, what) + + pci_read_config_dword(dev, vsec + 0x0, &val); + show_reg("Cap ID", (val >> 0) & 0xffff); + show_reg("Cap Ver", (val >> 16) & 0xf); + show_reg("Next Cap Ptr", (val >> 20) & 0xfff); + pci_read_config_dword(dev, vsec + 0x4, &val); + show_reg("VSEC ID", (val >> 0) & 0xffff); + show_reg("VSEC Rev", (val >> 16) & 0xf); + show_reg("VSEC Length", (val >> 20) & 0xfff); + pci_read_config_dword(dev, vsec + 0x8, &val); + show_reg("Num AFUs", (val >> 0) & 0xff); + show_reg("Status", (val >> 8) & 0xff); + show_reg("Mode Control", (val >> 16) & 0xff); + show_reg("Reserved", (val >> 24) & 0xff); + pci_read_config_dword(dev, vsec + 0xc, &val); + show_reg("PSL Rev", (val >> 0) & 0xffff); + show_reg("CAIA Ver", (val >> 16) & 0xffff); + pci_read_config_dword(dev, vsec + 0x10, &val); + show_reg("Base Image Rev", (val >> 0) & 0xffff); + show_reg("Reserved", (val >> 16) & 0x0fff); + show_reg("Image Control", (val >> 28) & 0x3); + show_reg("Reserved", (val >> 30) & 0x1); + show_reg("Image Loaded", (val >> 31) & 0x1); + + pci_read_config_dword(dev, vsec + 0x14, &val); + show_reg("Reserved", val); + pci_read_config_dword(dev, vsec + 0x18, &val); + show_reg("Reserved", val); + pci_read_config_dword(dev, vsec + 0x1c, &val); + show_reg("Reserved", val); + + pci_read_config_dword(dev, vsec + 0x20, &val); + show_reg("AFU Descriptor Offset", val); + pci_read_config_dword(dev, vsec + 0x24, &val); + show_reg("AFU Descriptor Size", val); + pci_read_config_dword(dev, vsec + 0x28, &val); + show_reg("Problem State Offset", val); + pci_read_config_dword(dev, vsec + 0x2c, &val); + show_reg("Problem State Size", val); + + pci_read_config_dword(dev, vsec + 0x30, &val); + show_reg("Reserved", val); + pci_read_config_dword(dev, vsec + 0x34, &val); + show_reg("Reserved", val); + pci_read_config_dword(dev, vsec + 0x38, &val); + show_reg("Reserved", val); + pci_read_config_dword(dev, vsec + 0x3c, &val); + show_reg("Reserved", val); + + pci_read_config_dword(dev, vsec + 0x40, &val); + show_reg("PSL Programming Port", val); + pci_read_config_dword(dev, vsec + 0x44, &val); + show_reg("PSL Programming Control", val); + + pci_read_config_dword(dev, vsec + 0x48, &val); + show_reg("Reserved", val); + pci_read_config_dword(dev, vsec + 0x4c, &val); + show_reg("Reserved", val); + + pci_read_config_dword(dev, vsec + 0x50, &val); + show_reg("Flash Address Register", val); + pci_read_config_dword(dev, vsec + 0x54, &val); + show_reg("Flash Size Register", val); + pci_read_config_dword(dev, vsec + 0x58, &val); + show_reg("Flash Status/Control Register", val); + pci_read_config_dword(dev, vsec + 0x58, &val); + show_reg("Flash Data Port", val); + +#undef show_reg +} + +static void dump_afu_descriptor(struct cxl_afu *afu) +{ + u64 val; + +#define show_reg(name, what) \ + dev_info(&afu->dev, "afu desc: %30s: %#llx\n", name, what) + + val = AFUD_READ_INFO(afu); + show_reg("num_ints_per_process", AFUD_NUM_INTS_PER_PROC(val)); + show_reg("num_of_processes", AFUD_NUM_PROCS(val)); + show_reg("num_of_afu_CRs", AFUD_NUM_CRS(val)); + show_reg("req_prog_mode", val & 0xffffULL); + + val = AFUD_READ(afu, 0x8); + show_reg("Reserved", val); + val = AFUD_READ(afu, 0x10); + show_reg("Reserved", val); + val = AFUD_READ(afu, 0x18); + show_reg("Reserved", val); + + val = AFUD_READ_CR(afu); + show_reg("Reserved", (val >> (63-7)) & 0xff); + show_reg("AFU_CR_len", AFUD_CR_LEN(val)); + + val = AFUD_READ_CR_OFF(afu); + show_reg("AFU_CR_offset", val); + + val = AFUD_READ_PPPSA(afu); + show_reg("PerProcessPSA_control", (val >> (63-7)) & 0xff); + show_reg("PerProcessPSA Length", AFUD_PPPSA_LEN(val)); + + val = AFUD_READ_PPPSA_OFF(afu); + show_reg("PerProcessPSA_offset", val); + + val = AFUD_READ_EB(afu); + show_reg("Reserved", (val >> (63-7)) & 0xff); + show_reg("AFU_EB_len", AFUD_EB_LEN(val)); + + val = AFUD_READ_EB_OFF(afu); + show_reg("AFU_EB_offset", val); + +#undef show_reg +} + +static int init_implementation_adapter_regs(struct cxl *adapter, struct pci_dev *dev) +{ + struct device_node *np; + const __be32 *prop; + u64 psl_dsnctl; + u64 chipid; + + if (!(np = pnv_pci_to_phb_node(dev))) + return -ENODEV; + + while (np && !(prop = of_get_property(np, "ibm,chip-id", NULL))) + np = of_get_next_parent(np); + if (!np) + return -ENODEV; + chipid = be32_to_cpup(prop); + of_node_put(np); + + /* Tell PSL where to route data to */ + psl_dsnctl = 0x02E8900002000000ULL | (chipid << (63-5)); + cxl_p1_write(adapter, CXL_PSL_DSNDCTL, psl_dsnctl); + cxl_p1_write(adapter, CXL_PSL_RESLCKTO, 0x20000000200ULL); + /* snoop write mask */ + cxl_p1_write(adapter, CXL_PSL_SNWRALLOC, 0x00000000FFFFFFFFULL); + /* set fir_accum */ + cxl_p1_write(adapter, CXL_PSL_FIR_CNTL, 0x0800000000000000ULL); + /* for debugging with trace arrays */ + cxl_p1_write(adapter, CXL_PSL_TRACE, 0x0000FF7C00000000ULL); + + return 0; +} + +static int init_implementation_afu_regs(struct cxl_afu *afu) +{ + /* read/write masks for this slice */ + cxl_p1n_write(afu, CXL_PSL_APCALLOC_A, 0xFFFFFFFEFEFEFEFEULL); + /* APC read/write masks for this slice */ + cxl_p1n_write(afu, CXL_PSL_COALLOC_A, 0xFF000000FEFEFEFEULL); + /* for debugging with trace arrays */ + cxl_p1n_write(afu, CXL_PSL_SLICE_TRACE, 0x0000FFFF00000000ULL); + cxl_p1n_write(afu, CXL_PSL_RXCTL_A, 0xF000000000000000ULL); + + return 0; +} + +int cxl_setup_irq(struct cxl *adapter, unsigned int hwirq, + unsigned int virq) +{ + struct pci_dev *dev = to_pci_dev(adapter->dev.parent); + + return pnv_cxl_ioda_msi_setup(dev, hwirq, virq); +} + +int cxl_alloc_one_irq(struct cxl *adapter) +{ + struct pci_dev *dev = to_pci_dev(adapter->dev.parent); + + return pnv_cxl_alloc_hwirqs(dev, 1); +} + +void cxl_release_one_irq(struct cxl *adapter, int hwirq) +{ + struct pci_dev *dev = to_pci_dev(adapter->dev.parent); + + return pnv_cxl_release_hwirqs(dev, hwirq, 1); +} + +int cxl_alloc_irq_ranges(struct cxl_irq_ranges *irqs, struct cxl *adapter, unsigned int num) +{ + struct pci_dev *dev = to_pci_dev(adapter->dev.parent); + + return pnv_cxl_alloc_hwirq_ranges(irqs, dev, num); +} + +void cxl_release_irq_ranges(struct cxl_irq_ranges *irqs, struct cxl *adapter) +{ + struct pci_dev *dev = to_pci_dev(adapter->dev.parent); + + pnv_cxl_release_hwirq_ranges(irqs, dev); +} + +static int setup_cxl_bars(struct pci_dev *dev) +{ + /* Safety check in case we get backported to < 3.17 without M64 */ + if ((p1_base(dev) < 0x100000000ULL) || + (p2_base(dev) < 0x100000000ULL)) { + dev_err(&dev->dev, "ABORTING: M32 BAR assignment incompatible with CXL\n"); + return -ENODEV; + } + + /* + * BAR 4/5 has a special meaning for CXL and must be programmed with a + * special value corresponding to the CXL protocol address range. + * For POWER 8 that means bits 48:49 must be set to 10 + */ + pci_write_config_dword(dev, PCI_BASE_ADDRESS_4, 0x00000000); + pci_write_config_dword(dev, PCI_BASE_ADDRESS_5, 0x00020000); + + return 0; +} + +/* pciex node: ibm,opal-m64-window = <0x3d058 0x0 0x3d058 0x0 0x8 0x0>; */ +static int switch_card_to_cxl(struct pci_dev *dev) +{ + int vsec; + u8 val; + int rc; + + dev_info(&dev->dev, "switch card to CXL\n"); + + if (!(vsec = find_cxl_vsec(dev))) { + dev_err(&dev->dev, "ABORTING: CXL VSEC not found!\n"); + return -ENODEV; + } + + if ((rc = CXL_READ_VSEC_MODE_CONTROL(dev, vsec, &val))) { + dev_err(&dev->dev, "failed to read current mode control: %i", rc); + return rc; + } + val &= ~CXL_VSEC_PROTOCOL_MASK; + val |= CXL_VSEC_PROTOCOL_256TB | CXL_VSEC_PROTOCOL_ENABLE; + if ((rc = CXL_WRITE_VSEC_MODE_CONTROL(dev, vsec, val))) { + dev_err(&dev->dev, "failed to enable CXL protocol: %i", rc); + return rc; + } + /* + * The CAIA spec (v0.12 11.6 Bi-modal Device Support) states + * we must wait 100ms after this mode switch before touching + * PCIe config space. + */ + msleep(100); + + return 0; +} + +static int cxl_map_slice_regs(struct cxl_afu *afu, struct cxl *adapter, struct pci_dev *dev) +{ + u64 p1n_base, p2n_base, afu_desc; + const u64 p1n_size = 0x100; + const u64 p2n_size = 0x1000; + + p1n_base = p1_base(dev) + 0x10000 + (afu->slice * p1n_size); + p2n_base = p2_base(dev) + (afu->slice * p2n_size); + afu->psn_phys = p2_base(dev) + (adapter->ps_off + (afu->slice * adapter->ps_size)); + afu_desc = p2_base(dev) + adapter->afu_desc_off + (afu->slice * adapter->afu_desc_size); + + if (!(afu->p1n_mmio = ioremap(p1n_base, p1n_size))) + goto err; + if (!(afu->p2n_mmio = ioremap(p2n_base, p2n_size))) + goto err1; + if (afu_desc) { + if (!(afu->afu_desc_mmio = ioremap(afu_desc, adapter->afu_desc_size))) + goto err2; + } + + return 0; +err2: + iounmap(afu->p2n_mmio); +err1: + iounmap(afu->p1n_mmio); +err: + dev_err(&afu->dev, "Error mapping AFU MMIO regions\n"); + return -ENOMEM; +} + +static void cxl_unmap_slice_regs(struct cxl_afu *afu) +{ + if (afu->p1n_mmio) + iounmap(afu->p2n_mmio); + if (afu->p1n_mmio) + iounmap(afu->p1n_mmio); +} + +static void cxl_release_afu(struct device *dev) +{ + struct cxl_afu *afu = to_cxl_afu(dev); + + pr_devel("cxl_release_afu\n"); + + kfree(afu); +} + +static struct cxl_afu *cxl_alloc_afu(struct cxl *adapter, int slice) +{ + struct cxl_afu *afu; + + if (!(afu = kzalloc(sizeof(struct cxl_afu), GFP_KERNEL))) + return NULL; + + afu->adapter = adapter; + afu->dev.parent = &adapter->dev; + afu->dev.release = cxl_release_afu; + afu->slice = slice; + idr_init(&afu->contexts_idr); + spin_lock_init(&afu->contexts_lock); + spin_lock_init(&afu->afu_cntl_lock); + mutex_init(&afu->spa_mutex); + + afu->prefault_mode = CXL_PREFAULT_NONE; + afu->irqs_max = afu->adapter->user_irqs; + + return afu; +} + +/* Expects AFU struct to have recently been zeroed out */ +static int cxl_read_afu_descriptor(struct cxl_afu *afu) +{ + u64 val; + + val = AFUD_READ_INFO(afu); + afu->pp_irqs = AFUD_NUM_INTS_PER_PROC(val); + afu->max_procs_virtualised = AFUD_NUM_PROCS(val); + + if (AFUD_AFU_DIRECTED(val)) + afu->modes_supported |= CXL_MODE_DIRECTED; + if (AFUD_DEDICATED_PROCESS(val)) + afu->modes_supported |= CXL_MODE_DEDICATED; + if (AFUD_TIME_SLICED(val)) + afu->modes_supported |= CXL_MODE_TIME_SLICED; + + val = AFUD_READ_PPPSA(afu); + afu->pp_size = AFUD_PPPSA_LEN(val) * 4096; + afu->psa = AFUD_PPPSA_PSA(val); + if ((afu->pp_psa = AFUD_PPPSA_PP(val))) + afu->pp_offset = AFUD_READ_PPPSA_OFF(afu); + + return 0; +} + +static int cxl_afu_descriptor_looks_ok(struct cxl_afu *afu) +{ + if (afu->psa && afu->adapter->ps_size < + (afu->pp_offset + afu->pp_size*afu->max_procs_virtualised)) { + dev_err(&afu->dev, "per-process PSA can't fit inside the PSA!\n"); + return -ENODEV; + } + + if (afu->pp_psa && (afu->pp_size < PAGE_SIZE)) + dev_warn(&afu->dev, "AFU uses < PAGE_SIZE per-process PSA!"); + + return 0; +} + +static int sanitise_afu_regs(struct cxl_afu *afu) +{ + u64 reg; + + /* + * Clear out any regs that contain either an IVTE or address or may be + * waiting on an acknowledgement to try to be a bit safer as we bring + * it online + */ + reg = cxl_p2n_read(afu, CXL_AFU_Cntl_An); + if ((reg & CXL_AFU_Cntl_An_ES_MASK) != CXL_AFU_Cntl_An_ES_Disabled) { + dev_warn(&afu->dev, "WARNING: AFU was not disabled: %#.16llx\n", reg); + if (cxl_afu_reset(afu)) + return -EIO; + if (cxl_afu_disable(afu)) + return -EIO; + if (cxl_psl_purge(afu)) + return -EIO; + } + cxl_p1n_write(afu, CXL_PSL_SPAP_An, 0x0000000000000000); + cxl_p1n_write(afu, CXL_PSL_IVTE_Limit_An, 0x0000000000000000); + cxl_p1n_write(afu, CXL_PSL_IVTE_Offset_An, 0x0000000000000000); + cxl_p1n_write(afu, CXL_PSL_AMBAR_An, 0x0000000000000000); + cxl_p1n_write(afu, CXL_PSL_SPOffset_An, 0x0000000000000000); + cxl_p1n_write(afu, CXL_HAURP_An, 0x0000000000000000); + cxl_p2n_write(afu, CXL_CSRP_An, 0x0000000000000000); + cxl_p2n_write(afu, CXL_AURP1_An, 0x0000000000000000); + cxl_p2n_write(afu, CXL_AURP0_An, 0x0000000000000000); + cxl_p2n_write(afu, CXL_SSTP1_An, 0x0000000000000000); + cxl_p2n_write(afu, CXL_SSTP0_An, 0x0000000000000000); + reg = cxl_p2n_read(afu, CXL_PSL_DSISR_An); + if (reg) { + dev_warn(&afu->dev, "AFU had pending DSISR: %#.16llx\n", reg); + if (reg & CXL_PSL_DSISR_TRANS) + cxl_p2n_write(afu, CXL_PSL_TFC_An, CXL_PSL_TFC_An_AE); + else + cxl_p2n_write(afu, CXL_PSL_TFC_An, CXL_PSL_TFC_An_A); + } + reg = cxl_p1n_read(afu, CXL_PSL_SERR_An); + if (reg) { + if (reg & ~0xffff) + dev_warn(&afu->dev, "AFU had pending SERR: %#.16llx\n", reg); + cxl_p1n_write(afu, CXL_PSL_SERR_An, reg & ~0xffff); + } + reg = cxl_p2n_read(afu, CXL_PSL_ErrStat_An); + if (reg) { + dev_warn(&afu->dev, "AFU had pending error status: %#.16llx\n", reg); + cxl_p2n_write(afu, CXL_PSL_ErrStat_An, reg); + } + + return 0; +} + +static int cxl_init_afu(struct cxl *adapter, int slice, struct pci_dev *dev) +{ + struct cxl_afu *afu; + bool free = true; + int rc; + + if (!(afu = cxl_alloc_afu(adapter, slice))) + return -ENOMEM; + + if ((rc = dev_set_name(&afu->dev, "afu%i.%i", adapter->adapter_num, slice))) + goto err1; + + if ((rc = cxl_map_slice_regs(afu, adapter, dev))) + goto err1; + + if ((rc = sanitise_afu_regs(afu))) + goto err2; + + /* We need to reset the AFU before we can read the AFU descriptor */ + if ((rc = cxl_afu_reset(afu))) + goto err2; + + if (cxl_verbose) + dump_afu_descriptor(afu); + + if ((rc = cxl_read_afu_descriptor(afu))) + goto err2; + + if ((rc = cxl_afu_descriptor_looks_ok(afu))) + goto err2; + + if ((rc = init_implementation_afu_regs(afu))) + goto err2; + + if ((rc = cxl_register_serr_irq(afu))) + goto err2; + + if ((rc = cxl_register_psl_irq(afu))) + goto err3; + + /* Don't care if this fails */ + cxl_debugfs_afu_add(afu); + + /* + * After we call this function we must not free the afu directly, even + * if it returns an error! + */ + if ((rc = cxl_register_afu(afu))) + goto err_put1; + + if ((rc = cxl_sysfs_afu_add(afu))) + goto err_put1; + + + if ((rc = cxl_afu_select_best_mode(afu))) + goto err_put2; + + adapter->afu[afu->slice] = afu; + + return 0; + +err_put2: + cxl_sysfs_afu_remove(afu); +err_put1: + device_unregister(&afu->dev); + free = false; + cxl_debugfs_afu_remove(afu); + cxl_release_psl_irq(afu); +err3: + cxl_release_serr_irq(afu); +err2: + cxl_unmap_slice_regs(afu); +err1: + if (free) + kfree(afu); + return rc; +} + +static void cxl_remove_afu(struct cxl_afu *afu) +{ + pr_devel("cxl_remove_afu\n"); + + if (!afu) + return; + + cxl_sysfs_afu_remove(afu); + cxl_debugfs_afu_remove(afu); + + spin_lock(&afu->adapter->afu_list_lock); + afu->adapter->afu[afu->slice] = NULL; + spin_unlock(&afu->adapter->afu_list_lock); + + cxl_context_detach_all(afu); + cxl_afu_deactivate_mode(afu); + + cxl_release_psl_irq(afu); + cxl_release_serr_irq(afu); + cxl_unmap_slice_regs(afu); + + device_unregister(&afu->dev); +} + + +static int cxl_map_adapter_regs(struct cxl *adapter, struct pci_dev *dev) +{ + if (pci_request_region(dev, 2, "priv 2 regs")) + goto err1; + if (pci_request_region(dev, 0, "priv 1 regs")) + goto err2; + + pr_devel("cxl_map_adapter_regs: p1: %#.16llx %#llx, p2: %#.16llx %#llx", + p1_base(dev), p1_size(dev), p2_base(dev), p2_size(dev)); + + if (!(adapter->p1_mmio = ioremap(p1_base(dev), p1_size(dev)))) + goto err3; + + if (!(adapter->p2_mmio = ioremap(p2_base(dev), p2_size(dev)))) + goto err4; + + return 0; + +err4: + iounmap(adapter->p1_mmio); + adapter->p1_mmio = NULL; +err3: + pci_release_region(dev, 0); +err2: + pci_release_region(dev, 2); +err1: + return -ENOMEM; +} + +static void cxl_unmap_adapter_regs(struct cxl *adapter) +{ + if (adapter->p1_mmio) + iounmap(adapter->p1_mmio); + if (adapter->p2_mmio) + iounmap(adapter->p2_mmio); +} + +static int cxl_read_vsec(struct cxl *adapter, struct pci_dev *dev) +{ + int vsec; + u32 afu_desc_off, afu_desc_size; + u32 ps_off, ps_size; + u16 vseclen; + u8 image_state; + + if (!(vsec = find_cxl_vsec(dev))) { + dev_err(&adapter->dev, "ABORTING: CXL VSEC not found!\n"); + return -ENODEV; + } + + CXL_READ_VSEC_LENGTH(dev, vsec, &vseclen); + if (vseclen < CXL_VSEC_MIN_SIZE) { + pr_err("ABORTING: CXL VSEC too short\n"); + return -EINVAL; + } + + CXL_READ_VSEC_STATUS(dev, vsec, &adapter->vsec_status); + CXL_READ_VSEC_PSL_REVISION(dev, vsec, &adapter->psl_rev); + CXL_READ_VSEC_CAIA_MAJOR(dev, vsec, &adapter->caia_major); + CXL_READ_VSEC_CAIA_MINOR(dev, vsec, &adapter->caia_minor); + CXL_READ_VSEC_BASE_IMAGE(dev, vsec, &adapter->base_image); + CXL_READ_VSEC_IMAGE_STATE(dev, vsec, &image_state); + adapter->user_image_loaded = !!(image_state & CXL_VSEC_USER_IMAGE_LOADED); + adapter->perst_loads_image = !!(image_state & CXL_VSEC_PERST_LOADS_IMAGE); + adapter->perst_select_user = !!(image_state & CXL_VSEC_PERST_SELECT_USER); + + CXL_READ_VSEC_NAFUS(dev, vsec, &adapter->slices); + CXL_READ_VSEC_AFU_DESC_OFF(dev, vsec, &afu_desc_off); + CXL_READ_VSEC_AFU_DESC_SIZE(dev, vsec, &afu_desc_size); + CXL_READ_VSEC_PS_OFF(dev, vsec, &ps_off); + CXL_READ_VSEC_PS_SIZE(dev, vsec, &ps_size); + + /* Convert everything to bytes, because there is NO WAY I'd look at the + * code a month later and forget what units these are in ;-) */ + adapter->ps_off = ps_off * 64 * 1024; + adapter->ps_size = ps_size * 64 * 1024; + adapter->afu_desc_off = afu_desc_off * 64 * 1024; + adapter->afu_desc_size = afu_desc_size *64 * 1024; + + /* Total IRQs - 1 PSL ERROR - #AFU*(1 slice error + 1 DSI) */ + adapter->user_irqs = pnv_cxl_get_irq_count(dev) - 1 - 2*adapter->slices; + + return 0; +} + +static int cxl_vsec_looks_ok(struct cxl *adapter, struct pci_dev *dev) +{ + if (adapter->vsec_status & CXL_STATUS_SECOND_PORT) + return -EBUSY; + + if (adapter->vsec_status & CXL_UNSUPPORTED_FEATURES) { + dev_err(&adapter->dev, "ABORTING: CXL requires unsupported features\n"); + return -EINVAL; + } + + if (!adapter->slices) { + /* Once we support dynamic reprogramming we can use the card if + * it supports loadable AFUs */ + dev_err(&adapter->dev, "ABORTING: Device has no AFUs\n"); + return -EINVAL; + } + + if (!adapter->afu_desc_off || !adapter->afu_desc_size) { + dev_err(&adapter->dev, "ABORTING: VSEC shows no AFU descriptors\n"); + return -EINVAL; + } + + if (adapter->ps_size > p2_size(dev) - adapter->ps_off) { + dev_err(&adapter->dev, "ABORTING: Problem state size larger than " + "available in BAR2: 0x%llx > 0x%llx\n", + adapter->ps_size, p2_size(dev) - adapter->ps_off); + return -EINVAL; + } + + return 0; +} + +static void cxl_release_adapter(struct device *dev) +{ + struct cxl *adapter = to_cxl_adapter(dev); + + pr_devel("cxl_release_adapter\n"); + + kfree(adapter); +} + +static struct cxl *cxl_alloc_adapter(struct pci_dev *dev) +{ + struct cxl *adapter; + + if (!(adapter = kzalloc(sizeof(struct cxl), GFP_KERNEL))) + return NULL; + + adapter->dev.parent = &dev->dev; + adapter->dev.release = cxl_release_adapter; + pci_set_drvdata(dev, adapter); + spin_lock_init(&adapter->afu_list_lock); + + return adapter; +} + +static int sanitise_adapter_regs(struct cxl *adapter) +{ + cxl_p1_write(adapter, CXL_PSL_ErrIVTE, 0x0000000000000000); + return cxl_tlb_slb_invalidate(adapter); +} + +static struct cxl *cxl_init_adapter(struct pci_dev *dev) +{ + struct cxl *adapter; + bool free = true; + int rc; + + + if (!(adapter = cxl_alloc_adapter(dev))) + return ERR_PTR(-ENOMEM); + + if ((rc = switch_card_to_cxl(dev))) + goto err1; + + if ((rc = cxl_alloc_adapter_nr(adapter))) + goto err1; + + if ((rc = dev_set_name(&adapter->dev, "card%i", adapter->adapter_num))) + goto err2; + + if ((rc = cxl_read_vsec(adapter, dev))) + goto err2; + + if ((rc = cxl_vsec_looks_ok(adapter, dev))) + goto err2; + + if ((rc = cxl_map_adapter_regs(adapter, dev))) + goto err2; + + if ((rc = sanitise_adapter_regs(adapter))) + goto err2; + + if ((rc = init_implementation_adapter_regs(adapter, dev))) + goto err3; + + if ((rc = pnv_phb_to_cxl(dev))) + goto err3; + + if ((rc = cxl_register_psl_err_irq(adapter))) + goto err3; + + /* Don't care if this one fails: */ + cxl_debugfs_adapter_add(adapter); + + /* + * After we call this function we must not free the adapter directly, + * even if it returns an error! + */ + if ((rc = cxl_register_adapter(adapter))) + goto err_put1; + + if ((rc = cxl_sysfs_adapter_add(adapter))) + goto err_put1; + + return adapter; + +err_put1: + device_unregister(&adapter->dev); + free = false; + cxl_debugfs_adapter_remove(adapter); + cxl_release_psl_err_irq(adapter); +err3: + cxl_unmap_adapter_regs(adapter); +err2: + cxl_remove_adapter_nr(adapter); +err1: + if (free) + kfree(adapter); + return ERR_PTR(rc); +} + +static void cxl_remove_adapter(struct cxl *adapter) +{ + struct pci_dev *pdev = to_pci_dev(adapter->dev.parent); + + pr_devel("cxl_release_adapter\n"); + + cxl_sysfs_adapter_remove(adapter); + cxl_debugfs_adapter_remove(adapter); + cxl_release_psl_err_irq(adapter); + cxl_unmap_adapter_regs(adapter); + cxl_remove_adapter_nr(adapter); + + device_unregister(&adapter->dev); + + pci_release_region(pdev, 0); + pci_release_region(pdev, 2); + pci_disable_device(pdev); +} + +static int cxl_probe(struct pci_dev *dev, const struct pci_device_id *id) +{ + struct cxl *adapter; + int slice; + int rc; + + pci_dev_get(dev); + + if (cxl_verbose) + dump_cxl_config_space(dev); + + if ((rc = setup_cxl_bars(dev))) + return rc; + + if ((rc = pci_enable_device(dev))) { + dev_err(&dev->dev, "pci_enable_device failed: %i\n", rc); + return rc; + } + + adapter = cxl_init_adapter(dev); + if (IS_ERR(adapter)) { + dev_err(&dev->dev, "cxl_init_adapter failed: %li\n", PTR_ERR(adapter)); + return PTR_ERR(adapter); + } + + for (slice = 0; slice < adapter->slices; slice++) { + if ((rc = cxl_init_afu(adapter, slice, dev))) + dev_err(&dev->dev, "AFU %i failed to initialise: %i\n", slice, rc); + } + + return 0; +} + +static void cxl_remove(struct pci_dev *dev) +{ + struct cxl *adapter = pci_get_drvdata(dev); + int afu; + + dev_warn(&dev->dev, "pci remove\n"); + + /* + * Lock to prevent someone grabbing a ref through the adapter list as + * we are removing it + */ + for (afu = 0; afu < adapter->slices; afu++) + cxl_remove_afu(adapter->afu[afu]); + cxl_remove_adapter(adapter); +} + +struct pci_driver cxl_pci_driver = { + .name = "cxl-pci", + .id_table = cxl_pci_tbl, + .probe = cxl_probe, + .remove = cxl_remove, +}; diff --git a/drivers/misc/cxl/sysfs.c b/drivers/misc/cxl/sysfs.c new file mode 100644 index 000000000000..ce7ec06d87d1 --- /dev/null +++ b/drivers/misc/cxl/sysfs.c @@ -0,0 +1,385 @@ +/* + * Copyright 2014 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include + +#include "cxl.h" + +#define to_afu_chardev_m(d) dev_get_drvdata(d) + +/********* Adapter attributes **********************************************/ + +static ssize_t caia_version_show(struct device *device, + struct device_attribute *attr, + char *buf) +{ + struct cxl *adapter = to_cxl_adapter(device); + + return scnprintf(buf, PAGE_SIZE, "%i.%i\n", adapter->caia_major, + adapter->caia_minor); +} + +static ssize_t psl_revision_show(struct device *device, + struct device_attribute *attr, + char *buf) +{ + struct cxl *adapter = to_cxl_adapter(device); + + return scnprintf(buf, PAGE_SIZE, "%i\n", adapter->psl_rev); +} + +static ssize_t base_image_show(struct device *device, + struct device_attribute *attr, + char *buf) +{ + struct cxl *adapter = to_cxl_adapter(device); + + return scnprintf(buf, PAGE_SIZE, "%i\n", adapter->base_image); +} + +static ssize_t image_loaded_show(struct device *device, + struct device_attribute *attr, + char *buf) +{ + struct cxl *adapter = to_cxl_adapter(device); + + if (adapter->user_image_loaded) + return scnprintf(buf, PAGE_SIZE, "user\n"); + return scnprintf(buf, PAGE_SIZE, "factory\n"); +} + +static struct device_attribute adapter_attrs[] = { + __ATTR_RO(caia_version), + __ATTR_RO(psl_revision), + __ATTR_RO(base_image), + __ATTR_RO(image_loaded), +}; + + +/********* AFU master specific attributes **********************************/ + +static ssize_t mmio_size_show_master(struct device *device, + struct device_attribute *attr, + char *buf) +{ + struct cxl_afu *afu = to_afu_chardev_m(device); + + return scnprintf(buf, PAGE_SIZE, "%llu\n", afu->adapter->ps_size); +} + +static ssize_t pp_mmio_off_show(struct device *device, + struct device_attribute *attr, + char *buf) +{ + struct cxl_afu *afu = to_afu_chardev_m(device); + + return scnprintf(buf, PAGE_SIZE, "%llu\n", afu->pp_offset); +} + +static ssize_t pp_mmio_len_show(struct device *device, + struct device_attribute *attr, + char *buf) +{ + struct cxl_afu *afu = to_afu_chardev_m(device); + + return scnprintf(buf, PAGE_SIZE, "%llu\n", afu->pp_size); +} + +static struct device_attribute afu_master_attrs[] = { + __ATTR(mmio_size, S_IRUGO, mmio_size_show_master, NULL), + __ATTR_RO(pp_mmio_off), + __ATTR_RO(pp_mmio_len), +}; + + +/********* AFU attributes **************************************************/ + +static ssize_t mmio_size_show(struct device *device, + struct device_attribute *attr, + char *buf) +{ + struct cxl_afu *afu = to_cxl_afu(device); + + if (afu->pp_size) + return scnprintf(buf, PAGE_SIZE, "%llu\n", afu->pp_size); + return scnprintf(buf, PAGE_SIZE, "%llu\n", afu->adapter->ps_size); +} + +static ssize_t reset_store_afu(struct device *device, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct cxl_afu *afu = to_cxl_afu(device); + int rc; + + /* Not safe to reset if it is currently in use */ + spin_lock(&afu->contexts_lock); + if (!idr_is_empty(&afu->contexts_idr)) { + rc = -EBUSY; + goto err; + } + + if ((rc = cxl_afu_reset(afu))) + goto err; + + rc = count; +err: + spin_unlock(&afu->contexts_lock); + return rc; +} + +static ssize_t irqs_min_show(struct device *device, + struct device_attribute *attr, + char *buf) +{ + struct cxl_afu *afu = to_cxl_afu(device); + + return scnprintf(buf, PAGE_SIZE, "%i\n", afu->pp_irqs); +} + +static ssize_t irqs_max_show(struct device *device, + struct device_attribute *attr, + char *buf) +{ + struct cxl_afu *afu = to_cxl_afu(device); + + return scnprintf(buf, PAGE_SIZE, "%i\n", afu->irqs_max); +} + +static ssize_t irqs_max_store(struct device *device, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct cxl_afu *afu = to_cxl_afu(device); + ssize_t ret; + int irqs_max; + + ret = sscanf(buf, "%i", &irqs_max); + if (ret != 1) + return -EINVAL; + + if (irqs_max < afu->pp_irqs) + return -EINVAL; + + if (irqs_max > afu->adapter->user_irqs) + return -EINVAL; + + afu->irqs_max = irqs_max; + return count; +} + +static ssize_t modes_supported_show(struct device *device, + struct device_attribute *attr, char *buf) +{ + struct cxl_afu *afu = to_cxl_afu(device); + char *p = buf, *end = buf + PAGE_SIZE; + + if (afu->modes_supported & CXL_MODE_DEDICATED) + p += scnprintf(p, end - p, "dedicated_process\n"); + if (afu->modes_supported & CXL_MODE_DIRECTED) + p += scnprintf(p, end - p, "afu_directed\n"); + return (p - buf); +} + +static ssize_t prefault_mode_show(struct device *device, + struct device_attribute *attr, + char *buf) +{ + struct cxl_afu *afu = to_cxl_afu(device); + + switch (afu->prefault_mode) { + case CXL_PREFAULT_WED: + return scnprintf(buf, PAGE_SIZE, "work_element_descriptor\n"); + case CXL_PREFAULT_ALL: + return scnprintf(buf, PAGE_SIZE, "all\n"); + default: + return scnprintf(buf, PAGE_SIZE, "none\n"); + } +} + +static ssize_t prefault_mode_store(struct device *device, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct cxl_afu *afu = to_cxl_afu(device); + enum prefault_modes mode = -1; + + if (!strncmp(buf, "work_element_descriptor", 23)) + mode = CXL_PREFAULT_WED; + if (!strncmp(buf, "all", 3)) + mode = CXL_PREFAULT_ALL; + if (!strncmp(buf, "none", 4)) + mode = CXL_PREFAULT_NONE; + + if (mode == -1) + return -EINVAL; + + afu->prefault_mode = mode; + return count; +} + +static ssize_t mode_show(struct device *device, + struct device_attribute *attr, + char *buf) +{ + struct cxl_afu *afu = to_cxl_afu(device); + + if (afu->current_mode == CXL_MODE_DEDICATED) + return scnprintf(buf, PAGE_SIZE, "dedicated_process\n"); + if (afu->current_mode == CXL_MODE_DIRECTED) + return scnprintf(buf, PAGE_SIZE, "afu_directed\n"); + return scnprintf(buf, PAGE_SIZE, "none\n"); +} + +static ssize_t mode_store(struct device *device, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct cxl_afu *afu = to_cxl_afu(device); + int old_mode, mode = -1; + int rc = -EBUSY; + + /* can't change this if we have a user */ + spin_lock(&afu->contexts_lock); + if (!idr_is_empty(&afu->contexts_idr)) + goto err; + + if (!strncmp(buf, "dedicated_process", 17)) + mode = CXL_MODE_DEDICATED; + if (!strncmp(buf, "afu_directed", 12)) + mode = CXL_MODE_DIRECTED; + if (!strncmp(buf, "none", 4)) + mode = 0; + + if (mode == -1) { + rc = -EINVAL; + goto err; + } + + /* + * cxl_afu_deactivate_mode needs to be done outside the lock, prevent + * other contexts coming in before we are ready: + */ + old_mode = afu->current_mode; + afu->current_mode = 0; + afu->num_procs = 0; + + spin_unlock(&afu->contexts_lock); + + if ((rc = _cxl_afu_deactivate_mode(afu, old_mode))) + return rc; + if ((rc = cxl_afu_activate_mode(afu, mode))) + return rc; + + return count; +err: + spin_unlock(&afu->contexts_lock); + return rc; +} + +static ssize_t api_version_show(struct device *device, + struct device_attribute *attr, + char *buf) +{ + return scnprintf(buf, PAGE_SIZE, "%i\n", CXL_API_VERSION); +} + +static ssize_t api_version_compatible_show(struct device *device, + struct device_attribute *attr, + char *buf) +{ + return scnprintf(buf, PAGE_SIZE, "%i\n", CXL_API_VERSION_COMPATIBLE); +} + +static struct device_attribute afu_attrs[] = { + __ATTR_RO(mmio_size), + __ATTR_RO(irqs_min), + __ATTR_RW(irqs_max), + __ATTR_RO(modes_supported), + __ATTR_RW(mode), + __ATTR_RW(prefault_mode), + __ATTR_RO(api_version), + __ATTR_RO(api_version_compatible), + __ATTR(reset, S_IWUSR, NULL, reset_store_afu), +}; + + + +int cxl_sysfs_adapter_add(struct cxl *adapter) +{ + int i, rc; + + for (i = 0; i < ARRAY_SIZE(adapter_attrs); i++) { + if ((rc = device_create_file(&adapter->dev, &adapter_attrs[i]))) + goto err; + } + return 0; +err: + for (i--; i >= 0; i--) + device_remove_file(&adapter->dev, &adapter_attrs[i]); + return rc; +} +void cxl_sysfs_adapter_remove(struct cxl *adapter) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(adapter_attrs); i++) + device_remove_file(&adapter->dev, &adapter_attrs[i]); +} + +int cxl_sysfs_afu_add(struct cxl_afu *afu) +{ + int i, rc; + + for (i = 0; i < ARRAY_SIZE(afu_attrs); i++) { + if ((rc = device_create_file(&afu->dev, &afu_attrs[i]))) + goto err; + } + + return 0; + +err: + for (i--; i >= 0; i--) + device_remove_file(&afu->dev, &afu_attrs[i]); + return rc; +} + +void cxl_sysfs_afu_remove(struct cxl_afu *afu) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(afu_attrs); i++) + device_remove_file(&afu->dev, &afu_attrs[i]); +} + +int cxl_sysfs_afu_m_add(struct cxl_afu *afu) +{ + int i, rc; + + for (i = 0; i < ARRAY_SIZE(afu_master_attrs); i++) { + if ((rc = device_create_file(afu->chardev_m, &afu_master_attrs[i]))) + goto err; + } + + return 0; + +err: + for (i--; i >= 0; i--) + device_remove_file(afu->chardev_m, &afu_master_attrs[i]); + return rc; +} + +void cxl_sysfs_afu_m_remove(struct cxl_afu *afu) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(afu_master_attrs); i++) + device_remove_file(afu->chardev_m, &afu_master_attrs[i]); +} From 66b43081c0bde3171208a7cb52f5807dce4a79e4 Mon Sep 17 00:00:00 2001 From: Ian Munsie Date: Wed, 8 Oct 2014 19:55:03 +1100 Subject: [PATCH 127/130] cxl: Add userspace header file This adds a header file for use by userspace programs wanting to interact with the kernel cxl driver. It defines structs and magic numbers required for userspace to interact with devices in /dev/cxl/afuM.N. Further documentation on this interface is added in a subsequent patch in Documentation/powerpc/cxl.txt. It also adds this new userspace header file to Kbuild so it's exported when doing "make headers_installs". Signed-off-by: Ian Munsie Signed-off-by: Michael Neuling Signed-off-by: Michael Ellerman --- include/uapi/Kbuild | 1 + include/uapi/misc/Kbuild | 2 + include/uapi/misc/cxl.h | 87 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 90 insertions(+) create mode 100644 include/uapi/misc/Kbuild create mode 100644 include/uapi/misc/cxl.h diff --git a/include/uapi/Kbuild b/include/uapi/Kbuild index 81d2106287fe..245aa6e05e6a 100644 --- a/include/uapi/Kbuild +++ b/include/uapi/Kbuild @@ -12,3 +12,4 @@ header-y += video/ header-y += drm/ header-y += xen/ header-y += scsi/ +header-y += misc/ diff --git a/include/uapi/misc/Kbuild b/include/uapi/misc/Kbuild new file mode 100644 index 000000000000..e96cae7d58c9 --- /dev/null +++ b/include/uapi/misc/Kbuild @@ -0,0 +1,2 @@ +# misc Header export list +header-y += cxl.h diff --git a/include/uapi/misc/cxl.h b/include/uapi/misc/cxl.h new file mode 100644 index 000000000000..c232be6ae21f --- /dev/null +++ b/include/uapi/misc/cxl.h @@ -0,0 +1,87 @@ +/* + * Copyright 2014 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef _UAPI_MISC_CXL_H +#define _UAPI_MISC_CXL_H + +#include +#include + +/* Structs for IOCTLS for userspace to talk to the kernel */ +struct cxl_ioctl_start_work { + __u64 flags; + __u64 work_element_descriptor; + __u64 amr; + __s16 num_interrupts; + __s16 reserved1; + __s32 reserved2; + __u64 reserved3; + __u64 reserved4; + __u64 reserved5; + __u64 reserved6; +}; +#define CXL_START_WORK_AMR 0x0000000000000001ULL +#define CXL_START_WORK_NUM_IRQS 0x0000000000000002ULL +#define CXL_START_WORK_ALL (CXL_START_WORK_AMR |\ + CXL_START_WORK_NUM_IRQS) + +/* IOCTL numbers */ +#define CXL_MAGIC 0xCA +#define CXL_IOCTL_START_WORK _IOW(CXL_MAGIC, 0x00, struct cxl_ioctl_start_work) +#define CXL_IOCTL_GET_PROCESS_ELEMENT _IOR(CXL_MAGIC, 0x01, __u32) + +/* Events from read() */ +#define CXL_READ_MIN_SIZE 0x1000 /* 4K */ + +enum cxl_event_type { + CXL_EVENT_RESERVED = 0, + CXL_EVENT_AFU_INTERRUPT = 1, + CXL_EVENT_DATA_STORAGE = 2, + CXL_EVENT_AFU_ERROR = 3, +}; + +struct cxl_event_header { + __u16 type; + __u16 size; + __u16 process_element; + __u16 reserved1; +}; + +struct cxl_event_afu_interrupt { + __u16 flags; + __u16 irq; /* Raised AFU interrupt number */ + __u32 reserved1; +}; + +struct cxl_event_data_storage { + __u16 flags; + __u16 reserved1; + __u32 reserved2; + __u64 addr; + __u64 dsisr; + __u64 reserved3; +}; + +struct cxl_event_afu_error { + __u16 flags; + __u16 reserved1; + __u32 reserved2; + __u64 error; +}; + +struct cxl_event { + struct cxl_event_header header; + union { + struct cxl_event_afu_interrupt irq; + struct cxl_event_data_storage fault; + struct cxl_event_afu_error afu_error; + }; +}; + +#endif /* _UAPI_MISC_CXL_H */ From 881632c905f29fd7173250fd1d5b3a9a769d02be Mon Sep 17 00:00:00 2001 From: Ian Munsie Date: Wed, 8 Oct 2014 19:55:04 +1100 Subject: [PATCH 128/130] cxl: Add driver to Kbuild and Makefiles Signed-off-by: Ian Munsie Signed-off-by: Michael Neuling Signed-off-by: Michael Ellerman --- drivers/misc/cxl/Kconfig | 17 +++++++++++++++++ drivers/misc/cxl/Makefile | 2 ++ 2 files changed, 19 insertions(+) diff --git a/drivers/misc/cxl/Kconfig b/drivers/misc/cxl/Kconfig index 5cdd3199f97d..a990b39b4dfb 100644 --- a/drivers/misc/cxl/Kconfig +++ b/drivers/misc/cxl/Kconfig @@ -6,3 +6,20 @@ config CXL_BASE bool default n select PPC_COPRO_BASE + +config CXL + tristate "Support for IBM Coherent Accelerators (CXL)" + depends on PPC_POWERNV && PCI_MSI + select CXL_BASE + default m + help + Select this option to enable driver support for IBM Coherent + Accelerators (CXL). CXL is otherwise known as Coherent Accelerator + Processor Interface (CAPI). CAPI allows accelerators in FPGAs to be + coherently attached to a CPU via an MMU. This driver enables + userspace programs to access these accelerators via /dev/cxl/afuM.N + devices. + + CAPI adapters are found in POWER8 based systems. + + If unsure, say N. diff --git a/drivers/misc/cxl/Makefile b/drivers/misc/cxl/Makefile index e30ad0ab0cf5..165e98fef2c2 100644 --- a/drivers/misc/cxl/Makefile +++ b/drivers/misc/cxl/Makefile @@ -1 +1,3 @@ +cxl-y += main.o file.o irq.o fault.o native.o context.o sysfs.o debugfs.o pci.o +obj-$(CONFIG_CXL) += cxl.o obj-$(CONFIG_CXL_BASE) += base.o From a9282d01cf357379ce29103cec5e7651a53c634d Mon Sep 17 00:00:00 2001 From: Ian Munsie Date: Wed, 8 Oct 2014 19:55:05 +1100 Subject: [PATCH 129/130] cxl: Add documentation for userspace APIs This documentation gives an overview of the hardware architecture, userspace APIs via /dev/cxl/afuM.N and the syfs files. It also adds a MAINTAINERS file entry for cxl. Signed-off-by: Ian Munsie Signed-off-by: Michael Neuling Signed-off-by: Michael Ellerman --- Documentation/ABI/testing/sysfs-class-cxl | 129 ++++++++ Documentation/ioctl/ioctl-number.txt | 1 + Documentation/powerpc/00-INDEX | 2 + Documentation/powerpc/cxl.txt | 379 ++++++++++++++++++++++ MAINTAINERS | 12 + include/uapi/misc/cxl.h | 7 +- 6 files changed, 527 insertions(+), 3 deletions(-) create mode 100644 Documentation/ABI/testing/sysfs-class-cxl create mode 100644 Documentation/powerpc/cxl.txt diff --git a/Documentation/ABI/testing/sysfs-class-cxl b/Documentation/ABI/testing/sysfs-class-cxl new file mode 100644 index 000000000000..554405ec1955 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-class-cxl @@ -0,0 +1,129 @@ +Slave contexts (eg. /sys/class/cxl/afu0.0s): + +What: /sys/class/cxl//irqs_max +Date: September 2014 +Contact: linuxppc-dev@lists.ozlabs.org +Description: read/write + Decimal value of maximum number of interrupts that can be + requested by userspace. The default on probe is the maximum + that hardware can support (eg. 2037). Write values will limit + userspace applications to that many userspace interrupts. Must + be >= irqs_min. + +What: /sys/class/cxl//irqs_min +Date: September 2014 +Contact: linuxppc-dev@lists.ozlabs.org +Description: read only + Decimal value of the minimum number of interrupts that + userspace must request on a CXL_START_WORK ioctl. Userspace may + omit the num_interrupts field in the START_WORK IOCTL to get + this minimum automatically. + +What: /sys/class/cxl//mmio_size +Date: September 2014 +Contact: linuxppc-dev@lists.ozlabs.org +Description: read only + Decimal value of the size of the MMIO space that may be mmaped + by userspace. + +What: /sys/class/cxl//modes_supported +Date: September 2014 +Contact: linuxppc-dev@lists.ozlabs.org +Description: read only + List of the modes this AFU supports. One per line. + Valid entries are: "dedicated_process" and "afu_directed" + +What: /sys/class/cxl//mode +Date: September 2014 +Contact: linuxppc-dev@lists.ozlabs.org +Description: read/write + The current mode the AFU is using. Will be one of the modes + given in modes_supported. Writing will change the mode + provided that no user contexts are attached. + + +What: /sys/class/cxl//prefault_mode +Date: September 2014 +Contact: linuxppc-dev@lists.ozlabs.org +Description: read/write + Set the mode for prefaulting in segments into the segment table + when performing the START_WORK ioctl. Possible values: + none: No prefaulting (default) + work_element_descriptor: Treat the work element + descriptor as an effective address and + prefault what it points to. + all: all segments process calling START_WORK maps. + +What: /sys/class/cxl//reset +Date: September 2014 +Contact: linuxppc-dev@lists.ozlabs.org +Description: write only + Writing 1 here will reset the AFU provided there are not + contexts active on the AFU. + +What: /sys/class/cxl//api_version +Date: September 2014 +Contact: linuxppc-dev@lists.ozlabs.org +Description: read only + Decimal value of the current version of the kernel/user API. + +What: /sys/class/cxl//api_version_com +Date: September 2014 +Contact: linuxppc-dev@lists.ozlabs.org +Description: read only + Decimal value of the the lowest version of the userspace API + this this kernel supports. + + + +Master contexts (eg. /sys/class/cxl/afu0.0m) + +What: /sys/class/cxl/m/mmio_size +Date: September 2014 +Contact: linuxppc-dev@lists.ozlabs.org +Description: read only + Decimal value of the size of the MMIO space that may be mmaped + by userspace. This includes all slave contexts space also. + +What: /sys/class/cxl/m/pp_mmio_len +Date: September 2014 +Contact: linuxppc-dev@lists.ozlabs.org +Description: read only + Decimal value of the Per Process MMIO space length. + +What: /sys/class/cxl/m/pp_mmio_off +Date: September 2014 +Contact: linuxppc-dev@lists.ozlabs.org +Description: read only + Decimal value of the Per Process MMIO space offset. + + +Card info (eg. /sys/class/cxl/card0) + +What: /sys/class/cxl//caia_version +Date: September 2014 +Contact: linuxppc-dev@lists.ozlabs.org +Description: read only + Identifies the CAIA Version the card implements. + +What: /sys/class/cxl//psl_version +Date: September 2014 +Contact: linuxppc-dev@lists.ozlabs.org +Description: read only + Identifies the revision level of the PSL. + +What: /sys/class/cxl//base_image +Date: September 2014 +Contact: linuxppc-dev@lists.ozlabs.org +Description: read only + Identifies the revision level of the base image for devices + that support loadable PSLs. For FPGAs this field identifies + the image contained in the on-adapter flash which is loaded + during the initial program load. + +What: /sys/class/cxl//image_loaded +Date: September 2014 +Contact: linuxppc-dev@lists.ozlabs.org +Description: read only + Will return "user" or "factory" depending on the image loaded + onto the card. diff --git a/Documentation/ioctl/ioctl-number.txt b/Documentation/ioctl/ioctl-number.txt index 7e240a7c9ab1..8136e1fd30fd 100644 --- a/Documentation/ioctl/ioctl-number.txt +++ b/Documentation/ioctl/ioctl-number.txt @@ -313,6 +313,7 @@ Code Seq#(hex) Include File Comments 0xB1 00-1F PPPoX 0xB3 00 linux/mmc/ioctl.h 0xC0 00-0F linux/usb/iowarrior.h +0xCA 00-0F uapi/misc/cxl.h 0xCB 00-1F CBM serial IEC bus in development: 0xCD 01 linux/reiserfs_fs.h diff --git a/Documentation/powerpc/00-INDEX b/Documentation/powerpc/00-INDEX index a68784d0a1ee..6fd0e8bb8140 100644 --- a/Documentation/powerpc/00-INDEX +++ b/Documentation/powerpc/00-INDEX @@ -11,6 +11,8 @@ bootwrapper.txt cpu_features.txt - info on how we support a variety of CPUs with minimal compile-time options. +cxl.txt + - Overview of the CXL driver. eeh-pci-error-recovery.txt - info on PCI Bus EEH Error Recovery firmware-assisted-dump.txt diff --git a/Documentation/powerpc/cxl.txt b/Documentation/powerpc/cxl.txt new file mode 100644 index 000000000000..2c71ecc519d9 --- /dev/null +++ b/Documentation/powerpc/cxl.txt @@ -0,0 +1,379 @@ +Coherent Accelerator Interface (CXL) +==================================== + +Introduction +============ + + The coherent accelerator interface is designed to allow the + coherent connection of accelerators (FPGAs and other devices) to a + POWER system. These devices need to adhere to the Coherent + Accelerator Interface Architecture (CAIA). + + IBM refers to this as the Coherent Accelerator Processor Interface + or CAPI. In the kernel it's referred to by the name CXL to avoid + confusion with the ISDN CAPI subsystem. + + Coherent in this context means that the accelerator and CPUs can + both access system memory directly and with the same effective + addresses. + + +Hardware overview +================= + + POWER8 FPGA + +----------+ +---------+ + | | | | + | CPU | | AFU | + | | | | + | | | | + | | | | + +----------+ +---------+ + | PHB | | | + | +------+ | PSL | + | | CAPP |<------>| | + +---+------+ PCIE +---------+ + + The POWER8 chip has a Coherently Attached Processor Proxy (CAPP) + unit which is part of the PCIe Host Bridge (PHB). This is managed + by Linux by calls into OPAL. Linux doesn't directly program the + CAPP. + + The FPGA (or coherently attached device) consists of two parts. + The POWER Service Layer (PSL) and the Accelerator Function Unit + (AFU). The AFU is used to implement specific functionality behind + the PSL. The PSL, among other things, provides memory address + translation services to allow each AFU direct access to userspace + memory. + + The AFU is the core part of the accelerator (eg. the compression, + crypto etc function). The kernel has no knowledge of the function + of the AFU. Only userspace interacts directly with the AFU. + + The PSL provides the translation and interrupt services that the + AFU needs. This is what the kernel interacts with. For example, if + the AFU needs to read a particular effective address, it sends + that address to the PSL, the PSL then translates it, fetches the + data from memory and returns it to the AFU. If the PSL has a + translation miss, it interrupts the kernel and the kernel services + the fault. The context to which this fault is serviced is based on + who owns that acceleration function. + + +AFU Modes +========= + + There are two programming modes supported by the AFU. Dedicated + and AFU directed. AFU may support one or both modes. + + When using dedicated mode only one MMU context is supported. In + this mode, only one userspace process can use the accelerator at + time. + + When using AFU directed mode, up to 16K simultaneous contexts can + be supported. This means up to 16K simultaneous userspace + applications may use the accelerator (although specific AFUs may + support fewer). In this mode, the AFU sends a 16 bit context ID + with each of its requests. This tells the PSL which context is + associated with each operation. If the PSL can't translate an + operation, the ID can also be accessed by the kernel so it can + determine the userspace context associated with an operation. + + +MMIO space +========== + + A portion of the accelerator MMIO space can be directly mapped + from the AFU to userspace. Either the whole space can be mapped or + just a per context portion. The hardware is self describing, hence + the kernel can determine the offset and size of the per context + portion. + + +Interrupts +========== + + AFUs may generate interrupts that are destined for userspace. These + are received by the kernel as hardware interrupts and passed onto + userspace by a read syscall documented below. + + Data storage faults and error interrupts are handled by the kernel + driver. + + +Work Element Descriptor (WED) +============================= + + The WED is a 64-bit parameter passed to the AFU when a context is + started. Its format is up to the AFU hence the kernel has no + knowledge of what it represents. Typically it will be the + effective address of a work queue or status block where the AFU + and userspace can share control and status information. + + + + +User API +======== + + For AFUs operating in AFU directed mode, two character device + files will be created. /dev/cxl/afu0.0m will correspond to a + master context and /dev/cxl/afu0.0s will correspond to a slave + context. Master contexts have access to the full MMIO space an + AFU provides. Slave contexts have access to only the per process + MMIO space an AFU provides. + + For AFUs operating in dedicated process mode, the driver will + only create a single character device per AFU called + /dev/cxl/afu0.0d. This will have access to the entire MMIO space + that the AFU provides (like master contexts in AFU directed). + + The types described below are defined in include/uapi/misc/cxl.h + + The following file operations are supported on both slave and + master devices. + + +open +---- + + Opens the device and allocates a file descriptor to be used with + the rest of the API. + + A dedicated mode AFU only has one context and only allows the + device to be opened once. + + An AFU directed mode AFU can have many contexts, the device can be + opened once for each context that is available. + + When all available contexts are allocated the open call will fail + and return -ENOSPC. + + Note: IRQs need to be allocated for each context, which may limit + the number of contexts that can be created, and therefore + how many times the device can be opened. The POWER8 CAPP + supports 2040 IRQs and 3 are used by the kernel, so 2037 are + left. If 1 IRQ is needed per context, then only 2037 + contexts can be allocated. If 4 IRQs are needed per context, + then only 2037/4 = 509 contexts can be allocated. + + +ioctl +----- + + CXL_IOCTL_START_WORK: + Starts the AFU context and associates it with the current + process. Once this ioctl is successfully executed, all memory + mapped into this process is accessible to this AFU context + using the same effective addresses. No additional calls are + required to map/unmap memory. The AFU memory context will be + updated as userspace allocates and frees memory. This ioctl + returns once the AFU context is started. + + Takes a pointer to a struct cxl_ioctl_start_work: + + struct cxl_ioctl_start_work { + __u64 flags; + __u64 work_element_descriptor; + __u64 amr; + __s16 num_interrupts; + __s16 reserved1; + __s32 reserved2; + __u64 reserved3; + __u64 reserved4; + __u64 reserved5; + __u64 reserved6; + }; + + flags: + Indicates which optional fields in the structure are + valid. + + work_element_descriptor: + The Work Element Descriptor (WED) is a 64-bit argument + defined by the AFU. Typically this is an effective + address pointing to an AFU specific structure + describing what work to perform. + + amr: + Authority Mask Register (AMR), same as the powerpc + AMR. This field is only used by the kernel when the + corresponding CXL_START_WORK_AMR value is specified in + flags. If not specified the kernel will use a default + value of 0. + + num_interrupts: + Number of userspace interrupts to request. This field + is only used by the kernel when the corresponding + CXL_START_WORK_NUM_IRQS value is specified in flags. + If not specified the minimum number required by the + AFU will be allocated. The min and max number can be + obtained from sysfs. + + reserved fields: + For ABI padding and future extensions + + CXL_IOCTL_GET_PROCESS_ELEMENT: + Get the current context id, also known as the process element. + The value is returned from the kernel as a __u32. + + +mmap +---- + + An AFU may have an MMIO space to facilitate communication with the + AFU. If it does, the MMIO space can be accessed via mmap. The size + and contents of this area are specific to the particular AFU. The + size can be discovered via sysfs. + + In AFU directed mode, master contexts are allowed to map all of + the MMIO space and slave contexts are allowed to only map the per + process MMIO space associated with the context. In dedicated + process mode the entire MMIO space can always be mapped. + + This mmap call must be done after the START_WORK ioctl. + + Care should be taken when accessing MMIO space. Only 32 and 64-bit + accesses are supported by POWER8. Also, the AFU will be designed + with a specific endianness, so all MMIO accesses should consider + endianness (recommend endian(3) variants like: le64toh(), + be64toh() etc). These endian issues equally apply to shared memory + queues the WED may describe. + + +read +---- + + Reads events from the AFU. Blocks if no events are pending + (unless O_NONBLOCK is supplied). Returns -EIO in the case of an + unrecoverable error or if the card is removed. + + read() will always return an integral number of events. + + The buffer passed to read() must be at least 4K bytes. + + The result of the read will be a buffer of one or more events, + each event is of type struct cxl_event, of varying size. + + struct cxl_event { + struct cxl_event_header header; + union { + struct cxl_event_afu_interrupt irq; + struct cxl_event_data_storage fault; + struct cxl_event_afu_error afu_error; + }; + }; + + The struct cxl_event_header is defined as: + + struct cxl_event_header { + __u16 type; + __u16 size; + __u16 process_element; + __u16 reserved1; + }; + + type: + This defines the type of event. The type determines how + the rest of the event is structured. These types are + described below and defined by enum cxl_event_type. + + size: + This is the size of the event in bytes including the + struct cxl_event_header. The start of the next event can + be found at this offset from the start of the current + event. + + process_element: + Context ID of the event. + + reserved field: + For future extensions and padding. + + If the event type is CXL_EVENT_AFU_INTERRUPT then the event + structure is defined as: + + struct cxl_event_afu_interrupt { + __u16 flags; + __u16 irq; /* Raised AFU interrupt number */ + __u32 reserved1; + }; + + flags: + These flags indicate which optional fields are present + in this struct. Currently all fields are mandatory. + + irq: + The IRQ number sent by the AFU. + + reserved field: + For future extensions and padding. + + If the event type is CXL_EVENT_DATA_STORAGE then the event + structure is defined as: + + struct cxl_event_data_storage { + __u16 flags; + __u16 reserved1; + __u32 reserved2; + __u64 addr; + __u64 dsisr; + __u64 reserved3; + }; + + flags: + These flags indicate which optional fields are present in + this struct. Currently all fields are mandatory. + + address: + The address that the AFU unsuccessfully attempted to + access. Valid accesses will be handled transparently by the + kernel but invalid accesses will generate this event. + + dsisr: + This field gives information on the type of fault. It is a + copy of the DSISR from the PSL hardware when the address + fault occurred. The form of the DSISR is as defined in the + CAIA. + + reserved fields: + For future extensions + + If the event type is CXL_EVENT_AFU_ERROR then the event structure + is defined as: + + struct cxl_event_afu_error { + __u16 flags; + __u16 reserved1; + __u32 reserved2; + __u64 error; + }; + + flags: + These flags indicate which optional fields are present in + this struct. Currently all fields are Mandatory. + + error: + Error status from the AFU. Defined by the AFU. + + reserved fields: + For future extensions and padding + +Sysfs Class +=========== + + A cxl sysfs class is added under /sys/class/cxl to facilitate + enumeration and tuning of the accelerators. Its layout is + described in Documentation/ABI/testing/sysfs-class-cxl + +Udev rules +========== + + The following udev rules could be used to create a symlink to the + most logical chardev to use in any programming mode (afuX.Yd for + dedicated, afuX.Ys for afu directed), since the API is virtually + identical for each: + + SUBSYSTEM=="cxl", ATTRS{mode}=="dedicated_process", SYMLINK="cxl/%b" + SUBSYSTEM=="cxl", ATTRS{mode}=="afu_directed", \ + KERNEL=="afu[0-9]*.[0-9]*s", SYMLINK="cxl/%b" diff --git a/MAINTAINERS b/MAINTAINERS index 809ecd680d88..facc2198eaba 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2711,6 +2711,18 @@ W: http://www.chelsio.com S: Supported F: drivers/net/ethernet/chelsio/cxgb4vf/ +CXL (IBM Coherent Accelerator Processor Interface CAPI) DRIVER +M: Ian Munsie +M: Michael Neuling +L: linuxppc-dev@lists.ozlabs.org +S: Supported +F: drivers/misc/cxl/ +F: include/misc/cxl.h +F: include/uapi/misc/cxl.h +F: Documentation/powerpc/cxl.txt +F: Documentation/powerpc/cxl.txt +F: Documentation/ABI/testing/sysfs-class-cxl + STMMAC ETHERNET DRIVER M: Giuseppe Cavallaro L: netdev@vger.kernel.org diff --git a/include/uapi/misc/cxl.h b/include/uapi/misc/cxl.h index c232be6ae21f..cd6d789b73ec 100644 --- a/include/uapi/misc/cxl.h +++ b/include/uapi/misc/cxl.h @@ -13,7 +13,7 @@ #include #include -/* Structs for IOCTLS for userspace to talk to the kernel */ + struct cxl_ioctl_start_work { __u64 flags; __u64 work_element_descriptor; @@ -26,19 +26,20 @@ struct cxl_ioctl_start_work { __u64 reserved5; __u64 reserved6; }; + #define CXL_START_WORK_AMR 0x0000000000000001ULL #define CXL_START_WORK_NUM_IRQS 0x0000000000000002ULL #define CXL_START_WORK_ALL (CXL_START_WORK_AMR |\ CXL_START_WORK_NUM_IRQS) -/* IOCTL numbers */ +/* ioctl numbers */ #define CXL_MAGIC 0xCA #define CXL_IOCTL_START_WORK _IOW(CXL_MAGIC, 0x00, struct cxl_ioctl_start_work) #define CXL_IOCTL_GET_PROCESS_ELEMENT _IOR(CXL_MAGIC, 0x01, __u32) -/* Events from read() */ #define CXL_READ_MIN_SIZE 0x1000 /* 4K */ +/* Events from read() */ enum cxl_event_type { CXL_EVENT_RESERVED = 0, CXL_EVENT_AFU_INTERRUPT = 1, From d53ba6b3bba33432cc37b7101a86f8f3392c46e7 Mon Sep 17 00:00:00 2001 From: Ian Munsie Date: Thu, 9 Oct 2014 11:17:46 +1100 Subject: [PATCH 130/130] cxl: Fix afu_read() not doing finish_wait() on signal or non-blocking If afu_read() returned due to a signal or the AFU file descriptor being opened non-blocking it would not call finish_wait() before returning, which could lead to a crash later when something else wakes up the wait queue. This patch restructures the wait logic to ensure that the cleanup is done correctly. Signed-off-by: Ian Munsie Signed-off-by: Michael Ellerman --- drivers/misc/cxl/file.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/drivers/misc/cxl/file.c b/drivers/misc/cxl/file.c index 847b7e646a7e..378b099e7c0b 100644 --- a/drivers/misc/cxl/file.c +++ b/drivers/misc/cxl/file.c @@ -273,6 +273,7 @@ static ssize_t afu_read(struct file *file, char __user *buf, size_t count, struct cxl_context *ctx = file->private_data; struct cxl_event event; unsigned long flags; + int rc; DEFINE_WAIT(wait); if (count < CXL_READ_MIN_SIZE) @@ -285,13 +286,17 @@ static ssize_t afu_read(struct file *file, char __user *buf, size_t count, if (ctx_event_pending(ctx)) break; + if (file->f_flags & O_NONBLOCK) { + rc = -EAGAIN; + goto out; + } + + if (signal_pending(current)) { + rc = -ERESTARTSYS; + goto out; + } + spin_unlock_irqrestore(&ctx->lock, flags); - if (file->f_flags & O_NONBLOCK) - return -EAGAIN; - - if (signal_pending(current)) - return -ERESTARTSYS; - pr_devel("afu_read going to sleep...\n"); schedule(); pr_devel("afu_read woken up\n"); @@ -336,6 +341,11 @@ static ssize_t afu_read(struct file *file, char __user *buf, size_t count, if (copy_to_user(buf, &event, event.header.size)) return -EFAULT; return event.header.size; + +out: + finish_wait(&ctx->wq, &wait); + spin_unlock_irqrestore(&ctx->lock, flags); + return rc; } static const struct file_operations afu_fops = {