From 503fa23614dc95f96af883a8e2e873d5c6cd53d8 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Sat, 17 Sep 2022 13:03:09 +0100 Subject: [PATCH 1/5] PCI: Access Link 2 registers only for devices with Links PCIe r2.0, sec 7.8 added Link Capabilities/Status/Control 2 registers to the PCIe Capability with Capability Version 2. Previously we assumed these registers were implemented for all PCIe Capabilities of version 2 or greater, but in fact they are only implemented for devices with Links. Update pcie_capability_reg_implemented() to check whether the device has a Link. [bhelgaas: commit log, squash export] Link: https://lore.kernel.org/r/alpine.DEB.2.21.2209100057070.2275@angie.orcam.me.uk Link: https://lore.kernel.org/r/alpine.DEB.2.21.2209100057300.2275@angie.orcam.me.uk Signed-off-by: Maciej W. Rozycki Signed-off-by: Bjorn Helgaas --- drivers/pci/access.c | 8 +++++++- drivers/pci/pci.h | 1 + 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/pci/access.c b/drivers/pci/access.c index 708c7529647f..3c230ca3de58 100644 --- a/drivers/pci/access.c +++ b/drivers/pci/access.c @@ -350,6 +350,11 @@ bool pcie_cap_has_lnkctl(const struct pci_dev *dev) type == PCI_EXP_TYPE_PCIE_BRIDGE; } +bool pcie_cap_has_lnkctl2(const struct pci_dev *dev) +{ + return pcie_cap_has_lnkctl(dev) && pcie_cap_version(dev) > 1; +} + static inline bool pcie_cap_has_sltctl(const struct pci_dev *dev) { return pcie_downstream_port(dev) && @@ -390,10 +395,11 @@ static bool pcie_capability_reg_implemented(struct pci_dev *dev, int pos) return pcie_cap_has_rtctl(dev); case PCI_EXP_DEVCAP2: case PCI_EXP_DEVCTL2: + return pcie_cap_version(dev) > 1; case PCI_EXP_LNKCAP2: case PCI_EXP_LNKCTL2: case PCI_EXP_LNKSTA2: - return pcie_cap_version(dev) > 1; + return pcie_cap_has_lnkctl2(dev); default: return false; } diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index b1ebb7ab8805..9ed3b5550043 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -15,6 +15,7 @@ extern const unsigned char pcie_link_speed[]; extern bool pci_early_dump; bool pcie_cap_has_lnkctl(const struct pci_dev *dev); +bool pcie_cap_has_lnkctl2(const struct pci_dev *dev); bool pcie_cap_has_rtctl(const struct pci_dev *dev); /* Functions internal to the PCI core code */ From 44e985938e85503d0a69ec538e15fd33c1a4df05 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Mon, 7 Nov 2022 15:31:08 -0600 Subject: [PATCH 2/5] Revert "PCI: Clear PCI_STATUS when setting up device" This reverts commit 6cd514e58f12b211d638dbf6f791fa18d854f09c. Christophe Fergeau reported that 6cd514e58f12 ("PCI: Clear PCI_STATUS when setting up device") causes boot failures when trying to start linux guests with Apple's virtualization framework (for example using https://developer.apple.com/documentation/virtualization/running_linux_in_a_virtual_machine?language=objc) 6cd514e58f12 only solved a cosmetic problem, so revert it to fix the boot failures. Link: https://bugzilla.redhat.com/show_bug.cgi?id=2137803 Signed-off-by: Bjorn Helgaas --- drivers/pci/probe.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index b66fa42c4b1f..1d6f7b502020 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -1891,9 +1891,6 @@ int pci_setup_device(struct pci_dev *dev) dev->broken_intx_masking = pci_intx_mask_broken(dev); - /* Clear errors left from system firmware */ - pci_write_config_word(dev, PCI_STATUS, 0xffff); - switch (dev->hdr_type) { /* header type */ case PCI_HEADER_TYPE_NORMAL: /* standard header */ if (class == PCI_CLASS_BRIDGE_PCI) From c14f7ccc9f5dcf9d06ddeec706f85405b2c80600 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Thu, 14 Jul 2022 20:41:30 +0200 Subject: [PATCH 3/5] PCI: Assign PCI domain IDs by ida_alloc() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace assignment of PCI domain IDs from atomic_inc_return() to ida_alloc(). Use two IDAs, one for static domain allocations (those which are defined in device tree) and second for dynamic allocations (all other). During removal of root bus / host bridge, also release the domain ID. The released ID can be reused again, for example when dynamically loading and unloading native PCI host bridge drivers. This change also allows to mix static device tree assignment and dynamic by kernel as all static allocations are reserved in dynamic pool. [bhelgaas: set "err" if "bus->domain_nr < 0"] Link: https://lore.kernel.org/r/20220714184130.5436-1-pali@kernel.org Signed-off-by: Pali Rohár Signed-off-by: Bjorn Helgaas --- drivers/pci/pci.c | 107 +++++++++++++++++++++++++------------------ drivers/pci/probe.c | 7 +++ drivers/pci/remove.c | 6 +++ include/linux/pci.h | 1 + 4 files changed, 76 insertions(+), 45 deletions(-) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 2127aba3550b..9f3cc829dfee 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -6743,60 +6743,70 @@ static void pci_no_domains(void) } #ifdef CONFIG_PCI_DOMAINS_GENERIC -static atomic_t __domain_nr = ATOMIC_INIT(-1); +static DEFINE_IDA(pci_domain_nr_static_ida); +static DEFINE_IDA(pci_domain_nr_dynamic_ida); -static int pci_get_new_domain_nr(void) +static void of_pci_reserve_static_domain_nr(void) { - return atomic_inc_return(&__domain_nr); + struct device_node *np; + int domain_nr; + + for_each_node_by_type(np, "pci") { + domain_nr = of_get_pci_domain_nr(np); + if (domain_nr < 0) + continue; + /* + * Permanently allocate domain_nr in dynamic_ida + * to prevent it from dynamic allocation. + */ + ida_alloc_range(&pci_domain_nr_dynamic_ida, + domain_nr, domain_nr, GFP_KERNEL); + } } static int of_pci_bus_find_domain_nr(struct device *parent) { - static int use_dt_domains = -1; - int domain = -1; + static bool static_domains_reserved = false; + int domain_nr; - if (parent) - domain = of_get_pci_domain_nr(parent->of_node); - - /* - * Check DT domain and use_dt_domains values. - * - * If DT domain property is valid (domain >= 0) and - * use_dt_domains != 0, the DT assignment is valid since this means - * we have not previously allocated a domain number by using - * pci_get_new_domain_nr(); we should also update use_dt_domains to - * 1, to indicate that we have just assigned a domain number from - * DT. - * - * If DT domain property value is not valid (ie domain < 0), and we - * have not previously assigned a domain number from DT - * (use_dt_domains != 1) we should assign a domain number by - * using the: - * - * pci_get_new_domain_nr() - * - * API and update the use_dt_domains value to keep track of method we - * are using to assign domain numbers (use_dt_domains = 0). - * - * All other combinations imply we have a platform that is trying - * to mix domain numbers obtained from DT and pci_get_new_domain_nr(), - * which is a recipe for domain mishandling and it is prevented by - * invalidating the domain value (domain = -1) and printing a - * corresponding error. - */ - if (domain >= 0 && use_dt_domains) { - use_dt_domains = 1; - } else if (domain < 0 && use_dt_domains != 1) { - use_dt_domains = 0; - domain = pci_get_new_domain_nr(); - } else { - if (parent) - pr_err("Node %pOF has ", parent->of_node); - pr_err("Inconsistent \"linux,pci-domain\" property in DT\n"); - domain = -1; + /* On the first call scan device tree for static allocations. */ + if (!static_domains_reserved) { + of_pci_reserve_static_domain_nr(); + static_domains_reserved = true; } - return domain; + if (parent) { + /* + * If domain is in DT, allocate it in static IDA. This + * prevents duplicate static allocations in case of errors + * in DT. + */ + domain_nr = of_get_pci_domain_nr(parent->of_node); + if (domain_nr >= 0) + return ida_alloc_range(&pci_domain_nr_static_ida, + domain_nr, domain_nr, + GFP_KERNEL); + } + + /* + * If domain was not specified in DT, choose a free ID from dynamic + * allocations. All domain numbers from DT are permanently in + * dynamic allocations to prevent assigning them to other DT nodes + * without static domain. + */ + return ida_alloc(&pci_domain_nr_dynamic_ida, GFP_KERNEL); +} + +static void of_pci_bus_release_domain_nr(struct pci_bus *bus, struct device *parent) +{ + if (bus->domain_nr < 0) + return; + + /* Release domain from IDA where it was allocated. */ + if (of_get_pci_domain_nr(parent->of_node) == bus->domain_nr) + ida_free(&pci_domain_nr_static_ida, bus->domain_nr); + else + ida_free(&pci_domain_nr_dynamic_ida, bus->domain_nr); } int pci_bus_find_domain_nr(struct pci_bus *bus, struct device *parent) @@ -6804,6 +6814,13 @@ int pci_bus_find_domain_nr(struct pci_bus *bus, struct device *parent) return acpi_disabled ? of_pci_bus_find_domain_nr(parent) : acpi_pci_bus_find_domain_nr(bus); } + +void pci_bus_release_domain_nr(struct pci_bus *bus, struct device *parent) +{ + if (!acpi_disabled) + return; + of_pci_bus_release_domain_nr(bus, parent); +} #endif /** diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 1d6f7b502020..1e234189aff1 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -906,6 +906,10 @@ static int pci_register_host_bridge(struct pci_host_bridge *bridge) bus->domain_nr = pci_bus_find_domain_nr(bus, parent); else bus->domain_nr = bridge->domain_nr; + if (bus->domain_nr < 0) { + err = bus->domain_nr; + goto free; + } #endif b = pci_find_bus(pci_domain_nr(bus), bridge->busnr); @@ -1030,6 +1034,9 @@ unregister: device_del(&bridge->dev); free: +#ifdef CONFIG_PCI_DOMAINS_GENERIC + pci_bus_release_domain_nr(bus, parent); +#endif kfree(bus); return err; } diff --git a/drivers/pci/remove.c b/drivers/pci/remove.c index 4c54c75050dc..0145aef1b930 100644 --- a/drivers/pci/remove.c +++ b/drivers/pci/remove.c @@ -160,6 +160,12 @@ void pci_remove_root_bus(struct pci_bus *bus) pci_remove_bus(bus); host_bridge->bus = NULL; +#ifdef CONFIG_PCI_DOMAINS_GENERIC + /* Release domain_nr if it was dynamically allocated */ + if (host_bridge->domain_nr == PCI_DOMAIN_NR_NOT_SET) + pci_bus_release_domain_nr(bus, host_bridge->dev.parent); +#endif + /* remove the host bridge */ device_del(&host_bridge->dev); } diff --git a/include/linux/pci.h b/include/linux/pci.h index 2bda4a4e47e8..28af4414f789 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1726,6 +1726,7 @@ static inline int acpi_pci_bus_find_domain_nr(struct pci_bus *bus) { return 0; } #endif int pci_bus_find_domain_nr(struct pci_bus *bus, struct device *parent); +void pci_bus_release_domain_nr(struct pci_bus *bus, struct device *parent); #endif /* Some architectures require additional setup to direct VGA traffic */ From 98b04dd0b4577894520493d96bc4623387767445 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Wed, 26 Oct 2022 02:11:21 -0400 Subject: [PATCH 4/5] PCI: Fix pci_device_is_present() for VFs by checking PF pci_device_is_present() previously didn't work for VFs because it reads the Vendor and Device ID, which are 0xffff for VFs, which looks like they aren't present. Check the PF instead. Wei Gong reported that if virtio I/O is in progress when the driver is unbound or "0" is written to /sys/.../sriov_numvfs, the virtio I/O operation hangs, which may result in output like this: task:bash state:D stack: 0 pid: 1773 ppid: 1241 flags:0x00004002 Call Trace: schedule+0x4f/0xc0 blk_mq_freeze_queue_wait+0x69/0xa0 blk_mq_freeze_queue+0x1b/0x20 blk_cleanup_queue+0x3d/0xd0 virtblk_remove+0x3c/0xb0 [virtio_blk] virtio_dev_remove+0x4b/0x80 ... device_unregister+0x1b/0x60 unregister_virtio_device+0x18/0x30 virtio_pci_remove+0x41/0x80 pci_device_remove+0x3e/0xb0 This happened because pci_device_is_present(VF) returned "false" in virtio_pci_remove(), so it called virtio_break_device(). The broken vq meant that vring_interrupt() skipped the vq.callback() that would have completed the virtio I/O operation via virtblk_done(). [bhelgaas: commit log, simplify to always use pci_physfn(), add stable tag] Link: https://lore.kernel.org/r/20221026060912.173250-1-mst@redhat.com Reported-by: Wei Gong Tested-by: Wei Gong Signed-off-by: Michael S. Tsirkin Signed-off-by: Bjorn Helgaas Cc: stable@vger.kernel.org --- drivers/pci/pci.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 9f3cc829dfee..fba95486caaf 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -6447,6 +6447,8 @@ bool pci_device_is_present(struct pci_dev *pdev) { u32 v; + /* Check PF if pdev is a VF, since VF Vendor/Device IDs are 0xffff */ + pdev = pci_physfn(pdev); if (pci_dev_is_disconnected(pdev)) return false; return pci_bus_read_dev_vendor_id(pdev->bus, pdev->devfn, &v, 0); From 2d9cd957d40c3ac491b358e7cff0515bb07a3a9c Mon Sep 17 00:00:00 2001 From: Zeng Heng Date: Mon, 21 Nov 2022 10:00:29 +0800 Subject: [PATCH 5/5] PCI: Check for alloc failure in pci_request_irq() When kvasprintf() fails to allocate memory, it returns a NULL pointer. Return error from pci_request_irq() so we don't dereference it. [bhelgaas: commit log] Fixes: 704e8953d3e9 ("PCI/irq: Add pci_request_irq() and pci_free_irq() helpers") Link: https://lore.kernel.org/r/20221121020029.3759444-1-zengheng4@huawei.com Signed-off-by: Zeng Heng Signed-off-by: Bjorn Helgaas Reviewed-by: Christoph Hellwig --- drivers/pci/irq.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/pci/irq.c b/drivers/pci/irq.c index 12ecd0aaa28d..0050e8f6814e 100644 --- a/drivers/pci/irq.c +++ b/drivers/pci/irq.c @@ -44,6 +44,8 @@ int pci_request_irq(struct pci_dev *dev, unsigned int nr, irq_handler_t handler, va_start(ap, fmt); devname = kvasprintf(GFP_KERNEL, fmt, ap); va_end(ap); + if (!devname) + return -ENOMEM; ret = request_threaded_irq(pci_irq_vector(dev, nr), handler, thread_fn, irqflags, devname, dev_id);