forked from Minki/linux
6b4b78fed4
Problem: New Dell PowerEdge servers have 2 embedded ethernet ports, which are labeled NIC1 and NIC2 on the chassis, in the BIOS setup screens, and in the printed documentation. Assuming no other add-in ethernet ports in the system, Linux 2.4 kernels name these eth0 and eth1 respectively. Many people have come to expect this naming. Linux 2.6 kernels name these eth1 and eth0 respectively (backwards from expectations). I also have reports that various Sun and HP servers have similar behavior. Root cause: Linux 2.4 kernels walk the pci_devices list, which happens to be sorted in breadth-first order (or pcbios_find_device order on i386, which most often is breadth-first also). 2.6 kernels have both the pci_devices list and the pci_bus_type.klist_devices list, the latter is what is walked at driver load time to match the pci_id tables; this klist happens to be in depth-first order. On systems where, for physical routing reasons, NIC1 appears on a lower bus number than NIC2, but NIC2's bridge is discovered first in the depth-first ordering, NIC2 will be discovered before NIC1. If the list were sorted breadth-first, NIC1 would be discovered before NIC2. A PowerEdge 1955 system has the following topology which easily exhibits the difference between depth-first and breadth-first device lists. -[0000:00]-+-00.0 Intel Corporation 5000P Chipset Memory Controller Hub +-02.0-[0000:03-08]--+-00.0-[0000:04-07]--+-00.0-[0000:05-06]----00.0-[0000:06]----00.0 Broadcom Corporation NetXtreme II BCM5708S Gigabit Ethernet (labeled NIC2, 2.4 kernel name eth1, 2.6 kernel name eth0) +-1c.0-[0000:01-02]----00.0-[0000:02]----00.0 Broadcom Corporation NetXtreme II BCM5708S Gigabit Ethernet (labeled NIC1, 2.4 kernel name eth0, 2.6 kernel name eth1) Other factors, such as device driver load order and the presence of PCI slots at various points in the bus hierarchy further complicate this problem; I'm not trying to solve those here, just restore the device order, and thus basic behavior, that 2.4 kernels had. Solution: The solution can come in multiple steps. Suggested fix #1: kernel Patch below optionally sorts the two device lists into breadth-first ordering to maintain compatibility with 2.4 kernels. It adds two new command line options: pci=bfsort pci=nobfsort to force the sort order, or not, as you wish. It also adds DMI checks for the specific Dell systems which exhibit "backwards" ordering, to make them "right". Suggested fix #2: udev rules from userland Many people also have the expectation that embedded NICs are always discovered before add-in NICs (which this patch does not try to do). Using the PCI IRQ Routing Table provided by system BIOS, it's easy to determine which PCI devices are embedded, or if add-in, which PCI slot they're in. I'm working on a tool that would allow udev to name ethernet devices in ascending embedded, slot 1 .. slot N order, subsort by PCI bus/dev/fn breadth-first. It'll be possible to use it independent of udev as well for those distributions that don't use udev in their installers. Suggested fix #3: system board routing rules One can constrain the system board layout to put NIC1 ahead of NIC2 regardless of breadth-first or depth-first discovery order. This adds a significant level of complexity to board routing, and may not be possible in all instances (witness the above systems from several major manufacturers). I don't want to encourage this particular train of thought too far, at the expense of not doing #1 or #2 above. Feedback appreciated. Patch tested on a Dell PowerEdge 1955 blade with 2.6.18. You'll also note I took some liberty and temporarily break the klist abstraction to simplify and speed up the sort algorithm. I think that's both safe and appropriate in this instance. Signed-off-by: Matt Domsch <Matt_Domsch@dell.com> Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
350 lines
8.1 KiB
C
350 lines
8.1 KiB
C
/*
|
|
* Low-Level PCI Support for PC
|
|
*
|
|
* (c) 1999--2000 Martin Mares <mj@ucw.cz>
|
|
*/
|
|
|
|
#include <linux/sched.h>
|
|
#include <linux/pci.h>
|
|
#include <linux/ioport.h>
|
|
#include <linux/init.h>
|
|
#include <linux/dmi.h>
|
|
|
|
#include <asm/acpi.h>
|
|
#include <asm/segment.h>
|
|
#include <asm/io.h>
|
|
#include <asm/smp.h>
|
|
|
|
#include "pci.h"
|
|
|
|
unsigned int pci_probe = PCI_PROBE_BIOS | PCI_PROBE_CONF1 | PCI_PROBE_CONF2 |
|
|
PCI_PROBE_MMCONF;
|
|
|
|
int pci_bf_sort;
|
|
int pci_routeirq;
|
|
int pcibios_last_bus = -1;
|
|
unsigned long pirq_table_addr;
|
|
struct pci_bus *pci_root_bus;
|
|
struct pci_raw_ops *raw_pci_ops;
|
|
|
|
static int pci_read(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 *value)
|
|
{
|
|
return raw_pci_ops->read(0, bus->number, devfn, where, size, value);
|
|
}
|
|
|
|
static int pci_write(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 value)
|
|
{
|
|
return raw_pci_ops->write(0, bus->number, devfn, where, size, value);
|
|
}
|
|
|
|
struct pci_ops pci_root_ops = {
|
|
.read = pci_read,
|
|
.write = pci_write,
|
|
};
|
|
|
|
/*
|
|
* legacy, numa, and acpi all want to call pcibios_scan_root
|
|
* from their initcalls. This flag prevents that.
|
|
*/
|
|
int pcibios_scanned;
|
|
|
|
/*
|
|
* This interrupt-safe spinlock protects all accesses to PCI
|
|
* configuration space.
|
|
*/
|
|
DEFINE_SPINLOCK(pci_config_lock);
|
|
|
|
/*
|
|
* Several buggy motherboards address only 16 devices and mirror
|
|
* them to next 16 IDs. We try to detect this `feature' on all
|
|
* primary buses (those containing host bridges as they are
|
|
* expected to be unique) and remove the ghost devices.
|
|
*/
|
|
|
|
static void __devinit pcibios_fixup_ghosts(struct pci_bus *b)
|
|
{
|
|
struct list_head *ln, *mn;
|
|
struct pci_dev *d, *e;
|
|
int mirror = PCI_DEVFN(16,0);
|
|
int seen_host_bridge = 0;
|
|
int i;
|
|
|
|
DBG("PCI: Scanning for ghost devices on bus %d\n", b->number);
|
|
list_for_each(ln, &b->devices) {
|
|
d = pci_dev_b(ln);
|
|
if ((d->class >> 8) == PCI_CLASS_BRIDGE_HOST)
|
|
seen_host_bridge++;
|
|
for (mn=ln->next; mn != &b->devices; mn=mn->next) {
|
|
e = pci_dev_b(mn);
|
|
if (e->devfn != d->devfn + mirror ||
|
|
e->vendor != d->vendor ||
|
|
e->device != d->device ||
|
|
e->class != d->class)
|
|
continue;
|
|
for(i=0; i<PCI_NUM_RESOURCES; i++)
|
|
if (e->resource[i].start != d->resource[i].start ||
|
|
e->resource[i].end != d->resource[i].end ||
|
|
e->resource[i].flags != d->resource[i].flags)
|
|
continue;
|
|
break;
|
|
}
|
|
if (mn == &b->devices)
|
|
return;
|
|
}
|
|
if (!seen_host_bridge)
|
|
return;
|
|
printk(KERN_WARNING "PCI: Ignoring ghost devices on bus %02x\n", b->number);
|
|
|
|
ln = &b->devices;
|
|
while (ln->next != &b->devices) {
|
|
d = pci_dev_b(ln->next);
|
|
if (d->devfn >= mirror) {
|
|
list_del(&d->global_list);
|
|
list_del(&d->bus_list);
|
|
kfree(d);
|
|
} else
|
|
ln = ln->next;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Called after each bus is probed, but before its children
|
|
* are examined.
|
|
*/
|
|
|
|
void __devinit pcibios_fixup_bus(struct pci_bus *b)
|
|
{
|
|
pcibios_fixup_ghosts(b);
|
|
pci_read_bridge_bases(b);
|
|
}
|
|
|
|
/*
|
|
* Only use DMI information to set this if nothing was passed
|
|
* on the kernel command line (which was parsed earlier).
|
|
*/
|
|
|
|
static int __devinit set_bf_sort(struct dmi_system_id *d)
|
|
{
|
|
if (pci_bf_sort == pci_bf_sort_default) {
|
|
pci_bf_sort = pci_dmi_bf;
|
|
printk(KERN_INFO "PCI: %s detected, enabling pci=bfsort.\n", d->ident);
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Enable renumbering of PCI bus# ranges to reach all PCI busses (Cardbus)
|
|
*/
|
|
#ifdef __i386__
|
|
static int __devinit assign_all_busses(struct dmi_system_id *d)
|
|
{
|
|
pci_probe |= PCI_ASSIGN_ALL_BUSSES;
|
|
printk(KERN_INFO "%s detected: enabling PCI bus# renumbering"
|
|
" (pci=assign-busses)\n", d->ident);
|
|
return 0;
|
|
}
|
|
#endif
|
|
|
|
static struct dmi_system_id __devinitdata pciprobe_dmi_table[] = {
|
|
#ifdef __i386__
|
|
/*
|
|
* Laptops which need pci=assign-busses to see Cardbus cards
|
|
*/
|
|
{
|
|
.callback = assign_all_busses,
|
|
.ident = "Samsung X20 Laptop",
|
|
.matches = {
|
|
DMI_MATCH(DMI_SYS_VENDOR, "Samsung Electronics"),
|
|
DMI_MATCH(DMI_PRODUCT_NAME, "SX20S"),
|
|
},
|
|
},
|
|
#endif /* __i386__ */
|
|
{
|
|
.callback = set_bf_sort,
|
|
.ident = "Dell PowerEdge 1950",
|
|
.matches = {
|
|
DMI_MATCH(DMI_SYS_VENDOR, "Dell"),
|
|
DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge 1950"),
|
|
},
|
|
},
|
|
{
|
|
.callback = set_bf_sort,
|
|
.ident = "Dell PowerEdge 1955",
|
|
.matches = {
|
|
DMI_MATCH(DMI_SYS_VENDOR, "Dell"),
|
|
DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge 1955"),
|
|
},
|
|
},
|
|
{
|
|
.callback = set_bf_sort,
|
|
.ident = "Dell PowerEdge 2900",
|
|
.matches = {
|
|
DMI_MATCH(DMI_SYS_VENDOR, "Dell"),
|
|
DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge 2900"),
|
|
},
|
|
},
|
|
{
|
|
.callback = set_bf_sort,
|
|
.ident = "Dell PowerEdge 2950",
|
|
.matches = {
|
|
DMI_MATCH(DMI_SYS_VENDOR, "Dell"),
|
|
DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge 2950"),
|
|
},
|
|
},
|
|
{}
|
|
};
|
|
|
|
struct pci_bus * __devinit pcibios_scan_root(int busnum)
|
|
{
|
|
struct pci_bus *bus = NULL;
|
|
|
|
dmi_check_system(pciprobe_dmi_table);
|
|
|
|
while ((bus = pci_find_next_bus(bus)) != NULL) {
|
|
if (bus->number == busnum) {
|
|
/* Already scanned */
|
|
return bus;
|
|
}
|
|
}
|
|
|
|
printk(KERN_DEBUG "PCI: Probing PCI hardware (bus %02x)\n", busnum);
|
|
|
|
return pci_scan_bus_parented(NULL, busnum, &pci_root_ops, NULL);
|
|
}
|
|
|
|
extern u8 pci_cache_line_size;
|
|
|
|
static int __init pcibios_init(void)
|
|
{
|
|
struct cpuinfo_x86 *c = &boot_cpu_data;
|
|
|
|
if (!raw_pci_ops) {
|
|
printk(KERN_WARNING "PCI: System does not support PCI\n");
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Assume PCI cacheline size of 32 bytes for all x86s except K7/K8
|
|
* and P4. It's also good for 386/486s (which actually have 16)
|
|
* as quite a few PCI devices do not support smaller values.
|
|
*/
|
|
pci_cache_line_size = 32 >> 2;
|
|
if (c->x86 >= 6 && c->x86_vendor == X86_VENDOR_AMD)
|
|
pci_cache_line_size = 64 >> 2; /* K7 & K8 */
|
|
else if (c->x86 > 6 && c->x86_vendor == X86_VENDOR_INTEL)
|
|
pci_cache_line_size = 128 >> 2; /* P4 */
|
|
|
|
pcibios_resource_survey();
|
|
|
|
if (pci_bf_sort >= pci_force_bf)
|
|
pci_sort_breadthfirst();
|
|
#ifdef CONFIG_PCI_BIOS
|
|
if ((pci_probe & PCI_BIOS_SORT) && !(pci_probe & PCI_NO_SORT))
|
|
pcibios_sort();
|
|
#endif
|
|
return 0;
|
|
}
|
|
|
|
subsys_initcall(pcibios_init);
|
|
|
|
char * __devinit pcibios_setup(char *str)
|
|
{
|
|
if (!strcmp(str, "off")) {
|
|
pci_probe = 0;
|
|
return NULL;
|
|
} else if (!strcmp(str, "bfsort")) {
|
|
pci_bf_sort = pci_force_bf;
|
|
return NULL;
|
|
} else if (!strcmp(str, "nobfsort")) {
|
|
pci_bf_sort = pci_force_nobf;
|
|
return NULL;
|
|
}
|
|
#ifdef CONFIG_PCI_BIOS
|
|
else if (!strcmp(str, "bios")) {
|
|
pci_probe = PCI_PROBE_BIOS;
|
|
return NULL;
|
|
} else if (!strcmp(str, "nobios")) {
|
|
pci_probe &= ~PCI_PROBE_BIOS;
|
|
return NULL;
|
|
} else if (!strcmp(str, "nosort")) {
|
|
pci_probe |= PCI_NO_SORT;
|
|
return NULL;
|
|
} else if (!strcmp(str, "biosirq")) {
|
|
pci_probe |= PCI_BIOS_IRQ_SCAN;
|
|
return NULL;
|
|
} else if (!strncmp(str, "pirqaddr=", 9)) {
|
|
pirq_table_addr = simple_strtoul(str+9, NULL, 0);
|
|
return NULL;
|
|
}
|
|
#endif
|
|
#ifdef CONFIG_PCI_DIRECT
|
|
else if (!strcmp(str, "conf1")) {
|
|
pci_probe = PCI_PROBE_CONF1 | PCI_NO_CHECKS;
|
|
return NULL;
|
|
}
|
|
else if (!strcmp(str, "conf2")) {
|
|
pci_probe = PCI_PROBE_CONF2 | PCI_NO_CHECKS;
|
|
return NULL;
|
|
}
|
|
#endif
|
|
#ifdef CONFIG_PCI_MMCONFIG
|
|
else if (!strcmp(str, "nommconf")) {
|
|
pci_probe &= ~PCI_PROBE_MMCONF;
|
|
return NULL;
|
|
}
|
|
#endif
|
|
else if (!strcmp(str, "noacpi")) {
|
|
acpi_noirq_set();
|
|
return NULL;
|
|
}
|
|
else if (!strcmp(str, "noearly")) {
|
|
pci_probe |= PCI_PROBE_NOEARLY;
|
|
return NULL;
|
|
}
|
|
#ifndef CONFIG_X86_VISWS
|
|
else if (!strcmp(str, "usepirqmask")) {
|
|
pci_probe |= PCI_USE_PIRQ_MASK;
|
|
return NULL;
|
|
} else if (!strncmp(str, "irqmask=", 8)) {
|
|
pcibios_irq_mask = simple_strtol(str+8, NULL, 0);
|
|
return NULL;
|
|
} else if (!strncmp(str, "lastbus=", 8)) {
|
|
pcibios_last_bus = simple_strtol(str+8, NULL, 0);
|
|
return NULL;
|
|
}
|
|
#endif
|
|
else if (!strcmp(str, "rom")) {
|
|
pci_probe |= PCI_ASSIGN_ROMS;
|
|
return NULL;
|
|
} else if (!strcmp(str, "assign-busses")) {
|
|
pci_probe |= PCI_ASSIGN_ALL_BUSSES;
|
|
return NULL;
|
|
} else if (!strcmp(str, "routeirq")) {
|
|
pci_routeirq = 1;
|
|
return NULL;
|
|
}
|
|
return str;
|
|
}
|
|
|
|
unsigned int pcibios_assign_all_busses(void)
|
|
{
|
|
return (pci_probe & PCI_ASSIGN_ALL_BUSSES) ? 1 : 0;
|
|
}
|
|
|
|
int pcibios_enable_device(struct pci_dev *dev, int mask)
|
|
{
|
|
int err;
|
|
|
|
if ((err = pcibios_enable_resources(dev, mask)) < 0)
|
|
return err;
|
|
|
|
return pcibios_enable_irq(dev);
|
|
}
|
|
|
|
void pcibios_disable_device (struct pci_dev *dev)
|
|
{
|
|
pcibios_disable_resources(dev);
|
|
if (pcibios_disable_irq)
|
|
pcibios_disable_irq(dev);
|
|
}
|