From cb4cb2cb9b0b14bdf2fc7125e099ed7e818cea42 Mon Sep 17 00:00:00 2001 From: Prarit Bhargava <prarit@sgi.com> Date: Wed, 6 Jul 2005 14:59:44 -0700 Subject: [PATCH 01/11] [IA64] hotplug/ia64: SN Hotplug Driver: SN IRQ Fixes This patch fixes the SN IRQ code such that cpu affinity and Hotplug can modify IRQ values. The sn_irq_info structures are now locked using a RCU lock mechanism to avoid lock contention in the lost interrupt WAR code. Signed-off-by: Prarit Bhargava <prarit@sgi.com> Signed-off-by: Tony Luck <tony.luck@intel.com> --- arch/ia64/sn/kernel/io_init.c | 39 +++--- arch/ia64/sn/kernel/irq.c | 246 ++++++++++++++++++---------------- include/asm-ia64/sn/intr.h | 6 +- include/asm-ia64/sn/pcidev.h | 4 +- 4 files changed, 160 insertions(+), 135 deletions(-) diff --git a/arch/ia64/sn/kernel/io_init.c b/arch/ia64/sn/kernel/io_init.c index 783eb4323847..2f03e3f52b63 100644 --- a/arch/ia64/sn/kernel/io_init.c +++ b/arch/ia64/sn/kernel/io_init.c @@ -21,7 +21,6 @@ #include <asm/sn/simulator.h> #include <asm/sn/tioca_provider.h> -char master_baseio_wid; nasid_t master_nasid = INVALID_NASID; /* Partition Master */ struct slab_info { @@ -231,11 +230,13 @@ static void sn_pci_fixup_slot(struct pci_dev *dev) { int idx; int segment = 0; - uint64_t size; - struct sn_irq_info *sn_irq_info; - struct pci_dev *host_pci_dev; int status = 0; struct pcibus_bussoft *bs; + struct pci_bus *host_pci_bus; + struct pci_dev *host_pci_dev; + struct sn_irq_info *sn_irq_info; + unsigned long size; + unsigned int bus_no, devfn; dev->sysdata = kmalloc(sizeof(struct pcidev_info), GFP_KERNEL); if (SN_PCIDEV_INFO(dev) <= 0) @@ -253,7 +254,7 @@ static void sn_pci_fixup_slot(struct pci_dev *dev) (u64) __pa(SN_PCIDEV_INFO(dev)), (u64) __pa(sn_irq_info)); if (status) - BUG(); /* Cannot get platform pci device information information */ + BUG(); /* Cannot get platform pci device information */ /* Copy over PIO Mapped Addresses */ for (idx = 0; idx <= PCI_ROM_RESOURCE; idx++) { @@ -275,15 +276,20 @@ static void sn_pci_fixup_slot(struct pci_dev *dev) dev->resource[idx].parent = &iomem_resource; } - /* set up host bus linkages */ - bs = SN_PCIBUS_BUSSOFT(dev->bus); - host_pci_dev = - pci_find_slot(SN_PCIDEV_INFO(dev)->pdi_slot_host_handle >> 32, - SN_PCIDEV_INFO(dev)-> - pdi_slot_host_handle & 0xffffffff); + /* Using the PROMs values for the PCI host bus, get the Linux + * PCI host_pci_dev struct and set up host bus linkages + */ + + bus_no = SN_PCIDEV_INFO(dev)->pdi_slot_host_handle >> 32; + devfn = SN_PCIDEV_INFO(dev)->pdi_slot_host_handle & 0xffffffff; + host_pci_bus = pci_find_bus(pci_domain_nr(dev->bus), bus_no); + host_pci_dev = pci_get_slot(host_pci_bus, devfn); + + SN_PCIDEV_INFO(dev)->host_pci_dev = host_pci_dev; SN_PCIDEV_INFO(dev)->pdi_host_pcidev_info = - SN_PCIDEV_INFO(host_pci_dev); + SN_PCIDEV_INFO(host_pci_dev); SN_PCIDEV_INFO(dev)->pdi_linux_pcidev = dev; + bs = SN_PCIBUS_BUSSOFT(dev->bus); SN_PCIDEV_INFO(dev)->pdi_pcibus_info = bs; if (bs && bs->bs_asic_type < PCIIO_ASIC_MAX_TYPES) { @@ -297,6 +303,9 @@ static void sn_pci_fixup_slot(struct pci_dev *dev) SN_PCIDEV_INFO(dev)->pdi_sn_irq_info = sn_irq_info; dev->irq = SN_PCIDEV_INFO(dev)->pdi_sn_irq_info->irq_irq; sn_irq_fixup(dev, sn_irq_info); + } else { + SN_PCIDEV_INFO(dev)->pdi_sn_irq_info = NULL; + kfree(sn_irq_info); } } @@ -403,11 +412,7 @@ static int __init sn_pci_init(void) */ ia64_max_iommu_merge_mask = ~PAGE_MASK; sn_fixup_ionodes(); - sn_irq = kmalloc(sizeof(struct sn_irq_info *) * NR_IRQS, GFP_KERNEL); - if (sn_irq <= 0) - BUG(); /* Canno afford to run out of memory. */ - memset(sn_irq, 0, sizeof(struct sn_irq_info *) * NR_IRQS); - + sn_irq_lh_init(); sn_init_cpei_timer(); #ifdef CONFIG_PROC_FS diff --git a/arch/ia64/sn/kernel/irq.c b/arch/ia64/sn/kernel/irq.c index 0f4e8138658f..e6f7551edfda 100644 --- a/arch/ia64/sn/kernel/irq.c +++ b/arch/ia64/sn/kernel/irq.c @@ -9,6 +9,7 @@ */ #include <linux/irq.h> +#include <linux/spinlock.h> #include <asm/sn/intr.h> #include <asm/sn/addrs.h> #include <asm/sn/arch.h> @@ -25,7 +26,8 @@ static void unregister_intr_pda(struct sn_irq_info *sn_irq_info); extern int sn_force_interrupt_flag; extern int sn_ioif_inited; -struct sn_irq_info **sn_irq; +static struct list_head **sn_irq_lh; +static spinlock_t sn_irq_info_lock = SPIN_LOCK_UNLOCKED; /* non-IRQ lock */ static inline uint64_t sn_intr_alloc(nasid_t local_nasid, int local_widget, u64 sn_irq_info, @@ -101,7 +103,7 @@ static void sn_end_irq(unsigned int irq) nasid = get_nasid(); event_occurred = HUB_L((uint64_t *) GLOBAL_MMR_ADDR (nasid, SH_EVENT_OCCURRED)); - /* If the UART bit is set here, we may have received an + /* If the UART bit is set here, we may have received an * interrupt from the UART that the driver missed. To * make sure, we IPI ourselves to force us to look again. */ @@ -115,82 +117,84 @@ static void sn_end_irq(unsigned int irq) force_interrupt(irq); } +static void sn_irq_info_free(struct rcu_head *head); + static void sn_set_affinity_irq(unsigned int irq, cpumask_t mask) { - struct sn_irq_info *sn_irq_info = sn_irq[irq]; - struct sn_irq_info *tmp_sn_irq_info; + struct sn_irq_info *sn_irq_info, *sn_irq_info_safe; int cpuid, cpuphys; - nasid_t t_nasid; /* nasid to target */ - int t_slice; /* slice to target */ - - /* allocate a temp sn_irq_info struct to get new target info */ - tmp_sn_irq_info = kmalloc(sizeof(*tmp_sn_irq_info), GFP_KERNEL); - if (!tmp_sn_irq_info) - return; cpuid = first_cpu(mask); cpuphys = cpu_physical_id(cpuid); - t_nasid = cpuid_to_nasid(cpuid); - t_slice = cpuid_to_slice(cpuid); - while (sn_irq_info) { - int status; - int local_widget; - uint64_t bridge = (uint64_t) sn_irq_info->irq_bridge; - nasid_t local_nasid = NASID_GET(bridge); + list_for_each_entry_safe(sn_irq_info, sn_irq_info_safe, + sn_irq_lh[irq], list) { + uint64_t bridge; + int local_widget, status; + nasid_t local_nasid; + struct sn_irq_info *new_irq_info; - if (!bridge) - break; /* irq is not a device interrupt */ + new_irq_info = kmalloc(sizeof(struct sn_irq_info), GFP_ATOMIC); + if (new_irq_info == NULL) + break; + memcpy(new_irq_info, sn_irq_info, sizeof(struct sn_irq_info)); + + bridge = (uint64_t) new_irq_info->irq_bridge; + if (!bridge) { + kfree(new_irq_info); + break; /* irq is not a device interrupt */ + } + + local_nasid = NASID_GET(bridge); if (local_nasid & 1) local_widget = TIO_SWIN_WIDGETNUM(bridge); else local_widget = SWIN_WIDGETNUM(bridge); - /* Free the old PROM sn_irq_info structure */ - sn_intr_free(local_nasid, local_widget, sn_irq_info); + /* Free the old PROM new_irq_info structure */ + sn_intr_free(local_nasid, local_widget, new_irq_info); + /* Update kernels new_irq_info with new target info */ + unregister_intr_pda(new_irq_info); - /* allocate a new PROM sn_irq_info struct */ + /* allocate a new PROM new_irq_info struct */ status = sn_intr_alloc(local_nasid, local_widget, - __pa(tmp_sn_irq_info), irq, t_nasid, - t_slice); + __pa(new_irq_info), irq, + cpuid_to_nasid(cpuid), + cpuid_to_slice(cpuid)); - if (status == 0) { - /* Update kernels sn_irq_info with new target info */ - unregister_intr_pda(sn_irq_info); - sn_irq_info->irq_cpuid = cpuid; - sn_irq_info->irq_nasid = t_nasid; - sn_irq_info->irq_slice = t_slice; - sn_irq_info->irq_xtalkaddr = - tmp_sn_irq_info->irq_xtalkaddr; - sn_irq_info->irq_cookie = tmp_sn_irq_info->irq_cookie; - register_intr_pda(sn_irq_info); + /* SAL call failed */ + if (status) { + kfree(new_irq_info); + break; + } - if (IS_PCI_BRIDGE_ASIC(sn_irq_info->irq_bridge_type)) { - pcibr_change_devices_irq(sn_irq_info); - } + new_irq_info->irq_cpuid = cpuid; + register_intr_pda(new_irq_info); - sn_irq_info = sn_irq_info->irq_next; + if (IS_PCI_BRIDGE_ASIC(new_irq_info->irq_bridge_type)) + pcibr_change_devices_irq(new_irq_info); + + spin_lock(&sn_irq_info_lock); + list_replace_rcu(&sn_irq_info->list, &new_irq_info->list); + spin_unlock(&sn_irq_info_lock); + call_rcu(&sn_irq_info->rcu, sn_irq_info_free); #ifdef CONFIG_SMP - set_irq_affinity_info((irq & 0xff), cpuphys, 0); + set_irq_affinity_info((irq & 0xff), cpuphys, 0); #endif - } else { - break; /* snp_affinity failed the intr_alloc */ - } } - kfree(tmp_sn_irq_info); } struct hw_interrupt_type irq_type_sn = { - "SN hub", - sn_startup_irq, - sn_shutdown_irq, - sn_enable_irq, - sn_disable_irq, - sn_ack_irq, - sn_end_irq, - sn_set_affinity_irq + .typename = "SN hub", + .startup = sn_startup_irq, + .shutdown = sn_shutdown_irq, + .enable = sn_enable_irq, + .disable = sn_disable_irq, + .ack = sn_ack_irq, + .end = sn_end_irq, + .set_affinity = sn_set_affinity_irq }; unsigned int sn_local_vector_to_irq(u8 vector) @@ -231,19 +235,18 @@ static void unregister_intr_pda(struct sn_irq_info *sn_irq_info) struct sn_irq_info *tmp_irq_info; int i, foundmatch; + rcu_read_lock(); if (pdacpu(cpu)->sn_last_irq == irq) { foundmatch = 0; - for (i = pdacpu(cpu)->sn_last_irq - 1; i; i--) { - tmp_irq_info = sn_irq[i]; - while (tmp_irq_info) { + for (i = pdacpu(cpu)->sn_last_irq - 1; + i && !foundmatch; i--) { + list_for_each_entry_rcu(tmp_irq_info, + sn_irq_lh[i], + list) { if (tmp_irq_info->irq_cpuid == cpu) { - foundmatch++; + foundmatch = 1; break; } - tmp_irq_info = tmp_irq_info->irq_next; - } - if (foundmatch) { - break; } } pdacpu(cpu)->sn_last_irq = i; @@ -251,60 +254,27 @@ static void unregister_intr_pda(struct sn_irq_info *sn_irq_info) if (pdacpu(cpu)->sn_first_irq == irq) { foundmatch = 0; - for (i = pdacpu(cpu)->sn_first_irq + 1; i < NR_IRQS; i++) { - tmp_irq_info = sn_irq[i]; - while (tmp_irq_info) { + for (i = pdacpu(cpu)->sn_first_irq + 1; + i < NR_IRQS && !foundmatch; i++) { + list_for_each_entry_rcu(tmp_irq_info, + sn_irq_lh[i], + list) { if (tmp_irq_info->irq_cpuid == cpu) { - foundmatch++; + foundmatch = 1; break; } - tmp_irq_info = tmp_irq_info->irq_next; - } - if (foundmatch) { - break; } } pdacpu(cpu)->sn_first_irq = ((i == NR_IRQS) ? 0 : i); } + rcu_read_unlock(); } -struct sn_irq_info *sn_irq_alloc(nasid_t local_nasid, int local_widget, int irq, - nasid_t nasid, int slice) +static void sn_irq_info_free(struct rcu_head *head) { struct sn_irq_info *sn_irq_info; - int status; - - sn_irq_info = kmalloc(sizeof(*sn_irq_info), GFP_KERNEL); - if (sn_irq_info == NULL) - return NULL; - - memset(sn_irq_info, 0x0, sizeof(*sn_irq_info)); - - status = - sn_intr_alloc(local_nasid, local_widget, __pa(sn_irq_info), irq, - nasid, slice); - - if (status) { - kfree(sn_irq_info); - return NULL; - } else { - return sn_irq_info; - } -} - -void sn_irq_free(struct sn_irq_info *sn_irq_info) -{ - uint64_t bridge = (uint64_t) sn_irq_info->irq_bridge; - nasid_t local_nasid = NASID_GET(bridge); - int local_widget; - - if (local_nasid & 1) /* tio check */ - local_widget = TIO_SWIN_WIDGETNUM(bridge); - else - local_widget = SWIN_WIDGETNUM(bridge); - - sn_intr_free(local_nasid, local_widget, sn_irq_info); + sn_irq_info = container_of(head, struct sn_irq_info, rcu); kfree(sn_irq_info); } @@ -314,30 +284,54 @@ void sn_irq_fixup(struct pci_dev *pci_dev, struct sn_irq_info *sn_irq_info) int slice = sn_irq_info->irq_slice; int cpu = nasid_slice_to_cpuid(nasid, slice); + pci_dev_get(pci_dev); + sn_irq_info->irq_cpuid = cpu; sn_irq_info->irq_pciioinfo = SN_PCIDEV_INFO(pci_dev); /* link it into the sn_irq[irq] list */ - sn_irq_info->irq_next = sn_irq[sn_irq_info->irq_irq]; - sn_irq[sn_irq_info->irq_irq] = sn_irq_info; + spin_lock(&sn_irq_info_lock); + list_add_rcu(&sn_irq_info->list, sn_irq_lh[sn_irq_info->irq_irq]); + spin_unlock(&sn_irq_info_lock); (void)register_intr_pda(sn_irq_info); } +void sn_irq_unfixup(struct pci_dev *pci_dev) +{ + struct sn_irq_info *sn_irq_info; + + /* Only cleanup IRQ stuff if this device has a host bus context */ + if (!SN_PCIDEV_BUSSOFT(pci_dev)) + return; + + sn_irq_info = SN_PCIDEV_INFO(pci_dev)->pdi_sn_irq_info; + if (!sn_irq_info || !sn_irq_info->irq_irq) + return; + + unregister_intr_pda(sn_irq_info); + spin_lock(&sn_irq_info_lock); + list_del_rcu(&sn_irq_info->list); + spin_unlock(&sn_irq_info_lock); + call_rcu(&sn_irq_info->rcu, sn_irq_info_free); + + pci_dev_put(pci_dev); +} + static void force_interrupt(int irq) { struct sn_irq_info *sn_irq_info; if (!sn_ioif_inited) return; - sn_irq_info = sn_irq[irq]; - while (sn_irq_info) { + + rcu_read_lock(); + list_for_each_entry_rcu(sn_irq_info, sn_irq_lh[irq], list) { if (IS_PCI_BRIDGE_ASIC(sn_irq_info->irq_bridge_type) && - (sn_irq_info->irq_bridge != NULL)) { + (sn_irq_info->irq_bridge != NULL)) pcibr_force_interrupt(sn_irq_info); - } - sn_irq_info = sn_irq_info->irq_next; } + rcu_read_unlock(); } /* @@ -402,19 +396,41 @@ static void sn_check_intr(int irq, struct sn_irq_info *sn_irq_info) void sn_lb_int_war_check(void) { + struct sn_irq_info *sn_irq_info; int i; if (!sn_ioif_inited || pda->sn_first_irq == 0) return; + + rcu_read_lock(); for (i = pda->sn_first_irq; i <= pda->sn_last_irq; i++) { - struct sn_irq_info *sn_irq_info = sn_irq[i]; - while (sn_irq_info) { - /* Only call for PCI bridges that are fully initialized. */ + list_for_each_entry_rcu(sn_irq_info, sn_irq_lh[i], list) { + /* + * Only call for PCI bridges that are fully + * initialized. + */ if (IS_PCI_BRIDGE_ASIC(sn_irq_info->irq_bridge_type) && - (sn_irq_info->irq_bridge != NULL)) { + (sn_irq_info->irq_bridge != NULL)) sn_check_intr(i, sn_irq_info); - } - sn_irq_info = sn_irq_info->irq_next; } } + rcu_read_unlock(); +} + +void sn_irq_lh_init(void) +{ + int i; + + sn_irq_lh = kmalloc(sizeof(struct list_head *) * NR_IRQS, GFP_KERNEL); + if (!sn_irq_lh) + panic("SN PCI INIT: Failed to allocate memory for PCI init\n"); + + for (i = 0; i < NR_IRQS; i++) { + sn_irq_lh[i] = kmalloc(sizeof(struct list_head), GFP_KERNEL); + if (!sn_irq_lh[i]) + panic("SN PCI INIT: Failed IRQ memory allocation\n"); + + INIT_LIST_HEAD(sn_irq_lh[i]); + } + } diff --git a/include/asm-ia64/sn/intr.h b/include/asm-ia64/sn/intr.h index e51471fb0867..e190dd4213d5 100644 --- a/include/asm-ia64/sn/intr.h +++ b/include/asm-ia64/sn/intr.h @@ -9,6 +9,8 @@ #ifndef _ASM_IA64_SN_INTR_H #define _ASM_IA64_SN_INTR_H +#include <linux/rcupdate.h> + #define SGI_UART_VECTOR (0xe9) #define SGI_PCIBR_ERROR (0x33) @@ -33,7 +35,7 @@ // The SN PROM irq struct struct sn_irq_info { - struct sn_irq_info *irq_next; /* sharing irq list */ + struct sn_irq_info *irq_next; /* deprecated DO NOT USE */ short irq_nasid; /* Nasid IRQ is assigned to */ int irq_slice; /* slice IRQ is assigned to */ int irq_cpuid; /* kernel logical cpuid */ @@ -47,6 +49,8 @@ struct sn_irq_info { int irq_cookie; /* unique cookie */ int irq_flags; /* flags */ int irq_share_cnt; /* num devices sharing IRQ */ + struct list_head list; /* list of sn_irq_info structs */ + struct rcu_head rcu; /* rcu callback list */ }; extern void sn_send_IPI_phys(int, long, int, int); diff --git a/include/asm-ia64/sn/pcidev.h b/include/asm-ia64/sn/pcidev.h index ed4031d80811..42aea21ee187 100644 --- a/include/asm-ia64/sn/pcidev.h +++ b/include/asm-ia64/sn/pcidev.h @@ -10,8 +10,6 @@ #include <linux/pci.h> -extern struct sn_irq_info **sn_irq; - #define SN_PCIDEV_INFO(pci_dev) \ ((struct pcidev_info *)(pci_dev)->sysdata) @@ -50,9 +48,11 @@ struct pcidev_info { struct sn_irq_info *pdi_sn_irq_info; struct sn_pcibus_provider *pdi_provider; /* sn pci ops */ + struct pci_dev *host_pci_dev; /* host bus link */ }; extern void sn_irq_fixup(struct pci_dev *pci_dev, struct sn_irq_info *sn_irq_info); +extern void sn_irq_lh_init(void); #endif /* _ASM_IA64_SN_PCI_PCIDEV_H */ From c13cf3714fc84ad2fd65771aa08e47c95a9f26ef Mon Sep 17 00:00:00 2001 From: Prarit Bhargava <prarit@sgi.com> Date: Wed, 6 Jul 2005 15:26:51 -0700 Subject: [PATCH 02/11] [IA64] hotplug/ia64: SN Hotplug Driver: moving of header files This patch moves header files out of the arch/ia64/sn directories and into include/asm-ia64/sn. These files were being included by other subsystems and should be under include/asm-ia64/sn. Signed-off-by: Prarit Bhargava <prarit@sgi.com> Signed-off-by: Tony Luck <tony.luck@intel.com> --- arch/ia64/sn/include/xtalk/hubdev.h | 2 ++ arch/ia64/sn/kernel/io_init.c | 14 +++++++------- arch/ia64/sn/kernel/irq.c | 5 ++--- arch/ia64/sn/pci/pci_dma.c | 3 ++- arch/ia64/sn/pci/pcibr/pcibr_ate.c | 2 +- arch/ia64/sn/pci/pcibr/pcibr_dma.c | 15 +++++++-------- arch/ia64/sn/pci/pcibr/pcibr_provider.c | 13 ++++++------- arch/ia64/sn/pci/pcibr/pcibr_reg.c | 8 ++++---- .../pci => include/asm-ia64/sn}/pcibr_provider.h | 7 +++++-- include/asm-ia64/sn/pcidev.h | 6 +++++- .../sn/include/pci => include/asm-ia64/sn}/pic.h | 14 +++++++------- .../include/pci => include/asm-ia64/sn}/tiocp.h | 2 +- 12 files changed, 49 insertions(+), 42 deletions(-) rename {arch/ia64/sn/include/pci => include/asm-ia64/sn}/pcibr_provider.h (98%) rename {arch/ia64/sn/include/pci => include/asm-ia64/sn}/pic.h (98%) rename {arch/ia64/sn/include/pci => include/asm-ia64/sn}/tiocp.h (99%) diff --git a/arch/ia64/sn/include/xtalk/hubdev.h b/arch/ia64/sn/include/xtalk/hubdev.h index 868e7ecae84b..580a1c0403a7 100644 --- a/arch/ia64/sn/include/xtalk/hubdev.h +++ b/arch/ia64/sn/include/xtalk/hubdev.h @@ -8,6 +8,8 @@ #ifndef _ASM_IA64_SN_XTALK_HUBDEV_H #define _ASM_IA64_SN_XTALK_HUBDEV_H +#include "xtalk/xwidgetdev.h" + #define HUB_WIDGET_ID_MAX 0xf #define DEV_PER_WIDGET (2*2*8) #define IIO_ITTE_WIDGET_BITS 4 /* size of widget field */ diff --git a/arch/ia64/sn/kernel/io_init.c b/arch/ia64/sn/kernel/io_init.c index 2f03e3f52b63..041c4be02b2a 100644 --- a/arch/ia64/sn/kernel/io_init.c +++ b/arch/ia64/sn/kernel/io_init.c @@ -9,17 +9,17 @@ #include <linux/bootmem.h> #include <linux/nodemask.h> #include <asm/sn/types.h> -#include <asm/sn/sn_sal.h> #include <asm/sn/addrs.h> +#include <asm/sn/geo.h> +#include <asm/sn/io.h> +#include <asm/sn/pcibr_provider.h> #include <asm/sn/pcibus_provider_defs.h> #include <asm/sn/pcidev.h> -#include "pci/pcibr_provider.h" -#include "xtalk/xwidgetdev.h" -#include <asm/sn/geo.h> -#include "xtalk/hubdev.h" -#include <asm/sn/io.h> #include <asm/sn/simulator.h> +#include <asm/sn/sn_sal.h> #include <asm/sn/tioca_provider.h> +#include "xtalk/hubdev.h" +#include "xtalk/xwidgetdev.h" nasid_t master_nasid = INVALID_NASID; /* Partition Master */ @@ -226,7 +226,7 @@ static void sn_fixup_ionodes(void) * from our PCI provider include PIO maps to BAR space and interrupt * objects. */ -static void sn_pci_fixup_slot(struct pci_dev *dev) +void sn_pci_fixup_slot(struct pci_dev *dev) { int idx; int segment = 0; diff --git a/arch/ia64/sn/kernel/irq.c b/arch/ia64/sn/kernel/irq.c index e6f7551edfda..cf4dbf9645f1 100644 --- a/arch/ia64/sn/kernel/irq.c +++ b/arch/ia64/sn/kernel/irq.c @@ -10,13 +10,12 @@ #include <linux/irq.h> #include <linux/spinlock.h> -#include <asm/sn/intr.h> #include <asm/sn/addrs.h> #include <asm/sn/arch.h> -#include "xtalk/xwidgetdev.h" +#include <asm/sn/intr.h> +#include <asm/sn/pcibr_provider.h> #include <asm/sn/pcibus_provider_defs.h> #include <asm/sn/pcidev.h> -#include "pci/pcibr_provider.h" #include <asm/sn/shub_mmr.h> #include <asm/sn/sn_sal.h> diff --git a/arch/ia64/sn/pci/pci_dma.c b/arch/ia64/sn/pci/pci_dma.c index 5da9bdbde7cb..a2f7a88aefbb 100644 --- a/arch/ia64/sn/pci/pci_dma.c +++ b/arch/ia64/sn/pci/pci_dma.c @@ -11,9 +11,10 @@ #include <linux/module.h> #include <asm/dma.h> -#include <asm/sn/sn_sal.h> +#include <asm/sn/pcibr_provider.h> #include <asm/sn/pcibus_provider_defs.h> #include <asm/sn/pcidev.h> +#include <asm/sn/sn_sal.h> #define SG_ENT_VIRT_ADDRESS(sg) (page_address((sg)->page) + (sg)->offset) #define SG_ENT_PHYS_ADDRESS(SG) virt_to_phys(SG_ENT_VIRT_ADDRESS(SG)) diff --git a/arch/ia64/sn/pci/pcibr/pcibr_ate.c b/arch/ia64/sn/pci/pcibr/pcibr_ate.c index 0e47bce85f2d..d1647b863e61 100644 --- a/arch/ia64/sn/pci/pcibr/pcibr_ate.c +++ b/arch/ia64/sn/pci/pcibr/pcibr_ate.c @@ -8,9 +8,9 @@ #include <linux/types.h> #include <asm/sn/sn_sal.h> +#include <asm/sn/pcibr_provider.h> #include <asm/sn/pcibus_provider_defs.h> #include <asm/sn/pcidev.h> -#include "pci/pcibr_provider.h" int pcibr_invalidate_ate = 0; /* by default don't invalidate ATE on free */ diff --git a/arch/ia64/sn/pci/pcibr/pcibr_dma.c b/arch/ia64/sn/pci/pcibr/pcibr_dma.c index 64af2b2c1787..b058dc2a0b9d 100644 --- a/arch/ia64/sn/pci/pcibr/pcibr_dma.c +++ b/arch/ia64/sn/pci/pcibr/pcibr_dma.c @@ -8,18 +8,17 @@ #include <linux/types.h> #include <linux/pci.h> -#include <asm/sn/sn_sal.h> +#include <asm/sn/addrs.h> #include <asm/sn/geo.h> -#include "xtalk/xwidgetdev.h" -#include "xtalk/hubdev.h" +#include <asm/sn/pcibr_provider.h> #include <asm/sn/pcibus_provider_defs.h> #include <asm/sn/pcidev.h> -#include "pci/tiocp.h" -#include "pci/pic.h" -#include "pci/pcibr_provider.h" -#include "pci/tiocp.h" +#include <asm/sn/pic.h> +#include <asm/sn/sn_sal.h> +#include <asm/sn/tiocp.h> #include "tio.h" -#include <asm/sn/addrs.h> +#include "xtalk/xwidgetdev.h" +#include "xtalk/hubdev.h" extern int sn_ioif_inited; diff --git a/arch/ia64/sn/pci/pcibr/pcibr_provider.c b/arch/ia64/sn/pci/pcibr/pcibr_provider.c index 3893999d23d8..9bc4de4a3ec0 100644 --- a/arch/ia64/sn/pci/pcibr/pcibr_provider.c +++ b/arch/ia64/sn/pci/pcibr/pcibr_provider.c @@ -6,18 +6,17 @@ * Copyright (C) 2001-2004 Silicon Graphics, Inc. All rights reserved. */ -#include <linux/types.h> #include <linux/interrupt.h> +#include <linux/types.h> #include <linux/pci.h> -#include <asm/sn/sn_sal.h> -#include "xtalk/xwidgetdev.h" +#include <asm/sn/addrs.h> #include <asm/sn/geo.h> -#include "xtalk/hubdev.h" +#include <asm/sn/pcibr_provider.h> #include <asm/sn/pcibus_provider_defs.h> #include <asm/sn/pcidev.h> -#include "pci/pcibr_provider.h" -#include <asm/sn/addrs.h> - +#include <asm/sn/sn_sal.h> +#include "xtalk/xwidgetdev.h" +#include "xtalk/hubdev.h" static int sal_pcibr_error_interrupt(struct pcibus_info *soft) { diff --git a/arch/ia64/sn/pci/pcibr/pcibr_reg.c b/arch/ia64/sn/pci/pcibr/pcibr_reg.c index 865c11c3b50a..21426d02fbe6 100644 --- a/arch/ia64/sn/pci/pcibr/pcibr_reg.c +++ b/arch/ia64/sn/pci/pcibr/pcibr_reg.c @@ -6,13 +6,13 @@ * Copyright (C) 2004 Silicon Graphics, Inc. All rights reserved. */ -#include <linux/types.h> #include <linux/interrupt.h> +#include <linux/types.h> +#include <asm/sn/pcibr_provider.h> #include <asm/sn/pcibus_provider_defs.h> #include <asm/sn/pcidev.h> -#include "pci/tiocp.h" -#include "pci/pic.h" -#include "pci/pcibr_provider.h" +#include <asm/sn/pic.h> +#include <asm/sn/tiocp.h> union br_ptr { struct tiocp tio; diff --git a/arch/ia64/sn/include/pci/pcibr_provider.h b/include/asm-ia64/sn/pcibr_provider.h similarity index 98% rename from arch/ia64/sn/include/pci/pcibr_provider.h rename to include/asm-ia64/sn/pcibr_provider.h index 1cd291d8badd..cbb4604c9349 100644 --- a/arch/ia64/sn/include/pci/pcibr_provider.h +++ b/include/asm-ia64/sn/pcibr_provider.h @@ -8,6 +8,9 @@ #ifndef _ASM_IA64_SN_PCI_PCIBR_PROVIDER_H #define _ASM_IA64_SN_PCI_PCIBR_PROVIDER_H +#include <asm/sn/intr.h> +#include <asm/sn/pcibus_provider_defs.h> + /* Workarounds */ #define PV907516 (1 << 1) /* TIOCP: Don't write the write buffer flush reg */ @@ -20,7 +23,7 @@ #define IS_PIC_SOFT(ps) (ps->pbi_bridge_type == PCIBR_BRIDGETYPE_PIC) -/* +/* * The different PCI Bridge types supported on the SGI Altix platforms */ #define PCIBR_BRIDGETYPE_UNKNOWN -1 @@ -100,7 +103,7 @@ struct pcibus_info { struct ate_resource pbi_int_ate_resource; uint64_t pbi_int_ate_size; - + uint64_t pbi_dir_xbase; char pbi_hub_xid; diff --git a/include/asm-ia64/sn/pcidev.h b/include/asm-ia64/sn/pcidev.h index 42aea21ee187..9610fcc63545 100644 --- a/include/asm-ia64/sn/pcidev.h +++ b/include/asm-ia64/sn/pcidev.h @@ -13,6 +13,8 @@ #define SN_PCIDEV_INFO(pci_dev) \ ((struct pcidev_info *)(pci_dev)->sysdata) +#define SN_PCIBUS_BUSSOFT_INFO(pci_bus) \ + (struct pcibus_info *)((struct pcibus_bussoft *)(PCI_CONTROLLER((pci_bus))->platform_data)) /* * Given a pci_bus, return the sn pcibus_bussoft struct. Note that * this only works for root busses, not for busses represented by PPB's. @@ -53,6 +55,8 @@ struct pcidev_info { extern void sn_irq_fixup(struct pci_dev *pci_dev, struct sn_irq_info *sn_irq_info); - +extern void sn_irq_unfixup(struct pci_dev *pci_dev); +extern void sn_pci_fixup_slot(struct pci_dev *dev); +extern void sn_pci_unfixup_slot(struct pci_dev *dev); extern void sn_irq_lh_init(void); #endif /* _ASM_IA64_SN_PCI_PCIDEV_H */ diff --git a/arch/ia64/sn/include/pci/pic.h b/include/asm-ia64/sn/pic.h similarity index 98% rename from arch/ia64/sn/include/pci/pic.h rename to include/asm-ia64/sn/pic.h index fd18acecb1e6..0de82e6b0893 100644 --- a/arch/ia64/sn/include/pci/pic.h +++ b/include/asm-ia64/sn/pic.h @@ -15,7 +15,7 @@ * PIC handles PCI/X busses. PCI/X requires that the 'bridge' (i.e. PIC) * be designated as 'device 0'. That is a departure from earlier SGI * PCI bridges. Because of that we use config space 1 to access the - * config space of the first actual PCI device on the bus. + * config space of the first actual PCI device on the bus. * Here's what the PIC manual says: * * The current PCI-X bus specification now defines that the parent @@ -29,14 +29,14 @@ * correlated Configs pace and our device space 0 <-> 0, 1 <-> 1, etc. * PCI-X requires we start a 1, not 0 and currently the PX brick * does associate our: - * + * * device 0 with configuration space window 1, - * device 1 with configuration space window 2, + * device 1 with configuration space window 2, * device 2 with configuration space window 3, * device 3 with configuration space window 4. * - * The net effect is that all config space access are off-by-one with - * relation to other per-slot accesses on the PIC. + * The net effect is that all config space access are off-by-one with + * relation to other per-slot accesses on the PIC. * Here is a table that shows some of that: * * Internal Slot# @@ -65,7 +65,7 @@ *****************************************************************************/ /* NOTE: PIC WAR. PV#854697. PIC does not allow writes just to [31:0] - * of a 64-bit register. When writing PIC registers, always write the + * of a 64-bit register. When writing PIC registers, always write the * entire 64 bits. */ @@ -164,7 +164,7 @@ struct pic { uint64_t clear_all; /* 0x000{438,,,5F8} */ } p_buf_count[8]; - + /* 0x000600-0x0009FF -- PCI/X registers */ uint64_t p_pcix_bus_err_addr; /* 0x000600 */ uint64_t p_pcix_bus_err_attr; /* 0x000608 */ diff --git a/arch/ia64/sn/include/pci/tiocp.h b/include/asm-ia64/sn/tiocp.h similarity index 99% rename from arch/ia64/sn/include/pci/tiocp.h rename to include/asm-ia64/sn/tiocp.h index f07c83b2bf6e..5f2489c9d2dd 100644 --- a/arch/ia64/sn/include/pci/tiocp.h +++ b/include/asm-ia64/sn/tiocp.h @@ -111,7 +111,7 @@ struct tiocp{ uint64_t clear_all; /* 0x000{438,,,5F8} */ } cp_buf_count[8]; - + /* 0x000600-0x0009FF -- PCI/X registers */ uint64_t cp_pcix_bus_err_addr; /* 0x000600 */ uint64_t cp_pcix_bus_err_attr; /* 0x000608 */ From e07d01e0aeba905aeca6e0ae612943417d396a0f Mon Sep 17 00:00:00 2001 From: Prarit Bhargava <prarit@sgi.com> Date: Wed, 6 Jul 2005 15:28:40 -0700 Subject: [PATCH 03/11] [IA64] hotplug/ia64: SN Hotplug Driver - pci_find_next_bus export The pci_find_next_bus function is listed as being exported to drivers. It is not EXPORT_SYMBOL'd. Signed-off-by: Prarit Bhargava <prarit@sgi.com> Signed-off-by: Tony Luck <tony.luck@intel.com> --- drivers/pci/search.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pci/search.c b/drivers/pci/search.c index a90a533eba0f..05fa91a31c62 100644 --- a/drivers/pci/search.c +++ b/drivers/pci/search.c @@ -379,6 +379,7 @@ exit: EXPORT_SYMBOL(pci_dev_present); EXPORT_SYMBOL(pci_find_bus); +EXPORT_SYMBOL(pci_find_next_bus); EXPORT_SYMBOL(pci_find_device); EXPORT_SYMBOL(pci_find_device_reverse); EXPORT_SYMBOL(pci_find_slot); From 283c7f6ac6adb57e7dd13cdbc8d60b6ea4de6faf Mon Sep 17 00:00:00 2001 From: Prarit Bhargava <prarit@sgi.com> Date: Wed, 6 Jul 2005 15:29:13 -0700 Subject: [PATCH 04/11] [IA64] hotplug/ia64: SN Hotplug Driver - new SN PROM version code This patch is a rewrite of the code to check the PROM version. The current code has some deficiences in the way PROM comparisons were made. The minimum value of PROM that will boot has also been changed to 4.04. Signed-off-by: Prarit Bhargava <prarit@sgi.com> Signed-off-by: Tony Luck <tony.luck@intel.com> --- arch/ia64/sn/kernel/setup.c | 13 +++++------ arch/ia64/sn/pci/tioca_provider.c | 3 +-- include/asm-ia64/sn/sn_sal.h | 39 ++++++++++--------------------- 3 files changed, 19 insertions(+), 36 deletions(-) diff --git a/arch/ia64/sn/kernel/setup.c b/arch/ia64/sn/kernel/setup.c index 22e10d282c7f..7c7fe441d623 100644 --- a/arch/ia64/sn/kernel/setup.c +++ b/arch/ia64/sn/kernel/setup.c @@ -270,7 +270,7 @@ void __init sn_setup(char **cmdline_p) { long status, ticks_per_sec, drift; int pxm; - int major = sn_sal_rev_major(), minor = sn_sal_rev_minor(); + u32 version = sn_sal_rev(); extern void sn_cpu_init(void); ia64_sn_plat_set_error_handling_features(); @@ -308,22 +308,21 @@ void __init sn_setup(char **cmdline_p) * support here so we don't have to listen to failed keyboard probe * messages. */ - if ((major < 2 || (major == 2 && minor <= 9)) && - acpi_kbd_controller_present) { + if (version <= 0x0209 && acpi_kbd_controller_present) { printk(KERN_INFO "Disabling legacy keyboard support as prom " "is too old and doesn't provide FADT\n"); acpi_kbd_controller_present = 0; } - printk("SGI SAL version %x.%02x\n", major, minor); + printk("SGI SAL version %x.%02x\n", version >> 8, version & 0x00FF); /* * Confirm the SAL we're running on is recent enough... */ - if ((major < SN_SAL_MIN_MAJOR) || (major == SN_SAL_MIN_MAJOR && - minor < SN_SAL_MIN_MINOR)) { + if (version < SN_SAL_MIN_VERSION) { printk(KERN_ERR "This kernel needs SGI SAL version >= " - "%x.%02x\n", SN_SAL_MIN_MAJOR, SN_SAL_MIN_MINOR); + "%x.%02x\n", SN_SAL_MIN_VERSION >> 8, + SN_SAL_MIN_VERSION & 0x00FF); panic("PROM version too old\n"); } diff --git a/arch/ia64/sn/pci/tioca_provider.c b/arch/ia64/sn/pci/tioca_provider.c index 05aa8c2fe9bb..51cc4e63092c 100644 --- a/arch/ia64/sn/pci/tioca_provider.c +++ b/arch/ia64/sn/pci/tioca_provider.c @@ -589,8 +589,7 @@ tioca_bus_fixup(struct pcibus_bussoft *prom_bussoft) /* sanity check prom rev */ - if (sn_sal_rev_major() < 4 || - (sn_sal_rev_major() == 4 && sn_sal_rev_minor() < 6)) { + if (sn_sal_rev() < 0x0406) { printk (KERN_ERR "%s: SGI prom rev 4.06 or greater required " "for tioca support\n", __FUNCTION__); diff --git a/include/asm-ia64/sn/sn_sal.h b/include/asm-ia64/sn/sn_sal.h index 1455375d2ce4..27976d223186 100644 --- a/include/asm-ia64/sn/sn_sal.h +++ b/include/asm-ia64/sn/sn_sal.h @@ -134,43 +134,28 @@ #define SN_SAL_FAKE_PROM 0x02009999 - /** - * sn_sal_rev_major - get the major SGI SAL revision number - * - * The SGI PROM stores its version in sal_[ab]_rev_(major|minor). - * This routine simply extracts the major value from the - * @ia64_sal_systab structure constructed by ia64_sal_init(). - */ -static inline int -sn_sal_rev_major(void) + * sn_sal_revision - get the SGI SAL revision number + * + * The SGI PROM stores its version in the sal_[ab]_rev_(major|minor). + * This routine simply extracts the major and minor values and + * presents them in a u32 format. + * + * For example, version 4.05 would be represented at 0x0405. + */ +static inline u32 +sn_sal_rev(void) { struct ia64_sal_systab *systab = efi.sal_systab; - return (int)systab->sal_b_rev_major; -} - -/** - * sn_sal_rev_minor - get the minor SGI SAL revision number - * - * The SGI PROM stores its version in sal_[ab]_rev_(major|minor). - * This routine simply extracts the minor value from the - * @ia64_sal_systab structure constructed by ia64_sal_init(). - */ -static inline int -sn_sal_rev_minor(void) -{ - struct ia64_sal_systab *systab = efi.sal_systab; - - return (int)systab->sal_b_rev_minor; + return (u32)(systab->sal_b_rev_major << 8 | systab->sal_b_rev_minor); } /* * Specify the minimum PROM revsion required for this kernel. * Note that they're stored in hex format... */ -#define SN_SAL_MIN_MAJOR 0x4 /* SN2 kernels need at least PROM 4.0 */ -#define SN_SAL_MIN_MINOR 0x0 +#define SN_SAL_MIN_VERSION 0x0404 /* * Returns the master console nasid, if the call fails, return an illegal From 6f354b014b51716166f13f68b29212d3c44ed2c4 Mon Sep 17 00:00:00 2001 From: Prarit Bhargava <prarit@sgi.com> Date: Wed, 6 Jul 2005 15:29:53 -0700 Subject: [PATCH 05/11] [IA64] hotplug/ia64: SN Hotplug Driver - SN Hotplug Driver code This patch is the SGI hotplug driver and additional changes required for the driver. These modifications include changes to the SN io_init.c code for memory management, the inclusion of new SAL calls to enable and disable PCI slots, and a hotplug-style driver. Signed-off-by: Prarit Bhargava <prarit@sgi.com> Signed-off-by: Tony Luck <tony.luck@intel.com> --- arch/ia64/sn/kernel/io_init.c | 134 ++++-- arch/ia64/sn/kernel/irq.c | 6 +- arch/ia64/sn/pci/pcibr/pcibr_provider.c | 37 ++ drivers/pci/hotplug/Kconfig | 5 +- drivers/pci/hotplug/Makefile | 1 + drivers/pci/hotplug/sgi_hotplug.c | 611 ++++++++++++++++++++++++ include/asm-ia64/sn/pcibr_provider.h | 4 + include/asm-ia64/sn/pcidev.h | 6 + 8 files changed, 754 insertions(+), 50 deletions(-) create mode 100644 drivers/pci/hotplug/sgi_hotplug.c diff --git a/arch/ia64/sn/kernel/io_init.c b/arch/ia64/sn/kernel/io_init.c index 041c4be02b2a..a67f39e448cb 100644 --- a/arch/ia64/sn/kernel/io_init.c +++ b/arch/ia64/sn/kernel/io_init.c @@ -23,6 +23,14 @@ nasid_t master_nasid = INVALID_NASID; /* Partition Master */ +static struct list_head sn_sysdata_list; + +/* sysdata list struct */ +struct sysdata_el { + struct list_head entry; + void *sysdata; +}; + struct slab_info { struct hubdev_info hubdev; }; @@ -136,23 +144,6 @@ sal_get_pcidev_info(u64 segment, u64 bus_number, u64 devfn, u64 pci_dev, return ret_stuff.v0; } -/* - * sn_alloc_pci_sysdata() - This routine allocates a pci controller - * which is expected as the pci_dev and pci_bus sysdata by the Linux - * PCI infrastructure. - */ -static inline struct pci_controller *sn_alloc_pci_sysdata(void) -{ - struct pci_controller *pci_sysdata; - - pci_sysdata = kmalloc(sizeof(*pci_sysdata), GFP_KERNEL); - if (!pci_sysdata) - BUG(); - - memset(pci_sysdata, 0, sizeof(*pci_sysdata)); - return pci_sysdata; -} - /* * sn_fixup_ionodes() - This routine initializes the HUB data strcuture for * each node in the system. @@ -220,6 +211,15 @@ static void sn_fixup_ionodes(void) } +void sn_pci_unfixup_slot(struct pci_dev *dev) +{ + struct pci_dev *host_pci_dev = SN_PCIDEV_INFO(dev)->host_pci_dev; + + sn_irq_unfixup(dev); + pci_dev_put(host_pci_dev); + pci_dev_put(dev); +} + /* * sn_pci_fixup_slot() - This routine sets up a slot's resources * consistent with the Linux PCI abstraction layer. Resources acquired @@ -238,6 +238,7 @@ void sn_pci_fixup_slot(struct pci_dev *dev) unsigned long size; unsigned int bus_no, devfn; + pci_dev_get(dev); /* for the sysdata pointer */ dev->sysdata = kmalloc(sizeof(struct pcidev_info), GFP_KERNEL); if (SN_PCIDEV_INFO(dev) <= 0) BUG(); /* Cannot afford to run out of memory */ @@ -276,7 +277,8 @@ void sn_pci_fixup_slot(struct pci_dev *dev) dev->resource[idx].parent = &iomem_resource; } - /* Using the PROMs values for the PCI host bus, get the Linux + /* + * Using the PROMs values for the PCI host bus, get the Linux * PCI host_pci_dev struct and set up host bus linkages */ @@ -313,55 +315,57 @@ void sn_pci_fixup_slot(struct pci_dev *dev) * sn_pci_controller_fixup() - This routine sets up a bus's resources * consistent with the Linux PCI abstraction layer. */ -static void sn_pci_controller_fixup(int segment, int busnum) +void sn_pci_controller_fixup(int segment, int busnum, struct pci_bus *bus) { int status = 0; int nasid, cnode; - struct pci_bus *bus; struct pci_controller *controller; struct pcibus_bussoft *prom_bussoft_ptr; struct hubdev_info *hubdev_info; void *provider_soft; struct sn_pcibus_provider *provider; - status = - sal_get_pcibus_info((u64) segment, (u64) busnum, - (u64) ia64_tpa(&prom_bussoft_ptr)); - if (status > 0) { - return; /* bus # does not exist */ - } - + status = sal_get_pcibus_info((u64) segment, (u64) busnum, + (u64) ia64_tpa(&prom_bussoft_ptr)); + if (status > 0) + return; /*bus # does not exist */ prom_bussoft_ptr = __va(prom_bussoft_ptr); - controller = sn_alloc_pci_sysdata(); - /* controller non-zero is BUG'd in sn_alloc_pci_sysdata */ - bus = pci_scan_bus(busnum, &pci_root_ops, controller); + controller = kcalloc(1,sizeof(struct pci_controller), GFP_KERNEL); + if (!controller) + BUG(); + if (bus == NULL) { - return; /* error, or bus already scanned */ + bus = pci_scan_bus(busnum, &pci_root_ops, controller); + if (bus == NULL) + return; /* error, or bus already scanned */ + bus->sysdata = NULL; } + if (bus->sysdata) + goto error_return; /* sysdata already alloc'd */ + /* * Per-provider fixup. Copies the contents from prom to local * area and links SN_PCIBUS_BUSSOFT(). */ - if (prom_bussoft_ptr->bs_asic_type >= PCIIO_ASIC_MAX_TYPES) { + if (prom_bussoft_ptr->bs_asic_type >= PCIIO_ASIC_MAX_TYPES) return; /* unsupported asic type */ - } + + if (prom_bussoft_ptr->bs_asic_type == PCIIO_ASIC_TYPE_PPB) + goto error_return; /* no further fixup necessary */ provider = sn_pci_provider[prom_bussoft_ptr->bs_asic_type]; - if (provider == NULL) { + if (provider == NULL) return; /* no provider registerd for this asic */ - } provider_soft = NULL; - if (provider->bus_fixup) { + if (provider->bus_fixup) provider_soft = (*provider->bus_fixup) (prom_bussoft_ptr); - } - if (provider_soft == NULL) { + if (provider_soft == NULL) return; /* fixup failed or not applicable */ - } /* * Generic bus fixup goes here. Don't reference prom_bussoft_ptr @@ -370,12 +374,47 @@ static void sn_pci_controller_fixup(int segment, int busnum) bus->sysdata = controller; PCI_CONTROLLER(bus)->platform_data = provider_soft; - nasid = NASID_GET(SN_PCIBUS_BUSSOFT(bus)->bs_base); cnode = nasid_to_cnodeid(nasid); hubdev_info = (struct hubdev_info *)(NODEPDA(cnode)->pdinfo); SN_PCIBUS_BUSSOFT(bus)->bs_xwidget_info = &(hubdev_info->hdi_xwidget_info[SN_PCIBUS_BUSSOFT(bus)->bs_xid]); + + return; + +error_return: + + kfree(controller); + return; +} + +void sn_bus_store_sysdata(struct pci_dev *dev) +{ + struct sysdata_el *element; + + element = kcalloc(1, sizeof(struct sysdata_el), GFP_KERNEL); + if (!element) { + dev_dbg(dev, "%s: out of memory!\n", __FUNCTION__); + return; + } + element->sysdata = dev->sysdata; + list_add(&element->entry, &sn_sysdata_list); +} + +void sn_bus_free_sysdata(void) +{ + struct sysdata_el *element; + struct list_head *list; + +sn_sysdata_free_start: + list_for_each(list, &sn_sysdata_list) { + element = list_entry(list, struct sysdata_el, entry); + list_del(&element->entry); + kfree(element->sysdata); + kfree(element); + goto sn_sysdata_free_start; + } + return; } /* @@ -413,15 +452,16 @@ static int __init sn_pci_init(void) ia64_max_iommu_merge_mask = ~PAGE_MASK; sn_fixup_ionodes(); sn_irq_lh_init(); + INIT_LIST_HEAD(&sn_sysdata_list); sn_init_cpei_timer(); #ifdef CONFIG_PROC_FS register_sn_procfs(); #endif - for (i = 0; i < PCI_BUSES_TO_SCAN; i++) { - sn_pci_controller_fixup(0, i); - } + /* busses are not known yet ... */ + for (i = 0; i < PCI_BUSES_TO_SCAN; i++) + sn_pci_controller_fixup(0, i, NULL); /* * Generic Linux PCI Layer has created the pci_bus and pci_dev @@ -430,9 +470,8 @@ static int __init sn_pci_init(void) */ while ((pci_dev = - pci_find_device(PCI_ANY_ID, PCI_ANY_ID, pci_dev)) != NULL) { + pci_get_device(PCI_ANY_ID, PCI_ANY_ID, pci_dev)) != NULL) sn_pci_fixup_slot(pci_dev); - } sn_ioif_inited = 1; /* sn I/O infrastructure now initialized */ @@ -474,3 +513,8 @@ cnodeid_get_geoid(cnodeid_t cnode) } subsys_initcall(sn_pci_init); +EXPORT_SYMBOL(sn_pci_fixup_slot); +EXPORT_SYMBOL(sn_pci_unfixup_slot); +EXPORT_SYMBOL(sn_pci_controller_fixup); +EXPORT_SYMBOL(sn_bus_store_sysdata); +EXPORT_SYMBOL(sn_bus_free_sysdata); diff --git a/arch/ia64/sn/kernel/irq.c b/arch/ia64/sn/kernel/irq.c index cf4dbf9645f1..84d276a14ecb 100644 --- a/arch/ia64/sn/kernel/irq.c +++ b/arch/ia64/sn/kernel/irq.c @@ -284,7 +284,6 @@ void sn_irq_fixup(struct pci_dev *pci_dev, struct sn_irq_info *sn_irq_info) int cpu = nasid_slice_to_cpuid(nasid, slice); pci_dev_get(pci_dev); - sn_irq_info->irq_cpuid = cpu; sn_irq_info->irq_pciioinfo = SN_PCIDEV_INFO(pci_dev); @@ -305,15 +304,16 @@ void sn_irq_unfixup(struct pci_dev *pci_dev) return; sn_irq_info = SN_PCIDEV_INFO(pci_dev)->pdi_sn_irq_info; - if (!sn_irq_info || !sn_irq_info->irq_irq) + if (!sn_irq_info || !sn_irq_info->irq_irq) { + kfree(sn_irq_info); return; + } unregister_intr_pda(sn_irq_info); spin_lock(&sn_irq_info_lock); list_del_rcu(&sn_irq_info->list); spin_unlock(&sn_irq_info_lock); call_rcu(&sn_irq_info->rcu, sn_irq_info_free); - pci_dev_put(pci_dev); } diff --git a/arch/ia64/sn/pci/pcibr/pcibr_provider.c b/arch/ia64/sn/pci/pcibr/pcibr_provider.c index 9bc4de4a3ec0..9813da56d311 100644 --- a/arch/ia64/sn/pci/pcibr/pcibr_provider.c +++ b/arch/ia64/sn/pci/pcibr/pcibr_provider.c @@ -18,6 +18,40 @@ #include "xtalk/xwidgetdev.h" #include "xtalk/hubdev.h" +int +sal_pcibr_slot_enable(struct pcibus_info *soft, int device, void *resp) +{ + struct ia64_sal_retval ret_stuff; + uint64_t busnum; + + ret_stuff.status = 0; + ret_stuff.v0 = 0; + + busnum = soft->pbi_buscommon.bs_persist_busnum; + SAL_CALL_NOLOCK(ret_stuff, (u64) SN_SAL_IOIF_SLOT_ENABLE, (u64) busnum, + (u64) device, (u64) resp, 0, 0, 0, 0); + + return (int)ret_stuff.v0; +} + +int +sal_pcibr_slot_disable(struct pcibus_info *soft, int device, int action, + void *resp) +{ + struct ia64_sal_retval ret_stuff; + uint64_t busnum; + + ret_stuff.status = 0; + ret_stuff.v0 = 0; + + busnum = soft->pbi_buscommon.bs_persist_busnum; + SAL_CALL_NOLOCK(ret_stuff, (u64) SN_SAL_IOIF_SLOT_DISABLE, + (u64) busnum, (u64) device, (u64) action, + (u64) resp, 0, 0, 0); + + return (int)ret_stuff.v0; +} + static int sal_pcibr_error_interrupt(struct pcibus_info *soft) { struct ia64_sal_retval ret_stuff; @@ -187,3 +221,6 @@ pcibr_init_provider(void) return 0; } + +EXPORT_SYMBOL_GPL(sal_pcibr_slot_enable); +EXPORT_SYMBOL_GPL(sal_pcibr_slot_disable); diff --git a/drivers/pci/hotplug/Kconfig b/drivers/pci/hotplug/Kconfig index 1a4d4ca2a4dc..9c4a39ee89b5 100644 --- a/drivers/pci/hotplug/Kconfig +++ b/drivers/pci/hotplug/Kconfig @@ -187,9 +187,10 @@ config HOTPLUG_PCI_RPA_DLPAR config HOTPLUG_PCI_SGI tristate "SGI PCI Hotplug Support" - depends on HOTPLUG_PCI && IA64_SGI_SN2 + depends on HOTPLUG_PCI && (IA64_SGI_SN2 || IA64_GENERIC) help - Say Y here if you have an SGI IA64 Altix system. + Say Y here if you want to use the SGI Altix Hotplug + Driver for PCI devices. When in doubt, say N. diff --git a/drivers/pci/hotplug/Makefile b/drivers/pci/hotplug/Makefile index 3e632ff8c717..31a307004b94 100644 --- a/drivers/pci/hotplug/Makefile +++ b/drivers/pci/hotplug/Makefile @@ -14,6 +14,7 @@ obj-$(CONFIG_HOTPLUG_PCI_PCIE) += pciehp.o obj-$(CONFIG_HOTPLUG_PCI_SHPC) += shpchp.o obj-$(CONFIG_HOTPLUG_PCI_RPA) += rpaphp.o obj-$(CONFIG_HOTPLUG_PCI_RPA_DLPAR) += rpadlpar_io.o +obj-$(CONFIG_HOTPLUG_PCI_SGI) += sgi_hotplug.o pci_hotplug-objs := pci_hotplug_core.o diff --git a/drivers/pci/hotplug/sgi_hotplug.c b/drivers/pci/hotplug/sgi_hotplug.c new file mode 100644 index 000000000000..323041fd41dc --- /dev/null +++ b/drivers/pci/hotplug/sgi_hotplug.c @@ -0,0 +1,611 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2005 Silicon Graphics, Inc. All rights reserved. + * + * This work was based on the 2.4/2.6 kernel development by Dick Reigner. + * Work to add BIOS PROM support was completed by Mike Habeck. + */ + +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/pci.h> +#include <linux/proc_fs.h> +#include <linux/types.h> + +#include <asm/sn/addrs.h> +#include <asm/sn/l1.h> +#include <asm/sn/module.h> +#include <asm/sn/pcibr_provider.h> +#include <asm/sn/pcibus_provider_defs.h> +#include <asm/sn/pcidev.h> +#include <asm/sn/sn_sal.h> +#include <asm/sn/types.h> + +#include "../pci.h" +#include "pci_hotplug.h" + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("SGI (prarit@sgi.com, dickie@sgi.com, habeck@sgi.com)"); +MODULE_DESCRIPTION("SGI Altix Hot Plug PCI Controller Driver"); + +#define PCIIO_ASIC_TYPE_TIOCA 4 +#define PCI_SLOT_ALREADY_UP 2 /* slot already up */ +#define PCI_SLOT_ALREADY_DOWN 3 /* slot already down */ +#define PCI_L1_ERR 7 /* L1 console command error */ +#define PCI_EMPTY_33MHZ 15 /* empty 33 MHz bus */ +#define PCI_L1_QSIZE 128 /* our L1 message buffer size */ +#define SN_MAX_HP_SLOTS 32 /* max number of hotplug slots */ +#define SGI_HOTPLUG_PROM_REV 0x0420 /* Min. required PROM version */ + +/* internal list head */ +static struct list_head sn_hp_list; + +/* hotplug_slot struct's private pointer */ +struct slot { + int device_num; + struct pci_bus *pci_bus; + /* this struct for glue internal only */ + struct hotplug_slot *hotplug_slot; + struct list_head hp_list; +}; + +struct pcibr_slot_enable_resp { + int resp_sub_errno; + char resp_l1_msg[PCI_L1_QSIZE + 1]; +}; + +struct pcibr_slot_disable_resp { + int resp_sub_errno; + char resp_l1_msg[PCI_L1_QSIZE + 1]; +}; + +enum sn_pci_req_e { + PCI_REQ_SLOT_ELIGIBLE, + PCI_REQ_SLOT_DISABLE +}; + +static int enable_slot(struct hotplug_slot *slot); +static int disable_slot(struct hotplug_slot *slot); +static int get_power_status(struct hotplug_slot *slot, u8 *value); + +static struct hotplug_slot_ops sn_hotplug_slot_ops = { + .owner = THIS_MODULE, + .enable_slot = enable_slot, + .disable_slot = disable_slot, + .get_power_status = get_power_status, +}; + +static DECLARE_MUTEX(sn_hotplug_sem); + +static int sn_pci_slot_valid(struct pci_bus *pci_bus, int device) +{ + struct pcibus_info *pcibus_info; + int bricktype; + int bus_num; + + pcibus_info = SN_PCIBUS_BUSSOFT_INFO(pci_bus); + + /* Check to see if this is a valid slot on 'pci_bus' */ + if (!(pcibus_info->pbi_valid_devices & (1 << device))) + return -EPERM; + + bricktype = MODULE_GET_BTYPE(pcibus_info->pbi_moduleid); + bus_num = pcibus_info->pbi_buscommon.bs_persist_busnum & 0xf; + + /* Do not allow hotplug operations on base I/O cards */ + if ((bricktype == L1_BRICKTYPE_IX || bricktype == L1_BRICKTYPE_IA) && + (bus_num == 1 && device != 1)) + return -EPERM; + + return 1; +} + +static int sn_pci_bus_valid(struct pci_bus *pci_bus) +{ + struct pcibus_info *pcibus_info; + int asic_type; + int bricktype; + + pcibus_info = SN_PCIBUS_BUSSOFT_INFO(pci_bus); + + /* Don't register slots hanging off the TIOCA bus */ + asic_type = pcibus_info->pbi_buscommon.bs_asic_type; + if (asic_type == PCIIO_ASIC_TYPE_TIOCA) + return -EPERM; + + /* Only register slots in I/O Bricks that support hotplug */ + bricktype = MODULE_GET_BTYPE(pcibus_info->pbi_moduleid); + switch (bricktype) { + case L1_BRICKTYPE_IX: + case L1_BRICKTYPE_PX: + case L1_BRICKTYPE_IA: + case L1_BRICKTYPE_PA: + return 1; + break; + default: + return -EPERM; + break; + } + + return -EIO; +} + +static int sn_hp_slot_private_alloc(struct hotplug_slot *bss_hotplug_slot, + struct pci_bus *pci_bus, int device) +{ + struct pcibus_info *pcibus_info; + struct slot *slot; + + pcibus_info = SN_PCIBUS_BUSSOFT_INFO(pci_bus); + + bss_hotplug_slot->private = kcalloc(1, sizeof(struct slot), + GFP_KERNEL); + if (!bss_hotplug_slot->private) + return -ENOMEM; + slot = (struct slot *)bss_hotplug_slot->private; + + bss_hotplug_slot->name = kmalloc(33, GFP_KERNEL); + if (!bss_hotplug_slot->name) { + kfree(bss_hotplug_slot->private); + return -ENOMEM; + } + + slot->device_num = device; + slot->pci_bus = pci_bus; + + sprintf(bss_hotplug_slot->name, "module_%c%c%c%c%.2d_b_%d_s_%d", + '0'+RACK_GET_CLASS(MODULE_GET_RACK(pcibus_info->pbi_moduleid)), + '0'+RACK_GET_GROUP(MODULE_GET_RACK(pcibus_info->pbi_moduleid)), + '0'+RACK_GET_NUM(MODULE_GET_RACK(pcibus_info->pbi_moduleid)), + MODULE_GET_BTCHAR(pcibus_info->pbi_moduleid), + MODULE_GET_BPOS(pcibus_info->pbi_moduleid), + ((int)pcibus_info->pbi_buscommon.bs_persist_busnum) & 0xf, + device + 1); + + slot->hotplug_slot = bss_hotplug_slot; + list_add(&slot->hp_list, &sn_hp_list); + + return 0; +} + +static struct hotplug_slot * sn_hp_destroy(void) +{ + struct slot *slot; + struct list_head *list; + struct hotplug_slot *bss_hotplug_slot = NULL; + + list_for_each(list, &sn_hp_list) { + slot = list_entry(list, struct slot, hp_list); + bss_hotplug_slot = slot->hotplug_slot; + list_del(&((struct slot *)bss_hotplug_slot->private)-> + hp_list); + break; + } + return bss_hotplug_slot; +} + +static void sn_bus_alloc_data(struct pci_dev *dev) +{ + struct list_head *node; + struct pci_bus *subordinate_bus; + struct pci_dev *child; + + sn_pci_fixup_slot(dev); + + /* Recursively sets up the sn_irq_info structs */ + if (dev->subordinate) { + subordinate_bus = dev->subordinate; + list_for_each(node, &subordinate_bus->devices) { + child = list_entry(node, struct pci_dev, bus_list); + sn_bus_alloc_data(child); + } + } +} + +static void sn_bus_free_data(struct pci_dev *dev) +{ + struct list_head *node; + struct pci_bus *subordinate_bus; + struct pci_dev *child; + + /* Recursively clean up sn_irq_info structs */ + if (dev->subordinate) { + subordinate_bus = dev->subordinate; + list_for_each(node, &subordinate_bus->devices) { + child = list_entry(node, struct pci_dev, bus_list); + sn_bus_free_data(child); + } + } + sn_pci_unfixup_slot(dev); +} + +static u8 sn_power_status_get(struct hotplug_slot *bss_hotplug_slot) +{ + struct slot *slot = (struct slot *)bss_hotplug_slot->private; + struct pcibus_info *pcibus_info; + u8 retval; + + pcibus_info = SN_PCIBUS_BUSSOFT_INFO(slot->pci_bus); + retval = pcibus_info->pbi_enabled_devices & (1 << slot->device_num); + + return retval ? 1 : 0; +} + +static void sn_slot_mark_enable(struct hotplug_slot *bss_hotplug_slot, + int device_num) +{ + struct slot *slot = (struct slot *)bss_hotplug_slot->private; + struct pcibus_info *pcibus_info; + + pcibus_info = SN_PCIBUS_BUSSOFT_INFO(slot->pci_bus); + pcibus_info->pbi_enabled_devices |= (1 << device_num); +} + +static void sn_slot_mark_disable(struct hotplug_slot *bss_hotplug_slot, + int device_num) +{ + struct slot *slot = (struct slot *)bss_hotplug_slot->private; + struct pcibus_info *pcibus_info; + + pcibus_info = SN_PCIBUS_BUSSOFT_INFO(slot->pci_bus); + pcibus_info->pbi_enabled_devices &= ~(1 << device_num); +} + +static int sn_slot_enable(struct hotplug_slot *bss_hotplug_slot, + int device_num) +{ + struct slot *slot = (struct slot *)bss_hotplug_slot->private; + struct pcibus_info *pcibus_info; + struct pcibr_slot_enable_resp resp; + int rc; + + pcibus_info = SN_PCIBUS_BUSSOFT_INFO(slot->pci_bus); + + /* + * Power-on and initialize the slot in the SN + * PCI infrastructure. + */ + rc = sal_pcibr_slot_enable(pcibus_info, device_num, &resp); + + if (rc == PCI_SLOT_ALREADY_UP) { + dev_dbg(slot->pci_bus->self, "is already active\n"); + return -EPERM; + } + + if (rc == PCI_L1_ERR) { + dev_dbg(slot->pci_bus->self, + "L1 failure %d with message: %s", + resp.resp_sub_errno, resp.resp_l1_msg); + return -EPERM; + } + + if (rc) { + dev_dbg(slot->pci_bus->self, + "insert failed with error %d sub-error %d\n", + rc, resp.resp_sub_errno); + return -EIO; + } + + sn_slot_mark_enable(bss_hotplug_slot, device_num); + + return 0; +} + +static int sn_slot_disable(struct hotplug_slot *bss_hotplug_slot, + int device_num, int action) +{ + struct slot *slot = (struct slot *)bss_hotplug_slot->private; + struct pcibus_info *pcibus_info; + struct pcibr_slot_disable_resp resp; + int rc; + + pcibus_info = SN_PCIBUS_BUSSOFT_INFO(slot->pci_bus); + + rc = sal_pcibr_slot_disable(pcibus_info, device_num, action, &resp); + + if (action == PCI_REQ_SLOT_ELIGIBLE && rc == PCI_SLOT_ALREADY_DOWN) { + dev_dbg(slot->pci_bus->self, "Slot %s already inactive\n"); + return -ENODEV; + } + + if (action == PCI_REQ_SLOT_ELIGIBLE && rc == PCI_EMPTY_33MHZ) { + dev_dbg(slot->pci_bus->self, + "Cannot remove last 33MHz card\n"); + return -EPERM; + } + + if (action == PCI_REQ_SLOT_ELIGIBLE && rc == PCI_L1_ERR) { + dev_dbg(slot->pci_bus->self, + "L1 failure %d with message \n%s\n", + resp.resp_sub_errno, resp.resp_l1_msg); + return -EPERM; + } + + if (action == PCI_REQ_SLOT_ELIGIBLE && rc) { + dev_dbg(slot->pci_bus->self, + "remove failed with error %d sub-error %d\n", + rc, resp.resp_sub_errno); + return -EIO; + } + + if (action == PCI_REQ_SLOT_ELIGIBLE && !rc) + return 0; + + if (action == PCI_REQ_SLOT_DISABLE && !rc) { + sn_slot_mark_disable(bss_hotplug_slot, device_num); + dev_dbg(slot->pci_bus->self, "remove successful\n"); + return 0; + } + + if (action == PCI_REQ_SLOT_DISABLE && rc) { + dev_dbg(slot->pci_bus->self,"remove failed rc = %d\n", rc); + return rc; + } + + return rc; +} + +static int enable_slot(struct hotplug_slot *bss_hotplug_slot) +{ + struct slot *slot = (struct slot *)bss_hotplug_slot->private; + struct pci_bus *new_bus = NULL; + struct pci_dev *dev; + int func, num_funcs; + int new_ppb = 0; + int rc; + + /* Serialize the Linux PCI infrastructure */ + down(&sn_hotplug_sem); + + /* + * Power-on and initialize the slot in the SN + * PCI infrastructure. + */ + rc = sn_slot_enable(bss_hotplug_slot, slot->device_num); + if (rc) { + up(&sn_hotplug_sem); + return rc; + } + + num_funcs = pci_scan_slot(slot->pci_bus, PCI_DEVFN(slot->device_num+1, + PCI_FUNC(0))); + if (!num_funcs) { + dev_dbg(slot->pci_bus->self, "no device in slot\n"); + up(&sn_hotplug_sem); + return -ENODEV; + } + + sn_pci_controller_fixup(pci_domain_nr(slot->pci_bus), + slot->pci_bus->number, + slot->pci_bus); + /* + * Map SN resources for all functions on the card + * to the Linux PCI interface and tell the drivers + * about them. + */ + for (func = 0; func < num_funcs; func++) { + dev = pci_get_slot(slot->pci_bus, + PCI_DEVFN(slot->device_num + 1, + PCI_FUNC(func))); + + + if (dev) { + if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) { + unsigned char sec_bus; + pci_read_config_byte(dev, PCI_SECONDARY_BUS, + &sec_bus); + new_bus = pci_add_new_bus(dev->bus, dev, + sec_bus); + pci_scan_child_bus(new_bus); + sn_pci_controller_fixup(pci_domain_nr(new_bus), + new_bus->number, + new_bus); + new_ppb = 1; + } + sn_bus_alloc_data(dev); + pci_dev_put(dev); + } + } + + /* Call the driver for the new device */ + pci_bus_add_devices(slot->pci_bus); + /* Call the drivers for the new devices subordinate to PPB */ + if (new_ppb) + pci_bus_add_devices(new_bus); + + up(&sn_hotplug_sem); + + if (rc == 0) + dev_dbg(slot->pci_bus->self, + "insert operation successful\n"); + else + dev_dbg(slot->pci_bus->self, + "insert operation failed rc = %d\n", rc); + + return rc; +} + +static int disable_slot(struct hotplug_slot *bss_hotplug_slot) +{ + struct slot *slot = (struct slot *)bss_hotplug_slot->private; + struct pci_dev *dev; + int func; + int rc; + + /* Acquire update access to the bus */ + down(&sn_hotplug_sem); + + /* is it okay to bring this slot down? */ + rc = sn_slot_disable(bss_hotplug_slot, slot->device_num, + PCI_REQ_SLOT_ELIGIBLE); + if (rc) + goto leaving; + + /* Free the SN resources assigned to the Linux device.*/ + for (func = 0; func < 8; func++) { + dev = pci_get_slot(slot->pci_bus, + PCI_DEVFN(slot->device_num+1, + PCI_FUNC(func))); + if (dev) { + /* + * Some drivers may use dma accesses during the + * driver remove function. We release the sysdata + * areas after the driver remove functions have + * been called. + */ + sn_bus_store_sysdata(dev); + sn_bus_free_data(dev); + pci_remove_bus_device(dev); + pci_dev_put(dev); + } + } + + /* free the collected sysdata pointers */ + sn_bus_free_sysdata(); + + /* Deactivate slot */ + rc = sn_slot_disable(bss_hotplug_slot, slot->device_num, + PCI_REQ_SLOT_DISABLE); + leaving: + /* Release the bus lock */ + up(&sn_hotplug_sem); + + return rc; +} + +static int get_power_status(struct hotplug_slot *bss_hotplug_slot, u8 *value) +{ + down(&sn_hotplug_sem); + *value = sn_power_status_get(bss_hotplug_slot); + up(&sn_hotplug_sem); + return 0; +} + +static void sn_release_slot(struct hotplug_slot *bss_hotplug_slot) +{ + kfree(bss_hotplug_slot->info); + kfree(bss_hotplug_slot->name); + kfree(bss_hotplug_slot->private); + kfree(bss_hotplug_slot); +} + +static int sn_hotplug_slot_register(struct pci_bus *pci_bus) +{ + int device; + struct hotplug_slot *bss_hotplug_slot; + int rc = 0; + + /* + * Currently only four devices are supported, + * in the future there maybe more -- up to 32. + */ + + for (device = 0; device < SN_MAX_HP_SLOTS ; device++) { + if (sn_pci_slot_valid(pci_bus, device) != 1) + continue; + + bss_hotplug_slot = kcalloc(1,sizeof(struct hotplug_slot), + GFP_KERNEL); + if (!bss_hotplug_slot) { + rc = -ENOMEM; + goto alloc_err; + } + + bss_hotplug_slot->info = + kcalloc(1,sizeof(struct hotplug_slot_info), + GFP_KERNEL); + if (!bss_hotplug_slot->info) { + rc = -ENOMEM; + goto alloc_err; + } + + if (sn_hp_slot_private_alloc(bss_hotplug_slot, + pci_bus, device)) { + rc = -ENOMEM; + goto alloc_err; + } + + bss_hotplug_slot->ops = &sn_hotplug_slot_ops; + bss_hotplug_slot->release = &sn_release_slot; + + rc = pci_hp_register(bss_hotplug_slot); + if (rc) + goto register_err; + } + dev_dbg(pci_bus->self, "Registered bus with hotplug\n"); + return rc; + +register_err: + dev_dbg(pci_bus->self, "bus failed to register with err = %d\n", + rc); + +alloc_err: + if (rc == -ENOMEM) + dev_dbg(pci_bus->self, "Memory allocation error\n"); + + /* destroy THIS element */ + if (bss_hotplug_slot) + sn_release_slot(bss_hotplug_slot); + + /* destroy anything else on the list */ + while ((bss_hotplug_slot = sn_hp_destroy())) + pci_hp_deregister(bss_hotplug_slot); + + return rc; +} + +static int sn_pci_hotplug_init(void) +{ + struct pci_bus *pci_bus = NULL; + int rc; + int registered = 0; + + INIT_LIST_HEAD(&sn_hp_list); + + if (sn_sal_rev() < SGI_HOTPLUG_PROM_REV) { + printk(KERN_ERR "%s: PROM version must be greater than 4.05\n", + __FUNCTION__); + return -EPERM; + } + + while ((pci_bus = pci_find_next_bus(pci_bus))) { + if (!pci_bus->sysdata) + continue; + + rc = sn_pci_bus_valid(pci_bus); + if (rc != 1) { + dev_dbg(pci_bus->self, "not a valid hotplug bus\n"); + continue; + } + dev_dbg(pci_bus->self, "valid hotplug bus\n"); + + rc = sn_hotplug_slot_register(pci_bus); + if (!rc) + registered = 1; + else { + registered = 0; + break; + } + } + + return registered == 1 ? 0 : -ENODEV; +} + +static void sn_pci_hotplug_exit(void) +{ + struct hotplug_slot *bss_hotplug_slot; + + while ((bss_hotplug_slot = sn_hp_destroy())) { + pci_hp_deregister(bss_hotplug_slot); + } + + if (!list_empty(&sn_hp_list)) + printk(KERN_ERR "%s: internal list is not empty\n", __FILE__); +} + +module_init(sn_pci_hotplug_init); +module_exit(sn_pci_hotplug_exit); diff --git a/include/asm-ia64/sn/pcibr_provider.h b/include/asm-ia64/sn/pcibr_provider.h index cbb4604c9349..2299c3ad2e33 100644 --- a/include/asm-ia64/sn/pcibr_provider.h +++ b/include/asm-ia64/sn/pcibr_provider.h @@ -151,4 +151,8 @@ extern void pcibr_change_devices_irq(struct sn_irq_info *sn_irq_info); extern int pcibr_ate_alloc(struct pcibus_info *, int); extern void pcibr_ate_free(struct pcibus_info *, int); extern void ate_write(struct pcibus_info *, int, int, uint64_t); +extern int sal_pcibr_slot_enable(struct pcibus_info *soft, int device, + void *resp); +extern int sal_pcibr_slot_disable(struct pcibus_info *soft, int device, + int action, void *resp); #endif diff --git a/include/asm-ia64/sn/pcidev.h b/include/asm-ia64/sn/pcidev.h index 9610fcc63545..49711d00ad04 100644 --- a/include/asm-ia64/sn/pcidev.h +++ b/include/asm-ia64/sn/pcidev.h @@ -23,6 +23,8 @@ #define SN_PCIBUS_BUSSOFT(pci_bus) \ ((struct pcibus_bussoft *)(PCI_CONTROLLER((pci_bus))->platform_data)) +#define SN_PCIBUS_BUSSOFT_INFO(pci_bus) \ + (struct pcibus_info *)((struct pcibus_bussoft *)(PCI_CONTROLLER((pci_bus))->platform_data)) /* * Given a struct pci_dev, return the sn pcibus_bussoft struct. Note * that this is not equivalent to SN_PCIBUS_BUSSOFT(pci_dev->bus) due @@ -56,6 +58,10 @@ struct pcidev_info { extern void sn_irq_fixup(struct pci_dev *pci_dev, struct sn_irq_info *sn_irq_info); extern void sn_irq_unfixup(struct pci_dev *pci_dev); +extern void sn_pci_controller_fixup(int segment, int busnum, + struct pci_bus *bus); +extern void sn_bus_store_sysdata(struct pci_dev *dev); +extern void sn_bus_free_sysdata(void); extern void sn_pci_fixup_slot(struct pci_dev *dev); extern void sn_pci_unfixup_slot(struct pci_dev *dev); extern void sn_irq_lh_init(void); From 7fe4c1b16854f0440939c62b8102cbf5c75e7cdc Mon Sep 17 00:00:00 2001 From: Prarit Bhargava <prarit@sgi.com> Date: Wed, 6 Jul 2005 15:30:25 -0700 Subject: [PATCH 06/11] [IA64] hotplug/ia64: SN Hotplug Driver - PREEMPT/pcibus_info fix This patch fixes an issue with the PROM and a kernel running with CONFIG_PREEMPT enabled. When CONFIG_PREEMPT is enabled, the size of a spinlock_t changes -- resulting in the PROM writing to an incorrect location. Signed-off-by: Prarit Bhargava <prarit@sgi.com> Signed-off-by: Tony Luck <tony.luck@intel.com> --- include/asm-ia64/sn/pcibr_provider.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/asm-ia64/sn/pcibr_provider.h b/include/asm-ia64/sn/pcibr_provider.h index 2299c3ad2e33..f9b8d2164007 100644 --- a/include/asm-ia64/sn/pcibr_provider.h +++ b/include/asm-ia64/sn/pcibr_provider.h @@ -108,10 +108,11 @@ struct pcibus_info { char pbi_hub_xid; uint64_t pbi_devreg[8]; - spinlock_t pbi_lock; uint32_t pbi_valid_devices; uint32_t pbi_enabled_devices; + + spinlock_t pbi_lock; }; /* From 2ba3e3e65cf182436757ba13ea8d564e2950fb56 Mon Sep 17 00:00:00 2001 From: Keith Owens <kaos@sgi.com> Date: Thu, 30 Jun 2005 22:53:00 -0700 Subject: [PATCH 07/11] [IA64] restore_sigcontext is not preempt safe restore_sigcontext calls ia64_set_local_fpu_owner() which requires that preempt be disabled. Signed-off-by: Keith Owens <kaos@sgi.com> Signed-off-by: Tony Luck <tony.luck@intel.com> --- arch/ia64/kernel/signal.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/ia64/kernel/signal.c b/arch/ia64/kernel/signal.c index edd9f07860b2..b8a0a7d257a9 100644 --- a/arch/ia64/kernel/signal.c +++ b/arch/ia64/kernel/signal.c @@ -143,6 +143,7 @@ restore_sigcontext (struct sigcontext __user *sc, struct sigscratch *scr) __copy_from_user(current->thread.fph, &sc->sc_fr[32], 96*16); psr->mfh = 0; /* drop signal handler's fph contents... */ + preempt_disable(); if (psr->dfh) ia64_drop_fpu(current); else { @@ -150,6 +151,7 @@ restore_sigcontext (struct sigcontext __user *sc, struct sigscratch *scr) __ia64_load_fpu(current->thread.fph); ia64_set_local_fpu_owner(current); } + preempt_enable(); } return err; } From af25e94d4dcfb9608846242fabdd4e6014e5c9f0 Mon Sep 17 00:00:00 2001 From: <> Date: Fri, 1 Jul 2005 23:27:00 -0700 Subject: [PATCH 08/11] [IA64] Make ia64 die() preempt safe Signed-off-by: Keith Owens <kaos@sgi.com> Signed-off-by: Tony Luck <tony.luck@intel.com> --- arch/ia64/kernel/traps.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/ia64/kernel/traps.c b/arch/ia64/kernel/traps.c index e7e520d90f03..4440c8343fa4 100644 --- a/arch/ia64/kernel/traps.c +++ b/arch/ia64/kernel/traps.c @@ -90,14 +90,16 @@ die (const char *str, struct pt_regs *regs, long err) .lock_owner_depth = 0 }; static int die_counter; + int cpu = get_cpu(); - if (die.lock_owner != smp_processor_id()) { + if (die.lock_owner != cpu) { console_verbose(); spin_lock_irq(&die.lock); - die.lock_owner = smp_processor_id(); + die.lock_owner = cpu; die.lock_owner_depth = 0; bust_spinlocks(1); } + put_cpu(); if (++die.lock_owner_depth < 3) { printk("%s[%d]: %s %ld [%d]\n", From 564601a5d12f93fdde04c6bc5b097b95e7752a46 Mon Sep 17 00:00:00 2001 From: "bob.picco" <bob.picco@hp.com> Date: Thu, 30 Jun 2005 09:52:00 -0700 Subject: [PATCH 09/11] [IA64] memory-less-nodes repost I reworked how nodes with only CPUs are treated. The patch below seems simpler to me and has eliminated the complicated routine reassign_cpu_only_nodes. There isn't any longer the requirement to modify ACPI NUMA information which was in large part the complexity introduced in reassign_cpu_only_nodes. This patch will produce a different number of nodes. For example, reassign_cpu_only_nodes would reduce two CPUonly nodes and one memory node configuration to one memory+CPUs node configuration. This patch doesn't change the number of nodes which means the user will see three. Two nodes without memory and one node with all the memory. While doing this patch, I noticed that early_nr_phys_cpus_node isn't serving any useful purpose. It is called once in find_pernode_space but the value isn't used to computer pernode space. Signed-off-by: bob.picco <bob.picco@hp.com> Signed-off-by: Tony Luck <tony.luck@intel.com> --- arch/ia64/mm/discontig.c | 400 +++++++++++++++++---------------------- arch/ia64/mm/init.c | 3 +- 2 files changed, 172 insertions(+), 231 deletions(-) diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c index f3fd528ead3b..54136fd00202 100644 --- a/arch/ia64/mm/discontig.c +++ b/arch/ia64/mm/discontig.c @@ -44,150 +44,7 @@ struct early_node_data { }; static struct early_node_data mem_data[MAX_NUMNODES] __initdata; - -/** - * reassign_cpu_only_nodes - called from find_memory to move CPU-only nodes to a memory node - * - * This function will move nodes with only CPUs (no memory) - * to a node with memory which is at the minimum numa_slit distance. - * Any reassigments will result in the compression of the nodes - * and renumbering the nid values where appropriate. - * The static declarations below are to avoid large stack size which - * makes the code not re-entrant. - */ -static void __init reassign_cpu_only_nodes(void) -{ - struct node_memblk_s *p; - int i, j, k, nnode, nid, cpu, cpunid, pxm; - u8 cslit, slit; - static DECLARE_BITMAP(nodes_with_mem, MAX_NUMNODES) __initdata; - static u8 numa_slit_fix[MAX_NUMNODES * MAX_NUMNODES] __initdata; - static int node_flip[MAX_NUMNODES] __initdata; - static int old_nid_map[NR_CPUS] __initdata; - - for (nnode = 0, p = &node_memblk[0]; p < &node_memblk[num_node_memblks]; p++) - if (!test_bit(p->nid, (void *) nodes_with_mem)) { - set_bit(p->nid, (void *) nodes_with_mem); - nnode++; - } - - /* - * All nids with memory. - */ - if (nnode == num_online_nodes()) - return; - - /* - * Change nids and attempt to migrate CPU-only nodes - * to the best numa_slit (closest neighbor) possible. - * For reassigned CPU nodes a nid can't be arrived at - * until after this loop because the target nid's new - * identity might not have been established yet. So - * new nid values are fabricated above num_online_nodes() and - * mapped back later to their true value. - */ - /* MCD - This code is a bit complicated, but may be unnecessary now. - * We can now handle much more interesting node-numbering. - * The old requirement that 0 <= nid <= numnodes <= MAX_NUMNODES - * and that there be no holes in the numbering 0..numnodes - * has become simply 0 <= nid <= MAX_NUMNODES. - */ - nid = 0; - for_each_online_node(i) { - if (test_bit(i, (void *) nodes_with_mem)) { - /* - * Save original nid value for numa_slit - * fixup and node_cpuid reassignments. - */ - node_flip[nid] = i; - - if (i == nid) { - nid++; - continue; - } - - for (p = &node_memblk[0]; p < &node_memblk[num_node_memblks]; p++) - if (p->nid == i) - p->nid = nid; - - cpunid = nid; - nid++; - } else - cpunid = MAX_NUMNODES; - - for (cpu = 0; cpu < NR_CPUS; cpu++) - if (node_cpuid[cpu].nid == i) { - /* - * For nodes not being reassigned just - * fix the cpu's nid and reverse pxm map - */ - if (cpunid < MAX_NUMNODES) { - pxm = nid_to_pxm_map[i]; - pxm_to_nid_map[pxm] = - node_cpuid[cpu].nid = cpunid; - continue; - } - - /* - * For nodes being reassigned, find best node by - * numa_slit information and then make a temporary - * nid value based on current nid and num_online_nodes(). - */ - slit = 0xff; - k = 2*num_online_nodes(); - for_each_online_node(j) { - if (i == j) - continue; - else if (test_bit(j, (void *) nodes_with_mem)) { - cslit = numa_slit[i * num_online_nodes() + j]; - if (cslit < slit) { - k = num_online_nodes() + j; - slit = cslit; - } - } - } - - /* save old nid map so we can update the pxm */ - old_nid_map[cpu] = node_cpuid[cpu].nid; - node_cpuid[cpu].nid = k; - } - } - - /* - * Fixup temporary nid values for CPU-only nodes. - */ - for (cpu = 0; cpu < NR_CPUS; cpu++) - if (node_cpuid[cpu].nid == (2*num_online_nodes())) { - pxm = nid_to_pxm_map[old_nid_map[cpu]]; - pxm_to_nid_map[pxm] = node_cpuid[cpu].nid = nnode - 1; - } else { - for (i = 0; i < nnode; i++) { - if (node_flip[i] != (node_cpuid[cpu].nid - num_online_nodes())) - continue; - - pxm = nid_to_pxm_map[old_nid_map[cpu]]; - pxm_to_nid_map[pxm] = node_cpuid[cpu].nid = i; - break; - } - } - - /* - * Fix numa_slit by compressing from larger - * nid array to reduced nid array. - */ - for (i = 0; i < nnode; i++) - for (j = 0; j < nnode; j++) - numa_slit_fix[i * nnode + j] = - numa_slit[node_flip[i] * num_online_nodes() + node_flip[j]]; - - memcpy(numa_slit, numa_slit_fix, sizeof (numa_slit)); - - nodes_clear(node_online_map); - for (i = 0; i < nnode; i++) - node_set_online(i); - - return; -} +static nodemask_t memory_less_mask __initdata; /* * To prevent cache aliasing effects, align per-node structures so that they @@ -232,28 +89,6 @@ static int __init build_node_maps(unsigned long start, unsigned long len, return 0; } -/** - * early_nr_phys_cpus_node - return number of physical cpus on a given node - * @node: node to check - * - * Count the number of physical cpus on @node. These are cpus that actually - * exist. We can't use nr_cpus_node() yet because - * acpi_boot_init() (which builds the node_to_cpu_mask array) hasn't been - * called yet. - */ -static int early_nr_phys_cpus_node(int node) -{ - int cpu, n = 0; - - for (cpu = 0; cpu < NR_CPUS; cpu++) - if (node == node_cpuid[cpu].nid) - if ((cpu == 0) || node_cpuid[cpu].phys_id) - n++; - - return n; -} - - /** * early_nr_cpus_node - return number of cpus on a given node * @node: node to check @@ -262,7 +97,7 @@ static int early_nr_phys_cpus_node(int node) * acpi_boot_init() (which builds the node_to_cpu_mask array) hasn't been * called yet. Note that node 0 will also count all non-existent cpus. */ -static int early_nr_cpus_node(int node) +static int __init early_nr_cpus_node(int node) { int cpu, n = 0; @@ -273,6 +108,70 @@ static int early_nr_cpus_node(int node) return n; } +/** + * compute_pernodesize - compute size of pernode data + * @node: the node id. + */ +static unsigned long __init compute_pernodesize(int node) +{ + unsigned long pernodesize = 0, cpus; + + cpus = early_nr_cpus_node(node); + pernodesize += PERCPU_PAGE_SIZE * cpus; + pernodesize += node * L1_CACHE_BYTES; + pernodesize += L1_CACHE_ALIGN(sizeof(pg_data_t)); + pernodesize += L1_CACHE_ALIGN(sizeof(struct ia64_node_data)); + pernodesize = PAGE_ALIGN(pernodesize); + return pernodesize; +} + +/** + * fill_pernode - initialize pernode data. + * @node: the node id. + * @pernode: physical address of pernode data + * @pernodesize: size of the pernode data + */ +static void __init fill_pernode(int node, unsigned long pernode, + unsigned long pernodesize) +{ + void *cpu_data; + int cpus = early_nr_cpus_node(node), cpu; + struct bootmem_data *bdp = &mem_data[node].bootmem_data; + + mem_data[node].pernode_addr = pernode; + mem_data[node].pernode_size = pernodesize; + memset(__va(pernode), 0, pernodesize); + + cpu_data = (void *)pernode; + pernode += PERCPU_PAGE_SIZE * cpus; + pernode += node * L1_CACHE_BYTES; + + mem_data[node].pgdat = __va(pernode); + pernode += L1_CACHE_ALIGN(sizeof(pg_data_t)); + + mem_data[node].node_data = __va(pernode); + pernode += L1_CACHE_ALIGN(sizeof(struct ia64_node_data)); + + mem_data[node].pgdat->bdata = bdp; + pernode += L1_CACHE_ALIGN(sizeof(pg_data_t)); + + /* + * Copy the static per-cpu data into the region we + * just set aside and then setup __per_cpu_offset + * for each CPU on this node. + */ + for (cpu = 0; cpu < NR_CPUS; cpu++) { + if (node == node_cpuid[cpu].nid) { + memcpy(__va(cpu_data), __phys_per_cpu_start, + __per_cpu_end - __per_cpu_start); + __per_cpu_offset[cpu] = (char*)__va(cpu_data) - + __per_cpu_start; + cpu_data += PERCPU_PAGE_SIZE; + } + } + + return; +} /** * find_pernode_space - allocate memory for memory map and per-node structures * @start: physical start of range @@ -304,9 +203,8 @@ static int early_nr_cpus_node(int node) static int __init find_pernode_space(unsigned long start, unsigned long len, int node) { - unsigned long epfn, cpu, cpus, phys_cpus; + unsigned long epfn; unsigned long pernodesize = 0, pernode, pages, mapsize; - void *cpu_data; struct bootmem_data *bdp = &mem_data[node].bootmem_data; epfn = (start + len) >> PAGE_SHIFT; @@ -329,49 +227,12 @@ static int __init find_pernode_space(unsigned long start, unsigned long len, * Calculate total size needed, incl. what's necessary * for good alignment and alias prevention. */ - cpus = early_nr_cpus_node(node); - phys_cpus = early_nr_phys_cpus_node(node); - pernodesize += PERCPU_PAGE_SIZE * cpus; - pernodesize += node * L1_CACHE_BYTES; - pernodesize += L1_CACHE_ALIGN(sizeof(pg_data_t)); - pernodesize += L1_CACHE_ALIGN(sizeof(struct ia64_node_data)); - pernodesize = PAGE_ALIGN(pernodesize); + pernodesize = compute_pernodesize(node); pernode = NODEDATA_ALIGN(start, node); /* Is this range big enough for what we want to store here? */ - if (start + len > (pernode + pernodesize + mapsize)) { - mem_data[node].pernode_addr = pernode; - mem_data[node].pernode_size = pernodesize; - memset(__va(pernode), 0, pernodesize); - - cpu_data = (void *)pernode; - pernode += PERCPU_PAGE_SIZE * cpus; - pernode += node * L1_CACHE_BYTES; - - mem_data[node].pgdat = __va(pernode); - pernode += L1_CACHE_ALIGN(sizeof(pg_data_t)); - - mem_data[node].node_data = __va(pernode); - pernode += L1_CACHE_ALIGN(sizeof(struct ia64_node_data)); - - mem_data[node].pgdat->bdata = bdp; - pernode += L1_CACHE_ALIGN(sizeof(pg_data_t)); - - /* - * Copy the static per-cpu data into the region we - * just set aside and then setup __per_cpu_offset - * for each CPU on this node. - */ - for (cpu = 0; cpu < NR_CPUS; cpu++) { - if (node == node_cpuid[cpu].nid) { - memcpy(__va(cpu_data), __phys_per_cpu_start, - __per_cpu_end - __per_cpu_start); - __per_cpu_offset[cpu] = (char*)__va(cpu_data) - - __per_cpu_start; - cpu_data += PERCPU_PAGE_SIZE; - } - } - } + if (start + len > (pernode + pernodesize + mapsize)) + fill_pernode(node, pernode, pernodesize); return 0; } @@ -411,6 +272,9 @@ static void __init reserve_pernode_space(void) for_each_online_node(node) { pg_data_t *pdp = mem_data[node].pgdat; + if (node_isset(node, memory_less_mask)) + continue; + bdp = pdp->bdata; /* First the bootmem_map itself */ @@ -455,6 +319,83 @@ static void __init initialize_pernode_data(void) } } +/** + * memory_less_node_alloc - * attempt to allocate memory on the best NUMA slit + * node but fall back to any other node when __alloc_bootmem_node fails + * for best. + * @nid: node id + * @pernodesize: size of this node's pernode data + * @align: alignment to use for this node's pernode data + */ +static void __init *memory_less_node_alloc(int nid, unsigned long pernodesize, + unsigned long align) +{ + void *ptr = NULL; + u8 best = 0xff; + int bestnode = -1, node; + + for_each_online_node(node) { + if (node_isset(node, memory_less_mask)) + continue; + else if (node_distance(nid, node) < best) { + best = node_distance(nid, node); + bestnode = node; + } + } + + ptr = __alloc_bootmem_node(mem_data[bestnode].pgdat, + pernodesize, align, __pa(MAX_DMA_ADDRESS)); + + if (!ptr) + panic("NO memory for memory less node\n"); + return ptr; +} + +/** + * pgdat_insert - insert the pgdat into global pgdat_list + * @pgdat: the pgdat for a node. + */ +static void __init pgdat_insert(pg_data_t *pgdat) +{ + pg_data_t *prev = NULL, *next; + + for_each_pgdat(next) + if (pgdat->node_id < next->node_id) + break; + else + prev = next; + + if (prev) { + prev->pgdat_next = pgdat; + pgdat->pgdat_next = next; + } else { + pgdat->pgdat_next = pgdat_list; + pgdat_list = pgdat; + } + + return; +} + +/** + * memory_less_nodes - allocate and initialize CPU only nodes pernode + * information. + */ +static void __init memory_less_nodes(void) +{ + unsigned long pernodesize; + void *pernode; + int node; + + for_each_node_mask(node, memory_less_mask) { + pernodesize = compute_pernodesize(node); + pernode = memory_less_node_alloc(node, pernodesize, + (node) ? (node * PERCPU_PAGE_SIZE) : (1024*1024)); + fill_pernode(node, __pa(pernode), pernodesize); + } + + return; +} + /** * find_memory - walk the EFI memory map and setup the bootmem allocator * @@ -472,16 +413,19 @@ void __init find_memory(void) node_set_online(0); } + nodes_or(memory_less_mask, memory_less_mask, node_online_map); min_low_pfn = -1; max_low_pfn = 0; - if (num_online_nodes() > 1) - reassign_cpu_only_nodes(); - /* These actually end up getting called by call_pernode_memory() */ efi_memmap_walk(filter_rsvd_memory, build_node_maps); efi_memmap_walk(filter_rsvd_memory, find_pernode_space); + for_each_online_node(node) + if (mem_data[node].bootmem_data.node_low_pfn) { + node_clear(node, memory_less_mask); + mem_data[node].min_pfn = ~0UL; + } /* * Initialize the boot memory maps in reverse order since that's * what the bootmem allocator expects @@ -492,17 +436,14 @@ void __init find_memory(void) if (!node_online(node)) continue; + else if (node_isset(node, memory_less_mask)) + continue; bdp = &mem_data[node].bootmem_data; pernode = mem_data[node].pernode_addr; pernodesize = mem_data[node].pernode_size; map = pernode + pernodesize; - /* Sanity check... */ - if (!pernode) - panic("pernode space for node %d " - "could not be allocated!", node); - init_bootmem_node(mem_data[node].pgdat, map>>PAGE_SHIFT, bdp->node_boot_start>>PAGE_SHIFT, @@ -512,6 +453,7 @@ void __init find_memory(void) efi_memmap_walk(filter_rsvd_memory, free_node_bootmem); reserve_pernode_space(); + memory_less_nodes(); initialize_pernode_data(); max_pfn = max_low_pfn; @@ -680,12 +622,13 @@ void __init paging_init(void) max_dma = virt_to_phys((void *) MAX_DMA_ADDRESS) >> PAGE_SHIFT; - /* so min() will work in count_node_pages */ - for_each_online_node(node) - mem_data[node].min_pfn = ~0UL; - efi_memmap_walk(filter_rsvd_memory, count_node_pages); + vmalloc_end -= PAGE_ALIGN(max_low_pfn * sizeof(struct page)); + vmem_map = (struct page *) vmalloc_end; + efi_memmap_walk(create_mem_map_page_table, NULL); + printk("Virtual mem_map starts at 0x%p\n", vmem_map); + for_each_online_node(node) { memset(zones_size, 0, sizeof(zones_size)); memset(zholes_size, 0, sizeof(zholes_size)); @@ -719,15 +662,6 @@ void __init paging_init(void) mem_data[node].num_dma_physpages); } - if (node == 0) { - vmalloc_end -= - PAGE_ALIGN(max_low_pfn * sizeof(struct page)); - vmem_map = (struct page *) vmalloc_end; - - efi_memmap_walk(create_mem_map_page_table, NULL); - printk("Virtual mem_map starts at 0x%p\n", vmem_map); - } - pfn_offset = mem_data[node].min_pfn; NODE_DATA(node)->node_mem_map = vmem_map + pfn_offset; @@ -735,5 +669,11 @@ void __init paging_init(void) pfn_offset, zholes_size); } + /* + * Make memory less nodes become a member of the known nodes. + */ + for_each_node_mask(node, memory_less_mask) + pgdat_insert(mem_data[node].pgdat); + zero_page_memmap_ptr = virt_to_page(ia64_imva(empty_zero_page)); } diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index 4eb2f52b87a1..65f9958db9f0 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -597,7 +597,8 @@ mem_init (void) kclist_add(&kcore_kernel, _stext, _end - _stext); for_each_pgdat(pgdat) - totalram_pages += free_all_bootmem_node(pgdat); + if (pgdat->bdata->node_bootmem_map) + totalram_pages += free_all_bootmem_node(pgdat); reserved_pages = 0; efi_memmap_walk(count_reserved_pages, &reserved_pages); From 8d7e35174d02ce76e910365acaaefc281a0b72a0 Mon Sep 17 00:00:00 2001 From: Tony Luck <tony.luck@intel.com> Date: Wed, 6 Jul 2005 18:18:10 -0700 Subject: [PATCH 10/11] [IA64] fix generic/up builds Jesse Barnes provided the original version of this patch months ago, but other changes kept conflicting with it, so it got deferred. Greg Edwards dug it out of obscurity just over a week ago, and almost immediately another conflicting patch appeared (Bob Picco's memory-less nodes). I've resolved the conflicts and got it running again. CONFIG_SGI_TIOCX is set to "y" in defconfig, which causes a Tiger to not boot (oops in tiocx_init). But that can be resolved later ... get this in now before it gets stale again. Signed-off-by: Tony Luck <tony.luck@intel.com> --- arch/ia64/kernel/Makefile | 1 + arch/ia64/kernel/acpi.c | 4 +- arch/ia64/kernel/numa.c | 57 +++++++++++++++++++++++++++ arch/ia64/kernel/smpboot.c | 41 ------------------- arch/ia64/mm/discontig.c | 72 +++++++++++++++++++++++----------- include/asm-ia64/sn/arch.h | 1 + include/asm-ia64/sn/sn_cpuid.h | 5 --- 7 files changed, 111 insertions(+), 70 deletions(-) create mode 100644 arch/ia64/kernel/numa.c diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile index b2e2f6509eb0..e1fb68ddec26 100644 --- a/arch/ia64/kernel/Makefile +++ b/arch/ia64/kernel/Makefile @@ -17,6 +17,7 @@ obj-$(CONFIG_IA64_PALINFO) += palinfo.o obj-$(CONFIG_IOSAPIC) += iosapic.o obj-$(CONFIG_MODULES) += module.o obj-$(CONFIG_SMP) += smp.o smpboot.o domain.o +obj-$(CONFIG_NUMA) += numa.o obj-$(CONFIG_PERFMON) += perfmon_default_smpl.o obj-$(CONFIG_IA64_CYCLONE) += cyclone.o obj-$(CONFIG_IA64_MCA_RECOVERY) += mca_recovery.o diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c index cda06f88c66e..542256e98e60 100644 --- a/arch/ia64/kernel/acpi.c +++ b/arch/ia64/kernel/acpi.c @@ -640,8 +640,10 @@ acpi_boot_init (void) if (smp_boot_data.cpu_phys_id[cpu] != hard_smp_processor_id()) node_cpuid[i++].phys_id = smp_boot_data.cpu_phys_id[cpu]; } - build_cpu_to_node_map(); # endif +#endif +#ifdef CONFIG_ACPI_NUMA + build_cpu_to_node_map(); #endif /* Make boot-up look pretty */ printk(KERN_INFO "%d CPUs available, %d CPUs total\n", available_cpus, total_cpus); diff --git a/arch/ia64/kernel/numa.c b/arch/ia64/kernel/numa.c new file mode 100644 index 000000000000..a68ce6678092 --- /dev/null +++ b/arch/ia64/kernel/numa.c @@ -0,0 +1,57 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * ia64 kernel NUMA specific stuff + * + * Copyright (C) 2002 Erich Focht <efocht@ess.nec.de> + * Copyright (C) 2004 Silicon Graphics, Inc. + * Jesse Barnes <jbarnes@sgi.com> + */ +#include <linux/config.h> +#include <linux/topology.h> +#include <linux/module.h> +#include <asm/processor.h> +#include <asm/smp.h> + +u8 cpu_to_node_map[NR_CPUS] __cacheline_aligned; +EXPORT_SYMBOL(cpu_to_node_map); + +cpumask_t node_to_cpu_mask[MAX_NUMNODES] __cacheline_aligned; + +/** + * build_cpu_to_node_map - setup cpu to node and node to cpumask arrays + * + * Build cpu to node mapping and initialize the per node cpu masks using + * info from the node_cpuid array handed to us by ACPI. + */ +void __init build_cpu_to_node_map(void) +{ + int cpu, i, node; + + for(node=0; node < MAX_NUMNODES; node++) + cpus_clear(node_to_cpu_mask[node]); + + for(cpu = 0; cpu < NR_CPUS; ++cpu) { + node = -1; + for (i = 0; i < NR_CPUS; ++i) + if (cpu_physical_id(cpu) == node_cpuid[i].phys_id) { + node = node_cpuid[i].nid; + break; + } + cpu_to_node_map[cpu] = (node >= 0) ? node : 0; + if (node >= 0) + cpu_set(cpu, node_to_cpu_mask[node]); + } +} diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c index 623b0a546709..7d72c0d872b3 100644 --- a/arch/ia64/kernel/smpboot.c +++ b/arch/ia64/kernel/smpboot.c @@ -525,47 +525,6 @@ smp_build_cpu_map (void) } } -#ifdef CONFIG_NUMA - -/* on which node is each logical CPU (one cacheline even for 64 CPUs) */ -u8 cpu_to_node_map[NR_CPUS] __cacheline_aligned; -EXPORT_SYMBOL(cpu_to_node_map); -/* which logical CPUs are on which nodes */ -cpumask_t node_to_cpu_mask[MAX_NUMNODES] __cacheline_aligned; - -/* - * Build cpu to node mapping and initialize the per node cpu masks. - */ -void __init -build_cpu_to_node_map (void) -{ - int cpu, i, node; - - for(node=0; node<MAX_NUMNODES; node++) - cpus_clear(node_to_cpu_mask[node]); - for(cpu = 0; cpu < NR_CPUS; ++cpu) { - /* - * All Itanium NUMA platforms I know use ACPI, so maybe we - * can drop this ifdef completely. [EF] - */ -#ifdef CONFIG_ACPI_NUMA - node = -1; - for (i = 0; i < NR_CPUS; ++i) - if (cpu_physical_id(cpu) == node_cpuid[i].phys_id) { - node = node_cpuid[i].nid; - break; - } -#else -# error Fixme: Dunno how to build CPU-to-node map. -#endif - cpu_to_node_map[cpu] = (node >= 0) ? node : 0; - if (node >= 0) - cpu_set(cpu, node_to_cpu_mask[node]); - } -} - -#endif /* CONFIG_NUMA */ - /* * Cycle through the APs sending Wakeup IPIs to boot each. */ diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c index 54136fd00202..b5c90e548195 100644 --- a/arch/ia64/mm/discontig.c +++ b/arch/ia64/mm/discontig.c @@ -125,6 +125,33 @@ static unsigned long __init compute_pernodesize(int node) return pernodesize; } +/** + * per_cpu_node_setup - setup per-cpu areas on each node + * @cpu_data: per-cpu area on this node + * @node: node to setup + * + * Copy the static per-cpu data into the region we just set aside and then + * setup __per_cpu_offset for each CPU on this node. Return a pointer to + * the end of the area. + */ +static void *per_cpu_node_setup(void *cpu_data, int node) +{ +#ifdef CONFIG_SMP + int cpu; + + for (cpu = 0; cpu < NR_CPUS; cpu++) { + if (node == node_cpuid[cpu].nid) { + memcpy(__va(cpu_data), __phys_per_cpu_start, + __per_cpu_end - __per_cpu_start); + __per_cpu_offset[cpu] = (char*)__va(cpu_data) - + __per_cpu_start; + cpu_data += PERCPU_PAGE_SIZE; + } + } +#endif + return cpu_data; +} + /** * fill_pernode - initialize pernode data. * @node: the node id. @@ -135,7 +162,7 @@ static void __init fill_pernode(int node, unsigned long pernode, unsigned long pernodesize) { void *cpu_data; - int cpus = early_nr_cpus_node(node), cpu; + int cpus = early_nr_cpus_node(node); struct bootmem_data *bdp = &mem_data[node].bootmem_data; mem_data[node].pernode_addr = pernode; @@ -155,23 +182,11 @@ static void __init fill_pernode(int node, unsigned long pernode, mem_data[node].pgdat->bdata = bdp; pernode += L1_CACHE_ALIGN(sizeof(pg_data_t)); - /* - * Copy the static per-cpu data into the region we - * just set aside and then setup __per_cpu_offset - * for each CPU on this node. - */ - for (cpu = 0; cpu < NR_CPUS; cpu++) { - if (node == node_cpuid[cpu].nid) { - memcpy(__va(cpu_data), __phys_per_cpu_start, - __per_cpu_end - __per_cpu_start); - __per_cpu_offset[cpu] = (char*)__va(cpu_data) - - __per_cpu_start; - cpu_data += PERCPU_PAGE_SIZE; - } - } + cpu_data = per_cpu_node_setup(cpu_data, node); return; } + /** * find_pernode_space - allocate memory for memory map and per-node structures * @start: physical start of range @@ -300,8 +315,8 @@ static void __init reserve_pernode_space(void) */ static void __init initialize_pernode_data(void) { - int cpu, node; pg_data_t *pgdat_list[MAX_NUMNODES]; + int cpu, node; for_each_online_node(node) pgdat_list[node] = mem_data[node].pgdat; @@ -311,12 +326,22 @@ static void __init initialize_pernode_data(void) memcpy(mem_data[node].node_data->pg_data_ptrs, pgdat_list, sizeof(pgdat_list)); } - +#ifdef CONFIG_SMP /* Set the node_data pointer for each per-cpu struct */ for (cpu = 0; cpu < NR_CPUS; cpu++) { node = node_cpuid[cpu].nid; per_cpu(cpu_info, cpu).node_data = mem_data[node].node_data; } +#else + { + struct cpuinfo_ia64 *cpu0_cpu_info; + cpu = 0; + node = node_cpuid[cpu].nid; + cpu0_cpu_info = (struct cpuinfo_ia64 *)(__phys_per_cpu_start + + ((char *)&per_cpu__cpu_info - __per_cpu_start)); + cpu0_cpu_info->node_data = mem_data[node].node_data; + } +#endif /* CONFIG_SMP */ } /** @@ -461,6 +486,7 @@ void __init find_memory(void) find_initrd(); } +#ifdef CONFIG_SMP /** * per_cpu_init - setup per-cpu variables * @@ -471,15 +497,15 @@ void *per_cpu_init(void) { int cpu; - if (smp_processor_id() == 0) { - for (cpu = 0; cpu < NR_CPUS; cpu++) { - per_cpu(local_per_cpu_offset, cpu) = - __per_cpu_offset[cpu]; - } - } + if (smp_processor_id() != 0) + return __per_cpu_start + __per_cpu_offset[smp_processor_id()]; + + for (cpu = 0; cpu < NR_CPUS; cpu++) + per_cpu(local_per_cpu_offset, cpu) = __per_cpu_offset[cpu]; return __per_cpu_start + __per_cpu_offset[smp_processor_id()]; } +#endif /* CONFIG_SMP */ /** * show_mem - give short summary of memory stats diff --git a/include/asm-ia64/sn/arch.h b/include/asm-ia64/sn/arch.h index 635fdce854a8..ab827d298569 100644 --- a/include/asm-ia64/sn/arch.h +++ b/include/asm-ia64/sn/arch.h @@ -11,6 +11,7 @@ #ifndef _ASM_IA64_SN_ARCH_H #define _ASM_IA64_SN_ARCH_H +#include <linux/numa.h> #include <asm/types.h> #include <asm/percpu.h> #include <asm/sn/types.h> diff --git a/include/asm-ia64/sn/sn_cpuid.h b/include/asm-ia64/sn/sn_cpuid.h index 20b300187669..d2c1d34dcce4 100644 --- a/include/asm-ia64/sn/sn_cpuid.h +++ b/include/asm-ia64/sn/sn_cpuid.h @@ -81,11 +81,6 @@ * */ -#ifndef CONFIG_SMP -#define cpu_physical_id(cpuid) ((ia64_getreg(_IA64_REG_CR_LID) >> 16) & 0xffff) -#endif - - #define get_node_number(addr) NASID_GET(addr) /* From 21517a57e838a1fbb7a54a8a77501024e77f83e0 Mon Sep 17 00:00:00 2001 From: Jack Steiner <steiner@sgi.com> Date: Thu, 7 Jul 2005 09:14:00 -0700 Subject: [PATCH 11/11] [IA64] - Disable tiocx driver on non-SN systems Disable the tiocx driver on non-SN systems. Signed-off-by: Jack Steiner <steiner@sgi.com> Signed-off-by: Tony Luck <tony.luck@intel.com> --- arch/ia64/sn/kernel/tiocx.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/ia64/sn/kernel/tiocx.c b/arch/ia64/sn/kernel/tiocx.c index 8716f4d5314b..c1cbcd1a1398 100644 --- a/arch/ia64/sn/kernel/tiocx.c +++ b/arch/ia64/sn/kernel/tiocx.c @@ -14,6 +14,7 @@ #include <linux/proc_fs.h> #include <linux/device.h> #include <linux/delay.h> +#include <asm/system.h> #include <asm/uaccess.h> #include <asm/sn/sn_sal.h> #include <asm/sn/addrs.h> @@ -481,6 +482,9 @@ static int __init tiocx_init(void) cnodeid_t cnodeid; int found_tiocx_device = 0; + if (!ia64_platform_is("sn2")) + return -ENODEV; + bus_register(&tiocx_bus_type); for (cnodeid = 0; cnodeid < MAX_COMPACT_NODES; cnodeid++) {