linux/arch/x86_64/kernel/apic.c
Andi Kleen 751521149a [PATCH] x86_64: Collected NMI watchdog fixes.
Collected NMI watchdog fixes.

- Fix call of check_nmi_watchdog

- Remove earlier move of check_nmi_watchdog to later.  It does not fix the
  race it was supposed to fix fully.

- Remove unused P6 definitions

- Add support for performance counter based watchdog on P4 systems.

  This allows to run it only once per second, which saves some CPU time.
  Previously it would run at 1000Hz, which was too much.

  Code ported from i386

  Make this the default on Intel systems.

- Use check_nmi_watchdog with local APIC based nmi

- Fix race in touch_nmi_watchdog

- Fix bug that caused incorrect performance counters to be programmed in a
  few cases on K8.

- Remove useless check for local APIC

- Use local_t and per_cpu variables for per CPU data.

- Keep other CPUs busy during check_nmi_watchdog to make sure they really
  tick when in lapic mode.

- Only check CPUs that are actually online.

- Various other fixes.

- Fix fallback path when MSRs are unimplemented

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-05-17 07:59:16 -07:00

1090 lines
27 KiB
C

/*
* Local APIC handling, local APIC timers
*
* (c) 1999, 2000 Ingo Molnar <mingo@redhat.com>
*
* Fixes
* Maciej W. Rozycki : Bits for genuine 82489DX APICs;
* thanks to Eric Gilmore
* and Rolf G. Tews
* for testing these extensively.
* Maciej W. Rozycki : Various updates and fixes.
* Mikael Pettersson : Power Management for UP-APIC.
* Pavel Machek and
* Mikael Pettersson : PM converted to driver model.
*/
#include <linux/config.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/irq.h>
#include <linux/delay.h>
#include <linux/bootmem.h>
#include <linux/smp_lock.h>
#include <linux/interrupt.h>
#include <linux/mc146818rtc.h>
#include <linux/kernel_stat.h>
#include <linux/sysdev.h>
#include <asm/atomic.h>
#include <asm/smp.h>
#include <asm/mtrr.h>
#include <asm/mpspec.h>
#include <asm/pgalloc.h>
#include <asm/mach_apic.h>
#include <asm/nmi.h>
int apic_verbosity;
int disable_apic_timer __initdata;
/* Using APIC to generate smp_local_timer_interrupt? */
int using_apic_timer = 0;
static DEFINE_PER_CPU(int, prof_multiplier) = 1;
static DEFINE_PER_CPU(int, prof_old_multiplier) = 1;
static DEFINE_PER_CPU(int, prof_counter) = 1;
static void apic_pm_activate(void);
void enable_NMI_through_LVT0 (void * dummy)
{
unsigned int v, ver;
ver = apic_read(APIC_LVR);
ver = GET_APIC_VERSION(ver);
v = APIC_DM_NMI; /* unmask and set to NMI */
apic_write_around(APIC_LVT0, v);
}
int get_maxlvt(void)
{
unsigned int v, ver, maxlvt;
v = apic_read(APIC_LVR);
ver = GET_APIC_VERSION(v);
maxlvt = GET_APIC_MAXLVT(v);
return maxlvt;
}
void clear_local_APIC(void)
{
int maxlvt;
unsigned int v;
maxlvt = get_maxlvt();
/*
* Masking an LVT entry on a P6 can trigger a local APIC error
* if the vector is zero. Mask LVTERR first to prevent this.
*/
if (maxlvt >= 3) {
v = ERROR_APIC_VECTOR; /* any non-zero vector will do */
apic_write_around(APIC_LVTERR, v | APIC_LVT_MASKED);
}
/*
* Careful: we have to set masks only first to deassert
* any level-triggered sources.
*/
v = apic_read(APIC_LVTT);
apic_write_around(APIC_LVTT, v | APIC_LVT_MASKED);
v = apic_read(APIC_LVT0);
apic_write_around(APIC_LVT0, v | APIC_LVT_MASKED);
v = apic_read(APIC_LVT1);
apic_write_around(APIC_LVT1, v | APIC_LVT_MASKED);
if (maxlvt >= 4) {
v = apic_read(APIC_LVTPC);
apic_write_around(APIC_LVTPC, v | APIC_LVT_MASKED);
}
/*
* Clean APIC state for other OSs:
*/
apic_write_around(APIC_LVTT, APIC_LVT_MASKED);
apic_write_around(APIC_LVT0, APIC_LVT_MASKED);
apic_write_around(APIC_LVT1, APIC_LVT_MASKED);
if (maxlvt >= 3)
apic_write_around(APIC_LVTERR, APIC_LVT_MASKED);
if (maxlvt >= 4)
apic_write_around(APIC_LVTPC, APIC_LVT_MASKED);
v = GET_APIC_VERSION(apic_read(APIC_LVR));
if (APIC_INTEGRATED(v)) { /* !82489DX */
if (maxlvt > 3) /* Due to Pentium errata 3AP and 11AP. */
apic_write(APIC_ESR, 0);
apic_read(APIC_ESR);
}
}
void __init connect_bsp_APIC(void)
{
if (pic_mode) {
/*
* Do not trust the local APIC being empty at bootup.
*/
clear_local_APIC();
/*
* PIC mode, enable APIC mode in the IMCR, i.e.
* connect BSP's local APIC to INT and NMI lines.
*/
apic_printk(APIC_VERBOSE, "leaving PIC mode, enabling APIC mode.\n");
outb(0x70, 0x22);
outb(0x01, 0x23);
}
}
void disconnect_bsp_APIC(void)
{
if (pic_mode) {
/*
* Put the board back into PIC mode (has an effect
* only on certain older boards). Note that APIC
* interrupts, including IPIs, won't work beyond
* this point! The only exception are INIT IPIs.
*/
apic_printk(APIC_QUIET, "disabling APIC mode, entering PIC mode.\n");
outb(0x70, 0x22);
outb(0x00, 0x23);
}
}
void disable_local_APIC(void)
{
unsigned int value;
clear_local_APIC();
/*
* Disable APIC (implies clearing of registers
* for 82489DX!).
*/
value = apic_read(APIC_SPIV);
value &= ~APIC_SPIV_APIC_ENABLED;
apic_write_around(APIC_SPIV, value);
}
/*
* This is to verify that we're looking at a real local APIC.
* Check these against your board if the CPUs aren't getting
* started for no apparent reason.
*/
int __init verify_local_APIC(void)
{
unsigned int reg0, reg1;
/*
* The version register is read-only in a real APIC.
*/
reg0 = apic_read(APIC_LVR);
apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg0);
apic_write(APIC_LVR, reg0 ^ APIC_LVR_MASK);
reg1 = apic_read(APIC_LVR);
apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg1);
/*
* The two version reads above should print the same
* numbers. If the second one is different, then we
* poke at a non-APIC.
*/
if (reg1 != reg0)
return 0;
/*
* Check if the version looks reasonably.
*/
reg1 = GET_APIC_VERSION(reg0);
if (reg1 == 0x00 || reg1 == 0xff)
return 0;
reg1 = get_maxlvt();
if (reg1 < 0x02 || reg1 == 0xff)
return 0;
/*
* The ID register is read/write in a real APIC.
*/
reg0 = apic_read(APIC_ID);
apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0);
apic_write(APIC_ID, reg0 ^ APIC_ID_MASK);
reg1 = apic_read(APIC_ID);
apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1);
apic_write(APIC_ID, reg0);
if (reg1 != (reg0 ^ APIC_ID_MASK))
return 0;
/*
* The next two are just to see if we have sane values.
* They're only really relevant if we're in Virtual Wire
* compatibility mode, but most boxes are anymore.
*/
reg0 = apic_read(APIC_LVT0);
apic_printk(APIC_DEBUG,"Getting LVT0: %x\n", reg0);
reg1 = apic_read(APIC_LVT1);
apic_printk(APIC_DEBUG, "Getting LVT1: %x\n", reg1);
return 1;
}
void __init sync_Arb_IDs(void)
{
/* Unsupported on P4 - see Intel Dev. Manual Vol. 3, Ch. 8.6.1 */
unsigned int ver = GET_APIC_VERSION(apic_read(APIC_LVR));
if (ver >= 0x14) /* P4 or higher */
return;
/*
* Wait for idle.
*/
apic_wait_icr_idle();
apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n");
apic_write_around(APIC_ICR, APIC_DEST_ALLINC | APIC_INT_LEVELTRIG
| APIC_DM_INIT);
}
extern void __error_in_apic_c (void);
/*
* An initial setup of the virtual wire mode.
*/
void __init init_bsp_APIC(void)
{
unsigned int value, ver;
/*
* Don't do the setup now if we have a SMP BIOS as the
* through-I/O-APIC virtual wire mode might be active.
*/
if (smp_found_config || !cpu_has_apic)
return;
value = apic_read(APIC_LVR);
ver = GET_APIC_VERSION(value);
/*
* Do not trust the local APIC being empty at bootup.
*/
clear_local_APIC();
/*
* Enable APIC.
*/
value = apic_read(APIC_SPIV);
value &= ~APIC_VECTOR_MASK;
value |= APIC_SPIV_APIC_ENABLED;
value |= APIC_SPIV_FOCUS_DISABLED;
value |= SPURIOUS_APIC_VECTOR;
apic_write_around(APIC_SPIV, value);
/*
* Set up the virtual wire mode.
*/
apic_write_around(APIC_LVT0, APIC_DM_EXTINT);
value = APIC_DM_NMI;
if (!APIC_INTEGRATED(ver)) /* 82489DX */
value |= APIC_LVT_LEVEL_TRIGGER;
apic_write_around(APIC_LVT1, value);
}
void __init setup_local_APIC (void)
{
unsigned int value, ver, maxlvt;
/* Pound the ESR really hard over the head with a big hammer - mbligh */
if (esr_disable) {
apic_write(APIC_ESR, 0);
apic_write(APIC_ESR, 0);
apic_write(APIC_ESR, 0);
apic_write(APIC_ESR, 0);
}
value = apic_read(APIC_LVR);
ver = GET_APIC_VERSION(value);
if ((SPURIOUS_APIC_VECTOR & 0x0f) != 0x0f)
__error_in_apic_c();
/*
* Double-check whether this APIC is really registered.
* This is meaningless in clustered apic mode, so we skip it.
*/
if (!apic_id_registered())
BUG();
/*
* Intel recommends to set DFR, LDR and TPR before enabling
* an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel
* document number 292116). So here it goes...
*/
init_apic_ldr();
/*
* Set Task Priority to 'accept all'. We never change this
* later on.
*/
value = apic_read(APIC_TASKPRI);
value &= ~APIC_TPRI_MASK;
apic_write_around(APIC_TASKPRI, value);
/*
* Now that we are all set up, enable the APIC
*/
value = apic_read(APIC_SPIV);
value &= ~APIC_VECTOR_MASK;
/*
* Enable APIC
*/
value |= APIC_SPIV_APIC_ENABLED;
/*
* Some unknown Intel IO/APIC (or APIC) errata is biting us with
* certain networking cards. If high frequency interrupts are
* happening on a particular IOAPIC pin, plus the IOAPIC routing
* entry is masked/unmasked at a high rate as well then sooner or
* later IOAPIC line gets 'stuck', no more interrupts are received
* from the device. If focus CPU is disabled then the hang goes
* away, oh well :-(
*
* [ This bug can be reproduced easily with a level-triggered
* PCI Ne2000 networking cards and PII/PIII processors, dual
* BX chipset. ]
*/
/*
* Actually disabling the focus CPU check just makes the hang less
* frequent as it makes the interrupt distributon model be more
* like LRU than MRU (the short-term load is more even across CPUs).
* See also the comment in end_level_ioapic_irq(). --macro
*/
#if 1
/* Enable focus processor (bit==0) */
value &= ~APIC_SPIV_FOCUS_DISABLED;
#else
/* Disable focus processor (bit==1) */
value |= APIC_SPIV_FOCUS_DISABLED;
#endif
/*
* Set spurious IRQ vector
*/
value |= SPURIOUS_APIC_VECTOR;
apic_write_around(APIC_SPIV, value);
/*
* Set up LVT0, LVT1:
*
* set up through-local-APIC on the BP's LINT0. This is not
* strictly necessary in pure symmetric-IO mode, but sometimes
* we delegate interrupts to the 8259A.
*/
/*
* TODO: set up through-local-APIC from through-I/O-APIC? --macro
*/
value = apic_read(APIC_LVT0) & APIC_LVT_MASKED;
if (!smp_processor_id() && (pic_mode || !value)) {
value = APIC_DM_EXTINT;
apic_printk(APIC_VERBOSE, "enabled ExtINT on CPU#%d\n", smp_processor_id());
} else {
value = APIC_DM_EXTINT | APIC_LVT_MASKED;
apic_printk(APIC_VERBOSE, "masked ExtINT on CPU#%d\n", smp_processor_id());
}
apic_write_around(APIC_LVT0, value);
/*
* only the BP should see the LINT1 NMI signal, obviously.
*/
if (!smp_processor_id())
value = APIC_DM_NMI;
else
value = APIC_DM_NMI | APIC_LVT_MASKED;
if (!APIC_INTEGRATED(ver)) /* 82489DX */
value |= APIC_LVT_LEVEL_TRIGGER;
apic_write_around(APIC_LVT1, value);
if (APIC_INTEGRATED(ver) && !esr_disable) { /* !82489DX */
unsigned oldvalue;
maxlvt = get_maxlvt();
if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
apic_write(APIC_ESR, 0);
oldvalue = apic_read(APIC_ESR);
value = ERROR_APIC_VECTOR; // enables sending errors
apic_write_around(APIC_LVTERR, value);
/*
* spec says clear errors after enabling vector.
*/
if (maxlvt > 3)
apic_write(APIC_ESR, 0);
value = apic_read(APIC_ESR);
if (value != oldvalue)
apic_printk(APIC_VERBOSE,
"ESR value after enabling vector: %08x, after %08x\n",
oldvalue, value);
} else {
if (esr_disable)
/*
* Something untraceble is creating bad interrupts on
* secondary quads ... for the moment, just leave the
* ESR disabled - we can't do anything useful with the
* errors anyway - mbligh
*/
apic_printk(APIC_DEBUG, "Leaving ESR disabled.\n");
else
apic_printk(APIC_DEBUG, "No ESR for 82489DX.\n");
}
nmi_watchdog_default();
if (nmi_watchdog == NMI_LOCAL_APIC)
setup_apic_nmi_watchdog();
apic_pm_activate();
}
#ifdef CONFIG_PM
static struct {
/* 'active' is true if the local APIC was enabled by us and
not the BIOS; this signifies that we are also responsible
for disabling it before entering apm/acpi suspend */
int active;
/* r/w apic fields */
unsigned int apic_id;
unsigned int apic_taskpri;
unsigned int apic_ldr;
unsigned int apic_dfr;
unsigned int apic_spiv;
unsigned int apic_lvtt;
unsigned int apic_lvtpc;
unsigned int apic_lvt0;
unsigned int apic_lvt1;
unsigned int apic_lvterr;
unsigned int apic_tmict;
unsigned int apic_tdcr;
unsigned int apic_thmr;
} apic_pm_state;
static int lapic_suspend(struct sys_device *dev, pm_message_t state)
{
unsigned long flags;
if (!apic_pm_state.active)
return 0;
apic_pm_state.apic_id = apic_read(APIC_ID);
apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI);
apic_pm_state.apic_ldr = apic_read(APIC_LDR);
apic_pm_state.apic_dfr = apic_read(APIC_DFR);
apic_pm_state.apic_spiv = apic_read(APIC_SPIV);
apic_pm_state.apic_lvtt = apic_read(APIC_LVTT);
apic_pm_state.apic_lvtpc = apic_read(APIC_LVTPC);
apic_pm_state.apic_lvt0 = apic_read(APIC_LVT0);
apic_pm_state.apic_lvt1 = apic_read(APIC_LVT1);
apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR);
apic_pm_state.apic_tmict = apic_read(APIC_TMICT);
apic_pm_state.apic_tdcr = apic_read(APIC_TDCR);
apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR);
local_save_flags(flags);
local_irq_disable();
disable_local_APIC();
local_irq_restore(flags);
return 0;
}
static int lapic_resume(struct sys_device *dev)
{
unsigned int l, h;
unsigned long flags;
if (!apic_pm_state.active)
return 0;
/* XXX: Pavel needs this for S3 resume, but can't explain why */
set_fixmap_nocache(FIX_APIC_BASE, APIC_DEFAULT_PHYS_BASE);
local_irq_save(flags);
rdmsr(MSR_IA32_APICBASE, l, h);
l &= ~MSR_IA32_APICBASE_BASE;
l |= MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE;
wrmsr(MSR_IA32_APICBASE, l, h);
apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED);
apic_write(APIC_ID, apic_pm_state.apic_id);
apic_write(APIC_DFR, apic_pm_state.apic_dfr);
apic_write(APIC_LDR, apic_pm_state.apic_ldr);
apic_write(APIC_TASKPRI, apic_pm_state.apic_taskpri);
apic_write(APIC_SPIV, apic_pm_state.apic_spiv);
apic_write(APIC_LVT0, apic_pm_state.apic_lvt0);
apic_write(APIC_LVT1, apic_pm_state.apic_lvt1);
apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr);
apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc);
apic_write(APIC_LVTT, apic_pm_state.apic_lvtt);
apic_write(APIC_TDCR, apic_pm_state.apic_tdcr);
apic_write(APIC_TMICT, apic_pm_state.apic_tmict);
apic_write(APIC_ESR, 0);
apic_read(APIC_ESR);
apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr);
apic_write(APIC_ESR, 0);
apic_read(APIC_ESR);
local_irq_restore(flags);
return 0;
}
static struct sysdev_class lapic_sysclass = {
set_kset_name("lapic"),
.resume = lapic_resume,
.suspend = lapic_suspend,
};
static struct sys_device device_lapic = {
.id = 0,
.cls = &lapic_sysclass,
};
static void __init apic_pm_activate(void)
{
apic_pm_state.active = 1;
}
static int __init init_lapic_sysfs(void)
{
int error;
if (!cpu_has_apic)
return 0;
/* XXX: remove suspend/resume procs if !apic_pm_state.active? */
error = sysdev_class_register(&lapic_sysclass);
if (!error)
error = sysdev_register(&device_lapic);
return error;
}
device_initcall(init_lapic_sysfs);
#else /* CONFIG_PM */
static void apic_pm_activate(void) { }
#endif /* CONFIG_PM */
static int __init apic_set_verbosity(char *str)
{
if (strcmp("debug", str) == 0)
apic_verbosity = APIC_DEBUG;
else if (strcmp("verbose", str) == 0)
apic_verbosity = APIC_VERBOSE;
else
printk(KERN_WARNING "APIC Verbosity level %s not recognised"
" use apic=verbose or apic=debug", str);
return 0;
}
__setup("apic=", apic_set_verbosity);
/*
* Detect and enable local APICs on non-SMP boards.
* Original code written by Keir Fraser.
* On AMD64 we trust the BIOS - if it says no APIC it is likely
* not correctly set up (usually the APIC timer won't work etc.)
*/
static int __init detect_init_APIC (void)
{
if (!cpu_has_apic) {
printk(KERN_INFO "No local APIC present\n");
return -1;
}
mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
boot_cpu_id = 0;
return 0;
}
void __init init_apic_mappings(void)
{
unsigned long apic_phys;
/*
* If no local APIC can be found then set up a fake all
* zeroes page to simulate the local APIC and another
* one for the IO-APIC.
*/
if (!smp_found_config && detect_init_APIC()) {
apic_phys = (unsigned long) alloc_bootmem_pages(PAGE_SIZE);
apic_phys = __pa(apic_phys);
} else
apic_phys = mp_lapic_addr;
set_fixmap_nocache(FIX_APIC_BASE, apic_phys);
apic_printk(APIC_VERBOSE,"mapped APIC to %16lx (%16lx)\n", APIC_BASE, apic_phys);
/*
* Fetch the APIC ID of the BSP in case we have a
* default configuration (or the MP table is broken).
*/
if (boot_cpu_id == -1U)
boot_cpu_id = GET_APIC_ID(apic_read(APIC_ID));
#ifdef CONFIG_X86_IO_APIC
{
unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0;
int i;
for (i = 0; i < nr_ioapics; i++) {
if (smp_found_config) {
ioapic_phys = mp_ioapics[i].mpc_apicaddr;
} else {
ioapic_phys = (unsigned long) alloc_bootmem_pages(PAGE_SIZE);
ioapic_phys = __pa(ioapic_phys);
}
set_fixmap_nocache(idx, ioapic_phys);
apic_printk(APIC_VERBOSE,"mapped IOAPIC to %016lx (%016lx)\n",
__fix_to_virt(idx), ioapic_phys);
idx++;
}
}
#endif
}
/*
* This function sets up the local APIC timer, with a timeout of
* 'clocks' APIC bus clock. During calibration we actually call
* this function twice on the boot CPU, once with a bogus timeout
* value, second time for real. The other (noncalibrating) CPUs
* call this function only once, with the real, calibrated value.
*
* We do reads before writes even if unnecessary, to get around the
* P5 APIC double write bug.
*/
#define APIC_DIVISOR 16
static void __setup_APIC_LVTT(unsigned int clocks)
{
unsigned int lvtt_value, tmp_value, ver;
ver = GET_APIC_VERSION(apic_read(APIC_LVR));
lvtt_value = APIC_LVT_TIMER_PERIODIC | LOCAL_TIMER_VECTOR;
if (!APIC_INTEGRATED(ver))
lvtt_value |= SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV);
apic_write_around(APIC_LVTT, lvtt_value);
/*
* Divide PICLK by 16
*/
tmp_value = apic_read(APIC_TDCR);
apic_write_around(APIC_TDCR, (tmp_value
& ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE))
| APIC_TDR_DIV_16);
apic_write_around(APIC_TMICT, clocks/APIC_DIVISOR);
}
static void setup_APIC_timer(unsigned int clocks)
{
unsigned long flags;
local_irq_save(flags);
/* For some reasons this doesn't work on Simics, so fake it for now */
if (!strstr(boot_cpu_data.x86_model_id, "Screwdriver")) {
__setup_APIC_LVTT(clocks);
return;
}
/* wait for irq slice */
if (vxtime.hpet_address) {
int trigger = hpet_readl(HPET_T0_CMP);
while (hpet_readl(HPET_COUNTER) >= trigger)
/* do nothing */ ;
while (hpet_readl(HPET_COUNTER) < trigger)
/* do nothing */ ;
} else {
int c1, c2;
outb_p(0x00, 0x43);
c2 = inb_p(0x40);
c2 |= inb_p(0x40) << 8;
do {
c1 = c2;
outb_p(0x00, 0x43);
c2 = inb_p(0x40);
c2 |= inb_p(0x40) << 8;
} while (c2 - c1 < 300);
}
__setup_APIC_LVTT(clocks);
local_irq_restore(flags);
}
/*
* In this function we calibrate APIC bus clocks to the external
* timer. Unfortunately we cannot use jiffies and the timer irq
* to calibrate, since some later bootup code depends on getting
* the first irq? Ugh.
*
* We want to do the calibration only once since we
* want to have local timer irqs syncron. CPUs connected
* by the same APIC bus have the very same bus frequency.
* And we want to have irqs off anyways, no accidental
* APIC irq that way.
*/
#define TICK_COUNT 100000000
static int __init calibrate_APIC_clock(void)
{
int apic, apic_start, tsc, tsc_start;
int result;
/*
* Put whatever arbitrary (but long enough) timeout
* value into the APIC clock, we just want to get the
* counter running for calibration.
*/
__setup_APIC_LVTT(1000000000);
apic_start = apic_read(APIC_TMCCT);
rdtscl(tsc_start);
do {
apic = apic_read(APIC_TMCCT);
rdtscl(tsc);
} while ((tsc - tsc_start) < TICK_COUNT && (apic - apic_start) < TICK_COUNT);
result = (apic_start - apic) * 1000L * cpu_khz / (tsc - tsc_start);
printk(KERN_INFO "Detected %d.%03d MHz APIC timer.\n",
result / 1000 / 1000, result / 1000 % 1000);
return result * APIC_DIVISOR / HZ;
}
static unsigned int calibration_result;
void __init setup_boot_APIC_clock (void)
{
if (disable_apic_timer) {
printk(KERN_INFO "Disabling APIC timer\n");
return;
}
printk(KERN_INFO "Using local APIC timer interrupts.\n");
using_apic_timer = 1;
local_irq_disable();
calibration_result = calibrate_APIC_clock();
/*
* Now set up the timer for real.
*/
setup_APIC_timer(calibration_result);
local_irq_enable();
}
void __init setup_secondary_APIC_clock(void)
{
local_irq_disable(); /* FIXME: Do we need this? --RR */
setup_APIC_timer(calibration_result);
local_irq_enable();
}
void __init disable_APIC_timer(void)
{
if (using_apic_timer) {
unsigned long v;
v = apic_read(APIC_LVTT);
apic_write_around(APIC_LVTT, v | APIC_LVT_MASKED);
}
}
void enable_APIC_timer(void)
{
if (using_apic_timer) {
unsigned long v;
v = apic_read(APIC_LVTT);
apic_write_around(APIC_LVTT, v & ~APIC_LVT_MASKED);
}
}
/*
* the frequency of the profiling timer can be changed
* by writing a multiplier value into /proc/profile.
*/
int setup_profiling_timer(unsigned int multiplier)
{
int i;
/*
* Sanity check. [at least 500 APIC cycles should be
* between APIC interrupts as a rule of thumb, to avoid
* irqs flooding us]
*/
if ( (!multiplier) || (calibration_result/multiplier < 500))
return -EINVAL;
/*
* Set the new multiplier for each CPU. CPUs don't start using the
* new values until the next timer interrupt in which they do process
* accounting. At that time they also adjust their APIC timers
* accordingly.
*/
for (i = 0; i < NR_CPUS; ++i)
per_cpu(prof_multiplier, i) = multiplier;
return 0;
}
#undef APIC_DIVISOR
/*
* Local timer interrupt handler. It does both profiling and
* process statistics/rescheduling.
*
* We do profiling in every local tick, statistics/rescheduling
* happen only every 'profiling multiplier' ticks. The default
* multiplier is 1 and it can be changed by writing the new multiplier
* value into /proc/profile.
*/
void smp_local_timer_interrupt(struct pt_regs *regs)
{
int cpu = smp_processor_id();
profile_tick(CPU_PROFILING, regs);
if (--per_cpu(prof_counter, cpu) <= 0) {
/*
* The multiplier may have changed since the last time we got
* to this point as a result of the user writing to
* /proc/profile. In this case we need to adjust the APIC
* timer accordingly.
*
* Interrupts are already masked off at this point.
*/
per_cpu(prof_counter, cpu) = per_cpu(prof_multiplier, cpu);
if (per_cpu(prof_counter, cpu) !=
per_cpu(prof_old_multiplier, cpu)) {
__setup_APIC_LVTT(calibration_result/
per_cpu(prof_counter, cpu));
per_cpu(prof_old_multiplier, cpu) =
per_cpu(prof_counter, cpu);
}
#ifdef CONFIG_SMP
update_process_times(user_mode(regs));
#endif
}
/*
* We take the 'long' return path, and there every subsystem
* grabs the appropriate locks (kernel lock/ irq lock).
*
* we might want to decouple profiling from the 'long path',
* and do the profiling totally in assembly.
*
* Currently this isn't too much of an issue (performance wise),
* we can take more than 100K local irqs per second on a 100 MHz P5.
*/
}
/*
* Local APIC timer interrupt. This is the most natural way for doing
* local interrupts, but local timer interrupts can be emulated by
* broadcast interrupts too. [in case the hw doesn't support APIC timers]
*
* [ if a single-CPU system runs an SMP kernel then we call the local
* interrupt as well. Thus we cannot inline the local irq ... ]
*/
void smp_apic_timer_interrupt(struct pt_regs *regs)
{
/*
* the NMI deadlock-detector uses this.
*/
add_pda(apic_timer_irqs, 1);
/*
* NOTE! We'd better ACK the irq immediately,
* because timer handling can be slow.
*/
ack_APIC_irq();
/*
* update_process_times() expects us to have done irq_enter().
* Besides, if we don't timer interrupts ignore the global
* interrupt lock, which is the WrongThing (tm) to do.
*/
irq_enter();
smp_local_timer_interrupt(regs);
irq_exit();
}
/*
* oem_force_hpet_timer -- force HPET mode for some boxes.
*
* Thus far, the major user of this is IBM's Summit2 series:
*
* Clustered boxes may have unsynced TSC problems if they are
* multi-chassis. Use available data to take a good guess.
* If in doubt, go HPET.
*/
__init int oem_force_hpet_timer(void)
{
int i, clusters, zeros;
unsigned id;
DECLARE_BITMAP(clustermap, NUM_APIC_CLUSTERS);
bitmap_zero(clustermap, NUM_APIC_CLUSTERS);
for (i = 0; i < NR_CPUS; i++) {
id = bios_cpu_apicid[i];
if (id != BAD_APICID)
__set_bit(APIC_CLUSTERID(id), clustermap);
}
/* Problem: Partially populated chassis may not have CPUs in some of
* the APIC clusters they have been allocated. Only present CPUs have
* bios_cpu_apicid entries, thus causing zeroes in the bitmap. Since
* clusters are allocated sequentially, count zeros only if they are
* bounded by ones.
*/
clusters = 0;
zeros = 0;
for (i = 0; i < NUM_APIC_CLUSTERS; i++) {
if (test_bit(i, clustermap)) {
clusters += 1 + zeros;
zeros = 0;
} else
++zeros;
}
/*
* If clusters > 2, then should be multi-chassis. Return 1 for HPET.
* Else return 0 to use TSC.
* May have to revisit this when multi-core + hyperthreaded CPUs come
* out, but AFAIK this will work even for them.
*/
return (clusters > 2);
}
/*
* This interrupt should _never_ happen with our APIC/SMP architecture
*/
asmlinkage void smp_spurious_interrupt(void)
{
unsigned int v;
irq_enter();
/*
* Check if this really is a spurious interrupt and ACK it
* if it is a vectored one. Just in case...
* Spurious interrupts should not be ACKed.
*/
v = apic_read(APIC_ISR + ((SPURIOUS_APIC_VECTOR & ~0x1f) >> 1));
if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f)))
ack_APIC_irq();
#if 0
static unsigned long last_warning;
static unsigned long skipped;
/* see sw-dev-man vol 3, chapter 7.4.13.5 */
if (time_before(last_warning+30*HZ,jiffies)) {
printk(KERN_INFO "spurious APIC interrupt on CPU#%d, %ld skipped.\n",
smp_processor_id(), skipped);
last_warning = jiffies;
skipped = 0;
} else {
skipped++;
}
#endif
irq_exit();
}
/*
* This interrupt should never happen with our APIC/SMP architecture
*/
asmlinkage void smp_error_interrupt(void)
{
unsigned int v, v1;
irq_enter();
/* First tickle the hardware, only then report what went on. -- REW */
v = apic_read(APIC_ESR);
apic_write(APIC_ESR, 0);
v1 = apic_read(APIC_ESR);
ack_APIC_irq();
atomic_inc(&irq_err_count);
/* Here is what the APIC error bits mean:
0: Send CS error
1: Receive CS error
2: Send accept error
3: Receive accept error
4: Reserved
5: Send illegal vector
6: Received illegal vector
7: Illegal register address
*/
printk (KERN_DEBUG "APIC error on CPU%d: %02x(%02x)\n",
smp_processor_id(), v , v1);
irq_exit();
}
int disable_apic;
/*
* This initializes the IO-APIC and APIC hardware if this is
* a UP kernel.
*/
int __init APIC_init_uniprocessor (void)
{
if (disable_apic) {
printk(KERN_INFO "Apic disabled\n");
return -1;
}
if (!cpu_has_apic) {
disable_apic = 1;
printk(KERN_INFO "Apic disabled by BIOS\n");
return -1;
}
verify_local_APIC();
connect_bsp_APIC();
phys_cpu_present_map = physid_mask_of_physid(0);
apic_write_around(APIC_ID, boot_cpu_id);
setup_local_APIC();
#ifdef CONFIG_X86_IO_APIC
if (smp_found_config && !skip_ioapic_setup && nr_ioapics)
setup_IO_APIC();
else
nr_ioapics = 0;
#endif
setup_boot_APIC_clock();
check_nmi_watchdog();
return 0;
}
static __init int setup_disableapic(char *str)
{
disable_apic = 1;
return 0;
}
static __init int setup_nolapic(char *str)
{
disable_apic = 1;
return 0;
}
static __init int setup_noapictimer(char *str)
{
disable_apic_timer = 1;
return 0;
}
/* dummy parsing: see setup.c */
__setup("disableapic", setup_disableapic);
__setup("nolapic", setup_nolapic); /* same as disableapic, for compatibility */
__setup("noapictimer", setup_noapictimer);
/* no "lapic" flag - we only use the lapic when the BIOS tells us so. */