linux/arch/x86/kernel/irqinit.c

219 lines
5.5 KiB
C
Raw Normal View History

#include <linux/linkage.h>
#include <linux/errno.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/ioport.h>
#include <linux/interrupt.h>
#include <linux/timex.h>
#include <linux/random.h>
#include <linux/kprobes.h>
#include <linux/init.h>
#include <linux/kernel_stat.h>
#include <linux/device.h>
#include <linux/bitops.h>
#include <linux/acpi.h>
#include <linux/io.h>
#include <linux/delay.h>
#include <linux/atomic.h>
#include <asm/timer.h>
#include <asm/hw_irq.h>
#include <asm/pgtable.h>
#include <asm/desc.h>
#include <asm/apic.h>
#include <asm/setup.h>
#include <asm/i8259.h>
#include <asm/traps.h>
#include <asm/prom.h>
/*
* ISA PIC or low IO-APIC triggered (INTA-cycle or APIC) interrupts:
* (these are usually mapped to vectors 0x30-0x3f)
*/
/*
* The IO-APIC gives us many more interrupt sources. Most of these
* are unused but an SMP system is supposed to have enough memory ...
* sometimes (mostly wrt. hw bugs) we get corrupted vectors all
* across the spectrum, so we really want to be prepared to get all
* of these. Plus, more powerful systems might have more than 64
* IO-APIC registers.
*
* (these are usually mapped into the 0x30-0xff vector range)
*/
/*
* IRQ2 is cascade interrupt to second interrupt controller
*/
static struct irqaction irq2 = {
.handler = no_action,
.name = "cascade",
.flags = IRQF_NO_THREAD,
};
DEFINE_PER_CPU(vector_irq_t, vector_irq) = {
x86/irq: Fix do_IRQ() interrupt warning for cpu hotplug retriggered irqs During heavy CPU-hotplug operations the following spurious kernel warnings can trigger: do_IRQ: No ... irq handler for vector (irq -1) [ See: https://bugzilla.kernel.org/show_bug.cgi?id=64831 ] When downing a cpu it is possible that there are unhandled irqs left in the APIC IRR register. The following code path shows how the problem can occur: 1. CPU 5 is to go down. 2. cpu_disable() on CPU 5 executes with interrupt flag cleared by local_irq_save() via stop_machine(). 3. IRQ 12 asserts on CPU 5, setting IRR but not ISR because interrupt flag is cleared (CPU unabled to handle the irq) 4. IRQs are migrated off of CPU 5, and the vectors' irqs are set to -1. 5. stop_machine() finishes cpu_disable() 6. cpu_die() for CPU 5 executes in normal context. 7. CPU 5 attempts to handle IRQ 12 because the IRR is set for IRQ 12. The code attempts to find the vector's IRQ and cannot because it has been set to -1. 8. do_IRQ() warning displays warning about CPU 5 IRQ 12. I added a debug printk to output which CPU & vector was retriggered and discovered that that we are getting bogus events. I see a 100% correlation between this debug printk in fixup_irqs() and the do_IRQ() warning. This patchset resolves this by adding definitions for VECTOR_UNDEFINED(-1) and VECTOR_RETRIGGERED(-2) and modifying the code to use them. Fixes: https://bugzilla.kernel.org/show_bug.cgi?id=64831 Signed-off-by: Prarit Bhargava <prarit@redhat.com> Reviewed-by: Rui Wang <rui.y.wang@intel.com> Cc: Michel Lespinasse <walken@google.com> Cc: Seiji Aguchi <seiji.aguchi@hds.com> Cc: Yang Zhang <yang.z.zhang@Intel.com> Cc: Paul Gortmaker <paul.gortmaker@windriver.com> Cc: janet.morgan@Intel.com Cc: tony.luck@Intel.com Cc: ruiv.wang@gmail.com Link: http://lkml.kernel.org/r/1388938252-16627-1-git-send-email-prarit@redhat.com [ Cleaned up the code a bit. ] Signed-off-by: Ingo Molnar <mingo@kernel.org>
2014-01-05 16:10:52 +00:00
[0 ... NR_VECTORS - 1] = VECTOR_UNDEFINED,
};
int vector_used_by_percpu_irq(unsigned int vector)
{
int cpu;
for_each_online_cpu(cpu) {
x86/irq: Fix do_IRQ() interrupt warning for cpu hotplug retriggered irqs During heavy CPU-hotplug operations the following spurious kernel warnings can trigger: do_IRQ: No ... irq handler for vector (irq -1) [ See: https://bugzilla.kernel.org/show_bug.cgi?id=64831 ] When downing a cpu it is possible that there are unhandled irqs left in the APIC IRR register. The following code path shows how the problem can occur: 1. CPU 5 is to go down. 2. cpu_disable() on CPU 5 executes with interrupt flag cleared by local_irq_save() via stop_machine(). 3. IRQ 12 asserts on CPU 5, setting IRR but not ISR because interrupt flag is cleared (CPU unabled to handle the irq) 4. IRQs are migrated off of CPU 5, and the vectors' irqs are set to -1. 5. stop_machine() finishes cpu_disable() 6. cpu_die() for CPU 5 executes in normal context. 7. CPU 5 attempts to handle IRQ 12 because the IRR is set for IRQ 12. The code attempts to find the vector's IRQ and cannot because it has been set to -1. 8. do_IRQ() warning displays warning about CPU 5 IRQ 12. I added a debug printk to output which CPU & vector was retriggered and discovered that that we are getting bogus events. I see a 100% correlation between this debug printk in fixup_irqs() and the do_IRQ() warning. This patchset resolves this by adding definitions for VECTOR_UNDEFINED(-1) and VECTOR_RETRIGGERED(-2) and modifying the code to use them. Fixes: https://bugzilla.kernel.org/show_bug.cgi?id=64831 Signed-off-by: Prarit Bhargava <prarit@redhat.com> Reviewed-by: Rui Wang <rui.y.wang@intel.com> Cc: Michel Lespinasse <walken@google.com> Cc: Seiji Aguchi <seiji.aguchi@hds.com> Cc: Yang Zhang <yang.z.zhang@Intel.com> Cc: Paul Gortmaker <paul.gortmaker@windriver.com> Cc: janet.morgan@Intel.com Cc: tony.luck@Intel.com Cc: ruiv.wang@gmail.com Link: http://lkml.kernel.org/r/1388938252-16627-1-git-send-email-prarit@redhat.com [ Cleaned up the code a bit. ] Signed-off-by: Ingo Molnar <mingo@kernel.org>
2014-01-05 16:10:52 +00:00
if (per_cpu(vector_irq, cpu)[vector] > VECTOR_UNDEFINED)
return 1;
}
return 0;
}
void __init init_ISA_irqs(void)
{
struct irq_chip *chip = legacy_pic->chip;
const char *name = chip->name;
int i;
#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC)
init_bsp_APIC();
#endif
legacy_pic->init(0);
for (i = 0; i < legacy_pic->nr_legacy_irqs; i++)
irq_set_chip_and_handler_name(i, chip, handle_level_irq, name);
}
void __init init_IRQ(void)
{
int i;
/*
* We probably need a better place for this, but it works for
* now ...
*/
x86_add_irq_domains();
/*
* On cpu 0, Assign IRQ0_VECTOR..IRQ15_VECTOR's to IRQ 0..15.
* If these IRQ's are handled by legacy interrupt-controllers like PIC,
* then this configuration will likely be static after the boot. If
* these IRQ's are handled by more mordern controllers like IO-APIC,
* then this vector space can be freed and re-used dynamically as the
* irq's migrate etc.
*/
for (i = 0; i < legacy_pic->nr_legacy_irqs; i++)
per_cpu(vector_irq, 0)[IRQ0_VECTOR + i] = i;
x86_init.irqs.intr_init();
}
/*
* Setup the vector to irq mappings.
*/
void setup_vector_irq(int cpu)
{
#ifndef CONFIG_X86_IO_APIC
int irq;
/*
* On most of the platforms, legacy PIC delivers the interrupts on the
* boot cpu. But there are certain platforms where PIC interrupts are
* delivered to multiple cpu's. If the legacy IRQ is handled by the
* legacy PIC, for the new cpu that is coming online, setup the static
* legacy vector to irq mapping:
*/
for (irq = 0; irq < legacy_pic->nr_legacy_irqs; irq++)
per_cpu(vector_irq, cpu)[IRQ0_VECTOR + irq] = irq;
#endif
__setup_vector_irq(cpu);
}
static void __init smp_intr_init(void)
{
#ifdef CONFIG_SMP
#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC)
/*
* The reschedule interrupt is a CPU-to-CPU reschedule-helper
* IPI, driven by wakeup.
*/
alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
/* IPI for generic function call */
alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
/* IPI for generic single function call */
alloc_intr_gate(CALL_FUNCTION_SINGLE_VECTOR,
call_function_single_interrupt);
/* Low priority IPI to cleanup after moving an irq */
set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt);
set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors);
x86: fix panic with interrupts off (needed for MCE) For some time each panic() called with interrupts disabled triggered the !irqs_disabled() WARN_ON in smp_call_function(), producing ugly backtraces and confusing users. This is a common situation with machine checks for example which tend to call panic with interrupts disabled, but will also hit in other situations e.g. panic during early boot. In fact it means that panic cannot be called in many circumstances, which would be bad. This all started with the new fancy queued smp_call_function, which is then used by the shutdown path to shut down the other CPUs. On closer examination it turned out that the fancy RCU smp_call_function() does lots of things not suitable in a panic situation anyways, like allocating memory and relying on complex system state. I originally tried to patch this over by checking for panic there, but it was quite complicated and the original patch was also not very popular. This also didn't fix some of the underlying complexity problems. The new code in post 2.6.29 tries to patch around this by checking for oops_in_progress, but that is not enough to make this fully safe and I don't think that's a real solution because panic has to be reliable. So instead use an own vector to reboot. This makes the reboot code extremly straight forward, which is definitely a big plus in a panic situation where it is important to avoid relying on too much kernel state. The new simple code is also safe to be called from interupts off region because it is very very simple. There can be situations where it is important that panic is reliable. For example on a fatal machine check the panic is needed to get the system up again and running as quickly as possible. So it's important that panic is reliable and all function it calls simple. This is why I came up with this simple vector scheme. It's very hard to beat in simplicity. Vectors are not particularly precious anymore since all big systems are using per CPU vectors. Another possibility would have been to use an NMI similar to kdump, but there is still the problem that NMIs don't work reliably on some systems due to BIOS issues. NMIs would have been able to stop CPUs running with interrupts off too. In the sake of universal reliability I opted for using a non NMI vector for now. I put the reboot vector into the highest priority bucket of the APIC vectors and moved the 64bit UV_BAU message down instead into the next lower priority. [ Impact: bug fix, fixes an old regression ] Signed-off-by: Andi Kleen <ak@linux.intel.com> Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com> Signed-off-by: H. Peter Anvin <hpa@zytor.com>
2009-05-27 19:56:52 +00:00
/* IPI used for rebooting/stopping */
alloc_intr_gate(REBOOT_VECTOR, reboot_interrupt);
#endif
#endif /* CONFIG_SMP */
}
static void __init apic_intr_init(void)
{
smp_intr_init();
#ifdef CONFIG_X86_THERMAL_VECTOR
alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
#endif
#ifdef CONFIG_X86_MCE_THRESHOLD
alloc_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt);
#endif
#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC)
/* self generated IPI for local APIC timer */
alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt);
/* IPI for X86 platform specific use */
alloc_intr_gate(X86_PLATFORM_IPI_VECTOR, x86_platform_ipi);
#ifdef CONFIG_HAVE_KVM
/* IPI for KVM to deliver posted interrupt */
alloc_intr_gate(POSTED_INTR_VECTOR, kvm_posted_intr_ipi);
#endif
/* IPI vectors for APIC spurious and error interrupts */
alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
/* IRQ work interrupts: */
# ifdef CONFIG_IRQ_WORK
alloc_intr_gate(IRQ_WORK_VECTOR, irq_work_interrupt);
# endif
#endif
}
void __init native_init_IRQ(void)
{
int i;
/* Execute any quirks before the call gates are initialised: */
x86_init.irqs.pre_vector_init();
x86: use used_vectors in init_IRQ() Impact: fix crash with many devices I found this crash: [ 552.616646] general protection fault: 0403 [#1] SMP [ 552.620013] last sysfs file: /sys/devices/pci0000:00/0000:00:02.0/usb1/1-1/1-1:1.0/host13/target13:0:0/13:0:0:0/block/sr0/size [ 552.620013] CPU 0 [ 552.620013] Modules linked in: [ 552.620013] Pid: 0, comm: swapper Not tainted 2.6.30-rc1-tip-01931-g8fcafd8-dirty #28 Sun Fire X4440 [ 552.620013] RIP: 0010:[<ffffffff8023bada>] [<ffffffff8023bada>] default_idle+0x7d/0xda [ 552.620013] RSP: 0018:ffffffff81345e68 EFLAGS: 00010246 [ 552.620013] RAX: 0000000000000000 RBX: ffffffff8133d870 RCX: ffffc20000000000 [ 552.620013] RDX: 00000000001d0620 RSI: ffffffff8023bad8 RDI: ffffffff802a3169 [ 552.620013] RBP: ffffffff81345e98 R08: 0000000000000000 R09: ffffffff812244a0 [ 552.620013] R10: ffffffff81345dc8 R11: 7ebe1b6fa0bcac50 R12: 4ec4ec4ec4ec4ec5 [ 552.620013] R13: ffffffff813a54d0 R14: ffffffff813a7a40 R15: 0000000000000000 [ 552.620013] FS: 00000000006d1880(0000) GS:ffffc20000000000(0000) knlGS:0000000000000000 [ 552.620013] CS: 0010 DS: 0018 ES: 0018 CR0: 000000008005003b [ 552.620013] CR2: 00007fec9d936a50 CR3: 000000007d1a9000 CR4: 00000000000006e0 [ 552.620013] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 552.620013] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 [ 552.620013] Process swapper (pid: 0, threadinfo ffffffff81344000,task ffffffff812244a0) [ 552.620013] Stack: [ 552.620013] 0000000000000000 ffffc20000000000 00000000001d0620 7ebe1b6fa0bcac50 [ 552.620013] ffffffff8133d870 4ec4ec4ec4ec4ec5 ffffffff81345ec8 ffffffff8023bd84 [ 552.620013] 4ec4ec4ec4ec4ec5 ffffffff813a54d0 7ebe1b6fa0bcac50 ffffffff8133d870 [ 552.620013] Call Trace: [ 552.620013] [<ffffffff8023bd84>] c1e_idle+0x109/0x124 [ 552.620013] [<ffffffff8023314b>] cpu_idle+0xb8/0x101 [ 552.620013] [<ffffffff80c16d6a>] rest_init+0x7e/0x94 [ 552.620013] [<ffffffff81357efc>] start_kernel+0x3dc/0x3fd [ 552.620013] [<ffffffff813572a9>] x86_64_start_reservations+0xb9/0xd4 [ 552.620013] [<ffffffff813573b2>] x86_64_start_kernel+0xee/0x109 [ 552.620013] Code: 48 8b 04 25 f8 b4 00 00 83 a0 3c e0 ff ff fb 0f ae f0 65 48 8b 04 25 f8 b4 00 00 f6 80 38 e0 ff ff 08 75 09 e8 71 76 06 00 fb f4 <eb> 06 e8 68 76 06 00 fb 65 48 8b 04 25 f8 b4 00 00 83 88 3c e0 [ 552.620013] RIP [<ffffffff8023bada>] default_idle+0x7d/0xda [ 552.620013] RSP <ffffffff81345e68> [ 552.828646] ---[ end trace 4cbfc5c01382af7f ]--- Joerg Roedel said "The 0403 error code means that there was an external interrupt with vector 0x80. Yinghai, my theory is that the kernel on this machine has no 32bit emulation compiled in, right? In this case the selector points to a zero entry which may cause the #gpf right after the hlt. But I have no idea where the external int 0x80 comes from" it turns out that we could use 0x80 for external device on 64-bit when 32-bit emulation is disabled. But we forgot to set the gate for it. try to set gate for it by checking used_vectors. Also move apic_intr_init() early to avoid setting that gate two times. Signed-off-by: Yinghai Lu <yinghai@kernel.org> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Joerg Roedel <joerg.roedel@amd.com> LKML-Reference: <49E62DFD.6010904@kernel.org> Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-04-15 18:57:01 +00:00
apic_intr_init();
/*
* Cover the whole vector space, no vector can escape
* us. (some of these will be overridden and become
* 'special' SMP interrupts)
*/
i = FIRST_EXTERNAL_VECTOR;
for_each_clear_bit_from(i, used_vectors, NR_VECTORS) {
x86: use used_vectors in init_IRQ() Impact: fix crash with many devices I found this crash: [ 552.616646] general protection fault: 0403 [#1] SMP [ 552.620013] last sysfs file: /sys/devices/pci0000:00/0000:00:02.0/usb1/1-1/1-1:1.0/host13/target13:0:0/13:0:0:0/block/sr0/size [ 552.620013] CPU 0 [ 552.620013] Modules linked in: [ 552.620013] Pid: 0, comm: swapper Not tainted 2.6.30-rc1-tip-01931-g8fcafd8-dirty #28 Sun Fire X4440 [ 552.620013] RIP: 0010:[<ffffffff8023bada>] [<ffffffff8023bada>] default_idle+0x7d/0xda [ 552.620013] RSP: 0018:ffffffff81345e68 EFLAGS: 00010246 [ 552.620013] RAX: 0000000000000000 RBX: ffffffff8133d870 RCX: ffffc20000000000 [ 552.620013] RDX: 00000000001d0620 RSI: ffffffff8023bad8 RDI: ffffffff802a3169 [ 552.620013] RBP: ffffffff81345e98 R08: 0000000000000000 R09: ffffffff812244a0 [ 552.620013] R10: ffffffff81345dc8 R11: 7ebe1b6fa0bcac50 R12: 4ec4ec4ec4ec4ec5 [ 552.620013] R13: ffffffff813a54d0 R14: ffffffff813a7a40 R15: 0000000000000000 [ 552.620013] FS: 00000000006d1880(0000) GS:ffffc20000000000(0000) knlGS:0000000000000000 [ 552.620013] CS: 0010 DS: 0018 ES: 0018 CR0: 000000008005003b [ 552.620013] CR2: 00007fec9d936a50 CR3: 000000007d1a9000 CR4: 00000000000006e0 [ 552.620013] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 552.620013] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 [ 552.620013] Process swapper (pid: 0, threadinfo ffffffff81344000,task ffffffff812244a0) [ 552.620013] Stack: [ 552.620013] 0000000000000000 ffffc20000000000 00000000001d0620 7ebe1b6fa0bcac50 [ 552.620013] ffffffff8133d870 4ec4ec4ec4ec4ec5 ffffffff81345ec8 ffffffff8023bd84 [ 552.620013] 4ec4ec4ec4ec4ec5 ffffffff813a54d0 7ebe1b6fa0bcac50 ffffffff8133d870 [ 552.620013] Call Trace: [ 552.620013] [<ffffffff8023bd84>] c1e_idle+0x109/0x124 [ 552.620013] [<ffffffff8023314b>] cpu_idle+0xb8/0x101 [ 552.620013] [<ffffffff80c16d6a>] rest_init+0x7e/0x94 [ 552.620013] [<ffffffff81357efc>] start_kernel+0x3dc/0x3fd [ 552.620013] [<ffffffff813572a9>] x86_64_start_reservations+0xb9/0xd4 [ 552.620013] [<ffffffff813573b2>] x86_64_start_kernel+0xee/0x109 [ 552.620013] Code: 48 8b 04 25 f8 b4 00 00 83 a0 3c e0 ff ff fb 0f ae f0 65 48 8b 04 25 f8 b4 00 00 f6 80 38 e0 ff ff 08 75 09 e8 71 76 06 00 fb f4 <eb> 06 e8 68 76 06 00 fb 65 48 8b 04 25 f8 b4 00 00 83 88 3c e0 [ 552.620013] RIP [<ffffffff8023bada>] default_idle+0x7d/0xda [ 552.620013] RSP <ffffffff81345e68> [ 552.828646] ---[ end trace 4cbfc5c01382af7f ]--- Joerg Roedel said "The 0403 error code means that there was an external interrupt with vector 0x80. Yinghai, my theory is that the kernel on this machine has no 32bit emulation compiled in, right? In this case the selector points to a zero entry which may cause the #gpf right after the hlt. But I have no idea where the external int 0x80 comes from" it turns out that we could use 0x80 for external device on 64-bit when 32-bit emulation is disabled. But we forgot to set the gate for it. try to set gate for it by checking used_vectors. Also move apic_intr_init() early to avoid setting that gate two times. Signed-off-by: Yinghai Lu <yinghai@kernel.org> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Joerg Roedel <joerg.roedel@amd.com> LKML-Reference: <49E62DFD.6010904@kernel.org> Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-04-15 18:57:01 +00:00
/* IA32_SYSCALL_VECTOR could be used in trap_init already. */
set_intr_gate(i, interrupt[i - FIRST_EXTERNAL_VECTOR]);
}
if (!acpi_ioapic && !of_ioapic)
setup_irq(2, &irq2);
#ifdef CONFIG_X86_32
irq_ctx_init(smp_processor_id());
#endif
}