mirror of
https://github.com/torvalds/linux.git
synced 2024-11-08 21:21:47 +00:00
2623a1d55a
This fixes a NULL pointer dereference that is triggered when taking a cpu offline after oprofile was initialized, e.g.: $ opcontrol --init $ opcontrol --start-daemon $ opcontrol --shutdown $ opcontrol --deinit $ echo 0 > /sys/devices/system/cpu/cpu1/online See the crash dump below. Though the counter has been disabled the cpu notifier is still active and trying to use already freed counter data. This fix is for linux-stable. To proper fix this, the hotplug code must be rewritten. Thus I will leave a WARN_ON_ONCE() message with this patch. BUG: unable to handle kernel NULL pointer dereference at (null) IP: [<ffffffff8132ad57>] op_amd_stop+0x2d/0x8e PGD 0 Oops: 0000 [#1] SMP last sysfs file: /sys/devices/system/cpu/cpu1/online CPU 1 Modules linked in: Pid: 0, comm: swapper Not tainted 2.6.34-rc5-oprofile-x86_64-standard-00210-g8c00f06 #16 Anaheim/Anaheim RIP: 0010:[<ffffffff8132ad57>] [<ffffffff8132ad57>] op_amd_stop+0x2d/0x8e RSP: 0018:ffff880001843f28 EFLAGS: 00010006 RAX: 0000000000000000 RBX: 0000000000000000 RCX: dead000000200200 RDX: ffff880001843f68 RSI: dead000000100100 RDI: 0000000000000000 RBP: ffff880001843f48 R08: 0000000000000000 R09: ffff880001843f08 R10: ffffffff8102c9a5 R11: ffff88000184ea80 R12: 0000000000000000 R13: ffff88000184f6c0 R14: 0000000000000000 R15: 0000000000000000 FS: 00007fec6a92e6f0(0000) GS:ffff880001840000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 0000000000000000 CR3: 000000000163b000 CR4: 00000000000006e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process swapper (pid: 0, threadinfo ffff88042fcd8000, task ffff88042fcd51d0) Stack: ffff880001843f48 0000000000000001 ffff88042e9f7d38 ffff880001843f68 <0> ffff880001843f58 ffffffff8132a602 ffff880001843f98 ffffffff810521b3 <0> ffff880001843f68 ffff880001843f68 ffff880001843f88 ffff88042fcd9fd8 Call Trace: <IRQ> [<ffffffff8132a602>] nmi_cpu_stop+0x21/0x23 
[<ffffffff810521b3>] generic_smp_call_function_single_interrupt+0xdf/0x11b [<ffffffff8101804f>] smp_call_function_single_interrupt+0x22/0x31 [<ffffffff810029f3>] call_function_single_interrupt+0x13/0x20 <EOI> [<ffffffff8102c9a5>] ? wake_up_process+0x10/0x12 [<ffffffff81008701>] ? default_idle+0x22/0x37 [<ffffffff8100896d>] c1e_idle+0xdf/0xe6 [<ffffffff813f1170>] ? atomic_notifier_call_chain+0x13/0x15 [<ffffffff810012fb>] cpu_idle+0x4b/0x7e [<ffffffff813e8a4e>] start_secondary+0x1ae/0x1b2 Code: 89 e5 41 55 49 89 fd 41 54 45 31 e4 53 31 db 48 83 ec 08 89 df e8 be f8 ff ff 48 98 48 83 3c c5 10 67 7a 81 00 74 1f 49 8b 45 08 <42> 8b 0c 20 0f 32 48 c1 e2 20 25 ff ff bf ff 48 09 d0 48 89 c2 RIP [<ffffffff8132ad57>] op_amd_stop+0x2d/0x8e RSP <ffff880001843f28> CR2: 0000000000000000 ---[ end trace 679ac372d674b757 ]--- Kernel panic - not syncing: Fatal exception in interrupt Pid: 0, comm: swapper Tainted: G D 2.6.34-rc5-oprofile-x86_64-standard-00210-g8c00f06 #16 Call Trace: <IRQ> [<ffffffff813ebd6a>] panic+0x9e/0x10c [<ffffffff810474b0>] ? up+0x34/0x39 [<ffffffff81031ccc>] ? kmsg_dump+0x112/0x12c [<ffffffff813eeff1>] oops_end+0x81/0x8e [<ffffffff8101efee>] no_context+0x1f3/0x202 [<ffffffff8101f1b7>] __bad_area_nosemaphore+0x1ba/0x1e0 [<ffffffff81028d24>] ? enqueue_task_fair+0x16d/0x17a [<ffffffff810264dc>] ? activate_task+0x42/0x53 [<ffffffff8102c967>] ? try_to_wake_up+0x272/0x284 [<ffffffff8101f1eb>] bad_area_nosemaphore+0xe/0x10 [<ffffffff813f0f3f>] do_page_fault+0x1c8/0x37c [<ffffffff81028d24>] ? enqueue_task_fair+0x16d/0x17a [<ffffffff813ee55f>] page_fault+0x1f/0x30 [<ffffffff8102c9a5>] ? wake_up_process+0x10/0x12 [<ffffffff8132ad57>] ? op_amd_stop+0x2d/0x8e [<ffffffff8132ad46>] ? 
op_amd_stop+0x1c/0x8e [<ffffffff8132a602>] nmi_cpu_stop+0x21/0x23 [<ffffffff810521b3>] generic_smp_call_function_single_interrupt+0xdf/0x11b [<ffffffff8101804f>] smp_call_function_single_interrupt+0x22/0x31 [<ffffffff810029f3>] call_function_single_interrupt+0x13/0x20 <EOI> [<ffffffff8102c9a5>] ? wake_up_process+0x10/0x12 [<ffffffff81008701>] ? default_idle+0x22/0x37 [<ffffffff8100896d>] c1e_idle+0xdf/0xe6 [<ffffffff813f1170>] ? atomic_notifier_call_chain+0x13/0x15 [<ffffffff810012fb>] cpu_idle+0x4b/0x7e [<ffffffff813e8a4e>] start_secondary+0x1ae/0x1b2 ------------[ cut here ]------------ WARNING: at /local/rrichter/.source/linux/arch/x86/kernel/smp.c:118 native_smp_send_reschedule+0x27/0x53() Hardware name: Anaheim Modules linked in: Pid: 0, comm: swapper Tainted: G D 2.6.34-rc5-oprofile-x86_64-standard-00210-g8c00f06 #16 Call Trace: <IRQ> [<ffffffff81017f32>] ? native_smp_send_reschedule+0x27/0x53 [<ffffffff81030ee2>] warn_slowpath_common+0x77/0xa4 [<ffffffff81030f1e>] warn_slowpath_null+0xf/0x11 [<ffffffff81017f32>] native_smp_send_reschedule+0x27/0x53 [<ffffffff8102634b>] resched_task+0x60/0x62 [<ffffffff8102653a>] check_preempt_curr_idle+0x10/0x12 [<ffffffff8102c8ea>] try_to_wake_up+0x1f5/0x284 [<ffffffff8102c986>] default_wake_function+0xd/0xf [<ffffffff810a110d>] pollwake+0x57/0x5a [<ffffffff8102c979>] ? default_wake_function+0x0/0xf [<ffffffff81026be5>] __wake_up_common+0x46/0x75 [<ffffffff81026ed0>] __wake_up+0x38/0x50 [<ffffffff81031694>] printk_tick+0x39/0x3b [<ffffffff8103ac37>] update_process_times+0x3f/0x5c [<ffffffff8104dc63>] tick_periodic+0x5d/0x69 [<ffffffff8104dc90>] tick_handle_periodic+0x21/0x71 [<ffffffff81018fd0>] smp_apic_timer_interrupt+0x82/0x95 [<ffffffff81002853>] apic_timer_interrupt+0x13/0x20 [<ffffffff81030cb5>] ? panic_blink_one_second+0x0/0x7b [<ffffffff813ebdd6>] ? panic+0x10a/0x10c [<ffffffff810474b0>] ? up+0x34/0x39 [<ffffffff81031ccc>] ? kmsg_dump+0x112/0x12c [<ffffffff813eeff1>] ? oops_end+0x81/0x8e [<ffffffff8101efee>] ? 
no_context+0x1f3/0x202 [<ffffffff8101f1b7>] ? __bad_area_nosemaphore+0x1ba/0x1e0 [<ffffffff81028d24>] ? enqueue_task_fair+0x16d/0x17a [<ffffffff810264dc>] ? activate_task+0x42/0x53 [<ffffffff8102c967>] ? try_to_wake_up+0x272/0x284 [<ffffffff8101f1eb>] ? bad_area_nosemaphore+0xe/0x10 [<ffffffff813f0f3f>] ? do_page_fault+0x1c8/0x37c [<ffffffff81028d24>] ? enqueue_task_fair+0x16d/0x17a [<ffffffff813ee55f>] ? page_fault+0x1f/0x30 [<ffffffff8102c9a5>] ? wake_up_process+0x10/0x12 [<ffffffff8132ad57>] ? op_amd_stop+0x2d/0x8e [<ffffffff8132ad46>] ? op_amd_stop+0x1c/0x8e [<ffffffff8132a602>] ? nmi_cpu_stop+0x21/0x23 [<ffffffff810521b3>] ? generic_smp_call_function_single_interrupt+0xdf/0x11b [<ffffffff8101804f>] ? smp_call_function_single_interrupt+0x22/0x31 [<ffffffff810029f3>] ? call_function_single_interrupt+0x13/0x20 <EOI> [<ffffffff8102c9a5>] ? wake_up_process+0x10/0x12 [<ffffffff81008701>] ? default_idle+0x22/0x37 [<ffffffff8100896d>] ? c1e_idle+0xdf/0xe6 [<ffffffff813f1170>] ? atomic_notifier_call_chain+0x13/0x15 [<ffffffff810012fb>] ? cpu_idle+0x4b/0x7e [<ffffffff813e8a4e>] ? start_secondary+0x1ae/0x1b2 ---[ end trace 679ac372d674b758 ]--- Cc: Andi Kleen <andi@firstfloor.org> Cc: stable <stable@kernel.org> Signed-off-by: Robert Richter <robert.richter@amd.com>
742 lines
16 KiB
C
742 lines
16 KiB
C
/**
|
|
* @file nmi_int.c
|
|
*
|
|
* @remark Copyright 2002-2009 OProfile authors
|
|
* @remark Read the file COPYING
|
|
*
|
|
* @author John Levon <levon@movementarian.org>
|
|
* @author Robert Richter <robert.richter@amd.com>
|
|
* @author Barry Kasindorf <barry.kasindorf@amd.com>
|
|
* @author Jason Yeh <jason.yeh@amd.com>
|
|
* @author Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
|
|
*/
|
|
|
|
#include <linux/init.h>
|
|
#include <linux/notifier.h>
|
|
#include <linux/smp.h>
|
|
#include <linux/oprofile.h>
|
|
#include <linux/sysdev.h>
|
|
#include <linux/slab.h>
|
|
#include <linux/moduleparam.h>
|
|
#include <linux/kdebug.h>
|
|
#include <linux/cpu.h>
|
|
#include <asm/nmi.h>
|
|
#include <asm/msr.h>
|
|
#include <asm/apic.h>
|
|
|
|
#include "op_counter.h"
|
|
#include "op_x86_model.h"
|
|
|
|
static struct op_x86_model_spec *model;
|
|
static DEFINE_PER_CPU(struct op_msrs, cpu_msrs);
|
|
static DEFINE_PER_CPU(unsigned long, saved_lvtpc);
|
|
|
|
/* 0 == registered but off, 1 == registered and on */
|
|
static int nmi_enabled = 0;
|
|
|
|
struct op_counter_config counter_config[OP_MAX_COUNTER];
|
|
|
|
/* common functions */
|
|
|
|
u64 op_x86_get_ctrl(struct op_x86_model_spec const *model,
|
|
struct op_counter_config *counter_config)
|
|
{
|
|
u64 val = 0;
|
|
u16 event = (u16)counter_config->event;
|
|
|
|
val |= ARCH_PERFMON_EVENTSEL_INT;
|
|
val |= counter_config->user ? ARCH_PERFMON_EVENTSEL_USR : 0;
|
|
val |= counter_config->kernel ? ARCH_PERFMON_EVENTSEL_OS : 0;
|
|
val |= (counter_config->unit_mask & 0xFF) << 8;
|
|
event &= model->event_mask ? model->event_mask : 0xFF;
|
|
val |= event & 0xFF;
|
|
val |= (event & 0x0F00) << 24;
|
|
|
|
return val;
|
|
}
|
|
|
|
|
|
static int profile_exceptions_notify(struct notifier_block *self,
|
|
unsigned long val, void *data)
|
|
{
|
|
struct die_args *args = (struct die_args *)data;
|
|
int ret = NOTIFY_DONE;
|
|
int cpu = smp_processor_id();
|
|
|
|
switch (val) {
|
|
case DIE_NMI:
|
|
case DIE_NMI_IPI:
|
|
model->check_ctrs(args->regs, &per_cpu(cpu_msrs, cpu));
|
|
ret = NOTIFY_STOP;
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
static void nmi_cpu_save_registers(struct op_msrs *msrs)
|
|
{
|
|
struct op_msr *counters = msrs->counters;
|
|
struct op_msr *controls = msrs->controls;
|
|
unsigned int i;
|
|
|
|
for (i = 0; i < model->num_counters; ++i) {
|
|
if (counters[i].addr)
|
|
rdmsrl(counters[i].addr, counters[i].saved);
|
|
}
|
|
|
|
for (i = 0; i < model->num_controls; ++i) {
|
|
if (controls[i].addr)
|
|
rdmsrl(controls[i].addr, controls[i].saved);
|
|
}
|
|
}
|
|
|
|
static void nmi_cpu_start(void *dummy)
|
|
{
|
|
struct op_msrs const *msrs = &__get_cpu_var(cpu_msrs);
|
|
if (!msrs->controls)
|
|
WARN_ON_ONCE(1);
|
|
else
|
|
model->start(msrs);
|
|
}
|
|
|
|
static int nmi_start(void)
|
|
{
|
|
on_each_cpu(nmi_cpu_start, NULL, 1);
|
|
return 0;
|
|
}
|
|
|
|
static void nmi_cpu_stop(void *dummy)
|
|
{
|
|
struct op_msrs const *msrs = &__get_cpu_var(cpu_msrs);
|
|
if (!msrs->controls)
|
|
WARN_ON_ONCE(1);
|
|
else
|
|
model->stop(msrs);
|
|
}
|
|
|
|
static void nmi_stop(void)
|
|
{
|
|
on_each_cpu(nmi_cpu_stop, NULL, 1);
|
|
}
|
|
|
|
#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
|
|
|
|
static DEFINE_PER_CPU(int, switch_index);
|
|
|
|
static inline int has_mux(void)
|
|
{
|
|
return !!model->switch_ctrl;
|
|
}
|
|
|
|
inline int op_x86_phys_to_virt(int phys)
|
|
{
|
|
return __get_cpu_var(switch_index) + phys;
|
|
}
|
|
|
|
inline int op_x86_virt_to_phys(int virt)
|
|
{
|
|
return virt % model->num_counters;
|
|
}
|
|
|
|
static void nmi_shutdown_mux(void)
|
|
{
|
|
int i;
|
|
|
|
if (!has_mux())
|
|
return;
|
|
|
|
for_each_possible_cpu(i) {
|
|
kfree(per_cpu(cpu_msrs, i).multiplex);
|
|
per_cpu(cpu_msrs, i).multiplex = NULL;
|
|
per_cpu(switch_index, i) = 0;
|
|
}
|
|
}
|
|
|
|
static int nmi_setup_mux(void)
|
|
{
|
|
size_t multiplex_size =
|
|
sizeof(struct op_msr) * model->num_virt_counters;
|
|
int i;
|
|
|
|
if (!has_mux())
|
|
return 1;
|
|
|
|
for_each_possible_cpu(i) {
|
|
per_cpu(cpu_msrs, i).multiplex =
|
|
kzalloc(multiplex_size, GFP_KERNEL);
|
|
if (!per_cpu(cpu_msrs, i).multiplex)
|
|
return 0;
|
|
}
|
|
|
|
return 1;
|
|
}
|
|
|
|
static void nmi_cpu_setup_mux(int cpu, struct op_msrs const * const msrs)
|
|
{
|
|
int i;
|
|
struct op_msr *multiplex = msrs->multiplex;
|
|
|
|
if (!has_mux())
|
|
return;
|
|
|
|
for (i = 0; i < model->num_virt_counters; ++i) {
|
|
if (counter_config[i].enabled) {
|
|
multiplex[i].saved = -(u64)counter_config[i].count;
|
|
} else {
|
|
multiplex[i].saved = 0;
|
|
}
|
|
}
|
|
|
|
per_cpu(switch_index, cpu) = 0;
|
|
}
|
|
|
|
static void nmi_cpu_save_mpx_registers(struct op_msrs *msrs)
|
|
{
|
|
struct op_msr *counters = msrs->counters;
|
|
struct op_msr *multiplex = msrs->multiplex;
|
|
int i;
|
|
|
|
for (i = 0; i < model->num_counters; ++i) {
|
|
int virt = op_x86_phys_to_virt(i);
|
|
if (counters[i].addr)
|
|
rdmsrl(counters[i].addr, multiplex[virt].saved);
|
|
}
|
|
}
|
|
|
|
static void nmi_cpu_restore_mpx_registers(struct op_msrs *msrs)
|
|
{
|
|
struct op_msr *counters = msrs->counters;
|
|
struct op_msr *multiplex = msrs->multiplex;
|
|
int i;
|
|
|
|
for (i = 0; i < model->num_counters; ++i) {
|
|
int virt = op_x86_phys_to_virt(i);
|
|
if (counters[i].addr)
|
|
wrmsrl(counters[i].addr, multiplex[virt].saved);
|
|
}
|
|
}
|
|
|
|
static void nmi_cpu_switch(void *dummy)
|
|
{
|
|
int cpu = smp_processor_id();
|
|
int si = per_cpu(switch_index, cpu);
|
|
struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu);
|
|
|
|
nmi_cpu_stop(NULL);
|
|
nmi_cpu_save_mpx_registers(msrs);
|
|
|
|
/* move to next set */
|
|
si += model->num_counters;
|
|
if ((si >= model->num_virt_counters) || (counter_config[si].count == 0))
|
|
per_cpu(switch_index, cpu) = 0;
|
|
else
|
|
per_cpu(switch_index, cpu) = si;
|
|
|
|
model->switch_ctrl(model, msrs);
|
|
nmi_cpu_restore_mpx_registers(msrs);
|
|
|
|
nmi_cpu_start(NULL);
|
|
}
|
|
|
|
|
|
/*
|
|
* Quick check to see if multiplexing is necessary.
|
|
* The check should be sufficient since counters are used
|
|
* in ordre.
|
|
*/
|
|
static int nmi_multiplex_on(void)
|
|
{
|
|
return counter_config[model->num_counters].count ? 0 : -EINVAL;
|
|
}
|
|
|
|
static int nmi_switch_event(void)
|
|
{
|
|
if (!has_mux())
|
|
return -ENOSYS; /* not implemented */
|
|
if (nmi_multiplex_on() < 0)
|
|
return -EINVAL; /* not necessary */
|
|
|
|
on_each_cpu(nmi_cpu_switch, NULL, 1);
|
|
|
|
return 0;
|
|
}
|
|
|
|
static inline void mux_init(struct oprofile_operations *ops)
|
|
{
|
|
if (has_mux())
|
|
ops->switch_events = nmi_switch_event;
|
|
}
|
|
|
|
static void mux_clone(int cpu)
|
|
{
|
|
if (!has_mux())
|
|
return;
|
|
|
|
memcpy(per_cpu(cpu_msrs, cpu).multiplex,
|
|
per_cpu(cpu_msrs, 0).multiplex,
|
|
sizeof(struct op_msr) * model->num_virt_counters);
|
|
}
|
|
|
|
#else
|
|
|
|
/*
 * Event multiplexing disabled: virtual and physical counter indices are
 * identical and all multiplexing hooks are no-ops.
 */
inline int op_x86_phys_to_virt(int phys)
{
	return phys;
}

inline int op_x86_virt_to_phys(int virt)
{
	return virt;
}

static inline void nmi_shutdown_mux(void)
{
}

/* nothing to allocate, so always report success */
static inline int nmi_setup_mux(void)
{
	return 1;
}

static inline void nmi_cpu_setup_mux(int cpu, struct op_msrs const * const msrs)
{
}

static inline void mux_init(struct oprofile_operations *ops)
{
}

static void mux_clone(int cpu)
{
}
|
|
|
|
#endif
|
|
|
|
static void free_msrs(void)
|
|
{
|
|
int i;
|
|
for_each_possible_cpu(i) {
|
|
kfree(per_cpu(cpu_msrs, i).counters);
|
|
per_cpu(cpu_msrs, i).counters = NULL;
|
|
kfree(per_cpu(cpu_msrs, i).controls);
|
|
per_cpu(cpu_msrs, i).controls = NULL;
|
|
}
|
|
nmi_shutdown_mux();
|
|
}
|
|
|
|
static int allocate_msrs(void)
|
|
{
|
|
size_t controls_size = sizeof(struct op_msr) * model->num_controls;
|
|
size_t counters_size = sizeof(struct op_msr) * model->num_counters;
|
|
|
|
int i;
|
|
for_each_possible_cpu(i) {
|
|
per_cpu(cpu_msrs, i).counters = kzalloc(counters_size,
|
|
GFP_KERNEL);
|
|
if (!per_cpu(cpu_msrs, i).counters)
|
|
goto fail;
|
|
per_cpu(cpu_msrs, i).controls = kzalloc(controls_size,
|
|
GFP_KERNEL);
|
|
if (!per_cpu(cpu_msrs, i).controls)
|
|
goto fail;
|
|
}
|
|
|
|
if (!nmi_setup_mux())
|
|
goto fail;
|
|
|
|
return 1;
|
|
|
|
fail:
|
|
free_msrs();
|
|
return 0;
|
|
}
|
|
|
|
static void nmi_cpu_setup(void *dummy)
|
|
{
|
|
int cpu = smp_processor_id();
|
|
struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu);
|
|
nmi_cpu_save_registers(msrs);
|
|
spin_lock(&oprofilefs_lock);
|
|
model->setup_ctrs(model, msrs);
|
|
nmi_cpu_setup_mux(cpu, msrs);
|
|
spin_unlock(&oprofilefs_lock);
|
|
per_cpu(saved_lvtpc, cpu) = apic_read(APIC_LVTPC);
|
|
apic_write(APIC_LVTPC, APIC_DM_NMI);
|
|
}
|
|
|
|
static struct notifier_block profile_exceptions_nb = {
|
|
.notifier_call = profile_exceptions_notify,
|
|
.next = NULL,
|
|
.priority = 2
|
|
};
|
|
|
|
static int nmi_setup(void)
|
|
{
|
|
int err = 0;
|
|
int cpu;
|
|
|
|
if (!allocate_msrs())
|
|
return -ENOMEM;
|
|
|
|
/* We need to serialize save and setup for HT because the subset
|
|
* of msrs are distinct for save and setup operations
|
|
*/
|
|
|
|
/* Assume saved/restored counters are the same on all CPUs */
|
|
err = model->fill_in_addresses(&per_cpu(cpu_msrs, 0));
|
|
if (err)
|
|
goto fail;
|
|
|
|
for_each_possible_cpu(cpu) {
|
|
if (!cpu)
|
|
continue;
|
|
|
|
memcpy(per_cpu(cpu_msrs, cpu).counters,
|
|
per_cpu(cpu_msrs, 0).counters,
|
|
sizeof(struct op_msr) * model->num_counters);
|
|
|
|
memcpy(per_cpu(cpu_msrs, cpu).controls,
|
|
per_cpu(cpu_msrs, 0).controls,
|
|
sizeof(struct op_msr) * model->num_controls);
|
|
|
|
mux_clone(cpu);
|
|
}
|
|
|
|
err = register_die_notifier(&profile_exceptions_nb);
|
|
if (err)
|
|
goto fail;
|
|
|
|
on_each_cpu(nmi_cpu_setup, NULL, 1);
|
|
nmi_enabled = 1;
|
|
return 0;
|
|
fail:
|
|
free_msrs();
|
|
return err;
|
|
}
|
|
|
|
static void nmi_cpu_restore_registers(struct op_msrs *msrs)
|
|
{
|
|
struct op_msr *counters = msrs->counters;
|
|
struct op_msr *controls = msrs->controls;
|
|
unsigned int i;
|
|
|
|
for (i = 0; i < model->num_controls; ++i) {
|
|
if (controls[i].addr)
|
|
wrmsrl(controls[i].addr, controls[i].saved);
|
|
}
|
|
|
|
for (i = 0; i < model->num_counters; ++i) {
|
|
if (counters[i].addr)
|
|
wrmsrl(counters[i].addr, counters[i].saved);
|
|
}
|
|
}
|
|
|
|
static void nmi_cpu_shutdown(void *dummy)
|
|
{
|
|
unsigned int v;
|
|
int cpu = smp_processor_id();
|
|
struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu);
|
|
|
|
/* restoring APIC_LVTPC can trigger an apic error because the delivery
|
|
* mode and vector nr combination can be illegal. That's by design: on
|
|
* power on apic lvt contain a zero vector nr which are legal only for
|
|
* NMI delivery mode. So inhibit apic err before restoring lvtpc
|
|
*/
|
|
v = apic_read(APIC_LVTERR);
|
|
apic_write(APIC_LVTERR, v | APIC_LVT_MASKED);
|
|
apic_write(APIC_LVTPC, per_cpu(saved_lvtpc, cpu));
|
|
apic_write(APIC_LVTERR, v);
|
|
nmi_cpu_restore_registers(msrs);
|
|
}
|
|
|
|
static void nmi_shutdown(void)
|
|
{
|
|
struct op_msrs *msrs;
|
|
|
|
nmi_enabled = 0;
|
|
on_each_cpu(nmi_cpu_shutdown, NULL, 1);
|
|
unregister_die_notifier(&profile_exceptions_nb);
|
|
msrs = &get_cpu_var(cpu_msrs);
|
|
model->shutdown(msrs);
|
|
free_msrs();
|
|
put_cpu_var(cpu_msrs);
|
|
}
|
|
|
|
static int nmi_create_files(struct super_block *sb, struct dentry *root)
|
|
{
|
|
unsigned int i;
|
|
|
|
for (i = 0; i < model->num_virt_counters; ++i) {
|
|
struct dentry *dir;
|
|
char buf[4];
|
|
|
|
/* quick little hack to _not_ expose a counter if it is not
|
|
* available for use. This should protect userspace app.
|
|
* NOTE: assumes 1:1 mapping here (that counters are organized
|
|
* sequentially in their struct assignment).
|
|
*/
|
|
if (!avail_to_resrv_perfctr_nmi_bit(op_x86_virt_to_phys(i)))
|
|
continue;
|
|
|
|
snprintf(buf, sizeof(buf), "%d", i);
|
|
dir = oprofilefs_mkdir(sb, root, buf);
|
|
oprofilefs_create_ulong(sb, dir, "enabled", &counter_config[i].enabled);
|
|
oprofilefs_create_ulong(sb, dir, "event", &counter_config[i].event);
|
|
oprofilefs_create_ulong(sb, dir, "count", &counter_config[i].count);
|
|
oprofilefs_create_ulong(sb, dir, "unit_mask", &counter_config[i].unit_mask);
|
|
oprofilefs_create_ulong(sb, dir, "kernel", &counter_config[i].kernel);
|
|
oprofilefs_create_ulong(sb, dir, "user", &counter_config[i].user);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
#ifdef CONFIG_SMP
|
|
static int oprofile_cpu_notifier(struct notifier_block *b, unsigned long action,
|
|
void *data)
|
|
{
|
|
int cpu = (unsigned long)data;
|
|
switch (action) {
|
|
case CPU_DOWN_FAILED:
|
|
case CPU_ONLINE:
|
|
smp_call_function_single(cpu, nmi_cpu_start, NULL, 0);
|
|
break;
|
|
case CPU_DOWN_PREPARE:
|
|
smp_call_function_single(cpu, nmi_cpu_stop, NULL, 1);
|
|
break;
|
|
}
|
|
return NOTIFY_DONE;
|
|
}
|
|
|
|
static struct notifier_block oprofile_cpu_nb = {
|
|
.notifier_call = oprofile_cpu_notifier
|
|
};
|
|
#endif
|
|
|
|
#ifdef CONFIG_PM
|
|
|
|
/* Suspend hook: stop the counters on this CPU if profiling is on. Returns 0. */
static int nmi_suspend(struct sys_device *dev, pm_message_t state)
{
	if (nmi_enabled != 1)
		return 0;

	/* Only one CPU left, just stop that one */
	nmi_cpu_stop(NULL);
	return 0;
}
|
|
|
|
static int nmi_resume(struct sys_device *dev)
|
|
{
|
|
if (nmi_enabled == 1)
|
|
nmi_cpu_start(NULL);
|
|
return 0;
|
|
}
|
|
|
|
static struct sysdev_class oprofile_sysclass = {
|
|
.name = "oprofile",
|
|
.resume = nmi_resume,
|
|
.suspend = nmi_suspend,
|
|
};
|
|
|
|
static struct sys_device device_oprofile = {
|
|
.id = 0,
|
|
.cls = &oprofile_sysclass,
|
|
};
|
|
|
|
/*
 * Register the oprofile sysdev class and its device.
 *
 * Returns 0 on success or the error of the failing registration. If the
 * device cannot be registered, the class registered just before is removed
 * again so that a failure leaves nothing behind.
 */
static int __init init_sysfs(void)
{
	int error;

	error = sysdev_class_register(&oprofile_sysclass);
	if (error)
		return error;

	error = sysdev_register(&device_oprofile);
	if (error)
		sysdev_class_unregister(&oprofile_sysclass);

	return error;
}
|
|
|
|
static void exit_sysfs(void)
|
|
{
|
|
sysdev_unregister(&device_oprofile);
|
|
sysdev_class_unregister(&oprofile_sysclass);
|
|
}
|
|
|
|
#else
|
|
/*
 * Without CONFIG_PM there is nothing to register. The stubs are typed
 * functions with the same signatures as the real implementations, so
 * callers may use the return value of init_sysfs() in both configurations.
 */
static inline int init_sysfs(void) { return 0; }
static inline void exit_sysfs(void) { }
|
|
#endif /* CONFIG_PM */
|
|
|
|
/*
 * Select the Pentium 4 model spec.
 *
 * Models 5 and above 6 are rejected. On UP kernels the plain P4 spec is
 * used; on SMP kernels the choice depends on smp_num_siblings (1: P4,
 * 2: P4 with HT). Returns 1 with *cpu_type and model set on success,
 * 0 otherwise.
 */
static int __init p4_init(char **cpu_type)
{
	__u8 cpu_model = boot_cpu_data.x86_model;

	if (cpu_model == 5 || cpu_model > 6)
		return 0;

#ifndef CONFIG_SMP
	*cpu_type = "i386/p4";
	model = &op_p4_spec;
	return 1;
#else
	if (smp_num_siblings == 1) {
		*cpu_type = "i386/p4";
		model = &op_p4_spec;
		return 1;
	}

	if (smp_num_siblings == 2) {
		*cpu_type = "i386/p4-ht";
		model = &op_p4_ht2_spec;
		return 1;
	}
#endif

	printk(KERN_INFO "oprofile: P4 HyperThreading detected with > 2 threads\n");
	printk(KERN_INFO "oprofile: Reverting to timer mode.\n");
	return 0;
}
|
|
|
|
static int force_arch_perfmon;
|
|
static int force_cpu_type(const char *str, struct kernel_param *kp)
|
|
{
|
|
if (!strcmp(str, "arch_perfmon")) {
|
|
force_arch_perfmon = 1;
|
|
printk(KERN_INFO "oprofile: forcing architectural perfmon\n");
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
module_param_call(cpu_type, force_cpu_type, NULL, NULL, 0);
|
|
|
|
/*
 * Select the model spec for a family 6 Intel CPU based on its model number.
 *
 * Returns 0 without touching *cpu_type or model if architectural perfmon is
 * forced and available, or if the model number is unknown. Otherwise sets
 * *cpu_type and model and returns 1.
 */
static int __init ppro_init(char **cpu_type)
{
	__u8 cpu_model = boot_cpu_data.x86_model;
	struct op_x86_model_spec *spec = &op_ppro_spec;	/* default */
	char *name;

	if (force_arch_perfmon && cpu_has_arch_perfmon)
		return 0;

	switch (cpu_model) {
	case 0 ... 2:
		name = "i386/ppro";
		break;
	case 3 ... 5:
		name = "i386/pii";
		break;
	case 6 ... 8:
	case 10 ... 11:
		name = "i386/piii";
		break;
	case 9:
	case 13:
		name = "i386/p6_mobile";
		break;
	case 14:
		name = "i386/core";
		break;
	case 15:
	case 23:
		name = "i386/core_2";
		break;
	case 0x2e:
	case 26:
		spec = &op_arch_perfmon_spec;
		name = "i386/core_i7";
		break;
	case 28:
		name = "i386/atom";
		break;
	default:
		/* Unknown */
		return 0;
	}

	*cpu_type = name;
	model = spec;
	return 1;
}
|
|
|
|
/* in order to get sysfs right */
|
|
static int using_nmi;
|
|
|
|
int __init op_nmi_init(struct oprofile_operations *ops)
|
|
{
|
|
__u8 vendor = boot_cpu_data.x86_vendor;
|
|
__u8 family = boot_cpu_data.x86;
|
|
char *cpu_type = NULL;
|
|
int ret = 0;
|
|
|
|
if (!cpu_has_apic)
|
|
return -ENODEV;
|
|
|
|
switch (vendor) {
|
|
case X86_VENDOR_AMD:
|
|
/* Needs to be at least an Athlon (or hammer in 32bit mode) */
|
|
|
|
switch (family) {
|
|
case 6:
|
|
cpu_type = "i386/athlon";
|
|
break;
|
|
case 0xf:
|
|
/*
|
|
* Actually it could be i386/hammer too, but
|
|
* give user space an consistent name.
|
|
*/
|
|
cpu_type = "x86-64/hammer";
|
|
break;
|
|
case 0x10:
|
|
cpu_type = "x86-64/family10";
|
|
break;
|
|
case 0x11:
|
|
cpu_type = "x86-64/family11h";
|
|
break;
|
|
default:
|
|
return -ENODEV;
|
|
}
|
|
model = &op_amd_spec;
|
|
break;
|
|
|
|
case X86_VENDOR_INTEL:
|
|
switch (family) {
|
|
/* Pentium IV */
|
|
case 0xf:
|
|
p4_init(&cpu_type);
|
|
break;
|
|
|
|
/* A P6-class processor */
|
|
case 6:
|
|
ppro_init(&cpu_type);
|
|
break;
|
|
|
|
default:
|
|
break;
|
|
}
|
|
|
|
if (cpu_type)
|
|
break;
|
|
|
|
if (!cpu_has_arch_perfmon)
|
|
return -ENODEV;
|
|
|
|
/* use arch perfmon as fallback */
|
|
cpu_type = "i386/arch_perfmon";
|
|
model = &op_arch_perfmon_spec;
|
|
break;
|
|
|
|
default:
|
|
return -ENODEV;
|
|
}
|
|
|
|
#ifdef CONFIG_SMP
|
|
register_cpu_notifier(&oprofile_cpu_nb);
|
|
#endif
|
|
/* default values, can be overwritten by model */
|
|
ops->create_files = nmi_create_files;
|
|
ops->setup = nmi_setup;
|
|
ops->shutdown = nmi_shutdown;
|
|
ops->start = nmi_start;
|
|
ops->stop = nmi_stop;
|
|
ops->cpu_type = cpu_type;
|
|
|
|
if (model->init)
|
|
ret = model->init(ops);
|
|
if (ret)
|
|
return ret;
|
|
|
|
if (!model->num_virt_counters)
|
|
model->num_virt_counters = model->num_counters;
|
|
|
|
mux_init(ops);
|
|
|
|
init_sysfs();
|
|
using_nmi = 1;
|
|
printk(KERN_INFO "oprofile: using NMI interrupt.\n");
|
|
return 0;
|
|
}
|
|
|
|
void op_nmi_exit(void)
|
|
{
|
|
if (using_nmi) {
|
|
exit_sysfs();
|
|
#ifdef CONFIG_SMP
|
|
unregister_cpu_notifier(&oprofile_cpu_nb);
|
|
#endif
|
|
}
|
|
if (model->exit)
|
|
model->exit();
|
|
}
|