x86, ptrace: PEBS support

Polish the ds.h interface and add support for PEBS.

Ds.c is meant to be the resource allocator for per-thread and per-cpu
BTS and PEBS recording.
It is used by ptrace/utrace to provide execution tracing of debugged tasks.
It will be used by profilers (e.g. perfmon2).
It may be used by kernel debuggers to provide a kernel execution trace.

Changes in detail:
- guard DS and ptrace by CONFIG macros
- separate DS and BTS more clearly
- simplify field accesses
- add functions to manage PEBS buffers
- add simple protection/allocation mechanism
- added support for Atom

Opens:
- buffer overflow handling
  Currently, only circular buffers are supported. This is all we need
  for debugging. Profilers would want an overflow notification.
  This is planned to be added when perfmon2 is made to use the ds.h
  interface.
- utrace intermediate layer

Signed-off-by: Markus Metzger <markus.t.metzger@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
This commit is contained in:
Markus Metzger 2008-04-08 11:01:58 +02:00 committed by Ingo Molnar
parent 492c2e476e
commit 93fa7636df
11 changed files with 1336 additions and 581 deletions

View File

@ -415,3 +415,21 @@ config X86_MINIMUM_CPU_FAMILY
config X86_DEBUGCTLMSR config X86_DEBUGCTLMSR
def_bool y def_bool y
depends on !(M586MMX || M586TSC || M586 || M486 || M386) depends on !(M586MMX || M586TSC || M586 || M486 || M386)
config X86_DS
bool "Debug Store support"
default y
help
Add support for Debug Store.
This allows the kernel to provide a memory buffer to the hardware
to store various profiling and tracing events.
config X86_PTRACE_BTS
bool "ptrace interface to Branch Trace Store"
default y
depends on (X86_DS && X86_DEBUGCTLMSR)
help
Add a ptrace interface to allow collecting an execution trace
of the traced task.
This collects control flow changes in a (cyclic) buffer and allows
debuggers to fill in the gaps and show an execution trace of the debuggee.

View File

@ -222,10 +222,11 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
set_cpu_cap(c, X86_FEATURE_BTS); set_cpu_cap(c, X86_FEATURE_BTS);
if (!(l1 & (1<<12))) if (!(l1 & (1<<12)))
set_cpu_cap(c, X86_FEATURE_PEBS); set_cpu_cap(c, X86_FEATURE_PEBS);
ds_init_intel(c);
} }
if (cpu_has_bts) if (cpu_has_bts)
ds_init_intel(c); ptrace_bts_init_intel(c);
} }
static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 *c, unsigned int size) static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 *c, unsigned int size)

File diff suppressed because it is too large Load Diff

View File

@ -316,6 +316,14 @@ void exit_thread(void)
tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET; tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET;
put_cpu(); put_cpu();
} }
#ifdef CONFIG_X86_DS
/* Free any DS contexts that have not been properly released. */
if (unlikely(current->thread.ds_ctx)) {
/* we clear debugctl to make sure DS is not used. */
update_debugctlmsr(0);
ds_free(current->thread.ds_ctx);
}
#endif /* CONFIG_X86_DS */
} }
void flush_thread(void) void flush_thread(void)
@ -482,18 +490,27 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
{ {
struct thread_struct *prev, *next; struct thread_struct *prev, *next;
unsigned long debugctl; unsigned long debugctl;
unsigned long ds_prev = 0, ds_next = 0;
prev = &prev_p->thread; prev = &prev_p->thread;
next = &next_p->thread; next = &next_p->thread;
debugctl = prev->debugctlmsr; debugctl = prev->debugctlmsr;
if (next->ds_area_msr != prev->ds_area_msr) {
#ifdef CONFIG_X86_DS
if (prev->ds_ctx)
ds_prev = (unsigned long)prev->ds_ctx->ds;
if (next->ds_ctx)
ds_next = (unsigned long)next->ds_ctx->ds;
if (ds_next != ds_prev) {
/* we clear debugctl to make sure DS /* we clear debugctl to make sure DS
* is not in use when we change it */ * is not in use when we change it */
debugctl = 0; debugctl = 0;
update_debugctlmsr(0); update_debugctlmsr(0);
wrmsr(MSR_IA32_DS_AREA, next->ds_area_msr, 0); wrmsr(MSR_IA32_DS_AREA, ds_next, 0);
} }
#endif /* CONFIG_X86_DS */
if (next->debugctlmsr != debugctl) if (next->debugctlmsr != debugctl)
update_debugctlmsr(next->debugctlmsr); update_debugctlmsr(next->debugctlmsr);
@ -517,13 +534,13 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
hard_enable_TSC(); hard_enable_TSC();
} }
#ifdef X86_BTS #ifdef CONFIG_X86_PTRACE_BTS
if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS)) if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS))
ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS); ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS);
if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS)) if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS))
ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES); ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES);
#endif #endif /* CONFIG_X86_PTRACE_BTS */
if (!test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) { if (!test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {

View File

@ -267,6 +267,14 @@ void exit_thread(void)
t->io_bitmap_max = 0; t->io_bitmap_max = 0;
put_cpu(); put_cpu();
} }
#ifdef CONFIG_X86_DS
/* Free any DS contexts that have not been properly released. */
if (unlikely(t->ds_ctx)) {
/* we clear debugctl to make sure DS is not used. */
update_debugctlmsr(0);
ds_free(t->ds_ctx);
}
#endif /* CONFIG_X86_DS */
} }
void flush_thread(void) void flush_thread(void)
@ -492,18 +500,27 @@ static inline void __switch_to_xtra(struct task_struct *prev_p,
{ {
struct thread_struct *prev, *next; struct thread_struct *prev, *next;
unsigned long debugctl; unsigned long debugctl;
unsigned long ds_prev = 0, ds_next = 0;
prev = &prev_p->thread, prev = &prev_p->thread,
next = &next_p->thread; next = &next_p->thread;
debugctl = prev->debugctlmsr; debugctl = prev->debugctlmsr;
if (next->ds_area_msr != prev->ds_area_msr) {
#ifdef CONFIG_X86_DS
if (prev->ds_ctx)
ds_prev = (unsigned long)prev->ds_ctx->ds;
if (next->ds_ctx)
ds_next = (unsigned long)next->ds_ctx->ds;
if (ds_next != ds_prev) {
/* we clear debugctl to make sure DS /* we clear debugctl to make sure DS
* is not in use when we change it */ * is not in use when we change it */
debugctl = 0; debugctl = 0;
update_debugctlmsr(0); update_debugctlmsr(0);
wrmsrl(MSR_IA32_DS_AREA, next->ds_area_msr); wrmsrl(MSR_IA32_DS_AREA, ds_next);
} }
#endif /* CONFIG_X86_DS */
if (next->debugctlmsr != debugctl) if (next->debugctlmsr != debugctl)
update_debugctlmsr(next->debugctlmsr); update_debugctlmsr(next->debugctlmsr);
@ -541,13 +558,13 @@ static inline void __switch_to_xtra(struct task_struct *prev_p,
memset(tss->io_bitmap, 0xff, prev->io_bitmap_max); memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
} }
#ifdef X86_BTS #ifdef CONFIG_X86_PTRACE_BTS
if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS)) if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS))
ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS); ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS);
if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS)) if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS))
ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES); ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES);
#endif #endif /* CONFIG_X86_PTRACE_BTS */
} }
/* /*

View File

@ -554,45 +554,115 @@ static int ptrace_set_debugreg(struct task_struct *child,
return 0; return 0;
} }
#ifdef X86_BTS #ifdef CONFIG_X86_PTRACE_BTS
/*
 * Per-implementation BTS parameters.
 *
 * One instance of this structure describes how a given hardware
 * implementation lays out its BTS records and how BTS is switched
 * on and off.
 */
struct bts_configuration {
	/* size of one BTS record in bytes; at most BTS_MAX_RECORD_SIZE */
	unsigned char sizeof_bts;

	/* size of a single field within a BTS record, in bytes */
	unsigned char sizeof_field;

	/* bits in the DEBUGCTL MSR that enable/disable BTS */
	unsigned long debugctl_mask;
};

/* the BTS parameters currently in use; all zero until configured */
static struct bts_configuration bts_cfg;
static int ptrace_bts_get_size(struct task_struct *child) #define BTS_MAX_RECORD_SIZE (8 * 3)
/*
 * Layout of a Branch Trace Store (BTS) record.
 *
 * A record consists of three fields, in this order:
 * - source linear address
 * - destination linear address
 * - flags
 *
 * The size of those fields differs between architectures: later
 * architectures use 64bit pointers throughout, whereas earlier
 * architectures use 32bit pointers in 32bit mode.
 *
 * The location of a field inside a record is derived from:
 * - the field size stored in the BTS configuration
 * - the relative position of the field (the enumerator value)
 *
 * To store additional information in the BTS buffer, a special source
 * address (bts_escape) marks a record that requires special
 * interpretation. In such a record, the remaining two fields are
 * reused under the names bts_qual and bts_jiffies.
 *
 * Netburst indicated via a bit in the flags field whether the branch
 * was predicted; this is ignored.
 */
enum bts_field {
	/* relative positions of the fields of a branch record */
	bts_from	= 0,
	bts_to		= 1,
	bts_flags	= 2,

	/* source address value marking a specially interpreted record */
	bts_escape	= (unsigned long)-1,

	/* names for the reused fields of an escaped record */
	bts_qual	= bts_to,
	bts_jiffies	= bts_flags
};
static inline unsigned long bts_get(const char *base, enum bts_field field)
{ {
if (!child->thread.ds_area_msr) base += (bts_cfg.sizeof_field * field);
return -ENXIO; return *(unsigned long *)base;
return ds_get_bts_index((void *)child->thread.ds_area_msr);
} }
static int ptrace_bts_read_record(struct task_struct *child, static inline void bts_set(char *base, enum bts_field field, unsigned long val)
long index, {
base += (bts_cfg.sizeof_field * field);;
(*(unsigned long *)base) = val;
}
/*
 * Convert one raw BTS record into its bts_struct representation.
 *
 * The output is zeroed first. A record whose source field equals
 * bts_escape is decoded as a qualifier plus a jiffies value; any other
 * record is decoded as a branch with its source and destination
 * addresses.
 *
 * out (out): receives the bts_struct interpretation
 * raw: the raw BTS record to translate
 */
static void ptrace_bts_translate_record(struct bts_struct *out, const void *raw)
{
	memset(out, 0, sizeof(*out));

	if (bts_get(raw, bts_from) != bts_escape) {
		/* an ordinary branch record */
		out->qualifier = BTS_BRANCH;
		out->variant.lbr.from_ip = bts_get(raw, bts_from);
		out->variant.lbr.to_ip = bts_get(raw, bts_to);
		return;
	}

	/* an escaped record: qualifier and jiffies reuse the other fields */
	out->qualifier = bts_get(raw, bts_qual);
	out->variant.jiffies = bts_get(raw, bts_jiffies);
}
static int ptrace_bts_read_record(struct task_struct *child, size_t index,
struct bts_struct __user *out) struct bts_struct __user *out)
{ {
struct bts_struct ret; struct bts_struct ret;
int retval; const void *bts_record;
int bts_end; size_t bts_index, bts_end;
int bts_index; int error;
if (!child->thread.ds_area_msr) error = ds_get_bts_end(child, &bts_end);
return -ENXIO; if (error < 0)
return error;
if (index < 0)
return -EINVAL;
bts_end = ds_get_bts_end((void *)child->thread.ds_area_msr);
if (bts_end <= index) if (bts_end <= index)
return -EINVAL; return -EINVAL;
/* translate the ptrace bts index into the ds bts index */ error = ds_get_bts_index(child, &bts_index);
bts_index = ds_get_bts_index((void *)child->thread.ds_area_msr); if (error < 0)
bts_index -= (index + 1); return error;
if (bts_index < 0)
bts_index += bts_end;
retval = ds_read_bts((void *)child->thread.ds_area_msr, /* translate the ptrace bts index into the ds bts index */
bts_index, &ret); bts_index += bts_end - (index + 1);
if (retval < 0) if (bts_end <= bts_index)
return retval; bts_index -= bts_end;
error = ds_access_bts(child, bts_index, &bts_record);
if (error < 0)
return error;
ptrace_bts_translate_record(&ret, bts_record);
if (copy_to_user(out, &ret, sizeof(ret))) if (copy_to_user(out, &ret, sizeof(ret)))
return -EFAULT; return -EFAULT;
@ -600,101 +670,106 @@ static int ptrace_bts_read_record(struct task_struct *child,
return sizeof(ret); return sizeof(ret);
} }
static int ptrace_bts_clear(struct task_struct *child)
{
if (!child->thread.ds_area_msr)
return -ENXIO;
return ds_clear((void *)child->thread.ds_area_msr);
}
static int ptrace_bts_drain(struct task_struct *child, static int ptrace_bts_drain(struct task_struct *child,
long size, long size,
struct bts_struct __user *out) struct bts_struct __user *out)
{ {
int end, i; struct bts_struct ret;
void *ds = (void *)child->thread.ds_area_msr; const unsigned char *raw;
size_t end, i;
int error;
if (!ds) error = ds_get_bts_index(child, &end);
return -ENXIO; if (error < 0)
return error;
end = ds_get_bts_index(ds);
if (end <= 0)
return end;
if (size < (end * sizeof(struct bts_struct))) if (size < (end * sizeof(struct bts_struct)))
return -EIO; return -EIO;
for (i = 0; i < end; i++, out++) { error = ds_access_bts(child, 0, (const void **)&raw);
struct bts_struct ret; if (error < 0)
int retval; return error;
retval = ds_read_bts(ds, i, &ret); for (i = 0; i < end; i++, out++, raw += bts_cfg.sizeof_bts) {
if (retval < 0) ptrace_bts_translate_record(&ret, raw);
return retval;
if (copy_to_user(out, &ret, sizeof(ret))) if (copy_to_user(out, &ret, sizeof(ret)))
return -EFAULT; return -EFAULT;
} }
ds_clear(ds); error = ds_clear_bts(child);
if (error < 0)
return error;
return end; return end;
} }
/*
 * BTS overflow callback: send the signal stored in the child's
 * thread.bts_ovfl_signal to the child.
 */
static void ptrace_bts_ovfl(struct task_struct *child)
{
	struct task_struct *target = child;

	send_sig(target->thread.bts_ovfl_signal, target, 0);
}
static int ptrace_bts_config(struct task_struct *child, static int ptrace_bts_config(struct task_struct *child,
long cfg_size, long cfg_size,
const struct ptrace_bts_config __user *ucfg) const struct ptrace_bts_config __user *ucfg)
{ {
struct ptrace_bts_config cfg; struct ptrace_bts_config cfg;
int bts_size, ret = 0; int error = 0;
void *ds;
error = -EOPNOTSUPP;
if (!bts_cfg.sizeof_bts)
goto errout;
error = -EIO;
if (cfg_size < sizeof(cfg)) if (cfg_size < sizeof(cfg))
return -EIO; goto errout;
error = -EFAULT;
if (copy_from_user(&cfg, ucfg, sizeof(cfg))) if (copy_from_user(&cfg, ucfg, sizeof(cfg)))
return -EFAULT; goto errout;
if ((int)cfg.size < 0) error = -EINVAL;
return -EINVAL; if ((cfg.flags & PTRACE_BTS_O_SIGNAL) &&
!(cfg.flags & PTRACE_BTS_O_ALLOC))
goto errout;
bts_size = 0; if (cfg.flags & PTRACE_BTS_O_ALLOC) {
ds = (void *)child->thread.ds_area_msr; ds_ovfl_callback_t ovfl = 0;
if (ds) { unsigned int sig = 0;
bts_size = ds_get_bts_size(ds);
if (bts_size < 0)
return bts_size;
}
cfg.size = PAGE_ALIGN(cfg.size);
if (bts_size != cfg.size) { /* we ignore the error in case we were not tracing child */
ret = ptrace_bts_realloc(child, cfg.size, (void)ds_release_bts(child);
cfg.flags & PTRACE_BTS_O_CUT_SIZE);
if (ret < 0) if (cfg.flags & PTRACE_BTS_O_SIGNAL) {
if (!cfg.signal)
goto errout;
sig = cfg.signal;
ovfl = ptrace_bts_ovfl;
}
error = ds_request_bts(child, /* base = */ 0, cfg.size, ovfl);
if (error < 0)
goto errout; goto errout;
ds = (void *)child->thread.ds_area_msr; child->thread.bts_ovfl_signal = sig;
} }
if (cfg.flags & PTRACE_BTS_O_SIGNAL) error = -EINVAL;
ret = ds_set_overflow(ds, DS_O_SIGNAL); if (!child->thread.ds_ctx && cfg.flags)
else
ret = ds_set_overflow(ds, DS_O_WRAP);
if (ret < 0)
goto errout; goto errout;
if (cfg.flags & PTRACE_BTS_O_TRACE) if (cfg.flags & PTRACE_BTS_O_TRACE)
child->thread.debugctlmsr |= ds_debugctl_mask(); child->thread.debugctlmsr |= bts_cfg.debugctl_mask;
else else
child->thread.debugctlmsr &= ~ds_debugctl_mask(); child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask;
if (cfg.flags & PTRACE_BTS_O_SCHED) if (cfg.flags & PTRACE_BTS_O_SCHED)
set_tsk_thread_flag(child, TIF_BTS_TRACE_TS); set_tsk_thread_flag(child, TIF_BTS_TRACE_TS);
else else
clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS); clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS);
ret = sizeof(cfg); error = sizeof(cfg);
out: out:
if (child->thread.debugctlmsr) if (child->thread.debugctlmsr)
@ -702,10 +777,10 @@ out:
else else
clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR); clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR);
return ret; return error;
errout: errout:
child->thread.debugctlmsr &= ~ds_debugctl_mask(); child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask;
clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS); clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS);
goto out; goto out;
} }
@ -714,119 +789,79 @@ static int ptrace_bts_status(struct task_struct *child,
long cfg_size, long cfg_size,
struct ptrace_bts_config __user *ucfg) struct ptrace_bts_config __user *ucfg)
{ {
void *ds = (void *)child->thread.ds_area_msr;
struct ptrace_bts_config cfg; struct ptrace_bts_config cfg;
size_t end;
const void *base, *max;
int error;
if (cfg_size < sizeof(cfg)) if (cfg_size < sizeof(cfg))
return -EIO; return -EIO;
error = ds_get_bts_end(child, &end);
if (error < 0)
return error;
error = ds_access_bts(child, /* index = */ 0, &base);
if (error < 0)
return error;
error = ds_access_bts(child, /* index = */ end, &max);
if (error < 0)
return error;
memset(&cfg, 0, sizeof(cfg)); memset(&cfg, 0, sizeof(cfg));
cfg.size = (max - base);
if (ds) { cfg.signal = child->thread.bts_ovfl_signal;
cfg.size = ds_get_bts_size(ds);
if (ds_get_overflow(ds) == DS_O_SIGNAL)
cfg.flags |= PTRACE_BTS_O_SIGNAL;
if (test_tsk_thread_flag(child, TIF_DEBUGCTLMSR) &&
child->thread.debugctlmsr & ds_debugctl_mask())
cfg.flags |= PTRACE_BTS_O_TRACE;
if (test_tsk_thread_flag(child, TIF_BTS_TRACE_TS))
cfg.flags |= PTRACE_BTS_O_SCHED;
}
cfg.bts_size = sizeof(struct bts_struct); cfg.bts_size = sizeof(struct bts_struct);
if (cfg.signal)
cfg.flags |= PTRACE_BTS_O_SIGNAL;
if (test_tsk_thread_flag(child, TIF_DEBUGCTLMSR) &&
child->thread.debugctlmsr & bts_cfg.debugctl_mask)
cfg.flags |= PTRACE_BTS_O_TRACE;
if (test_tsk_thread_flag(child, TIF_BTS_TRACE_TS))
cfg.flags |= PTRACE_BTS_O_SCHED;
if (copy_to_user(ucfg, &cfg, sizeof(cfg))) if (copy_to_user(ucfg, &cfg, sizeof(cfg)))
return -EFAULT; return -EFAULT;
return sizeof(cfg); return sizeof(cfg);
} }
static int ptrace_bts_write_record(struct task_struct *child, static int ptrace_bts_write_record(struct task_struct *child,
const struct bts_struct *in) const struct bts_struct *in)
{ {
int retval; unsigned char bts_record[BTS_MAX_RECORD_SIZE];
if (!child->thread.ds_area_msr) BUG_ON(BTS_MAX_RECORD_SIZE < bts_cfg.sizeof_bts);
return -ENXIO;
retval = ds_write_bts((void *)child->thread.ds_area_msr, in); memset(bts_record, 0, bts_cfg.sizeof_bts);
if (retval) switch (in->qualifier) {
return retval; case BTS_INVALID:
break;
return sizeof(*in); case BTS_BRANCH:
} bts_set(bts_record, bts_from, in->variant.lbr.from_ip);
bts_set(bts_record, bts_to, in->variant.lbr.to_ip);
break;
static int ptrace_bts_realloc(struct task_struct *child, case BTS_TASK_ARRIVES:
int size, int reduce_size) case BTS_TASK_DEPARTS:
{ bts_set(bts_record, bts_from, bts_escape);
unsigned long rlim, vm; bts_set(bts_record, bts_qual, in->qualifier);
int ret, old_size; bts_set(bts_record, bts_jiffies, in->variant.jiffies);
break;
if (size < 0) default:
return -EINVAL; return -EINVAL;
old_size = ds_get_bts_size((void *)child->thread.ds_area_msr);
if (old_size < 0)
return old_size;
ret = ds_free((void **)&child->thread.ds_area_msr);
if (ret < 0)
goto out;
size >>= PAGE_SHIFT;
old_size >>= PAGE_SHIFT;
current->mm->total_vm -= old_size;
current->mm->locked_vm -= old_size;
if (size == 0)
goto out;
rlim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT;
vm = current->mm->total_vm + size;
if (rlim < vm) {
ret = -ENOMEM;
if (!reduce_size)
goto out;
size = rlim - current->mm->total_vm;
if (size <= 0)
goto out;
} }
rlim = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT; /* The writing task will be the switched-to task on a context
vm = current->mm->locked_vm + size; * switch. It needs to write into the switched-from task's BTS
if (rlim < vm) { * buffer. */
ret = -ENOMEM; return ds_unchecked_write_bts(child, bts_record, bts_cfg.sizeof_bts);
if (!reduce_size)
goto out;
size = rlim - current->mm->locked_vm;
if (size <= 0)
goto out;
}
ret = ds_allocate((void **)&child->thread.ds_area_msr,
size << PAGE_SHIFT);
if (ret < 0)
goto out;
current->mm->total_vm += size;
current->mm->locked_vm += size;
out:
if (child->thread.ds_area_msr)
set_tsk_thread_flag(child, TIF_DS_AREA_MSR);
else
clear_tsk_thread_flag(child, TIF_DS_AREA_MSR);
return ret;
} }
void ptrace_bts_take_timestamp(struct task_struct *tsk, void ptrace_bts_take_timestamp(struct task_struct *tsk,
@ -839,7 +874,66 @@ void ptrace_bts_take_timestamp(struct task_struct *tsk,
ptrace_bts_write_record(tsk, &rec); ptrace_bts_write_record(tsk, &rec);
} }
#endif /* X86_BTS */
/*
 * BTS parameters selected by ptrace_bts_init_intel() for family 0xF,
 * models 0x0-0x2 (Netburst): a record is three fields of native long
 * size.
 */
static const struct bts_configuration bts_cfg_netburst = {
.sizeof_bts = sizeof(long) * 3,
.sizeof_field = sizeof(long),
.debugctl_mask = (1<<2)|(1<<3)|(1<<5)
};
/*
 * BTS parameters selected by ptrace_bts_init_intel() for family 0x6,
 * models 0xD and 0xE (Pentium M): a record is three fields of native
 * long size.
 */
static const struct bts_configuration bts_cfg_pentium_m = {
.sizeof_bts = sizeof(long) * 3,
.sizeof_field = sizeof(long),
.debugctl_mask = (1<<6)|(1<<7)
};
/*
 * BTS parameters selected by ptrace_bts_init_intel() for family 0x6,
 * models 0xF (Core2) and 0x1C (Atom): a record is three fields of
 * 8 bytes each, independent of the size of long.
 */
static const struct bts_configuration bts_cfg_core2 = {
.sizeof_bts = 8 * 3,
.sizeof_field = 8,
.debugctl_mask = (1<<6)|(1<<7)|(1<<9)
};
/*
 * Install a BTS configuration by copying *cfg into the file-scope
 * bts_cfg.
 *
 * cfg: the configuration to copy
 */
static inline void bts_configure(const struct bts_configuration *cfg)
{
bts_cfg = *cfg;
}
/*
 * Select the BTS configuration matching the given Intel cpu.
 *
 * Known family/model combinations:
 * - family 0x6, models 0xD and 0xE (Pentium M)
 * - family 0x6, models 0xF (Core2) and 0x1C (Atom)
 * - family 0xF, models 0x0 - 0x2 (Netburst)
 *
 * For any other cpu, no configuration is installed.
 *
 * c: the cpu to configure BTS for
 */
void __cpuinit ptrace_bts_init_intel(struct cpuinfo_x86 *c)
{
	const struct bts_configuration *cfg = NULL;

	if (c->x86 == 0x6) {
		switch (c->x86_model) {
		case 0xD:
		case 0xE:	/* Pentium M */
			cfg = &bts_cfg_pentium_m;
			break;
		case 0xF:	/* Core2 */
		case 0x1C:	/* Atom */
			cfg = &bts_cfg_core2;
			break;
		default:
			/* sorry, don't know about them */
			break;
		}
	} else if (c->x86 == 0xF) {
		/* Netburst */
		if (c->x86_model == 0x0 ||
		    c->x86_model == 0x1 ||
		    c->x86_model == 0x2)
			cfg = &bts_cfg_netburst;
	}

	/* unknown family or model: leave the configuration untouched */
	if (cfg)
		bts_configure(cfg);
}
#endif /* CONFIG_X86_PTRACE_BTS */
/* /*
* Called by kernel/ptrace.c when detaching.. * Called by kernel/ptrace.c when detaching..
@ -852,15 +946,15 @@ void ptrace_disable(struct task_struct *child)
#ifdef TIF_SYSCALL_EMU #ifdef TIF_SYSCALL_EMU
clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
#endif #endif
if (child->thread.ds_area_msr) { #ifdef CONFIG_X86_PTRACE_BTS
#ifdef X86_BTS (void)ds_release_bts(child);
ptrace_bts_realloc(child, 0, 0);
#endif child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask;
child->thread.debugctlmsr &= ~ds_debugctl_mask(); if (!child->thread.debugctlmsr)
if (!child->thread.debugctlmsr) clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR);
clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR);
clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS); clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS);
} #endif /* CONFIG_X86_PTRACE_BTS */
} }
#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
@ -980,7 +1074,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
/* /*
* These bits need more cooking - not enabled yet: * These bits need more cooking - not enabled yet:
*/ */
#ifdef X86_BTS #ifdef CONFIG_X86_PTRACE_BTS
case PTRACE_BTS_CONFIG: case PTRACE_BTS_CONFIG:
ret = ptrace_bts_config ret = ptrace_bts_config
(child, data, (struct ptrace_bts_config __user *)addr); (child, data, (struct ptrace_bts_config __user *)addr);
@ -992,7 +1086,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
break; break;
case PTRACE_BTS_SIZE: case PTRACE_BTS_SIZE:
ret = ptrace_bts_get_size(child); ret = ds_get_bts_index(child, /* pos = */ 0);
break; break;
case PTRACE_BTS_GET: case PTRACE_BTS_GET:
@ -1001,14 +1095,14 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
break; break;
case PTRACE_BTS_CLEAR: case PTRACE_BTS_CLEAR:
ret = ptrace_bts_clear(child); ret = ds_clear_bts(child);
break; break;
case PTRACE_BTS_DRAIN: case PTRACE_BTS_DRAIN:
ret = ptrace_bts_drain ret = ptrace_bts_drain
(child, data, (struct bts_struct __user *) addr); (child, data, (struct bts_struct __user *) addr);
break; break;
#endif #endif /* CONFIG_X86_PTRACE_BTS */
default: default:
ret = ptrace_request(child, request, addr, data); ret = ptrace_request(child, request, addr, data);

View File

@ -920,11 +920,12 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
set_cpu_cap(c, X86_FEATURE_BTS); set_cpu_cap(c, X86_FEATURE_BTS);
if (!(l1 & (1<<12))) if (!(l1 & (1<<12)))
set_cpu_cap(c, X86_FEATURE_PEBS); set_cpu_cap(c, X86_FEATURE_PEBS);
ds_init_intel(c);
} }
if (cpu_has_bts) if (cpu_has_bts)
ds_init_intel(c); ptrace_bts_init_intel(c);
n = c->extended_cpuid_level; n = c->extended_cpuid_level;
if (n >= 0x80000008) { if (n >= 0x80000008) {

View File

@ -2,71 +2,237 @@
* Debug Store (DS) support * Debug Store (DS) support
* *
* This provides a low-level interface to the hardware's Debug Store * This provides a low-level interface to the hardware's Debug Store
* feature that is used for last branch recording (LBR) and * feature that is used for branch trace store (BTS) and
* precise-event based sampling (PEBS). * precise-event based sampling (PEBS).
* *
* Different architectures use a different DS layout/pointer size. * It manages:
* The below functions therefore work on a void*. * - per-thread and per-cpu allocation of BTS and PEBS
* - buffer memory allocation (optional)
* - buffer overflow handling
* - buffer access
*
* It assumes:
* - get_task_struct on all parameter tasks
* - current is allowed to trace parameter tasks
* *
* *
* Since there is no user for PEBS, yet, only LBR (or branch * Copyright (C) 2007-2008 Intel Corporation.
* trace store, BTS) is supported. * Markus Metzger <markus.t.metzger@intel.com>, 2007-2008
*
*
* Copyright (C) 2007 Intel Corporation.
* Markus Metzger <markus.t.metzger@intel.com>, Dec 2007
*/ */
#ifndef _ASM_X86_DS_H #ifndef _ASM_X86_DS_H
#define _ASM_X86_DS_H #define _ASM_X86_DS_H
#ifdef CONFIG_X86_DS
#include <linux/types.h> #include <linux/types.h>
#include <linux/init.h> #include <linux/init.h>
struct cpuinfo_x86;
struct task_struct;
/* a branch trace record entry /*
* Request BTS or PEBS
* *
* In order to unify the interface between various processor versions, * Due to alignment constraints, the actual buffer may be slightly
* we use the below data structure for all processors. * smaller than the requested or provided buffer.
*
* Returns 0 on success; -Eerrno otherwise
*
* task: the task to request recording for;
* NULL for per-cpu recording on the current cpu
* base: the base pointer for the (non-pageable) buffer;
* NULL if buffer allocation requested
* size: the size of the requested or provided buffer
* ovfl: pointer to a function to be called on buffer overflow;
* NULL if cyclic buffer requested
*/ */
enum bts_qualifier { typedef void (*ds_ovfl_callback_t)(struct task_struct *);
BTS_INVALID = 0, extern int ds_request_bts(struct task_struct *task, void *base, size_t size,
BTS_BRANCH, ds_ovfl_callback_t ovfl);
BTS_TASK_ARRIVES, extern int ds_request_pebs(struct task_struct *task, void *base, size_t size,
BTS_TASK_DEPARTS ds_ovfl_callback_t ovfl);
/*
* Release BTS or PEBS resources
*
* Frees buffers allocated on ds_request.
*
* Returns 0 on success; -Eerrno otherwise
*
* task: the task to release resources for;
* NULL to release resources for the current cpu
*/
extern int ds_release_bts(struct task_struct *task);
extern int ds_release_pebs(struct task_struct *task);
/*
* Return the (array) index of the write pointer.
* (assuming an array of BTS/PEBS records)
*
* Returns -Eerrno on error
*
* task: the task to access;
* NULL to access the current cpu
* pos (out): if not NULL, will hold the result
*/
extern int ds_get_bts_index(struct task_struct *task, size_t *pos);
extern int ds_get_pebs_index(struct task_struct *task, size_t *pos);
/*
* Return the (array) index one record beyond the end of the array.
* (assuming an array of BTS/PEBS records)
*
* Returns -Eerrno on error
*
* task: the task to access;
* NULL to access the current cpu
* pos (out): if not NULL, will hold the result
*/
extern int ds_get_bts_end(struct task_struct *task, size_t *pos);
extern int ds_get_pebs_end(struct task_struct *task, size_t *pos);
/*
* Provide a pointer to the BTS/PEBS record at parameter index.
* (assuming an array of BTS/PEBS records)
*
* The pointer points directly into the buffer. The user is
* responsible for copying the record.
*
* Returns the size of a single record on success; -Eerrno on error
*
* task: the task to access;
* NULL to access the current cpu
* index: the index of the requested record
* record (out): pointer to the requested record
*/
extern int ds_access_bts(struct task_struct *task,
size_t index, const void **record);
extern int ds_access_pebs(struct task_struct *task,
size_t index, const void **record);
/*
* Write one or more BTS/PEBS records at the write pointer index and
* advance the write pointer.
*
* If size is not a multiple of the record size, trailing bytes are
* zeroed out.
*
* May result in one or more overflow notifications.
*
* If called during overflow handling, that is, with index >=
* interrupt threshold, the write will wrap around.
*
* An overflow notification is given if and when the interrupt
* threshold is reached during or after the write.
*
* Returns the number of bytes written or -Eerrno.
*
* task: the task to access;
* NULL to access the current cpu
* buffer: the buffer to write
* size: the size of the buffer
*/
extern int ds_write_bts(struct task_struct *task,
const void *buffer, size_t size);
extern int ds_write_pebs(struct task_struct *task,
const void *buffer, size_t size);
/*
* Same as ds_write_bts/pebs, but omit ownership checks.
*
* This is needed to have some other task than the owner of the
* BTS/PEBS buffer or the parameter task itself write into the
* respective buffer.
*/
extern int ds_unchecked_write_bts(struct task_struct *task,
const void *buffer, size_t size);
extern int ds_unchecked_write_pebs(struct task_struct *task,
const void *buffer, size_t size);
/*
* Reset the write pointer of the BTS/PEBS buffer.
*
* Returns 0 on success; -Eerrno on error
*
* task: the task to access;
* NULL to access the current cpu
*/
extern int ds_reset_bts(struct task_struct *task);
extern int ds_reset_pebs(struct task_struct *task);
/*
* Clear the BTS/PEBS buffer and reset the write pointer.
* The entire buffer will be zeroed out.
*
* Returns 0 on success; -Eerrno on error
*
* task: the task to access;
* NULL to access the current cpu
*/
extern int ds_clear_bts(struct task_struct *task);
extern int ds_clear_pebs(struct task_struct *task);
/*
* Provide the PEBS counter reset value.
*
* Returns 0 on success; -Eerrno on error
*
* task: the task to access;
* NULL to access the current cpu
* value (out): the counter reset value
*/
extern int ds_get_pebs_reset(struct task_struct *task, u64 *value);
/*
* Set the PEBS counter reset value.
*
* Returns 0 on success; -Eerrno on error
*
* task: the task to access;
* NULL to access the current cpu
* value: the new counter reset value
*/
extern int ds_set_pebs_reset(struct task_struct *task, u64 value);
/*
* Initialization
*/
struct cpuinfo_x86;
extern void __cpuinit ds_init_intel(struct cpuinfo_x86 *);
/*
* The DS context - part of struct thread_struct.
*/
struct ds_context {
/* pointer to the DS configuration; goes into MSR_IA32_DS_AREA */
unsigned char *ds;
/* the owner of the BTS and PEBS configuration, respectively */
struct task_struct *owner[2];
/* buffer overflow notification function for BTS and PEBS */
ds_ovfl_callback_t callback[2];
/* the original buffer address */
void *buffer[2];
/* the number of allocated pages for on-request allocated buffers */
unsigned int pages[2];
/* use count */
unsigned long count;
/* a pointer to the context location inside the thread_struct
* or the per_cpu context array */
struct ds_context **this;
/* a pointer to the task owning this context, or NULL, if the
* context is owned by a cpu */
struct task_struct *task;
}; };
struct bts_struct { /* called by exit_thread() to free leftover contexts */
u64 qualifier; extern void ds_free(struct ds_context *context);
union {
/* BTS_BRANCH */
struct {
u64 from_ip;
u64 to_ip;
} lbr;
/* BTS_TASK_ARRIVES or
BTS_TASK_DEPARTS */
u64 jiffies;
} variant;
};
/* Overflow handling mechanisms */ #else /* CONFIG_X86_DS */
#define DS_O_SIGNAL 1 /* send overflow signal */
#define DS_O_WRAP 2 /* wrap around */
extern int ds_allocate(void **, size_t); #define ds_init_intel(config) do {} while (0)
extern int ds_free(void **);
extern int ds_get_bts_size(void *);
extern int ds_get_bts_end(void *);
extern int ds_get_bts_index(void *);
extern int ds_set_overflow(void *, int);
extern int ds_get_overflow(void *);
extern int ds_clear(void *);
extern int ds_read_bts(void *, int, struct bts_struct *);
extern int ds_write_bts(void *, const struct bts_struct *);
extern unsigned long ds_debugctl_mask(void);
extern void __cpuinit ds_init_intel(struct cpuinfo_x86 *c);
#endif /* CONFIG_X86_DS */
#endif /* _ASM_X86_DS_H */ #endif /* _ASM_X86_DS_H */

View File

@ -20,6 +20,7 @@ struct mm_struct;
#include <asm/msr.h> #include <asm/msr.h>
#include <asm/desc_defs.h> #include <asm/desc_defs.h>
#include <asm/nops.h> #include <asm/nops.h>
#include <asm/ds.h>
#include <linux/personality.h> #include <linux/personality.h>
#include <linux/cpumask.h> #include <linux/cpumask.h>
@ -415,9 +416,14 @@ struct thread_struct {
unsigned io_bitmap_max; unsigned io_bitmap_max;
/* MSR_IA32_DEBUGCTLMSR value to switch in if TIF_DEBUGCTLMSR is set. */ /* MSR_IA32_DEBUGCTLMSR value to switch in if TIF_DEBUGCTLMSR is set. */
unsigned long debugctlmsr; unsigned long debugctlmsr;
/* Debug Store - if not 0 points to a DS Save Area configuration; #ifdef CONFIG_X86_DS
* goes into MSR_IA32_DS_AREA */ /* Debug Store context; see include/asm-x86/ds.h; goes into MSR_IA32_DS_AREA */
unsigned long ds_area_msr; struct ds_context *ds_ctx;
#endif /* CONFIG_X86_DS */
#ifdef CONFIG_X86_PTRACE_BTS
/* the signal to send on a bts buffer overflow */
unsigned int bts_ovfl_signal;
#endif /* CONFIG_X86_PTRACE_BTS */
}; };
static inline unsigned long native_get_debugreg(int regno) static inline unsigned long native_get_debugreg(int regno)

View File

@ -80,8 +80,9 @@
#define PTRACE_SINGLEBLOCK 33 /* resume execution until next branch */ #define PTRACE_SINGLEBLOCK 33 /* resume execution until next branch */
#ifndef __ASSEMBLY__ #ifdef CONFIG_X86_PTRACE_BTS
#ifndef __ASSEMBLY__
#include <asm/types.h> #include <asm/types.h>
/* configuration/status structure used in PTRACE_BTS_CONFIG and /* configuration/status structure used in PTRACE_BTS_CONFIG and
@ -97,20 +98,20 @@ struct ptrace_bts_config {
/* actual size of bts_struct in bytes */ /* actual size of bts_struct in bytes */
__u32 bts_size; __u32 bts_size;
}; };
#endif #endif /* __ASSEMBLY__ */
#define PTRACE_BTS_O_TRACE 0x1 /* branch trace */ #define PTRACE_BTS_O_TRACE 0x1 /* branch trace */
#define PTRACE_BTS_O_SCHED 0x2 /* scheduling events w/ jiffies */ #define PTRACE_BTS_O_SCHED 0x2 /* scheduling events w/ jiffies */
#define PTRACE_BTS_O_SIGNAL 0x4 /* send SIG<signal> on buffer overflow #define PTRACE_BTS_O_SIGNAL 0x4 /* send SIG<signal> on buffer overflow
instead of wrapping around */ instead of wrapping around */
#define PTRACE_BTS_O_CUT_SIZE 0x8 /* cut requested size to max available #define PTRACE_BTS_O_ALLOC 0x8 /* (re)allocate buffer */
instead of failing */
#define PTRACE_BTS_CONFIG 40 #define PTRACE_BTS_CONFIG 40
/* Configure branch trace recording. /* Configure branch trace recording.
ADDR points to a struct ptrace_bts_config. ADDR points to a struct ptrace_bts_config.
DATA gives the size of that buffer. DATA gives the size of that buffer.
A new buffer is allocated, iff the size changes. A new buffer is allocated, if requested in the flags.
An overflow signal may only be requested for new buffers.
Returns the number of bytes read. Returns the number of bytes read.
*/ */
#define PTRACE_BTS_STATUS 41 #define PTRACE_BTS_STATUS 41
@ -119,7 +120,7 @@ struct ptrace_bts_config {
Returns the number of bytes written. Returns the number of bytes written.
*/ */
#define PTRACE_BTS_SIZE 42 #define PTRACE_BTS_SIZE 42
/* Return the number of available BTS records. /* Return the number of available BTS records for draining.
DATA and ADDR are ignored. DATA and ADDR are ignored.
*/ */
#define PTRACE_BTS_GET 43 #define PTRACE_BTS_GET 43
@ -139,5 +140,6 @@ struct ptrace_bts_config {
BTS records are read from oldest to newest. BTS records are read from oldest to newest.
Returns number of BTS records drained. Returns number of BTS records drained.
*/ */
#endif /* CONFIG_X86_PTRACE_BTS */
#endif #endif

View File

@ -125,14 +125,48 @@ struct pt_regs {
#endif /* __KERNEL__ */ #endif /* __KERNEL__ */
#endif /* !__i386__ */ #endif /* !__i386__ */
#ifdef CONFIG_X86_PTRACE_BTS
/* a branch trace record entry
*
* In order to unify the interface between various processor versions,
* we use the below data structure for all processors.
*/
/*
 * Kinds of branch trace records.
 *
 * The member comments in struct bts_struct name these constants to say
 * which part of the record's variant union belongs to which kind:
 * BTS_BRANCH goes with variant.lbr, BTS_TASK_ARRIVES and BTS_TASK_DEPARTS
 * go with variant.jiffies.
 *
 * The numeric values are spelled out because they are the values the
 * compiler assigns by default; they must not change.
 */
enum bts_qualifier {
	BTS_INVALID		= 0,	/* no payload is named for this kind */
	BTS_BRANCH		= 1,	/* record carries variant.lbr */
	BTS_TASK_ARRIVES	= 2,	/* record carries variant.jiffies */
	BTS_TASK_DEPARTS	= 3	/* record carries variant.jiffies */
};
/*
* One branch trace record in the processor-independent format.
*
* 'qualifier' tells which member of 'variant' is meaningful; the member
* comments below name the matching enum bts_qualifier constants.
* NOTE(review): the fixed-width __u64 fields presumably keep the record
* layout identical for 32-bit and 64-bit users - confirm.
*/
struct bts_struct {
/* the kind of record; presumably holds an enum bts_qualifier value -
* confirm against the code filling in records */
__u64 qualifier;
union {
/* BTS_BRANCH: a recorded branch.
* NOTE(review): from_ip/to_ip are presumably the source and the
* destination instruction pointer of the branch - confirm. */
struct {
__u64 from_ip;
__u64 to_ip;
} lbr;
/* BTS_TASK_ARRIVES or BTS_TASK_DEPARTS: a scheduling event,
* stamped with a jiffies value */
__u64 jiffies;
} variant;
};
#endif /* CONFIG_X86_PTRACE_BTS */
#ifdef __KERNEL__ #ifdef __KERNEL__
/* the DS BTS struct is used for ptrace as well */ #include <linux/init.h>
#include <asm/ds.h>
struct cpuinfo_x86;
struct task_struct; struct task_struct;
#ifdef CONFIG_X86_PTRACE_BTS
extern void __cpuinit ptrace_bts_init_intel(struct cpuinfo_x86 *);
extern void ptrace_bts_take_timestamp(struct task_struct *, enum bts_qualifier); extern void ptrace_bts_take_timestamp(struct task_struct *, enum bts_qualifier);
#else
#define ptrace_bts_init_intel(config) do {} while (0)
#endif /* CONFIG_X86_PTRACE_BTS */
extern unsigned long profile_pc(struct pt_regs *regs); extern unsigned long profile_pc(struct pt_regs *regs);