linux/arch/x86/kernel/cpu/perf_event_intel_bts.c
Linus Torvalds 9d90f03531 Replace module_init with appropriate alternate initcall in non modules.
-----BEGIN PGP SIGNATURE-----
 Version: GnuPG v1
 
 iQIcBAABAgAGBQJVkO6nAAoJEOvOhAQsB9HWpHMP/Aknc+lmX2dZeIn96gdkP+UK
 1qL24C5oq2sm/9yTZLdoXbyApLaaTbAJHS9O4kolaOU6uOs3JrgtXqL1697PVp1R
 qV4f4DOzXmmEHaE2oO21afAri3tXIVQNqA2NQl2TmKfwz0Atu01Vj5RJPu/ZOBPl
 dONXcFnE6nO2p7AEFRP/GfDZwkng4xALyZPhwL7tJDAeGaBpqG/n2hCuq+Szn9g8
 wjTFACBdad/mRrYsL6YsWZ1e+LKI8vsArQbdPTam+jPaEUlK7yjFReFKCJVzL2JP
 xfQoTcCgFztzTUV0JTGR9sqeYA3WH9AkJOFDxNE/eIili4xiTh789WbEpHLVECSX
 1LsW025I3DkRWBPT4L+9ZP805ha71kNXDFc5N3XJkzrCYaFvD2BgsUzxi6FXj7aC
 9lEVKt6xO04FFG5SwTKnO0f8PEhPemZH3BDnVvjBDWQYLjUcPSNz7bfyHUhif0G5
 ulOGVB0ncJJF9iP8PyZs1RA/F8kKxXWnhYMIHzvl0f0vLUA7rAKsACnhBgq8s9ZQ
 uM5YjzU91Z/4pe5C2E5MmQIZ84b79ZPsee1lF0GJdjK5W3PDvnCjIdXfQ5M/f3S8
 76cssXWNhS78/P+19YqirLeb0u7Zw0jf73m9t9ywRgcByWfY5ZUDm0DFpQnWKkoR
 QY/aFO/yHKTO3VHj8Ril
 =KDJO
 -----END PGP SIGNATURE-----

Merge tag 'module_init-alternate_initcall-v4.1-rc8' of git://git.kernel.org/pub/scm/linux/kernel/git/paulg/linux

Pull module_init replacement part two from Paul Gortmaker:
 "Replace module_init with appropriate alternate initcall in non
  modules.

  This series converts non-modular code that is using the module_init()
  call to hook itself into the system to instead use one of our
  alternate priority initcalls.

  Unlike the previous series that used device_initcall and hence was a
  runtime no-op, these commits change to one of the alternate initcalls,
  because (a) we have them and (b) it seems like the right thing to do.

  For example, it would seem logical to use arch_initcall for arch
  specific setup code and fs_initcall for filesystem setup code.

  This does mean however, that changes in the init ordering will be
  taking place, and so there is a small risk that some kind of implicit
  init ordering issue may lie uncovered.  But I think it is still better
  to give these ones sensible priorities than to just assign them all to
  device_initcall in order to exactly preserve the old ordering.

  Thad said, we have already made similar changes in core kernel code in
  commit c96d6660dc ("kernel: audit/fix non-modular users of
  module_init in core code") without any regressions reported, so this
  type of change isn't without precedent.  It has also got the same
  local testing and linux-next coverage as all the other pull requests
  that I'm sending for this merge window have got.

  Once again, there is an unused module_exit function removal that shows
  up as an outlier upon casual inspection of the diffstat"

* tag 'module_init-alternate_initcall-v4.1-rc8' of git://git.kernel.org/pub/scm/linux/kernel/git/paulg/linux:
  x86: perf_event_intel_pt.c: use arch_initcall to hook in enabling
  x86: perf_event_intel_bts.c: use arch_initcall to hook in enabling
  mm/page_owner.c: use late_initcall to hook in enabling
  lib/list_sort: use late_initcall to hook in self tests
  arm: use subsys_initcall in non-modular pl320 IPC code
  powerpc: don't use module_init for non-modular core hugetlb code
  powerpc: use subsys_initcall for Freescale Local Bus
  x86: don't use module_init for non-modular core bootflag code
  netfilter: don't use module_init/exit in core IPV4 code
  fs/notify: don't use module_init for non-modular inotify_user code
  mm: replace module_init usages with subsys_initcall in nommu.c
2015-07-02 10:36:29 -07:00

534 lines
13 KiB
C

/*
* BTS PMU driver for perf
* Copyright (c) 2013-2014, Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*/
#undef DEBUG
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/bitops.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/debugfs.h>
#include <linux/device.h>
#include <linux/coredump.h>
#include <asm-generic/sizes.h>
#include <asm/perf_event.h>
#include "perf_event.h"
struct bts_ctx {
struct perf_output_handle handle;
struct debug_store ds_back;
int started;
};
static DEFINE_PER_CPU(struct bts_ctx, bts_ctx);
#define BTS_RECORD_SIZE 24
#define BTS_SAFETY_MARGIN 4080
struct bts_phys {
struct page *page;
unsigned long size;
unsigned long offset;
unsigned long displacement;
};
struct bts_buffer {
size_t real_size; /* multiple of BTS_RECORD_SIZE */
unsigned int nr_pages;
unsigned int nr_bufs;
unsigned int cur_buf;
bool snapshot;
local_t data_size;
local_t lost;
local_t head;
unsigned long end;
void **data_pages;
struct bts_phys buf[0];
};
struct pmu bts_pmu;
void intel_pmu_enable_bts(u64 config);
void intel_pmu_disable_bts(void);
static size_t buf_size(struct page *page)
{
return 1 << (PAGE_SHIFT + page_private(page));
}
static void *
bts_buffer_setup_aux(int cpu, void **pages, int nr_pages, bool overwrite)
{
struct bts_buffer *buf;
struct page *page;
int node = (cpu == -1) ? cpu : cpu_to_node(cpu);
unsigned long offset;
size_t size = nr_pages << PAGE_SHIFT;
int pg, nbuf, pad;
/* count all the high order buffers */
for (pg = 0, nbuf = 0; pg < nr_pages;) {
page = virt_to_page(pages[pg]);
if (WARN_ON_ONCE(!PagePrivate(page) && nr_pages > 1))
return NULL;
pg += 1 << page_private(page);
nbuf++;
}
/*
* to avoid interrupts in overwrite mode, only allow one physical
*/
if (overwrite && nbuf > 1)
return NULL;
buf = kzalloc_node(offsetof(struct bts_buffer, buf[nbuf]), GFP_KERNEL, node);
if (!buf)
return NULL;
buf->nr_pages = nr_pages;
buf->nr_bufs = nbuf;
buf->snapshot = overwrite;
buf->data_pages = pages;
buf->real_size = size - size % BTS_RECORD_SIZE;
for (pg = 0, nbuf = 0, offset = 0, pad = 0; nbuf < buf->nr_bufs; nbuf++) {
unsigned int __nr_pages;
page = virt_to_page(pages[pg]);
__nr_pages = PagePrivate(page) ? 1 << page_private(page) : 1;
buf->buf[nbuf].page = page;
buf->buf[nbuf].offset = offset;
buf->buf[nbuf].displacement = (pad ? BTS_RECORD_SIZE - pad : 0);
buf->buf[nbuf].size = buf_size(page) - buf->buf[nbuf].displacement;
pad = buf->buf[nbuf].size % BTS_RECORD_SIZE;
buf->buf[nbuf].size -= pad;
pg += __nr_pages;
offset += __nr_pages << PAGE_SHIFT;
}
return buf;
}
static void bts_buffer_free_aux(void *data)
{
kfree(data);
}
static unsigned long bts_buffer_offset(struct bts_buffer *buf, unsigned int idx)
{
return buf->buf[idx].offset + buf->buf[idx].displacement;
}
static void
bts_config_buffer(struct bts_buffer *buf)
{
int cpu = raw_smp_processor_id();
struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
struct bts_phys *phys = &buf->buf[buf->cur_buf];
unsigned long index, thresh = 0, end = phys->size;
struct page *page = phys->page;
index = local_read(&buf->head);
if (!buf->snapshot) {
if (buf->end < phys->offset + buf_size(page))
end = buf->end - phys->offset - phys->displacement;
index -= phys->offset + phys->displacement;
if (end - index > BTS_SAFETY_MARGIN)
thresh = end - BTS_SAFETY_MARGIN;
else if (end - index > BTS_RECORD_SIZE)
thresh = end - BTS_RECORD_SIZE;
else
thresh = end;
}
ds->bts_buffer_base = (u64)(long)page_address(page) + phys->displacement;
ds->bts_index = ds->bts_buffer_base + index;
ds->bts_absolute_maximum = ds->bts_buffer_base + end;
ds->bts_interrupt_threshold = !buf->snapshot
? ds->bts_buffer_base + thresh
: ds->bts_absolute_maximum + BTS_RECORD_SIZE;
}
static void bts_buffer_pad_out(struct bts_phys *phys, unsigned long head)
{
unsigned long index = head - phys->offset;
memset(page_address(phys->page) + index, 0, phys->size - index);
}
static bool bts_buffer_is_full(struct bts_buffer *buf, struct bts_ctx *bts)
{
if (buf->snapshot)
return false;
if (local_read(&buf->data_size) >= bts->handle.size ||
bts->handle.size - local_read(&buf->data_size) < BTS_RECORD_SIZE)
return true;
return false;
}
static void bts_update(struct bts_ctx *bts)
{
int cpu = raw_smp_processor_id();
struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
struct bts_buffer *buf = perf_get_aux(&bts->handle);
unsigned long index = ds->bts_index - ds->bts_buffer_base, old, head;
if (!buf)
return;
head = index + bts_buffer_offset(buf, buf->cur_buf);
old = local_xchg(&buf->head, head);
if (!buf->snapshot) {
if (old == head)
return;
if (ds->bts_index >= ds->bts_absolute_maximum)
local_inc(&buf->lost);
/*
* old and head are always in the same physical buffer, so we
* can subtract them to get the data size.
*/
local_add(head - old, &buf->data_size);
} else {
local_set(&buf->data_size, head);
}
}
static void __bts_event_start(struct perf_event *event)
{
struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
struct bts_buffer *buf = perf_get_aux(&bts->handle);
u64 config = 0;
if (!buf || bts_buffer_is_full(buf, bts))
return;
event->hw.state = 0;
if (!buf->snapshot)
config |= ARCH_PERFMON_EVENTSEL_INT;
if (!event->attr.exclude_kernel)
config |= ARCH_PERFMON_EVENTSEL_OS;
if (!event->attr.exclude_user)
config |= ARCH_PERFMON_EVENTSEL_USR;
bts_config_buffer(buf);
/*
* local barrier to make sure that ds configuration made it
* before we enable BTS
*/
wmb();
intel_pmu_enable_bts(config);
}
static void bts_event_start(struct perf_event *event, int flags)
{
struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
__bts_event_start(event);
/* PMI handler: this counter is running and likely generating PMIs */
ACCESS_ONCE(bts->started) = 1;
}
static void __bts_event_stop(struct perf_event *event)
{
/*
* No extra synchronization is mandated by the documentation to have
* BTS data stores globally visible.
*/
intel_pmu_disable_bts();
if (event->hw.state & PERF_HES_STOPPED)
return;
ACCESS_ONCE(event->hw.state) |= PERF_HES_STOPPED;
}
static void bts_event_stop(struct perf_event *event, int flags)
{
struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
/* PMI handler: don't restart this counter */
ACCESS_ONCE(bts->started) = 0;
__bts_event_stop(event);
if (flags & PERF_EF_UPDATE)
bts_update(bts);
}
void intel_bts_enable_local(void)
{
struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
if (bts->handle.event && bts->started)
__bts_event_start(bts->handle.event);
}
void intel_bts_disable_local(void)
{
struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
if (bts->handle.event)
__bts_event_stop(bts->handle.event);
}
static int
bts_buffer_reset(struct bts_buffer *buf, struct perf_output_handle *handle)
{
unsigned long head, space, next_space, pad, gap, skip, wakeup;
unsigned int next_buf;
struct bts_phys *phys, *next_phys;
int ret;
if (buf->snapshot)
return 0;
head = handle->head & ((buf->nr_pages << PAGE_SHIFT) - 1);
if (WARN_ON_ONCE(head != local_read(&buf->head)))
return -EINVAL;
phys = &buf->buf[buf->cur_buf];
space = phys->offset + phys->displacement + phys->size - head;
pad = space;
if (space > handle->size) {
space = handle->size;
space -= space % BTS_RECORD_SIZE;
}
if (space <= BTS_SAFETY_MARGIN) {
/* See if next phys buffer has more space */
next_buf = buf->cur_buf + 1;
if (next_buf >= buf->nr_bufs)
next_buf = 0;
next_phys = &buf->buf[next_buf];
gap = buf_size(phys->page) - phys->displacement - phys->size +
next_phys->displacement;
skip = pad + gap;
if (handle->size >= skip) {
next_space = next_phys->size;
if (next_space + skip > handle->size) {
next_space = handle->size - skip;
next_space -= next_space % BTS_RECORD_SIZE;
}
if (next_space > space || !space) {
if (pad)
bts_buffer_pad_out(phys, head);
ret = perf_aux_output_skip(handle, skip);
if (ret)
return ret;
/* Advance to next phys buffer */
phys = next_phys;
space = next_space;
head = phys->offset + phys->displacement;
/*
* After this, cur_buf and head won't match ds
* anymore, so we must not be racing with
* bts_update().
*/
buf->cur_buf = next_buf;
local_set(&buf->head, head);
}
}
}
/* Don't go far beyond wakeup watermark */
wakeup = BTS_SAFETY_MARGIN + BTS_RECORD_SIZE + handle->wakeup -
handle->head;
if (space > wakeup) {
space = wakeup;
space -= space % BTS_RECORD_SIZE;
}
buf->end = head + space;
/*
* If we have no space, the lost notification would have been sent when
* we hit absolute_maximum - see bts_update()
*/
if (!space)
return -ENOSPC;
return 0;
}
int intel_bts_interrupt(void)
{
struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
struct perf_event *event = bts->handle.event;
struct bts_buffer *buf;
s64 old_head;
int err;
if (!event || !bts->started)
return 0;
buf = perf_get_aux(&bts->handle);
/*
* Skip snapshot counters: they don't use the interrupt, but
* there's no other way of telling, because the pointer will
* keep moving
*/
if (!buf || buf->snapshot)
return 0;
old_head = local_read(&buf->head);
bts_update(bts);
/* no new data */
if (old_head == local_read(&buf->head))
return 0;
perf_aux_output_end(&bts->handle, local_xchg(&buf->data_size, 0),
!!local_xchg(&buf->lost, 0));
buf = perf_aux_output_begin(&bts->handle, event);
if (!buf)
return 1;
err = bts_buffer_reset(buf, &bts->handle);
if (err)
perf_aux_output_end(&bts->handle, 0, false);
return 1;
}
static void bts_event_del(struct perf_event *event, int mode)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
struct bts_buffer *buf = perf_get_aux(&bts->handle);
bts_event_stop(event, PERF_EF_UPDATE);
if (buf) {
if (buf->snapshot)
bts->handle.head =
local_xchg(&buf->data_size,
buf->nr_pages << PAGE_SHIFT);
perf_aux_output_end(&bts->handle, local_xchg(&buf->data_size, 0),
!!local_xchg(&buf->lost, 0));
}
cpuc->ds->bts_index = bts->ds_back.bts_buffer_base;
cpuc->ds->bts_buffer_base = bts->ds_back.bts_buffer_base;
cpuc->ds->bts_absolute_maximum = bts->ds_back.bts_absolute_maximum;
cpuc->ds->bts_interrupt_threshold = bts->ds_back.bts_interrupt_threshold;
}
static int bts_event_add(struct perf_event *event, int mode)
{
struct bts_buffer *buf;
struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct hw_perf_event *hwc = &event->hw;
int ret = -EBUSY;
event->hw.state = PERF_HES_STOPPED;
if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask))
return -EBUSY;
if (bts->handle.event)
return -EBUSY;
buf = perf_aux_output_begin(&bts->handle, event);
if (!buf)
return -EINVAL;
ret = bts_buffer_reset(buf, &bts->handle);
if (ret) {
perf_aux_output_end(&bts->handle, 0, false);
return ret;
}
bts->ds_back.bts_buffer_base = cpuc->ds->bts_buffer_base;
bts->ds_back.bts_absolute_maximum = cpuc->ds->bts_absolute_maximum;
bts->ds_back.bts_interrupt_threshold = cpuc->ds->bts_interrupt_threshold;
if (mode & PERF_EF_START) {
bts_event_start(event, 0);
if (hwc->state & PERF_HES_STOPPED) {
bts_event_del(event, 0);
return -EBUSY;
}
}
return 0;
}
static void bts_event_destroy(struct perf_event *event)
{
x86_release_hardware();
x86_del_exclusive(x86_lbr_exclusive_bts);
}
static int bts_event_init(struct perf_event *event)
{
int ret;
if (event->attr.type != bts_pmu.type)
return -ENOENT;
if (x86_add_exclusive(x86_lbr_exclusive_bts))
return -EBUSY;
ret = x86_reserve_hardware();
if (ret) {
x86_del_exclusive(x86_lbr_exclusive_bts);
return ret;
}
event->destroy = bts_event_destroy;
return 0;
}
static void bts_event_read(struct perf_event *event)
{
}
static __init int bts_init(void)
{
if (!boot_cpu_has(X86_FEATURE_DTES64) || !x86_pmu.bts)
return -ENODEV;
bts_pmu.capabilities = PERF_PMU_CAP_AUX_NO_SG | PERF_PMU_CAP_ITRACE;
bts_pmu.task_ctx_nr = perf_sw_context;
bts_pmu.event_init = bts_event_init;
bts_pmu.add = bts_event_add;
bts_pmu.del = bts_event_del;
bts_pmu.start = bts_event_start;
bts_pmu.stop = bts_event_stop;
bts_pmu.read = bts_event_read;
bts_pmu.setup_aux = bts_buffer_setup_aux;
bts_pmu.free_aux = bts_buffer_free_aux;
return perf_pmu_register(&bts_pmu, "intel_bts", -1);
}
arch_initcall(bts_init);