2011-05-19 17:55:04 +00:00
|
|
|
/*
|
|
|
|
* Performance events ring-buffer code:
|
|
|
|
*
|
|
|
|
* Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
|
|
|
|
* Copyright (C) 2008-2011 Red Hat, Inc., Ingo Molnar
|
|
|
|
* Copyright (C) 2008-2011 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
|
2011-12-29 22:09:01 +00:00
|
|
|
* Copyright © 2009 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
|
2011-05-19 17:55:04 +00:00
|
|
|
*
|
|
|
|
* For licensing details see kernel-base/COPYING
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <linux/perf_event.h>
|
|
|
|
#include <linux/vmalloc.h>
|
|
|
|
#include <linux/slab.h>
|
2013-10-31 09:19:59 +00:00
|
|
|
#include <linux/circ_buf.h>
|
2015-01-28 17:54:38 +00:00
|
|
|
#include <linux/poll.h>
|
2011-05-19 17:55:04 +00:00
|
|
|
|
|
|
|
#include "internal.h"
|
|
|
|
|
|
|
|
static void perf_output_wakeup(struct perf_output_handle *handle)
|
|
|
|
{
|
2015-01-28 17:54:38 +00:00
|
|
|
atomic_set(&handle->rb->poll, POLLIN);
|
2011-05-19 17:55:04 +00:00
|
|
|
|
2011-06-27 12:41:57 +00:00
|
|
|
handle->event->pending_wakeup = 1;
|
|
|
|
irq_work_queue(&handle->event->pending);
|
2011-05-19 17:55:04 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* We need to ensure a later event_id doesn't publish a head when a former
|
|
|
|
* event isn't done writing. However since we need to deal with NMIs we
|
|
|
|
* cannot fully serialize things.
|
|
|
|
*
|
|
|
|
* We only publish the head (and generate a wakeup) when the outer-most
|
|
|
|
* event completes.
|
|
|
|
*/
|
|
|
|
static void perf_output_get_handle(struct perf_output_handle *handle)
|
|
|
|
{
|
|
|
|
struct ring_buffer *rb = handle->rb;
|
|
|
|
|
|
|
|
preempt_disable();
|
|
|
|
local_inc(&rb->nest);
|
|
|
|
handle->wakeup = local_read(&rb->wakeup);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void perf_output_put_handle(struct perf_output_handle *handle)
|
|
|
|
{
|
|
|
|
struct ring_buffer *rb = handle->rb;
|
|
|
|
unsigned long head;
|
|
|
|
|
|
|
|
again:
|
|
|
|
head = local_read(&rb->head);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* IRQ/NMI can happen here, which means we can miss a head update.
|
|
|
|
*/
|
|
|
|
|
|
|
|
if (!local_dec_and_test(&rb->nest))
|
|
|
|
goto out;
|
|
|
|
|
|
|
|
/*
|
2013-10-28 12:55:29 +00:00
|
|
|
* Since the mmap() consumer (userspace) can run on a different CPU:
|
|
|
|
*
|
|
|
|
* kernel user
|
|
|
|
*
|
2013-11-25 10:49:10 +00:00
|
|
|
* if (LOAD ->data_tail) { LOAD ->data_head
|
|
|
|
* (A) smp_rmb() (C)
|
|
|
|
* STORE $data LOAD $data
|
|
|
|
* smp_wmb() (B) smp_mb() (D)
|
|
|
|
* STORE ->data_head STORE ->data_tail
|
|
|
|
* }
|
2013-10-28 12:55:29 +00:00
|
|
|
*
|
|
|
|
* Where A pairs with D, and B pairs with C.
|
|
|
|
*
|
2013-11-25 10:49:10 +00:00
|
|
|
* In our case (A) is a control dependency that separates the load of
|
|
|
|
* the ->data_tail and the stores of $data. In case ->data_tail
|
|
|
|
* indicates there is no room in the buffer to store $data we do not.
|
2013-10-28 12:55:29 +00:00
|
|
|
*
|
2013-11-25 10:49:10 +00:00
|
|
|
* D needs to be a full barrier since it separates the data READ
|
2013-10-28 12:55:29 +00:00
|
|
|
* from the tail WRITE.
|
|
|
|
*
|
|
|
|
* For B a WMB is sufficient since it separates two WRITEs, and for C
|
|
|
|
* an RMB is sufficient since it separates two READs.
|
|
|
|
*
|
|
|
|
* See perf_output_begin().
|
2011-05-19 17:55:04 +00:00
|
|
|
*/
|
2013-11-25 10:49:10 +00:00
|
|
|
smp_wmb(); /* B, matches C */
|
2011-05-19 17:55:04 +00:00
|
|
|
rb->user_page->data_head = head;
|
|
|
|
|
|
|
|
/*
|
2013-10-31 16:41:23 +00:00
|
|
|
* Now check if we missed an update -- rely on previous implied
|
|
|
|
* compiler barriers to force a re-read.
|
2011-05-19 17:55:04 +00:00
|
|
|
*/
|
|
|
|
if (unlikely(head != local_read(&rb->head))) {
|
|
|
|
local_inc(&rb->nest);
|
|
|
|
goto again;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (handle->wakeup != local_read(&rb->wakeup))
|
|
|
|
perf_output_wakeup(handle);
|
|
|
|
|
|
|
|
out:
|
|
|
|
preempt_enable();
|
|
|
|
}
|
|
|
|
|
|
|
|
int perf_output_begin(struct perf_output_handle *handle,
|
2011-06-27 14:47:16 +00:00
|
|
|
struct perf_event *event, unsigned int size)
|
2011-05-19 17:55:04 +00:00
|
|
|
{
|
|
|
|
struct ring_buffer *rb;
|
|
|
|
unsigned long tail, offset, head;
|
2013-10-31 16:36:25 +00:00
|
|
|
int have_lost, page_shift;
|
2011-05-19 17:55:04 +00:00
|
|
|
struct {
|
|
|
|
struct perf_event_header header;
|
|
|
|
u64 id;
|
|
|
|
u64 lost;
|
|
|
|
} lost_event;
|
|
|
|
|
|
|
|
rcu_read_lock();
|
|
|
|
/*
|
|
|
|
* For inherited events we send all the output towards the parent.
|
|
|
|
*/
|
|
|
|
if (event->parent)
|
|
|
|
event = event->parent;
|
|
|
|
|
|
|
|
rb = rcu_dereference(event->rb);
|
2013-10-31 16:20:25 +00:00
|
|
|
if (unlikely(!rb))
|
2011-05-19 17:55:04 +00:00
|
|
|
goto out;
|
|
|
|
|
2013-10-31 16:20:25 +00:00
|
|
|
if (unlikely(!rb->nr_pages))
|
2011-05-19 17:55:04 +00:00
|
|
|
goto out;
|
|
|
|
|
2013-10-31 16:20:25 +00:00
|
|
|
handle->rb = rb;
|
|
|
|
handle->event = event;
|
|
|
|
|
2011-05-19 17:55:04 +00:00
|
|
|
have_lost = local_read(&rb->lost);
|
2013-10-31 16:20:25 +00:00
|
|
|
if (unlikely(have_lost)) {
|
2013-10-31 16:29:29 +00:00
|
|
|
size += sizeof(lost_event);
|
|
|
|
if (event->attr.sample_id_all)
|
|
|
|
size += event->id_header_size;
|
2011-05-19 17:55:04 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
perf_output_get_handle(handle);
|
|
|
|
|
|
|
|
do {
|
|
|
|
tail = ACCESS_ONCE(rb->user_page->data_tail);
|
|
|
|
offset = head = local_read(&rb->head);
|
2013-10-31 09:19:59 +00:00
|
|
|
if (!rb->overwrite &&
|
|
|
|
unlikely(CIRC_SPACE(head, tail, perf_data_size(rb)) < size))
|
2011-05-19 17:55:04 +00:00
|
|
|
goto fail;
|
2013-11-25 10:49:10 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* The above forms a control dependency barrier separating the
|
|
|
|
* @tail load above from the data stores below. Since the @tail
|
|
|
|
* load is required to compute the branch to fail below.
|
|
|
|
*
|
|
|
|
* A, matches D; the full memory barrier userspace SHOULD issue
|
|
|
|
* after reading the data and before storing the new tail
|
|
|
|
* position.
|
|
|
|
*
|
|
|
|
* See perf_output_put_handle().
|
|
|
|
*/
|
|
|
|
|
2013-10-31 09:19:59 +00:00
|
|
|
head += size;
|
2011-05-19 17:55:04 +00:00
|
|
|
} while (local_cmpxchg(&rb->head, offset, head) != offset);
|
|
|
|
|
2013-10-31 16:25:38 +00:00
|
|
|
/*
|
2013-11-25 10:49:10 +00:00
|
|
|
* We rely on the implied barrier() by local_cmpxchg() to ensure
|
|
|
|
* none of the data stores below can be lifted up by the compiler.
|
2013-10-31 16:25:38 +00:00
|
|
|
*/
|
|
|
|
|
2013-10-31 16:20:25 +00:00
|
|
|
if (unlikely(head - local_read(&rb->wakeup) > rb->watermark))
|
2011-05-19 17:55:04 +00:00
|
|
|
local_add(rb->watermark, &rb->wakeup);
|
|
|
|
|
2013-10-31 16:36:25 +00:00
|
|
|
page_shift = PAGE_SHIFT + page_order(rb);
|
|
|
|
|
|
|
|
handle->page = (offset >> page_shift) & (rb->nr_pages - 1);
|
|
|
|
offset &= (1UL << page_shift) - 1;
|
|
|
|
handle->addr = rb->data_pages[handle->page] + offset;
|
|
|
|
handle->size = (1UL << page_shift) - offset;
|
2011-05-19 17:55:04 +00:00
|
|
|
|
2013-10-31 16:20:25 +00:00
|
|
|
if (unlikely(have_lost)) {
|
2013-10-31 16:29:29 +00:00
|
|
|
struct perf_sample_data sample_data;
|
|
|
|
|
|
|
|
lost_event.header.size = sizeof(lost_event);
|
2011-05-19 17:55:04 +00:00
|
|
|
lost_event.header.type = PERF_RECORD_LOST;
|
|
|
|
lost_event.header.misc = 0;
|
|
|
|
lost_event.id = event->id;
|
|
|
|
lost_event.lost = local_xchg(&rb->lost, 0);
|
|
|
|
|
2013-10-31 16:29:29 +00:00
|
|
|
perf_event_header__init_id(&lost_event.header,
|
|
|
|
&sample_data, event);
|
2011-05-19 17:55:04 +00:00
|
|
|
perf_output_put(handle, lost_event);
|
|
|
|
perf_event__output_id_sample(event, handle, &sample_data);
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
fail:
|
|
|
|
local_inc(&rb->lost);
|
|
|
|
perf_output_put_handle(handle);
|
|
|
|
out:
|
|
|
|
rcu_read_unlock();
|
|
|
|
|
|
|
|
return -ENOSPC;
|
|
|
|
}
|
|
|
|
|
2012-08-07 13:20:38 +00:00
|
|
|
/*
 * Exported wrapper around __output_copy(): hands @buf and @len to it for
 * the output described by @handle and returns its result unchanged.
 */
unsigned int perf_output_copy(struct perf_output_handle *handle,
			      const void *buf, unsigned int len)
{
	unsigned int ret = __output_copy(handle, buf, len);

	return ret;
}
|
|
|
|
|
2012-08-07 13:20:39 +00:00
|
|
|
unsigned int perf_output_skip(struct perf_output_handle *handle,
|
|
|
|
unsigned int len)
|
|
|
|
{
|
|
|
|
return __output_skip(handle, NULL, len);
|
|
|
|
}
|
|
|
|
|
2011-05-19 17:55:04 +00:00
|
|
|
/*
 * Finish a write started with a successful perf_output_begin(): close the
 * write section first, then drop the RCU read lock taken by
 * perf_output_begin().
 */
void perf_output_end(struct perf_output_handle *handle)
{
	perf_output_put_handle(handle);

	rcu_read_unlock();
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
ring_buffer_init(struct ring_buffer *rb, long watermark, int flags)
|
|
|
|
{
|
|
|
|
long max_size = perf_data_size(rb);
|
|
|
|
|
|
|
|
if (watermark)
|
|
|
|
rb->watermark = min(max_size, watermark);
|
|
|
|
|
|
|
|
if (!rb->watermark)
|
|
|
|
rb->watermark = max_size / 2;
|
|
|
|
|
|
|
|
if (flags & RING_BUFFER_WRITABLE)
|
2013-03-18 13:33:28 +00:00
|
|
|
rb->overwrite = 0;
|
|
|
|
else
|
|
|
|
rb->overwrite = 1;
|
2011-05-19 17:55:04 +00:00
|
|
|
|
|
|
|
atomic_set(&rb->refcount, 1);
|
perf: Fix loss of notification with multi-event
When you do:
$ perf record -e cycles,cycles,cycles noploop 10
You expect about 10,000 samples for each event, i.e., 10s at
1000samples/sec. However, this is not what's happening. You
get much fewer samples, maybe 3700 samples/event:
$ perf report -D | tail -15
Aggregated stats:
TOTAL events: 10998
MMAP events: 66
COMM events: 2
SAMPLE events: 10930
cycles stats:
TOTAL events: 3644
SAMPLE events: 3644
cycles stats:
TOTAL events: 3642
SAMPLE events: 3642
cycles stats:
TOTAL events: 3644
SAMPLE events: 3644
On a Intel Nehalem or even AMD64, there are 4 counters capable
of measuring cycles, so there is plenty of space to measure those
events without multiplexing (even with the NMI watchdog active).
And even with multiplexing, we'd expect roughly the same number
of samples per event.
The root of the problem was that when the event that caused the buffer
to become full was not the first event passed on the cmdline, the user
notification would get lost. The notification was sent to the file
descriptor of the overflowed event but the perf tool was not polling
on it. The perf tool aggregates all samples into a single buffer,
i.e., the buffer of the first event. Consequently, it assumes
notifications for any event will come via that descriptor.
The seemingly straight forward solution of moving the waitq into the
ringbuffer object doesn't work because of life-time issues. One could
perf_event_set_output() on a fd that you're also blocking on and cause
the old rb object to be freed while its waitq would still be
referenced by the blocked thread -> FAIL.
Therefore link all events to the ringbuffer and broadcast the wakeup
from the ringbuffer object to all possible events that could be waited
upon. This is rather ugly, and we're open to better solutions but it
works for now.
Reported-by: Stephane Eranian <eranian@google.com>
Finished-by: Stephane Eranian <eranian@google.com>
Reviewed-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/20111126014731.GA7030@quad
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2011-11-26 01:47:31 +00:00
|
|
|
|
|
|
|
INIT_LIST_HEAD(&rb->event_list);
|
|
|
|
spin_lock_init(&rb->event_lock);
|
2011-05-19 17:55:04 +00:00
|
|
|
}
|
|
|
|
|
perf: Add API for PMUs to write to the AUX area
For pmus that wish to write data to ring buffer's AUX area, provide
perf_aux_output_{begin,end}() calls to initiate/commit data writes,
similarly to perf_output_{begin,end}. These also use the same output
handle structure. Also, similarly to software counterparts, these
will direct inherited events' output to parents' ring buffers.
After the perf_aux_output_begin() returns successfully, handle->size
is set to the maximum amount of data that can be written wrt aux_tail
pointer, so that no data that the user hasn't seen will be overwritten,
therefore this should always be called before hardware writing is
enabled. On success, this will return the pointer to pmu driver's
private structure allocated for this aux area by pmu::setup_aux. Same
pointer can also be retrieved using perf_get_aux() while hardware
writing is enabled.
PMU driver should pass the actual amount of data written as a parameter
to perf_aux_output_end(). All hardware writes should be completed and
visible before this one is called.
Additionally, perf_aux_output_skip() will adjust output handle and
aux_head in case some part of the buffer has to be skipped over to
maintain hardware's alignment constraints.
Nested writers are forbidden and guards are in place to catch such
attempts.
Signed-off-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Kaixu Xia <kaixu.xia@linaro.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Robert Richter <rric@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: acme@infradead.org
Cc: adrian.hunter@intel.com
Cc: kan.liang@intel.com
Cc: markus.t.metzger@intel.com
Cc: mathieu.poirier@linaro.org
Link: http://lkml.kernel.org/r/1421237903-181015-8-git-send-email-alexander.shishkin@linux.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2015-01-14 12:18:16 +00:00
|
|
|
/*
|
|
|
|
* This is called before hardware starts writing to the AUX area to
|
|
|
|
* obtain an output handle and make sure there's room in the buffer.
|
|
|
|
* When the capture completes, call perf_aux_output_end() to commit
|
|
|
|
* the recorded data to the buffer.
|
|
|
|
*
|
|
|
|
* The ordering is similar to that of perf_output_{begin,end}, with
|
|
|
|
* the exception of (B), which should be taken care of by the pmu
|
|
|
|
* driver, since ordering rules will differ depending on hardware.
|
|
|
|
*/
|
|
|
|
void *perf_aux_output_begin(struct perf_output_handle *handle,
|
|
|
|
struct perf_event *event)
|
|
|
|
{
|
|
|
|
struct perf_event *output_event = event;
|
|
|
|
unsigned long aux_head, aux_tail;
|
|
|
|
struct ring_buffer *rb;
|
|
|
|
|
|
|
|
if (output_event->parent)
|
|
|
|
output_event = output_event->parent;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Since this will typically be open across pmu::add/pmu::del, we
|
|
|
|
* grab ring_buffer's refcount instead of holding rcu read lock
|
|
|
|
* to make sure it doesn't disappear under us.
|
|
|
|
*/
|
|
|
|
rb = ring_buffer_get(output_event);
|
|
|
|
if (!rb)
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
if (!rb_has_aux(rb) || !atomic_inc_not_zero(&rb->aux_refcount))
|
|
|
|
goto err;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Nesting is not supported for AUX area, make sure nested
|
|
|
|
* writers are caught early
|
|
|
|
*/
|
|
|
|
if (WARN_ON_ONCE(local_xchg(&rb->aux_nest, 1)))
|
|
|
|
goto err_put;
|
|
|
|
|
|
|
|
aux_head = local_read(&rb->aux_head);
|
|
|
|
aux_tail = ACCESS_ONCE(rb->user_page->aux_tail);
|
|
|
|
|
|
|
|
handle->rb = rb;
|
|
|
|
handle->event = event;
|
|
|
|
handle->head = aux_head;
|
|
|
|
if (aux_head - aux_tail < perf_aux_size(rb))
|
|
|
|
handle->size = CIRC_SPACE(aux_head, aux_tail, perf_aux_size(rb));
|
|
|
|
else
|
|
|
|
handle->size = 0;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* handle->size computation depends on aux_tail load; this forms a
|
|
|
|
* control dependency barrier separating aux_tail load from aux data
|
|
|
|
* store that will be enabled on successful return
|
|
|
|
*/
|
|
|
|
if (!handle->size) { /* A, matches D */
|
|
|
|
event->pending_disable = 1;
|
|
|
|
perf_output_wakeup(handle);
|
|
|
|
local_set(&rb->aux_nest, 0);
|
|
|
|
goto err_put;
|
|
|
|
}
|
|
|
|
|
|
|
|
return handle->rb->aux_priv;
|
|
|
|
|
|
|
|
err_put:
|
|
|
|
rb_free_aux(rb);
|
|
|
|
|
|
|
|
err:
|
|
|
|
ring_buffer_put(rb);
|
|
|
|
handle->event = NULL;
|
|
|
|
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Commit the data written by hardware into the ring buffer by adjusting
|
|
|
|
* aux_head and posting a PERF_RECORD_AUX into the perf buffer. It is the
|
|
|
|
* pmu driver's responsibility to observe ordering rules of the hardware,
|
|
|
|
* so that all the data is externally visible before this is called.
|
|
|
|
*/
|
|
|
|
void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size,
|
|
|
|
bool truncated)
|
|
|
|
{
|
|
|
|
struct ring_buffer *rb = handle->rb;
|
|
|
|
unsigned long aux_head = local_read(&rb->aux_head);
|
|
|
|
u64 flags = 0;
|
|
|
|
|
|
|
|
if (truncated)
|
|
|
|
flags |= PERF_AUX_FLAG_TRUNCATED;
|
|
|
|
|
|
|
|
local_add(size, &rb->aux_head);
|
|
|
|
|
|
|
|
if (size || flags) {
|
|
|
|
/*
|
|
|
|
* Only send RECORD_AUX if we have something useful to communicate
|
|
|
|
*/
|
|
|
|
|
|
|
|
perf_event_aux_event(handle->event, aux_head, size, flags);
|
|
|
|
}
|
|
|
|
|
|
|
|
rb->user_page->aux_head = local_read(&rb->aux_head);
|
|
|
|
|
|
|
|
perf_output_wakeup(handle);
|
|
|
|
handle->event = NULL;
|
|
|
|
|
|
|
|
local_set(&rb->aux_nest, 0);
|
|
|
|
rb_free_aux(rb);
|
|
|
|
ring_buffer_put(rb);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Skip over a given number of bytes in the AUX buffer, due to, for example,
|
|
|
|
* hardware's alignment constraints.
|
|
|
|
*/
|
|
|
|
int perf_aux_output_skip(struct perf_output_handle *handle, unsigned long size)
|
|
|
|
{
|
|
|
|
struct ring_buffer *rb = handle->rb;
|
|
|
|
unsigned long aux_head;
|
|
|
|
|
|
|
|
if (size > handle->size)
|
|
|
|
return -ENOSPC;
|
|
|
|
|
|
|
|
local_add(size, &rb->aux_head);
|
|
|
|
|
|
|
|
handle->head = aux_head;
|
|
|
|
handle->size -= size;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
void *perf_get_aux(struct perf_output_handle *handle)
|
|
|
|
{
|
|
|
|
/* this is only valid between perf_aux_output_begin and *_end */
|
|
|
|
if (!handle->event)
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
return handle->rb->aux_priv;
|
|
|
|
}
|
|
|
|
|
2015-01-14 12:18:12 +00:00
|
|
|
#define PERF_AUX_GFP (GFP_KERNEL | __GFP_ZERO | __GFP_NOWARN | __GFP_NORETRY)
|
|
|
|
|
|
|
|
static struct page *rb_alloc_aux_page(int node, int order)
|
|
|
|
{
|
|
|
|
struct page *page;
|
|
|
|
|
|
|
|
if (order > MAX_ORDER)
|
|
|
|
order = MAX_ORDER;
|
|
|
|
|
|
|
|
do {
|
|
|
|
page = alloc_pages_node(node, PERF_AUX_GFP, order);
|
|
|
|
} while (!page && order--);
|
|
|
|
|
|
|
|
if (page && order) {
|
|
|
|
/*
|
|
|
|
* Communicate the allocation size to the driver
|
|
|
|
*/
|
|
|
|
split_page(page, order);
|
|
|
|
SetPagePrivate(page);
|
|
|
|
set_page_private(page, order);
|
|
|
|
}
|
|
|
|
|
|
|
|
return page;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void rb_free_aux_page(struct ring_buffer *rb, int idx)
|
|
|
|
{
|
|
|
|
struct page *page = virt_to_page(rb->aux_pages[idx]);
|
|
|
|
|
|
|
|
ClearPagePrivate(page);
|
|
|
|
page->mapping = NULL;
|
|
|
|
__free_page(page);
|
|
|
|
}
|
|
|
|
|
perf: Add AUX area to ring buffer for raw data streams
This patch introduces "AUX space" in the perf mmap buffer, intended for
exporting high bandwidth data streams to userspace, such as instruction
flow traces.
AUX space is a ring buffer, defined by aux_{offset,size} fields in the
user_page structure, and read/write pointers aux_{head,tail}, which abide
by the same rules as data_* counterparts of the main perf buffer.
In order to allocate/mmap AUX, userspace needs to set up aux_offset to
such an offset that will be greater than data_offset+data_size and
aux_size to be the desired buffer size. Both need to be page aligned.
Then, same aux_offset and aux_size should be passed to mmap() call and
if everything adds up, you should have an AUX buffer as a result.
Pages that are mapped into this buffer also come out of user's mlock
rlimit plus perf_event_mlock_kb allowance.
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Kaixu Xia <kaixu.xia@linaro.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Robert Richter <rric@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: acme@infradead.org
Cc: adrian.hunter@intel.com
Cc: kan.liang@intel.com
Cc: markus.t.metzger@intel.com
Cc: mathieu.poirier@linaro.org
Link: http://lkml.kernel.org/r/1421237903-181015-3-git-send-email-alexander.shishkin@linux.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2015-01-14 12:18:11 +00:00
|
|
|
int rb_alloc_aux(struct ring_buffer *rb, struct perf_event *event,
|
|
|
|
pgoff_t pgoff, int nr_pages, int flags)
|
|
|
|
{
|
|
|
|
bool overwrite = !(flags & RING_BUFFER_WRITABLE);
|
|
|
|
int node = (event->cpu == -1) ? -1 : cpu_to_node(event->cpu);
|
2015-01-14 12:18:12 +00:00
|
|
|
int ret = -ENOMEM, max_order = 0;
|
perf: Add AUX area to ring buffer for raw data streams
This patch introduces "AUX space" in the perf mmap buffer, intended for
exporting high bandwidth data streams to userspace, such as instruction
flow traces.
AUX space is a ring buffer, defined by aux_{offset,size} fields in the
user_page structure, and read/write pointers aux_{head,tail}, which abide
by the same rules as data_* counterparts of the main perf buffer.
In order to allocate/mmap AUX, userspace needs to set up aux_offset to
such an offset that will be greater than data_offset+data_size and
aux_size to be the desired buffer size. Both need to be page aligned.
Then, same aux_offset and aux_size should be passed to mmap() call and
if everything adds up, you should have an AUX buffer as a result.
Pages that are mapped into this buffer also come out of user's mlock
rlimit plus perf_event_mlock_kb allowance.
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Kaixu Xia <kaixu.xia@linaro.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Robert Richter <rric@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: acme@infradead.org
Cc: adrian.hunter@intel.com
Cc: kan.liang@intel.com
Cc: markus.t.metzger@intel.com
Cc: mathieu.poirier@linaro.org
Link: http://lkml.kernel.org/r/1421237903-181015-3-git-send-email-alexander.shishkin@linux.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2015-01-14 12:18:11 +00:00
|
|
|
|
|
|
|
if (!has_aux(event))
|
|
|
|
return -ENOTSUPP;
|
|
|
|
|
2015-01-14 12:18:13 +00:00
|
|
|
if (event->pmu->capabilities & PERF_PMU_CAP_AUX_NO_SG) {
|
2015-01-14 12:18:12 +00:00
|
|
|
/*
|
|
|
|
* We need to start with the max_order that fits in nr_pages,
|
|
|
|
* not the other way around, hence ilog2() and not get_order.
|
|
|
|
*/
|
|
|
|
max_order = ilog2(nr_pages);
|
|
|
|
|
2015-01-14 12:18:13 +00:00
|
|
|
/*
|
|
|
|
* PMU requests more than one contiguous chunks of memory
|
|
|
|
* for SW double buffering
|
|
|
|
*/
|
|
|
|
if ((event->pmu->capabilities & PERF_PMU_CAP_AUX_SW_DOUBLEBUF) &&
|
|
|
|
!overwrite) {
|
|
|
|
if (!max_order)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
max_order--;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
perf: Add AUX area to ring buffer for raw data streams
This patch introduces "AUX space" in the perf mmap buffer, intended for
exporting high bandwidth data streams to userspace, such as instruction
flow traces.
AUX space is a ring buffer, defined by aux_{offset,size} fields in the
user_page structure, and read/write pointers aux_{head,tail}, which abide
by the same rules as data_* counterparts of the main perf buffer.
In order to allocate/mmap AUX, userspace needs to set up aux_offset to
such an offset that will be greater than data_offset+data_size and
aux_size to be the desired buffer size. Both need to be page aligned.
Then, same aux_offset and aux_size should be passed to mmap() call and
if everything adds up, you should have an AUX buffer as a result.
Pages that are mapped into this buffer also come out of user's mlock
rlimit plus perf_event_mlock_kb allowance.
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Kaixu Xia <kaixu.xia@linaro.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Robert Richter <rric@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: acme@infradead.org
Cc: adrian.hunter@intel.com
Cc: kan.liang@intel.com
Cc: markus.t.metzger@intel.com
Cc: mathieu.poirier@linaro.org
Link: http://lkml.kernel.org/r/1421237903-181015-3-git-send-email-alexander.shishkin@linux.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2015-01-14 12:18:11 +00:00
|
|
|
rb->aux_pages = kzalloc_node(nr_pages * sizeof(void *), GFP_KERNEL, node);
|
|
|
|
if (!rb->aux_pages)
|
|
|
|
return -ENOMEM;
|
|
|
|
|
|
|
|
rb->free_aux = event->pmu->free_aux;
|
2015-01-14 12:18:12 +00:00
|
|
|
for (rb->aux_nr_pages = 0; rb->aux_nr_pages < nr_pages;) {
|
perf: Add AUX area to ring buffer for raw data streams
This patch introduces "AUX space" in the perf mmap buffer, intended for
exporting high bandwidth data streams to userspace, such as instruction
flow traces.
AUX space is a ring buffer, defined by aux_{offset,size} fields in the
user_page structure, and read/write pointers aux_{head,tail}, which abide
by the same rules as data_* counterparts of the main perf buffer.
In order to allocate/mmap AUX, userspace needs to set up aux_offset to
such an offset that will be greater than data_offset+data_size and
aux_size to be the desired buffer size. Both need to be page aligned.
Then, same aux_offset and aux_size should be passed to mmap() call and
if everything adds up, you should have an AUX buffer as a result.
Pages that are mapped into this buffer also come out of user's mlock
rlimit plus perf_event_mlock_kb allowance.
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Kaixu Xia <kaixu.xia@linaro.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Robert Richter <rric@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: acme@infradead.org
Cc: adrian.hunter@intel.com
Cc: kan.liang@intel.com
Cc: markus.t.metzger@intel.com
Cc: mathieu.poirier@linaro.org
Link: http://lkml.kernel.org/r/1421237903-181015-3-git-send-email-alexander.shishkin@linux.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2015-01-14 12:18:11 +00:00
|
|
|
struct page *page;
|
2015-01-14 12:18:12 +00:00
|
|
|
int last, order;
|
perf: Add AUX area to ring buffer for raw data streams
This patch introduces "AUX space" in the perf mmap buffer, intended for
exporting high bandwidth data streams to userspace, such as instruction
flow traces.
AUX space is a ring buffer, defined by aux_{offset,size} fields in the
user_page structure, and read/write pointers aux_{head,tail}, which abide
by the same rules as data_* counterparts of the main perf buffer.
In order to allocate/mmap AUX, userspace needs to set up aux_offset to
such an offset that will be greater than data_offset+data_size and
aux_size to be the desired buffer size. Both need to be page aligned.
Then, same aux_offset and aux_size should be passed to mmap() call and
if everything adds up, you should have an AUX buffer as a result.
Pages that are mapped into this buffer also come out of user's mlock
rlimit plus perf_event_mlock_kb allowance.
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Kaixu Xia <kaixu.xia@linaro.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Robert Richter <rric@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: acme@infradead.org
Cc: adrian.hunter@intel.com
Cc: kan.liang@intel.com
Cc: markus.t.metzger@intel.com
Cc: mathieu.poirier@linaro.org
Link: http://lkml.kernel.org/r/1421237903-181015-3-git-send-email-alexander.shishkin@linux.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2015-01-14 12:18:11 +00:00
|
|
|
|
2015-01-14 12:18:12 +00:00
|
|
|
order = min(max_order, ilog2(nr_pages - rb->aux_nr_pages));
|
|
|
|
page = rb_alloc_aux_page(node, order);
|
perf: Add AUX area to ring buffer for raw data streams
This patch introduces "AUX space" in the perf mmap buffer, intended for
exporting high bandwidth data streams to userspace, such as instruction
flow traces.
AUX space is a ring buffer, defined by aux_{offset,size} fields in the
user_page structure, and read/write pointers aux_{head,tail}, which abide
by the same rules as data_* counterparts of the main perf buffer.
In order to allocate/mmap AUX, userspace needs to set up aux_offset to
such an offset that will be greater than data_offset+data_size and
aux_size to be the desired buffer size. Both need to be page aligned.
Then, same aux_offset and aux_size should be passed to mmap() call and
if everything adds up, you should have an AUX buffer as a result.
Pages that are mapped into this buffer also come out of user's mlock
rlimit plus perf_event_mlock_kb allowance.
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Kaixu Xia <kaixu.xia@linaro.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Robert Richter <rric@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: acme@infradead.org
Cc: adrian.hunter@intel.com
Cc: kan.liang@intel.com
Cc: markus.t.metzger@intel.com
Cc: mathieu.poirier@linaro.org
Link: http://lkml.kernel.org/r/1421237903-181015-3-git-send-email-alexander.shishkin@linux.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2015-01-14 12:18:11 +00:00
|
|
|
if (!page)
|
|
|
|
goto out;
|
|
|
|
|
2015-01-14 12:18:12 +00:00
|
|
|
for (last = rb->aux_nr_pages + (1 << page_private(page));
|
|
|
|
last > rb->aux_nr_pages; rb->aux_nr_pages++)
|
|
|
|
rb->aux_pages[rb->aux_nr_pages] = page_address(page++);
|
perf: Add AUX area to ring buffer for raw data streams
This patch introduces "AUX space" in the perf mmap buffer, intended for
exporting high bandwidth data streams to userspace, such as instruction
flow traces.
AUX space is a ring buffer, defined by aux_{offset,size} fields in the
user_page structure, and read/write pointers aux_{head,tail}, which abide
by the same rules as data_* counterparts of the main perf buffer.
In order to allocate/mmap AUX, userspace needs to set up aux_offset to
such an offset that will be greater than data_offset+data_size and
aux_size to be the desired buffer size. Both need to be page aligned.
Then, same aux_offset and aux_size should be passed to mmap() call and
if everything adds up, you should have an AUX buffer as a result.
Pages that are mapped into this buffer also come out of user's mlock
rlimit plus perf_event_mlock_kb allowance.
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Kaixu Xia <kaixu.xia@linaro.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Robert Richter <rric@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: acme@infradead.org
Cc: adrian.hunter@intel.com
Cc: kan.liang@intel.com
Cc: markus.t.metzger@intel.com
Cc: mathieu.poirier@linaro.org
Link: http://lkml.kernel.org/r/1421237903-181015-3-git-send-email-alexander.shishkin@linux.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2015-01-14 12:18:11 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
rb->aux_priv = event->pmu->setup_aux(event->cpu, rb->aux_pages, nr_pages,
|
|
|
|
overwrite);
|
|
|
|
if (!rb->aux_priv)
|
|
|
|
goto out;
|
|
|
|
|
|
|
|
ret = 0;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* aux_pages (and pmu driver's private data, aux_priv) will be
|
|
|
|
* referenced in both producer's and consumer's contexts, thus
|
|
|
|
* we keep a refcount here to make sure either of the two can
|
|
|
|
* reference them safely.
|
|
|
|
*/
|
|
|
|
atomic_set(&rb->aux_refcount, 1);
|
|
|
|
|
|
|
|
out:
|
|
|
|
if (!ret)
|
|
|
|
rb->aux_pgoff = pgoff;
|
|
|
|
else
|
|
|
|
rb_free_aux(rb);
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void __rb_free_aux(struct ring_buffer *rb)
|
|
|
|
{
|
|
|
|
int pg;
|
|
|
|
|
|
|
|
if (rb->aux_priv) {
|
|
|
|
rb->free_aux(rb->aux_priv);
|
|
|
|
rb->free_aux = NULL;
|
|
|
|
rb->aux_priv = NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
for (pg = 0; pg < rb->aux_nr_pages; pg++)
|
2015-01-14 12:18:12 +00:00
|
|
|
rb_free_aux_page(rb, pg);
|
perf: Add AUX area to ring buffer for raw data streams
This patch introduces "AUX space" in the perf mmap buffer, intended for
exporting high bandwidth data streams to userspace, such as instruction
flow traces.
AUX space is a ring buffer, defined by aux_{offset,size} fields in the
user_page structure, and read/write pointers aux_{head,tail}, which abide
by the same rules as data_* counterparts of the main perf buffer.
In order to allocate/mmap AUX, userspace needs to set up aux_offset to
such an offset that will be greater than data_offset+data_size and
aux_size to be the desired buffer size. Both need to be page aligned.
Then, same aux_offset and aux_size should be passed to mmap() call and
if everything adds up, you should have an AUX buffer as a result.
Pages that are mapped into this buffer also come out of user's mlock
rlimit plus perf_event_mlock_kb allowance.
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Kaixu Xia <kaixu.xia@linaro.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Robert Richter <rric@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: acme@infradead.org
Cc: adrian.hunter@intel.com
Cc: kan.liang@intel.com
Cc: markus.t.metzger@intel.com
Cc: mathieu.poirier@linaro.org
Link: http://lkml.kernel.org/r/1421237903-181015-3-git-send-email-alexander.shishkin@linux.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2015-01-14 12:18:11 +00:00
|
|
|
|
|
|
|
kfree(rb->aux_pages);
|
|
|
|
rb->aux_nr_pages = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
void rb_free_aux(struct ring_buffer *rb)
|
|
|
|
{
|
|
|
|
if (atomic_dec_and_test(&rb->aux_refcount))
|
|
|
|
__rb_free_aux(rb);
|
|
|
|
}
|
|
|
|
|
2011-05-19 17:55:04 +00:00
|
|
|
#ifndef CONFIG_PERF_USE_VMALLOC
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Back perf_mmap() with regular GFP_KERNEL-0 pages.
|
|
|
|
*/
|
|
|
|
|
perf: Add AUX area to ring buffer for raw data streams
This patch introduces "AUX space" in the perf mmap buffer, intended for
exporting high bandwidth data streams to userspace, such as instruction
flow traces.
AUX space is a ring buffer, defined by aux_{offset,size} fields in the
user_page structure, and read/write pointers aux_{head,tail}, which abide
by the same rules as data_* counterparts of the main perf buffer.
In order to allocate/mmap AUX, userspace needs to set up aux_offset to
such an offset that will be greater than data_offset+data_size and
aux_size to be the desired buffer size. Both need to be page aligned.
Then, same aux_offset and aux_size should be passed to mmap() call and
if everything adds up, you should have an AUX buffer as a result.
Pages that are mapped into this buffer also come out of user's mlock
rlimit plus perf_event_mlock_kb allowance.
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Kaixu Xia <kaixu.xia@linaro.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Robert Richter <rric@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: acme@infradead.org
Cc: adrian.hunter@intel.com
Cc: kan.liang@intel.com
Cc: markus.t.metzger@intel.com
Cc: mathieu.poirier@linaro.org
Link: http://lkml.kernel.org/r/1421237903-181015-3-git-send-email-alexander.shishkin@linux.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2015-01-14 12:18:11 +00:00
|
|
|
static struct page *
|
|
|
|
__perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff)
|
2011-05-19 17:55:04 +00:00
|
|
|
{
|
|
|
|
if (pgoff > rb->nr_pages)
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
if (pgoff == 0)
|
|
|
|
return virt_to_page(rb->user_page);
|
|
|
|
|
|
|
|
return virt_to_page(rb->data_pages[pgoff - 1]);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void *perf_mmap_alloc_page(int cpu)
|
|
|
|
{
|
|
|
|
struct page *page;
|
|
|
|
int node;
|
|
|
|
|
|
|
|
node = (cpu == -1) ? cpu : cpu_to_node(cpu);
|
|
|
|
page = alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, 0);
|
|
|
|
if (!page)
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
return page_address(page);
|
|
|
|
}
|
|
|
|
|
|
|
|
struct ring_buffer *rb_alloc(int nr_pages, long watermark, int cpu, int flags)
|
|
|
|
{
|
|
|
|
struct ring_buffer *rb;
|
|
|
|
unsigned long size;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
size = sizeof(struct ring_buffer);
|
|
|
|
size += nr_pages * sizeof(void *);
|
|
|
|
|
|
|
|
rb = kzalloc(size, GFP_KERNEL);
|
|
|
|
if (!rb)
|
|
|
|
goto fail;
|
|
|
|
|
|
|
|
rb->user_page = perf_mmap_alloc_page(cpu);
|
|
|
|
if (!rb->user_page)
|
|
|
|
goto fail_user_page;
|
|
|
|
|
|
|
|
for (i = 0; i < nr_pages; i++) {
|
|
|
|
rb->data_pages[i] = perf_mmap_alloc_page(cpu);
|
|
|
|
if (!rb->data_pages[i])
|
|
|
|
goto fail_data_pages;
|
|
|
|
}
|
|
|
|
|
|
|
|
rb->nr_pages = nr_pages;
|
|
|
|
|
|
|
|
ring_buffer_init(rb, watermark, flags);
|
|
|
|
|
|
|
|
return rb;
|
|
|
|
|
|
|
|
fail_data_pages:
|
|
|
|
for (i--; i >= 0; i--)
|
|
|
|
free_page((unsigned long)rb->data_pages[i]);
|
|
|
|
|
|
|
|
free_page((unsigned long)rb->user_page);
|
|
|
|
|
|
|
|
fail_user_page:
|
|
|
|
kfree(rb);
|
|
|
|
|
|
|
|
fail:
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void perf_mmap_free_page(unsigned long addr)
|
|
|
|
{
|
|
|
|
struct page *page = virt_to_page((void *)addr);
|
|
|
|
|
|
|
|
page->mapping = NULL;
|
|
|
|
__free_page(page);
|
|
|
|
}
|
|
|
|
|
|
|
|
void rb_free(struct ring_buffer *rb)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
|
|
|
perf_mmap_free_page((unsigned long)rb->user_page);
|
|
|
|
for (i = 0; i < rb->nr_pages; i++)
|
|
|
|
perf_mmap_free_page((unsigned long)rb->data_pages[i]);
|
|
|
|
kfree(rb);
|
|
|
|
}
|
|
|
|
|
|
|
|
#else
|
2013-03-19 14:35:09 +00:00
|
|
|
static int data_page_nr(struct ring_buffer *rb)
|
|
|
|
{
|
|
|
|
return rb->nr_pages << page_order(rb);
|
|
|
|
}
|
2011-05-19 17:55:04 +00:00
|
|
|
|
perf: Add AUX area to ring buffer for raw data streams
This patch introduces "AUX space" in the perf mmap buffer, intended for
exporting high bandwidth data streams to userspace, such as instruction
flow traces.
AUX space is a ring buffer, defined by aux_{offset,size} fields in the
user_page structure, and read/write pointers aux_{head,tail}, which abide
by the same rules as data_* counterparts of the main perf buffer.
In order to allocate/mmap AUX, userspace needs to set up aux_offset to
such an offset that will be greater than data_offset+data_size and
aux_size to be the desired buffer size. Both need to be page aligned.
Then, same aux_offset and aux_size should be passed to mmap() call and
if everything adds up, you should have an AUX buffer as a result.
Pages that are mapped into this buffer also come out of user's mlock
rlimit plus perf_event_mlock_kb allowance.
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Kaixu Xia <kaixu.xia@linaro.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Robert Richter <rric@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: acme@infradead.org
Cc: adrian.hunter@intel.com
Cc: kan.liang@intel.com
Cc: markus.t.metzger@intel.com
Cc: mathieu.poirier@linaro.org
Link: http://lkml.kernel.org/r/1421237903-181015-3-git-send-email-alexander.shishkin@linux.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2015-01-14 12:18:11 +00:00
|
|
|
static struct page *
|
|
|
|
__perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff)
|
2011-05-19 17:55:04 +00:00
|
|
|
{
|
2013-03-19 14:35:09 +00:00
|
|
|
/* The '>' counts in the user page. */
|
|
|
|
if (pgoff > data_page_nr(rb))
|
2011-05-19 17:55:04 +00:00
|
|
|
return NULL;
|
|
|
|
|
|
|
|
return vmalloc_to_page((void *)rb->user_page + pgoff * PAGE_SIZE);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void perf_mmap_unmark_page(void *addr)
|
|
|
|
{
|
|
|
|
struct page *page = vmalloc_to_page(addr);
|
|
|
|
|
|
|
|
page->mapping = NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void rb_free_work(struct work_struct *work)
|
|
|
|
{
|
|
|
|
struct ring_buffer *rb;
|
|
|
|
void *base;
|
|
|
|
int i, nr;
|
|
|
|
|
|
|
|
rb = container_of(work, struct ring_buffer, work);
|
2013-03-19 14:35:09 +00:00
|
|
|
nr = data_page_nr(rb);
|
2011-05-19 17:55:04 +00:00
|
|
|
|
|
|
|
base = rb->user_page;
|
2013-03-19 14:35:09 +00:00
|
|
|
/* The '<=' counts in the user page. */
|
|
|
|
for (i = 0; i <= nr; i++)
|
2011-05-19 17:55:04 +00:00
|
|
|
perf_mmap_unmark_page(base + (i * PAGE_SIZE));
|
|
|
|
|
|
|
|
vfree(base);
|
|
|
|
kfree(rb);
|
|
|
|
}
|
|
|
|
|
|
|
|
void rb_free(struct ring_buffer *rb)
|
|
|
|
{
|
|
|
|
schedule_work(&rb->work);
|
|
|
|
}
|
|
|
|
|
|
|
|
struct ring_buffer *rb_alloc(int nr_pages, long watermark, int cpu, int flags)
|
|
|
|
{
|
|
|
|
struct ring_buffer *rb;
|
|
|
|
unsigned long size;
|
|
|
|
void *all_buf;
|
|
|
|
|
|
|
|
size = sizeof(struct ring_buffer);
|
|
|
|
size += sizeof(void *);
|
|
|
|
|
|
|
|
rb = kzalloc(size, GFP_KERNEL);
|
|
|
|
if (!rb)
|
|
|
|
goto fail;
|
|
|
|
|
|
|
|
INIT_WORK(&rb->work, rb_free_work);
|
|
|
|
|
|
|
|
all_buf = vmalloc_user((nr_pages + 1) * PAGE_SIZE);
|
|
|
|
if (!all_buf)
|
|
|
|
goto fail_all_buf;
|
|
|
|
|
|
|
|
rb->user_page = all_buf;
|
|
|
|
rb->data_pages[0] = all_buf + PAGE_SIZE;
|
|
|
|
rb->page_order = ilog2(nr_pages);
|
2013-03-19 14:35:09 +00:00
|
|
|
rb->nr_pages = !!nr_pages;
|
2011-05-19 17:55:04 +00:00
|
|
|
|
|
|
|
ring_buffer_init(rb, watermark, flags);
|
|
|
|
|
|
|
|
return rb;
|
|
|
|
|
|
|
|
fail_all_buf:
|
|
|
|
kfree(rb);
|
|
|
|
|
|
|
|
fail:
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
#endif
|
perf: Add AUX area to ring buffer for raw data streams
This patch introduces "AUX space" in the perf mmap buffer, intended for
exporting high bandwidth data streams to userspace, such as instruction
flow traces.
AUX space is a ring buffer, defined by aux_{offset,size} fields in the
user_page structure, and read/write pointers aux_{head,tail}, which abide
by the same rules as data_* counterparts of the main perf buffer.
In order to allocate/mmap AUX, userspace needs to set up aux_offset to
such an offset that will be greater than data_offset+data_size and
aux_size to be the desired buffer size. Both need to be page aligned.
Then, same aux_offset and aux_size should be passed to mmap() call and
if everything adds up, you should have an AUX buffer as a result.
Pages that are mapped into this buffer also come out of user's mlock
rlimit plus perf_event_mlock_kb allowance.
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Kaixu Xia <kaixu.xia@linaro.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Robert Richter <rric@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: acme@infradead.org
Cc: adrian.hunter@intel.com
Cc: kan.liang@intel.com
Cc: markus.t.metzger@intel.com
Cc: mathieu.poirier@linaro.org
Link: http://lkml.kernel.org/r/1421237903-181015-3-git-send-email-alexander.shishkin@linux.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2015-01-14 12:18:11 +00:00
|
|
|
|
|
|
|
struct page *
|
|
|
|
perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff)
|
|
|
|
{
|
|
|
|
if (rb->aux_nr_pages) {
|
|
|
|
/* above AUX space */
|
|
|
|
if (pgoff > rb->aux_pgoff + rb->aux_nr_pages)
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
/* AUX space */
|
|
|
|
if (pgoff >= rb->aux_pgoff)
|
|
|
|
return virt_to_page(rb->aux_pages[pgoff - rb->aux_pgoff]);
|
|
|
|
}
|
|
|
|
|
|
|
|
return __perf_mmap_to_page(rb, pgoff);
|
|
|
|
}
|