perf cpumap: Add range data encoding
Often cpumaps encode a range of all CPUs; add a compact encoding that
doesn't require a bit mask or a list of all CPUs.

Signed-off-by: Ian Rogers <irogers@google.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Alexey Bayduraev <alexey.v.bayduraev@linux.intel.com>
Cc: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
Cc: Colin Ian King <colin.king@intel.com>
Cc: Dave Marchevsky <davemarchevsky@fb.com>
Cc: German Gomez <german.gomez@arm.com>
Cc: Gustavo A. R. Silva <gustavoars@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: James Clark <james.clark@arm.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Riccardo Mancini <rickyman7@gmail.com>
Cc: Song Liu <songliubraving@fb.com>
Cc: Stephane Eranian <eranian@google.com>
Link: https://lore.kernel.org/r/20220614143353.1559597-7-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
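The commit message's compactness claim can be made concrete. Below is a minimal standalone sketch (editorial, not part of the patch; plain stdint types stand in for the kernel's __u16/__u32) comparing the payload sizes of the three encodings for the contiguous map 1-256 that the tests further down exercise. Sizes ignore the leading u16 type field, the perf_event_header, and the u64 alignment the real code applies.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	unsigned int nr = 256, max_cpu = 256;

	/* PERF_CPU_MAP__CPUS: u16 nr + one u16 per CPU. */
	size_t size_cpus = sizeof(uint16_t) + nr * sizeof(uint16_t);
	/* PERF_CPU_MAP__MASK: u16 nr + u16 long_size + one u32 per 32 CPUs. */
	size_t size_mask = 2 * sizeof(uint16_t) +
			   ((max_cpu + 31) / 32) * sizeof(uint32_t);
	/* PERF_CPU_MAP__RANGE_CPUS: any_cpu + pad + start_cpu + end_cpu. */
	size_t size_range = 2 * sizeof(uint8_t) + 2 * sizeof(uint16_t);

	printf("cpus=%zu mask=%zu range=%zu bytes\n",
	       size_cpus, size_mask, size_range);
	return 0;
}

For 256 contiguous CPUs this prints cpus=514 mask=36 range=6, which is why a consecutive map now gets the fixed-size range form.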
--- a/tools/lib/perf/include/perf/event.h
+++ b/tools/lib/perf/include/perf/event.h
@@ -153,6 +153,7 @@ struct perf_record_header_attr {
 enum {
 	PERF_CPU_MAP__CPUS = 0,
 	PERF_CPU_MAP__MASK = 1,
+	PERF_CPU_MAP__RANGE_CPUS = 2,
 };
 
 /*
@@ -195,6 +196,17 @@ struct perf_record_mask_cpu_map64 {
 #pragma GCC diagnostic ignored "-Wpacked"
 #pragma GCC diagnostic ignored "-Wattributes"
 
+/*
+ * An encoding of a CPU map for a range starting at start_cpu through to
+ * end_cpu. If any_cpu is 1, an any CPU (-1) value (aka dummy value) is present.
+ */
+struct perf_record_range_cpu_map {
+	__u8 any_cpu;
+	__u8 __pad;
+	__u16 start_cpu;
+	__u16 end_cpu;
+};
+
 struct __packed perf_record_cpu_map_data {
 	__u16 type;
 	union {
@@ -204,6 +216,8 @@ struct __packed perf_record_cpu_map_data {
 		struct perf_record_mask_cpu_map32 mask32_data;
 		/* Used when type == PERF_CPU_MAP__MASK and long_size == 8. */
 		struct perf_record_mask_cpu_map64 mask64_data;
+		/* Used when type == PERF_CPU_MAP__RANGE_CPUS. */
+		struct perf_record_range_cpu_map range_cpu_data;
 	};
 };
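As a quick check of the layout above, a standalone mirror of the struct (editorial sketch; hypothetical name, plain stdint types) confirms that a u16 type tag plus the range payload packs into a single u64 — the same invariant the synthesis code asserts later in this patch.

#include <stdint.h>

struct range_cpu_map {		/* mirrors perf_record_range_cpu_map */
	uint8_t  any_cpu;	/* 1 if the "any CPU" (-1) dummy value is present */
	uint8_t  __pad;
	uint16_t start_cpu;
	uint16_t end_cpu;
};

_Static_assert(sizeof(uint16_t) + sizeof(struct range_cpu_map) == sizeof(uint64_t),
	       "u16 type + range payload fit in 8 bytes");

int main(void)
{
	return 0;
}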
--- a/tools/perf/tests/cpumap.c
+++ b/tools/perf/tests/cpumap.c
@@ -19,7 +19,6 @@ static int process_event_mask(struct perf_tool *tool __maybe_unused,
 	struct perf_record_cpu_map *map_event = &event->cpu_map;
 	struct perf_record_cpu_map_data *data;
 	struct perf_cpu_map *map;
-	int i;
 	unsigned int long_size;
 
 	data = &map_event->data;
@@ -32,16 +31,17 @@ static int process_event_mask(struct perf_tool *tool __maybe_unused,
 
 	TEST_ASSERT_VAL("wrong nr",   data->mask32_data.nr == 1);
 
-	for (i = 0; i < 20; i++) {
-		TEST_ASSERT_VAL("wrong cpu", perf_record_cpu_map_data__test_bit(i, data));
-	}
+	TEST_ASSERT_VAL("wrong cpu", perf_record_cpu_map_data__test_bit(0, data));
+	TEST_ASSERT_VAL("wrong cpu", !perf_record_cpu_map_data__test_bit(1, data));
+	for (int i = 2; i <= 20; i++)
+		TEST_ASSERT_VAL("wrong cpu", perf_record_cpu_map_data__test_bit(i, data));
 
 	map = cpu_map__new_data(data);
 	TEST_ASSERT_VAL("wrong nr",  perf_cpu_map__nr(map) == 20);
 
-	for (i = 0; i < 20; i++) {
-		TEST_ASSERT_VAL("wrong cpu", perf_cpu_map__cpu(map, i).cpu == i);
-	}
+	TEST_ASSERT_VAL("wrong cpu", perf_cpu_map__cpu(map, 0).cpu == 0);
+	for (int i = 2; i <= 20; i++)
+		TEST_ASSERT_VAL("wrong cpu", perf_cpu_map__cpu(map, i - 1).cpu == i);
 
 	perf_cpu_map__put(map);
 	return 0;
@@ -73,25 +73,59 @@ static int process_event_cpus(struct perf_tool *tool __maybe_unused,
 	return 0;
 }
 
+static int process_event_range_cpus(struct perf_tool *tool __maybe_unused,
+				union perf_event *event,
+				struct perf_sample *sample __maybe_unused,
+				struct machine *machine __maybe_unused)
+{
+	struct perf_record_cpu_map *map_event = &event->cpu_map;
+	struct perf_record_cpu_map_data *data;
+	struct perf_cpu_map *map;
+
+	data = &map_event->data;
+
+	TEST_ASSERT_VAL("wrong type", data->type == PERF_CPU_MAP__RANGE_CPUS);
+
+	TEST_ASSERT_VAL("wrong any_cpu", data->range_cpu_data.any_cpu == 0);
+	TEST_ASSERT_VAL("wrong start_cpu", data->range_cpu_data.start_cpu == 1);
+	TEST_ASSERT_VAL("wrong end_cpu", data->range_cpu_data.end_cpu == 256);
+
+	map = cpu_map__new_data(data);
+	TEST_ASSERT_VAL("wrong nr",  perf_cpu_map__nr(map) == 256);
+	TEST_ASSERT_VAL("wrong cpu", perf_cpu_map__cpu(map, 0).cpu == 1);
+	TEST_ASSERT_VAL("wrong cpu", perf_cpu_map__max(map).cpu == 256);
+	TEST_ASSERT_VAL("wrong refcnt", refcount_read(&map->refcnt) == 1);
+	perf_cpu_map__put(map);
+	return 0;
+}
+
 
 static int test__cpu_map_synthesize(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
 	struct perf_cpu_map *cpus;
 
-	/* This one is better stores in mask. */
-	cpus = perf_cpu_map__new("0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19");
+	/* This one is better stored in a mask. */
+	cpus = perf_cpu_map__new("0,2-20");
 
 	TEST_ASSERT_VAL("failed to synthesize map",
 			!perf_event__synthesize_cpu_map(NULL, cpus, process_event_mask, NULL));
 
 	perf_cpu_map__put(cpus);
 
-	/* This one is better stores in cpu values. */
+	/* This one is better stored in cpu values. */
 	cpus = perf_cpu_map__new("1,256");
 
 	TEST_ASSERT_VAL("failed to synthesize map",
 			!perf_event__synthesize_cpu_map(NULL, cpus, process_event_cpus, NULL));
 
 	perf_cpu_map__put(cpus);
+
+	/* This one is better stored as a range. */
+	cpus = perf_cpu_map__new("1-256");
+
+	TEST_ASSERT_VAL("failed to synthesize map",
+			!perf_event__synthesize_cpu_map(NULL, cpus, process_event_range_cpus, NULL));
+
+	perf_cpu_map__put(cpus);
 	return 0;
 }
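Assuming the suite keeps its usual name in tests/cpumap.c ("Synthesize cpu map" — an assumption, check `perf test list` on your tree), the three cases above can be exercised directly with:

	$ perf test -v 'Synthesize cpu map'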
--- a/tools/perf/util/cpumap.c
+++ b/tools/perf/util/cpumap.c
@@ -112,12 +112,39 @@ static struct perf_cpu_map *cpu_map__from_mask(const struct perf_record_cpu_map_
 
 }
 
+static struct perf_cpu_map *cpu_map__from_range(const struct perf_record_cpu_map_data *data)
+{
+	struct perf_cpu_map *map;
+	unsigned int i = 0;
+
+	map = perf_cpu_map__empty_new(data->range_cpu_data.end_cpu -
+				data->range_cpu_data.start_cpu + 1 + data->range_cpu_data.any_cpu);
+	if (!map)
+		return NULL;
+
+	if (data->range_cpu_data.any_cpu)
+		map->map[i++].cpu = -1;
+
+	for (int cpu = data->range_cpu_data.start_cpu; cpu <= data->range_cpu_data.end_cpu;
+	     i++, cpu++)
+		map->map[i].cpu = cpu;
+
+	return map;
+}
+
 struct perf_cpu_map *cpu_map__new_data(const struct perf_record_cpu_map_data *data)
 {
-	if (data->type == PERF_CPU_MAP__CPUS)
+	switch (data->type) {
+	case PERF_CPU_MAP__CPUS:
 		return cpu_map__from_entries(data);
-	else
+	case PERF_CPU_MAP__MASK:
 		return cpu_map__from_mask(data);
+	case PERF_CPU_MAP__RANGE_CPUS:
+		return cpu_map__from_range(data);
+	default:
+		pr_err("cpu_map__new_data unknown type %d\n", data->type);
+		return NULL;
+	}
 }
 
 size_t cpu_map__fprintf(struct perf_cpu_map *map, FILE *fp)
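To make the decode concrete, here is an editorial standalone sketch of the same expansion cpu_map__from_range() performs, with a plain int array standing in for struct perf_cpu_map (decode_range() is a hypothetical helper, not a perf API):

#include <stdio.h>

/* Expand {any_cpu, start, end} the way cpu_map__from_range() does. */
static int decode_range(int any_cpu, int start, int end, int *out)
{
	int i = 0;

	if (any_cpu)
		out[i++] = -1;	/* the "any CPU" dummy entry comes first */
	for (int cpu = start; cpu <= end; cpu++)
		out[i++] = cpu;
	return i;		/* == end - start + 1 + any_cpu, the nr allocated above */
}

int main(void)
{
	int cpus[8];
	int nr = decode_range(1, 2, 5, cpus);

	for (int i = 0; i < nr; i++)
		printf("%d ", cpus[i]);	/* prints: -1 2 3 4 5 */
	printf("\n");
	return 0;
}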
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -943,6 +943,11 @@ static void perf_event__cpu_map_swap(union perf_event *event,
 		default:
 			pr_err("cpu_map swap: unsupported long size\n");
 		}
+		break;
+	case PERF_CPU_MAP__RANGE_CPUS:
+		data->range_cpu_data.start_cpu = bswap_16(data->range_cpu_data.start_cpu);
+		data->range_cpu_data.end_cpu = bswap_16(data->range_cpu_data.end_cpu);
+		break;
 	default:
 		break;
 	}
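Only the two __u16 fields need swapping when reading a perf.data file from an opposite-endian machine; any_cpu and the pad byte are single bytes and endian-neutral. An editorial standalone illustration (toy struct, bswap_16 from glibc's <byteswap.h>):

#include <byteswap.h>
#include <stdint.h>
#include <stdio.h>

struct range_cpu_map {	/* toy mirror of perf_record_range_cpu_map */
	uint8_t  any_cpu;
	uint8_t  __pad;
	uint16_t start_cpu;
	uint16_t end_cpu;
};

int main(void)
{
	/* Field values as written by an opposite-endian machine... */
	struct range_cpu_map r = { .start_cpu = 0x0100, .end_cpu = 0x0001 };

	/* ...read back as the range 1-256 after the two 16-bit swaps. */
	r.start_cpu = bswap_16(r.start_cpu);
	r.end_cpu = bswap_16(r.end_cpu);
	printf("start=%u end=%u\n", r.start_cpu, r.end_cpu);
	return 0;
}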
--- a/tools/perf/util/synthetic-events.c
+++ b/tools/perf/util/synthetic-events.c
@@ -1195,93 +1195,97 @@ int perf_event__synthesize_thread_map2(struct perf_tool *tool,
 	return err;
 }
 
-static void synthesize_cpus(struct perf_record_cpu_map_data *data,
-			    const struct perf_cpu_map *map)
-{
-	int i, map_nr = perf_cpu_map__nr(map);
-
-	data->cpus_data.nr = map_nr;
+struct synthesize_cpu_map_data {
+	const struct perf_cpu_map *map;
+	int nr;
+	int min_cpu;
+	int max_cpu;
+	int has_any_cpu;
+	int type;
+	size_t size;
+	struct perf_record_cpu_map_data *data;
+};
 
-	for (i = 0; i < map_nr; i++)
-		data->cpus_data.cpu[i] = perf_cpu_map__cpu(map, i).cpu;
+static void synthesize_cpus(struct synthesize_cpu_map_data *data)
+{
+	data->data->type = PERF_CPU_MAP__CPUS;
+	data->data->cpus_data.nr = data->nr;
+	for (int i = 0; i < data->nr; i++)
+		data->data->cpus_data.cpu[i] = perf_cpu_map__cpu(data->map, i).cpu;
 }
 
-static void synthesize_mask(struct perf_record_cpu_map_data *data,
-			    const struct perf_cpu_map *map, int max)
+static void synthesize_mask(struct synthesize_cpu_map_data *data)
 {
 	int idx;
 	struct perf_cpu cpu;
 
 	/* Due to padding, the 4bytes per entry mask variant is always smaller. */
-	data->mask32_data.nr = BITS_TO_U32(max);
-	data->mask32_data.long_size = 4;
+	data->data->type = PERF_CPU_MAP__MASK;
+	data->data->mask32_data.nr = BITS_TO_U32(data->max_cpu);
+	data->data->mask32_data.long_size = 4;
 
-	perf_cpu_map__for_each_cpu(cpu, idx, map) {
+	perf_cpu_map__for_each_cpu(cpu, idx, data->map) {
 		int bit_word = cpu.cpu / 32;
-		__u32 bit_mask = 1U << (cpu.cpu & 31);
+		u32 bit_mask = 1U << (cpu.cpu & 31);
 
-		data->mask32_data.mask[bit_word] |= bit_mask;
+		data->data->mask32_data.mask[bit_word] |= bit_mask;
 	}
 }
 
-static size_t cpus_size(const struct perf_cpu_map *map)
+static void synthesize_range_cpus(struct synthesize_cpu_map_data *data)
 {
-	return sizeof(struct cpu_map_entries) + perf_cpu_map__nr(map) * sizeof(u16);
+	data->data->type = PERF_CPU_MAP__RANGE_CPUS;
+	data->data->range_cpu_data.any_cpu = data->has_any_cpu;
+	data->data->range_cpu_data.start_cpu = data->min_cpu;
+	data->data->range_cpu_data.end_cpu = data->max_cpu;
 }
 
-static size_t mask_size(const struct perf_cpu_map *map, int *max)
-{
-	*max = perf_cpu_map__max(map).cpu;
-	return sizeof(struct perf_record_mask_cpu_map32) + BITS_TO_U32(*max) * sizeof(__u32);
-}
-
-static void *cpu_map_data__alloc(const struct perf_cpu_map *map, size_t *size,
-				 u16 *type, int *max)
+static void *cpu_map_data__alloc(struct synthesize_cpu_map_data *syn_data,
+				 size_t header_size)
 {
 	size_t size_cpus, size_mask;
-	bool is_dummy = perf_cpu_map__empty(map);
 
-	/*
-	 * Both array and mask data have variable size based
-	 * on the number of cpus and their actual values.
-	 * The size of the 'struct perf_record_cpu_map_data' is:
-	 *
-	 *   array = size of 'struct cpu_map_entries' +
-	 *        number of cpus * sizeof(u64)
-	 *
-	 *   mask  = size of 'struct perf_record_record_cpu_map' +
-	 *        maximum cpu bit converted to size of longs
-	 *
-	 * and finally + the size of 'struct perf_record_cpu_map_data'.
-	 */
-	size_cpus = cpus_size(map);
-	size_mask = mask_size(map, max);
+	syn_data->nr = perf_cpu_map__nr(syn_data->map);
+	syn_data->has_any_cpu = (perf_cpu_map__cpu(syn_data->map, 0).cpu == -1) ? 1 : 0;
 
-	if (is_dummy || (size_cpus < size_mask)) {
-		*size += size_cpus;
-		*type = PERF_CPU_MAP__CPUS;
-	} else {
-		*size += size_mask;
-		*type = PERF_CPU_MAP__MASK;
+	syn_data->min_cpu = perf_cpu_map__cpu(syn_data->map, syn_data->has_any_cpu).cpu;
+	syn_data->max_cpu = perf_cpu_map__max(syn_data->map).cpu;
+	if (syn_data->max_cpu - syn_data->min_cpu + 1 == syn_data->nr - syn_data->has_any_cpu) {
+		/* A consecutive range of CPUs can be encoded using a range. */
+		assert(sizeof(u16) + sizeof(struct perf_record_range_cpu_map) == sizeof(u64));
+		syn_data->type = PERF_CPU_MAP__RANGE_CPUS;
+		syn_data->size = header_size + sizeof(u64);
+		return zalloc(syn_data->size);
 	}
 
-	*size += sizeof(__u16); /* For perf_record_cpu_map_data.type. */
-	*size = PERF_ALIGN(*size, sizeof(u64));
-	return zalloc(*size);
+	size_cpus = sizeof(u16) + sizeof(struct cpu_map_entries) + syn_data->nr * sizeof(u16);
+	/* Due to padding, the 4bytes per entry mask variant is always smaller. */
+	size_mask = sizeof(u16) + sizeof(struct perf_record_mask_cpu_map32) +
+		BITS_TO_U32(syn_data->max_cpu) * sizeof(__u32);
+	if (syn_data->has_any_cpu || size_cpus < size_mask) {
+		/* Follow the CPU map encoding. */
+		syn_data->type = PERF_CPU_MAP__CPUS;
+		syn_data->size = header_size + PERF_ALIGN(size_cpus, sizeof(u64));
+		return zalloc(syn_data->size);
+	}
+	/* Encode using a bitmask. */
+	syn_data->type = PERF_CPU_MAP__MASK;
+	syn_data->size = header_size + PERF_ALIGN(size_mask, sizeof(u64));
+	return zalloc(syn_data->size);
 }
 
-static void cpu_map_data__synthesize(struct perf_record_cpu_map_data *data,
-				     const struct perf_cpu_map *map,
-				     u16 type, int max)
+static void cpu_map_data__synthesize(struct synthesize_cpu_map_data *data)
 {
-	data->type = type;
-
-	switch (type) {
+	switch (data->type) {
 	case PERF_CPU_MAP__CPUS:
-		synthesize_cpus(data, map);
+		synthesize_cpus(data);
 		break;
 	case PERF_CPU_MAP__MASK:
-		synthesize_mask(data, map, max);
+		synthesize_mask(data);
+		break;
+	case PERF_CPU_MAP__RANGE_CPUS:
+		synthesize_range_cpus(data);
 		break;
 	default:
 		break;
 	}
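The selection logic above can be checked with a toy model (editorial sketch; choose() is a hypothetical stand-in, and the sizes ignore the header_size and u64 alignment the real allocator adds): "1-256" is consecutive and becomes a range, "0,2-20" is denser as a 32-bit mask, and the sparse "1,256" is smallest as a CPU list — exactly the three expectations in the tests.

#include <stdio.h>

enum { CPUS = 0, MASK = 1, RANGE = 2 };

static int choose(int nr, int min_cpu, int max_cpu, int has_any_cpu)
{
	/* Consecutive CPUs (ignoring a leading -1 entry) fit in 8 bytes. */
	if (max_cpu - min_cpu + 1 == nr - has_any_cpu)
		return RANGE;

	{
		size_t size_cpus = 2 + 2 + nr * 2;	/* type + nr + u16 per CPU */
		size_t size_mask = 2 + 4 + ((max_cpu + 31) / 32) * 4; /* type + hdr + u32 words */

		/* An "any CPU" (-1) entry cannot be expressed in a bitmask. */
		return (has_any_cpu || size_cpus < size_mask) ? CPUS : MASK;
	}
}

int main(void)
{
	printf("1-256  -> %d (expect RANGE=2)\n", choose(256, 1, 256, 0));
	printf("0,2-20 -> %d (expect MASK=1)\n", choose(20, 0, 20, 0));
	printf("1,256  -> %d (expect CPUS=0)\n", choose(2, 1, 256, 0));
	return 0;
}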
@@ -1289,23 +1293,22 @@ static void cpu_map_data__synthesize(struct perf_record_cpu_map_data *data,
 
 static struct perf_record_cpu_map *cpu_map_event__new(const struct perf_cpu_map *map)
 {
-	size_t size = sizeof(struct perf_event_header);
+	struct synthesize_cpu_map_data syn_data = { .map = map };
 	struct perf_record_cpu_map *event;
-	int max;
-	u16 type;
 
-	event = cpu_map_data__alloc(map, &size, &type, &max);
+	event = cpu_map_data__alloc(&syn_data, sizeof(struct perf_event_header));
 	if (!event)
 		return NULL;
 
+	syn_data.data = &event->data;
 	event->header.type = PERF_RECORD_CPU_MAP;
-	event->header.size = size;
-	event->data.type   = type;
-
-	cpu_map_data__synthesize(&event->data, map, type, max);
+	event->header.size = syn_data.size;
+	cpu_map_data__synthesize(&syn_data);
 	return event;
 }
 
+
 int perf_event__synthesize_cpu_map(struct perf_tool *tool,
 				   const struct perf_cpu_map *map,
 				   perf_event__handler_t process,
@@ -1998,24 +2001,20 @@ int perf_event__synthesize_event_update_name(struct perf_tool *tool, struct evse
 int perf_event__synthesize_event_update_cpus(struct perf_tool *tool, struct evsel *evsel,
 					     perf_event__handler_t process)
 {
-	size_t size = sizeof(struct perf_event_header) + sizeof(u64) + sizeof(u64);
+	struct synthesize_cpu_map_data syn_data = { .map = evsel->core.own_cpus };
 	struct perf_record_event_update *ev;
-	int max, err;
-	u16 type;
+	int err;
 
 	if (!evsel->core.own_cpus)
 		return 0;
 
-	ev = cpu_map_data__alloc(evsel->core.own_cpus, &size, &type, &max);
+	ev = cpu_map_data__alloc(&syn_data, sizeof(struct perf_event_header) + 2 * sizeof(u64));
 	if (!ev)
 		return -ENOMEM;
 
+	syn_data.data = &ev->cpus.cpus;
 	ev->header.type = PERF_RECORD_EVENT_UPDATE;
-	ev->header.size = (u16)size;
+	ev->header.size = (u16)syn_data.size;
 	ev->type	= PERF_EVENT_UPDATE__CPUS;
 	ev->id		= evsel->core.id[0];
 
-	cpu_map_data__synthesize(&ev->cpus.cpus, evsel->core.own_cpus, type, max);
+	cpu_map_data__synthesize(&syn_data);
 
 	err = process(tool, (union perf_event *)ev, NULL, NULL);
 	free(ev);
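Both callers above now follow the same two-phase pattern: cpu_map_data__alloc() picks the encoding and total size, the caller aims syn_data.data at the payload inside the freshly allocated event, and cpu_map_data__synthesize() writes the chosen encoding. A toy model of that flow (editorial sketch; none of these types are the perf API):

#include <stdio.h>
#include <stdlib.h>

/* Toy stand-ins for perf_record_cpu_map_data / synthesize_cpu_map_data. */
struct payload { unsigned short type, start, end; };
struct syn { int min_cpu, max_cpu; size_t size; struct payload *data; };

/* Phase 1: pick encoding and size, allocate header + payload in one block. */
static void *toy_alloc(struct syn *s, size_t header_size)
{
	s->size = header_size + sizeof(struct payload);
	return calloc(1, s->size);	/* zalloc() in the real code */
}

int main(void)
{
	struct syn s = { .min_cpu = 1, .max_cpu = 256 };
	unsigned char *event = toy_alloc(&s, 8 /* stand-in for the event header */);

	if (!event)
		return 1;
	s.data = (struct payload *)(event + 8);	/* phase 2: point at the payload */
	s.data->type = 2;			/* phase 3: write the encoding */
	s.data->start = (unsigned short)s.min_cpu;
	s.data->end = (unsigned short)s.max_cpu;
	printf("event size %zu, range %u-%u\n", s.size, s.data->start, s.data->end);
	free(event);
	return 0;
}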