mirror of
https://github.com/torvalds/linux.git
synced 2024-11-27 14:41:39 +00:00
6f91ea283a
To stitch LBR call stack, the max LBR information is required. So the CPU PMU capabilities information has to be stored in perf header. Add a new feature HEADER_CPU_PMU_CAPS for CPU PMU capabilities. Retrieve all CPU PMU capabilities, not just max LBR information. Add variable max_branches to facilitate future usage. Committer testing: # ls -la /sys/devices/cpu/caps/ total 0 drwxr-xr-x. 2 root root 0 Apr 17 10:53 . drwxr-xr-x. 6 root root 0 Apr 17 07:02 .. -r--r--r--. 1 root root 4096 Apr 17 10:53 max_precise # # cat /sys/devices/cpu/caps/max_precise 0 # perf record sleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.033 MB perf.data (7 samples) ] # # perf report --header-only | egrep 'cpu(desc|.*capabilities)' # cpudesc : AMD Ryzen 5 3600X 6-Core Processor # cpu pmu capabilities: max_precise=0 # And then on an Intel machine: $ ls -la /sys/devices/cpu/caps/ total 0 drwxr-xr-x. 2 root root 0 Apr 17 10:51 . drwxr-xr-x. 6 root root 0 Apr 17 10:04 .. -r--r--r--. 1 root root 4096 Apr 17 11:37 branches -r--r--r--. 1 root root 4096 Apr 17 10:51 max_precise -r--r--r--. 
1 root root 4096 Apr 17 11:37 pmu_name $ cat /sys/devices/cpu/caps/max_precise 3 $ cat /sys/devices/cpu/caps/branches 32 $ cat /sys/devices/cpu/caps/pmu_name skylake $ perf record sleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.001 MB perf.data (8 samples) ] $ perf report --header-only | egrep 'cpu(desc|.*capabilities)' # cpudesc : Intel(R) Core(TM) i5-7500 CPU @ 3.40GHz # cpu pmu capabilities: branches=32, max_precise=3, pmu_name=skylake $ Signed-off-by: Kan Liang <kan.liang@linux.intel.com> Reviewed-by: Andi Kleen <ak@linux.intel.com> Acked-by: Jiri Olsa <jolsa@redhat.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Alexey Budankov <alexey.budankov@linux.intel.com> Cc: Mathieu Poirier <mathieu.poirier@linaro.org> Cc: Michael Ellerman <mpe@ellerman.id.au> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Pavel Gerasimov <pavel.gerasimov@intel.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Ravi Bangoria <ravi.bangoria@linux.ibm.com> Cc: Stephane Eranian <eranian@google.com> Cc: Vitaly Slobodskoy <vitaly.slobodskoy@intel.com> Link: http://lore.kernel.org/lkml/20200319202517.23423-3-kan.liang@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
139 lines
3.0 KiB
C
139 lines
3.0 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
#ifndef __PERF_ENV_H
|
|
#define __PERF_ENV_H
|
|
|
|
#include <linux/types.h>
|
|
#include <linux/rbtree.h>
|
|
#include "rwsem.h"
|
|
|
|
struct perf_cpu_map;
|
|
|
|
/*
 * Topology identifiers for one CPU. struct perf_env keeps a pointer to
 * these in its 'cpu' member (presumably an array indexed by CPU number --
 * confirm against perf_env__read_cpu_topology_map()).
 */
struct cpu_topology_map {
	int	socket_id;
	int	die_id;
	int	core_id;
};
|
|
|
|
struct cpu_cache_level {
|
|
u32 level;
|
|
u32 line_size;
|
|
u32 sets;
|
|
u32 ways;
|
|
char *type;
|
|
char *size;
|
|
char *map;
|
|
};
|
|
|
|
struct numa_node {
|
|
u32 node;
|
|
u64 mem_total;
|
|
u64 mem_free;
|
|
struct perf_cpu_map *map;
|
|
};
|
|
|
|
struct memory_node {
|
|
u64 node;
|
|
u64 size;
|
|
unsigned long *set;
|
|
};
|
|
|
|
struct perf_env {
|
|
char *hostname;
|
|
char *os_release;
|
|
char *version;
|
|
char *arch;
|
|
int nr_cpus_online;
|
|
int nr_cpus_avail;
|
|
char *cpu_desc;
|
|
char *cpuid;
|
|
unsigned long long total_mem;
|
|
unsigned int msr_pmu_type;
|
|
unsigned int max_branches;
|
|
|
|
int nr_cmdline;
|
|
int nr_sibling_cores;
|
|
int nr_sibling_dies;
|
|
int nr_sibling_threads;
|
|
int nr_numa_nodes;
|
|
int nr_memory_nodes;
|
|
int nr_pmu_mappings;
|
|
int nr_groups;
|
|
int nr_cpu_pmu_caps;
|
|
char *cmdline;
|
|
const char **cmdline_argv;
|
|
char *sibling_cores;
|
|
char *sibling_dies;
|
|
char *sibling_threads;
|
|
char *pmu_mappings;
|
|
char *cpu_pmu_caps;
|
|
struct cpu_topology_map *cpu;
|
|
struct cpu_cache_level *caches;
|
|
int caches_cnt;
|
|
u32 comp_ratio;
|
|
u32 comp_ver;
|
|
u32 comp_type;
|
|
u32 comp_level;
|
|
u32 comp_mmap_len;
|
|
struct numa_node *numa_nodes;
|
|
struct memory_node *memory_nodes;
|
|
unsigned long long memory_bsize;
|
|
u64 clockid_res_ns;
|
|
|
|
/*
|
|
* bpf_info_lock protects bpf rbtrees. This is needed because the
|
|
* trees are accessed by different threads in perf-top
|
|
*/
|
|
struct {
|
|
struct rw_semaphore lock;
|
|
struct rb_root infos;
|
|
u32 infos_cnt;
|
|
struct rb_root btfs;
|
|
u32 btfs_cnt;
|
|
} bpf_progs;
|
|
|
|
/* same reason as above (for perf-top) */
|
|
struct {
|
|
struct rw_semaphore lock;
|
|
struct rb_root tree;
|
|
} cgroups;
|
|
|
|
/* For fast cpu to numa node lookup via perf_env__numa_node */
|
|
int *numa_map;
|
|
int nr_numa_map;
|
|
};
|
|
|
|
/*
 * Identifiers for the supported compression schemes. PERF_COMP_MAX is the
 * number of defined types, not a type itself, and must stay last.
 */
enum perf_compress_type {
	PERF_COMP_NONE = 0,
	PERF_COMP_ZSTD,
	PERF_COMP_MAX
};
|
|
|
|
struct bpf_prog_info_node;
|
|
struct btf_node;
|
|
|
|
extern struct perf_env perf_env;
|
|
|
|
void perf_env__exit(struct perf_env *env);
|
|
|
|
int perf_env__set_cmdline(struct perf_env *env, int argc, const char *argv[]);
|
|
|
|
int perf_env__read_cpuid(struct perf_env *env);
|
|
int perf_env__read_cpu_topology_map(struct perf_env *env);
|
|
|
|
void cpu_cache_level__free(struct cpu_cache_level *cache);
|
|
|
|
const char *perf_env__arch(struct perf_env *env);
|
|
const char *perf_env__raw_arch(struct perf_env *env);
|
|
int perf_env__nr_cpus_avail(struct perf_env *env);
|
|
|
|
void perf_env__init(struct perf_env *env);
|
|
void perf_env__insert_bpf_prog_info(struct perf_env *env,
|
|
struct bpf_prog_info_node *info_node);
|
|
struct bpf_prog_info_node *perf_env__find_bpf_prog_info(struct perf_env *env,
|
|
__u32 prog_id);
|
|
void perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node);
|
|
struct btf_node *perf_env__find_btf(struct perf_env *env, __u32 btf_id);
|
|
|
|
int perf_env__numa_node(struct perf_env *env, int cpu);
|
|
#endif /* __PERF_ENV_H */
|