perf topology: Add core_wide
It is possible to optimize metrics when all SMT threads (CPUs) on a core are measuring events in system wide mode. For example, TMA metrics defines CORE_CLKS for Sandybridge as:

	if SMT is disabled:
		CPU_CLK_UNHALTED.THREAD
	if SMT is enabled and recording on all SMT threads:
		CPU_CLK_UNHALTED.THREAD_ANY / 2
	if SMT is enabled and not recording on all SMT threads:
		(CPU_CLK_UNHALTED.THREAD / 2) *
		(1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK)

That is, two more events are necessary when not gathering counts on all SMT threads. To distinguish all SMT threads on a core vs system wide (all CPUs), call the new property core wide. Add a core wide test that determines the property from the user requested CPUs, the topology and system wide. System wide is required as other processes running on an SMT thread will change the counts.

Signed-off-by: Ian Rogers <irogers@google.com>
Cc: Ahmad Yasin <ahmad.yasin@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Caleb Biggers <caleb.biggers@intel.com>
Cc: Florian Fischer <florian.fischer@muhq.space>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: John Garry <john.garry@huawei.com>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Kshipra Bopardikar <kshipra.bopardikar@intel.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Miaoqian Lin <linmq006@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Perry Taylor <perry.taylor@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Richter <tmricht@linux.ibm.com>
Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Link: https://lore.kernel.org/r/20220831174926.579643-5-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
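For illustration only (not part of the patch), a minimal C sketch of that CORE_CLKS selection; the helper name and its count parameters are hypothetical stand-ins for the event counts named above, not perf APIs:

	#include <stdbool.h>

	/* Hypothetical helper: picks the CORE_CLKS expression per the cases above. */
	static double core_clks(bool smt_enabled, bool core_wide,
				double clk_thread,        /* CPU_CLK_UNHALTED.THREAD */
				double clk_thread_any,    /* CPU_CLK_UNHALTED.THREAD_ANY */
				double one_thread_active, /* CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE */
				double ref_xclk)          /* CPU_CLK_UNHALTED.REF_XCLK */
	{
		if (!smt_enabled)
			return clk_thread;
		if (core_wide)
			return clk_thread_any / 2;
		/* Not core wide: two extra events approximate the core clocks. */
		return (clk_thread / 2) * (1 + one_thread_active / ref_xclk);
	}

The third case is why the core wide property matters: without it, ONE_THREAD_ACTIVE and REF_XCLK must also be programmed.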
@@ -172,6 +172,52 @@ bool cpu_topology__smt_on(const struct cpu_topology *topology)
	return false;
}

bool cpu_topology__core_wide(const struct cpu_topology *topology,
			     const char *user_requested_cpu_list)
{
	struct perf_cpu_map *user_requested_cpus;

	/*
	 * If user_requested_cpu_list is empty then all CPUs are recorded and so
	 * core_wide is true.
	 */
	if (!user_requested_cpu_list)
		return true;

	user_requested_cpus = perf_cpu_map__new(user_requested_cpu_list);
	/* Check that every user requested CPU is the complete set of SMT threads on a core. */
	for (u32 i = 0; i < topology->core_cpus_lists; i++) {
		const char *core_cpu_list = topology->core_cpus_list[i];
		struct perf_cpu_map *core_cpus = perf_cpu_map__new(core_cpu_list);
		struct perf_cpu cpu;
		int idx;
		bool has_first, first = true;

		perf_cpu_map__for_each_cpu(cpu, idx, core_cpus) {
			if (first) {
				has_first = perf_cpu_map__has(user_requested_cpus, cpu);
				first = false;
			} else {
				/*
				 * If the first core CPU is user requested then
				 * all subsequent CPUs in the core must be user
				 * requested too. If the first CPU isn't user
				 * requested then none of the others must be
				 * too.
				 */
				if (perf_cpu_map__has(user_requested_cpus, cpu) != has_first) {
					perf_cpu_map__put(core_cpus);
					perf_cpu_map__put(user_requested_cpus);
					return false;
				}
			}
		}
		perf_cpu_map__put(core_cpus);
	}
	perf_cpu_map__put(user_requested_cpus);
	return true;
}

static bool has_die_topology(void)
{
	char filename[MAXPATHLEN];
@@ -60,6 +60,9 @@ struct cpu_topology *cpu_topology__new(void);
void cpu_topology__delete(struct cpu_topology *tp);
/* Determine from the core list whether SMT was enabled. */
bool cpu_topology__smt_on(const struct cpu_topology *topology);
/* Are the sets of SMT siblings all enabled or all disabled in user_requested_cpus. */
bool cpu_topology__core_wide(const struct cpu_topology *topology,
			     const char *user_requested_cpu_list);

struct numa_topology *numa_topology__new(void);
void numa_topology__delete(struct numa_topology *tp);
@@ -21,3 +21,17 @@ bool smt_on(const struct cpu_topology *topology)
	cached = true;
	return cached_result;
}

bool core_wide(bool system_wide, const char *user_requested_cpu_list,
	       const struct cpu_topology *topology)
{
	/* If not everything running on a core is being recorded then we can't use core_wide. */
	if (!system_wide)
		return false;

	/* Cheap case that SMT is disabled and therefore we're inherently core_wide. */
	if (!smt_on(topology))
		return true;

	return cpu_topology__core_wide(topology, user_requested_cpu_list);
}
@@ -7,4 +7,11 @@ struct cpu_topology;
/* Returns true if SMT (aka hyperthreading) is enabled. */
bool smt_on(const struct cpu_topology *topology);

/*
 * Returns true when system wide and all SMT threads for a core are in the
 * user_requested_cpus map.
 */
bool core_wide(bool system_wide, const char *user_requested_cpu_list,
	       const struct cpu_topology *topology);

#endif /* __SMT_H */
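For context, a hedged sketch of how a caller might combine the new helpers to decide whether the cheaper THREAD_ANY / 2 metric form may be used; the include paths and the wrapper name are assumptions, and only core_wide(), cpu_topology__new() and cpu_topology__delete() come from the code above:

	#include <stdbool.h>
	#include "cputopo.h"	/* assumed perf-internal header: cpu_topology__new()/__delete() */
	#include "smt.h"	/* declares smt_on() and the new core_wide() */

	/* Hypothetical wrapper: true when the cheap core-wide metric form applies. */
	static bool can_use_core_wide_metric(bool system_wide,
					     const char *user_requested_cpu_list)
	{
		struct cpu_topology *topology = cpu_topology__new();
		bool ret;

		if (!topology)
			return false;
		/* True only when every SMT thread of each measured core is recorded. */
		ret = core_wide(system_wide, user_requested_cpu_list, topology);
		cpu_topology__delete(topology);
		return ret;
	}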