forked from Minki/linux
ca8ce3d0b1
It is better not to think about compute capacity as being equivalent to "CPU power". The upcoming "power aware" scheduler work may create confusion with the notion of energy consumption if "power" is used too liberally. This contains the architecture visible changes. Incidentally, only ARM takes advantage of the available pow^H^H^Hcapacity scaling hooks and therefore those changes outside kernel/sched/ are confined to one ARM specific file. The default arch_scale_smt_power() hook is not overridden by anyone. Replacements are as follows: arch_scale_freq_power --> arch_scale_freq_capacity arch_scale_smt_power --> arch_scale_smt_capacity SCHED_POWER_SCALE --> SCHED_CAPACITY_SCALE SCHED_POWER_SHIFT --> SCHED_CAPACITY_SHIFT The local usage of "power" in arch/arm/kernel/topology.c is also changed to "capacity" as appropriate. Signed-off-by: Nicolas Pitre <nico@linaro.org> Signed-off-by: Peter Zijlstra <peterz@infradead.org> Cc: Vincent Guittot <vincent.guittot@linaro.org> Cc: Daniel Lezcano <daniel.lezcano@linaro.org> Cc: Morten Rasmussen <morten.rasmussen@arm.com> Cc: "Rafael J. Wysocki" <rjw@rjwysocki.net> Cc: linaro-kernel@lists.linaro.org Cc: Arnd Bergmann <arnd@arndb.de> Cc: Dietmar Eggemann <dietmar.eggemann@arm.com> Cc: Grant Likely <grant.likely@linaro.org> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Mark Brown <broonie@linaro.org> Cc: Rob Herring <robh+dt@kernel.org> Cc: Russell King <linux@arm.linux.org.uk> Cc: Sudeep KarkadaNagesha <sudeep.karkadanagesha@arm.com> Cc: Vincent Guittot <vincent.guittot@linaro.org> Cc: devicetree@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Link: http://lkml.kernel.org/n/tip-48zba9qbznvglwelgq2cfygh@git.kernel.org Signed-off-by: Ingo Molnar <mingo@kernel.org>
319 lines
8.7 KiB
C
319 lines
8.7 KiB
C
/*
|
|
* arch/arm/kernel/topology.c
|
|
*
|
|
* Copyright (C) 2011 Linaro Limited.
|
|
* Written by: Vincent Guittot
|
|
*
|
|
* based on arch/sh/kernel/topology.c
|
|
*
|
|
* This file is subject to the terms and conditions of the GNU General Public
|
|
* License. See the file "COPYING" in the main directory of this archive
|
|
* for more details.
|
|
*/
|
|
|
|
#include <linux/cpu.h>
|
|
#include <linux/cpumask.h>
|
|
#include <linux/export.h>
|
|
#include <linux/init.h>
|
|
#include <linux/percpu.h>
|
|
#include <linux/node.h>
|
|
#include <linux/nodemask.h>
|
|
#include <linux/of.h>
|
|
#include <linux/sched.h>
|
|
#include <linux/slab.h>
|
|
|
|
#include <asm/cputype.h>
|
|
#include <asm/topology.h>
|
|
|
|
/*
|
|
* cpu capacity scale management
|
|
*/
|
|
|
|
/*
 * cpu capacity table
 * This per cpu data structure describes the relative capacity of each core.
 * On a heterogeneous system, cores don't have the same computation capacity
 * and we reflect that difference in the cpu_capacity field so the scheduler
 * can take this difference into account during load balance. A per cpu
 * structure is preferred because each CPU updates its own cpu_capacity field
 * during the load balance except for idle cores. One idle core is selected
 * to run the rebalance_domains for all idle cores and the cpu_capacity can be
 * updated during this sequence.
 */
|
|
/* Per-CPU scale value: returned by arch_scale_freq_capacity(), stored by set_capacity_scale(). */
static DEFINE_PER_CPU(unsigned long, cpu_scale);
|
|
|
|
unsigned long arch_scale_freq_capacity(struct sched_domain *sd, int cpu)
|
|
{
|
|
return per_cpu(cpu_scale, cpu);
|
|
}
|
|
|
|
static void set_capacity_scale(unsigned int cpu, unsigned long capacity)
|
|
{
|
|
per_cpu(cpu_scale, cpu) = capacity;
|
|
}
|
|
|
|
#ifdef CONFIG_OF
|
|
/*
 * Pairs a device-tree "compatible" string with the relative efficiency
 * figure used for CPUs that match that string.
 */
struct cpu_efficiency {
	/* DT compatible string to match; a NULL string ends a table */
	const char *compatible;
	/* relative efficiency weight applied to CPUs of that type */
	unsigned long efficiency;
};
|
|
|
|
/*
|
|
* Table of relative efficiency of each processors
|
|
* The efficiency value must fit in 20bit and the final
|
|
* cpu_scale value must be in the range
|
|
* 0 < cpu_scale < 3*SCHED_CAPACITY_SCALE/2
|
|
* in order to return at most 1 when DIV_ROUND_CLOSEST
|
|
* is used to compute the capacity of a CPU.
|
|
* Processors that are not defined in the table,
|
|
* use the default SCHED_CAPACITY_SCALE value for cpu_scale.
|
|
*/
|
|
static const struct cpu_efficiency table_efficiency[] = {
|
|
{"arm,cortex-a15", 3891},
|
|
{"arm,cortex-a7", 2048},
|
|
{NULL, },
|
|
};
|
|
|
|
/*
 * Divisor applied to a raw capacity before it is stored as cpu_scale.
 * Starts at 1 and is recomputed by parse_dt_topology().
 */
static unsigned long middle_capacity = 1;

/*
 * Raw per-CPU capacities, indexed by logical CPU number.  The array is
 * allocated by parse_dt_topology(); cpu_capacity() names one slot of it.
 */
static unsigned long *__cpu_capacity;
#define cpu_capacity(cpu)	(__cpu_capacity[(cpu)])
|
|
|
|
/*
|
|
* Iterate all CPUs' descriptor in DT and compute the efficiency
|
|
* (as per table_efficiency). Also calculate a middle efficiency
|
|
* as close as possible to (max{eff_i} - min{eff_i}) / 2
|
|
* This is later used to scale the cpu_capacity field such that an
|
|
* 'average' CPU is of middle capacity. Also see the comments near
|
|
* table_efficiency[] and update_cpu_capacity().
|
|
*/
|
|
static void __init parse_dt_topology(void)
|
|
{
|
|
const struct cpu_efficiency *cpu_eff;
|
|
struct device_node *cn = NULL;
|
|
unsigned long min_capacity = (unsigned long)(-1);
|
|
unsigned long max_capacity = 0;
|
|
unsigned long capacity = 0;
|
|
int alloc_size, cpu = 0;
|
|
|
|
alloc_size = nr_cpu_ids * sizeof(*__cpu_capacity);
|
|
__cpu_capacity = kzalloc(alloc_size, GFP_NOWAIT);
|
|
|
|
for_each_possible_cpu(cpu) {
|
|
const u32 *rate;
|
|
int len;
|
|
|
|
/* too early to use cpu->of_node */
|
|
cn = of_get_cpu_node(cpu, NULL);
|
|
if (!cn) {
|
|
pr_err("missing device node for CPU %d\n", cpu);
|
|
continue;
|
|
}
|
|
|
|
for (cpu_eff = table_efficiency; cpu_eff->compatible; cpu_eff++)
|
|
if (of_device_is_compatible(cn, cpu_eff->compatible))
|
|
break;
|
|
|
|
if (cpu_eff->compatible == NULL)
|
|
continue;
|
|
|
|
rate = of_get_property(cn, "clock-frequency", &len);
|
|
if (!rate || len != 4) {
|
|
pr_err("%s missing clock-frequency property\n",
|
|
cn->full_name);
|
|
continue;
|
|
}
|
|
|
|
capacity = ((be32_to_cpup(rate)) >> 20) * cpu_eff->efficiency;
|
|
|
|
/* Save min capacity of the system */
|
|
if (capacity < min_capacity)
|
|
min_capacity = capacity;
|
|
|
|
/* Save max capacity of the system */
|
|
if (capacity > max_capacity)
|
|
max_capacity = capacity;
|
|
|
|
cpu_capacity(cpu) = capacity;
|
|
}
|
|
|
|
/* If min and max capacities are equals, we bypass the update of the
|
|
* cpu_scale because all CPUs have the same capacity. Otherwise, we
|
|
* compute a middle_capacity factor that will ensure that the capacity
|
|
* of an 'average' CPU of the system will be as close as possible to
|
|
* SCHED_CAPACITY_SCALE, which is the default value, but with the
|
|
* constraint explained near table_efficiency[].
|
|
*/
|
|
if (4*max_capacity < (3*(max_capacity + min_capacity)))
|
|
middle_capacity = (min_capacity + max_capacity)
|
|
>> (SCHED_CAPACITY_SHIFT+1);
|
|
else
|
|
middle_capacity = ((max_capacity / 3)
|
|
>> (SCHED_CAPACITY_SHIFT-1)) + 1;
|
|
|
|
}
|
|
|
|
/*
|
|
* Look for a customed capacity of a CPU in the cpu_capacity table during the
|
|
* boot. The update of all CPUs is in O(n^2) for heteregeneous system but the
|
|
* function returns directly for SMP system.
|
|
*/
|
|
static void update_cpu_capacity(unsigned int cpu)
|
|
{
|
|
if (!cpu_capacity(cpu))
|
|
return;
|
|
|
|
set_capacity_scale(cpu, cpu_capacity(cpu) / middle_capacity);
|
|
|
|
printk(KERN_INFO "CPU%u: update cpu_capacity %lu\n",
|
|
cpu, arch_scale_freq_capacity(NULL, cpu));
|
|
}
|
|
|
|
#else
|
|
/* Built without CONFIG_OF: there is no device tree to parse, so do nothing. */
static inline void parse_dt_topology(void)
{
}
|
|
/* Built without CONFIG_OF: no capacity table exists, so @cpuid is left untouched. */
static inline void update_cpu_capacity(unsigned int cpuid)
{
}
|
|
#endif
|
|
|
|
/*
|
|
* cpu topology table
|
|
*/
|
|
/* One topology record (thread/core/socket ids and sibling masks) per CPU, indexed by logical CPU number; exported just below. */
struct cputopo_arm cpu_topology[NR_CPUS];
|
|
EXPORT_SYMBOL_GPL(cpu_topology);
|
|
|
|
const struct cpumask *cpu_coregroup_mask(int cpu)
|
|
{
|
|
return &cpu_topology[cpu].core_sibling;
|
|
}
|
|
|
|
/*
|
|
* The current assumption is that we can power gate each core independently.
|
|
* This will be superseded by DT binding once available.
|
|
*/
|
|
const struct cpumask *cpu_corepower_mask(int cpu)
|
|
{
|
|
return &cpu_topology[cpu].thread_sibling;
|
|
}
|
|
|
|
static void update_siblings_masks(unsigned int cpuid)
|
|
{
|
|
struct cputopo_arm *cpu_topo, *cpuid_topo = &cpu_topology[cpuid];
|
|
int cpu;
|
|
|
|
/* update core and thread sibling masks */
|
|
for_each_possible_cpu(cpu) {
|
|
cpu_topo = &cpu_topology[cpu];
|
|
|
|
if (cpuid_topo->socket_id != cpu_topo->socket_id)
|
|
continue;
|
|
|
|
cpumask_set_cpu(cpuid, &cpu_topo->core_sibling);
|
|
if (cpu != cpuid)
|
|
cpumask_set_cpu(cpu, &cpuid_topo->core_sibling);
|
|
|
|
if (cpuid_topo->core_id != cpu_topo->core_id)
|
|
continue;
|
|
|
|
cpumask_set_cpu(cpuid, &cpu_topo->thread_sibling);
|
|
if (cpu != cpuid)
|
|
cpumask_set_cpu(cpu, &cpuid_topo->thread_sibling);
|
|
}
|
|
smp_wmb();
|
|
}
|
|
|
|
/*
|
|
* store_cpu_topology is called at boot when only one cpu is running
|
|
* and with the mutex cpu_hotplug.lock locked, when several cpus have booted,
|
|
* which prevents simultaneous write access to cpu_topology array
|
|
*/
|
|
void store_cpu_topology(unsigned int cpuid)
|
|
{
|
|
struct cputopo_arm *cpuid_topo = &cpu_topology[cpuid];
|
|
unsigned int mpidr;
|
|
|
|
/* If the cpu topology has been already set, just return */
|
|
if (cpuid_topo->core_id != -1)
|
|
return;
|
|
|
|
mpidr = read_cpuid_mpidr();
|
|
|
|
/* create cpu topology mapping */
|
|
if ((mpidr & MPIDR_SMP_BITMASK) == MPIDR_SMP_VALUE) {
|
|
/*
|
|
* This is a multiprocessor system
|
|
* multiprocessor format & multiprocessor mode field are set
|
|
*/
|
|
|
|
if (mpidr & MPIDR_MT_BITMASK) {
|
|
/* core performance interdependency */
|
|
cpuid_topo->thread_id = MPIDR_AFFINITY_LEVEL(mpidr, 0);
|
|
cpuid_topo->core_id = MPIDR_AFFINITY_LEVEL(mpidr, 1);
|
|
cpuid_topo->socket_id = MPIDR_AFFINITY_LEVEL(mpidr, 2);
|
|
} else {
|
|
/* largely independent cores */
|
|
cpuid_topo->thread_id = -1;
|
|
cpuid_topo->core_id = MPIDR_AFFINITY_LEVEL(mpidr, 0);
|
|
cpuid_topo->socket_id = MPIDR_AFFINITY_LEVEL(mpidr, 1);
|
|
}
|
|
} else {
|
|
/*
|
|
* This is an uniprocessor system
|
|
* we are in multiprocessor format but uniprocessor system
|
|
* or in the old uniprocessor format
|
|
*/
|
|
cpuid_topo->thread_id = -1;
|
|
cpuid_topo->core_id = 0;
|
|
cpuid_topo->socket_id = -1;
|
|
}
|
|
|
|
update_siblings_masks(cpuid);
|
|
|
|
update_cpu_capacity(cpuid);
|
|
|
|
printk(KERN_INFO "CPU%u: thread %d, cpu %d, socket %d, mpidr %x\n",
|
|
cpuid, cpu_topology[cpuid].thread_id,
|
|
cpu_topology[cpuid].core_id,
|
|
cpu_topology[cpuid].socket_id, mpidr);
|
|
}
|
|
|
|
static inline const int cpu_corepower_flags(void)
|
|
{
|
|
return SD_SHARE_PKG_RESOURCES | SD_SHARE_POWERDOMAIN;
|
|
}
|
|
|
|
static struct sched_domain_topology_level arm_topology[] = {
|
|
#ifdef CONFIG_SCHED_MC
|
|
{ cpu_corepower_mask, cpu_corepower_flags, SD_INIT_NAME(GMC) },
|
|
{ cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) },
|
|
#endif
|
|
{ cpu_cpu_mask, SD_INIT_NAME(DIE) },
|
|
{ NULL, },
|
|
};
|
|
|
|
/*
|
|
* init_cpu_topology is called at boot when only one cpu is running
|
|
* which prevent simultaneous write access to cpu_topology array
|
|
*/
|
|
void __init init_cpu_topology(void)
|
|
{
|
|
unsigned int cpu;
|
|
|
|
/* init core mask and capacity */
|
|
for_each_possible_cpu(cpu) {
|
|
struct cputopo_arm *cpu_topo = &(cpu_topology[cpu]);
|
|
|
|
cpu_topo->thread_id = -1;
|
|
cpu_topo->core_id = -1;
|
|
cpu_topo->socket_id = -1;
|
|
cpumask_clear(&cpu_topo->core_sibling);
|
|
cpumask_clear(&cpu_topo->thread_sibling);
|
|
|
|
set_capacity_scale(cpu, SCHED_CAPACITY_SCALE);
|
|
}
|
|
smp_wmb();
|
|
|
|
parse_dt_topology();
|
|
|
|
/* Set scheduler topology descriptor */
|
|
set_sched_topology(arm_topology);
|
|
}
|