linux/arch/ia64/include/asm/topology.h
Mike Galbraith 0ec9fab3d1 sched: Improve latencies and throughput
Make the idle balancer more agressive, to improve a
x264 encoding workload provided by Jason Garrett-Glaser:

 NEXT_BUDDY NO_LB_BIAS
 encoded 600 frames, 252.82 fps, 22096.60 kb/s
 encoded 600 frames, 250.69 fps, 22096.60 kb/s
 encoded 600 frames, 245.76 fps, 22096.60 kb/s

 NO_NEXT_BUDDY LB_BIAS
 encoded 600 frames, 344.44 fps, 22096.60 kb/s
 encoded 600 frames, 346.66 fps, 22096.60 kb/s
 encoded 600 frames, 352.59 fps, 22096.60 kb/s

 NO_NEXT_BUDDY NO_LB_BIAS
 encoded 600 frames, 425.75 fps, 22096.60 kb/s
 encoded 600 frames, 425.45 fps, 22096.60 kb/s
 encoded 600 frames, 422.49 fps, 22096.60 kb/s

Peter pointed out that this is better done via newidle_idx,
not via LB_BIAS, newidle balancing should look for where
there is load _now_, not where there was load 2 ticks ago.

Worst-case latencies are improved as well as no buddies
means less vruntime spread. (as per prior lkml discussions)

This change improves kbuild-peak parallelism as well.

Reported-by: Jason Garrett-Glaser <darkshikari@gmail.com>
Signed-off-by: Mike Galbraith <efault@gmx.de>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1253011667.9128.16.camel@marge.simson.net>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-09-15 16:51:16 +02:00

125 lines
3.2 KiB
C

/*
* Copyright (C) 2002, Erich Focht, NEC
*
* All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*/
#ifndef _ASM_IA64_TOPOLOGY_H
#define _ASM_IA64_TOPOLOGY_H
#include <asm/acpi.h>
#include <asm/numa.h>
#include <asm/smp.h>
#ifdef CONFIG_NUMA
/* Nodes w/o CPUs are preferred for memory allocations, see build_zonelists */
#define PENALTY_FOR_NODE_WITH_CPUS 255
/*
* Distance above which we begin to use zone reclaim
*/
#define RECLAIM_DISTANCE 15
/*
* Returns the number of the node containing CPU 'cpu'
*/
#define cpu_to_node(cpu) (int)(cpu_to_node_map[cpu])
/*
* Returns a bitmask of CPUs on Node 'node'.
*/
#define node_to_cpumask(node) (node_to_cpu_mask[node])
#define cpumask_of_node(node) (&node_to_cpu_mask[node])
/*
* Returns the number of the node containing Node 'nid'.
* Not implemented here. Multi-level hierarchies detected with
* the help of node_distance().
*/
#define parent_node(nid) (nid)
/*
* Determines the node for a given pci bus
*/
#define pcibus_to_node(bus) PCI_CONTROLLER(bus)->node
void build_cpu_to_node_map(void);
#define SD_CPU_INIT (struct sched_domain) { \
.parent = NULL, \
.child = NULL, \
.groups = NULL, \
.min_interval = 1, \
.max_interval = 4, \
.busy_factor = 64, \
.imbalance_pct = 125, \
.cache_nice_tries = 2, \
.busy_idx = 2, \
.idle_idx = 1, \
.newidle_idx = 0, \
.wake_idx = 0, \
.forkexec_idx = 1, \
.flags = SD_LOAD_BALANCE \
| SD_BALANCE_NEWIDLE \
| SD_BALANCE_EXEC \
| SD_BALANCE_FORK \
| SD_BALANCE_WAKE \
| SD_WAKE_AFFINE, \
.last_balance = jiffies, \
.balance_interval = 1, \
.nr_balance_failed = 0, \
}
/* sched_domains SD_NODE_INIT for IA64 NUMA machines */
#define SD_NODE_INIT (struct sched_domain) { \
.parent = NULL, \
.child = NULL, \
.groups = NULL, \
.min_interval = 8, \
.max_interval = 8*(min(num_online_cpus(), 32U)), \
.busy_factor = 64, \
.imbalance_pct = 125, \
.cache_nice_tries = 2, \
.busy_idx = 3, \
.idle_idx = 2, \
.newidle_idx = 0, \
.wake_idx = 0, \
.forkexec_idx = 1, \
.flags = SD_LOAD_BALANCE \
| SD_BALANCE_NEWIDLE \
| SD_BALANCE_EXEC \
| SD_BALANCE_FORK \
| SD_BALANCE_WAKE \
| SD_SERIALIZE, \
.last_balance = jiffies, \
.balance_interval = 64, \
.nr_balance_failed = 0, \
}
#endif /* CONFIG_NUMA */
#ifdef CONFIG_SMP
#define topology_physical_package_id(cpu) (cpu_data(cpu)->socket_id)
#define topology_core_id(cpu) (cpu_data(cpu)->core_id)
#define topology_core_siblings(cpu) (cpu_core_map[cpu])
#define topology_thread_siblings(cpu) (per_cpu(cpu_sibling_map, cpu))
#define topology_core_cpumask(cpu) (&cpu_core_map[cpu])
#define topology_thread_cpumask(cpu) (&per_cpu(cpu_sibling_map, cpu))
#define smt_capable() (smp_num_siblings > 1)
#endif
extern void arch_fix_phys_package_id(int num, u32 slot);
#define cpumask_of_pcibus(bus) (pcibus_to_node(bus) == -1 ? \
cpu_all_mask : \
cpumask_of_node(pcibus_to_node(bus)))
#include <asm-generic/topology.h>
#endif /* _ASM_IA64_TOPOLOGY_H */