forked from Minki/linux
e74e396204
This patch makes most !CONFIG_HAVE_SETUP_PER_CPU_AREA archs use the dynamic percpu allocator. The first chunk is allocated using the embedding helper and 8k is reserved for modules. This ensures that the new allocator behaves almost identically to the original allocator as far as static percpu variables are concerned, so it shouldn't introduce much breakage. s390 and alpha use a custom SHIFT_PERCPU_PTR() to work around the addressing range limit that the addressing model imposes. Unfortunately, this breaks if the address is specified using a variable, so for now, the two archs aren't converted. The following architectures are affected by this change: * sh * arm * cris * mips * sparc(32) * blackfin * avr32 * parisc (broken, under investigation) * m32r * powerpc(32) As this change makes the dynamic allocator the default one, CONFIG_HAVE_DYNAMIC_PER_CPU_AREA is replaced with its inverse - CONFIG_HAVE_LEGACY_PER_CPU_AREA, which is added to the yet-to-be-converted archs. These archs implement their own setup_per_cpu_areas() and the conversion is not trivial: * powerpc(64) * sparc(64) * ia64 * alpha * s390 Boot and batch alloc/free tested on x86_32 with debug code (x86_32 doesn't use the default first chunk initialization). Compile tested on sparc(32), powerpc(32), arm and alpha. Kyle McMartin reported that this change breaks parisc. The problem is still under investigation and he is okay with pushing this patch forward and fixing parisc later. [ Impact: use dynamic allocator for most archs w/o custom percpu setup ]
Signed-off-by: Tejun Heo <tj@kernel.org> Acked-by: Rusty Russell <rusty@rustcorp.com.au> Acked-by: David S. Miller <davem@davemloft.net> Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org> Acked-by: Martin Schwidefsky <schwidefsky@de.ibm.com> Reviewed-by: Christoph Lameter <cl@linux.com> Cc: Paul Mundt <lethal@linux-sh.org> Cc: Russell King <rmk@arm.linux.org.uk> Cc: Mikael Starvik <starvik@axis.com> Cc: Ralf Baechle <ralf@linux-mips.org> Cc: Bryan Wu <cooloney@kernel.org> Cc: Kyle McMartin <kyle@mcmartin.ca> Cc: Matthew Wilcox <matthew@wil.cx> Cc: Grant Grundler <grundler@parisc-linux.org> Cc: Hirokazu Takata <takata@linux-m32r.org> Cc: Richard Henderson <rth@twiddle.net> Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru> Cc: Heiko Carstens <heiko.carstens@de.ibm.com> Cc: Ingo Molnar <mingo@elte.hu>
178 lines
4.8 KiB
C
178 lines
4.8 KiB
C
/*
|
|
* linux/mm/allocpercpu.c
|
|
*
|
|
* Separated from slab.c August 11, 2006 Christoph Lameter
|
|
*/
|
|
#include <linux/mm.h>
|
|
#include <linux/module.h>
|
|
#include <linux/bootmem.h>
|
|
#include <asm/sections.h>
|
|
|
|
/* Fall back to L1_CACHE_BYTES when cache_line_size() has not been defined. */
#if !defined(cache_line_size)
#define cache_line_size()	L1_CACHE_BYTES
#endif
|
|
|
|
/**
|
|
* percpu_depopulate - depopulate per-cpu data for given cpu
|
|
* @__pdata: per-cpu data to depopulate
|
|
* @cpu: depopulate per-cpu data for this cpu
|
|
*
|
|
* Depopulating per-cpu data for a cpu going offline would be a typical
|
|
* use case. You need to register a cpu hotplug handler for that purpose.
|
|
*/
|
|
static void percpu_depopulate(void *__pdata, int cpu)
|
|
{
|
|
struct percpu_data *pdata = __percpu_disguise(__pdata);
|
|
|
|
kfree(pdata->ptrs[cpu]);
|
|
pdata->ptrs[cpu] = NULL;
|
|
}
|
|
|
|
/**
|
|
* percpu_depopulate_mask - depopulate per-cpu data for some cpu's
|
|
* @__pdata: per-cpu data to depopulate
|
|
* @mask: depopulate per-cpu data for cpu's selected through mask bits
|
|
*/
|
|
static void __percpu_depopulate_mask(void *__pdata, const cpumask_t *mask)
|
|
{
|
|
int cpu;
|
|
for_each_cpu_mask_nr(cpu, *mask)
|
|
percpu_depopulate(__pdata, cpu);
|
|
}
|
|
|
|
#define percpu_depopulate_mask(__pdata, mask) \
|
|
__percpu_depopulate_mask((__pdata), &(mask))
|
|
|
|
/**
|
|
* percpu_populate - populate per-cpu data for given cpu
|
|
* @__pdata: per-cpu data to populate further
|
|
* @size: size of per-cpu object
|
|
* @gfp: may sleep or not etc.
|
|
* @cpu: populate per-data for this cpu
|
|
*
|
|
* Populating per-cpu data for a cpu coming online would be a typical
|
|
* use case. You need to register a cpu hotplug handler for that purpose.
|
|
* Per-cpu object is populated with zeroed buffer.
|
|
*/
|
|
static void *percpu_populate(void *__pdata, size_t size, gfp_t gfp, int cpu)
|
|
{
|
|
struct percpu_data *pdata = __percpu_disguise(__pdata);
|
|
int node = cpu_to_node(cpu);
|
|
|
|
/*
|
|
* We should make sure each CPU gets private memory.
|
|
*/
|
|
size = roundup(size, cache_line_size());
|
|
|
|
BUG_ON(pdata->ptrs[cpu]);
|
|
if (node_online(node))
|
|
pdata->ptrs[cpu] = kmalloc_node(size, gfp|__GFP_ZERO, node);
|
|
else
|
|
pdata->ptrs[cpu] = kzalloc(size, gfp);
|
|
return pdata->ptrs[cpu];
|
|
}
|
|
|
|
/**
|
|
* percpu_populate_mask - populate per-cpu data for more cpu's
|
|
* @__pdata: per-cpu data to populate further
|
|
* @size: size of per-cpu object
|
|
* @gfp: may sleep or not etc.
|
|
* @mask: populate per-cpu data for cpu's selected through mask bits
|
|
*
|
|
* Per-cpu objects are populated with zeroed buffers.
|
|
*/
|
|
static int __percpu_populate_mask(void *__pdata, size_t size, gfp_t gfp,
|
|
cpumask_t *mask)
|
|
{
|
|
cpumask_t populated;
|
|
int cpu;
|
|
|
|
cpus_clear(populated);
|
|
for_each_cpu_mask_nr(cpu, *mask)
|
|
if (unlikely(!percpu_populate(__pdata, size, gfp, cpu))) {
|
|
__percpu_depopulate_mask(__pdata, &populated);
|
|
return -ENOMEM;
|
|
} else
|
|
cpu_set(cpu, populated);
|
|
return 0;
|
|
}
|
|
|
|
#define percpu_populate_mask(__pdata, size, gfp, mask) \
|
|
__percpu_populate_mask((__pdata), (size), (gfp), &(mask))
|
|
|
|
/**
|
|
* alloc_percpu - initial setup of per-cpu data
|
|
* @size: size of per-cpu object
|
|
* @align: alignment
|
|
*
|
|
* Allocate dynamic percpu area. Percpu objects are populated with
|
|
* zeroed buffers.
|
|
*/
|
|
void *__alloc_percpu(size_t size, size_t align)
|
|
{
|
|
/*
|
|
* We allocate whole cache lines to avoid false sharing
|
|
*/
|
|
size_t sz = roundup(nr_cpu_ids * sizeof(void *), cache_line_size());
|
|
void *pdata = kzalloc(sz, GFP_KERNEL);
|
|
void *__pdata = __percpu_disguise(pdata);
|
|
|
|
/*
|
|
* Can't easily make larger alignment work with kmalloc. WARN
|
|
* on it. Larger alignment should only be used for module
|
|
* percpu sections on SMP for which this path isn't used.
|
|
*/
|
|
WARN_ON_ONCE(align > SMP_CACHE_BYTES);
|
|
|
|
if (unlikely(!pdata))
|
|
return NULL;
|
|
if (likely(!__percpu_populate_mask(__pdata, size, GFP_KERNEL,
|
|
&cpu_possible_map)))
|
|
return __pdata;
|
|
kfree(pdata);
|
|
return NULL;
|
|
}
|
|
EXPORT_SYMBOL_GPL(__alloc_percpu);
|
|
|
|
/**
|
|
* free_percpu - final cleanup of per-cpu data
|
|
* @__pdata: object to clean up
|
|
*
|
|
* We simply clean up any per-cpu object left. No need for the client to
|
|
* track and specify through a bis mask which per-cpu objects are to free.
|
|
*/
|
|
void free_percpu(void *__pdata)
|
|
{
|
|
if (unlikely(!__pdata))
|
|
return;
|
|
__percpu_depopulate_mask(__pdata, cpu_possible_mask);
|
|
kfree(__percpu_disguise(__pdata));
|
|
}
|
|
EXPORT_SYMBOL_GPL(free_percpu);
|
|
|
|
/*
|
|
* Generic percpu area setup.
|
|
*/
|
|
#ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA
|
|
unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
|
|
|
|
EXPORT_SYMBOL(__per_cpu_offset);
|
|
|
|
void __init setup_per_cpu_areas(void)
|
|
{
|
|
unsigned long size, i;
|
|
char *ptr;
|
|
unsigned long nr_possible_cpus = num_possible_cpus();
|
|
|
|
/* Copy section for each CPU (we discard the original) */
|
|
size = ALIGN(PERCPU_ENOUGH_ROOM, PAGE_SIZE);
|
|
ptr = alloc_bootmem_pages(size * nr_possible_cpus);
|
|
|
|
for_each_possible_cpu(i) {
|
|
__per_cpu_offset[i] = ptr - __per_cpu_start;
|
|
memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
|
|
ptr += size;
|
|
}
|
|
}
|
|
#endif /* CONFIG_HAVE_SETUP_PER_CPU_AREA */
|