linux/arch/tile/include/asm/mmu_context.h
Christoph Lameter b4f501916c tile: Replace __get_cpu_var uses
__get_cpu_var() is used for multiple purposes in the kernel source. One of
them is address calculation via the form &__get_cpu_var(x).  This calculates
the address for the instance of the percpu variable of the current processor
based on an offset.

Other use cases are for storing and retrieving data from the current
processors percpu area.  __get_cpu_var() can be used as an lvalue when
writing data or on the right side of an assignment.

__get_cpu_var() is defined as :

#define __get_cpu_var(var) (*this_cpu_ptr(&(var)))

__get_cpu_var() always only does an address determination. However, store
and retrieve operations could use a segment prefix (or global register on
other platforms) to avoid the address calculation.

this_cpu_write() and this_cpu_read() can directly take an offset into a
percpu area and use optimized assembly code to read and write per cpu
variables.

This patch converts __get_cpu_var into either an explicit address
calculation using this_cpu_ptr() or into a use of this_cpu operations that
use the offset.  Thereby address calculations are avoided and less registers
are used when code is generated.

At the end of the patch set all uses of __get_cpu_var have been removed so
the macro is removed too.

The patch set includes passes over all arches as well. Once these operations
are used throughout then specialized macros can be defined in non -x86
arches as well in order to optimize per cpu access by f.e.  using a global
register that may be set to the per cpu base.

Transformations done to __get_cpu_var()

1. Determine the address of the percpu instance of the current processor.

	DEFINE_PER_CPU(int, y);
	int *x = &__get_cpu_var(y);

    Converts to

	int *x = this_cpu_ptr(&y);

2. Same as #1 but this time an array structure is involved.

	DEFINE_PER_CPU(int, y[20]);
	int *x = __get_cpu_var(y);

    Converts to

	int *x = this_cpu_ptr(y);

3. Retrieve the content of the current processors instance of a per cpu
variable.

	DEFINE_PER_CPU(int, y);
	int x = __get_cpu_var(y)

   Converts to

	int x = __this_cpu_read(y);

4. Retrieve the content of a percpu struct

	DEFINE_PER_CPU(struct mystruct, y);
	struct mystruct x = __get_cpu_var(y);

   Converts to

	memcpy(&x, this_cpu_ptr(&y), sizeof(x));

5. Assignment to a per cpu variable

	DEFINE_PER_CPU(int, y)
	__get_cpu_var(y) = x;

   Converts to

	__this_cpu_write(y, x);

6. Increment/Decrement etc of a per cpu variable

	DEFINE_PER_CPU(int, y);
	__get_cpu_var(y)++

   Converts to

	__this_cpu_inc(y)

Acked-by: Chris Metcalf <cmetcalf@tilera.com>
Signed-off-by: Christoph Lameter <cl@linux.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
2014-08-26 13:45:54 -04:00

136 lines
4.5 KiB
C

/*
* Copyright 2010 Tilera Corporation. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation, version 2.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
* NON INFRINGEMENT. See the GNU General Public License for
* more details.
*/
#ifndef _ASM_TILE_MMU_CONTEXT_H
#define _ASM_TILE_MMU_CONTEXT_H
#include <linux/smp.h>
#include <asm/setup.h>
#include <asm/page.h>
#include <asm/pgalloc.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <asm/homecache.h>
#include <asm-generic/mm_hooks.h>
static inline int
init_new_context(struct task_struct *tsk, struct mm_struct *mm)
{
return 0;
}
/*
* Note that arch/tile/kernel/head_NN.S and arch/tile/mm/migrate_NN.S
* also call hv_install_context().
*/
static inline void __install_page_table(pgd_t *pgdir, int asid, pgprot_t prot)
{
/* FIXME: DIRECTIO should not always be set. FIXME. */
int rc = hv_install_context(__pa(pgdir), prot, asid,
HV_CTX_DIRECTIO | CTX_PAGE_FLAG);
if (rc < 0)
panic("hv_install_context failed: %d", rc);
}
static inline void install_page_table(pgd_t *pgdir, int asid)
{
pte_t *ptep = virt_to_kpte((unsigned long)pgdir);
__install_page_table(pgdir, asid, *ptep);
}
/*
* "Lazy" TLB mode is entered when we are switching to a kernel task,
* which borrows the mm of the previous task. The goal of this
* optimization is to avoid having to install a new page table. On
* early x86 machines (where the concept originated) you couldn't do
* anything short of a full page table install for invalidation, so
* handling a remote TLB invalidate required doing a page table
* re-install. Someone clearly decided that it was silly to keep
* doing this while in "lazy" TLB mode, so the optimization involves
* installing the swapper page table instead the first time one
* occurs, and clearing the cpu out of cpu_vm_mask, so the cpu running
* the kernel task doesn't need to take any more interrupts. At that
* point it's then necessary to explicitly reinstall it when context
* switching back to the original mm.
*
* On Tile, we have to do a page-table install whenever DMA is enabled,
* so in that case lazy mode doesn't help anyway. And more generally,
* we have efficient per-page TLB shootdown, and don't expect to spend
* that much time in kernel tasks in general, so just leaving the
* kernel task borrowing the old page table, but handling TLB
* shootdowns, is a reasonable thing to do. And importantly, this
* lets us use the hypervisor's internal APIs for TLB shootdown, which
* means we don't have to worry about having TLB shootdowns blocked
* when Linux is disabling interrupts; see the page migration code for
* an example of where it's important for TLB shootdowns to complete
* even when interrupts are disabled at the Linux level.
*/
static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *t)
{
#if CHIP_HAS_TILE_DMA()
/*
* We have to do an "identity" page table switch in order to
* clear any pending DMA interrupts.
*/
if (current->thread.tile_dma_state.enabled)
install_page_table(mm->pgd, __this_cpu_read(current_asid));
#endif
}
static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
struct task_struct *tsk)
{
if (likely(prev != next)) {
int cpu = smp_processor_id();
/* Pick new ASID. */
int asid = __this_cpu_read(current_asid) + 1;
if (asid > max_asid) {
asid = min_asid;
local_flush_tlb();
}
__this_cpu_write(current_asid, asid);
/* Clear cpu from the old mm, and set it in the new one. */
cpumask_clear_cpu(cpu, mm_cpumask(prev));
cpumask_set_cpu(cpu, mm_cpumask(next));
/* Re-load page tables */
install_page_table(next->pgd, asid);
/* See how we should set the red/black cache info */
check_mm_caching(prev, next);
/*
* Since we're changing to a new mm, we have to flush
* the icache in case some physical page now being mapped
* has subsequently been repurposed and has new code.
*/
__flush_icache();
}
}
static inline void activate_mm(struct mm_struct *prev_mm,
struct mm_struct *next_mm)
{
switch_mm(prev_mm, next_mm, NULL);
}
#define destroy_context(mm) do { } while (0)
#define deactivate_mm(tsk, mm) do { } while (0)
#endif /* _ASM_TILE_MMU_CONTEXT_H */