linux/arch/arm/mm/highmem.c
Thomas Petazzoni 9ff0bb5ba6 ARM: 8180/1: mm: implement no-highmem fast path in kmap_atomic_pfn()
Since CONFIG_HIGHMEM got enabled on ARMv5 Kirkwood, we have noticed a
very significant drop in networking performance. The test were
conducted on an OpenBlocks A7 board. Without this patch, the outgoing
performance measured with iperf are:

 - highmem OFF, TSO OFF   544 Mbit/s
 - highmem OFF, TSO ON	  942 Mbit/s
 - highmem ON,  TSO OFF   306 Mbit/s
 - highmem ON,  TSO ON    246 Mbit/s

On this Kirkwood platform, the L2 cache is a Feroceon cache, and with
this cache, all the range operations have to be done on virtual
addresses and not physical addresses. Therefore, whenever
CONFIG_HIGHMEM is enabled, the cache maintenance operations call
kmap_atomic_pfn() and kunmap_atomic().

However, kmap_atomic_pfn() does not implement the same fast path for
non-highmem pages as the one implemented in kmap_atomic(), and this is
one of the reason for the performance drop. While this patch does not
fully restore the performances, it clearly improves them a lot:

      	      	        without patch  with patch

 - highmem ON, TSO OFF   306 Mbit/s     387 Mbit/s
 - highmem ON, TSO ON    246 Mbit/s     434 Mbit/s

We're still far from the !CONFIG_HIGHMEM performances, but it does
improve a bit the situation.

Thanks a lot to Ezequiel Garcia and Gregory Clement for all the
testing work around this topic.

Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2014-10-29 17:22:07 +00:00

156 lines
3.5 KiB
C

/*
* arch/arm/mm/highmem.c -- ARM highmem support
*
* Author: Nicolas Pitre
* Created: september 8, 2008
* Copyright: Marvell Semiconductors Inc.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/module.h>
#include <linux/highmem.h>
#include <linux/interrupt.h>
#include <asm/fixmap.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
#include "mm.h"
pte_t *fixmap_page_table;
static inline void set_fixmap_pte(int idx, pte_t pte)
{
unsigned long vaddr = __fix_to_virt(idx);
set_pte_ext(fixmap_page_table + idx, pte, 0);
local_flush_tlb_kernel_page(vaddr);
}
static inline pte_t get_fixmap_pte(unsigned long vaddr)
{
unsigned long idx = __virt_to_fix(vaddr);
return *(fixmap_page_table + idx);
}
void *kmap(struct page *page)
{
might_sleep();
if (!PageHighMem(page))
return page_address(page);
return kmap_high(page);
}
EXPORT_SYMBOL(kmap);
void kunmap(struct page *page)
{
BUG_ON(in_interrupt());
if (!PageHighMem(page))
return;
kunmap_high(page);
}
EXPORT_SYMBOL(kunmap);
void *kmap_atomic(struct page *page)
{
unsigned int idx;
unsigned long vaddr;
void *kmap;
int type;
pagefault_disable();
if (!PageHighMem(page))
return page_address(page);
#ifdef CONFIG_DEBUG_HIGHMEM
/*
* There is no cache coherency issue when non VIVT, so force the
* dedicated kmap usage for better debugging purposes in that case.
*/
if (!cache_is_vivt())
kmap = NULL;
else
#endif
kmap = kmap_high_get(page);
if (kmap)
return kmap;
type = kmap_atomic_idx_push();
idx = type + KM_TYPE_NR * smp_processor_id();
vaddr = __fix_to_virt(idx);
#ifdef CONFIG_DEBUG_HIGHMEM
/*
* With debugging enabled, kunmap_atomic forces that entry to 0.
* Make sure it was indeed properly unmapped.
*/
BUG_ON(!pte_none(*(fixmap_page_table + idx)));
#endif
/*
* When debugging is off, kunmap_atomic leaves the previous mapping
* in place, so the contained TLB flush ensures the TLB is updated
* with the new mapping.
*/
set_fixmap_pte(idx, mk_pte(page, kmap_prot));
return (void *)vaddr;
}
EXPORT_SYMBOL(kmap_atomic);
void __kunmap_atomic(void *kvaddr)
{
unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK;
int idx, type;
if (kvaddr >= (void *)FIXADDR_START) {
type = kmap_atomic_idx();
idx = type + KM_TYPE_NR * smp_processor_id();
if (cache_is_vivt())
__cpuc_flush_dcache_area((void *)vaddr, PAGE_SIZE);
#ifdef CONFIG_DEBUG_HIGHMEM
BUG_ON(vaddr != __fix_to_virt(idx));
set_fixmap_pte(idx, __pte(0));
#else
(void) idx; /* to kill a warning */
#endif
kmap_atomic_idx_pop();
} else if (vaddr >= PKMAP_ADDR(0) && vaddr < PKMAP_ADDR(LAST_PKMAP)) {
/* this address was obtained through kmap_high_get() */
kunmap_high(pte_page(pkmap_page_table[PKMAP_NR(vaddr)]));
}
pagefault_enable();
}
EXPORT_SYMBOL(__kunmap_atomic);
void *kmap_atomic_pfn(unsigned long pfn)
{
unsigned long vaddr;
int idx, type;
struct page *page = pfn_to_page(pfn);
pagefault_disable();
if (!PageHighMem(page))
return page_address(page);
type = kmap_atomic_idx_push();
idx = type + KM_TYPE_NR * smp_processor_id();
vaddr = __fix_to_virt(idx);
#ifdef CONFIG_DEBUG_HIGHMEM
BUG_ON(!pte_none(*(fixmap_page_table + idx)));
#endif
set_fixmap_pte(idx, pfn_pte(pfn, kmap_prot));
return (void *)vaddr;
}
struct page *kmap_atomic_to_page(const void *ptr)
{
unsigned long vaddr = (unsigned long)ptr;
if (vaddr < FIXADDR_START)
return virt_to_page(ptr);
return pte_page(get_fixmap_pte(vaddr));
}