linux/arch/arc/mm/dma.c
Alexey Brodkin f2b0b25a37 ARCv2: Support IO Coherency and permutations involving L1 and L2 caches
In case of an ARCv2 CPU there could be the following configurations
that affect cache handling for data exchanged with peripherals
via DMA:
 [1] Only L1 cache exists
 [2] Both L1 and L2 exist, but no IO coherency unit
 [3] L1, L2 caches and IO coherency unit exist

Current implementation takes care of [1] and [2].
Moreover support of [2] is implemented with run-time check
for SLC existence which is not super optimal.

This patch introduces support of [3] and rework of DMA ops
usage. Instead of doing run-time check every time a particular
DMA op is executed we'll have 3 different implementations of
DMA ops and select appropriate one during init.

As for IOC support, we need to:
 [a] Implement empty DMA ops because IOC takes care of cache
     coherency with DMAed data
 [b] Route dma_alloc_coherent() via dma_alloc_noncoherent()
     This is required to make IOC work in the first place and also
     serves as an optimization, as LD/ST to coherent buffers can be
     serviced from caches w/o going all the way to memory

Signed-off-by: Alexey Brodkin <abrodkin@synopsys.com>
[vgupta:
  -Added some comments about IOC gains
  -Marked dma ops as static,
  -Massaged changelog a bit]
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
2015-08-20 18:11:17 +05:30

121 lines
3.3 KiB
C

/*
* Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
/*
* DMA Coherent API Notes
*
* I/O is inherently non-coherent on ARC. So a coherent DMA buffer is
* implemented by accessing it using a kernel virtual address, with
* Cache bit off in the TLB entry.
*
* The default DMA address == Phy address which is 0x8000_0000 based.
*/
#include <linux/dma-mapping.h>
#include <linux/dma-debug.h>
#include <linux/export.h>
#include <asm/cache.h>
#include <asm/cacheflush.h>
/*
* Helpers for Coherent DMA API.
*/
/*
 * Allocate a normal (cached) buffer of @size bytes for DMA use.
 *
 * @dev:        device the buffer is for (not consulted here)
 * @size:       number of bytes requested
 * @dma_handle: out - receives the bus address; written only on success
 * @gfp:        allocation flags handed to alloc_pages_exact()
 *
 * Returns the CPU address of the buffer, or NULL if the page allocation
 * failed. The bus address stored in @dma_handle is the very same value as
 * the returned pointer.
 */
void *dma_alloc_noncoherent(struct device *dev, size_t size,
			    dma_addr_t *dma_handle, gfp_t gfp)
{
	/* Linear address (0x8000_0000 based) */
	void *buf = alloc_pages_exact(size, gfp);

	/* Bus address is platform dependent; here it mirrors the linear one */
	if (buf != NULL)
		*dma_handle = (dma_addr_t)buf;

	return buf;
}
EXPORT_SYMBOL(dma_alloc_noncoherent);
/*
 * Release a buffer obtained from dma_alloc_noncoherent().
 *
 * @dev:        device the buffer belonged to (not consulted here)
 * @size:       size in bytes, as passed at allocation time
 * @vaddr:      CPU address of the buffer (not consulted here)
 * @dma_handle: bus address returned at allocation time; it is cast back to
 *              a pointer and used to locate the pages to free
 */
void dma_free_noncoherent(struct device *dev, size_t size, void *vaddr,
			  dma_addr_t dma_handle)
{
	void *pages = (void *)dma_handle;

	free_pages_exact(pages, size);
}
EXPORT_SYMBOL(dma_free_noncoherent);
/*
 * Allocate a buffer of @size bytes suitable for coherent DMA.
 *
 * @dev:        device the buffer is for
 * @size:       number of bytes requested
 * @dma_handle: out - receives the bus address; written only on success
 * @gfp:        allocation flags handed to the page allocator
 *
 * Returns the kernel address through which the CPU must access the buffer,
 * or NULL on failure. With IOC this is an ordinary cached address; without
 * IOC it is an uncached mapping created by ioremap_nocache(). On failure no
 * pages remain allocated.
 */
void *dma_alloc_coherent(struct device *dev, size_t size,
			 dma_addr_t *dma_handle, gfp_t gfp)
{
	void *paddr, *kvaddr;

	/*
	 * IOC relies on all data (even coherent DMA data) being in cache
	 * Thus allocate normal cached memory
	 *
	 * The gains with IOC are two pronged:
	 *   -For streaming data, elides needs for cache maintenance, saving
	 *    cycles in flush code, and bus bandwidth as all the lines of a
	 *    buffer need to be flushed out to memory
	 *   -For coherent data, Read/Write to buffers terminate early in cache
	 *    (vs. always going to memory - thus are faster)
	 */
	if (ioc_exists)
		return dma_alloc_noncoherent(dev, size, dma_handle, gfp);

	/* This is linear addr (0x8000_0000 based) */
	paddr = alloc_pages_exact(size, gfp);
	if (!paddr)
		return NULL;

	/* This is kernel Virtual address (0x7000_0000 based) */
	kvaddr = ioremap_nocache((unsigned long)paddr, size);
	if (kvaddr == NULL) {
		/* Don't leak the backing pages when the mapping fails */
		free_pages_exact(paddr, size);
		return NULL;
	}

	/* This is bus address, platform dependent */
	*dma_handle = (dma_addr_t)paddr;

	/*
	 * Evict any existing L1 and/or L2 lines for the backing page
	 * in case it was used earlier as a normal "cached" page.
	 * Yeah this bit us - STAR 9000898266
	 *
	 * Although core does call flush_cache_vmap(), it gets kvaddr hence
	 * can't be used to efficiently flush L1 and/or L2 which need paddr
	 * Currently flush_cache_vmap nukes the L1 cache completely which
	 * will be optimized as a separate commit
	 */
	dma_cache_wback_inv((unsigned long)paddr, size);

	return kvaddr;
}
EXPORT_SYMBOL(dma_alloc_coherent);
/*
 * Release a buffer obtained from dma_alloc_coherent().
 *
 * @dev:        device the buffer belonged to
 * @size:       size in bytes, as passed at allocation time
 * @kvaddr:     address returned by dma_alloc_coherent()
 * @dma_handle: bus address returned at allocation time; cast back to a
 *              pointer to locate the backing pages
 *
 * Mirrors the allocation path: with IOC the buffer is plain cached memory
 * and is handed to dma_free_noncoherent(); otherwise the uncached mapping
 * is torn down first and the backing pages are freed afterwards.
 */
void dma_free_coherent(struct device *dev, size_t size, void *kvaddr,
		       dma_addr_t dma_handle)
{
	if (ioc_exists) {
		/*
		 * Call, then return: "return <void expression>;" in a void
		 * function is not valid ISO C (only a GNU extension).
		 */
		dma_free_noncoherent(dev, size, kvaddr, dma_handle);
		return;
	}

	iounmap((void __force __iomem *)kvaddr);

	free_pages_exact((void *)dma_handle, size);
}
EXPORT_SYMBOL(dma_free_coherent);
/*
* Helper for streaming DMA...
*/
/*
 * Exported, out-of-line entry point for DMA cache synchronisation.
 *
 * Forwards @paddr, @size and @dir unchanged to __inline_dma_cache_sync()
 * and returns nothing.
 *
 * NOTE(review): __inline_dma_cache_sync() is defined outside this file;
 * presumably it performs the cache maintenance appropriate for @dir on the
 * region [paddr, paddr + size) - confirm against its definition.
 */
void __arc_dma_cache_sync(unsigned long paddr, size_t size,
enum dma_data_direction dir)
{
__inline_dma_cache_sync(paddr, size, dir);
}
EXPORT_SYMBOL(__arc_dma_cache_sync);