From b93c68648426f906d63b98117496b6415f505f39 Mon Sep 17 00:00:00 2001
From: Mike Frysinger <vapier@gentoo.org>
Date: Wed, 5 Nov 2008 08:50:23 -0500
Subject: Blackfin: only flag L1 instruction for DMA memcpy

The performance difference between an 8-bit DMA memcpy and an optimized
core memcpy can be pretty big once you add in the overhead of setting up the
MDMA registers, cache flushes, etc.  So only use dma_memcpy() when we
actually require it.

Signed-off-by: Mike Frysinger <vapier@gentoo.org>

diff --git a/include/asm-blackfin/blackfin_local.h b/include/asm-blackfin/blackfin_local.h
index 4e15834..e17d8a2 100644
--- a/include/asm-blackfin/blackfin_local.h
+++ b/include/asm-blackfin/blackfin_local.h
@@ -66,17 +66,11 @@ extern void blackfin_dcache_flush_range(const void *, const void *);
 extern void blackfin_icache_dcache_flush_range(const void *, const void *);
 extern void blackfin_dcache_flush_invalidate_range(const void *, const void *);
 
-/* Use DMA to move data from on chip to external memory.  While this is
- * required for only L1 instruction (it is not directly readable by the
- * core via data loads), it isn't a huge performance issue for other
- * regions (it's probably even faster than core load/stores).  However,
- * the DMA engine does not have access to the L1 scratchpad, and we
- * cannot use DMA inside of the MMR space.
+/* Use DMA to move data from on-chip to external memory.  The L1 instruction
+ * regions can only be accessed via DMA, so if the address in question is in
+ * that region, make sure we go through DMA instead of core loads/stores.
  */
-# define addr_bfin_on_chip_mem(addr) \
-	(((unsigned long)(addr) >= 0xef000000 && (unsigned long)addr < SYSMMR_BASE) && \
-	 !((unsigned long)(addr) >= L1_SRAM_SCRATCH && \
-	   (unsigned long)(addr) < L1_SRAM_SCRATCH_END))
+# define addr_bfin_on_chip_mem(addr) (((unsigned long)(addr) & 0xFFF00000) == 0xFFA00000)
 
 # include <asm/system.h>
 
-- 
cgit v0.10.2