powerpc: memcpy optimization for 64bit LE
Unaligned stores take alignment exceptions on POWER7 running in little-endian mode. This is a dumb little-endian base memcpy that avoids unaligned stores. Once booted, the feature fixup code switches over to the VMX copy loops (which are already endian safe).

The question is what we do before that switchover. The base 64bit memcpy takes alignment exceptions on POWER7, so we can't use it as is. Fixing the causes of the alignment exceptions would slow it down, because we'd need to ensure all loads and stores are aligned, either through rotate tricks or bytewise loads and stores. Either would be bad for all other 64bit platforms.

[ I simplified the loop a bit - Anton ]

Signed-off-by: Philippe Bergheaud <felix@linux.vnet.ibm.com>
Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
parent 48ce3b7cc6
commit 00f554fade
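The fallback introduced here never issues a load or store wider than one byte, so it cannot take an alignment exception no matter how source and destination are aligned. A minimal C sketch of the same logic (function name illustrative, not part of the commit):

	#include <stddef.h>

	static void *dumb_memcpy(void *dest, const void *src, size_t n)
	{
		unsigned char *d = dest;
		const unsigned char *s = src;

		while (n--)		/* one byte per iteration: always aligned */
			*d++ = *s++;

		return dest;	/* memcpy must return the original dest */
	}

On little-endian kernels this loop is what runs until the CPU feature fixup executes at boot; afterwards, CPUs with CPU_FTR_VMX_COPY are patched to branch to the endian-safe memcpy_power7 instead.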
arch/powerpc/include/asm/string.h
@@ -10,9 +10,7 @@
 #define __HAVE_ARCH_STRNCMP
 #define __HAVE_ARCH_STRCAT
 #define __HAVE_ARCH_MEMSET
-#ifdef __BIG_ENDIAN__
 #define __HAVE_ARCH_MEMCPY
-#endif
 #define __HAVE_ARCH_MEMMOVE
 #define __HAVE_ARCH_MEMCMP
 #define __HAVE_ARCH_MEMCHR
@@ -24,9 +22,7 @@ extern int strcmp(const char *,const char *);
 extern int strncmp(const char *, const char *, __kernel_size_t);
 extern char * strcat(char *, const char *);
 extern void * memset(void *,int,__kernel_size_t);
-#ifdef __BIG_ENDIAN__
 extern void * memcpy(void *,const void *,__kernel_size_t);
-#endif
 extern void * memmove(void *,const void *,__kernel_size_t);
 extern int memcmp(const void *,const void *,__kernel_size_t);
 extern void * memchr(const void *,int,__kernel_size_t);
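With the #ifdef __BIG_ENDIAN__ guards gone, __HAVE_ARCH_MEMCPY is defined for both endiannesses, so little-endian builds stop falling back to the generic C implementation. Paraphrased from the kernel's generic lib/string.c, which compiles its memcpy only when the architecture does not provide one:

	#ifndef __HAVE_ARCH_MEMCPY
	void *memcpy(void *dest, const void *src, size_t count)
	{
		char *tmp = dest;
		const char *s = src;

		while (count--)
			*tmp++ = *s++;
		return dest;
	}
	#endif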
arch/powerpc/kernel/ppc_ksyms.c
@@ -155,9 +155,7 @@ EXPORT_SYMBOL(__cmpdi2);
 #endif
 long long __bswapdi2(long long);
 EXPORT_SYMBOL(__bswapdi2);
-#ifdef __BIG_ENDIAN__
 EXPORT_SYMBOL(memcpy);
-#endif
 EXPORT_SYMBOL(memset);
 EXPORT_SYMBOL(memmove);
 EXPORT_SYMBOL(memcmp);
arch/powerpc/lib/Makefile
@@ -23,9 +23,7 @@ obj-y += checksum_$(CONFIG_WORD_SIZE).o
 obj-$(CONFIG_PPC64)	+= checksum_wrappers_64.o
 endif
 
-ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),)
 obj-$(CONFIG_PPC64)	+= memcpy_power7.o memcpy_64.o
-endif
 
 obj-$(CONFIG_PPC_EMULATE_SSTEP)	+= sstep.o ldstfp.o
 
arch/powerpc/lib/memcpy_64.S
@@ -12,12 +12,27 @@
 	.align	7
 _GLOBAL(memcpy)
 BEGIN_FTR_SECTION
+#ifdef __LITTLE_ENDIAN__
+	cmpdi	cr7,r5,0
+#else
 	std	r3,48(r1)	/* save destination pointer for return value */
+#endif
 FTR_SECTION_ELSE
 #ifndef SELFTEST
 	b	memcpy_power7
 #endif
 ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
+#ifdef __LITTLE_ENDIAN__
+	/* dumb little-endian memcpy that will get replaced at runtime */
+	addi r9,r3,-1
+	addi r4,r4,-1
+	beqlr cr7
+	mtctr r5
+1:	lbzu r10,1(r4)
+	stbu r10,1(r9)
+	bdnz 1b
+	blr
+#else
 	PPC_MTOCRF(0x01,r5)
 	cmpldi	cr1,r5,16
 	neg	r6,r3		# LS 3 bits = # bytes to 8-byte dest bdry
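The new loop leans on PowerPC update-form instructions: lbzu and stbu add the displacement to the base register before the access. Biasing both pointers back by one therefore lets each iteration pre-increment, and r3 is never modified, so the little-endian path can skip the std/ld of the destination pointer that the big-endian path needs for its return value. The cmpdi cr7,r5,0 tucked into the feature section pairs with beqlr cr7 to return early on zero length; without it, mtctr with r5 = 0 would make bdnz loop 2^64 times. A hedged C rendering of the loop (illustrative only; register mapping in comments):

	void *le_memcpy(void *dest, const void *src, size_t n)	/* r3, r4, r5 */
	{
		unsigned char *d = (unsigned char *)dest - 1;	/* addi r9,r3,-1 */
		const unsigned char *s =
			(const unsigned char *)src - 1;		/* addi r4,r4,-1 */

		if (n == 0)		/* beqlr cr7 */
			return dest;
		do {			/* mtctr r5 */
			*++d = *++s;	/* 1: lbzu r10,1(r4); stbu r10,1(r9) */
		} while (--n);		/* bdnz 1b */
		return dest;		/* blr; r3 still holds dest */
	}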
@@ -203,3 +218,4 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
 	stb	r0,0(r3)
 4:	ld	r3,48(r1)	/* return dest pointer */
 	blr
+#endif