powerpc: Make 64-bit non-VMX __copy_tofrom_user bi-endian

The powerpc 64-bit __copy_tofrom_user() function uses shifts to handle
unaligned invocations.  However, these shifts were designed for
big-endian systems: On little-endian systems, they must shift in the
opposite direction.

This commit relies on the C preprocessor to insert the correct shifts
into the assembly code.

[ This is a rare but nasty LE issue. Most of the time we use the POWER7
optimised __copy_tofrom_user_power7 loop, but when it hits an exception
we fall back to the base __copy_tofrom_user loop. - Anton ]

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
This commit is contained in:
Paul E. McKenney 2013-12-18 09:29:57 +11:00 committed by Benjamin Herrenschmidt
parent e8a00ad5e2
commit 20151169f1

View File

@ -9,6 +9,14 @@
#include <asm/processor.h>
#include <asm/ppc_asm.h>
#ifdef __BIG_ENDIAN__
#define sLd sld /* Shift towards low-numbered address. */
#define sHd srd /* Shift towards high-numbered address. */
#else
#define sLd srd /* Shift towards low-numbered address. */
#define sHd sld /* Shift towards high-numbered address. */
#endif
.align 7
_GLOBAL(__copy_tofrom_user)
BEGIN_FTR_SECTION
@ -118,10 +126,10 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
24: ld r9,0(r4) /* 3+2n loads, 2+2n stores */
25: ld r0,8(r4)
sld r6,r9,r10
sLd r6,r9,r10
26: ldu r9,16(r4)
srd r7,r0,r11
sld r8,r0,r10
sHd r7,r0,r11
sLd r8,r0,r10
or r7,r7,r6
blt cr6,79f
27: ld r0,8(r4)
@ -129,35 +137,35 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
28: ld r0,0(r4) /* 4+2n loads, 3+2n stores */
29: ldu r9,8(r4)
sld r8,r0,r10
sLd r8,r0,r10
addi r3,r3,-8
blt cr6,5f
30: ld r0,8(r4)
srd r12,r9,r11
sld r6,r9,r10
sHd r12,r9,r11
sLd r6,r9,r10
31: ldu r9,16(r4)
or r12,r8,r12
srd r7,r0,r11
sld r8,r0,r10
sHd r7,r0,r11
sLd r8,r0,r10
addi r3,r3,16
beq cr6,78f
1: or r7,r7,r6
32: ld r0,8(r4)
76: std r12,8(r3)
2: srd r12,r9,r11
sld r6,r9,r10
2: sHd r12,r9,r11
sLd r6,r9,r10
33: ldu r9,16(r4)
or r12,r8,r12
77: stdu r7,16(r3)
srd r7,r0,r11
sld r8,r0,r10
sHd r7,r0,r11
sLd r8,r0,r10
bdnz 1b
78: std r12,8(r3)
or r7,r7,r6
79: std r7,16(r3)
5: srd r12,r9,r11
5: sHd r12,r9,r11
or r12,r8,r12
80: std r12,24(r3)
bne 6f
@ -165,23 +173,38 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
blr
6: cmpwi cr1,r5,8
addi r3,r3,32
sld r9,r9,r10
sLd r9,r9,r10
ble cr1,7f
34: ld r0,8(r4)
srd r7,r0,r11
sHd r7,r0,r11
or r9,r7,r9
7:
bf cr7*4+1,1f
#ifdef __BIG_ENDIAN__
rotldi r9,r9,32
#endif
94: stw r9,0(r3)
#ifdef __LITTLE_ENDIAN__
rotrdi r9,r9,32
#endif
addi r3,r3,4
1: bf cr7*4+2,2f
#ifdef __BIG_ENDIAN__
rotldi r9,r9,16
#endif
95: sth r9,0(r3)
#ifdef __LITTLE_ENDIAN__
rotrdi r9,r9,16
#endif
addi r3,r3,2
2: bf cr7*4+3,3f
#ifdef __BIG_ENDIAN__
rotldi r9,r9,8
#endif
96: stb r9,0(r3)
#ifdef __LITTLE_ENDIAN__
rotrdi r9,r9,8
#endif
3: li r3,0
blr