mirror of
https://github.com/torvalds/linux.git
synced 2024-11-23 12:42:02 +00:00
sh: Provide sdivsi3/udivsi3/udivdi3 for sh64, kill off libgcc linking.
This moves in the necessary libgcc bits and kills off the libgcc linking for sh64 kernels as well. Signed-off-by: Paul Mundt <lethal@linux-sh.org>
This commit is contained in:
parent
209aa4fdc3
commit
180ae2037f
@ -177,10 +177,8 @@ KBUILD_CFLAGS += -pipe $(cflags-y)
|
|||||||
KBUILD_CPPFLAGS += $(cflags-y)
|
KBUILD_CPPFLAGS += $(cflags-y)
|
||||||
KBUILD_AFLAGS += $(cflags-y)
|
KBUILD_AFLAGS += $(cflags-y)
|
||||||
|
|
||||||
LIBGCC := $(shell $(CC) $(KBUILD_CFLAGS) -print-libgcc-file-name)
|
|
||||||
|
|
||||||
libs-$(CONFIG_SUPERH32) := arch/sh/lib/ $(libs-y)
|
libs-$(CONFIG_SUPERH32) := arch/sh/lib/ $(libs-y)
|
||||||
libs-$(CONFIG_SUPERH64) := arch/sh/lib64/ $(libs-y) $(LIBGCC)
|
libs-$(CONFIG_SUPERH64) := arch/sh/lib64/ $(libs-y)
|
||||||
|
|
||||||
PHONY += maketools FORCE
|
PHONY += maketools FORCE
|
||||||
|
|
||||||
|
@ -76,7 +76,5 @@ EXPORT_SYMBOL(strcpy);
|
|||||||
#define DECLARE_EXPORT(name) extern void name(void);EXPORT_SYMBOL(name)
|
#define DECLARE_EXPORT(name) extern void name(void);EXPORT_SYMBOL(name)
|
||||||
|
|
||||||
DECLARE_EXPORT(__sdivsi3);
|
DECLARE_EXPORT(__sdivsi3);
|
||||||
DECLARE_EXPORT(__sdivsi3_2);
|
|
||||||
DECLARE_EXPORT(__muldi3);
|
|
||||||
DECLARE_EXPORT(__udivsi3);
|
DECLARE_EXPORT(__udivsi3);
|
||||||
DECLARE_EXPORT(__div_table);
|
DECLARE_EXPORT(__div_table);
|
||||||
|
@ -12,3 +12,6 @@
|
|||||||
# Panic should really be compiled as PIC
|
# Panic should really be compiled as PIC
|
||||||
lib-y := udelay.o c-checksum.o dbg.o panic.o memcpy.o memset.o \
|
lib-y := udelay.o c-checksum.o dbg.o panic.o memcpy.o memset.o \
|
||||||
copy_user_memcpy.o copy_page.o clear_page.o strcpy.o strlen.o
|
copy_user_memcpy.o copy_page.o clear_page.o strcpy.o strlen.o
|
||||||
|
|
||||||
|
# Extracted from libgcc
|
||||||
|
lib-y += udivsi3.o udivdi3.o sdivsi3.o
|
||||||
|
131
arch/sh/lib64/sdivsi3.S
Normal file
131
arch/sh/lib64/sdivsi3.S
Normal file
@ -0,0 +1,131 @@
|
|||||||
|
/*
 * __sdivsi3 - signed division helper (taken over from libgcc for sh64).
 *
 * The divisor in r5 is normalized, a first reciprocal estimate is looked
 * up in __div_table and refined in two steps (see the "11 bit inverse"
 * and "22 bit inverse" notes below), and the refined reciprocal is then
 * multiplied with the dividend in r4.  The quotient is left in r0.
 * There is no test for a zero divisor in this routine.
 */
.global __sdivsi3
|
||||||
|
.section .text..SHmedia32,"ax"
|
||||||
|
.align 2
|
||||||
|
|
||||||
|
/* inputs: r4,r5 */
|
||||||
|
/* clobbered: r1,r18,r19,r20,r21,r25,tr0 */
|
||||||
|
/* result in r0 */
|
||||||
|
__sdivsi3:
|
||||||
|
/* Table address goes into tr0; the gettr below copies it to r20, the base register of both table loads. */
ptb __div_table,tr0
|
||||||
|
|
||||||
|
/* r1 = normalization shift for the divisor; it is reused further down to build the final shard count. */
nsb r5, r1
|
||||||
|
shlld r5, r1, r25 /* normalize; [-2 ..1, 1..2) in s2.62 */
|
||||||
|
shari r25, 58, r21 /* extract 5(6) bit index (s2.4 with hole -1..1) */
|
||||||
|
/* bubble */
|
||||||
|
gettr tr0,r20
|
||||||
|
ldx.ub r20, r21, r19 /* u0.8 */
|
||||||
|
shari r25, 32, r25 /* normalize to s2.30 */
|
||||||
|
/* Double the index: the second table access below reads .word entries rather than bytes. */
shlli r21, 1, r21
|
||||||
|
muls.l r25, r19, r19 /* s2.38 */
|
||||||
|
ldx.w r20, r21, r21 /* s2.14 */
|
||||||
|
/* Latch the return target from r18 now: tr0 is free again after the gettr, and r18 is overwritten as scratch further down. */
ptabs r18, tr0
|
||||||
|
shari r19, 24, r19 /* truncate to s2.14 */
|
||||||
|
sub r21, r19, r19 /* some 11 bit inverse in s1.14 */
|
||||||
|
muls.l r19, r19, r21 /* u0.28 */
|
||||||
|
/* r1 = 92 - r1 (assuming r63 reads as zero - TODO confirm): shift count for the final shard. */
sub r63, r1, r1
|
||||||
|
addi r1, 92, r1
|
||||||
|
muls.l r25, r21, r18 /* s2.58 */
|
||||||
|
shlli r19, 45, r19 /* multiply by two and convert to s2.58 */
|
||||||
|
/* bubble */
|
||||||
|
sub r19, r18, r18
|
||||||
|
shari r18, 28, r18 /* some 22 bit inverse in s1.30 */
|
||||||
|
muls.l r18, r25, r0 /* s2.60 */
|
||||||
|
/* First use of the dividend (r4): multiply it with the refined reciprocal. */
muls.l r18, r4, r25 /* s32.30 */
|
||||||
|
/* bubble */
|
||||||
|
shari r0, 16, r19 /* s-16.44 */
|
||||||
|
muls.l r19, r18, r19 /* s-16.74 */
|
||||||
|
/* r0 = r25 shifted right arithmetically by 63, presumably a 0 / -1 sign mask of the product - TODO confirm. */
shari r25, 63, r0
|
||||||
|
shari r4, 14, r18 /* s19.-14 */
|
||||||
|
shari r19, 30, r19 /* s-16.44 */
|
||||||
|
muls.l r19, r18, r19 /* s15.30 */
|
||||||
|
xor r21, r0, r21 /* You could also use the constant 1 << 27. */
|
||||||
|
add r21, r25, r21
|
||||||
|
sub r21, r19, r21
|
||||||
|
/* Scale back by the shift count prepared in r1 above. */
shard r21, r1, r21
|
||||||
|
/* Quotient = r21 - r0, where r0 still holds the shari-by-63 value from above. */
sub r21, r0, r0
|
||||||
|
/* Return through tr0, which was loaded from r18 by the ptabs above. */
blink tr0, r63
|
||||||
|
|
||||||
|
/* This table has been generated by divtab.c.
|
||||||
|
Defects for bias -330:
|
||||||
|
Max defect: 6.081536e-07 at -1.000000e+00
|
||||||
|
Min defect: 2.849516e-08 at 1.030651e+00
|
||||||
|
Max 2nd step defect: 9.606539e-12 at -1.000000e+00
|
||||||
|
Min 2nd step defect: 0.000000e+00 at 0.000000e+00
|
||||||
|
Defect at 1: 1.238659e-07
|
||||||
|
Defect at -2: 1.061708e-07 */
|
||||||
|
|
||||||
|
/*
 * Layout, as byte offsets relative to the __div_table label.  The label
 * sits in the middle of the data because __sdivsi3 indexes it with a
 * signed index: ldx.ub at (table + index), ldx.w at (table + 2 * index).
 *
 *   -64 .. -33   16 .word constants for negative divisors
 *   -32 .. -17   16 .byte factors for negative divisors
 *   -16 .. +15   two 16-byte .skip gaps (the "hole" in the index range)
 *   +16 .. +31   16 .byte factors for positive divisors
 *   +32 .. +63   16 .word constants for positive divisors
 *
 * The pieces add up to the 128 bytes declared by .size, taking each
 * .word as 2 bytes.  Do not reorder or resize any part without
 * adjusting the index arithmetic in __sdivsi3.
 */
.balign 2
|
||||||
|
.type __div_table,@object
|
||||||
|
.size __div_table,128
|
||||||
|
/* negative division constants */
|
||||||
|
.word -16638
|
||||||
|
.word -17135
|
||||||
|
.word -17737
|
||||||
|
.word -18433
|
||||||
|
.word -19103
|
||||||
|
.word -19751
|
||||||
|
.word -20583
|
||||||
|
.word -21383
|
||||||
|
.word -22343
|
||||||
|
.word -23353
|
||||||
|
.word -24407
|
||||||
|
.word -25582
|
||||||
|
.word -26863
|
||||||
|
.word -28382
|
||||||
|
.word -29965
|
||||||
|
.word -31800
|
||||||
|
/* negative division factors */
|
||||||
|
.byte 66
|
||||||
|
.byte 70
|
||||||
|
.byte 75
|
||||||
|
.byte 81
|
||||||
|
.byte 87
|
||||||
|
.byte 93
|
||||||
|
.byte 101
|
||||||
|
.byte 109
|
||||||
|
.byte 119
|
||||||
|
.byte 130
|
||||||
|
.byte 142
|
||||||
|
.byte 156
|
||||||
|
.byte 172
|
||||||
|
.byte 192
|
||||||
|
.byte 214
|
||||||
|
.byte 241
|
||||||
|
.skip 16
|
||||||
|
.global __div_table
|
||||||
|
__div_table:
|
||||||
|
.skip 16
|
||||||
|
/* positive division factors */
|
||||||
|
.byte 241
|
||||||
|
.byte 214
|
||||||
|
.byte 192
|
||||||
|
.byte 172
|
||||||
|
.byte 156
|
||||||
|
.byte 142
|
||||||
|
.byte 130
|
||||||
|
.byte 119
|
||||||
|
.byte 109
|
||||||
|
.byte 101
|
||||||
|
.byte 93
|
||||||
|
.byte 87
|
||||||
|
.byte 81
|
||||||
|
.byte 75
|
||||||
|
.byte 70
|
||||||
|
.byte 66
|
||||||
|
/* positive division constants */
|
||||||
|
.word 31801
|
||||||
|
.word 29966
|
||||||
|
.word 28383
|
||||||
|
.word 26864
|
||||||
|
.word 25583
|
||||||
|
.word 24408
|
||||||
|
.word 23354
|
||||||
|
.word 22344
|
||||||
|
.word 21384
|
||||||
|
.word 20584
|
||||||
|
.word 19752
|
||||||
|
.word 19104
|
||||||
|
.word 18434
|
||||||
|
.word 17738
|
||||||
|
.word 17136
|
||||||
|
.word 16639
|
120
arch/sh/lib64/udivdi3.S
Normal file
120
arch/sh/lib64/udivdi3.S
Normal file
@ -0,0 +1,120 @@
|
|||||||
|
/*
 * __udivdi3 - unsigned 64-bit division helper (taken over from libgcc
 * for sh64).
 *
 * Register usage as far as it can be read off the code below:
 *   r2  is read as the dividend and both exit paths finish by writing
 *       r2, so it presumably also carries the quotient - TODO confirm
 *       against the SH-5 ABI.
 *   r3  is the divisor (the reciprocal is built from it); it is never
 *       written.
 *   written: r0-r2, r4-r9, r19-r22, r25, tr0.
 *   r18 is only read (ptabs) to form the return target.
 *
 * A reciprocal of the normalized divisor is refined in r1; depending on
 * the divisor's magnitude the routine then either runs further divide
 * steps inline or branches to large_divisor.  There is no test for a
 * zero divisor.
 */
.section .text..SHmedia32,"ax"
|
||||||
|
.align 2
|
||||||
|
.global __udivdi3
|
||||||
|
__udivdi3:
|
||||||
|
shlri r3,1,r4
|
||||||
|
/* r22 = normalization shift, computed from r3 >> 1. */
nsb r4,r22
|
||||||
|
/* r6 = normalized divisor; r5 and r7 below are derived from it. */
shlld r3,r22,r6
|
||||||
|
shlri r6,49,r5
|
||||||
|
movi 0xffffffffffffbaf1,r21 /* .l shift count 17. */
|
||||||
|
sub r21,r5,r1
|
||||||
|
mmulfx.w r1,r1,r4
|
||||||
|
mshflo.w r1,r63,r1
|
||||||
|
sub r63,r22,r20 // r63 == 64 % 64
|
||||||
|
mmulfx.w r5,r4,r4
|
||||||
|
/* tr0 is first used for the large_divisor branch; both exit paths reload it from r18 just before returning. */
pta large_divisor,tr0
|
||||||
|
/* r9 = r20 + 32; it is the value tested by the bgt/u below. */
addi r20,32,r9
|
||||||
|
msub.w r1,r4,r1
|
||||||
|
madd.w r1,r1,r1
|
||||||
|
mmulfx.w r1,r1,r4
|
||||||
|
shlri r6,32,r7
|
||||||
|
bgt/u r9,r63,tr0 // large_divisor
|
||||||
|
/* Fall-through path.  This mmulfx.w / msub.w pair is repeated at the top of large_divisor. */
mmulfx.w r5,r4,r4
|
||||||
|
shlri r2,32+14,r19
|
||||||
|
addi r22,-31,r0
|
||||||
|
msub.w r1,r4,r1
|
||||||
|
|
||||||
|
mulu.l r1,r7,r4
|
||||||
|
addi r1,-3,r5
|
||||||
|
mulu.l r5,r19,r5
|
||||||
|
sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
|
||||||
|
shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
|
||||||
|
the case may be, %0000000000000000 000.11111111111, still */
|
||||||
|
muls.l r1,r4,r4 /* leaving at least one sign bit. */
|
||||||
|
mulu.l r5,r3,r8
|
||||||
|
mshalds.l r1,r21,r1
|
||||||
|
shari r4,26,r4
|
||||||
|
shlld r8,r0,r8
|
||||||
|
add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
|
||||||
|
sub r2,r8,r2
|
||||||
|
/* Can do second step of 64 : 32 div now, using r1 and the rest in r2. */
|
||||||
|
|
||||||
|
shlri r2,22,r21
|
||||||
|
mulu.l r21,r1,r21
|
||||||
|
/* r8 now starts accumulating the quotient; the partial results of the following steps are added to it. */
shlld r5,r0,r8
|
||||||
|
addi r20,30-22,r0
|
||||||
|
shlrd r21,r0,r21
|
||||||
|
mulu.l r21,r3,r5
|
||||||
|
add r8,r21,r8
|
||||||
|
mcmpgt.l r21,r63,r21 // See Note 1
|
||||||
|
addi r20,30,r0
|
||||||
|
mshfhi.l r63,r21,r21
|
||||||
|
sub r2,r5,r2
|
||||||
|
andc r2,r21,r2
|
||||||
|
|
||||||
|
/* small divisor: need a third divide step */
|
||||||
|
mulu.l r2,r1,r7
|
||||||
|
/* Reload tr0 with the return target from r18 (tr0 held the large_divisor address until now). */
ptabs r18,tr0
|
||||||
|
addi r2,1,r2
|
||||||
|
shlrd r7,r0,r7
|
||||||
|
mulu.l r7,r3,r5
|
||||||
|
add r8,r7,r8
|
||||||
|
sub r2,r3,r2
|
||||||
|
/* Compare result is added into the quotient below as a final correction. */
cmpgt r2,r5,r5
|
||||||
|
add r8,r5,r2
|
||||||
|
/* could test r3 here to check for divide by zero. */
|
||||||
|
blink tr0,r63
|
||||||
|
|
||||||
|
/* Reached from the bgt/u above when r9 compares greater than r63. */
large_divisor:
|
||||||
|
/* Same mmulfx.w / msub.w refinement as on the fall-through path after the branch. */
mmulfx.w r5,r4,r4
|
||||||
|
/* On this path the working remainder lives in r25 rather than r2. */
shlrd r2,r9,r25
|
||||||
|
shlri r25,32,r8
|
||||||
|
msub.w r1,r4,r1
|
||||||
|
|
||||||
|
mulu.l r1,r7,r4
|
||||||
|
addi r1,-3,r5
|
||||||
|
mulu.l r5,r8,r5
|
||||||
|
sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
|
||||||
|
shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
|
||||||
|
the case may be, %0000000000000000 000.11111111111, still */
|
||||||
|
muls.l r1,r4,r4 /* leaving at least one sign bit. */
|
||||||
|
shlri r5,14-1,r8
|
||||||
|
mulu.l r8,r7,r5
|
||||||
|
mshalds.l r1,r21,r1
|
||||||
|
shari r4,26,r4
|
||||||
|
add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
|
||||||
|
sub r25,r5,r25
|
||||||
|
/* Can do second step of 64 : 32 div now, using r1 and the rest in r25. */
|
||||||
|
|
||||||
|
shlri r25,22,r21
|
||||||
|
mulu.l r21,r1,r21
|
||||||
|
/* tr0 is reused once more, this time for the local no_lo_adj branch. */
pta no_lo_adj,tr0
|
||||||
|
addi r22,32,r0
|
||||||
|
shlri r21,40,r21
|
||||||
|
mulu.l r21,r7,r5
|
||||||
|
add r8,r21,r8
|
||||||
|
shlld r2,r0,r2
|
||||||
|
sub r25,r5,r25
|
||||||
|
bgtu/u r7,r25,tr0 // no_lo_adj
|
||||||
|
/* Branch not taken: bump the quotient estimate by one and take r7 off the remainder. */
addi r8,1,r8
|
||||||
|
sub r25,r7,r25
|
||||||
|
no_lo_adj:
|
||||||
|
mextr4 r2,r25,r2
|
||||||
|
|
||||||
|
/* large_divisor: only needs a few adjustments. */
|
||||||
|
mulu.l r8,r6,r5
|
||||||
|
/* Reload tr0 with the return target from r18 (tr0 held the no_lo_adj address until now). */
ptabs r18,tr0
|
||||||
|
/* bubble */
|
||||||
|
/* Compare result is subtracted from the quotient estimate below as a final correction. */
cmpgtu r5,r2,r5
|
||||||
|
sub r8,r5,r2
|
||||||
|
blink tr0,r63
|
||||||
|
|
||||||
|
/* Note 1: To shift the result of the second divide stage so that the result
|
||||||
|
always fits into 32 bits, yet we still reduce the rest sufficiently
|
||||||
|
would require a lot of instructions to do the shifts just right. Using
|
||||||
|
the full 64 bit shift result to multiply with the divisor would require
|
||||||
|
four extra instructions for the upper 32 bits (shift / mulu / shift / sub).
|
||||||
|
Fortunately, if the upper 32 bits of the shift result are nonzero, we
|
||||||
|
know that the rest after taking this partial result into account will
|
||||||
|
fit into 32 bits. So we just clear the upper 32 bits of the rest if the
|
||||||
|
upper 32 bits of the partial result are nonzero. */
|
59
arch/sh/lib64/udivsi3.S
Normal file
59
arch/sh/lib64/udivsi3.S
Normal file
@ -0,0 +1,59 @@
|
|||||||
|
/*
 * __udivsi3 - unsigned division helper (taken over from libgcc for sh64).
 *
 * Builds a fixed-point reciprocal of the divisor (r5), multiplies it with
 * the dividend (r4) and then runs correction steps that accumulate the
 * quotient in r18 while the remainder is tracked in r25.  The final
 * quotient is written to r0.  There is no test for a zero divisor.
 */
.global __udivsi3
|
||||||
|
.section .text..SHmedia32,"ax"
|
||||||
|
.align 2
|
||||||
|
|
||||||
|
/*
|
||||||
|
inputs: r4,r5
|
||||||
|
clobbered: r18,r19,r20,r21,r22,r25,tr0
|
||||||
|
result in r0.
|
||||||
|
*/
|
||||||
|
__udivsi3:
|
||||||
|
/* r22 = working copy of the divisor (presumably zero-extended by addz.l - TODO confirm); used by every mulu.l ...,r22 below. */
addz.l r5,r63,r22
|
||||||
|
/* r0 = normalization shift; it is turned into the shift count for the shlrd steps further down. */
nsb r22,r0
|
||||||
|
shlld r22,r0,r25
|
||||||
|
shlri r25,48,r25
|
||||||
|
movi 0xffffffffffffbb0c,r20 /* shift count equiv 76 */
|
||||||
|
sub r20,r25,r21
|
||||||
|
mmulfx.w r21,r21,r19
|
||||||
|
mshflo.w r21,r63,r21
|
||||||
|
/* Latch the return target from r18 now; r18 becomes the quotient accumulator at the first mulu.l below. */
ptabs r18,tr0
|
||||||
|
mmulfx.w r25,r19,r19
|
||||||
|
sub r20,r0,r0
|
||||||
|
/* bubble */
|
||||||
|
msub.w r21,r19,r19
|
||||||
|
|
||||||
|
/*
|
||||||
|
* It would be nice for scheduling to do this add to r21 before
|
||||||
|
* the msub.w, but we need a different value for r19 to keep
|
||||||
|
* errors under control.
|
||||||
|
*/
|
||||||
|
addi r19,-2,r21
|
||||||
|
/* First quotient estimate: dividend times reciprocal, shifted down by r0 a few lines below. */
mulu.l r4,r21,r18
|
||||||
|
mmulfx.w r19,r19,r19
|
||||||
|
shlli r21,15,r21
|
||||||
|
shlrd r18,r0,r18
|
||||||
|
mulu.l r18,r22,r20
|
||||||
|
mmacnfx.wl r25,r19,r21
|
||||||
|
/* bubble */
|
||||||
|
/* r25 = remainder after the first estimate (dividend minus estimate times divisor). */
sub r4,r20,r25
|
||||||
|
|
||||||
|
mulu.l r25,r21,r19
|
||||||
|
addi r0,14,r0
|
||||||
|
/* bubble */
|
||||||
|
shlrd r19,r0,r19
|
||||||
|
mulu.l r19,r22,r20
|
||||||
|
add r18,r19,r18
|
||||||
|
/* bubble */
|
||||||
|
sub.l r25,r20,r25
|
||||||
|
|
||||||
|
mulu.l r25,r21,r19
|
||||||
|
addz.l r25,r63,r25
|
||||||
|
sub r25,r22,r25
|
||||||
|
shlrd r19,r0,r19
|
||||||
|
mulu.l r19,r22,r20
|
||||||
|
addi r25,1,r25
|
||||||
|
add r18,r19,r18
|
||||||
|
|
||||||
|
/* The compare result is added to the accumulated quotient as the last correction. */
cmpgt r25,r20,r25
|
||||||
|
add.l r18,r25,r0
|
||||||
|
/* Return through tr0, which was loaded from r18 by the ptabs above. */
blink tr0,r63
|
Loading…
Reference in New Issue
Block a user