MIPS: microMIPS: Optimise 'memset' core library function.
Optimise 'memset' to use microMIPS instructions and/or optimisations for binary size reduction. When the microMIPS ISA is not being used, the library function compiles to the original binary code.

Signed-off-by: Steven J. Hill <Steven.Hill@imgtec.com>
This commit is contained in:
parent
bce860833a
commit
26c5e07d14
@ -296,6 +296,7 @@ symbol = value
|
|||||||
#define LONG_SUBU subu
|
#define LONG_SUBU subu
|
||||||
#define LONG_L lw
|
#define LONG_L lw
|
||||||
#define LONG_S sw
|
#define LONG_S sw
|
||||||
|
#define LONG_SP swp
|
||||||
#define LONG_SLL sll
|
#define LONG_SLL sll
|
||||||
#define LONG_SLLV sllv
|
#define LONG_SLLV sllv
|
||||||
#define LONG_SRL srl
|
#define LONG_SRL srl
|
||||||
@ -318,6 +319,7 @@ symbol = value
|
|||||||
#define LONG_SUBU dsubu
|
#define LONG_SUBU dsubu
|
||||||
#define LONG_L ld
|
#define LONG_L ld
|
||||||
#define LONG_S sd
|
#define LONG_S sd
|
||||||
|
#define LONG_SP sdp
|
||||||
#define LONG_SLL dsll
|
#define LONG_SLL dsll
|
||||||
#define LONG_SLLV dsllv
|
#define LONG_SLLV dsllv
|
||||||
#define LONG_SRL dsrl
|
#define LONG_SRL dsrl
|
||||||
|
@ -5,7 +5,8 @@
|
|||||||
*
|
*
|
||||||
* Copyright (C) 1998, 1999, 2000 by Ralf Baechle
|
* Copyright (C) 1998, 1999, 2000 by Ralf Baechle
|
||||||
* Copyright (C) 1999, 2000 Silicon Graphics, Inc.
|
* Copyright (C) 1999, 2000 Silicon Graphics, Inc.
|
||||||
* Copyright (C) 2007 Maciej W. Rozycki
|
* Copyright (C) 2007 by Maciej W. Rozycki
|
||||||
|
* Copyright (C) 2011, 2012 MIPS Technologies, Inc.
|
||||||
*/
|
*/
|
||||||
#include <asm/asm.h>
|
#include <asm/asm.h>
|
||||||
#include <asm/asm-offsets.h>
|
#include <asm/asm-offsets.h>
|
||||||
@ -19,6 +20,20 @@
|
|||||||
#define LONG_S_R sdr
|
#define LONG_S_R sdr
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef CONFIG_CPU_MICROMIPS
|
||||||
|
#define STORSIZE (LONGSIZE * 2)
|
||||||
|
#define STORMASK (STORSIZE - 1)
|
||||||
|
#define FILL64RG t8
|
||||||
|
#define FILLPTRG t7
|
||||||
|
#undef LONG_S
|
||||||
|
#define LONG_S LONG_SP
|
||||||
|
#else
|
||||||
|
#define STORSIZE LONGSIZE
|
||||||
|
#define STORMASK LONGMASK
|
||||||
|
#define FILL64RG a1
|
||||||
|
#define FILLPTRG t0
|
||||||
|
#endif
|
||||||
|
|
||||||
#define EX(insn,reg,addr,handler) \
|
#define EX(insn,reg,addr,handler) \
|
||||||
9: insn reg, addr; \
|
9: insn reg, addr; \
|
||||||
.section __ex_table,"a"; \
|
.section __ex_table,"a"; \
|
||||||
@ -26,23 +41,25 @@
|
|||||||
.previous
|
.previous
|
||||||
|
|
||||||
.macro f_fill64 dst, offset, val, fixup
|
.macro f_fill64 dst, offset, val, fixup
|
||||||
EX(LONG_S, \val, (\offset + 0 * LONGSIZE)(\dst), \fixup)
|
EX(LONG_S, \val, (\offset + 0 * STORSIZE)(\dst), \fixup)
|
||||||
EX(LONG_S, \val, (\offset + 1 * LONGSIZE)(\dst), \fixup)
|
EX(LONG_S, \val, (\offset + 1 * STORSIZE)(\dst), \fixup)
|
||||||
EX(LONG_S, \val, (\offset + 2 * LONGSIZE)(\dst), \fixup)
|
EX(LONG_S, \val, (\offset + 2 * STORSIZE)(\dst), \fixup)
|
||||||
EX(LONG_S, \val, (\offset + 3 * LONGSIZE)(\dst), \fixup)
|
EX(LONG_S, \val, (\offset + 3 * STORSIZE)(\dst), \fixup)
|
||||||
EX(LONG_S, \val, (\offset + 4 * LONGSIZE)(\dst), \fixup)
|
#if ((defined(CONFIG_CPU_MICROMIPS) && (LONGSIZE == 4)) || !defined(CONFIG_CPU_MICROMIPS))
|
||||||
EX(LONG_S, \val, (\offset + 5 * LONGSIZE)(\dst), \fixup)
|
EX(LONG_S, \val, (\offset + 4 * STORSIZE)(\dst), \fixup)
|
||||||
EX(LONG_S, \val, (\offset + 6 * LONGSIZE)(\dst), \fixup)
|
EX(LONG_S, \val, (\offset + 5 * STORSIZE)(\dst), \fixup)
|
||||||
EX(LONG_S, \val, (\offset + 7 * LONGSIZE)(\dst), \fixup)
|
EX(LONG_S, \val, (\offset + 6 * STORSIZE)(\dst), \fixup)
|
||||||
#if LONGSIZE == 4
|
EX(LONG_S, \val, (\offset + 7 * STORSIZE)(\dst), \fixup)
|
||||||
EX(LONG_S, \val, (\offset + 8 * LONGSIZE)(\dst), \fixup)
|
#endif
|
||||||
EX(LONG_S, \val, (\offset + 9 * LONGSIZE)(\dst), \fixup)
|
#if (!defined(CONFIG_CPU_MICROMIPS) && (LONGSIZE == 4))
|
||||||
EX(LONG_S, \val, (\offset + 10 * LONGSIZE)(\dst), \fixup)
|
EX(LONG_S, \val, (\offset + 8 * STORSIZE)(\dst), \fixup)
|
||||||
EX(LONG_S, \val, (\offset + 11 * LONGSIZE)(\dst), \fixup)
|
EX(LONG_S, \val, (\offset + 9 * STORSIZE)(\dst), \fixup)
|
||||||
EX(LONG_S, \val, (\offset + 12 * LONGSIZE)(\dst), \fixup)
|
EX(LONG_S, \val, (\offset + 10 * STORSIZE)(\dst), \fixup)
|
||||||
EX(LONG_S, \val, (\offset + 13 * LONGSIZE)(\dst), \fixup)
|
EX(LONG_S, \val, (\offset + 11 * STORSIZE)(\dst), \fixup)
|
||||||
EX(LONG_S, \val, (\offset + 14 * LONGSIZE)(\dst), \fixup)
|
EX(LONG_S, \val, (\offset + 12 * STORSIZE)(\dst), \fixup)
|
||||||
EX(LONG_S, \val, (\offset + 15 * LONGSIZE)(\dst), \fixup)
|
EX(LONG_S, \val, (\offset + 13 * STORSIZE)(\dst), \fixup)
|
||||||
|
EX(LONG_S, \val, (\offset + 14 * STORSIZE)(\dst), \fixup)
|
||||||
|
EX(LONG_S, \val, (\offset + 15 * STORSIZE)(\dst), \fixup)
|
||||||
#endif
|
#endif
|
||||||
.endm
|
.endm
|
||||||
|
|
||||||
@ -71,16 +88,20 @@ LEAF(memset)
|
|||||||
1:
|
1:
|
||||||
|
|
||||||
FEXPORT(__bzero)
|
FEXPORT(__bzero)
|
||||||
sltiu t0, a2, LONGSIZE /* very small region? */
|
sltiu t0, a2, STORSIZE /* very small region? */
|
||||||
bnez t0, .Lsmall_memset
|
bnez t0, .Lsmall_memset
|
||||||
andi t0, a0, LONGMASK /* aligned? */
|
andi t0, a0, STORMASK /* aligned? */
|
||||||
|
|
||||||
|
#ifdef CONFIG_CPU_MICROMIPS
|
||||||
|
move t8, a1 /* used by 'swp' instruction */
|
||||||
|
move t9, a1
|
||||||
|
#endif
|
||||||
#ifndef CONFIG_CPU_DADDI_WORKAROUNDS
|
#ifndef CONFIG_CPU_DADDI_WORKAROUNDS
|
||||||
beqz t0, 1f
|
beqz t0, 1f
|
||||||
PTR_SUBU t0, LONGSIZE /* alignment in bytes */
|
PTR_SUBU t0, STORSIZE /* alignment in bytes */
|
||||||
#else
|
#else
|
||||||
.set noat
|
.set noat
|
||||||
li AT, LONGSIZE
|
li AT, STORSIZE
|
||||||
beqz t0, 1f
|
beqz t0, 1f
|
||||||
PTR_SUBU t0, AT /* alignment in bytes */
|
PTR_SUBU t0, AT /* alignment in bytes */
|
||||||
.set at
|
.set at
|
||||||
@ -99,24 +120,27 @@ FEXPORT(__bzero)
|
|||||||
1: ori t1, a2, 0x3f /* # of full blocks */
|
1: ori t1, a2, 0x3f /* # of full blocks */
|
||||||
xori t1, 0x3f
|
xori t1, 0x3f
|
||||||
beqz t1, .Lmemset_partial /* no block to fill */
|
beqz t1, .Lmemset_partial /* no block to fill */
|
||||||
andi t0, a2, 0x40-LONGSIZE
|
andi t0, a2, 0x40-STORSIZE
|
||||||
|
|
||||||
PTR_ADDU t1, a0 /* end address */
|
PTR_ADDU t1, a0 /* end address */
|
||||||
.set reorder
|
.set reorder
|
||||||
1: PTR_ADDIU a0, 64
|
1: PTR_ADDIU a0, 64
|
||||||
R10KCBARRIER(0(ra))
|
R10KCBARRIER(0(ra))
|
||||||
f_fill64 a0, -64, a1, .Lfwd_fixup
|
f_fill64 a0, -64, FILL64RG, .Lfwd_fixup
|
||||||
bne t1, a0, 1b
|
bne t1, a0, 1b
|
||||||
.set noreorder
|
.set noreorder
|
||||||
|
|
||||||
.Lmemset_partial:
|
.Lmemset_partial:
|
||||||
R10KCBARRIER(0(ra))
|
R10KCBARRIER(0(ra))
|
||||||
PTR_LA t1, 2f /* where to start */
|
PTR_LA t1, 2f /* where to start */
|
||||||
|
#ifdef CONFIG_CPU_MICROMIPS
|
||||||
|
LONG_SRL t7, t0, 1
|
||||||
|
#endif
|
||||||
#if LONGSIZE == 4
|
#if LONGSIZE == 4
|
||||||
PTR_SUBU t1, t0
|
PTR_SUBU t1, FILLPTRG
|
||||||
#else
|
#else
|
||||||
.set noat
|
.set noat
|
||||||
LONG_SRL AT, t0, 1
|
LONG_SRL AT, FILLPTRG, 1
|
||||||
PTR_SUBU t1, AT
|
PTR_SUBU t1, AT
|
||||||
.set at
|
.set at
|
||||||
#endif
|
#endif
|
||||||
@ -126,9 +150,9 @@ FEXPORT(__bzero)
|
|||||||
.set push
|
.set push
|
||||||
.set noreorder
|
.set noreorder
|
||||||
.set nomacro
|
.set nomacro
|
||||||
f_fill64 a0, -64, a1, .Lpartial_fixup /* ... but first do longs ... */
|
f_fill64 a0, -64, FILL64RG, .Lpartial_fixup /* ... but first do longs ... */
|
||||||
2: .set pop
|
2: .set pop
|
||||||
andi a2, LONGMASK /* At most one long to go */
|
andi a2, STORMASK /* At most one long to go */
|
||||||
|
|
||||||
beqz a2, 1f
|
beqz a2, 1f
|
||||||
PTR_ADDU a0, a2 /* What's left */
|
PTR_ADDU a0, a2 /* What's left */
|
||||||
@ -169,7 +193,7 @@ FEXPORT(__bzero)
|
|||||||
|
|
||||||
.Lpartial_fixup:
|
.Lpartial_fixup:
|
||||||
PTR_L t0, TI_TASK($28)
|
PTR_L t0, TI_TASK($28)
|
||||||
andi a2, LONGMASK
|
andi a2, STORMASK
|
||||||
LONG_L t0, THREAD_BUADDR(t0)
|
LONG_L t0, THREAD_BUADDR(t0)
|
||||||
LONG_ADDU a2, t1
|
LONG_ADDU a2, t1
|
||||||
jr ra
|
jr ra
|
||||||
@ -177,4 +201,4 @@ FEXPORT(__bzero)
|
|||||||
|
|
||||||
.Llast_fixup:
|
.Llast_fixup:
|
||||||
jr ra
|
jr ra
|
||||||
andi v1, a2, LONGMASK
|
andi v1, a2, STORMASK
|
||||||
|
Loading…
Reference in New Issue
Block a user