mirror of
https://github.com/torvalds/linux.git
synced 2024-12-12 14:12:51 +00:00
993fef95b9
Like for the memset16/32/64 variants avoid that subsequent mvc instructions depend on each other since that might have negative performance impacts. This patch is currently hardly relevant since at least gcc 7.1 generates only inline memset code and not a single memset call. However there is no reason to not provide an optimized version just in case gcc generates memset calls again, like it did in the past. Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com> Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
177 lines
3.3 KiB
ArmAsm
177 lines
3.3 KiB
ArmAsm
/*
 * String handling functions.
 *
 * Copyright IBM Corp. 2012
 */
|
|
|
|
#include <linux/linkage.h>
|
|
#include <asm/export.h>
|
|
|
|
/*
 * void *memmove(void *dest, const void *src, size_t n)
 *
 * Register usage, as read from the code below:
 *   %r2 - dest; never modified, so it is also the return value
 *   %r3 - src; advanced by the forward block loop
 *   %r4 - n on entry, n - 1 after the initial aghi
 *   %r1 - running destination pointer (copy of %r2)
 *   %r0 - block loop counter (forward) / byte scratch (reverse)
 *   %r5 - src + n for the overlap check, later address of the mvc template
 *
 * The copy runs forward unless src < dest < src + n. Only in that case
 * the bytes are copied one at a time from the last byte down to the first.
 */
ENTRY(memmove)
	ltgr	%r4,%r4			/* n == 0? */
	lgr	%r1,%r2			/* does not disturb the ltgr result */
	bzr	%r14			/* n == 0: nothing to do */
	aghi	%r4,-1			/* n - 1: last index and ex length code */
	clgr	%r2,%r3
	jnh	.Lmemmove_forward	/* dest <= src */
	la	%r5,1(%r4,%r3)		/* src + n */
	clgr	%r2,%r5
	jl	.Lmemmove_reverse	/* src < dest < src + n */
.Lmemmove_forward:
	srlg	%r0,%r4,8		/* (n - 1) / 256 passes of the block loop */
	ltgr	%r0,%r0
	jz	.Lmemmove_forward_remainder
.Lmemmove_forward_loop:
	mvc	0(256,%r1),0(%r3)
	la	%r1,256(%r1)
	la	%r3,256(%r3)
	brctg	%r0,.Lmemmove_forward_loop
.Lmemmove_forward_remainder:
	larl	%r5,.Lmemmove_mvc
	ex	%r4,0(%r5)		/* mvc, length code taken from low byte of %r4 */
	br	%r14
.Lmemmove_reverse:
	/* %r4 >= 1 here: an overlap with dest > src needs at least two bytes */
	ic	%r0,0(%r4,%r3)
	stc	%r0,0(%r4,%r1)
	brctg	%r4,.Lmemmove_reverse
	/* the loop stops with %r4 == 0; copy the byte at index 0 as well */
	ic	%r0,0(%r4,%r3)
	stc	%r0,0(%r4,%r1)
	br	%r14
.Lmemmove_mvc:
	/* template, only run through ex above */
	mvc	0(1,%r1),0(%r3)
ENDPROC(memmove)
EXPORT_SYMBOL(memmove)
|
|
|
|
/*
 * memset implementation
 *
 * This code corresponds to the C construct below. We do distinguish
 * between clearing (c == 0) and setting a memory array (c != 0) simply
 * because nearly all memset invocations in the kernel clear memory and
 * the xc instruction is preferred in such cases.
 *
 * void *memset(void *s, int c, size_t n)
 * {
 *	if (likely(c == 0))
 *		return __builtin_memset(s, 0, n);
 *	return __builtin_memset(s, c, n);
 * }
 *
 * Register usage, as read from the code below:
 *   %r2 - s; never modified, so it is also the return value
 *   %r3 - c; on the clear path reused as loop counter and template address
 *   %r4 - n on entry; n - 1 (clear) or n - 2 (fill) as ex length code
 *   %r1 - running destination pointer (copy of %r2)
 *   %r5 - fill path only: loop counter, later address of the mvc template
 */
ENTRY(memset)
	ltgr	%r4,%r4			/* n == 0? */
	bzr	%r14
	ltgr	%r3,%r3			/* c != 0? */
	jnz	.Lmemset_fill
	/* clear path: c == 0, zero the area with xc */
	aghi	%r4,-1			/* n - 1 */
	srlg	%r3,%r4,8		/* (n - 1) / 256 passes of the block loop */
	ltgr	%r3,%r3
	lgr	%r1,%r2			/* does not disturb the ltgr result */
	jz	.Lmemset_clear_remainder
.Lmemset_clear_loop:
	xc	0(256,%r1),0(%r1)
	la	%r1,256(%r1)
	brctg	%r3,.Lmemset_clear_loop
.Lmemset_clear_remainder:
	larl	%r3,.Lmemset_xc
	ex	%r4,0(%r3)		/* xc, length code taken from low byte of %r4 */
	br	%r14
.Lmemset_fill:
	cghi	%r4,1
	lgr	%r1,%r2			/* does not disturb the cghi result */
	je	.Lmemset_fill_exit	/* n == 1: a single stc is enough */
	/*
	 * Every chunk is written as one stc of the fill byte followed by an
	 * overlapping mvc (source at offset 0, destination at offset 1) that
	 * propagates this byte. One byte comes from the stc and the mvc
	 * length code is its byte count minus one, hence n - 2.
	 */
	aghi	%r4,-2
	srlg	%r5,%r4,8		/* (n - 2) / 256 passes of the block loop */
	ltgr	%r5,%r5
	jz	.Lmemset_fill_remainder
.Lmemset_fill_loop:
	/*
	 * Each 256 byte block gets its own stc so that the mvc of one block
	 * does not depend on the mvc of the previous block.
	 */
	stc	%r3,0(%r1)
	mvc	1(255,%r1),0(%r1)
	la	%r1,256(%r1)
	brctg	%r5,.Lmemset_fill_loop
.Lmemset_fill_remainder:
	stc	%r3,0(%r1)
	larl	%r5,.Lmemset_mvc
	ex	%r4,0(%r5)		/* mvc, length code taken from low byte of %r4 */
	br	%r14
.Lmemset_fill_exit:
	stc	%r3,0(%r1)
	br	%r14
.Lmemset_xc:
	/* template, only run through ex above */
	xc	0(1,%r1),0(%r1)
.Lmemset_mvc:
	/* template, only run through ex above */
	mvc	1(1,%r1),0(%r1)
ENDPROC(memset)
EXPORT_SYMBOL(memset)
|
|
|
|
/*
 * memcpy implementation
 *
 * void *memcpy(void *dest, const void *src, size_t n)
 *
 * Register usage, as read from the code below:
 *   %r2 - dest; never modified, so it is also the return value
 *   %r3 - src; advanced by the block loop
 *   %r4 - n on entry, n - 1 after the initial aghi (ex length code)
 *   %r1 - running destination pointer (copy of %r2)
 *   %r5 - block loop counter, later address of the mvc template
 *
 * The 256 byte block loop is placed after the remainder code, so a copy
 * of at most 256 bytes falls straight through to the ex without a branch.
 */
ENTRY(memcpy)
	ltgr	%r4,%r4			/* n == 0? */
	bzr	%r14
	aghi	%r4,-1			/* n - 1 */
	srlg	%r5,%r4,8		/* (n - 1) / 256 passes of the block loop */
	ltgr	%r5,%r5
	lgr	%r1,%r2			/* does not disturb the ltgr result */
	jnz	.Lmemcpy_loop
.Lmemcpy_remainder:
	larl	%r5,.Lmemcpy_mvc
	ex	%r4,0(%r5)		/* mvc, length code taken from low byte of %r4 */
	br	%r14
.Lmemcpy_loop:
	mvc	0(256,%r1),0(%r3)
	la	%r1,256(%r1)
	la	%r3,256(%r3)
	brctg	%r5,.Lmemcpy_loop
	j	.Lmemcpy_remainder
.Lmemcpy_mvc:
	/* template, only run through ex above */
	mvc	0(1,%r1),0(%r3)
ENDPROC(memcpy)
EXPORT_SYMBOL(memcpy)
|
|
|
|
/*
 * __memset16/32/64
 *
 * void *__memset16(uint16_t *s, uint16_t v, size_t count)
 * void *__memset32(uint32_t *s, uint32_t v, size_t count)
 * void *__memset64(uint64_t *s, uint64_t v, size_t count)
 *
 * Macro parameters:
 *   bits  - suffix of the generated function name and of its local labels
 *   bytes - size of one element in bytes
 *   insn  - store instruction that writes one element from %r3
 *
 * Register usage, as read from the code below:
 *   %r2 - s; never modified, so it is also the return value
 *   %r3 - v
 *   %r4 - count on entry, later the ex length code
 *   %r1 - running destination pointer (copy of %r2)
 *   %r5 - block loop counter, later address of the mvc template
 *
 * NOTE(review): the code handles %r4 as a length in bytes (it is compared
 * against the element size and used to derive mvc lengths), so callers
 * presumably pass the element count multiplied by the element size -
 * confirm against the C wrappers.
 *
 * Every chunk is written as one element store followed by an overlapping
 * mvc (source at offset 0, destination one element further) that
 * propagates the element. Each 256 byte block gets its own store so that
 * the mvc of one block does not depend on the mvc of the previous block.
 */
.macro __MEMSET bits,bytes,insn
ENTRY(__memset\bits)
	ltgr	%r4,%r4			/* nothing to do for a length of zero */
	bzr	%r14
	cghi	%r4,\bytes
	je	.L__memset_exit\bits	/* exactly one element: single store */
	/* one element comes from the store, mvc length code is bytes minus one */
	aghi	%r4,-(\bytes+1)
	srlg	%r5,%r4,8		/* passes of the 256 byte block loop */
	ltgr	%r5,%r5
	lgr	%r1,%r2			/* does not disturb the ltgr result */
	jz	.L__memset_remainder\bits
.L__memset_loop\bits:
	\insn	%r3,0(%r1)
	mvc	\bytes(256-\bytes,%r1),0(%r1)
	la	%r1,256(%r1)
	brctg	%r5,.L__memset_loop\bits
.L__memset_remainder\bits:
	\insn	%r3,0(%r1)
	larl	%r5,.L__memset_mvc\bits
	ex	%r4,0(%r5)		/* mvc, length code taken from low byte of %r4 */
	br	%r14
.L__memset_exit\bits:
	/* %r1 has not been set up on this path, so store through %r2 */
	\insn	%r3,0(%r2)
	br	%r14
.L__memset_mvc\bits:
	/* template, only run through ex above */
	mvc	\bytes(1,%r1),0(%r1)
ENDPROC(__memset\bits)
.endm

__MEMSET 16,2,sth
EXPORT_SYMBOL(__memset16)

__MEMSET 32,4,st
EXPORT_SYMBOL(__memset32)

__MEMSET 64,8,stg
EXPORT_SYMBOL(__memset64)
|