s390: implement memset16, memset32 & memset64
Provide fast versions of the new memset variants. E.g. the generic memset64 is ten times slower than the optimized version if used on a whole page. Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com> Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
This commit is contained in:
committed by
Martin Schwidefsky
parent
3bdf5679c9
commit
0b77d6701c
@@ -126,3 +126,47 @@ ENTRY(memcpy)
|
||||
.Lmemcpy_mvc:
|
||||
mvc 0(1,%r1),0(%r3)
|
||||
EXPORT_SYMBOL(memcpy)
|
||||
|
||||
/*
 * __memset16/32/64 - fill a buffer with a 16, 32 or 64 bit pattern
 *
 * void *__memset16(uint16_t *s, uint16_t v, size_t count)
 * void *__memset32(uint32_t *s, uint32_t v, size_t count)
 * void *__memset64(uint64_t *s, uint64_t v, size_t count)
 *
 * Register usage:
 *   %r2  s; never written, so it is still the buffer address on return
 *   %r3  v; the pattern, stored with \insn
 *   %r4  count; handled as a length in bytes (it is compared against the
 *        pattern size \bytes and reduced by \bytes+1 to form the mvc
 *        length code of the tail)
 *   %r1  running destination pointer (clobbered)
 *   %r5  number of 256 byte blocks, later the address of the mvc
 *        template (clobbered)
 * The condition code is clobbered as well.
 *
 * NOTE(review): a non-zero count is presumably always a multiple of
 * \bytes; nothing in here checks that - confirm against the callers.
 *
 * Macro parameters:
 *   bits   suffix of the generated function name (__memset\bits)
 *   bytes  size of one pattern element in bytes
 *   insn   store instruction that writes one pattern element
 */
.macro __MEMSET bits,bytes,insn
ENTRY(__memset\bits)
	ltgr	%r4,%r4				/* count == 0: nothing to do */
	jz	.L__memset_ret\bits
	cghi	%r4,\bytes			/* exactly one element? */
	je	.L__memset_one\bits
	lgr	%r1,%r2				/* %r1 = destination cursor */
	aghi	%r4,-(\bytes+1)			/* low byte = mvc length code of the tail */
	srlg	%r5,%r4,8			/* %r5 = number of 256 byte blocks */
	ltgr	%r5,%r5
	jz	.L__memset_tail\bits
	/*
	 * Fill 256 bytes per iteration: store one element, then let the
	 * overlapping mvc (destination = source + \bytes, processed byte by
	 * byte from left to right) replicate it over the rest of the block.
	 */
.L__memset_block\bits:
	\insn	%r3,0(%r1)
	mvc	\bytes(256-\bytes,%r1),0(%r1)
	la	%r1,256(%r1)
	brctg	%r5,.L__memset_block\bits
	/*
	 * Tail: store one element and execute the mvc template with its
	 * length field taken from the low byte of %r4.
	 */
.L__memset_tail\bits:
	\insn	%r3,0(%r1)
	larl	%r5,.L__memset_fill\bits
	ex	%r4,0(%r5)
	br	%r14
.L__memset_one\bits:
	\insn	%r3,0(%r2)			/* single element, no mvc needed */
.L__memset_ret\bits:
	br	%r14
	/* Never reached by fall-through; only used as the target of ex above. */
.L__memset_fill\bits:
	mvc	\bytes(1,%r1),0(%r1)
.endm
|
||||
|
||||
/* __memset16: 2 byte pattern, written with sth */
__MEMSET bits=16,bytes=2,insn=sth
EXPORT_SYMBOL(__memset16)

/* __memset32: 4 byte pattern, written with st */
__MEMSET bits=32,bytes=4,insn=st
EXPORT_SYMBOL(__memset32)

/* __memset64: 8 byte pattern, written with stg */
__MEMSET bits=64,bytes=8,insn=stg
EXPORT_SYMBOL(__memset64)
|
||||
|
||||
Reference in New Issue
Block a user