mirror of
https://github.com/torvalds/linux.git
synced 2024-11-22 12:11:40 +00:00
string: memchr_inv() speed improvements
- Generate a 64-bit pattern more efficiently memchr_inv needs to generate a 64-bit pattern filled with a target character. The operation can be done by more efficient way. - Don't call the slow check_bytes() if the memory area is 64-bit aligned memchr_inv compares contiguous 64-bit words with the 64-bit pattern as much as possible. The outside of the region is checked by check_bytes() that scans for each byte. Unfortunately, the first 64-bit word is unexpectedly scanned by check_bytes() even if the memory area is aligned to a 64-bit boundary. Both changes were originally suggested by Eric Dumazet. Signed-off-by: Akinobu Mita <akinobu.mita@gmail.com> Suggested-by: Eric Dumazet <eric.dumazet@gmail.com> Cc: Brian Norris <computersforpeace@gmail.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
parent
a403d930c5
commit
f43804bf5f
20
lib/string.c
20
lib/string.c
@ -785,12 +785,24 @@ void *memchr_inv(const void *start, int c, size_t bytes)
|
||||
if (bytes <= 16)
|
||||
return check_bytes8(start, value, bytes);
|
||||
|
||||
value64 = value | value << 8 | value << 16 | value << 24;
|
||||
value64 = (value64 & 0xffffffff) | value64 << 32;
|
||||
prefix = 8 - ((unsigned long)start) % 8;
|
||||
value64 = value;
|
||||
#if defined(ARCH_HAS_FAST_MULTIPLIER) && BITS_PER_LONG == 64
|
||||
value64 *= 0x0101010101010101;
|
||||
#elif defined(ARCH_HAS_FAST_MULTIPLIER)
|
||||
value64 *= 0x01010101;
|
||||
value64 |= value64 << 32;
|
||||
#else
|
||||
value64 |= value64 << 8;
|
||||
value64 |= value64 << 16;
|
||||
value64 |= value64 << 32;
|
||||
#endif
|
||||
|
||||
prefix = (unsigned long)start % 8;
|
||||
if (prefix) {
|
||||
u8 *r = check_bytes8(start, value, prefix);
|
||||
u8 *r;
|
||||
|
||||
prefix = 8 - prefix;
|
||||
r = check_bytes8(start, value, prefix);
|
||||
if (r)
|
||||
return r;
|
||||
start += prefix;
|
||||
|
Loading…
Reference in New Issue
Block a user