mirror of
https://github.com/torvalds/linux.git
synced 2024-11-01 09:41:44 +00:00
0136611c62
Based on patch from David Rientjes <rientjes@google.com>, but changed by AK. Optimizes the 64-bit hamming weight for x86_64 processors assuming they have fast multiplication. Uses five fewer bitops than the generic hweight64. Benchmark on one EMT64 showed ~25% speedup with 2^24 consecutive calls. Define a new ARCH_HAS_FAST_MULTIPLIER that can be set by other architectures that can also multiply fast. Signed-off-by: Andi Kleen <ak@suse.de>
422 lines
10 KiB
C
422 lines
10 KiB
C
#ifndef _X86_64_BITOPS_H
|
|
#define _X86_64_BITOPS_H
|
|
|
|
/*
|
|
* Copyright 1992, Linus Torvalds.
|
|
*/
|
|
|
|
#include <asm/alternative.h>
|
|
|
|
/*
 * ADDR - view the enclosing function's 'addr' parameter as a volatile long
 * lvalue so it can be handed to inline asm as a memory operand.  Only usable
 * where a variable named 'addr' is in scope; it is #undef'd again once the
 * bit operations have been defined.
 */
#define ADDR (*(volatile long *) addr)
|
|
|
|
/**
|
|
* set_bit - Atomically set a bit in memory
|
|
* @nr: the bit to set
|
|
* @addr: the address to start counting from
|
|
*
|
|
* This function is atomic and may not be reordered. See __set_bit()
|
|
* if you do not require the atomic guarantees.
|
|
* Note that @nr may be almost arbitrarily large; this function is not
|
|
* restricted to acting on a single-word quantity.
|
|
*/
|
|
static __inline__ void set_bit(int nr, volatile void * addr)
|
|
{
|
|
__asm__ __volatile__( LOCK_PREFIX
|
|
"btsl %1,%0"
|
|
:"+m" (ADDR)
|
|
:"dIr" (nr) : "memory");
|
|
}
|
|
|
|
/**
|
|
* __set_bit - Set a bit in memory
|
|
* @nr: the bit to set
|
|
* @addr: the address to start counting from
|
|
*
|
|
* Unlike set_bit(), this function is non-atomic and may be reordered.
|
|
* If it's called on the same region of memory simultaneously, the effect
|
|
* may be that only one operation succeeds.
|
|
*/
|
|
static __inline__ void __set_bit(int nr, volatile void * addr)
|
|
{
|
|
__asm__ volatile(
|
|
"btsl %1,%0"
|
|
:"+m" (ADDR)
|
|
:"dIr" (nr) : "memory");
|
|
}
|
|
|
|
/**
|
|
* clear_bit - Clears a bit in memory
|
|
* @nr: Bit to clear
|
|
* @addr: Address to start counting from
|
|
*
|
|
* clear_bit() is atomic and may not be reordered. However, it does
|
|
* not contain a memory barrier, so if it is used for locking purposes,
|
|
* you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit()
|
|
* in order to ensure changes are visible on other processors.
|
|
*/
|
|
static __inline__ void clear_bit(int nr, volatile void * addr)
|
|
{
|
|
__asm__ __volatile__( LOCK_PREFIX
|
|
"btrl %1,%0"
|
|
:"+m" (ADDR)
|
|
:"dIr" (nr));
|
|
}
|
|
|
|
static __inline__ void __clear_bit(int nr, volatile void * addr)
|
|
{
|
|
__asm__ __volatile__(
|
|
"btrl %1,%0"
|
|
:"+m" (ADDR)
|
|
:"dIr" (nr));
|
|
}
|
|
|
|
/*
 * Barriers to pair with clear_bit() when it is used for locking.  Both
 * simply expand to barrier().
 */
#define smp_mb__before_clear_bit()	barrier()
#define smp_mb__after_clear_bit()	barrier()
|
|
|
|
/**
|
|
* __change_bit - Toggle a bit in memory
|
|
* @nr: the bit to change
|
|
* @addr: the address to start counting from
|
|
*
|
|
* Unlike change_bit(), this function is non-atomic and may be reordered.
|
|
* If it's called on the same region of memory simultaneously, the effect
|
|
* may be that only one operation succeeds.
|
|
*/
|
|
static __inline__ void __change_bit(int nr, volatile void * addr)
|
|
{
|
|
__asm__ __volatile__(
|
|
"btcl %1,%0"
|
|
:"+m" (ADDR)
|
|
:"dIr" (nr));
|
|
}
|
|
|
|
/**
|
|
* change_bit - Toggle a bit in memory
|
|
* @nr: Bit to change
|
|
* @addr: Address to start counting from
|
|
*
|
|
* change_bit() is atomic and may not be reordered.
|
|
* Note that @nr may be almost arbitrarily large; this function is not
|
|
* restricted to acting on a single-word quantity.
|
|
*/
|
|
static __inline__ void change_bit(int nr, volatile void * addr)
|
|
{
|
|
__asm__ __volatile__( LOCK_PREFIX
|
|
"btcl %1,%0"
|
|
:"+m" (ADDR)
|
|
:"dIr" (nr));
|
|
}
|
|
|
|
/**
|
|
* test_and_set_bit - Set a bit and return its old value
|
|
* @nr: Bit to set
|
|
* @addr: Address to count from
|
|
*
|
|
* This operation is atomic and cannot be reordered.
|
|
* It also implies a memory barrier.
|
|
*/
|
|
static __inline__ int test_and_set_bit(int nr, volatile void * addr)
|
|
{
|
|
int oldbit;
|
|
|
|
__asm__ __volatile__( LOCK_PREFIX
|
|
"btsl %2,%1\n\tsbbl %0,%0"
|
|
:"=r" (oldbit),"+m" (ADDR)
|
|
:"dIr" (nr) : "memory");
|
|
return oldbit;
|
|
}
|
|
|
|
/**
|
|
* __test_and_set_bit - Set a bit and return its old value
|
|
* @nr: Bit to set
|
|
* @addr: Address to count from
|
|
*
|
|
* This operation is non-atomic and can be reordered.
|
|
* If two examples of this operation race, one can appear to succeed
|
|
* but actually fail. You must protect multiple accesses with a lock.
|
|
*/
|
|
static __inline__ int __test_and_set_bit(int nr, volatile void * addr)
|
|
{
|
|
int oldbit;
|
|
|
|
__asm__(
|
|
"btsl %2,%1\n\tsbbl %0,%0"
|
|
:"=r" (oldbit),"+m" (ADDR)
|
|
:"dIr" (nr));
|
|
return oldbit;
|
|
}
|
|
|
|
/**
|
|
* test_and_clear_bit - Clear a bit and return its old value
|
|
* @nr: Bit to clear
|
|
* @addr: Address to count from
|
|
*
|
|
* This operation is atomic and cannot be reordered.
|
|
* It also implies a memory barrier.
|
|
*/
|
|
static __inline__ int test_and_clear_bit(int nr, volatile void * addr)
|
|
{
|
|
int oldbit;
|
|
|
|
__asm__ __volatile__( LOCK_PREFIX
|
|
"btrl %2,%1\n\tsbbl %0,%0"
|
|
:"=r" (oldbit),"+m" (ADDR)
|
|
:"dIr" (nr) : "memory");
|
|
return oldbit;
|
|
}
|
|
|
|
/**
|
|
* __test_and_clear_bit - Clear a bit and return its old value
|
|
* @nr: Bit to clear
|
|
* @addr: Address to count from
|
|
*
|
|
* This operation is non-atomic and can be reordered.
|
|
* If two examples of this operation race, one can appear to succeed
|
|
* but actually fail. You must protect multiple accesses with a lock.
|
|
*/
|
|
static __inline__ int __test_and_clear_bit(int nr, volatile void * addr)
|
|
{
|
|
int oldbit;
|
|
|
|
__asm__(
|
|
"btrl %2,%1\n\tsbbl %0,%0"
|
|
:"=r" (oldbit),"+m" (ADDR)
|
|
:"dIr" (nr));
|
|
return oldbit;
|
|
}
|
|
|
|
/* WARNING: non atomic and it can be reordered! */
|
|
static __inline__ int __test_and_change_bit(int nr, volatile void * addr)
|
|
{
|
|
int oldbit;
|
|
|
|
__asm__ __volatile__(
|
|
"btcl %2,%1\n\tsbbl %0,%0"
|
|
:"=r" (oldbit),"+m" (ADDR)
|
|
:"dIr" (nr) : "memory");
|
|
return oldbit;
|
|
}
|
|
|
|
/**
|
|
* test_and_change_bit - Change a bit and return its old value
|
|
* @nr: Bit to change
|
|
* @addr: Address to count from
|
|
*
|
|
* This operation is atomic and cannot be reordered.
|
|
* It also implies a memory barrier.
|
|
*/
|
|
static __inline__ int test_and_change_bit(int nr, volatile void * addr)
|
|
{
|
|
int oldbit;
|
|
|
|
__asm__ __volatile__( LOCK_PREFIX
|
|
"btcl %2,%1\n\tsbbl %0,%0"
|
|
:"=r" (oldbit),"+m" (ADDR)
|
|
:"dIr" (nr) : "memory");
|
|
return oldbit;
|
|
}
|
|
|
|
#if 0 /* Fool kernel-doc since it doesn't do macros yet */
/**
 * test_bit - Determine whether a bit is set
 * @nr: bit number to test
 * @addr: Address to start counting from
 *
 * This prototype is never compiled; it exists only so that kernel-doc
 * has something to attach to.  The real test_bit() is a macro.
 */
static int test_bit(int nr, const volatile void *addr);
#endif
|
|
|
|
/*
 * constant_test_bit - plain C bit test.
 *
 * Treats @addr as an array of 32-bit words, picks word nr >> 5 and tests
 * bit nr & 31 inside it.  Returns 1 if the bit is set, 0 otherwise.
 */
static __inline__ int constant_test_bit(int nr, const volatile void *addr)
{
	const volatile unsigned int *words = (const volatile unsigned int *) addr;
	unsigned long mask = 1UL << (nr & 31);

	return (mask & words[nr >> 5]) != 0;
}
|
|
|
|
static __inline__ int variable_test_bit(int nr, volatile const void * addr)
|
|
{
|
|
int oldbit;
|
|
|
|
__asm__ __volatile__(
|
|
"btl %2,%1\n\tsbbl %0,%0"
|
|
:"=r" (oldbit)
|
|
:"m" (ADDR),"dIr" (nr));
|
|
return oldbit;
|
|
}
|
|
|
|
/*
 * test_bit - dispatch on whether @nr is a compile-time constant.
 *
 * Constant indices use the plain C constant_test_bit(); anything else goes
 * through the asm variable_test_bit().  The two do not agree on the value
 * returned for a set bit (1 versus -1), so callers should only test the
 * result for zero / non-zero.
 */
#define test_bit(nr,addr)				\
	(__builtin_constant_p(nr)			\
		? constant_test_bit((nr),(addr))	\
		: variable_test_bit((nr),(addr)))
|
|
|
|
#undef ADDR
|
|
|
|
/*
 * Out-of-line bitmap search routines, implemented elsewhere.  The
 * like-named macros defined further down fall back to these whenever
 * the size is not a small compile-time constant.
 */
extern long find_first_zero_bit(const unsigned long *addr, unsigned long size);
extern long find_next_zero_bit(const unsigned long *addr, long size, long offset);
extern long find_first_bit(const unsigned long *addr, unsigned long size);
extern long find_next_bit(const unsigned long *addr, long size, long offset);
|
|
|
|
/*
 * __scanbit - return index of first bit set in val or max when no bit is set
 * @val: the word to scan
 * @max: value to return when @val is zero
 *
 * "bsfq" sets ZF when its source is zero; the following "cmovz" then
 * replaces the (unspecified) result with @max.  The output is marked
 * early-clobber so that it can never share a register with @max, which is
 * still needed after "bsfq" has written its result.
 */
static inline unsigned long __scanbit(unsigned long val, unsigned long max)
{
	/* __asm__ (not plain asm) to match the rest of this header */
	__asm__("bsfq %1,%0 ; cmovz %2,%0" : "=&r" (val) : "r" (val), "r" (max));
	return val;
}
|
|
|
|
/*
 * find_first_bit - index of the first set bit in a bitmap.
 *
 * When @size is a compile-time constant no larger than BITS_PER_LONG the
 * single word at @addr is scanned inline with __scanbit(), which yields
 * @size if the word is zero.  Otherwise the out-of-line find_first_bit()
 * function is called (a macro's own name is not re-expanded inside its
 * expansion).
 *
 * @addr is parenthesized under the cast so that expressions such as
 * "map + 1" are dereferenced as a whole.  Arguments may be evaluated more
 * than once.
 */
#define find_first_bit(addr,size) \
	((__builtin_constant_p(size) && (size) <= BITS_PER_LONG ? \
	  (__scanbit(*(unsigned long *)(addr),(size))) : \
	  find_first_bit(addr,size)))
|
|
|
|
/*
 * find_next_bit - index of the first set bit at or after @off.
 *
 * When @size is a compile-time constant no larger than BITS_PER_LONG the
 * single word at @addr is shifted right by @off and scanned inline with
 * __scanbit(); if nothing is set the result is @off + (@size - @off),
 * i.e. @size.  Otherwise the out-of-line find_next_bit() function is
 * called (a macro's own name is not re-expanded inside its expansion).
 *
 * The inline path does not mask off bits at or above @size, so callers
 * should treat any result >= @size as "not found".
 *
 * @addr is parenthesized under the cast so that expressions such as
 * "map + 1" are dereferenced as a whole.  Arguments may be evaluated more
 * than once.
 */
#define find_next_bit(addr,size,off) \
	((__builtin_constant_p(size) && (size) <= BITS_PER_LONG ? \
	  ((off) + (__scanbit((*(unsigned long *)(addr)) >> (off),(size)-(off)))) : \
	  find_next_bit(addr,size,off)))
|
|
|
|
/*
 * find_first_zero_bit - index of the first clear bit in a bitmap.
 *
 * When @size is a compile-time constant no larger than BITS_PER_LONG the
 * complement of the single word at @addr is scanned inline with
 * __scanbit(), which yields @size if every bit of the word is set.
 * Otherwise the out-of-line find_first_zero_bit() function is called
 * (a macro's own name is not re-expanded inside its expansion).
 *
 * @addr is parenthesized under the cast so that expressions such as
 * "map + 1" are dereferenced as a whole.  Arguments may be evaluated more
 * than once.
 */
#define find_first_zero_bit(addr,size) \
	((__builtin_constant_p(size) && (size) <= BITS_PER_LONG ? \
	  (__scanbit(~*(unsigned long *)(addr),(size))) : \
	  find_first_zero_bit(addr,size)))
|
|
|
|
/*
 * find_next_zero_bit - index of the first clear bit at or after @off.
 *
 * When @size is a compile-time constant no larger than BITS_PER_LONG the
 * single word at @addr is shifted right by @off, complemented and scanned
 * inline with __scanbit().  Otherwise the out-of-line
 * find_next_zero_bit() function is called (a macro's own name is not
 * re-expanded inside its expansion).
 *
 * The zeros shifted in at the top of the word read as clear bits after
 * the complement, so the inline path can return a value >= @size; callers
 * should treat any such result as "not found".
 *
 * @addr is parenthesized under the cast so that expressions such as
 * "map + 1" are dereferenced as a whole.  Arguments may be evaluated more
 * than once.
 */
#define find_next_zero_bit(addr,size,off) \
	((__builtin_constant_p(size) && (size) <= BITS_PER_LONG ? \
	  ((off)+(__scanbit(~(((*(unsigned long *)(addr))) >> (off)),(size)-(off)))) : \
	  find_next_zero_bit(addr,size,off)))
|
|
|
|
/*
 * Find string of zero bits in a bitmap.  -1 when not found (the return
 * type is unsigned long, so compare against -1 converted to that type).
 * Implemented out of line.
 */
extern unsigned long find_next_zero_string(unsigned long *bitmap, long start,
					   long nbits, int len);
|
|
|
|
/*
 * set_bit_string - set @len consecutive bits of @bitmap, starting at bit @i.
 *
 * Each bit is set with the non-atomic __set_bit(), in ascending order.
 */
static inline void set_bit_string(unsigned long *bitmap, unsigned long i,
				  int len)
{
	unsigned long stop = i + len;
	unsigned long bit;

	for (bit = i; bit < stop; bit++)
		__set_bit(bit, bitmap);
}
|
|
|
|
/*
 * __clear_bit_string - clear @len consecutive bits of @bitmap, starting at
 * bit @i.
 *
 * Each bit is cleared with the non-atomic __clear_bit(), in ascending order.
 */
static inline void __clear_bit_string(unsigned long *bitmap, unsigned long i,
				      int len)
{
	unsigned long stop = i + len;
	unsigned long bit;

	for (bit = i; bit < stop; bit++)
		__clear_bit(bit, bitmap);
}
|
|
|
|
/**
 * ffz - find first zero in word.
 * @word: The word to search
 *
 * Runs "bsfq" over the complement of @word and returns the 0-based index
 * it reports.
 *
 * Undefined if no zero exists, so code should check against ~0UL first.
 */
static __inline__ unsigned long ffz(unsigned long word)
{
	unsigned long index;

	__asm__("bsfq %[src],%[dst]"
		: [dst] "=r" (index)
		: [src] "r" (~word));

	return index;
}
|
|
|
|
/**
 * __ffs - find first bit in word.
 * @word: The word to search
 *
 * Returns the 0-based index reported by "bsfq" for @word.
 *
 * Undefined if no bit exists, so code should check against 0 first.
 */
static __inline__ unsigned long __ffs(unsigned long word)
{
	unsigned long index;

	__asm__("bsfq %[src],%[dst]"
		: [dst] "=r" (index)
		: [src] "rm" (word));

	return index;
}
|
|
|
|
/**
 * __fls - find last (most significant) set bit in word.
 * @word: The word to search
 *
 * Returns the 0-based index reported by "bsrq" for @word.
 *
 * Undefined if no set bit exists, so code should check against 0 first.
 */
static __inline__ unsigned long __fls(unsigned long word)
{
	__asm__("bsrq %1,%0"
		:"=r" (word)
		:"rm" (word));
	return word;
}
|
|
|
|
#ifdef __KERNEL__
|
|
|
|
#include <asm-generic/bitops/sched.h>
|
|
|
|
/**
 * ffs - find first bit set
 * @x: the word to search
 *
 * This is defined the same way as
 * the libc and compiler builtin ffs routines, therefore
 * differs in spirit from the above ffz (man ffs).
 *
 * Returns the 1-based position of the least significant set bit of @x,
 * or 0 when @x is zero.
 */
static __inline__ int ffs(int x)
{
	int r;

	/*
	 * "bsfl" writes %0 before "cmovzl" reads %2, so the output must be
	 * early-clobber ("=&r", as in fls()); otherwise the compiler may
	 * place it in the register holding the -1 fallback.
	 */
	__asm__("bsfl %1,%0\n\t"
		"cmovzl %2,%0"
		: "=&r" (r) : "rm" (x), "r" (-1));
	return r+1;
}
|
|
|
|
/**
|
|
* fls64 - find last bit set in 64 bit word
|
|
* @x: the word to search
|
|
*
|
|
* This is defined the same way as fls.
|
|
*/
|
|
static __inline__ int fls64(__u64 x)
|
|
{
|
|
if (x == 0)
|
|
return 0;
|
|
return __fls(x) + 1;
|
|
}
|
|
|
|
/**
 * fls - find last bit set
 * @x: the word to search
 *
 * This is defined the same way as ffs: the result is the 1-based position
 * of the most significant set bit of @x, or 0 when @x is zero.
 */
static __inline__ int fls(int x)
{
	int index;

	/* cmovzl substitutes -1 when bsrl found no bit, giving 0 after the +1 */
	__asm__("bsrl %[src],%[dst]\n\t"
		"cmovzl %[none],%[dst]"
		: [dst] "=&r" (index)
		: [src] "rm" (x), [none] "rm" (-1));

	return index + 1;
}
|
|
|
|
/*
 * Advertise that multiplication is fast on this architecture.  Defined just
 * ahead of the generic hweight header so the hamming-weight code can take
 * its multiply-based path.
 */
#define ARCH_HAS_FAST_MULTIPLIER 1
|
|
|
|
#include <asm-generic/bitops/hweight.h>
|
|
|
|
#endif /* __KERNEL__ */
|
|
|
|
#ifdef __KERNEL__
|
|
|
|
#include <asm-generic/bitops/ext2-non-atomic.h>
|
|
|
|
/*
 * Atomic ext2 bitmap helpers.  The @lock argument is ignored: the work is
 * done by the atomic test_and_set_bit() / test_and_clear_bit().
 *
 * @addr is parenthesized under the cast so that an expression argument
 * (e.g. "base + off") is converted as a whole.
 */
#define ext2_set_bit_atomic(lock,nr,addr) \
	test_and_set_bit((nr),(unsigned long*)(addr))
#define ext2_clear_bit_atomic(lock,nr,addr) \
	test_and_clear_bit((nr),(unsigned long*)(addr))
|
|
|
|
#include <asm-generic/bitops/minix.h>
|
|
|
|
#endif /* __KERNEL__ */
|
|
|
|
#endif /* _X86_64_BITOPS_H */
|