[PATCH] x86-64: bitops fix for -Os

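This fixes the x86-64 find_[first|next]_zero_bit() functions for the
end-of-range case.  They tested for a zero size in bits rather than in
words, so for a size that rounds to zero words the "rep scas" ran no
iterations and the conditional jump after it acted on stale flags.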

Signed-off-by: Alexandre Oliva <oliva@lsd.ic.unicamp.br>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
This commit is contained in:
Alexandre Oliva 2005-10-31 18:29:36 -02:00 committed by Linus Torvalds
parent cfa024f4e4
commit 06024f217d

View File

@ -5,19 +5,23 @@
#undef find_first_bit #undef find_first_bit
#undef find_next_bit #undef find_next_bit
/** static inline long
* find_first_zero_bit - find the first zero bit in a memory region __find_first_zero_bit(const unsigned long * addr, unsigned long size)
* @addr: The address to start the search at
* @size: The maximum size to search
*
* Returns the bit-number of the first zero bit, not the number of the byte
* containing a bit.
*/
inline long find_first_zero_bit(const unsigned long * addr, unsigned long size)
{ {
long d0, d1, d2; long d0, d1, d2;
long res; long res;
/*
* We must test the size in words, not in bits, because
* otherwise incoming sizes in the range -63..-1 will not run
* any scasq instructions, and then the flags used by the je
* instruction will have whatever random value was in place
* before. Nobody should call us like that, but
* find_next_zero_bit() does when offset and size are at the
* same word and it fails to find a zero itself.
*/
size += 63;
size >>= 6;
if (!size) if (!size)
return 0; return 0;
asm volatile( asm volatile(
@ -30,11 +34,29 @@ inline long find_first_zero_bit(const unsigned long * addr, unsigned long size)
" shlq $3,%%rdi\n" " shlq $3,%%rdi\n"
" addq %%rdi,%%rdx" " addq %%rdi,%%rdx"
:"=d" (res), "=&c" (d0), "=&D" (d1), "=&a" (d2) :"=d" (res), "=&c" (d0), "=&D" (d1), "=&a" (d2)
:"0" (0ULL), "1" ((size + 63) >> 6), "2" (addr), "3" (-1ULL), :"0" (0ULL), "1" (size), "2" (addr), "3" (-1ULL),
[addr] "r" (addr) : "memory"); [addr] "S" (addr) : "memory");
/*
* Any register would do for [addr] above, but GCC tends to
* prefer rbx over rsi, even though rsi is readily available
* and doesn't have to be saved.
*/
return res; return res;
} }
/**
* find_first_zero_bit - find the first zero bit in a memory region
* @addr: The address to start the search at
* @size: The maximum size to search
*
* Returns the bit-number of the first zero bit, not the number of the byte
* containing a bit.
*/
long find_first_zero_bit(const unsigned long * addr, unsigned long size)
{
return __find_first_zero_bit (addr, size);
}
/** /**
* find_next_zero_bit - find the first zero bit in a memory region * find_next_zero_bit - find the first zero bit in a memory region
* @addr: The address to base the search on * @addr: The address to base the search on
@ -43,7 +65,7 @@ inline long find_first_zero_bit(const unsigned long * addr, unsigned long size)
*/ */
long find_next_zero_bit (const unsigned long * addr, long size, long offset) long find_next_zero_bit (const unsigned long * addr, long size, long offset)
{ {
unsigned long * p = ((unsigned long *) addr) + (offset >> 6); const unsigned long * p = addr + (offset >> 6);
unsigned long set = 0; unsigned long set = 0;
unsigned long res, bit = offset&63; unsigned long res, bit = offset&63;
@ -63,8 +85,8 @@ long find_next_zero_bit (const unsigned long * addr, long size, long offset)
/* /*
* No zero yet, search remaining full words for a zero * No zero yet, search remaining full words for a zero
*/ */
res = find_first_zero_bit ((const unsigned long *)p, res = __find_first_zero_bit (p, size - 64 * (p - addr));
size - 64 * (p - (unsigned long *) addr));
return (offset + set + res); return (offset + set + res);
} }
@ -74,6 +96,19 @@ __find_first_bit(const unsigned long * addr, unsigned long size)
long d0, d1; long d0, d1;
long res; long res;
/*
* We must test the size in words, not in bits, because
* otherwise incoming sizes in the range -63..-1 will not run
* any scasq instructions, and then the flags used by the jz
* instruction will have whatever random value was in place
* before. Nobody should call us like that, but
* find_next_bit() does when offset and size are at the same
* word and it fails to find a one itself.
*/
size += 63;
size >>= 6;
if (!size)
return 0;
asm volatile( asm volatile(
" repe; scasq\n" " repe; scasq\n"
" jz 1f\n" " jz 1f\n"
@ -83,8 +118,7 @@ __find_first_bit(const unsigned long * addr, unsigned long size)
" shlq $3,%%rdi\n" " shlq $3,%%rdi\n"
" addq %%rdi,%%rax" " addq %%rdi,%%rax"
:"=a" (res), "=&c" (d0), "=&D" (d1) :"=a" (res), "=&c" (d0), "=&D" (d1)
:"0" (0ULL), :"0" (0ULL), "1" (size), "2" (addr),
"1" ((size + 63) >> 6), "2" (addr),
[addr] "r" (addr) : "memory"); [addr] "r" (addr) : "memory");
return res; return res;
} }