mirror of
https://github.com/torvalds/linux.git
synced 2024-11-11 14:42:24 +00:00
f68edc9297
Currently find_first_and_bit() is an alias to find_next_and_bit(). However, it is widely used in cpumask, so it is worth optimizing. This patch adds a dedicated implementation for find_first_and_bit(). On x86_64 find_bit_benchmark says: Before (#define find_first_and_bit(...) find_next_and_bit(..., 0)): Start testing find_bit() with random-filled bitmap [ 140.291468] find_first_and_bit: 46890919 ns, 32671 iterations Start testing find_bit() with sparse bitmap [ 140.295028] find_first_and_bit: 7103 ns, 1 iterations After: Start testing find_bit() with random-filled bitmap [ 162.574907] find_first_and_bit: 25045813 ns, 32846 iterations Start testing find_bit() with sparse bitmap [ 162.578458] find_first_and_bit: 4900 ns, 1 iterations (Thanks to Alexey Klimov for thorough testing.) Signed-off-by: Yury Norov <yury.norov@gmail.com> Tested-by: Wolfram Sang <wsa+renesas@sang-engineering.com> Tested-by: Alexey Klimov <aklimov@redhat.com>
164 lines
3.7 KiB
C
164 lines
3.7 KiB
C
// SPDX-License-Identifier: GPL-2.0-or-later
|
|
/* bit search implementation
|
|
*
|
|
* Copyright (C) 2004 Red Hat, Inc. All Rights Reserved.
|
|
* Written by David Howells (dhowells@redhat.com)
|
|
*
|
|
* Copyright (C) 2008 IBM Corporation
|
|
* 'find_last_bit' is written by Rusty Russell <rusty@rustcorp.com.au>
|
|
* (Inspired by David Howells' find_next_bit implementation)
|
|
*
|
|
* Rewritten by Yury Norov <yury.norov@gmail.com> to decrease
|
|
* size and improve performance, 2015.
|
|
*/
|
|
|
|
#include <linux/bitops.h>
|
|
#include <linux/bitmap.h>
|
|
#include <linux/export.h>
|
|
#include <linux/math.h>
|
|
#include <linux/minmax.h>
|
|
#include <linux/swab.h>
|
|
|
|
#if !defined(find_next_bit) || !defined(find_next_zero_bit) || \
|
|
!defined(find_next_bit_le) || !defined(find_next_zero_bit_le) || \
|
|
!defined(find_next_and_bit)
|
|
/*
|
|
* This is a common helper function for find_next_bit, find_next_zero_bit, and
|
|
* find_next_and_bit. The differences are:
|
|
* - The "invert" argument, which is XORed with each fetched word before
|
|
* searching it for one bits.
|
|
* - The optional "addr2", which is anded with "addr1" if present.
|
|
*/
|
|
unsigned long _find_next_bit(const unsigned long *addr1,
|
|
const unsigned long *addr2, unsigned long nbits,
|
|
unsigned long start, unsigned long invert, unsigned long le)
|
|
{
|
|
unsigned long tmp, mask;
|
|
|
|
if (unlikely(start >= nbits))
|
|
return nbits;
|
|
|
|
tmp = addr1[start / BITS_PER_LONG];
|
|
if (addr2)
|
|
tmp &= addr2[start / BITS_PER_LONG];
|
|
tmp ^= invert;
|
|
|
|
/* Handle 1st word. */
|
|
mask = BITMAP_FIRST_WORD_MASK(start);
|
|
if (le)
|
|
mask = swab(mask);
|
|
|
|
tmp &= mask;
|
|
|
|
start = round_down(start, BITS_PER_LONG);
|
|
|
|
while (!tmp) {
|
|
start += BITS_PER_LONG;
|
|
if (start >= nbits)
|
|
return nbits;
|
|
|
|
tmp = addr1[start / BITS_PER_LONG];
|
|
if (addr2)
|
|
tmp &= addr2[start / BITS_PER_LONG];
|
|
tmp ^= invert;
|
|
}
|
|
|
|
if (le)
|
|
tmp = swab(tmp);
|
|
|
|
return min(start + __ffs(tmp), nbits);
|
|
}
|
|
EXPORT_SYMBOL(_find_next_bit);
|
|
#endif
|
|
|
|
#ifndef find_first_bit
|
|
/*
|
|
* Find the first set bit in a memory region.
|
|
*/
|
|
unsigned long _find_first_bit(const unsigned long *addr, unsigned long size)
|
|
{
|
|
unsigned long idx;
|
|
|
|
for (idx = 0; idx * BITS_PER_LONG < size; idx++) {
|
|
if (addr[idx])
|
|
return min(idx * BITS_PER_LONG + __ffs(addr[idx]), size);
|
|
}
|
|
|
|
return size;
|
|
}
|
|
EXPORT_SYMBOL(_find_first_bit);
|
|
#endif
|
|
|
|
#ifndef find_first_and_bit
|
|
/*
|
|
* Find the first set bit in two memory regions.
|
|
*/
|
|
unsigned long _find_first_and_bit(const unsigned long *addr1,
|
|
const unsigned long *addr2,
|
|
unsigned long size)
|
|
{
|
|
unsigned long idx, val;
|
|
|
|
for (idx = 0; idx * BITS_PER_LONG < size; idx++) {
|
|
val = addr1[idx] & addr2[idx];
|
|
if (val)
|
|
return min(idx * BITS_PER_LONG + __ffs(val), size);
|
|
}
|
|
|
|
return size;
|
|
}
|
|
EXPORT_SYMBOL(_find_first_and_bit);
|
|
#endif
|
|
|
|
#ifndef find_first_zero_bit
|
|
/*
|
|
* Find the first cleared bit in a memory region.
|
|
*/
|
|
unsigned long _find_first_zero_bit(const unsigned long *addr, unsigned long size)
|
|
{
|
|
unsigned long idx;
|
|
|
|
for (idx = 0; idx * BITS_PER_LONG < size; idx++) {
|
|
if (addr[idx] != ~0UL)
|
|
return min(idx * BITS_PER_LONG + ffz(addr[idx]), size);
|
|
}
|
|
|
|
return size;
|
|
}
|
|
EXPORT_SYMBOL(_find_first_zero_bit);
|
|
#endif
|
|
|
|
#ifndef find_last_bit
|
|
unsigned long _find_last_bit(const unsigned long *addr, unsigned long size)
|
|
{
|
|
if (size) {
|
|
unsigned long val = BITMAP_LAST_WORD_MASK(size);
|
|
unsigned long idx = (size-1) / BITS_PER_LONG;
|
|
|
|
do {
|
|
val &= addr[idx];
|
|
if (val)
|
|
return idx * BITS_PER_LONG + __fls(val);
|
|
|
|
val = ~0ul;
|
|
} while (idx--);
|
|
}
|
|
return size;
|
|
}
|
|
EXPORT_SYMBOL(_find_last_bit);
|
|
#endif
|
|
|
|
/*
 * Find the next 8-bit clump that has a bit set.
 *
 * Calls find_next_bit() on "addr" with "size" and "offset". If that returns
 * "size", "size" is returned and "*clump" is left untouched. Otherwise the
 * returned bit index is rounded down to a multiple of 8, the value
 * bitmap_get_value8() yields for that position is stored in "*clump", and
 * the rounded index is returned.
 */
unsigned long find_next_clump8(unsigned long *clump, const unsigned long *addr,
			       unsigned long size, unsigned long offset)
{
	unsigned long bit = find_next_bit(addr, size, offset);

	if (bit != size) {
		bit = round_down(bit, 8);
		*clump = bitmap_get_value8(addr, bit);
	}

	return bit;
}
EXPORT_SYMBOL(find_next_clump8);
|