linux/lib/find_bit_benchmark.c
Jason A. Donenfeld 81895a65ec treewide: use prandom_u32_max() when possible, part 1
Rather than incurring a division or requesting too many random bytes for
the given range, use the prandom_u32_max() function, which only takes
the minimum required bytes from the RNG and avoids divisions. This was
done mechanically with this coccinelle script:

@basic@
expression E;
type T;
identifier get_random_u32 =~ "get_random_int|prandom_u32|get_random_u32";
typedef u64;
@@
(
- ((T)get_random_u32() % (E))
+ prandom_u32_max(E)
|
- ((T)get_random_u32() & ((E) - 1))
+ prandom_u32_max(E * XXX_MAKE_SURE_E_IS_POW2)
|
- ((u64)(E) * get_random_u32() >> 32)
+ prandom_u32_max(E)
|
- ((T)get_random_u32() & ~PAGE_MASK)
+ prandom_u32_max(PAGE_SIZE)
)

@multi_line@
identifier get_random_u32 =~ "get_random_int|prandom_u32|get_random_u32";
identifier RAND;
expression E;
@@

-       RAND = get_random_u32();
        ... when != RAND
-       RAND %= (E);
+       RAND = prandom_u32_max(E);

// Find a potential literal
@literal_mask@
expression LITERAL;
type T;
identifier get_random_u32 =~ "get_random_int|prandom_u32|get_random_u32";
position p;
@@

        ((T)get_random_u32()@p & (LITERAL))

// Add one to the literal.
@script:python add_one@
literal << literal_mask.LITERAL;
RESULT;
@@

value = None
if literal.startswith('0x'):
        value = int(literal, 16)
elif literal[0] in '123456789':
        value = int(literal, 10)
if value is None:
        print("I don't know how to handle %s" % (literal))
        cocci.include_match(False)
elif value == 2**32 - 1 or value == 2**31 - 1 or value == 2**24 - 1 or value == 2**16 - 1 or value == 2**8 - 1:
        print("Skipping 0x%x for cleanup elsewhere" % (value))
        cocci.include_match(False)
elif value & (value + 1) != 0:
        print("Skipping 0x%x because it's not a power of two minus one" % (value))
        cocci.include_match(False)
elif literal.startswith('0x'):
        coccinelle.RESULT = cocci.make_expr("0x%x" % (value + 1))
else:
        coccinelle.RESULT = cocci.make_expr("%d" % (value + 1))

// Replace the literal mask with the calculated result.
@plus_one@
expression literal_mask.LITERAL;
position literal_mask.p;
expression add_one.RESULT;
identifier FUNC;
@@

-       (FUNC()@p & (LITERAL))
+       prandom_u32_max(RESULT)

@collapse_ret@
type T;
identifier VAR;
expression E;
@@

 {
-       T VAR;
-       VAR = (E);
-       return VAR;
+       return E;
 }

@drop_var@
type T;
identifier VAR;
@@

 {
-       T VAR;
        ... when != VAR
 }

Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Reviewed-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Yury Norov <yury.norov@gmail.com>
Reviewed-by: KP Singh <kpsingh@kernel.org>
Reviewed-by: Jan Kara <jack@suse.cz> # for ext4 and sbitmap
Reviewed-by: Christoph Böhmwalder <christoph.boehmwalder@linbit.com> # for drbd
Acked-by: Jakub Kicinski <kuba@kernel.org>
Acked-by: Heiko Carstens <hca@linux.ibm.com> # for s390
Acked-by: Ulf Hansson <ulf.hansson@linaro.org> # for mmc
Acked-by: Darrick J. Wong <djwong@kernel.org> # for xfs
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
2022-10-11 17:42:55 -06:00

198 lines
4.9 KiB
C

// SPDX-License-Identifier: GPL-2.0-only
/*
* Test for find_*_bit functions.
*
* Copyright (c) 2017 Cavium.
*/
/*
* find_bit functions are widely used in the kernel, so a successful boot
* is a good enough test for correctness.
*
* This test is focused on performance of traversing bitmaps. Two typical
* scenarios are reproduced:
* - randomly filled bitmap with approximately equal number of set and
* cleared bits;
* - sparse bitmap with few set bits at random positions.
*/
#include <linux/bitops.h>
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/module.h>
#include <linux/printk.h>
#include <linux/random.h>
/* Size, in bits, of each bitmap used by the benchmark. */
#define BITMAP_LEN (4096UL * 8 * 10)
/*
 * Sparseness divisor: the sparse pass of find_bit_test() performs
 * BITMAP_LEN / SPARSE random __set_bit() calls on each bitmap.
 */
#define SPARSE 500
/*
 * Bitmaps under test, filled by find_bit_test(). bitmap2 serves as the
 * second operand of the find_*_and_bit() tests.
 */
static DECLARE_BITMAP(bitmap, BITMAP_LEN) __initdata;
static DECLARE_BITMAP(bitmap2, BITMAP_LEN) __initdata;
/*
 * Benchmark find_first_bit() by repeatedly locating the lowest set bit and
 * clearing it, until no set bit remains below @len.
 *
 * This is Schlemiel the Painter's algorithm: every search restarts at bit 0.
 * The test is destructive -- it clears every set bit below @len -- so it
 * should be called after all other tests that use the same bitmap.
 *
 * @bitmap: bitmap to scan; modified in place
 * @len:    number of bits to consider
 *
 * The elapsed time and the number of searches performed (including the
 * final, unsuccessful one) are logged with pr_err(). Always returns 0.
 */
static int __init test_find_first_bit(void *bitmap, unsigned long len)
{
	unsigned long i, cnt;
	ktime_t time;

	time = ktime_get();
	for (cnt = i = 0; i < len; cnt++) {
		i = find_first_bit(bitmap, len);
		/*
		 * A result >= len means "no bit found". Clearing that index
		 * would touch a bit outside the range under test, and would
		 * write past the end of the bitmap when len is its full size.
		 */
		if (i < len)
			__clear_bit(i, bitmap);
	}
	time = ktime_get() - time;
	pr_err("find_first_bit: %18llu ns, %6ld iterations\n", time, cnt);
	return 0;
}
/*
 * Benchmark find_first_and_bit() by repeatedly locating the lowest bit set
 * in both bitmaps and clearing it, until no common bit remains below @len.
 *
 * The clearing is done on a private copy of @bitmap, so neither input is
 * modified.
 *
 * @bitmap:  first operand; copied into a local scratch bitmap
 * @bitmap2: second operand
 * @len:     number of bits to consider
 *
 * The elapsed time and the number of searches performed (including the
 * final, unsuccessful one) are logged with pr_err(). Always returns 0.
 */
static int __init test_find_first_and_bit(void *bitmap, const void *bitmap2, unsigned long len)
{
	static DECLARE_BITMAP(cp, BITMAP_LEN) __initdata;
	unsigned long i, cnt;
	ktime_t time;

	bitmap_copy(cp, bitmap, BITMAP_LEN);

	time = ktime_get();
	for (cnt = i = 0; i < len; cnt++) {
		i = find_first_and_bit(cp, bitmap2, len);
		/*
		 * A result >= len means "no bit found". Clearing that index
		 * would touch a bit outside the range under test, and would
		 * write past the end of cp when len is BITMAP_LEN.
		 */
		if (i < len)
			__clear_bit(i, cp);
	}
	time = ktime_get() - time;
	pr_err("find_first_and_bit: %18llu ns, %6ld iterations\n", time, cnt);
	return 0;
}
/*
 * Benchmark find_next_bit() by walking every set bit in the first @len bits
 * of @bitmap, resuming each search one position past the previous hit.
 *
 * @bitmap: bitmap to scan; not modified
 * @len:    number of bits to consider
 *
 * The elapsed time and the number of searches performed are logged with
 * pr_err(). Always returns 0.
 */
static int __init test_find_next_bit(const void *bitmap, unsigned long len)
{
	unsigned long i, cnt;
	ktime_t time;

	time = ktime_get();
	/* Bound the walk by the caller's len rather than a fixed size. */
	for (cnt = i = 0; i < len; cnt++)
		i = find_next_bit(bitmap, len, i) + 1;
	time = ktime_get() - time;
	pr_err("find_next_bit: %18llu ns, %6ld iterations\n", time, cnt);
	return 0;
}
/*
 * Benchmark find_next_zero_bit() by walking every cleared bit in the first
 * @len bits of @bitmap, resuming each search one position past the previous
 * hit.
 *
 * @bitmap: bitmap to scan; not modified
 * @len:    number of bits to consider
 *
 * The elapsed time and the number of searches performed are logged with
 * pr_err(). Always returns 0.
 */
static int __init test_find_next_zero_bit(const void *bitmap, unsigned long len)
{
	unsigned long i, cnt;
	ktime_t time;

	time = ktime_get();
	/*
	 * The loop bound must match the search size: with a larger bound the
	 * search keeps returning len once exhausted and the loop never ends.
	 */
	for (cnt = i = 0; i < len; cnt++)
		i = find_next_zero_bit(bitmap, len, i) + 1;
	time = ktime_get() - time;
	pr_err("find_next_zero_bit: %18llu ns, %6ld iterations\n", time, cnt);
	return 0;
}
/*
 * Benchmark find_last_bit() by walking the set bits of @bitmap from the top
 * down: each hit becomes the new (exclusive) upper bound of the next search.
 *
 * @bitmap: bitmap to scan; not modified
 * @len:    number of bits to consider initially
 *
 * The elapsed time and the number of searches performed are logged with
 * pr_err(). Always returns 0.
 */
static int __init test_find_last_bit(const void *bitmap, unsigned long len)
{
	unsigned long last;
	unsigned long iterations = 0;
	ktime_t start, elapsed;

	start = ktime_get();
	for (;;) {
		iterations++;
		last = find_last_bit(bitmap, len);

		/* Nothing set below the current bound: done. */
		if (last >= len)
			break;

		/* Shrink the window; an empty window ends the walk. */
		len = last;
		if (!len)
			break;
	}
	elapsed = ktime_get() - start;

	pr_err("find_last_bit: %18llu ns, %6ld iterations\n", elapsed, iterations);
	return 0;
}
/*
 * Benchmark find_nth_bit() by looking up every set bit in the first @len
 * bits of @bitmap by its ordinal, from 0 up to bitmap_weight() - 1.
 *
 * @bitmap: bitmap to scan; not modified
 * @len:    number of bits to consider
 *
 * Warns if any lookup lands at or beyond @len. The elapsed time and the
 * number of lookups are logged with pr_err(). Always returns 0.
 */
static int __init test_find_nth_bit(const unsigned long *bitmap, unsigned long len)
{
	unsigned long weight = bitmap_weight(bitmap, len);
	unsigned long nth = 0;
	unsigned long pos;
	ktime_t start, elapsed;

	start = ktime_get();
	while (nth < weight) {
		pos = find_nth_bit(bitmap, len, nth);
		WARN_ON(pos >= len);
		nth++;
	}
	elapsed = ktime_get() - start;

	pr_err("find_nth_bit: %18llu ns, %6ld iterations\n", elapsed, weight);
	return 0;
}
/*
 * Benchmark find_next_and_bit() by walking every bit that is set in both
 * @bitmap and @bitmap2 within the first @len bits, resuming each search one
 * position past the previous hit.
 *
 * @bitmap:  first operand; not modified
 * @bitmap2: second operand; not modified
 * @len:     number of bits to consider
 *
 * The elapsed time and the number of searches performed are logged with
 * pr_err(). Always returns 0.
 */
static int __init test_find_next_and_bit(const void *bitmap,
					 const void *bitmap2, unsigned long len)
{
	unsigned long i, cnt;
	ktime_t time;

	time = ktime_get();
	/*
	 * Start at bit 0 and step past each hit afterwards, so that bit 0
	 * itself is examined, and bound the walk by the caller's len.
	 */
	for (cnt = i = 0; i < len; cnt++)
		i = find_next_and_bit(bitmap, bitmap2, len, i) + 1;
	time = ktime_get() - time;
	pr_err("find_next_and_bit: %18llu ns, %6ld iterations\n", time, cnt);
	return 0;
}
/*
 * Module entry point: run every find_*_bit benchmark twice, first on
 * random-filled bitmaps and then on sparse ones.
 *
 * Within each pass test_find_first_bit() runs after the other tests on
 * bitmap because it clears the bits it visits.
 *
 * Always returns -EINVAL; see the comment at the end of the function.
 */
static int __init find_bit_test(void)
{
	unsigned long sparse_bits = BITMAP_LEN / SPARSE;
	unsigned long n;

	/* Pass 1: random-filled bitmaps. */
	pr_err("\nStart testing find_bit() with random-filled bitmap\n");

	get_random_bytes(bitmap, sizeof(bitmap));
	get_random_bytes(bitmap2, sizeof(bitmap2));

	test_find_next_bit(bitmap, BITMAP_LEN);
	test_find_next_zero_bit(bitmap, BITMAP_LEN);
	test_find_last_bit(bitmap, BITMAP_LEN);
	test_find_nth_bit(bitmap, BITMAP_LEN / 10);

	/*
	 * test_find_first_bit() can be slow on a dense bitmap, so only a
	 * part of the bitmap is traversed to avoid a soft lockup.
	 */
	test_find_first_bit(bitmap, BITMAP_LEN / 10);
	test_find_first_and_bit(bitmap, bitmap2, BITMAP_LEN / 2);
	test_find_next_and_bit(bitmap, bitmap2, BITMAP_LEN);

	/* Pass 2: sparse bitmaps with bits set at random positions. */
	pr_err("\nStart testing find_bit() with sparse bitmap\n");

	bitmap_zero(bitmap, BITMAP_LEN);
	bitmap_zero(bitmap2, BITMAP_LEN);

	for (n = 0; n < sparse_bits; n++) {
		__set_bit(prandom_u32_max(BITMAP_LEN), bitmap);
		__set_bit(prandom_u32_max(BITMAP_LEN), bitmap2);
	}

	test_find_next_bit(bitmap, BITMAP_LEN);
	test_find_next_zero_bit(bitmap, BITMAP_LEN);
	test_find_last_bit(bitmap, BITMAP_LEN);
	test_find_nth_bit(bitmap, BITMAP_LEN);
	test_find_first_bit(bitmap, BITMAP_LEN);
	test_find_first_and_bit(bitmap, bitmap2, BITMAP_LEN);
	test_find_next_and_bit(bitmap, bitmap2, BITMAP_LEN);

	/*
	 * Nothing went wrong. An error is returned on purpose so that the
	 * user can run the benchmark again without an annoying rmmod.
	 */
	return -EINVAL;
}
/* Register find_bit_test() as the module's init function. */
module_init(find_bit_test);
MODULE_LICENSE("GPL");