mirror of
https://github.com/torvalds/linux.git
synced 2024-11-11 06:31:49 +00:00
samples/bpf: add lpm-trie benchmark
Extend the map_perf_test_{user,kern}.c infrastructure to stress test lpm-trie lookups. We hook into the kprobe on sys_gettid() and measure the latency depending on trie size and lookup count. On my Intel Haswell i7-6400U, a single gettid() syscall with an empty bpf program takes roughly 6.5us. Lookups in empty tries take ~1.8us on the first try and ~0.9us on retries. Lookups in tries with 8192 entries take ~7.1us (on the first _and_ any subsequent try). Signed-off-by: David Herrmann <dh.herrmann@gmail.com> Reviewed-by: Daniel Mack <daniel@zonque.org> Acked-by: Alexei Starovoitov <ast@kernel.org> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
parent
4d3381f5a3
commit
b8a943e294
@ -57,6 +57,14 @@ struct bpf_map_def SEC("maps") percpu_hash_map_alloc = {
|
|||||||
.map_flags = BPF_F_NO_PREALLOC,
|
.map_flags = BPF_F_NO_PREALLOC,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/*
 * LPM-trie map that stress_lpm_trie_map_alloc() looks up on every
 * sys_gettid() kprobe hit. Placed in the "maps" section.
 */
struct bpf_map_def SEC("maps") lpm_trie_map_alloc = {
|
||||||
|
.type = BPF_MAP_TYPE_LPM_TRIE,
|
||||||
|
/* 8 bytes: same size as the u32[2] / u8[8] key union built in stress_lpm_trie_map_alloc() */
.key_size = 8,
|
||||||
|
.value_size = sizeof(long),
|
||||||
|
.max_entries = 10000,
|
||||||
|
/* NOTE(review): presumably LPM tries must be created without preallocation - confirm against the map type's documentation */
.map_flags = BPF_F_NO_PREALLOC,
|
||||||
|
};
|
||||||
|
|
||||||
SEC("kprobe/sys_getuid")
|
SEC("kprobe/sys_getuid")
|
||||||
int stress_hmap(struct pt_regs *ctx)
|
int stress_hmap(struct pt_regs *ctx)
|
||||||
{
|
{
|
||||||
@ -135,5 +143,27 @@ int stress_percpu_lru_hmap_alloc(struct pt_regs *ctx)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Program placed in the "kprobe/sys_gettid" section: builds one fixed
 * 8-byte key and performs 32 lookups of it in lpm_trie_map_alloc.
 * The lookup results are discarded; the function always returns 0.
 * ctx is not used.
 */
SEC("kprobe/sys_gettid")
|
||||||
|
int stress_lpm_trie_map_alloc(struct pt_regs *ctx)
|
||||||
|
{
|
||||||
|
/* 8-byte key viewed either as two 32-bit words or as eight bytes */
union {
|
||||||
|
u32 b32[2];
|
||||||
|
u8 b8[8];
|
||||||
|
} key;
|
||||||
|
unsigned int i;
|
||||||
|
|
||||||
|
/*
 * First word = 32, following bytes = 192, 168, 0, 1. These are the same
 * values fill_lpm_trie() stores as prefixlen and data[0..3] of its final
 * entry; presumably b32[0] overlays the prefix length - confirm against
 * struct bpf_lpm_trie_key.
 */
key.b32[0] = 32;
|
||||||
|
key.b8[4] = 192;
|
||||||
|
key.b8[5] = 168;
|
||||||
|
key.b8[6] = 0;
|
||||||
|
key.b8[7] = 1;
|
||||||
|
|
||||||
|
/* Ask clang to fully unroll the loop; NOTE(review): presumably needed because the BPF verifier rejected loops - confirm */
#pragma clang loop unroll(full)
|
||||||
|
for (i = 0; i < 32; ++i)
|
||||||
|
bpf_map_lookup_elem(&lpm_trie_map_alloc, &key);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
char _license[] SEC("license") = "GPL";
|
char _license[] SEC("license") = "GPL";
|
||||||
u32 _version SEC("version") = LINUX_VERSION_CODE;
|
u32 _version SEC("version") = LINUX_VERSION_CODE;
|
||||||
|
@ -37,6 +37,7 @@ static __u64 time_get_ns(void)
|
|||||||
#define PERCPU_HASH_KMALLOC (1 << 3)
|
#define PERCPU_HASH_KMALLOC (1 << 3)
|
||||||
#define LRU_HASH_PREALLOC (1 << 4)
|
#define LRU_HASH_PREALLOC (1 << 4)
|
||||||
#define PERCPU_LRU_HASH_PREALLOC (1 << 5)
|
#define PERCPU_LRU_HASH_PREALLOC (1 << 5)
|
||||||
|
#define LPM_KMALLOC (1 << 6)
|
||||||
|
|
||||||
static int test_flags = ~0;
|
static int test_flags = ~0;
|
||||||
|
|
||||||
@ -112,6 +113,18 @@ static void test_percpu_hash_kmalloc(int cpu)
|
|||||||
cpu, MAX_CNT * 1000000000ll / (time_get_ns() - start_time));
|
cpu, MAX_CNT * 1000000000ll / (time_get_ns() - start_time));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Issue MAX_CNT gettid syscalls back to back and print the achieved rate.
 * Presumably each call triggers the program in the "kprobe/sys_gettid"
 * section of the kernel-side file - confirm against the loader.
 *
 * cpu: only used as the prefix of the printed result line.
 */
static void test_lpm_kmalloc(int cpu)
|
||||||
|
{
|
||||||
|
__u64 start_time;
|
||||||
|
int i;
|
||||||
|
|
||||||
|
start_time = time_get_ns();
|
||||||
|
for (i = 0; i < MAX_CNT; i++)
|
||||||
|
syscall(__NR_gettid);
|
||||||
|
/* events per second = MAX_CNT * 1e9 / elapsed nanoseconds */
printf("%d:lpm_perf kmalloc %lld events per sec\n",
|
||||||
|
cpu, MAX_CNT * 1000000000ll / (time_get_ns() - start_time));
|
||||||
|
}
|
||||||
|
|
||||||
static void loop(int cpu)
|
static void loop(int cpu)
|
||||||
{
|
{
|
||||||
cpu_set_t cpuset;
|
cpu_set_t cpuset;
|
||||||
@ -137,6 +150,9 @@ static void loop(int cpu)
|
|||||||
|
|
||||||
if (test_flags & PERCPU_LRU_HASH_PREALLOC)
|
if (test_flags & PERCPU_LRU_HASH_PREALLOC)
|
||||||
test_percpu_lru_hash_prealloc(cpu);
|
test_percpu_lru_hash_prealloc(cpu);
|
||||||
|
|
||||||
|
if (test_flags & LPM_KMALLOC)
|
||||||
|
test_lpm_kmalloc(cpu);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void run_perf_test(int tasks)
|
static void run_perf_test(int tasks)
|
||||||
@ -162,6 +178,37 @@ static void run_perf_test(int tasks)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Populate the map behind map_fd[6] with 512 randomly generated keys
 * (value 0) followed by one fixed key - prefixlen 32, data 192.168.0.1 -
 * with value 128. Every update is checked with assert().
 *
 * NOTE(review): map_fd[6] is presumably the fd of lpm_trie_map_alloc,
 * which would tie this index to the map declaration order - confirm.
 * NOTE(review): the commit message quotes timings for tries with 8192
 * entries, while this function inserts 512 random keys plus one.
 * NOTE(review): assert() is compiled out under NDEBUG, in which case
 * update failures would go unnoticed.
 */
static void fill_lpm_trie(void)
|
||||||
|
{
|
||||||
|
struct bpf_lpm_trie_key *key;
|
||||||
|
unsigned long value = 0;
|
||||||
|
unsigned int i;
|
||||||
|
int r;
|
||||||
|
|
||||||
|
/* Stack-allocate the key with 4 extra bytes for data[0..3] */
key = alloca(sizeof(*key) + 4);
|
||||||
|
/* NOTE(review): this store is overwritten by the first loop iteration below */
key->prefixlen = 32;
|
||||||
|
|
||||||
|
for (i = 0; i < 512; ++i) {
|
||||||
|
/* random prefix length in the range 0..32 */
key->prefixlen = rand() % 33;
|
||||||
|
key->data[0] = rand() & 0xff;
|
||||||
|
key->data[1] = rand() & 0xff;
|
||||||
|
key->data[2] = rand() & 0xff;
|
||||||
|
key->data[3] = rand() & 0xff;
|
||||||
|
r = bpf_map_update_elem(map_fd[6], key, &value, 0);
|
||||||
|
assert(!r);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Fixed entry: same prefixlen/bytes that stress_lpm_trie_map_alloc() puts in its lookup key */
key->prefixlen = 32;
|
||||||
|
key->data[0] = 192;
|
||||||
|
key->data[1] = 168;
|
||||||
|
key->data[2] = 0;
|
||||||
|
key->data[3] = 1;
|
||||||
|
value = 128;
|
||||||
|
|
||||||
|
r = bpf_map_update_elem(map_fd[6], key, &value, 0);
|
||||||
|
assert(!r);
|
||||||
|
}
|
||||||
|
|
||||||
int main(int argc, char **argv)
|
int main(int argc, char **argv)
|
||||||
{
|
{
|
||||||
struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
|
struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
|
||||||
@ -182,6 +229,8 @@ int main(int argc, char **argv)
|
|||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fill_lpm_trie();
|
||||||
|
|
||||||
run_perf_test(num_cpu);
|
run_perf_test(num_cpu);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
Loading…
Reference in New Issue
Block a user