linux/tools/testing/selftests/bpf/bench.h
Joanne Koong ec151037af selftest/bpf/benchs: Add bpf_loop benchmark
Add benchmark to measure the throughput and latency of the bpf_loop
call.

Testing this on my dev machine on 1 thread, the data is as follows:

        nr_loops: 10
bpf_loop - throughput: 198.519 ± 0.155 M ops/s, latency: 5.037 ns/op

        nr_loops: 100
bpf_loop - throughput: 247.448 ± 0.305 M ops/s, latency: 4.041 ns/op

        nr_loops: 500
bpf_loop - throughput: 260.839 ± 0.380 M ops/s, latency: 3.834 ns/op

        nr_loops: 1000
bpf_loop - throughput: 262.806 ± 0.629 M ops/s, latency: 3.805 ns/op

        nr_loops: 5000
bpf_loop - throughput: 264.211 ± 1.508 M ops/s, latency: 3.785 ns/op

        nr_loops: 10000
bpf_loop - throughput: 265.366 ± 3.054 M ops/s, latency: 3.768 ns/op

        nr_loops: 50000
bpf_loop - throughput: 235.986 ± 20.205 M ops/s, latency: 4.238 ns/op

        nr_loops: 100000
bpf_loop - throughput: 264.482 ± 0.279 M ops/s, latency: 3.781 ns/op

        nr_loops: 500000
bpf_loop - throughput: 309.773 ± 87.713 M ops/s, latency: 3.228 ns/op

        nr_loops: 1000000
bpf_loop - throughput: 262.818 ± 4.143 M ops/s, latency: 3.805 ns/op

>From this data, we can see that the latency per loop decreases as the
number of loops increases. On this particular machine, each loop had an
overhead of about ~4 ns, and we were able to run ~250 million loops
per second.

Signed-off-by: Joanne Koong <joannekoong@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20211130030622.4131246-5-joannekoong@fb.com
2021-11-30 10:56:28 -08:00

87 lines
1.9 KiB
C

/* SPDX-License-Identifier: GPL-2.0 */
#pragma once
#include <stdlib.h>
#include <stdbool.h>
#include <linux/err.h>
#include <errno.h>
#include <unistd.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
#include <math.h>
#include <time.h>
#include <sys/syscall.h>
struct cpu_set {
bool *cpus;
int cpus_len;
int next_cpu;
};
struct env {
char *bench_name;
int duration_sec;
int warmup_sec;
bool verbose;
bool list;
bool affinity;
int consumer_cnt;
int producer_cnt;
struct cpu_set prod_cpus;
struct cpu_set cons_cpus;
};
struct bench_res {
long hits;
long drops;
long false_hits;
};
struct bench {
const char *name;
void (*validate)();
void (*setup)();
void *(*producer_thread)(void *ctx);
void *(*consumer_thread)(void *ctx);
void (*measure)(struct bench_res* res);
void (*report_progress)(int iter, struct bench_res* res, long delta_ns);
void (*report_final)(struct bench_res res[], int res_cnt);
};
struct counter {
long value;
} __attribute__((aligned(128)));
extern struct env env;
extern const struct bench *bench;
void setup_libbpf();
void hits_drops_report_progress(int iter, struct bench_res *res, long delta_ns);
void hits_drops_report_final(struct bench_res res[], int res_cnt);
void false_hits_report_progress(int iter, struct bench_res *res, long delta_ns);
void false_hits_report_final(struct bench_res res[], int res_cnt);
void ops_report_progress(int iter, struct bench_res *res, long delta_ns);
void ops_report_final(struct bench_res res[], int res_cnt);
static inline __u64 get_time_ns() {
struct timespec t;
clock_gettime(CLOCK_MONOTONIC, &t);
return (u64)t.tv_sec * 1000000000 + t.tv_nsec;
}
static inline void atomic_inc(long *value)
{
(void)__atomic_add_fetch(value, 1, __ATOMIC_RELAXED);
}
static inline void atomic_add(long *value, long n)
{
(void)__atomic_add_fetch(value, n, __ATOMIC_RELAXED);
}
static inline long atomic_swap(long *value, long n)
{
return __atomic_exchange_n(value, n, __ATOMIC_RELAXED);
}