linux/tools/testing/selftests/bpf/benchs/bench_trigger.c
Andrii Nakryiko d41bc48bfa selftests/bpf: Add uprobe triggering overhead benchmarks
Add benchmark to measure overhead of uprobes and uretprobes. Also have
a baseline (no uprobe attached) benchmark.

On my dev machine, baseline benchmark can trigger 130M user_target()
invocations per second. When uprobe is attached, this falls to just 700K/s.
With uretprobe, we get down to 520K/s:

  $ sudo ./bench trig-uprobe-base -a
  Summary: hits  131.289 ± 2.872M/s

  # UPROBE
  $ sudo ./bench -a trig-uprobe-without-nop
  Summary: hits    0.729 ± 0.007M/s

  $ sudo ./bench -a trig-uprobe-with-nop
  Summary: hits    1.798 ± 0.017M/s

  # URETPROBE
  $ sudo ./bench -a trig-uretprobe-without-nop
  Summary: hits    0.508 ± 0.012M/s

  $ sudo ./bench -a trig-uretprobe-with-nop
  Summary: hits    0.883 ± 0.008M/s

So there is almost 2.5x performance difference between probing nop vs
non-nop instruction for entry uprobe. And 1.7x difference for uretprobe.

This means that the overhead of probing a non-nop instruction is around
1.4 microseconds for uprobe and 2 microseconds for uretprobe.

For nop variants, uprobe and uretprobe overhead is down to 0.556 and
1.13 microseconds, respectively.

For comparison, just doing a very low-overhead syscall (with no BPF
programs attached anywhere) gives:

  $ sudo ./bench trig-base -a
  Summary: hits    4.830 ± 0.036M/s

So even the fastest (nop) uprobes are about 2.67x slower than a pure context switch.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20211116013041.4072571-1-andrii@kernel.org
2021-11-16 14:46:49 +01:00

331 lines
7.7 KiB
C

// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */
#include "bench.h"
#include "trigger_bench.skel.h"
#include "trace_helpers.h"
/* BPF triggering benchmarks */
/* State shared by the BPF-backed trigger benchmarks: the trigger_bench skeleton handle. */
struct trigger_ctx {
	struct trigger_bench *skel;
};

static struct trigger_ctx ctx;
/* Hit counter for the non-BPF baseline benchmarks: bumped by trigger_base_producer() and uprobe_base_producer(), drained by trigger_base_measure(). */
static struct counter base_hits;
/*
 * Validate the benchmark environment: exactly one consumer is required.
 * Any other env.consumer_cnt prints an error to stderr and exits with
 * status 1.
 */
static void trigger_validate(void)
{
	if (env.consumer_cnt != 1) {
		fprintf(stderr, "benchmark doesn't support multi-consumer!\n");
		exit(1);
	}
}
/*
 * Baseline producer thread: endlessly issues the getpgid syscall (result
 * discarded) and increments base_hits after each call. The loop never
 * terminates, so the trailing return is not reached.
 */
static void *trigger_base_producer(void *input)
{
	for (;;) {
		(void)syscall(__NR_getpgid);
		atomic_inc(&base_hits.value);
	}

	return NULL;
}
/*
 * Measure callback for the baseline benchmarks: swaps 0 into base_hits.value
 * with atomic_swap() and stores the result in res->hits, so each call
 * presumably reports the hits accumulated since the previous one
 * (atomic_swap() is defined outside this file — confirm it returns the old value).
 */
static void trigger_base_measure(struct bench_res *res)
{
res->hits = atomic_swap(&base_hits.value, 0);
}
/*
 * Producer thread for the BPF-backed benchmarks: endlessly issues the
 * getpgid syscall and discards the result. No counting happens here.
 * The loop never terminates, so the trailing return is not reached.
 */
static void *trigger_producer(void *input)
{
	for (;;) {
		(void)syscall(__NR_getpgid);
	}

	return NULL;
}
/*
 * Measure callback for the BPF-backed benchmarks: swaps 0 into the skeleton's
 * bss `hits` variable with atomic_swap() and stores the result in res->hits.
 * NOTE(review): `hits` is presumably incremented by the attached BPF program;
 * that program is not visible in this file.
 */
static void trigger_measure(struct bench_res *res)
{
res->hits = atomic_swap(&ctx.skel->bss->hits, 0);
}
/*
 * Call setup_libbpf(), then open and load the trigger_bench skeleton into
 * ctx.skel. If the skeleton cannot be opened/loaded, print an error to
 * stderr and exit with status 1.
 */
static void setup_ctx(void)
{
	setup_libbpf();

	ctx.skel = trigger_bench__open_and_load();
	if (!ctx.skel) {
		fprintf(stderr, "failed to open skeleton\n");
		exit(1);
	}
}
/*
 * Attach @prog with bpf_program__attach(). A NULL link is treated as a
 * failure: an error is printed to stderr and the process exits with
 * status 1. The returned link is neither stored nor destroyed here.
 */
static void attach_bpf(struct bpf_program *prog)
{
	struct bpf_link *link = bpf_program__attach(prog);

	if (link)
		return;

	fprintf(stderr, "failed to attach program!\n");
	exit(1);
}
/* Setup callback: load the skeleton, then attach its bench_trigger_tp program. */
static void trigger_tp_setup(void)
{
	setup_ctx();
	attach_bpf(ctx.skel->progs.bench_trigger_tp);
}
/* Setup callback: load the skeleton, then attach its bench_trigger_raw_tp program. */
static void trigger_rawtp_setup(void)
{
	setup_ctx();
	attach_bpf(ctx.skel->progs.bench_trigger_raw_tp);
}
/* Setup callback: load the skeleton, then attach its bench_trigger_kprobe program. */
static void trigger_kprobe_setup(void)
{
	setup_ctx();
	attach_bpf(ctx.skel->progs.bench_trigger_kprobe);
}
/* Setup callback: load the skeleton, then attach its bench_trigger_fentry program. */
static void trigger_fentry_setup(void)
{
	setup_ctx();
	attach_bpf(ctx.skel->progs.bench_trigger_fentry);
}
/* Setup callback: load the skeleton, then attach its bench_trigger_fentry_sleep program. */
static void trigger_fentry_sleep_setup(void)
{
	setup_ctx();
	attach_bpf(ctx.skel->progs.bench_trigger_fentry_sleep);
}
/* Setup callback: load the skeleton, then attach its bench_trigger_fmodret program. */
static void trigger_fmodret_setup(void)
{
	setup_ctx();
	attach_bpf(ctx.skel->progs.bench_trigger_fmodret);
}
/* Consumer thread body that does no work: ignores its argument and returns NULL right away. */
static void *trigger_consumer(void *input)
{
	(void)input;	/* intentionally unused */

	return NULL;
}
/* make sure call is not inlined and not avoided by compiler, so __weak and
* inline asm volatile in the body of the function
*
* There is a performance difference between uprobing at nop location vs other
* instructions. So use two different targets, one of which starts with nop
* and another doesn't.
*
* GCC doesn't generate stack setup preamble for these functions due to them
* having no input arguments and doing nothing in the body.
*/
/* Probe target whose body is a single explicit nop instruction; called in a loop by the *_with_nop and base producers and selected by usetup() when use_nop is true. */
__weak void uprobe_target_with_nop(void)
{
asm volatile ("nop");
}
/* Probe target with an empty asm volatile body (no nop emitted by this function's source); selected by usetup() when use_nop is false. */
__weak void uprobe_target_without_nop(void)
{
asm volatile ("");
}
/*
 * Baseline producer for the uprobe benchmarks: endlessly calls
 * uprobe_target_with_nop() and increments base_hits after each call.
 * The loop never terminates, so the trailing return is not reached.
 */
static void *uprobe_base_producer(void *input)
{
	for (;;) {
		uprobe_target_with_nop();
		atomic_inc(&base_hits.value);
	}

	return NULL;
}
/*
 * Producer thread that endlessly calls uprobe_target_with_nop(). No counting
 * happens here. The loop never terminates, so the trailing return is not
 * reached.
 */
static void *uprobe_producer_with_nop(void *input)
{
	for (;;) {
		uprobe_target_with_nop();
	}

	return NULL;
}
/*
 * Producer thread that endlessly calls uprobe_target_without_nop(). No
 * counting happens here. The loop never terminates, so the trailing return
 * is not reached.
 */
static void *uprobe_producer_without_nop(void *input)
{
	for (;;) {
		uprobe_target_without_nop();
	}

	return NULL;
}
/*
 * Common setup for the uprobe/uretprobe benchmarks.
 *
 * @use_retprobe: forwarded to bpf_program__attach_uprobe() as its retprobe
 *                argument (the uretprobe_* setups pass true).
 * @use_nop:      probe uprobe_target_with_nop() when true,
 *                uprobe_target_without_nop() otherwise.
 *
 * Loads the skeleton, computes the chosen target's offset, attaches
 * bench_trigger_uprobe at that offset in "/proc/self/exe" for all PIDs and
 * records the link in the skeleton. Any failure prints an error to stderr
 * and exits with status 1.
 */
static void usetup(bool use_retprobe, bool use_nop)
{
	void (*target)(void);
	size_t uprobe_offset;
	ssize_t base_addr;
	struct bpf_link *link;

	/* same setup_libbpf() + skeleton open/load sequence as the other setups */
	setup_ctx();

	base_addr = get_base_addr();
	/*
	 * NOTE(review): get_base_addr() is defined outside this file; its
	 * signed result is assumed to be negative on failure — confirm.
	 */
	if (base_addr < 0) {
		fprintf(stderr, "failed to get base address!\n");
		exit(1);
	}

	target = use_nop ? &uprobe_target_with_nop : &uprobe_target_without_nop;
	uprobe_offset = get_uprobe_offset(target, base_addr);

	link = bpf_program__attach_uprobe(ctx.skel->progs.bench_trigger_uprobe,
					  use_retprobe,
					  -1 /* all PIDs */,
					  "/proc/self/exe",
					  uprobe_offset);
	if (!link) {
		fprintf(stderr, "failed to attach uprobe!\n");
		exit(1);
	}

	ctx.skel->links.bench_trigger_uprobe = link;
}
/* Setup callback: entry uprobe (use_retprobe = false) on the nop target (use_nop = true). */
static void uprobe_setup_with_nop(void)
{
	usetup(false, true);
}
/* Setup callback: uretprobe (use_retprobe = true) on the nop target (use_nop = true). */
static void uretprobe_setup_with_nop(void)
{
	usetup(true, true);
}
/* Setup callback: entry uprobe (use_retprobe = false) on the non-nop target (use_nop = false). */
static void uprobe_setup_without_nop(void)
{
	usetup(false, false);
}
/* Setup callback: uretprobe (use_retprobe = true) on the non-nop target (use_nop = false). */
static void uretprobe_setup_without_nop(void)
{
	usetup(true, false);
}
/* "trig-base": syscall-loop baseline with no setup callback; hits are counted in user space via base_hits. */
const struct bench bench_trig_base = {
	.name            = "trig-base",
	.validate        = trigger_validate,
	.producer_thread = trigger_base_producer,
	.consumer_thread = trigger_consumer,
	.measure         = trigger_base_measure,
	.report_progress = hits_drops_report_progress,
	.report_final    = hits_drops_report_final,
};
/* "trig-tp": syscall loop with the bench_trigger_tp program attached by trigger_tp_setup(). */
const struct bench bench_trig_tp = {
	.name            = "trig-tp",
	.validate        = trigger_validate,
	.setup           = trigger_tp_setup,
	.producer_thread = trigger_producer,
	.consumer_thread = trigger_consumer,
	.measure         = trigger_measure,
	.report_progress = hits_drops_report_progress,
	.report_final    = hits_drops_report_final,
};
/* "trig-rawtp": syscall loop with the bench_trigger_raw_tp program attached by trigger_rawtp_setup(). */
const struct bench bench_trig_rawtp = {
	.name            = "trig-rawtp",
	.validate        = trigger_validate,
	.setup           = trigger_rawtp_setup,
	.producer_thread = trigger_producer,
	.consumer_thread = trigger_consumer,
	.measure         = trigger_measure,
	.report_progress = hits_drops_report_progress,
	.report_final    = hits_drops_report_final,
};
/* "trig-kprobe": syscall loop with the bench_trigger_kprobe program attached by trigger_kprobe_setup(). */
const struct bench bench_trig_kprobe = {
	.name            = "trig-kprobe",
	.validate        = trigger_validate,
	.setup           = trigger_kprobe_setup,
	.producer_thread = trigger_producer,
	.consumer_thread = trigger_consumer,
	.measure         = trigger_measure,
	.report_progress = hits_drops_report_progress,
	.report_final    = hits_drops_report_final,
};
/* "trig-fentry": syscall loop with the bench_trigger_fentry program attached by trigger_fentry_setup(). */
const struct bench bench_trig_fentry = {
	.name            = "trig-fentry",
	.validate        = trigger_validate,
	.setup           = trigger_fentry_setup,
	.producer_thread = trigger_producer,
	.consumer_thread = trigger_consumer,
	.measure         = trigger_measure,
	.report_progress = hits_drops_report_progress,
	.report_final    = hits_drops_report_final,
};
/* "trig-fentry-sleep": syscall loop with the bench_trigger_fentry_sleep program attached by trigger_fentry_sleep_setup(). */
const struct bench bench_trig_fentry_sleep = {
	.name            = "trig-fentry-sleep",
	.validate        = trigger_validate,
	.setup           = trigger_fentry_sleep_setup,
	.producer_thread = trigger_producer,
	.consumer_thread = trigger_consumer,
	.measure         = trigger_measure,
	.report_progress = hits_drops_report_progress,
	.report_final    = hits_drops_report_final,
};
/* "trig-fmodret": syscall loop with the bench_trigger_fmodret program attached by trigger_fmodret_setup(). */
const struct bench bench_trig_fmodret = {
	.name            = "trig-fmodret",
	.validate        = trigger_validate,
	.setup           = trigger_fmodret_setup,
	.producer_thread = trigger_producer,
	.consumer_thread = trigger_consumer,
	.measure         = trigger_measure,
	.report_progress = hits_drops_report_progress,
	.report_final    = hits_drops_report_final,
};
/* "trig-uprobe-base": calls the nop target in a loop with nothing attached; hits are counted in user space via base_hits. */
const struct bench bench_trig_uprobe_base = {
	.name            = "trig-uprobe-base",
	.setup           = NULL, /* no uprobe/uretprobe is attached */
	.producer_thread = uprobe_base_producer,
	.consumer_thread = trigger_consumer,
	.measure         = trigger_base_measure,
	.report_progress = hits_drops_report_progress,
	.report_final    = hits_drops_report_final,
};
/* "trig-uprobe-with-nop": entry uprobe set up by uprobe_setup_with_nop(), driven by the nop-target producer. */
const struct bench bench_trig_uprobe_with_nop = {
	.name            = "trig-uprobe-with-nop",
	.setup           = uprobe_setup_with_nop,
	.producer_thread = uprobe_producer_with_nop,
	.consumer_thread = trigger_consumer,
	.measure         = trigger_measure,
	.report_progress = hits_drops_report_progress,
	.report_final    = hits_drops_report_final,
};
/* "trig-uretprobe-with-nop": uretprobe set up by uretprobe_setup_with_nop(), driven by the nop-target producer. */
const struct bench bench_trig_uretprobe_with_nop = {
	.name            = "trig-uretprobe-with-nop",
	.setup           = uretprobe_setup_with_nop,
	.producer_thread = uprobe_producer_with_nop,
	.consumer_thread = trigger_consumer,
	.measure         = trigger_measure,
	.report_progress = hits_drops_report_progress,
	.report_final    = hits_drops_report_final,
};
/* "trig-uprobe-without-nop": entry uprobe set up by uprobe_setup_without_nop(), driven by the non-nop-target producer. */
const struct bench bench_trig_uprobe_without_nop = {
	.name            = "trig-uprobe-without-nop",
	.setup           = uprobe_setup_without_nop,
	.producer_thread = uprobe_producer_without_nop,
	.consumer_thread = trigger_consumer,
	.measure         = trigger_measure,
	.report_progress = hits_drops_report_progress,
	.report_final    = hits_drops_report_final,
};
/* "trig-uretprobe-without-nop": uretprobe set up by uretprobe_setup_without_nop(), driven by the non-nop-target producer. */
const struct bench bench_trig_uretprobe_without_nop = {
	.name            = "trig-uretprobe-without-nop",
	.setup           = uretprobe_setup_without_nop,
	.producer_thread = uprobe_producer_without_nop,
	.consumer_thread = trigger_consumer,
	.measure         = trigger_measure,
	.report_progress = hits_drops_report_progress,
	.report_final    = hits_drops_report_final,
};