samples: bpf: Add devmap_xmit tracepoint statistics support

This adds support for retrieving and printing statistics for the
devmap_xmit total and multi mode tracepoints. For multi mode, we keep a
hash map entry for each redirection stream, such that we can dynamically
add and remove entries on output.

The from_match and to_match will be set by individual samples when
setting up the XDP program on these devices.

The multi mode tracepoint is also handy for xdp_redirect_map_multi,
where up to 32 devices can be specified.

Also add the sample_init_pre_load macro to finally set up the resized maps
and mmap them in place for low overhead stats retrieval.

Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210821002010.845777-12-memxor@gmail.com
This commit is contained in:
Kumar Kartikeya Dwivedi 2021-08-21 05:49:59 +05:30 committed by Alexei Starovoitov
parent 5f116212f4
commit af93d58c27
2 changed files with 331 additions and 3 deletions

View File

@ -77,6 +77,8 @@ enum map_type {
MAP_CPUMAP_ENQUEUE,
MAP_CPUMAP_KTHREAD,
MAP_EXCEPTION,
MAP_DEVMAP_XMIT,
MAP_DEVMAP_XMIT_MULTI,
NUM_MAP,
};
@ -103,6 +105,8 @@ struct stats_record {
struct record redir_err[XDP_REDIRECT_ERR_MAX];
struct record kthread;
struct record exception[XDP_ACTION_MAX];
struct record devmap_xmit;
DECLARE_HASHTABLE(xmit_map, 5);
struct record enq[];
};
@ -111,7 +115,9 @@ struct sample_output {
__u64 rx;
__u64 redir;
__u64 drop;
__u64 drop_xmit;
__u64 err;
__u64 xmit;
} totals;
struct {
__u64 pps;
@ -125,6 +131,12 @@ struct sample_output {
struct {
__u64 hits;
} except_cnt;
struct {
__u64 pps;
__u64 drop;
__u64 err;
double bavg;
} xmit_cnt;
};
struct xdp_desc {
@ -265,6 +277,16 @@ static void sample_print_help(int mask)
" \t\t\t\thit/s - Number of times the tracepoint was hit per second\n\n");
}
if (mask & SAMPLE_DEVMAP_XMIT_CNT) {
printf(" devmap_xmit\t\tDisplays devmap_xmit tracepoint events\n"
" \t\t\tThis tracepoint is invoked for successful transmissions on output\n"
" \t\t\tdevice but these statistics are not available for generic XDP mode,\n"
" \t\t\thence they will be omitted from the output when using SKB mode\n"
" \t\t\t\txmit/s - Number of packets that were transmitted per second\n"
" \t\t\t\tdrop/s - Number of packets that failed transmissions per second\n"
" \t\t\t\tdrv_err/s - Number of internal driver errors per second\n"
" \t\t\t\tbulk-avg - Average number of packets processed for each event\n\n");
}
}
void sample_usage(char *argv[], const struct option *long_options,
@ -353,6 +375,74 @@ static void map_collect_percpu(struct datarec *values, struct record *rec)
rec->total.xdp_redirect = sum_xdp_redirect;
}
/*
 * Collect the per-CPU devmap_xmit counters of every redirection stream
 * stored in the BPF map behind @map_fd into @rec->xmit_map.
 *
 * The map is walked with bpf_map_lookup_batch() in chunks of up to 32 keys.
 * For each returned key the matching map_entry is looked up in
 * @rec->xmit_map; when none exists yet a new one is allocated, set up with
 * map_entry_init() and inserted. The per-CPU values of the key are then
 * folded into the entry by map_collect_percpu().
 *
 * Returns 0 once the walk stops (this includes a batch lookup failing with
 * an errno other than ENOENT, in which case collection simply ends early),
 * or -ENOMEM when a buffer or a new map entry cannot be set up.
 */
static int map_collect_percpu_devmap(int map_fd, struct stats_record *rec)
{
	unsigned int nr_cpus = bpf_num_possible_cpus();
	__u32 batch, count = 32;
	struct datarec *values;
	bool init = false;
	__u64 *keys;
	__u32 i;
	int ret;

	keys = calloc(count, sizeof(*keys));
	if (!keys)
		return -ENOMEM;
	values = calloc(count * nr_cpus, sizeof(*values));
	if (!values) {
		free(keys);
		return -ENOMEM;
	}

	for (;;) {
		bool last_batch = false;

		/* The first call passes a NULL in_batch to start from the beginning. */
		ret = bpf_map_lookup_batch(map_fd, init ? &batch : NULL, &batch,
					   keys, values, &count, NULL);
		if (ret < 0) {
			if (errno != ENOENT)
				break;
			/*
			 * errno is only meaningful after a failed call, so
			 * ENOENT is checked here and not after a success.
			 * The entries returned alongside ENOENT are still
			 * processed below before leaving the loop.
			 */
			last_batch = true;
		}

		init = true;
		for (i = 0; i < count; i++) {
			struct map_entry *e, *x = NULL;
			__u64 pair = keys[i];
			struct datarec *arr;

			/* values holds nr_cpus consecutive records per key */
			arr = &values[i * nr_cpus];
			hash_for_each_possible(rec->xmit_map, e, node, pair) {
				if (e->pair == pair) {
					x = e;
					break;
				}
			}
			if (!x) {
				x = calloc(1, sizeof(*x));
				if (!x)
					goto cleanup;
				if (map_entry_init(x, pair) < 0) {
					free(x);
					goto cleanup;
				}
				hash_add(rec->xmit_map, &x->node, pair);
			}
			map_collect_percpu(arr, &x->val);
		}

		if (last_batch)
			break;
		/* count was overwritten with the number of entries returned */
		count = 32;
	}

	free(values);
	free(keys);
	return 0;
cleanup:
	free(values);
	free(keys);
	return -ENOMEM;
}
static struct stats_record *alloc_stats_record(void)
{
struct stats_record *rec;
@ -408,6 +498,16 @@ static struct stats_record *alloc_stats_record(void)
}
}
}
if (sample_mask & SAMPLE_DEVMAP_XMIT_CNT) {
rec->devmap_xmit.cpu = alloc_record_per_cpu();
if (!rec->devmap_xmit.cpu) {
fprintf(stderr,
"Failed to allocate devmap_xmit per-CPU array\n");
goto end_exception;
}
}
if (sample_mask & SAMPLE_DEVMAP_XMIT_CNT_MULTI)
hash_init(rec->xmit_map);
if (sample_mask & SAMPLE_CPUMAP_ENQUEUE_CNT) {
for (i = 0; i < sample_n_cpus; i++) {
rec->enq[i].cpu = alloc_record_per_cpu();
@ -418,13 +518,15 @@ static struct stats_record *alloc_stats_record(void)
i);
while (i--)
free(rec->enq[i].cpu);
goto end_exception;
goto end_devmap_xmit;
}
}
}
return rec;
end_devmap_xmit:
free(rec->devmap_xmit.cpu);
end_exception:
for (i = 0; i < XDP_ACTION_MAX; i++)
free(rec->exception[i].cpu);
@ -448,6 +550,12 @@ static void free_stats_record(struct stats_record *r)
for (i = 0; i < sample_n_cpus; i++)
free(r->enq[i].cpu);
hash_for_each_safe(r->xmit_map, i, tmp, e, node) {
hash_del(&e->node);
free(e->val.cpu);
free(e);
}
free(r->devmap_xmit.cpu);
for (i = 0; i < XDP_ACTION_MAX; i++)
free(r->exception[i].cpu);
free(r->kthread.cpu);
@ -835,6 +943,160 @@ static void stats_get_exception_cnt(struct stats_record *stats_rec,
}
}
/*
 * Report the aggregate devmap_xmit tracepoint counters.
 *
 * For each of the first @nr_cpus CPUs, the transmit, drop and driver error
 * rates between @stats_prev and @stats_rec are computed and, unless all
 * three are zero, printed on a "cpu:N" line together with the average bulk
 * size. When @out is non-NULL the rates of the summed record are stored in
 * out->xmit_cnt and added to out->totals.
 */
static void stats_get_devmap_xmit(struct stats_record *stats_rec,
				  struct stats_record *stats_prev,
				  unsigned int nr_cpus,
				  struct sample_output *out)
{
	struct record *cur = &stats_rec->devmap_xmit;
	struct record *old = &stats_prev->devmap_xmit;
	double period = calc_period(cur, old);
	double pps, drop, err, bulk;
	int idx;

	for (idx = 0; idx < nr_cpus; idx++) {
		struct datarec *now = &cur->cpu[idx];
		struct datarec *was = &old->cpu[idx];
		char label[64];

		pps = calc_pps(now, was, period);
		drop = calc_drop_pps(now, was, period);
		err = calc_errs_pps(now, was, period);

		/* Only CPUs that saw some activity get a line of output */
		if (pps || drop || err) {
			snprintf(label, sizeof(label), "cpu:%d", idx);

			/* events/s turned into average packets per event */
			bulk = calc_info_pps(now, was, period);
			if (bulk > 0)
				bulk = (pps + drop) / bulk;

			print_default(" %-18s" FMT_COLUMNf FMT_COLUMNf FMT_COLUMNf
				      __COLUMN(".2f") "\n",
				      label, XMIT(pps), DROP(drop), err, "drv_err/s",
				      bulk, "bulk-avg");
		}
	}

	if (!out)
		return;

	pps = calc_pps(&cur->total, &old->total, period);
	drop = calc_drop_pps(&cur->total, &old->total, period);
	bulk = calc_info_pps(&cur->total, &old->total, period);
	if (bulk > 0)
		bulk = (pps + drop) / bulk; /* average bulk size */
	err = calc_errs_pps(&cur->total, &old->total, period);

	out->xmit_cnt.pps = pps;
	out->xmit_cnt.drop = drop;
	out->xmit_cnt.bavg = bulk;
	out->xmit_cnt.err = err;

	out->totals.xmit += pps;
	out->totals.drop_xmit += drop;
	out->totals.err += err;
}
/*
 * Report devmap_xmit statistics per redirection stream, i.e. per entry of
 * stats_rec->xmit_map. Each entry's key packs the source ifindex in the
 * upper 32 bits and the destination ifindex in the lower 32 bits.
 *
 * Rates are computed against the entry with the same key in
 * stats_prev->xmit_map. When no such entry exists, a zeroed record whose
 * timestamp lies one sample_interval before the current one is used as the
 * baseline.
 *
 * When @out is non-NULL only out->totals is updated and nothing is printed.
 * Otherwise a "xmit <from>-><to>" line is printed for every non-idle
 * stream, followed by one "cpu:N" line for each of the first @nr_cpus CPUs
 * with activity.
 *
 * @xmit_total is accepted for the callers' sake but is not referenced here.
 */
static void stats_get_devmap_xmit_multi(struct stats_record *stats_rec,
					struct stats_record *stats_prev,
					unsigned int nr_cpus,
					struct sample_output *out,
					bool xmit_total)
{
	struct map_entry *stream;
	int bucket;

	hash_for_each(stats_rec->xmit_map, bucket, stream, node) {
		unsigned long prev_time = sample_interval * NANOSEC_PER_SEC;
		__u64 pair = stream->pair;
		__u32 from_idx = pair >> 32;
		__u32 to_idx = pair & 0xFFFFFFFF;
		struct record *r = &stream->val;
		struct map_entry *e, *match = NULL;
		double pps, drop, info, err, t;
		char ifname_from[IFNAMSIZ];
		char ifname_to[IFNAMSIZ];
		const char *fstr, *tstr;
		struct record beg = {};
		struct record *p;
		char str[128];
		int cpu_idx;

		/* Baseline for streams that first showed up in this interval */
		beg.timestamp = r->timestamp - prev_time;

		/* Look for the same stream in the previous sample */
		hash_for_each_possible(stats_prev->xmit_map, e, node, pair) {
			if (e->pair == pair) {
				match = e;
				break;
			}
		}
		p = match ? &match->val : &beg;

		t = calc_period(r, p);
		pps = calc_pps(&r->total, &p->total, t);
		drop = calc_drop_pps(&r->total, &p->total, t);
		info = calc_info_pps(&r->total, &p->total, t);
		if (info > 0)
			info = (pps + drop) / info; /* average bulk size */
		err = calc_errs_pps(&r->total, &p->total, t);

		if (out) {
			/* Summary pass: accumulate the totals, print nothing */
			out->totals.xmit += pps;
			out->totals.drop_xmit += drop;
			out->totals.err += err;
			continue;
		}

		/* if_indextoname() hands back its buffer, or NULL on failure */
		fstr = if_indextoname(from_idx, ifname_from);
		tstr = if_indextoname(to_idx, ifname_to);

		snprintf(str, sizeof(str), "xmit %s->%s", fstr ? fstr : "?",
			 tstr ? tstr : "?");

		/* Idle redirection streams are not printed */
		if (pps || drop || err) {
			print_err(drop,
				  " %-20s " FMT_COLUMNf FMT_COLUMNf FMT_COLUMNf
				  __COLUMN(".2f") "\n", str, XMIT(pps), DROP(drop),
				  err, "drv_err/s", info, "bulk-avg");
		}

		for (cpu_idx = 0; cpu_idx < nr_cpus; cpu_idx++) {
			struct datarec *rc = &r->cpu[cpu_idx];
			struct datarec zero = {};
			struct datarec *pc;
			char cpu_str[64];

			/* The baseline record has no per-CPU array to index */
			pc = match ? &p->cpu[cpu_idx] : &zero;

			pps = calc_pps(rc, pc, t);
			drop = calc_drop_pps(rc, pc, t);
			err = calc_errs_pps(rc, pc, t);

			if (pps || drop || err) {
				snprintf(cpu_str, sizeof(cpu_str), "cpu:%d", cpu_idx);

				info = calc_info_pps(rc, pc, t);
				if (info > 0)
					info = (pps + drop) / info; /* average bulk size */

				print_default(" %-18s" FMT_COLUMNf FMT_COLUMNf FMT_COLUMNf
					      __COLUMN(".2f") "\n", cpu_str, XMIT(pps),
					      DROP(drop), err, "drv_err/s", info, "bulk-avg");
			}
		}
	}
}
static void stats_print(const char *prefix, int mask, struct stats_record *r,
struct stats_record *p, struct sample_output *out)
{
@ -849,6 +1111,9 @@ static void stats_print(const char *prefix, int mask, struct stats_record *r,
printf(FMT_COLUMNl,
out->totals.err + out->totals.drop + out->totals.drop_xmit,
"err,drop/s");
if (mask & SAMPLE_DEVMAP_XMIT_CNT ||
mask & SAMPLE_DEVMAP_XMIT_CNT_MULTI)
printf(FMT_COLUMNl, XMIT(out->totals.xmit));
printf("\n");
if (mask & SAMPLE_RX_CNT) {
@ -899,6 +1164,25 @@ static void stats_print(const char *prefix, int mask, struct stats_record *r,
stats_get_exception_cnt(r, p, nr_cpus, NULL);
}
if (mask & SAMPLE_DEVMAP_XMIT_CNT) {
str = (sample_log_level & LL_DEFAULT) && out->xmit_cnt.pps ?
"devmap_xmit total" :
"devmap_xmit";
print_err(out->xmit_cnt.err || out->xmit_cnt.drop,
" %-20s " FMT_COLUMNl FMT_COLUMNl FMT_COLUMNl
__COLUMN(".2f") "\n",
str, XMIT(out->xmit_cnt.pps),
DROP(out->xmit_cnt.drop), out->xmit_cnt.err,
"drv_err/s", out->xmit_cnt.bavg, "bulk-avg");
stats_get_devmap_xmit(r, p, nr_cpus, NULL);
}
if (mask & SAMPLE_DEVMAP_XMIT_CNT_MULTI)
stats_get_devmap_xmit_multi(r, p, nr_cpus, NULL,
mask & SAMPLE_DEVMAP_XMIT_CNT);
if (sample_log_level & LL_DEFAULT ||
((sample_log_level & LL_SIMPLE) && sample_err_exp)) {
sample_err_exp = false;
@ -910,12 +1194,13 @@ int sample_setup_maps(struct bpf_map **maps)
{
sample_n_cpus = libbpf_num_possible_cpus();
for (int i = 0; i < NUM_MAP; i++) {
for (int i = 0; i < MAP_DEVMAP_XMIT_MULTI; i++) {
sample_map[i] = maps[i];
switch (i) {
case MAP_RX:
case MAP_CPUMAP_KTHREAD:
case MAP_DEVMAP_XMIT:
sample_map_count[i] = sample_n_cpus;
break;
case MAP_REDIRECT_ERR:
@ -933,12 +1218,13 @@ int sample_setup_maps(struct bpf_map **maps)
if (bpf_map__resize(sample_map[i], sample_map_count[i]) < 0)
return -errno;
}
sample_map[MAP_DEVMAP_XMIT_MULTI] = maps[MAP_DEVMAP_XMIT_MULTI];
return 0;
}
static int sample_setup_maps_mappings(void)
{
for (int i = 0; i < NUM_MAP; i++) {
for (int i = 0; i < MAP_DEVMAP_XMIT_MULTI; i++) {
size_t size = sample_map_count[i] * sizeof(struct datarec);
sample_mmap[i] = mmap(NULL, size, PROT_READ | PROT_WRITE,
@ -1057,9 +1343,20 @@ static void sample_summary_print(void)
if (sample_out.totals.drop)
print_always(" Rx dropped : %'-10llu\n",
sample_out.totals.drop);
if (sample_out.totals.drop_xmit)
print_always(" Tx dropped : %'-10llu\n",
sample_out.totals.drop_xmit);
if (sample_out.totals.err)
print_always(" Errors recorded : %'-10llu\n",
sample_out.totals.err);
if (sample_out.totals.xmit) {
double pkts = sample_out.totals.xmit;
print_always(" Packets transmitted : %'-10llu\n",
sample_out.totals.xmit);
print_always(" Average transmit/s : %'-10.0f\n",
sample_round(pkts / period));
}
}
void sample_exit(int status)
@ -1115,6 +1412,13 @@ static int sample_stats_collect(struct stats_record *rec)
map_collect_percpu(&sample_mmap[MAP_EXCEPTION][i * sample_n_cpus],
&rec->exception[i]);
if (sample_mask & SAMPLE_DEVMAP_XMIT_CNT)
map_collect_percpu(sample_mmap[MAP_DEVMAP_XMIT], &rec->devmap_xmit);
if (sample_mask & SAMPLE_DEVMAP_XMIT_CNT_MULTI) {
if (map_collect_percpu_devmap(bpf_map__fd(sample_map[MAP_DEVMAP_XMIT_MULTI]), rec) < 0)
return -EINVAL;
}
return 0;
}
@ -1123,7 +1427,9 @@ static void sample_summary_update(struct sample_output *out, int interval)
sample_out.totals.rx += out->totals.rx;
sample_out.totals.redir += out->totals.redir;
sample_out.totals.drop += out->totals.drop;
sample_out.totals.drop_xmit += out->totals.drop_xmit;
sample_out.totals.err += out->totals.err;
sample_out.totals.xmit += out->totals.xmit;
sample_out.rx_cnt.pps += interval;
}
@ -1141,6 +1447,11 @@ static void sample_stats_print(int mask, struct stats_record *cur,
stats_get_redirect_err_cnt(cur, prev, 0, &out);
if (mask & SAMPLE_EXCEPTION_CNT)
stats_get_exception_cnt(cur, prev, 0, &out);
if (mask & SAMPLE_DEVMAP_XMIT_CNT)
stats_get_devmap_xmit(cur, prev, 0, &out);
else if (mask & SAMPLE_DEVMAP_XMIT_CNT_MULTI)
stats_get_devmap_xmit_multi(cur, prev, 0, &out,
mask & SAMPLE_DEVMAP_XMIT_CNT);
sample_summary_update(&out, interval);
stats_print(prog_name, mask, cur, prev, &out);

View File

@ -14,9 +14,11 @@ enum stats_mask {
SAMPLE_CPUMAP_ENQUEUE_CNT = 1U << 3,
SAMPLE_CPUMAP_KTHREAD_CNT = 1U << 4,
SAMPLE_EXCEPTION_CNT = 1U << 5,
SAMPLE_DEVMAP_XMIT_CNT = 1U << 6,
SAMPLE_REDIRECT_CNT = 1U << 7,
SAMPLE_REDIRECT_MAP_CNT = SAMPLE_REDIRECT_CNT | _SAMPLE_REDIRECT_MAP,
SAMPLE_REDIRECT_ERR_MAP_CNT = SAMPLE_REDIRECT_ERR_CNT | _SAMPLE_REDIRECT_MAP,
SAMPLE_DEVMAP_XMIT_CNT_MULTI = 1U << 8,
};
/* Exit return codes */
@ -63,6 +65,17 @@ static inline char *safe_strncpy(char *dst, const char *src, size_t size)
return -errno; \
})
/*
 * sample_init_pre_load(skel) - set up the stats maps of a BPF skeleton.
 *
 * Stores libbpf_num_possible_cpus() in skel->rodata->nr_cpus and passes the
 * skeleton's stats maps to sample_setup_maps(). sample_setup_maps() indexes
 * that array by enum map_type, so the entries must stay in the same order as
 * the enum. The macro is a statement expression and evaluates to the return
 * value of sample_setup_maps().
 *
 * NOTE(review): going by the name, this is meant to run before the skeleton
 * is loaded - confirm against the callers. @skel is expanded several times,
 * so pass a plain pointer variable.
 */
#define sample_init_pre_load(skel) \
({ \
skel->rodata->nr_cpus = libbpf_num_possible_cpus(); \
sample_setup_maps((struct bpf_map *[]){ \
skel->maps.rx_cnt, skel->maps.redir_err_cnt, \
skel->maps.cpumap_enqueue_cnt, \
skel->maps.cpumap_kthread_cnt, \
skel->maps.exception_cnt, skel->maps.devmap_xmit_cnt, \
skel->maps.devmap_xmit_cnt_multi }); \
})
#define DEFINE_SAMPLE_INIT(name) \
static int sample_init(struct name *skel, int mask) \
{ \
@ -84,6 +97,10 @@ static inline char *safe_strncpy(char *dst, const char *src, size_t size)
__attach_tp(tp_xdp_cpumap_kthread); \
if (mask & SAMPLE_EXCEPTION_CNT) \
__attach_tp(tp_xdp_exception); \
if (mask & SAMPLE_DEVMAP_XMIT_CNT) \
__attach_tp(tp_xdp_devmap_xmit); \
if (mask & SAMPLE_DEVMAP_XMIT_CNT_MULTI) \
__attach_tp(tp_xdp_devmap_xmit_multi); \
return 0; \
}