selftests: udp gso benchmark

Send UDP data between a source and a sink, optionally with UDP GSO
(generic segmentation offload). The two processes are expected to be
run on separate hosts.

A script is included that runs them together over loopback in a
single namespace for functionality testing.

Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
Willem de Bruijn 2018-04-26 13:42:25 -04:00 committed by David S. Miller
parent 3f12817fe3
commit 3a687bef14
5 changed files with 763 additions and 1 deletions

View File

@ -9,3 +9,5 @@ reuseport_dualstack
reuseaddr_conflict
tcp_mmap
udpgso
udpgso_bench_rx
udpgso_bench_tx

View File

@ -6,12 +6,13 @@ CFLAGS += -I../../../../usr/include/
TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh rtnetlink.sh
TEST_PROGS += fib_tests.sh fib-onlink-tests.sh in_netns.sh pmtu.sh udpgso.sh
TEST_PROGS += udpgso_bench.sh
TEST_GEN_FILES = socket
TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy
TEST_GEN_FILES += tcp_mmap
TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict
TEST_GEN_PROGS += udpgso
TEST_GEN_PROGS += udpgso udpgso_bench_tx udpgso_bench_rx
include ../lib.mk

View File

@ -0,0 +1,74 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
#
# Run a series of udpgso benchmarks
#
# Requires bash: the script uses [[ ]] tests and 'local -r', which a
# plain POSIX /bin/sh (e.g. dash) does not provide.
# Send signal 1 (SIGHUP) to every background job that is still around.
# Installed as the EXIT trap so the background receivers do not outlive
# the script. Errors from kill are discarded.
wake_children() {
	local -r pids="$(jobs -p)"

	if [[ -n "${pids}" ]]; then
		kill -1 ${pids} 2>/dev/null
	fi
}

trap wake_children EXIT
# Run a single benchmark: start one default receiver and one "-t" receiver
# in the background, then run the sender in the foreground with the
# caller's arguments.
run_one() {
	local -r tx_args=$@

	./udpgso_bench_rx &
	./udpgso_bench_rx -t &

	./udpgso_bench_tx ${tx_args}
}
# Re-invoke this script through in_netns.sh with the "__subprocess"
# marker, so that the dispatch at the bottom of the file calls run_one
# with the given arguments.
run_in_netns() {
	local -r sub_args=$@

	./in_netns.sh $0 __subprocess ${sub_args}
}
# Run the three UDP variants with the given base arguments:
# plain, with -S (gso) and with -S -z (gso zerocopy).
run_udp() {
	local -r base_args=$@

	echo "udp"
	run_in_netns ${base_args}

	echo "udp gso"
	run_in_netns ${base_args} -S

	echo "udp gso zerocopy"
	run_in_netns ${base_args} -S -z
}
# Run the two TCP variants with the given base arguments:
# plain (-t) and zerocopy (-t -z).
run_tcp() {
	local -r base_args=$@

	echo "tcp"
	run_in_netns ${base_args} -t

	echo "tcp zerocopy"
	run_in_netns ${base_args} -t -z
}
# Run the full matrix: TCP and UDP benchmarks over IPv4 loopback, then
# the same set over IPv6 loopback. Each run lasts 4 seconds (-l 4).
run_all() {
	local -r core_args="-l 4"
	local -r ipv4_args="${core_args} -4 -D 127.0.0.1"
	local -r ipv6_args="${core_args} -6 -D ::1"

	echo "ipv4"
	run_tcp "${ipv4_args}"
	run_udp "${ipv4_args}"

	echo "ipv6"
	# Use the IPv6 arguments here; previously the TCP pass was run over
	# IPv4 a second time and IPv6 TCP was never exercised.
	run_tcp "${ipv6_args}"
	run_udp "${ipv6_args}"
}
# Entry point:
#   no arguments      -> run the whole benchmark matrix
#   __subprocess ...  -> we were re-invoked via in_netns.sh: run one test
#   anything else     -> run one test with these arguments in a netns
if (( $# == 0 )); then
	run_all
elif [[ "$1" == "__subprocess" ]]; then
	shift
	run_one $@
else
	run_in_netns $@
fi

View File

@ -0,0 +1,265 @@
// SPDX-License-Identifier: GPL-2.0
#define _GNU_SOURCE
#include <arpa/inet.h>
#include <error.h>
#include <errno.h>
#include <limits.h>
#include <linux/errqueue.h>
#include <linux/if_packet.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>
#include <poll.h>
#include <sched.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
/* Port the receiving socket is bound to; set by the 'p' option. */
static int cfg_port = 8000;
/* Set by -t: receive on a TCP (SOCK_STREAM) socket instead of UDP. */
static bool cfg_tcp;
/* Set by -v: check the payload pattern of every received UDP datagram. */
static bool cfg_verify;
/* Set by the SIGINT handler; the poll and receive loops stop when true. */
static bool interrupted;
/* Counters bumped by the flush routines and reset after each report. */
static unsigned long packets, bytes;
/* Signal handler: record that SIGINT was received; ignore anything else. */
static void sigint_handler(int signum)
{
	if (signum != SIGINT)
		return;

	interrupted = true;
}
/* Return the current wall-clock time from gettimeofday() in milliseconds. */
static unsigned long gettimeofday_ms(void)
{
	struct timeval now;

	gettimeofday(&now, NULL);

	/* whole seconds scaled up, plus microseconds scaled down */
	return (now.tv_sec * 1000) + (now.tv_usec / 1000);
}
/*
 * Wait until @fd is readable.
 *
 * poll() is called with a 10 ms timeout so the interrupted flag is
 * rechecked after every timeout; the function returns once data is ready
 * or once a timeout is followed by interrupted being set. A poll() failure,
 * or any revents value other than exactly POLLIN, terminates the program.
 */
static void do_poll(int fd)
{
	struct pollfd pfd = {
		.fd = fd,
		.events = POLLIN,
		.revents = 0,
	};
	int nready;

	for (;;) {
		nready = poll(&pfd, 1, 10);
		if (nready == -1)
			error(1, errno, "poll");

		if (nready != 0) {
			if (pfd.revents != POLLIN)
				error(1, errno, "poll: 0x%x expected 0x%x\n",
				      pfd.revents, POLLIN);
			return;
		}

		/* timed out: give up only if we were asked to stop */
		if (interrupted)
			return;
	}
}
static int do_socket(bool do_tcp)
{
struct sockaddr_in6 addr = {0};
int fd, val;
fd = socket(PF_INET6, cfg_tcp ? SOCK_STREAM : SOCK_DGRAM, 0);
if (fd == -1)
error(1, errno, "socket");
val = 1 << 21;
if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &val, sizeof(val)))
error(1, errno, "setsockopt rcvbuf");
val = 1;
if (setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &val, sizeof(val)))
error(1, errno, "setsockopt reuseport");
addr.sin6_family = PF_INET6;
addr.sin6_port = htons(cfg_port);
addr.sin6_addr = in6addr_any;
if (bind(fd, (void *) &addr, sizeof(addr)))
error(1, errno, "bind");
if (do_tcp) {
int accept_fd = fd;
if (listen(accept_fd, 1))
error(1, errno, "listen");
do_poll(accept_fd);
fd = accept(accept_fd, NULL, NULL);
if (fd == -1)
error(1, errno, "accept");
if (close(accept_fd))
error(1, errno, "close accept fd");
}
return fd;
}
/* Flush all outstanding bytes for the tcp receive queue */
static void do_flush_tcp(int fd)
{
int ret;
while (true) {
/* MSG_TRUNC flushes up to len bytes */
ret = recv(fd, NULL, 1 << 21, MSG_TRUNC | MSG_DONTWAIT);
if (ret == -1 && errno == EAGAIN)
return;
if (ret == -1)
error(1, errno, "flush");
if (ret == 0) {
/* client detached */
exit(0);
}
packets++;
bytes += ret;
}
}
/*
 * Make a payload byte safe to print: lowercase letters 'a'..'z' are
 * returned unchanged, every other value is replaced by '.'.
 */
static char sanitized_char(char val)
{
	if (val < 'a' || val > 'z')
		return '.';

	return val;
}
/*
 * Check that @data holds the expected test pattern: the first byte is a
 * lowercase letter and each following byte is the next letter, wrapping
 * from 'z' back to 'a'.
 *
 * @data: payload bytes to check
 * @len:  number of bytes of @data to check
 *
 * Terminates the program with a diagnostic on the first mismatch.
 */
static void do_verify_udp(const char *data, int len)
{
	char expect = data[0];
	int pos;

	/* verify contents */
	if (expect < 'a' || expect > 'z')
		error(1, 0, "data initial byte out of range");

	for (pos = 1; pos < len; pos++) {
		expect = (expect == 'z') ? 'a' : expect + 1;

		if (data[pos] == expect)
			continue;

		error(1, 0, "data[%d]: len %d, %c(%hhu) != %c(%hhu)\n",
		      pos, len,
		      sanitized_char(data[pos]), data[pos],
		      sanitized_char(expect), expect);
	}
}
/*
 * Flush outstanding datagrams, at most 256 per call.
 *
 * Without cfg_verify the datagrams are discarded (zero-length receive
 * buffer). With cfg_verify up to sizeof(rbuf) bytes of each datagram are
 * copied out and checked against the test pattern.
 *
 * Each datagram increments packets and adds its full length to bytes.
 * Returns when the queue is empty (EAGAIN) or the budget is used up;
 * terminates the program on a receive error or failed verification.
 */
static void do_flush_udp(int fd)
{
	static char rbuf[ETH_DATA_LEN];
	int ret, len, budget = 256;

	len = cfg_verify ? sizeof(rbuf) : 0;
	while (budget--) {
		/* MSG_TRUNC will make return value full datagram length */
		ret = recv(fd, rbuf, len, MSG_TRUNC | MSG_DONTWAIT);
		if (ret == -1 && errno == EAGAIN)
			return;

		if (ret == -1)
			error(1, errno, "recv");
		if (len) {
			/* recv() succeeded, so errno carries no information */
			if (ret == 0)
				error(1, 0, "recv: 0 byte datagram\n");

			/*
			 * ret is the full datagram length and may be larger
			 * than what was copied into rbuf; only verify the
			 * bytes that are actually there.
			 */
			do_verify_udp(rbuf, ret < len ? ret : len);
		}

		packets++;
		bytes += ret;
	}
}
/* Print the command-line synopsis for @filepath and exit with status 1. */
static void usage(const char *filepath)
{
	const int exit_status = 1;

	error(exit_status, 0, "Usage: %s [-tv] [-p port]", filepath);
}
/*
 * Parse the command line into the cfg_* globals.
 *
 *   -p port  port to bind to (host byte order; converted when binding)
 *   -t       receive over TCP
 *   -v       verify UDP payload contents
 *
 * Exits with a usage message on unknown options or stray arguments, and
 * with an error when -t and -v are combined.
 */
static void parse_opts(int argc, char **argv)
{
	int c;

	/* 'p' takes an argument, so it needs the trailing ':' */
	while ((c = getopt(argc, argv, "p:tv")) != -1) {
		switch (c) {
		case 'p': {
			unsigned long port = strtoul(optarg, NULL, 0);

			if (port > 65535)
				error(1, 0, "port %lu out of range", port);

			/*
			 * Keep host byte order here: the value is passed
			 * through htons() when the socket is bound.
			 */
			cfg_port = port;
			break;
		}
		case 't':
			cfg_tcp = true;
			break;
		case 'v':
			cfg_verify = true;
			break;
		default:
			usage(argv[0]);
		}
	}

	if (optind != argc)
		usage(argv[0]);

	if (cfg_tcp && cfg_verify)
		error(1, 0, "TODO: implement verify mode for tcp");
}
/*
 * Main receive loop.
 *
 * Opens the socket, then repeatedly waits for data and flushes it. Roughly
 * once per second (whenever more than 1000 ms have passed since the last
 * report) the accumulated byte and call counters are printed to stderr, if
 * anything was received, and reset. Runs until interrupted is set, then
 * closes the socket.
 */
static void do_recv(void)
{
	unsigned long now, next_report;
	int fd = do_socket(cfg_tcp);

	next_report = gettimeofday_ms() + 1000;

	for (;;) {
		do_poll(fd);

		if (cfg_tcp)
			do_flush_tcp(fd);
		else
			do_flush_udp(fd);

		now = gettimeofday_ms();
		if (now > next_report) {
			if (packets)
				fprintf(stderr,
					"%s rx: %6lu MB/s %8lu calls/s\n",
					cfg_tcp ? "tcp" : "udp",
					bytes >> 20, packets);
			packets = 0;
			bytes = 0;
			next_report = now + 1000;
		}

		if (interrupted)
			break;
	}

	if (close(fd))
		error(1, errno, "close");
}
/*
 * Receiver entry point: parse options, install the SIGINT handler so the
 * receive loop can stop cleanly, then receive until interrupted.
 */
int main(int argc, char **argv)
{
	parse_opts(argc, argv);

	signal(SIGINT, sigint_handler);

	do_recv();

	return EXIT_SUCCESS;
}

View File

@ -0,0 +1,420 @@
// SPDX-License-Identifier: GPL-2.0
#define _GNU_SOURCE
#include <arpa/inet.h>
#include <errno.h>
#include <error.h>
#include <netinet/if_ether.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/udp.h>
#include <poll.h>
#include <sched.h>
#include <signal.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/time.h>
#include <sys/types.h>
#include <unistd.h>
/*
 * Fallback definitions, used only when the included system headers do not
 * already provide these constants.
 */
#ifndef ETH_MAX_MTU
#define ETH_MAX_MTU 0xFFFFU
#endif
#ifndef UDP_SEGMENT
#define UDP_SEGMENT 103
#endif
#ifndef SO_ZEROCOPY
#define SO_ZEROCOPY 60
#endif
#ifndef MSG_ZEROCOPY
#define MSG_ZEROCOPY 0x4000000
#endif
/* Number of payload buffers in buf[]; they are cycled through with -c. */
#define NUM_PKT 100
/* -c: move to the next buf[] entry after every message. */
static bool cfg_cache_trash;
/* -C: CPU to pin the process to; -1 when not given. */
static int cfg_cpu = -1;
/* Cleared by -u; when set the socket is connect()ed before sending. */
static int cfg_connected = true;
/* PF_INET or PF_INET6, chosen with -4 / -6. */
static int cfg_family = PF_UNSPEC;
/* Payload bytes per datagram; computed in parse_opts from ETH_DATA_LEN. */
static uint16_t cfg_mss;
/* -s: number of payload bytes sent per message. */
static int cfg_payload_len = (1472 * 42);
/* -p: destination port. */
static int cfg_port = 8000;
/* -l: run time in milliseconds; -1 means run until interrupted. */
static int cfg_runtime_ms = -1;
/* -S: send each message with a UDP_SEGMENT control message. */
static bool cfg_segment;
/* -m: send each message with a single sendmmsg() call. */
static bool cfg_sendmmsg;
/* -t: use a TCP (SOCK_STREAM) socket. */
static bool cfg_tcp;
/* -z: enable SO_ZEROCOPY and send with MSG_ZEROCOPY. */
static bool cfg_zerocopy;
/* Size of the address stored in cfg_dst_addr, per address family. */
static socklen_t cfg_alen;
/* Destination address, filled in from -D. */
static struct sockaddr_storage cfg_dst_addr;
/* Set by the SIGINT handler; the send loop stops when true. */
static bool interrupted;
/* Payload buffers; main() fills each with a repeating 'a'..'z' pattern. */
static char buf[NUM_PKT][ETH_MAX_MTU];
/* Signal handler: flag the send loop to stop when SIGINT arrives. */
static void sigint_handler(int signum)
{
	switch (signum) {
	case SIGINT:
		interrupted = true;
		break;
	default:
		break;
	}
}
/* Current wall-clock time, as reported by gettimeofday(), in milliseconds. */
static unsigned long gettimeofday_ms(void)
{
	struct timeval stamp;

	gettimeofday(&stamp, NULL);

	return (stamp.tv_sec * 1000) + (stamp.tv_usec / 1000);
}
/*
 * Restrict the calling process to run on @cpu only.
 *
 * Returns 0 on success; terminates the program if the affinity cannot
 * be set.
 */
static int set_cpu(int cpu)
{
	cpu_set_t cpus;

	CPU_ZERO(&cpus);
	CPU_SET(cpu, &cpus);

	if (sched_setaffinity(0, sizeof(cpus), &cpus) != 0)
		error(1, 0, "setaffinity %d", cpu);

	return 0;
}
/*
 * Fill @sockaddr with an IPv4 or IPv6 socket address.
 *
 * @domain:   PF_INET or PF_INET6; anything else is a fatal error
 * @str_addr: textual address, parsed with inet_pton()
 * @sockaddr: storage to fill; the port is taken from cfg_port
 *
 * Terminates the program if @str_addr does not parse for @domain.
 */
static void setup_sockaddr(int domain, const char *str_addr, void *sockaddr)
{
	if (domain == PF_INET) {
		struct sockaddr_in *sin = sockaddr;

		sin->sin_family = AF_INET;
		sin->sin_port = htons(cfg_port);
		if (inet_pton(AF_INET, str_addr, &sin->sin_addr) != 1)
			error(1, 0, "ipv4 parse error: %s", str_addr);
		return;
	}

	if (domain == PF_INET6) {
		struct sockaddr_in6 *sin6 = sockaddr;

		sin6->sin6_family = AF_INET6;
		sin6->sin6_port = htons(cfg_port);
		if (inet_pton(AF_INET6, str_addr, &sin6->sin6_addr) != 1)
			error(1, 0, "ipv6 parse error: %s", str_addr);
		return;
	}

	error(1, 0, "illegal domain");
}
/*
 * Drain the socket error queue of @fd.
 *
 * Messages are read with an empty msghdr, so their contents are dropped.
 * Every dequeued message is expected to come back with exactly
 * MSG_ERRQUEUE | MSG_CTRUNC in msg_flags; anything else is fatal.
 * Returns once the queue is empty (EAGAIN).
 */
static void flush_zerocopy(int fd)
{
	const int want_flags = MSG_ERRQUEUE | MSG_CTRUNC;
	struct msghdr msg = {0};	/* flush */
	int ret;

	for (;;) {
		ret = recvmsg(fd, &msg, MSG_ERRQUEUE);
		if (ret == -1) {
			if (errno == EAGAIN)
				return;
			error(1, errno, "errqueue");
		}

		if (msg.msg_flags != want_flags)
			error(1, 0, "errqueue: flags 0x%x\n", msg.msg_flags);

		msg.msg_flags = 0;
	}
}
/*
 * Write one message of cfg_payload_len bytes from @data to the TCP
 * socket @fd, calling send() repeatedly until everything is written.
 *
 * Returns the number of send() calls made. A send() failure is fatal.
 */
static int send_tcp(int fd, char *data)
{
	const int flags = cfg_zerocopy ? MSG_ZEROCOPY : 0;
	int sent = 0, calls = 0, ret;

	for (; sent < cfg_payload_len; sent += ret, calls++) {
		ret = send(fd, data + sent, cfg_payload_len - sent, flags);
		if (ret == -1)
			error(1, errno, "write");
	}

	return calls;
}
/*
 * Send one message of cfg_payload_len bytes as a series of datagrams of
 * at most cfg_mss bytes each, one sendto() per datagram.
 *
 * Every datagram is taken from the start of @data. On an unconnected
 * socket the destination is cfg_dst_addr; on a connected one no address
 * is passed.
 *
 * Returns the number of datagrams sent. Errors and short writes are fatal.
 */
static int send_udp(int fd, char *data)
{
	const int flags = cfg_zerocopy ? MSG_ZEROCOPY : 0;
	struct sockaddr *dst = NULL;
	socklen_t dst_len = 0;
	int remaining, chunk, ret, calls = 0;

	if (!cfg_connected) {
		dst = (void *)&cfg_dst_addr;
		dst_len = cfg_alen;
	}

	for (remaining = cfg_payload_len; remaining; remaining -= chunk) {
		chunk = remaining < cfg_mss ? remaining : cfg_mss;

		ret = sendto(fd, data, chunk, flags, dst, dst_len);
		if (ret == -1)
			error(1, errno, "write");
		if (ret != chunk)
			error(1, errno, "write: %uB != %uB\n", ret, chunk);

		calls++;
	}

	return calls;
}
static int send_udp_sendmmsg(int fd, char *data)
{
const int max_nr_msg = ETH_MAX_MTU / ETH_DATA_LEN;
struct mmsghdr mmsgs[max_nr_msg];
struct iovec iov[max_nr_msg];
unsigned int off = 0, left;
int i = 0, ret;
memset(mmsgs, 0, sizeof(mmsgs));
left = cfg_payload_len;
while (left) {
if (i == max_nr_msg)
error(1, 0, "sendmmsg: exceeds max_nr_msg");
iov[i].iov_base = data + off;
iov[i].iov_len = cfg_mss < left ? cfg_mss : left;
mmsgs[i].msg_hdr.msg_iov = iov + i;
mmsgs[i].msg_hdr.msg_iovlen = 1;
off += iov[i].iov_len;
left -= iov[i].iov_len;
i++;
}
ret = sendmmsg(fd, mmsgs, i, cfg_zerocopy ? MSG_ZEROCOPY : 0);
if (ret == -1)
error(1, errno, "sendmmsg");
return ret;
}
/*
 * Fill @cm as a SOL_UDP / UDP_SEGMENT control message whose 16-bit
 * payload is the segment size cfg_mss.
 */
static void send_udp_segment_cmsg(struct cmsghdr *cm)
{
	const uint16_t gso_size = cfg_mss;

	cm->cmsg_len = CMSG_LEN(sizeof(gso_size));
	cm->cmsg_level = SOL_UDP;
	cm->cmsg_type = UDP_SEGMENT;

	memcpy(CMSG_DATA(cm), &gso_size, sizeof(gso_size));
}
/*
 * Send the whole cfg_payload_len-byte message from @data in a single
 * sendmsg() call, attaching a UDP_SEGMENT control message that carries
 * cfg_mss as the segment size. The destination is always cfg_dst_addr.
 *
 * Returns 1 (one send call). Errors and short writes are fatal.
 */
static int send_udp_segment(int fd, char *data)
{
	char control[CMSG_SPACE(sizeof(cfg_mss))] = {0};
	struct iovec iov = {
		.iov_base = data,
		.iov_len = cfg_payload_len,
	};
	struct msghdr msg = {
		.msg_name = (void *)&cfg_dst_addr,
		.msg_namelen = cfg_alen,
		.msg_iov = &iov,
		.msg_iovlen = 1,
		.msg_control = control,
		.msg_controllen = sizeof(control),
	};
	int ret;

	send_udp_segment_cmsg(CMSG_FIRSTHDR(&msg));

	ret = sendmsg(fd, &msg, cfg_zerocopy ? MSG_ZEROCOPY : 0);
	if (ret == -1)
		error(1, errno, "sendmsg");
	if (ret != iov.iov_len)
		error(1, 0, "sendmsg: %u != %lu\n", ret, iov.iov_len);

	return 1;
}
/* Print the command-line synopsis for @filepath and exit with status 1. */
static void usage(const char *filepath)
{
	const int exit_status = 1;

	error(exit_status, 0,
	      "Usage: %s [-46cmStuz] [-C cpu] [-D dst ip] [-l secs] [-p port] [-s sendsize]",
	      filepath);
}
static void parse_opts(int argc, char **argv)
{
int max_len, hdrlen;
int c;
while ((c = getopt(argc, argv, "46cC:D:l:mp:s:Stuz")) != -1) {
switch (c) {
case '4':
if (cfg_family != PF_UNSPEC)
error(1, 0, "Pass one of -4 or -6");
cfg_family = PF_INET;
cfg_alen = sizeof(struct sockaddr_in);
break;
case '6':
if (cfg_family != PF_UNSPEC)
error(1, 0, "Pass one of -4 or -6");
cfg_family = PF_INET6;
cfg_alen = sizeof(struct sockaddr_in6);
break;
case 'c':
cfg_cache_trash = true;
break;
case 'C':
cfg_cpu = strtol(optarg, NULL, 0);
break;
case 'D':
setup_sockaddr(cfg_family, optarg, &cfg_dst_addr);
break;
case 'l':
cfg_runtime_ms = strtoul(optarg, NULL, 10) * 1000;
break;
case 'm':
cfg_sendmmsg = true;
break;
case 'p':
cfg_port = strtoul(optarg, NULL, 0);
break;
case 's':
cfg_payload_len = strtoul(optarg, NULL, 0);
break;
case 'S':
cfg_segment = true;
break;
case 't':
cfg_tcp = true;
break;
case 'u':
cfg_connected = false;
break;
case 'z':
cfg_zerocopy = true;
break;
}
}
if (optind != argc)
usage(argv[0]);
if (cfg_family == PF_UNSPEC)
error(1, 0, "must pass one of -4 or -6");
if (cfg_tcp && !cfg_connected)
error(1, 0, "connectionless tcp makes no sense");
if (cfg_segment && cfg_sendmmsg)
error(1, 0, "cannot combine segment offload and sendmmsg");
if (cfg_family == PF_INET)
hdrlen = sizeof(struct iphdr) + sizeof(struct udphdr);
else
hdrlen = sizeof(struct ip6_hdr) + sizeof(struct udphdr);
cfg_mss = ETH_DATA_LEN - hdrlen;
max_len = ETH_MAX_MTU - hdrlen;
if (cfg_payload_len > max_len)
error(1, 0, "payload length %u exceeds max %u",
cfg_payload_len, max_len);
}
/*
 * Set the path MTU discovery mode of @fd to the "DO" setting of its
 * address family (IP_PMTUDISC_DO or IPV6_PMTUDISC_DO).
 *
 * @is_ipv4: true for an IPv4 socket, false for an IPv6 socket
 *
 * A setsockopt() failure is fatal.
 */
static void set_pmtu_discover(int fd, bool is_ipv4)
{
	const int level = is_ipv4 ? SOL_IP : SOL_IPV6;
	const int name = is_ipv4 ? IP_MTU_DISCOVER : IPV6_MTU_DISCOVER;
	int val = is_ipv4 ? IP_PMTUDISC_DO : IPV6_PMTUDISC_DO;

	if (setsockopt(fd, level, name, &val, sizeof(val)))
		error(1, errno, "setsockopt path mtu");
}
/*
 * Sender entry point.
 *
 * Parses options, optionally pins the process to a CPU, fills the payload
 * buffers with a repeating 'a'..'z' pattern, opens (and normally connects)
 * the socket, then sends messages in a loop using the configured method.
 * About once per second it prints throughput counters to stderr. The loop
 * ends on SIGINT or when the configured run time has elapsed.
 */
int main(int argc, char **argv)
{
	unsigned long num_msgs, num_sends;
	unsigned long tnow, treport, tstop;
	int fd, i, val;

	parse_opts(argc, argv);

	/* -1 means "not requested"; CPU 0 is a valid choice */
	if (cfg_cpu >= 0)
		set_cpu(cfg_cpu);

	for (i = 0; i < sizeof(buf[0]); i++)
		buf[0][i] = 'a' + (i % 26);
	for (i = 1; i < NUM_PKT; i++)
		memcpy(buf[i], buf[0], sizeof(buf[0]));

	signal(SIGINT, sigint_handler);

	fd = socket(cfg_family, cfg_tcp ? SOCK_STREAM : SOCK_DGRAM, 0);
	if (fd == -1)
		error(1, errno, "socket");

	if (cfg_zerocopy) {
		val = 1;
		if (setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, &val, sizeof(val)))
			error(1, errno, "setsockopt zerocopy");
	}

	if (cfg_connected &&
	    connect(fd, (void *)&cfg_dst_addr, cfg_alen))
		error(1, errno, "connect");

	if (cfg_segment)
		set_pmtu_discover(fd, cfg_family == PF_INET);

	num_msgs = num_sends = 0;
	tnow = gettimeofday_ms();
	tstop = tnow + cfg_runtime_ms;
	treport = tnow + 1000;

	i = 0;
	do {
		if (cfg_tcp)
			num_sends += send_tcp(fd, buf[i]);
		else if (cfg_segment)
			num_sends += send_udp_segment(fd, buf[i]);
		else if (cfg_sendmmsg)
			num_sends += send_udp_sendmmsg(fd, buf[i]);
		else
			num_sends += send_udp(fd, buf[i]);
		num_msgs++;

		/* reap zerocopy completions every 16 messages */
		if (cfg_zerocopy && ((num_msgs & 0xF) == 0))
			flush_zerocopy(fd);

		tnow = gettimeofday_ms();
		if (tnow > treport) {
			fprintf(stderr,
				"%s tx: %6lu MB/s %8lu calls/s %6lu msg/s\n",
				cfg_tcp ? "tcp" : "udp",
				(num_msgs * cfg_payload_len) >> 20,
				num_sends, num_msgs);
			num_msgs = num_sends = 0;
			treport = tnow + 1000;
		}

		/*
		 * cold cache when writing buffer: advance to the next
		 * buffer, wrapping at NUM_PKT. Written as two statements;
		 * "i = ++i < N ? i : 0" modifies i twice without a
		 * sequence point in between.
		 */
		if (cfg_cache_trash) {
			i++;
			if (i >= NUM_PKT)
				i = 0;
		}

	} while (!interrupted && (cfg_runtime_ms == -1 || tnow < tstop));

	if (close(fd))
		error(1, errno, "close");

	return 0;
}