selftests: add ncdevmem, netcat for devmem TCP

ncdevmem is a devmem TCP netcat. It works similarly to netcat, but it
sends and receives data using the devmem TCP APIs. It uses udmabuf as
the dmabuf provider. It is compatible with a regular netcat running on
a peer, or a ncdevmem running on a peer.

In addition to normal netcat support, ncdevmem has a validation mode,
where it sends a specific pattern and validates this pattern on the
receiver side to ensure data integrity.

Suggested-by: Stanislav Fomichev <sdf@fomichev.me>
Signed-off-by: Mina Almasry <almasrymina@google.com>
Link: https://patch.msgid.link/20240910171458.219195-13-almasrymina@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
Mina Almasry 2024-09-10 17:14:56 +00:00 committed by Jakub Kicinski
parent 09d1db26b5
commit 85585b4bc8
4 changed files with 581 additions and 0 deletions

View File

@ -1 +1,2 @@
__pycache__/
*.d

View File

@ -17,6 +17,7 @@ ipv6_flowlabel
ipv6_flowlabel_mgr
log.txt
msg_zerocopy
ncdevmem
nettest
psock_fanout
psock_snd

View File

@ -97,6 +97,11 @@ TEST_PROGS += fq_band_pktlimit.sh
TEST_PROGS += vlan_hw_filter.sh
TEST_PROGS += bpf_offload.py
# YNL files, must be before "include ..lib.mk"
EXTRA_CLEAN += $(OUTPUT)/libynl.a
YNL_GEN_FILES := ncdevmem
TEST_GEN_FILES += $(YNL_GEN_FILES)
TEST_FILES := settings
TEST_FILES += in_netns.sh lib.sh net_helper.sh setup_loopback.sh setup_veth.sh
@ -106,6 +111,10 @@ TEST_INCLUDES := forwarding/lib.sh
include ../lib.mk
# YNL build
YNL_GENS := netdev
include ynl.mk
$(OUTPUT)/epoll_busy_poll: LDLIBS += -lcap
$(OUTPUT)/reuseport_bpf_numa: LDLIBS += -lnuma
$(OUTPUT)/tcp_mmap: LDLIBS += -lpthread -lcrypto

View File

@ -0,0 +1,570 @@
// SPDX-License-Identifier: GPL-2.0
#define _GNU_SOURCE
#define __EXPORTED_HEADERS__
#include <linux/uio.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <stdbool.h>
#include <string.h>
#include <errno.h>
#define __iovec_defined
#include <fcntl.h>
#include <malloc.h>
#include <error.h>
#include <arpa/inet.h>
#include <sys/socket.h>
#include <sys/mman.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <linux/memfd.h>
#include <linux/dma-buf.h>
#include <linux/udmabuf.h>
#include <libmnl/libmnl.h>
#include <linux/types.h>
#include <linux/netlink.h>
#include <linux/genetlink.h>
#include <linux/netdev.h>
#include <time.h>
#include <net/if.h>
#include "netdev-user.h"
#include <ynl.h>
#define PAGE_SHIFT 12
#define TEST_PREFIX "ncdevmem"
#define NUM_PAGES 16000
#ifndef MSG_SOCK_DEVMEM
#define MSG_SOCK_DEVMEM 0x2000000
#endif
/*
* tcpdevmem netcat. Works similarly to netcat but does device memory TCP
* instead of regular TCP. Uses udmabuf to mock a dmabuf provider.
*
* Usage:
*
* On server:
* ncdevmem -s <server IP> -c <client IP> -f eth1 -l -p 5201 -v 7
*
* On client:
* yes $(echo -e \\x01\\x02\\x03\\x04\\x05\\x06) | \
* tr \\n \\0 | \
* head -c 5G | \
* nc <server IP> 5201 -p 5201
*
* Note this is compatible with regular netcat. i.e. the sender or receiver can
* be replaced with regular netcat to test the RX or TX path in isolation.
*/
static char *server_ip = "192.168.1.4";
static char *client_ip = "192.168.1.2";
static char *port = "5201";
static size_t do_validation;
static int start_queue = 8;
static int num_queues = 8;
static char *ifname = "eth1";
static unsigned int ifindex;
static unsigned int dmabuf_id;
void print_bytes(void *ptr, size_t size)
{
unsigned char *p = ptr;
int i;
for (i = 0; i < size; i++)
printf("%02hhX ", p[i]);
printf("\n");
}
void print_nonzero_bytes(void *ptr, size_t size)
{
unsigned char *p = ptr;
unsigned int i;
for (i = 0; i < size; i++)
putchar(p[i]);
printf("\n");
}
void validate_buffer(void *line, size_t size)
{
static unsigned char seed = 1;
unsigned char *ptr = line;
int errors = 0;
size_t i;
for (i = 0; i < size; i++) {
if (ptr[i] != seed) {
fprintf(stderr,
"Failed validation: expected=%u, actual=%u, index=%lu\n",
seed, ptr[i], i);
errors++;
if (errors > 20)
error(1, 0, "validation failed.");
}
seed++;
if (seed == do_validation)
seed = 0;
}
fprintf(stdout, "Validated buffer\n");
}
#define run_command(cmd, ...) \
({ \
char command[256]; \
memset(command, 0, sizeof(command)); \
snprintf(command, sizeof(command), cmd, ##__VA_ARGS__); \
printf("Running: %s\n", command); \
system(command); \
})
static int reset_flow_steering(void)
{
int ret = 0;
ret = run_command("sudo ethtool -K %s ntuple off", ifname);
if (ret)
return ret;
return run_command("sudo ethtool -K %s ntuple on", ifname);
}
static int configure_headersplit(bool on)
{
return run_command("sudo ethtool -G %s tcp-data-split %s", ifname,
on ? "on" : "off");
}
static int configure_rss(void)
{
return run_command("sudo ethtool -X %s equal %d", ifname, start_queue);
}
static int configure_channels(unsigned int rx, unsigned int tx)
{
return run_command("sudo ethtool -L %s rx %u tx %u", ifname, rx, tx);
}
static int configure_flow_steering(void)
{
return run_command("sudo ethtool -N %s flow-type tcp4 src-ip %s dst-ip %s src-port %s dst-port %s queue %d",
ifname, client_ip, server_ip, port, port, start_queue);
}
static int bind_rx_queue(unsigned int ifindex, unsigned int dmabuf_fd,
struct netdev_queue_id *queues,
unsigned int n_queue_index, struct ynl_sock **ys)
{
struct netdev_bind_rx_req *req = NULL;
struct netdev_bind_rx_rsp *rsp = NULL;
struct ynl_error yerr;
*ys = ynl_sock_create(&ynl_netdev_family, &yerr);
if (!*ys) {
fprintf(stderr, "YNL: %s\n", yerr.msg);
return -1;
}
req = netdev_bind_rx_req_alloc();
netdev_bind_rx_req_set_ifindex(req, ifindex);
netdev_bind_rx_req_set_fd(req, dmabuf_fd);
__netdev_bind_rx_req_set_queues(req, queues, n_queue_index);
rsp = netdev_bind_rx(*ys, req);
if (!rsp) {
perror("netdev_bind_rx");
goto err_close;
}
if (!rsp->_present.id) {
perror("id not present");
goto err_close;
}
printf("got dmabuf id=%d\n", rsp->id);
dmabuf_id = rsp->id;
netdev_bind_rx_req_free(req);
netdev_bind_rx_rsp_free(rsp);
return 0;
err_close:
fprintf(stderr, "YNL failed: %s\n", (*ys)->err.msg);
netdev_bind_rx_req_free(req);
ynl_sock_destroy(*ys);
return -1;
}
static void create_udmabuf(int *devfd, int *memfd, int *buf, size_t dmabuf_size)
{
struct udmabuf_create create;
int ret;
*devfd = open("/dev/udmabuf", O_RDWR);
if (*devfd < 0) {
error(70, 0,
"%s: [skip,no-udmabuf: Unable to access DMA buffer device file]\n",
TEST_PREFIX);
}
*memfd = memfd_create("udmabuf-test", MFD_ALLOW_SEALING);
if (*memfd < 0)
error(70, 0, "%s: [skip,no-memfd]\n", TEST_PREFIX);
/* Required for udmabuf */
ret = fcntl(*memfd, F_ADD_SEALS, F_SEAL_SHRINK);
if (ret < 0)
error(73, 0, "%s: [skip,fcntl-add-seals]\n", TEST_PREFIX);
ret = ftruncate(*memfd, dmabuf_size);
if (ret == -1)
error(74, 0, "%s: [FAIL,memfd-truncate]\n", TEST_PREFIX);
memset(&create, 0, sizeof(create));
create.memfd = *memfd;
create.offset = 0;
create.size = dmabuf_size;
*buf = ioctl(*devfd, UDMABUF_CREATE, &create);
if (*buf < 0)
error(75, 0, "%s: [FAIL, create udmabuf]\n", TEST_PREFIX);
}
int do_server(void)
{
char ctrl_data[sizeof(int) * 20000];
struct netdev_queue_id *queues;
size_t non_page_aligned_frags = 0;
struct sockaddr_in client_addr;
struct sockaddr_in server_sin;
size_t page_aligned_frags = 0;
int devfd, memfd, buf, ret;
size_t total_received = 0;
socklen_t client_addr_len;
bool is_devmem = false;
char *buf_mem = NULL;
struct ynl_sock *ys;
size_t dmabuf_size;
char iobuf[819200];
char buffer[256];
int socket_fd;
int client_fd;
size_t i = 0;
int opt = 1;
dmabuf_size = getpagesize() * NUM_PAGES;
create_udmabuf(&devfd, &memfd, &buf, dmabuf_size);
if (reset_flow_steering())
error(1, 0, "Failed to reset flow steering\n");
/* Configure RSS to divert all traffic from our devmem queues */
if (configure_rss())
error(1, 0, "Failed to configure rss\n");
/* Flow steer our devmem flows to start_queue */
if (configure_flow_steering())
error(1, 0, "Failed to configure flow steering\n");
sleep(1);
queues = malloc(sizeof(*queues) * num_queues);
for (i = 0; i < num_queues; i++) {
queues[i]._present.type = 1;
queues[i]._present.id = 1;
queues[i].type = NETDEV_QUEUE_TYPE_RX;
queues[i].id = start_queue + i;
}
if (bind_rx_queue(ifindex, buf, queues, num_queues, &ys))
error(1, 0, "Failed to bind\n");
buf_mem = mmap(NULL, dmabuf_size, PROT_READ | PROT_WRITE, MAP_SHARED,
buf, 0);
if (buf_mem == MAP_FAILED)
error(1, 0, "mmap()");
server_sin.sin_family = AF_INET;
server_sin.sin_port = htons(atoi(port));
ret = inet_pton(server_sin.sin_family, server_ip, &server_sin.sin_addr);
if (socket < 0)
error(79, 0, "%s: [FAIL, create socket]\n", TEST_PREFIX);
socket_fd = socket(server_sin.sin_family, SOCK_STREAM, 0);
if (socket < 0)
error(errno, errno, "%s: [FAIL, create socket]\n", TEST_PREFIX);
ret = setsockopt(socket_fd, SOL_SOCKET, SO_REUSEPORT, &opt,
sizeof(opt));
if (ret)
error(errno, errno, "%s: [FAIL, set sock opt]\n", TEST_PREFIX);
ret = setsockopt(socket_fd, SOL_SOCKET, SO_REUSEADDR, &opt,
sizeof(opt));
if (ret)
error(errno, errno, "%s: [FAIL, set sock opt]\n", TEST_PREFIX);
printf("binding to address %s:%d\n", server_ip,
ntohs(server_sin.sin_port));
ret = bind(socket_fd, &server_sin, sizeof(server_sin));
if (ret)
error(errno, errno, "%s: [FAIL, bind]\n", TEST_PREFIX);
ret = listen(socket_fd, 1);
if (ret)
error(errno, errno, "%s: [FAIL, listen]\n", TEST_PREFIX);
client_addr_len = sizeof(client_addr);
inet_ntop(server_sin.sin_family, &server_sin.sin_addr, buffer,
sizeof(buffer));
printf("Waiting or connection on %s:%d\n", buffer,
ntohs(server_sin.sin_port));
client_fd = accept(socket_fd, &client_addr, &client_addr_len);
inet_ntop(client_addr.sin_family, &client_addr.sin_addr, buffer,
sizeof(buffer));
printf("Got connection from %s:%d\n", buffer,
ntohs(client_addr.sin_port));
while (1) {
struct iovec iov = { .iov_base = iobuf,
.iov_len = sizeof(iobuf) };
struct dmabuf_cmsg *dmabuf_cmsg = NULL;
struct dma_buf_sync sync = { 0 };
struct cmsghdr *cm = NULL;
struct msghdr msg = { 0 };
struct dmabuf_token token;
ssize_t ret;
is_devmem = false;
printf("\n\n");
msg.msg_iov = &iov;
msg.msg_iovlen = 1;
msg.msg_control = ctrl_data;
msg.msg_controllen = sizeof(ctrl_data);
ret = recvmsg(client_fd, &msg, MSG_SOCK_DEVMEM);
printf("recvmsg ret=%ld\n", ret);
if (ret < 0 && (errno == EAGAIN || errno == EWOULDBLOCK))
continue;
if (ret < 0) {
perror("recvmsg");
continue;
}
if (ret == 0) {
printf("client exited\n");
goto cleanup;
}
i++;
for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm)) {
if (cm->cmsg_level != SOL_SOCKET ||
(cm->cmsg_type != SCM_DEVMEM_DMABUF &&
cm->cmsg_type != SCM_DEVMEM_LINEAR)) {
fprintf(stdout, "skipping non-devmem cmsg\n");
continue;
}
dmabuf_cmsg = (struct dmabuf_cmsg *)CMSG_DATA(cm);
is_devmem = true;
if (cm->cmsg_type == SCM_DEVMEM_LINEAR) {
/* TODO: process data copied from skb's linear
* buffer.
*/
fprintf(stdout,
"SCM_DEVMEM_LINEAR. dmabuf_cmsg->frag_size=%u\n",
dmabuf_cmsg->frag_size);
continue;
}
token.token_start = dmabuf_cmsg->frag_token;
token.token_count = 1;
total_received += dmabuf_cmsg->frag_size;
printf("received frag_page=%llu, in_page_offset=%llu, frag_offset=%llu, frag_size=%u, token=%u, total_received=%lu, dmabuf_id=%u\n",
dmabuf_cmsg->frag_offset >> PAGE_SHIFT,
dmabuf_cmsg->frag_offset % getpagesize(),
dmabuf_cmsg->frag_offset, dmabuf_cmsg->frag_size,
dmabuf_cmsg->frag_token, total_received,
dmabuf_cmsg->dmabuf_id);
if (dmabuf_cmsg->dmabuf_id != dmabuf_id)
error(1, 0,
"received on wrong dmabuf_id: flow steering error\n");
if (dmabuf_cmsg->frag_size % getpagesize())
non_page_aligned_frags++;
else
page_aligned_frags++;
sync.flags = DMA_BUF_SYNC_READ | DMA_BUF_SYNC_START;
ioctl(buf, DMA_BUF_IOCTL_SYNC, &sync);
if (do_validation)
validate_buffer(
((unsigned char *)buf_mem) +
dmabuf_cmsg->frag_offset,
dmabuf_cmsg->frag_size);
else
print_nonzero_bytes(
((unsigned char *)buf_mem) +
dmabuf_cmsg->frag_offset,
dmabuf_cmsg->frag_size);
sync.flags = DMA_BUF_SYNC_READ | DMA_BUF_SYNC_END;
ioctl(buf, DMA_BUF_IOCTL_SYNC, &sync);
ret = setsockopt(client_fd, SOL_SOCKET,
SO_DEVMEM_DONTNEED, &token,
sizeof(token));
if (ret != 1)
error(1, 0,
"SO_DEVMEM_DONTNEED not enough tokens");
}
if (!is_devmem)
error(1, 0, "flow steering error\n");
printf("total_received=%lu\n", total_received);
}
fprintf(stdout, "%s: ok\n", TEST_PREFIX);
fprintf(stdout, "page_aligned_frags=%lu, non_page_aligned_frags=%lu\n",
page_aligned_frags, non_page_aligned_frags);
fprintf(stdout, "page_aligned_frags=%lu, non_page_aligned_frags=%lu\n",
page_aligned_frags, non_page_aligned_frags);
cleanup:
munmap(buf_mem, dmabuf_size);
close(client_fd);
close(socket_fd);
close(buf);
close(memfd);
close(devfd);
ynl_sock_destroy(ys);
return 0;
}
void run_devmem_tests(void)
{
struct netdev_queue_id *queues;
int devfd, memfd, buf;
struct ynl_sock *ys;
size_t dmabuf_size;
size_t i = 0;
dmabuf_size = getpagesize() * NUM_PAGES;
create_udmabuf(&devfd, &memfd, &buf, dmabuf_size);
/* Configure RSS to divert all traffic from our devmem queues */
if (configure_rss())
error(1, 0, "rss error\n");
queues = calloc(num_queues, sizeof(*queues));
if (configure_headersplit(1))
error(1, 0, "Failed to configure header split\n");
if (!bind_rx_queue(ifindex, buf, queues, num_queues, &ys))
error(1, 0, "Binding empty queues array should have failed\n");
for (i = 0; i < num_queues; i++) {
queues[i]._present.type = 1;
queues[i]._present.id = 1;
queues[i].type = NETDEV_QUEUE_TYPE_RX;
queues[i].id = start_queue + i;
}
if (configure_headersplit(0))
error(1, 0, "Failed to configure header split\n");
if (!bind_rx_queue(ifindex, buf, queues, num_queues, &ys))
error(1, 0, "Configure dmabuf with header split off should have failed\n");
if (configure_headersplit(1))
error(1, 0, "Failed to configure header split\n");
for (i = 0; i < num_queues; i++) {
queues[i]._present.type = 1;
queues[i]._present.id = 1;
queues[i].type = NETDEV_QUEUE_TYPE_RX;
queues[i].id = start_queue + i;
}
if (bind_rx_queue(ifindex, buf, queues, num_queues, &ys))
error(1, 0, "Failed to bind\n");
/* Deactivating a bound queue should not be legal */
if (!configure_channels(num_queues, num_queues - 1))
error(1, 0, "Deactivating a bound queue should be illegal.\n");
/* Closing the netlink socket does an implicit unbind */
ynl_sock_destroy(ys);
}
int main(int argc, char *argv[])
{
int is_server = 0, opt;
while ((opt = getopt(argc, argv, "ls:c:p:v:q:t:f:")) != -1) {
switch (opt) {
case 'l':
is_server = 1;
break;
case 's':
server_ip = optarg;
break;
case 'c':
client_ip = optarg;
break;
case 'p':
port = optarg;
break;
case 'v':
do_validation = atoll(optarg);
break;
case 'q':
num_queues = atoi(optarg);
break;
case 't':
start_queue = atoi(optarg);
break;
case 'f':
ifname = optarg;
break;
case '?':
printf("unknown option: %c\n", optopt);
break;
}
}
ifindex = if_nametoindex(ifname);
for (; optind < argc; optind++)
printf("extra arguments: %s\n", argv[optind]);
run_devmem_tests();
if (is_server)
return do_server();
return 0;
}